Problem
I am writing a python password manager, and I know there’s a lot of scrutiny that goes into storing passwords (don’t worry, mine aren’t plaintext). I was hoping that this community could help me improve style, use of libraries, or anything else. Any and all pointers are gladly accepted.
There were a few ideas that I implemented here:
- encrypting each password with a unique salt, even in memory
- encrypting each database with a unique salt when they are stored long-term
- be able to save to a database file (custom format)
- be able to read from a database file (custom format)
I know that there are a lot of services that do this kind of thing already, but I thought I’d give it a spin, to learn and have fun. Some samples of how to use the library are provided by the runner file.
As this got a lot of attention, my most recent code will be kept on this GitHub repo.
runner:
import sys, os
from .passdb import PassDB
if __name__ == "__main__":
a = PassDB()
# print(a)
a.password = "password"
a.set_entry("user", "localhost", "sample_password")
# print(a.enc_str())
a_copy = PassDB.open_db(a.enc_str(), "password")
# print(a_copy.password)
if a_copy is not None:
print(a_copy.get_entry("user@localhost"))
print(a_copy.get_password("user@localhost"))
a_copy.save_as("tmp.passdb", "sample Password")
passdb.py
:
import base64
import hashlib
import pandas
from Crypto import Random
from Crypto.Cipher import AES
import json
import re
from io import StringIO
import datetime
class PassDB(object):
_valid_init_fields = ["data", "path", "password", "settings"]
version = "Version 0.0.1"
settings: dict
data: pandas.DataFrame
_defaults = {
"salt_size": 64,
"block_size": 32, # Using AES256
"enc_sample_content": "The provided password is correct",
"salt": None,
"path": None,
"hash_depth": 9
}
_format = """### PYPASSMAN {version} ###
{settings}
### SAMPLE ###
{enc_sample}
### DATA ###
{data}
"""
def __init__(self, *args, **kwargs):
if len(args) > 3:
raise TypeError("Too Many Arguments")
if len(args) > 2:
self.data = args[2]
else:
self.data = None
if len(args) > 1:
self.password = args[1]
else:
self.password = None
if len(args) > 0:
self.path = args[0]
else:
self.path = None
for key, arg in kwargs.items():
if key in self._valid_init_fields:
setattr(self, key, arg)
if self.data is None:
self.data = pandas.DataFrame(
columns=[
"account",
"hostname",
"salt",
"password",
"hash_depth",
"dateModified",
"dateCreated"
]
)
if getattr(self, "settings", None) is None:
self.settings = self._defaults.copy()
if self.settings.get("salt", None) is None:
self.settings["salt"] = base64.b64encode(Random.new().read(
self.settings["salt_size"]
)).decode("utf-8")
for key in self._defaults.keys():
if key not in self.settings:
self.settings[key] = self._defaults[key]
@classmethod
def open_db(cls, raw, password):
settings, sample, data = (*map(
lambda string: string.strip(),
re.split(r"###.*###n", raw)[1:]
),)
settings = json.loads(settings)
sample = cls._decrypt(sample, password, settings["salt"], settings["hash_depth"])
if not sample == settings["enc_sample_content"]:
raise ValueError(
"Cannot open PassDB: incorrect password provided")
data = cls._decrypt(data, password, settings["salt"], settings["hash_depth"])
data = pandas.read_csv(StringIO(data))
output = cls(
settings=settings,
data=data,
password=password
)
return output
def save_as(self, path, password):
settings_cp = self.settings.copy()
settings_cp["path"] = path
new_dict = self.__class__(
data = self.data,
path = path,
password = password,
settings = settings_cp
)
new_dict.save()
return True
def save(self):
with open(self.path, "w+") as dest:
enc_data = self._encrypt(
self.data.to_csv(index_label="index"),
self.password, self.settings["salt"],
self.settings["hash_depth"]
)
enc_sample = self._encrypt(
self.settings["enc_sample_content"],
self.password, self.settings["salt"],
self.settings["hash_depth"])
dest.write(self._format.format(
version=str(self.version),
settings=json.dumps(self.settings),
data=enc_data,
enc_sample=enc_sample
))
@classmethod
def _encrypt(cls, raw, password, salt, hash_depth):
raw = cls._pad(raw)
iv = Random.new().read(AES.block_size)
salt = base64.b64decode(salt)
key = hashlib.sha256(
str(password).encode() + salt
).digest()
for i in range(hash_depth):
key = hashlib.sha256(key + salt).digest()
cipher = AES.new(key, AES.MODE_CBC, iv)
return base64.b64encode(iv + cipher.encrypt(raw)).decode("utf-8")
@classmethod
def _decrypt(cls, enc, password, salt, hash_depth):
enc = base64.b64decode(enc)
iv = enc[:AES.block_size]
salt = base64.b64decode(salt)
key = hashlib.sha256(
password.encode() + salt
).digest()
for i in range(hash_depth):
key = hashlib.sha256(key + salt).digest()
cipher = AES.new(key, AES.MODE_CBC, iv)
try:
return cls._unpad(
cipher.decrypt(
enc[AES.block_size:]
)
).decode('utf-8')
except UnicodeDecodeError:
raise ValueError("Incorrect Password")
@classmethod
def _pad(cls, s):
bs = cls._defaults["block_size"]
return (
s + (bs - len(s) % bs) *
chr(bs - len(s) % bs)
)
@staticmethod
def _unpad(s):
return s[:-ord(s[len(s)-1:])]
def enc_str(self):
enc_data = self._encrypt(
self.data.to_csv(index_label="index"),
self.password, self.settings["salt"],
self.settings["hash_depth"]
)
enc_sample = self._encrypt(
self.settings["enc_sample_content"],
self.password, self.settings["salt"],
self.settings["hash_depth"]
)
return (self._format.format(
version=str(self.version),
enc_sample=enc_sample,
settings=json.dumps(self.settings),
data=enc_data
))
def __str__(self):
path = self.settings["path"]
return "PassDB <{} entries{}>".format(
len(self.data),
" at '{}'".format(path) if path is not None else ""
)
def set_entry(self, *args):
account, hostname, password = None, None, None
if len(args) == 1:
account, hostname_password = args[0].split("@")
hostname, password, other = hostname_password.split(":")
elif len(args) == 2:
account_hostname, password = args
account, hostname = account_hostname.split("@")
elif len(args) == 3:
account, hostname, password = args
else:
raise ValueError("""
PassDB.set_entry :: Too many arguments
usage(1): get_password(account, hostname, password)
usage(2): get_password("{account}@{hostname}", password)
usage(3): get_password("{account}@{hostname}:{password}") """
)
for char in (":", "@"):
for item in account, hostname, password:
if char in item:
raise ValueError("""
account, hostname, and password cannot contain colon (:) or at symbol (@)""")
if len(self.data) > 0:
for index, entry in self.data.iterrows():
if entry["account"] == account and entry["hostname"] == hostname:
salt = base64.b64encode(Random.new().read(
self.settings["salt_size"]
)).decode("utf-8")
password = self._encrypt(
password,
self.settings["salt"],
salt,
self.settings["hash_depth"]
)
self.data.loc[index] = (
account, hostname,
salt, password,
self.settings["hash_depth"],
str(datetime.datetime.utcnow().isoformat()),
str(datetime.datetime.utcnow().isoformat())
)
else:
salt = base64.b64encode(Random.new().read(
self.settings["salt_size"]
)).decode("utf-8")
password = self._encrypt(
password,
self.settings["salt"],
salt,
self.settings["hash_depth"]
)
self.data.loc[0] = (
account,
hostname,
salt,
password,
self.settings["hash_depth"],
str(datetime.datetime.utcnow().isoformat()),
str(datetime.datetime.utcnow().isoformat())
)
def get_entry(self, *args):
if len(args) == 1:
account, hostname = args[0].split("@")
elif len(args) == 2:
account, hostname = args
else:
raise ValueError("""
PassDB.get_entry :: Too many arguments
usage(1): get_entry(account, hostname)
usage(2): get_entry("{account}@{hostname}")""")
if(getattr(self, "password") is None):
raise ValueError("Cannot get entry when PassDB instance password is None")
if(len(self.data)) == 0:
return None
for index, entry in self.data.iterrows():
if entry["account"] == account and entry["hostname"] == hostname:
return entry
return None
def get_password(self, *args):
if len(args) == 1:
account, hostname = args[0].split("@")
elif len(args) == 2:
account, hostname = args
else:
raise ValueError("""
PassDB.get_password :: Too many arguments
usage(1): get_password(account, hostname)
usage(2): get_password("{account}@{hostname}")""")
entry = self.get_entry(account, hostname)
if isinstance(entry["password"], str):
return self._decrypt(entry["password"], self.settings["salt"], entry["salt"], entry["hash_depth"])
raise ValueError("Password for {account}@{hostname} in unexpected format".format(**entry))
```
Solution
Some general tips:
- The runner should use argparse to parse arguments. It most definitely should not hardcode passwords.
(object)
is redundant in Python 3 class definitions.-
I’d recommend running any Python code through Black, flake8 and mypy with a strict configuration like this one:
[flake8] doctests = true exclude = .git max-complexity = 5 max-line-length = 120 ignore = W503,E203 [mypy] check_untyped_defs = true disallow_untyped_defs = true ignore_missing_imports = true no_implicit_optional = true warn_redundant_casts = true warn_return_any = true warn_unused_ignores = true
- You reuse variable names with completely different semantics. This is a really bad idea for understanding what the code is doing and following along even otherwise trivial logic. For example,
settings = json.loads(settings)
means that settings is originally astr
, effectively a serialized JSON object, and afterwards adict
. These have completely different semantics and interaction patterns. The easiest way to deal with this is to treat almost every variable as immutable, and naming the variables according to what they really are. For example,settings = json.loads(serialized_settings)
. - Names should be descriptive, for example
password_database = PasswordDatabase()
. - Don’t use
*args
and**kwargs
unless you need dynamic parameter lists. Rather than indexing*args
you should use named parameters. If they have default values those should go in the method signature. .get(foo, None)
can be simplified to.get(foo)
–get()
returnsNone
by default.if foo is None
can in the vast majority of cases be changed to the more idiomaticif foo
.- I would highly recommend using a well-known open format such as the KeePass one for storing this data.
-
This should not be in there:
if not sample == settings["enc_sample_content"]: raise ValueError( "Cannot open PassDB: incorrect password provided")
- There is a lot of encoding and decoding happening, which greatly obfuscates the state and looks unnecessary in several places.
- I would not trust this sort of code without a comprehensive test suite.
With the caveat that I’m not a cryptographer:
- Salting does not make sense unless you’re hashing the password (which you don’t want to do in this case). I’ll refrain from any other comments on how the salting is done unless someone corrects this.
Something about the cryptography:
- Do you still use the unmaintained PyCrypto library or the new PyCryptodome (a maintained mostly compatible drop-in-replacement)?
- You are using the CBC mode correctly (random IV for encryption), which is good.
- Data is not authenticated – even encrypted data can be changed without the possibility to detect. You can use HMAC (hash based message authentication code) or an AEAD (authenticated encryption with additional data) encryption mode.
- Your password derivation function has good ideas (Rounds + Salt), but is still a bit weak: Only 9 rounds by default are too less for todays standards. As the derivation functions apply the same ideas as for password storage, consider looking at those: E.g. PBKDF2 (which is included in Python) or Argon2 (one of the most modern).
concerning is None
In case of self.data
(pandas DataFrame) your usage of if foo is None
is the only valid option. In the other cases I disagree with the opinion, that if not foo
is better than if foo is None
, as it is in no way generally correct to assume that an empty object should be handled by the if-clause. An if foo is None
explicitley tells me, that there is only a single case, that needs special treatment. However, you have some rather strange constructs: I don’t see the reason for using getattr
in if(getattr(self, "password") is None)
(also: redundant parentheses). This should be just if self.password is None
– or if not self.password
in case you also want to refuse empty passwords. There are other ones, but they mostly originate from you rather complicated __init__
mechanisms.
concerning __init__
Your constructor is too complicated. It either takes keyword arguments, that it maps, or maps arguments that may come via command line. I highly recommend to split the two cases: Create an alternative constructor as classmethod from_cli
that parses the command line arguments (argparse
or similar), and uses them as named arguments for the real constructor, that needs a clear signature like __init__(self, data=None, path=None, password=None, settings=None)
and sets the member variables explicitely. That way it’s much easier to grasp, what state an instance of PassDB
is in after creation.