"""passlib.crypto.digest -- cryptographic helpers used by the password hashes in passlib
|
|
|
|
.. versionadded:: 1.7
|
|
"""
|
|
#=============================================================================
|
|
# imports
|
|
#=============================================================================
|
|
from __future__ import division
|
|
# core
|
|
import hashlib
|
|
import logging; log = logging.getLogger(__name__)
|
|
try:
|
|
# new in py3.4
|
|
from hashlib import pbkdf2_hmac as _stdlib_pbkdf2_hmac
|
|
if _stdlib_pbkdf2_hmac.__module__ == "hashlib":
|
|
# builtin pure-python backends are slightly faster than stdlib's pure python fallback,
|
|
# so only using stdlib's version if it's backed by openssl's pbkdf2_hmac()
|
|
log.debug("ignoring pure-python hashlib.pbkdf2_hmac()")
|
|
_stdlib_pbkdf2_hmac = None
|
|
except ImportError:
|
|
_stdlib_pbkdf2_hmac = None
|
|
import re
|
|
import os
|
|
from struct import Struct
|
|
from warnings import warn
|
|
# site
|
|
try:
|
|
# https://pypi.python.org/pypi/fastpbkdf2/
|
|
from fastpbkdf2 import pbkdf2_hmac as _fast_pbkdf2_hmac
|
|
except ImportError:
|
|
_fast_pbkdf2_hmac = None
|
|
# pkg
|
|
from passlib import exc
|
|
from passlib.utils import join_bytes, to_native_str, join_byte_values, to_bytes, \
|
|
SequenceMixin, as_bool
|
|
from passlib.utils.compat import irange, int_types, unicode_or_bytes_types, PY3, error_from
|
|
from passlib.utils.decor import memoized_property
|
|
# local
|
|
__all__ = [
|
|
# hash utils
|
|
"lookup_hash",
|
|
"HashInfo",
|
|
"norm_hash_name",
|
|
|
|
# hmac utils
|
|
"compile_hmac",
|
|
|
|
# kdfs
|
|
"pbkdf1",
|
|
"pbkdf2_hmac",
|
|
]
|
|
|
|
#=============================================================================
|
|
# generic constants
|
|
#=============================================================================
|
|
|
|
#: largest value representable as an unsigned 32-bit integer
MAX_UINT32 = 0xFFFFFFFF

#: largest value representable as an unsigned 64-bit integer
MAX_UINT64 = 0xFFFFFFFFFFFFFFFF
|
|
|
|
#=============================================================================
|
|
# hash utils
|
|
#=============================================================================
|
|
|
|
#: table of known hash names, searched by lookup_hash()'s _get_hash_aliases() helper.
#: row format: (hashlib/ssl name, iana name or stand-in, other known aliases ...)
_known_hash_names = [
    # ---- hashes with official IANA-assigned names ----
    # (as of 2012-03 - http://www.iana.org/assignments/hash-function-text-names)
    ("md2", "md2"),  # NOTE: openssl dropped md2 support in v1.0.0
    ("md5", "md5"),
    ("sha1", "sha-1"),
    ("sha224", "sha-224", "sha2-224"),
    ("sha256", "sha-256", "sha2-256"),
    ("sha384", "sha-384", "sha2-384"),
    ("sha512", "sha-512", "sha2-512"),

    # TODO: add sha3 to this table.

    # ---- hashlib/ssl-supported hashes without official IANA names ----
    # (hopefully-) compatible stand-ins have been chosen for the second column.
    ("blake2b", "blake-2b"),
    ("blake2s", "blake-2s"),
    ("md4", "md4"),
    # NOTE: there was an older "ripemd" and "ripemd-128",
    #       but python 2.7+ resolves "ripemd" -> "ripemd160",
    #       so "ripemd" is treated as an alias here.
    ("ripemd160", "ripemd-160", "ripemd"),
]
|
|
|
|
|
|
#: hardcoded digest info, keyed by hashlib name, with values of
#: ``(digest_size, block_size)``; lets size info be reported even when
#: the hash itself isn't present on the current system.
_fallback_info = dict(
    blake2b=(64, 128),
    blake2s=(32, 64),
    md4=(16, 64),
    md5=(16, 64),
    sha1=(20, 64),
    sha224=(28, 64),
    sha256=(32, 64),
    sha384=(48, 128),
    sha3_224=(28, 144),
    sha3_256=(32, 136),
    sha3_384=(48, 104),
    sha3_512=(64, 72),
    sha512=(64, 128),
    shake128=(16, 168),
    shake256=(32, 136),
)
|
|
|
|
|
|
def _gen_fallback_info():
    """
    Build a fresh ``{name: (digest_size, block_size)}`` mapping by running
    :func:`lookup_hash` over every algorithm hashlib reports as available (plus md4).

    Maintenance helper used to regenerate the ``_fallback_info`` table by hand;
    nothing calls it at runtime.
    """
    algorithms = sorted(hashlib.algorithms_available | set(["md4"]))
    records = (lookup_hash(alg) for alg in algorithms)
    return dict(
        (info.name, (info.digest_size, info.block_size))
        for info in records
    )
|
|
|
|
|
|
#: cache of HashInfo instances, populated and consulted by lookup_hash()
_hash_info_cache = dict()
|
|
|
|
def _get_hash_aliases(name):
    """
    internal helper used by :func:`lookup_hash` --
    normalize an arbitrary hash name into a row of known names.

    If the name isn't found in ``_known_hash_names``, a best-guess pair of names
    is synthesized and a log message is emitted (no exception is raised).

    :arg name:
        unnormalized name (native str; anything else is passed through ``to_native_str``)

    :returns:
        tuple with 2+ elements: ``(hashlib_name, iana_name|None, ... 0+ aliases)``.
    """
    original = name

    # coerce to native str, then canonicalize case & separators
    if not isinstance(name, str):
        name = to_native_str(name, 'utf-8', 'hash name')
    name = re.sub("[_ /]", "-", name.strip().lower())

    # strip SCRAM mechanism decoration (see passlib.handlers.scram)
    if name.startswith("scram-"):
        name = name[6:]
        if name.endswith("-plus"):
            name = name[:-5]

    def find_row(candidate):
        """return first table row containing <candidate>, else None"""
        return next((row for row in _known_hash_names if candidate in row), None)

    # direct hit on a standard name or known alias
    row = find_row(name)
    if row:
        return row

    # otherwise see if the name roughly follows the "SHA2-256" style format
    match = re.match(r"(?i)^(?P<name>[a-z]+)-?(?P<rev>\d)?-?(?P<size>\d{3,4})?$", name)
    if not match:
        # can't make sense of it at all; return something usable
        iana_name = name
        hashlib_name = name.replace("-", "_")
        log.warning("normalizing unrecognized hash name and format %r => %r / %r",
                    original, hashlib_name, iana_name)
        return hashlib_name, iana_name

    # rebuild both spellings from the parsed pieces
    base, rev, size = match.group("name", "rev", "size")
    stem = base + rev if rev else base
    iana_name = hashlib_name = stem
    if size:
        iana_name = stem + "-" + size
        hashlib_name = stem + ("_" if rev else "") + size

    # the normalized IANA spelling may be in the table even if the input wasn't
    row = find_row(iana_name)
    if row:
        return row

    # format recognized but name isn't known; fall back to the names built above
    log.info("normalizing unrecognized hash name %r => %r / %r",
             original, hashlib_name, iana_name)
    return hashlib_name, iana_name
|
|
|
|
|
|
#: public ``hashlib`` attributes which are not digest constructors,
#: and so must never be handed back by _get_hash_const()
_HASHLIB_NON_DIGEST_ATTRS = frozenset([
    "new",
    "algorithms",  # py2.7 only
    "algorithms_available",
    "algorithms_guaranteed",
    "pbkdf2_hmac",
    "scrypt",
    "file_digest",
])


def _get_hash_const(name):
    """
    internal helper used by :func:`lookup_hash` --
    lookup hash constructor by name

    :arg name:
        name (normalized to hashlib format, e.g. ``"sha256"``)

    :returns:
        hash constructor, e.g. ``hashlib.sha256()``;
        or None if hash can't be located.
    """
    # check hashlib.<attr> for an efficient constructor.
    # private names, known non-digest helpers (pbkdf2_hmac, scrypt, the
    # algorithms_* sets, ...) and any non-callable attribute are skipped, so an
    # odd name is reported as "not found" instead of returning a bogus object.
    if not name.startswith("_") and name not in _HASHLIB_NON_DIGEST_ATTRS:
        candidate = getattr(hashlib, name, None)
        if callable(candidate):
            return candidate

    # check hashlib.new() in case SSL supports the digest
    new_ssl_hash = hashlib.new
    try:
        # new() should throw ValueError if alg is unknown
        new_ssl_hash(name, b"")
    except ValueError:
        pass
    else:
        # create wrapper function
        # XXX: is there a faster way to wrap this?
        def const(msg=b""):
            return new_ssl_hash(name, msg)
        const.__name__ = name
        const.__module__ = "hashlib"
        const.__doc__ = ("wrapper for hashlib.new(%r),\n"
                         "generated by passlib.crypto.digest.lookup_hash()") % name
        return const

    # use builtin md4 as fallback when not supported by hashlib
    if name == "md4":
        from passlib.crypto._md4 import md4
        return md4

    # XXX: any other modules / registries we should check?
    # TODO: add pysha3 support.

    return None
|
|
|
|
|
|
def lookup_hash(digest,  # *,
                return_unknown=False, required=True):
    """
    Returns a :class:`HashInfo` record containing information about a given hash function.
    Can be used to look up a hash constructor by name, normalize hash name representation, etc.

    :arg digest:
        This can be any of:

        * A string containing a :mod:`!hashlib` digest name (e.g. ``"sha256"``),
        * A string containing an IANA-assigned hash name,
        * A digest constructor function (e.g. ``hashlib.sha256``).

        Case is ignored, underscores are converted to hyphens,
        and various other cleanups are made.
        A :class:`HashInfo` instance is also accepted, and returned unchanged.

    :param required:
        By default (True), this function will throw an :exc:`~passlib.exc.UnknownHashError`
        if no hash constructor can be found, or if the hash is not actually available.
        This also applies when the lookup is answered from the internal cache.

        If this flag is False, it will instead return a dummy :class:`!HashInfo` record
        which will defer throwing the error until its constructor function is called.
        This is mainly used by :func:`norm_hash_name`.

    :param return_unknown:

        .. deprecated:: 1.7.3

            deprecated, and will be removed in passlib 2.0.
            this acts like inverse of **required**.

    :raises passlib.exc.ExpectedTypeError:
        if ``digest`` is not a string, :class:`HashInfo`, or callable.

    :returns HashInfo:
        :class:`HashInfo` instance containing information about specified digest.

        Multiple calls resolving to the same hash should always
        return the same :class:`!HashInfo` instance.
    """
    # legacy alias -- resolved first, so it also governs the cached path below
    if return_unknown:
        required = False

    # check for cached entry
    cache = _hash_info_cache
    try:
        info = cache[digest]
    except (KeyError, TypeError):
        # NOTE: TypeError is to catch 'TypeError: unhashable type' (e.g. HashInfo)
        pass
    else:
        if required and info.error_text is not None:
            # a stub record was cached by an earlier ``required=False`` call;
            # its constructor raises the UnknownHashError this caller asked for.
            info.const()
        return info

    # resolve ``digest`` to ``const`` & ``name_list``
    cache_by_name = True
    if isinstance(digest, unicode_or_bytes_types):
        # normalize name
        name_list = _get_hash_aliases(digest)
        name = name_list[0]
        assert name

        # if name wasn't normalized to hashlib format,
        # get info for normalized name and reuse it.
        if name != digest:
            info = lookup_hash(name, required=required)
            cache[digest] = info
            return info

        # else look up constructor
        # NOTE: may return None, which is handled by HashInfo constructor
        const = _get_hash_const(name)

        # if mock fips mode is enabled, replace with dummy constructor
        # (to replicate how it would behave on a real fips system).
        if const and mock_fips_mode and name not in _fips_algorithms:
            def const(source=b""):
                raise ValueError("%r disabled for fips by passlib set_mock_fips_mode()" % name)

    elif isinstance(digest, HashInfo):
        # handle border case where HashInfo is passed in.
        return digest

    elif callable(digest):
        # try to lookup digest based on its self-reported name
        # (which we trust to be the canonical "hashlib" name)
        const = digest
        name_list = _get_hash_aliases(const().name)
        name = name_list[0]
        other_const = _get_hash_const(name)
        if other_const is None:
            # this is probably a third-party digest we don't know about,
            # so just pass it on through, and register reverse lookup for its name.
            pass

        elif other_const is const:
            # if we got back same constructor, this is just a known stdlib constructor,
            # which was passed in before we had cached it by name. proceed normally.
            pass

        else:
            # if we got back different object, then ``const`` is something else
            # (such as a mock object), in which case we want to skip caching it by name,
            # as that would conflict with real hash.
            cache_by_name = False

    else:
        raise exc.ExpectedTypeError(digest, "digest name or constructor", "digest")

    # create new instance
    info = HashInfo(const=const, names=name_list, required=required)

    # populate cache
    if const is not None:
        cache[const] = info
    if cache_by_name:
        # NOTE: separate loop variable, so ``name`` (captured by the mock-fips
        #       closure above) isn't rebound to an alias.
        for alias in name_list:
            if alias:  # (skips iana name if it's empty)
                assert cache.get(alias) in [None, info], "%r already in cache" % alias
                cache[alias] = info
    return info


#: UT helper for clearing internal cache
lookup_hash.clear_cache = _hash_info_cache.clear
|
|
|
|
|
|
def norm_hash_name(name, format="hashlib"):
    """Normalize hash function name (convenience wrapper for :func:`lookup_hash`).

    :arg name:
        Original hash function name.

        This name can be a Python :mod:`~hashlib` digest name,
        a SCRAM mechanism name, IANA assigned hash name, etc.
        Case is ignored, and underscores are converted to hyphens.

    :param format:
        Naming convention to normalize to.
        Possible values are:

        * ``"hashlib"`` (the default) - normalizes name to be compatible
          with Python's :mod:`!hashlib`.

        * ``"iana"`` - normalizes name to IANA-assigned hash function name.

        When the lookup reports the hash as unknown, a
        :exc:`~passlib.exc.PasslibRuntimeWarning` is issued and the
        heuristically normalized "best guess" name is returned.

    :raises ValueError:
        if ``format`` is not one of the values above
        (checked after the lookup, and after any warning).

    :returns:
        Hash name, returned as native :class:`!str`.
    """
    record = lookup_hash(name, required=False)
    if record.unknown:
        warn("norm_hash_name(): " + record.error_text, exc.PasslibRuntimeWarning)

    if format == "hashlib":
        return record.name
    if format != "iana":
        raise ValueError("unknown format: %r" % (format,))
    return record.iana_name
|
|
|
|
|
|
class HashInfo(SequenceMixin):
    """
    Record containing information about a given hash algorithm, as returned by :func:`lookup_hash`.

    This class exposes the following attributes:

    .. autoattribute:: const
    .. autoattribute:: digest_size
    .. autoattribute:: block_size
    .. autoattribute:: name
    .. autoattribute:: iana_name
    .. autoattribute:: aliases
    .. autoattribute:: supported

    This object can also be treated as a 3-element sequence
    containing ``(const, digest_size, block_size)``.
    """
    #=========================================================================
    # instance attrs
    #=========================================================================

    #: Canonical / hashlib-compatible name (e.g. ``"sha256"``).
    name = None

    #: IANA assigned name (e.g. ``"sha-256"``), may be ``None`` if unknown.
    iana_name = None

    #: Tuple of other known aliases (may be empty)
    aliases = ()

    #: Hash constructor function (e.g. :func:`hashlib.sha256`)
    const = None

    #: Hash's digest size
    digest_size = None

    #: Hash's block size
    block_size = None

    #: set when hash isn't available, will be filled in with string containing error text
    #: that const() will raise.
    error_text = None

    #: set when error_text is due to hash algorithm being completely unknown
    #: (not just unavailable on current system)
    unknown = False

    #=========================================================================
    # init
    #=========================================================================

    def __init__(self,  # *,
                 const, names, required=True):
        """
        initialize new instance.

        :arg const:
            hash constructor (or ``None`` if no constructor could be located).
        :arg names:
            list of 2+ names. should be list of ``(name, iana_name, ... 0+ aliases)``.
            names must be lower-case. only iana name may be None.
        :param required:
            if True (the default), raise :exc:`~passlib.exc.UnknownHashError` immediately
            when the hash is missing or its constructor is unusable; if False,
            install a stub constructor which raises that error when called.
        :raises RuntimeError:
            if the constructor's digest output length doesn't match its reported ``digest_size``.
        """
        # init names
        name = self.name = names[0]
        self.iana_name = names[1]
        self.aliases = names[2:]

        def use_stub_const(msg):
            """
            helper that installs stub constructor which throws specified error <msg>.
            """
            def const(source=b""):
                raise exc.UnknownHashError(msg, name)
            if required:
                # if caller only wants supported digests returned,
                # just throw error immediately...
                const()
                # NOTE: explicit raise, since ``assert "<text>"`` can never fail.
                raise AssertionError("shouldn't get here")
            self.error_text = msg
            self.const = const
            # report preset sizes for known hashes, when we have them
            try:
                self.digest_size, self.block_size = _fallback_info[name]
            except KeyError:
                pass

        # handle "constructor not available" case
        if const is None:
            if names in _known_hash_names:
                msg = "unsupported hash: %r" % name
            else:
                msg = "unknown hash: %r" % name
                self.unknown = True
            use_stub_const(msg)
            return

        # create hash instance to inspect
        try:
            digest_obj = const()
        except ValueError as err:
            # per issue 116, FIPS compliant systems will have a constructor;
            # but it will throw a ValueError with this message. As of 1.7.3,
            # translating this into DisabledHashError.
            # "ValueError: error:060800A3:digital envelope routines:EVP_DigestInit_ex:disabled for fips"
            if "disabled for fips" in str(err).lower():
                msg = "%r hash disabled for fips" % name
            else:
                msg = "internal error in %r constructor\n(%s: %s)" % (name, type(err).__name__, err)
            use_stub_const(msg)
            return

        # store stats about hash
        self.const = const
        self.digest_size = digest_obj.digest_size
        self.block_size = digest_obj.block_size

        # do sanity check on digest size
        if len(digest_obj.digest()) != digest_obj.digest_size:
            raise RuntimeError("%r constructor failed sanity check" % self.name)

        # do sanity check on name.
        if digest_obj.name != self.name:
            warn("inconsistent digest name: %r resolved to %r, which reports name as %r" %
                 (self.name, const, digest_obj.name), exc.PasslibRuntimeWarning)

    #=========================================================================
    # methods
    #=========================================================================
    def __repr__(self):
        return "<lookup_hash(%r): digest_size=%r block_size=%r>" % \
               (self.name, self.digest_size, self.block_size)

    def _as_tuple(self):
        # value tuple; the class docstring describes this object as a 3-element sequence
        return self.const, self.digest_size, self.block_size

    @memoized_property
    def supported(self):
        """
        whether hash is available for use
        (if False, constructor will throw UnknownHashError if called)
        """
        return self.error_text is None

    @memoized_property
    def supported_by_fastpbkdf2(self):
        """helper to detect if hash is supported by fastpbkdf2()

        returns ``None`` if fastpbkdf2 backend isn't loaded, else True / False.
        """
        if not _fast_pbkdf2_hmac:
            return None
        try:
            # probe backend with a throwaway single-round call
            _fast_pbkdf2_hmac(self.name, b"p", b"s", 1)
            return True
        except ValueError:
            # "unsupported hash type"
            return False

    @memoized_property
    def supported_by_hashlib_pbkdf2(self):
        """helper to detect if hash is supported by hashlib.pbkdf2_hmac()

        returns ``None`` if stdlib backend isn't in use, else True / False.
        """
        if not _stdlib_pbkdf2_hmac:
            return None
        try:
            # probe backend with a throwaway single-round call
            _stdlib_pbkdf2_hmac(self.name, b"p", b"s", 1)
            return True
        except ValueError:
            # "unsupported hash type"
            return False

    #=========================================================================
    # eoc
    #=========================================================================
|
|
|
|
|
|
#---------------------------------------------------------------------
# mock fips mode monkeypatch
#---------------------------------------------------------------------

#: flag for detecting if mock fips mode is enabled.
mock_fips_mode = False

#: algorithms allowed under FIPS mode (subset of hashlib.algorithms_available);
#: per https://csrc.nist.gov/Projects/Hash-Functions FIPS 202 list.
#: NOTE(review): lookup_hash() compares against its normalized hashlib-style name;
#: confirm the underscore spellings 'shake_128' / 'shake_256' can actually match.
_fips_algorithms = {
    # FIPS 180-4 and FIPS 202
    'sha1',
    'sha224',
    'sha256',
    'sha384',
    'sha512',
    # 'sha512/224',
    # 'sha512/256',

    # FIPS 202 only
    'sha3_224',
    'sha3_256',
    'sha3_384',
    'sha3_512',
    'shake_128',
    'shake_256',
}
|
|
|
|
|
|
def _set_mock_fips_mode(enable=True):
    """
    UT helper which toggles the module-level ``mock_fips_mode`` flag,
    making lookup_hash() replicate FIPS mode for string lookups.

    :param enable:
        new value for the flag (defaults to True).
    """
    global mock_fips_mode
    mock_fips_mode = enable
    # records cached under the previous setting are no longer valid
    lookup_hash.clear_cache()


# helper for UTs -- allow mock fips mode to be switched on via the environment
if as_bool(os.environ.get("PASSLIB_MOCK_FIPS_MODE")):
    _set_mock_fips_mode()
|
|
|
|
#=============================================================================
|
|
# hmac utils
|
|
#=============================================================================
|
|
|
|
#: 256-byte translation tables used by compile_hmac():
#: each maps byte value ``x`` to ``x ^ 0x5C`` (outer key) or ``x ^ 0x36`` (inner key).
_TRANS_5C = join_byte_values(value ^ 0x5C for value in irange(256))
_TRANS_36 = join_byte_values(value ^ 0x36 for value in irange(256))
|
|
|
|
def compile_hmac(digest, key, multipart=False):
    """
    This function returns an efficient HMAC function, hardcoded with a specific digest & key.
    It can be used via ``hmac = compile_hmac(digest, key)``.

    :arg digest:
        digest name or constructor.

    :arg key:
        secret key as :class:`!bytes` or :class:`!unicode` (unicode will be encoded using utf-8).

    :param multipart:
        request a multipart constructor instead (see return description).

    :returns:
        By default, the returned function has the signature ``hmac(msg) -> digest output``.

        However, if ``multipart=True``, the returned function has the signature
        ``hmac() -> update, finalize``, where ``update(msg)`` may be called multiple times,
        and ``finalize() -> digest_output`` may be repeatedly called at any point to
        calculate the HMAC digest so far.

        The returned object will also have a ``digest_info`` attribute, containing
        a :class:`lookup_hash` instance for the specified digest.

    This function exists, and has the weird signature it does, in order to
    provide as much efficiency as possible, by omitting much of the setup cost
    and features of the stdlib :mod:`hmac` module.
    """
    # all the following was adapted from stdlib's hmac module

    # resolve digest (cached)
    info = lookup_hash(digest)
    const, digest_size, block_size = info
    assert block_size >= 16, "block size too small"

    # prepare key: encode, hash down if longer than a block, zero-pad up to a block
    if not isinstance(key, bytes):
        key = to_bytes(key, param="key")
    key_size = len(key)
    if key_size > block_size:
        key = const(key).digest()
        key_size = digest_size
    shortfall = block_size - key_size
    if shortfall > 0:
        key += b'\x00' * shortfall

    # pre-initialized inner/outer hash states; each call below clones them via .copy()
    clone_inner = const(key.translate(_TRANS_36)).copy
    clone_outer = const(key.translate(_TRANS_5C)).copy

    if not multipart:

        # single-shot function
        def hmac(msg):
            """generated by compile_hmac()"""
            inner = clone_inner()
            inner.update(msg)
            outer = clone_outer()
            outer.update(inner.digest())
            return outer.digest()

    else:
        # multi-part function
        # NOTE: this is slightly slower than the single-shot version,
        #       and should only be used if needed.
        def hmac():
            """generated by compile_hmac(multipart=True)"""
            inner = clone_inner()

            def finalize():
                outer = clone_outer()
                outer.update(inner.digest())
                return outer.digest()

            return inner.update, finalize

    # add info attr
    hmac.digest_info = info
    return hmac
|
|
|
|
#=============================================================================
|
|
# pbkdf1
|
|
#=============================================================================
|
|
def pbkdf1(digest, secret, salt, rounds, keylen=None):
    """pkcs#5 password-based key derivation v1.5

    :arg digest:
        digest name or constructor.

    :arg secret:
        secret to use when generating the key.
        may be :class:`!bytes` or :class:`unicode` (encoded using UTF-8).

    :arg salt:
        salt string to use when generating key.
        may be :class:`!bytes` or :class:`unicode` (encoded using UTF-8).

    :param rounds:
        number of rounds to use to generate key (integer, at least 1).

    :arg keylen:
        number of bytes to generate (if omitted / ``None``, uses digest's native size)

    :raises ValueError:
        if ``rounds`` < 1, ``keylen`` < 0, or ``keylen`` exceeds the digest size.

    :returns:
        raw :class:`bytes` of generated key

    .. note::

        This algorithm has been deprecated, new code should use PBKDF2.
        Among other limitations, ``keylen`` cannot be larger
        than the digest size of the specified hash.
    """
    # resolve digest
    const, digest_size, block_size = lookup_hash(digest)

    # validate secret & salt
    secret = to_bytes(secret, param="secret")
    salt = to_bytes(salt, param="salt")

    # validate rounds
    if not isinstance(rounds, int_types):
        raise exc.ExpectedTypeError(rounds, "int", "rounds")
    if rounds < 1:
        raise ValueError("rounds must be at least 1")

    # validate keylen
    if keylen is None:
        keylen = digest_size
    else:
        if not isinstance(keylen, int_types):
            raise exc.ExpectedTypeError(keylen, "int or None", "keylen")
        if keylen < 0:
            raise ValueError("keylen must be at least 0")
        if keylen > digest_size:
            raise ValueError("keylength too large for digest: %r > %r" %
                             (keylen, digest_size))

    # main pbkdf1 loop: hash (secret + salt), then re-hash the result rounds-1 more times
    state = secret + salt
    remaining = rounds
    while remaining > 0:
        state = const(state).digest()
        remaining -= 1
    return state[:keylen]
|
|
|
|
#=============================================================================
|
|
# pbkdf2
|
|
#=============================================================================
|
|
|
|
#: packs an int as a big-endian unsigned 32-bit value (pbkdf2 block index encoding)
_pack_uint32 = Struct(">L").pack


def pbkdf2_hmac(digest, secret, salt, rounds, keylen=None):
    """pkcs#5 password-based key derivation v2.0 using HMAC + arbitrary digest.

    :arg digest:
        digest name or constructor.

    :arg secret:
        passphrase to use to generate key.
        may be :class:`!bytes` or :class:`unicode` (encoded using UTF-8).

    :arg salt:
        salt string to use when generating key.
        may be :class:`!bytes` or :class:`unicode` (encoded using UTF-8).

    :param rounds:
        number of rounds to use to generate key (integer, at least 1).

    :arg keylen:
        number of bytes to generate (at least 1).
        if omitted / ``None``, will use digest's native output size.

    :raises OverflowError:
        if ``keylen`` would need more than ``MAX_UINT32`` digest-sized blocks.

    :returns:
        raw bytes of generated key

    .. versionchanged:: 1.7

        This function will use the first available of the following backends:

        * `fastpbkdf2 <https://pypi.python.org/pypi/fastpbkdf2>`_
        * :func:`hashlib.pbkdf2_hmac` (only available in py2 >= 2.7.8, and py3 >= 3.4)
        * builtin pure-python backend

        See :data:`passlib.crypto.digest.PBKDF2_BACKENDS` to determine
        which backend(s) are in use.
    """
    # validate secret & salt
    secret = to_bytes(secret, param="secret")
    salt = to_bytes(salt, param="salt")

    # resolve digest
    info = lookup_hash(digest)
    digest_size = info.digest_size

    # validate rounds
    if not isinstance(rounds, int_types):
        raise exc.ExpectedTypeError(rounds, "int", "rounds")
    if rounds < 1:
        raise ValueError("rounds must be at least 1")

    # validate keylen
    if keylen is None:
        keylen = digest_size
    else:
        if not isinstance(keylen, int_types):
            raise exc.ExpectedTypeError(keylen, "int or None", "keylen")
        if keylen < 1:
            # XXX: could allow keylen=0, but want to be compat w/ stdlib
            raise ValueError("keylen must be at least 1")

    # find smallest block count s.t. keylen <= block_count * digest_size
    # (ceiling division); make sure block count won't overflow (per pbkdf2 spec).
    # this corresponds to throwing error if keylen > digest_size * MAX_UINT32
    # NOTE: stdlib will throw error at lower bound (keylen > MAX_SINT32)
    # NOTE: have to do this before other backends are checked, since fastpbkdf2
    #       raises wrong error (InvocationError, not OverflowError)
    block_count = -(-keylen // digest_size)
    if block_count > MAX_UINT32:
        raise OverflowError("keylen too long for digest")

    #
    # check for various high-speed backends
    #

    # ~3x faster than pure-python backend
    # NOTE: have to do this after above guards since fastpbkdf2 lacks bounds checks.
    if info.supported_by_fastpbkdf2:
        return _fast_pbkdf2_hmac(info.name, secret, salt, rounds, keylen)

    # ~1.4x faster than pure-python backend
    # NOTE: have to do this after fastpbkdf2 since hashlib-ssl is slower,
    #       will support larger number of hashes.
    if info.supported_by_hashlib_pbkdf2:
        return _stdlib_pbkdf2_hmac(info.name, secret, salt, rounds, keylen)

    #
    # otherwise use our own implementation
    #

    # hmac function keyed with the secret
    keyed_hmac = compile_hmac(digest, secret)

    # helper which calculates pbkdf2 inner loop efficiently
    calc_block = _get_pbkdf2_looper(digest_size)

    # derive each block (indices are 1-based), concatenate, and trim to size
    derived = join_bytes(
        calc_block(keyed_hmac, keyed_hmac(salt + _pack_uint32(index)), rounds)
        for index in irange(1, block_count + 1)
    )
    return derived[:keylen]
|
|
|
|
#-------------------------------------------------------------------------------------
# pick best choice for pure-python helper
# TODO: consider some alternatives, such as C-accelerated xor_bytes helper if available
#-------------------------------------------------------------------------------------
# NOTE: this env var is only present to support the admin/benchmark_pbkdf2 script
_force_backend = os.environ.get("PASSLIB_PBKDF2_BACKEND") or "any"

if PY3 and _force_backend in ["any", "from-bytes"]:
    from functools import partial

    def _get_pbkdf2_looper(digest_size):
        """return ``helper(keyed_hmac, digest, rounds)`` bound to <digest_size>"""
        return partial(_pbkdf2_looper, digest_size)

    def _pbkdf2_looper(digest_size, keyed_hmac, digest, rounds):
        """
        py3-only implementation of pbkdf2 inner loop;
        uses 'int.from_bytes' + integer XOR

        starting from <digest>, applies <keyed_hmac> ``rounds - 1`` more times,
        and returns the XOR of all the digests as <digest_size> bytes.
        """
        from_bytes = int.from_bytes
        BIG = "big"  # endianess doesn't matter, just has to be consistent
        accum = from_bytes(digest, BIG)
        for _ in irange(rounds - 1):
            digest = keyed_hmac(digest)
            accum ^= from_bytes(digest, BIG)
        return accum.to_bytes(digest_size, BIG)

    _builtin_backend = "from-bytes"

elif _force_backend in ["any", "unpack", "from-bytes"]:
    from struct import Struct
    from passlib.utils import sys_bits

    _have_64_bit = (sys_bits >= 64)

    #: cache used by _get_pbkdf2_looper
    _looper_cache = {}

    def _get_pbkdf2_looper(digest_size):
        """
        We want a helper function which performs equivalent of the following::

            def helper(keyed_hmac, digest, rounds):
                accum = digest
                for _ in irange(rounds - 1):
                    digest = keyed_hmac(digest)
                    accum ^= digest
                return accum

        However, no efficient way to implement "bytes ^ bytes" in python.
        Instead, using approach where we dynamically compile a helper function based
        on digest size. Instead of a single `accum` var, this helper breaks the digest
        into a series of integers.

        It stores these in a series of `acc_<i>` vars, and performs `accum ^= digest`
        by unpacking digest and performing xor for each "acc_<i> ^= dig_<i>".
        this keeps everything in locals, avoiding excessive list creation, encoding or decoding,
        etc.

        :param digest_size:
            digest size to compile for, in bytes. (must be multiple of 4).

        :raises NotImplementedError:
            if digest size isn't a multiple of 4.

        :return:
            helper function with call signature outlined above.
        """
        #
        # cache helpers
        #
        try:
            return _looper_cache[digest_size]
        except KeyError:
            pass

        #
        # figure out most efficient struct format to unpack digest into list of native ints
        #
        if _have_64_bit and not digest_size & 0x7:
            # digest size multiple of 8, on a 64 bit system -- use array of UINT64
            count = (digest_size >> 3)
            fmt = "=%dQ" % count
        elif not digest_size & 0x3:
            if _have_64_bit:
                # digest size multiple of 4, on a 64 bit system -- use array of UINT64 + 1 UINT32
                count = (digest_size >> 3)
                fmt = "=%dQI" % count
                count += 1
            else:
                # digest size multiple of 4, on a 32 bit system -- use array of UINT32
                count = (digest_size >> 2)
                fmt = "=%dI" % count
        else:
            # stopping here, cause no known hashes have digest size that isn't multiple of 4 bytes.
            # if needed, could go crazy w/ "H" & "B"
            raise NotImplementedError("unsupported digest size: %d" % digest_size)
        struct = Struct(fmt)

        #
        # build helper source
        # NOTE: indentation inside these literals is significant -- the generated
        #       text is compiled as python, so the loop body must be nested
        #       one level deeper than the ``for`` statement.
        #
        tdict = dict(
            digest_size=digest_size,
            accum_vars=", ".join("acc_%d" % i for i in irange(count)),
            digest_vars=", ".join("dig_%d" % i for i in irange(count)),
        )

        # head of function
        source = (
            "def helper(keyed_hmac, digest, rounds):\n"
            "    '''pbkdf2 loop helper for digest_size={digest_size}'''\n"
            "    unpack_digest = struct.unpack\n"
            "    {accum_vars} = unpack_digest(digest)\n"
            "    for _ in irange(1, rounds):\n"
            "        digest = keyed_hmac(digest)\n"
            "        {digest_vars} = unpack_digest(digest)\n"
        ).format(**tdict)

        # xor digest (still inside the loop body)
        for i in irange(count):
            source += "        acc_%d ^= dig_%d\n" % (i, i)

        # return result
        source += "    return struct.pack({accum_vars})\n".format(**tdict)

        #
        # compile helper
        # NOTE: source is built solely from <digest_size>, never from caller-supplied text.
        #
        code = compile(source, "<generated by passlib.crypto.digest._get_pbkdf2_looper()>", "exec")
        gdict = dict(irange=irange, struct=struct)
        ldict = dict()
        eval(code, gdict, ldict)
        helper = ldict['helper']
        if __debug__:
            helper.__source__ = source

        #
        # store in cache
        #
        _looper_cache[digest_size] = helper
        return helper

    _builtin_backend = "unpack"

else:
    assert _force_backend in ["any", "hexlify"]

    # XXX: older & slower approach that used int(hexlify()),
    #      keeping it around for a little while just for benchmarking.

    from binascii import hexlify as _hexlify
    from passlib.utils import int_to_bytes

    def _get_pbkdf2_looper(digest_size):
        """return ``helper(keyed_hmac, digest, rounds)``; <digest_size> is ignored by this backend"""
        return _pbkdf2_looper

    def _pbkdf2_looper(keyed_hmac, digest, rounds):
        """pbkdf2 inner loop using int(hexlify()) + integer XOR"""
        hexlify = _hexlify
        accum = int(hexlify(digest), 16)
        for _ in irange(rounds - 1):
            digest = keyed_hmac(digest)
            accum ^= int(hexlify(digest), 16)
        return int_to_bytes(accum, len(digest))

    _builtin_backend = "hexlify"
|
|
|
|
# helper for benchmark script -- disable hashlib, fastpbkdf2 support if builtin requested
if _force_backend == _builtin_backend:
    _fast_pbkdf2_hmac = _stdlib_pbkdf2_hmac = None

# expose info about what backends are active, in order of preference
PBKDF2_BACKENDS = []
if _fast_pbkdf2_hmac:
    PBKDF2_BACKENDS.append("fastpbkdf2")
if _stdlib_pbkdf2_hmac:
    PBKDF2_BACKENDS.append("hashlib-ssl")
PBKDF2_BACKENDS.append("builtin-" + _builtin_backend)

# *very* rough estimate of relative speed (compared to sha256 using 'unpack' backend on 64bit arch)
if _fast_pbkdf2_hmac:
    PBKDF2_SPEED_FACTOR = 3
elif _stdlib_pbkdf2_hmac:
    PBKDF2_SPEED_FACTOR = 1.4
else:
    # remaining backends have *some* difference in performance, but not enough to matter
    PBKDF2_SPEED_FACTOR = 1
|
|
|
|
#=============================================================================
|
|
# eof
|
|
#=============================================================================
|