"""passlib.utils -- helpers for writing password hashes"""
|
|
#=============================================================================
|
|
# imports
|
|
#=============================================================================
|
|
from passlib.utils.compat import JYTHON
|
|
# core
|
|
from binascii import b2a_base64, a2b_base64, Error as _BinAsciiError
|
|
from base64 import b64encode, b64decode
|
|
try:
|
|
from collections.abc import Sequence
|
|
from collections.abc import Iterable
|
|
except ImportError:
|
|
# py2 compat
|
|
from collections import Sequence
|
|
from collections import Iterable
|
|
from codecs import lookup as _lookup_codec
|
|
from functools import update_wrapper
|
|
import itertools
|
|
import inspect
|
|
import logging; log = logging.getLogger(__name__)
|
|
import math
|
|
import os
|
|
import sys
|
|
import random
|
|
import re
|
|
if JYTHON: # pragma: no cover -- runtime detection
|
|
# Jython 2.5.2 lacks stringprep module -
|
|
# see http://bugs.jython.org/issue1758320
|
|
try:
|
|
import stringprep
|
|
except ImportError:
|
|
stringprep = None
|
|
_stringprep_missing_reason = "not present under Jython"
|
|
else:
|
|
import stringprep
|
|
import time
|
|
if stringprep:
|
|
import unicodedata
|
|
try:
|
|
import threading
|
|
except ImportError:
|
|
# module optional before py37
|
|
threading = None
|
|
import timeit
|
|
import types
|
|
from warnings import warn
|
|
# site
|
|
# pkg
|
|
from passlib.utils.binary import (
|
|
# [remove these aliases in 2.0]
|
|
BASE64_CHARS, AB64_CHARS, HASH64_CHARS, BCRYPT_CHARS,
|
|
Base64Engine, LazyBase64Engine, h64, h64big, bcrypt64,
|
|
ab64_encode, ab64_decode, b64s_encode, b64s_decode
|
|
)
|
|
from passlib.utils.decor import (
|
|
# [remove these aliases in 2.0]
|
|
deprecated_function,
|
|
deprecated_method,
|
|
memoized_property,
|
|
classproperty,
|
|
hybrid_method,
|
|
)
|
|
from passlib.exc import ExpectedStringError, ExpectedTypeError
|
|
from passlib.utils.compat import (add_doc, join_bytes, join_byte_values,
|
|
join_byte_elems, irange, imap, PY3, u,
|
|
join_unicode, unicode, byte_elem_value, nextgetter,
|
|
unicode_or_str, unicode_or_bytes_types,
|
|
get_method_function, suppress_cause, PYPY)
|
|
# local
__all__ = [
    # constants
    'JYTHON',
    'sys_bits',
    'unix_crypt_schemes',
    'rounds_cost_values',

    # unicode helpers
    'consteq',
    'saslprep',

    # bytes helpers
    "xor_bytes",
    "render_bytes",

    # encoding helpers
    'is_same_codec',
    'is_ascii_safe',
    'to_bytes',
    'to_unicode',
    'to_native_str',

    # host OS
    'has_crypt',
    'test_crypt',
    'safe_crypt',
    'tick',

    # randomness
    'rng',
    'getrandbytes',
    'getrandstr',
    'generate_password',

    # object type / interface tests
    'is_crypt_handler',
    'is_crypt_context',
    'has_rounds_info',
    'has_salt_info',
]

#=============================================================================
# constants
#=============================================================================

# bitsize of system architecture (32 or 64)
sys_bits = int(math.log(sys.maxsize if PY3 else sys.maxint, 2) + 1.5)

# list of hash algs supported by crypt() on at least one OS.
# XXX: move to .registry for passlib 2.0?
unix_crypt_schemes = [
    "sha512_crypt", "sha256_crypt",
    "sha1_crypt", "bcrypt",
    "md5_crypt",
    # "bsd_nthash",
    "bsdi_crypt", "des_crypt",
]

# list of rounds_cost constants
rounds_cost_values = [ "linear", "log2" ]

# legacy import, will be removed in 1.8
from passlib.exc import MissingBackendError

# internal helpers
_BEMPTY = b''
_UEMPTY = u("")
_USPACE = u(" ")

# maximum password size which passlib will allow; see exc.PasswordSizeError
MAX_PASSWORD_SIZE = int(os.environ.get("PASSLIB_MAX_PASSWORD_SIZE") or 4096)

#=============================================================================
# type helpers
#=============================================================================

class SequenceMixin(object):
    """
    helper which lets result object act like a fixed-length sequence.
    subclass just needs to provide :meth:`_as_tuple()`.
    """
    def _as_tuple(self):
        raise NotImplementedError("implement in subclass")

    def __repr__(self):
        return repr(self._as_tuple())

    def __getitem__(self, idx):
        return self._as_tuple()[idx]

    def __iter__(self):
        return iter(self._as_tuple())

    def __len__(self):
        return len(self._as_tuple())

    def __eq__(self, other):
        return self._as_tuple() == other

    def __ne__(self, other):
        return not self.__eq__(other)

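# Illustrative usage sketch (an assumed example, not part of the original module):
# a subclass only needs to supply _as_tuple() to gain indexing, iteration, length,
# and tuple-equality behavior::
#
#   class Pair(SequenceMixin):
#       def __init__(self, left, right):
#           self.left, self.right = left, right
#       def _as_tuple(self):
#           return (self.left, self.right)
#
#   Pair(1, 2)[0] -> 1;  len(Pair(1, 2)) -> 2;  Pair(1, 2) == (1, 2) -> True
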
if PY3:
    # getargspec() is deprecated, use this under py3.
    # even though it's a lot more awkward to get basic info :|

    _VAR_KEYWORD = inspect.Parameter.VAR_KEYWORD
    _VAR_ANY_SET = set([_VAR_KEYWORD, inspect.Parameter.VAR_POSITIONAL])

    def accepts_keyword(func, key):
        """test if function accepts specified keyword"""
        params = inspect.signature(get_method_function(func)).parameters
        if not params:
            return False
        arg = params.get(key)
        if arg and arg.kind not in _VAR_ANY_SET:
            return True
        # XXX: annoying what we have to do to determine if VAR_KWDS in use.
        return params[list(params)[-1]].kind == _VAR_KEYWORD

else:

    def accepts_keyword(func, key):
        """test if function accepts specified keyword"""
        spec = inspect.getargspec(get_method_function(func))
        return key in spec.args or spec.keywords is not None

def update_mixin_classes(target, add=None, remove=None, append=False,
                         before=None, after=None, dryrun=False):
    """
    helper to update mixin classes installed in target class.

    :param target:
        target class whose bases will be modified.

    :param add:
        class / classes to install into target's base class list.

    :param remove:
        class / classes to remove from target's base class list.

    :param append:
        by default, prepends mixins to front of list.
        if True, appends to end of list instead.

    :param after:
        optionally make sure all mixins are inserted after
        this class / classes.

    :param before:
        optionally make sure all mixins are inserted before
        this class / classes.

    :param dryrun:
        optionally perform all calculations / raise errors,
        but don't actually modify the class.
    """
    if isinstance(add, type):
        add = [add]

    bases = list(target.__bases__)

    # strip out requested mixins
    if remove:
        if isinstance(remove, type):
            remove = [remove]
        for mixin in remove:
            if add and mixin in add:
                continue
            if mixin in bases:
                bases.remove(mixin)

    # add requested mixins
    if add:
        for mixin in add:
            # if mixin already present (explicitly or not), leave alone
            if any(issubclass(base, mixin) for base in bases):
                continue

            # determine insertion point
            if append:
                for idx, base in enumerate(bases):
                    if issubclass(mixin, base):
                        # don't insert mixin after one of its own bases
                        break
                    if before and issubclass(base, before):
                        # don't insert mixin after any <before> classes.
                        break
                else:
                    # append to end
                    idx = len(bases)
            elif after:
                for end_idx, base in enumerate(reversed(bases)):
                    if issubclass(base, after):
                        # don't insert mixin before any <after> classes.
                        idx = len(bases) - end_idx
                        assert bases[idx-1] == base
                        break
                else:
                    idx = 0
            else:
                # insert at start
                idx = 0

            # insert mixin
            bases.insert(idx, mixin)

    # modify class
    if not dryrun:
        target.__bases__ = tuple(bases)

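# Illustrative sketch (hypothetical classes, assumed behavior): given
#
#   class Base(object): pass
#   class ExtraMixin(object): pass
#   class Target(Base): pass
#
# calling ``update_mixin_classes(Target, add=ExtraMixin)`` prepends the mixin,
# leaving ``Target.__bases__ == (ExtraMixin, Base)``; a later call with
# ``remove=ExtraMixin`` strips it back out. ``dryrun=True`` performs the same
# checks without touching the class.
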
#=============================================================================
# collection helpers
#=============================================================================
def batch(source, size):
    """
    split iterable into chunks of <size> elements.
    """
    if size < 1:
        raise ValueError("size must be positive integer")
    if isinstance(source, Sequence):
        end = len(source)
        i = 0
        while i < end:
            n = i + size
            yield source[i:n]
            i = n
    elif isinstance(source, Iterable):
        itr = iter(source)
        while True:
            chunk_itr = itertools.islice(itr, size)
            try:
                first = next(chunk_itr)
            except StopIteration:
                break
            yield itertools.chain((first,), chunk_itr)
    else:
        raise TypeError("source must be iterable")

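# Illustrative usage sketch (assumed example): sequences yield slices, other
# iterables yield sub-iterators::
#
#   >>> [list(chunk) for chunk in batch([1, 2, 3, 4, 5], 2)]
#   [[1, 2], [3, 4], [5]]
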
#=============================================================================
# unicode helpers
#=============================================================================

# XXX: should this be moved to passlib.crypto, or compat backports?

def consteq(left, right):
    """Check two strings/bytes for equality.

    This function uses an approach designed to prevent
    timing analysis, making it appropriate for cryptography.
    a and b must both be of the same type: either str (ASCII only),
    or any type that supports the buffer protocol (e.g. bytes).

    Note: If a and b are of different lengths, or if an error occurs,
    a timing attack could theoretically reveal information about the
    types and lengths of a and b--but not their values.
    """
    # NOTE:
    # resources & discussions considered in the design of this function:
    # hmac timing attack --
    #     http://rdist.root.org/2009/05/28/timing-attack-in-google-keyczar-library/
    # python developer discussion surrounding similar function --
    #     http://bugs.python.org/issue15061
    #     http://bugs.python.org/issue14955

    # validate types
    if isinstance(left, unicode):
        if not isinstance(right, unicode):
            raise TypeError("inputs must be both unicode or both bytes")
        is_py3_bytes = False
    elif isinstance(left, bytes):
        if not isinstance(right, bytes):
            raise TypeError("inputs must be both unicode or both bytes")
        is_py3_bytes = PY3
    else:
        raise TypeError("inputs must be both unicode or both bytes")

    # do size comparison.
    # NOTE: the double-if construction below is done deliberately, to ensure
    # the same number of operations (including branches) is performed regardless
    # of whether left & right are the same size.
    same_size = (len(left) == len(right))
    if same_size:
        # if sizes are the same, setup loop to perform actual check of contents.
        tmp = left
        result = 0
    if not same_size:
        # if sizes aren't the same, set 'result' so equality will fail regardless
        # of contents. then, to ensure we do exactly 'len(right)' iterations
        # of the loop, just compare 'right' against itself.
        tmp = right
        result = 1

    # run constant-time string comparison
    # TODO: use izip instead (but first verify it's faster than zip for this case)
    if is_py3_bytes:
        for l, r in zip(tmp, right):
            result |= l ^ r
    else:
        for l, r in zip(tmp, right):
            result |= ord(l) ^ ord(r)
    return result == 0

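# Illustrative usage sketch (assumed example): both arguments must share a type.
#
#   >>> consteq(u"secret", u"secret")
#   True
#   >>> consteq(b"abc", b"abd")
#   False
#
# mixing unicode and bytes (e.g. ``consteq(u"abc", b"abc")``) raises TypeError.
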
# keep copy of this around since stdlib's version throws error on non-ascii chars in unicode strings.
# our version does handle them, though it suffers from some underlying VM issues. but something is better than
# nothing for plaintext hashes, which need this. everything else should use consteq(),
# since the stdlib one is going to be as good / better in the general case.
str_consteq = consteq

try:
    # for py3.3 and up, use the stdlib version
    from hmac import compare_digest as consteq
except ImportError:
    pass

# TODO: could check for cryptography package's version,
#       but only operates on bytes, so would need a wrapper,
#       or separate consteq() into a unicode & a bytes variant.
# from cryptography.hazmat.primitives.constant_time import bytes_eq as consteq

def splitcomma(source, sep=","):
    """split comma-separated string into list of elements,
    stripping whitespace.
    """
    source = source.strip()
    if source.endswith(sep):
        source = source[:-1]
    if not source:
        return []
    return [ elem.strip() for elem in source.split(sep) ]

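# Illustrative usage sketch (assumed example)::
#
#   >>> splitcomma(" a , b,c ,")
#   ['a', 'b', 'c']
#   >>> splitcomma("")
#   []
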
def saslprep(source, param="value"):
    """Normalizes unicode strings using SASLPrep stringprep profile.

    The SASLPrep profile is defined in :rfc:`4013`.
    It provides a uniform scheme for normalizing unicode usernames
    and passwords before performing byte-value sensitive operations
    such as hashing. Among other things, it normalizes diacritic
    representations, removes non-printing characters, and forbids
    invalid characters such as ``\\n``. Properly internationalized
    applications should run user passwords through this function
    before hashing.

    :arg source:
        unicode string to normalize & validate

    :param param:
        Optional noun identifying source parameter in error messages
        (Defaults to the string ``"value"``). This is mainly useful to make the caller's error
        messages make more sense contextually.

    :raises ValueError:
        if any characters forbidden by the SASLPrep profile are encountered.

    :raises TypeError:
        if input is not :class:`!unicode`

    :returns:
        normalized unicode string

    .. note::

        This function is not available under Jython,
        as the Jython stdlib is missing the :mod:`!stringprep` module
        (`Jython issue 1758320 <http://bugs.jython.org/issue1758320>`_).

    .. versionadded:: 1.6
    """
    # saslprep - http://tools.ietf.org/html/rfc4013
    # stringprep - http://tools.ietf.org/html/rfc3454
    #              http://docs.python.org/library/stringprep.html

    # validate type
    # XXX: support bytes (e.g. run through want_unicode)?
    #      might be easier to just integrate this into cryptcontext.
    if not isinstance(source, unicode):
        raise TypeError("input must be unicode string, not %s" %
                        (type(source),))

    # mapping stage
    #   - map non-ascii spaces to U+0020 (stringprep C.1.2)
    #   - strip 'commonly mapped to nothing' chars (stringprep B.1)
    in_table_c12 = stringprep.in_table_c12
    in_table_b1 = stringprep.in_table_b1
    data = join_unicode(
        _USPACE if in_table_c12(c) else c
        for c in source
        if not in_table_b1(c)
        )

    # normalize to KC form
    data = unicodedata.normalize('NFKC', data)
    if not data:
        return _UEMPTY

    # check for invalid bi-directional strings.
    # stringprep requires the following:
    #   - chars in C.8 must be prohibited.
    #   - if any R/AL chars in string:
    #       - no L chars allowed in string
    #       - first and last must be R/AL chars
    # this checks if start/end are R/AL chars. if so, prohibited loop
    # will forbid all L chars. if not, prohibited loop will forbid all
    # R/AL chars instead. in both cases, prohibited loop takes care of C.8.
    is_ral_char = stringprep.in_table_d1
    if is_ral_char(data[0]):
        if not is_ral_char(data[-1]):
            raise ValueError("malformed bidi sequence in " + param)
        # forbid L chars within R/AL sequence.
        is_forbidden_bidi_char = stringprep.in_table_d2
    else:
        # forbid R/AL chars if start not setup correctly; L chars allowed.
        is_forbidden_bidi_char = is_ral_char

    # check for prohibited output - stringprep tables A.1, B.1, C.1.2, C.2 - C.9
    in_table_a1 = stringprep.in_table_a1
    in_table_c21_c22 = stringprep.in_table_c21_c22
    in_table_c3 = stringprep.in_table_c3
    in_table_c4 = stringprep.in_table_c4
    in_table_c5 = stringprep.in_table_c5
    in_table_c6 = stringprep.in_table_c6
    in_table_c7 = stringprep.in_table_c7
    in_table_c8 = stringprep.in_table_c8
    in_table_c9 = stringprep.in_table_c9
    for c in data:
        # check for chars mapping stage should have removed
        assert not in_table_b1(c), "failed to strip B.1 in mapping stage"
        assert not in_table_c12(c), "failed to replace C.1.2 in mapping stage"

        # check for forbidden chars
        if in_table_a1(c):
            raise ValueError("unassigned code points forbidden in " + param)
        if in_table_c21_c22(c):
            raise ValueError("control characters forbidden in " + param)
        if in_table_c3(c):
            raise ValueError("private use characters forbidden in " + param)
        if in_table_c4(c):
            raise ValueError("non-char code points forbidden in " + param)
        if in_table_c5(c):
            raise ValueError("surrogate codes forbidden in " + param)
        if in_table_c6(c):
            raise ValueError("non-plaintext chars forbidden in " + param)
        if in_table_c7(c):
            # XXX: should these have been caught by normalize?
            # if so, should change this to an assert
            raise ValueError("non-canonical chars forbidden in " + param)
        if in_table_c8(c):
            raise ValueError("display-modifying / deprecated chars "
                             "forbidden in " + param)
        if in_table_c9(c):
            raise ValueError("tagged characters forbidden in " + param)

        # do bidi constraint check chosen by bidi init, above
        if is_forbidden_bidi_char(c):
            raise ValueError("forbidden bidi character in " + param)

    return data

# replace saslprep() with stub when stringprep is missing
if stringprep is None: # pragma: no cover -- runtime detection
    def saslprep(source, param="value"):
        """stub for saslprep()"""
        raise NotImplementedError("saslprep() support requires the 'stringprep' "
                                  "module, which is " + _stringprep_missing_reason)

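# Illustrative usage sketch (assumed examples; requires the stdlib stringprep module):
#
#   saslprep(u"I\u00ADX")        -> u"IX"         # soft hyphen (table B.1) is stripped
#   saslprep(u"user\u00A0name")  -> u"user name"  # no-break space (C.1.2) maps to U+0020
#   saslprep(u"bad\u0000value")  -> raises ValueError ("control characters forbidden ...")
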
#=============================================================================
# bytes helpers
#=============================================================================
def render_bytes(source, *args):
    """Perform ``%`` formatting using bytes in a uniform manner across Python 2/3.

    This function is motivated by the fact that
    :class:`bytes` instances do not support ``%`` or ``{}`` formatting under Python 3.
    This function is an attempt to provide a replacement:
    it converts everything to unicode (decoding bytes instances as ``latin-1``),
    performs the required formatting, then encodes the result to ``latin-1``.

    Calling ``render_bytes(source, *args)`` should function roughly the same as
    ``source % args`` under Python 2.

    .. todo::
        python >= 3.5 added back limited support for bytes %,
        can revisit when 3.3/3.4 is dropped.
    """
    if isinstance(source, bytes):
        source = source.decode("latin-1")
    result = source % tuple(arg.decode("latin-1") if isinstance(arg, bytes)
                            else arg for arg in args)
    return result.encode("latin-1")

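# Illustrative usage sketch (assumed example, output shown under Python 3): bytes
# arguments are decoded as latin-1 before formatting, and the result is re-encoded::
#
#   >>> render_bytes("%s-%s", b"abc", 123)
#   b'abc-123'
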
if PY3:
    # new in py32
    def bytes_to_int(value):
        return int.from_bytes(value, 'big')
    def int_to_bytes(value, count):
        return value.to_bytes(count, 'big')
else:
    # XXX: can any of these be sped up?
    from binascii import hexlify, unhexlify
    def bytes_to_int(value):
        return int(hexlify(value), 16)
    def int_to_bytes(value, count):
        return unhexlify(('%%0%dx' % (count<<1)) % value)

add_doc(bytes_to_int, "decode byte string as single big-endian integer")
add_doc(int_to_bytes, "encode integer as single big-endian byte string")

def xor_bytes(left, right):
    """Perform bitwise-xor of two byte strings (must be same size)"""
    return int_to_bytes(bytes_to_int(left) ^ bytes_to_int(right), len(left))

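# Illustrative usage sketch (assumed examples, output shown under Python 3)::
#
#   >>> bytes_to_int(b"\x01\x00")
#   256
#   >>> int_to_bytes(256, 2)
#   b'\x01\x00'
#   >>> xor_bytes(b"\x0f\xf0", b"\xff\xff")
#   b'\xf0\x0f'
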
def repeat_string(source, size):
    """
    repeat or truncate <source> string, so it has length <size>
    """
    mult = 1 + (size - 1) // len(source)
    return (source * mult)[:size]


def utf8_repeat_string(source, size):
    """
    variant of repeat_string() which truncates to nearest UTF8 boundary.
    """
    mult = 1 + (size - 1) // len(source)
    return utf8_truncate(source * mult, size)


_BNULL = b"\x00"
_UNULL = u("\x00")

def right_pad_string(source, size, pad=None):
    """right-pad or truncate <source> string, so it has length <size>"""
    cur = len(source)
    if size > cur:
        if pad is None:
            pad = _UNULL if isinstance(source, unicode) else _BNULL
        return source + pad * (size - cur)
    else:
        return source[:size]


def utf8_truncate(source, index):
    """
    helper to truncate UTF8 byte string to nearest character boundary ON OR AFTER <index>.
    returned prefix will always have length of at least <index>, and will stop on the
    first byte that's not a UTF8 continuation byte (128 - 191 inclusive).
    since utf8 should never take more than 4 bytes to encode known unicode values,
    we can stop after ``index+3`` is reached.

    :param bytes source:
    :param int index:
    :rtype: bytes
    """
    # general approach:
    #
    # * UTF8 bytes will have high two bits (0xC0) as one of:
    #     00 -- ascii char
    #     01 -- ascii char
    #     10 -- continuation of multibyte char
    #     11 -- start of multibyte char.
    #   thus we can cut on anything where high bits aren't "10" (0x80; continuation byte)
    #
    # * UTF8 characters SHOULD always be 1 to 4 bytes, though they may be unbounded.
    #   so we just keep going until first non-continuation byte is encountered, or end of str.
    #   this should work predictably even for malformed/non UTF8 inputs.

    if not isinstance(source, bytes):
        raise ExpectedTypeError(source, bytes, "source")

    # validate index
    end = len(source)
    if index < 0:
        index = max(0, index + end)
    if index >= end:
        return source

    # can stop search after 4 bytes, won't ever have longer utf8 sequence.
    end = min(index + 3, end)

    # loop until we find non-continuation byte
    while index < end:
        if byte_elem_value(source[index]) & 0xC0 != 0x80:
            # found single-char byte, or start-char byte.
            break
        # else: found continuation byte.
        index += 1
    else:
        assert index == end

    # truncate at final index
    result = source[:index]

    def sanity_check():
        # try to decode source
        try:
            text = source.decode("utf-8")
        except UnicodeDecodeError:
            # if source isn't valid utf8, byte level match is enough
            return True

        # validate that result was cut on character boundary
        assert text.startswith(result.decode("utf-8"))
        return True

    assert sanity_check()

    return result

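# Illustrative usage sketch (assumed example): u"\u00e9" encodes to the two bytes
# b"\xc3\xa9", so truncating inside that sequence extends to the next character
# boundary::
#
#   >>> utf8_truncate(u"ab\u00e9d".encode("utf-8"), 3)
#   b'ab\xc3\xa9'
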
#=============================================================================
# encoding helpers
#=============================================================================
_ASCII_TEST_BYTES = b"\x00\n aA:#!\x7f"
_ASCII_TEST_UNICODE = _ASCII_TEST_BYTES.decode("ascii")

def is_ascii_codec(codec):
    """Test if codec is compatible with 7-bit ascii (e.g. latin-1, utf-8; but not utf-16)"""
    return _ASCII_TEST_UNICODE.encode(codec) == _ASCII_TEST_BYTES

def is_same_codec(left, right):
    """Check if two codec names are aliases for same codec"""
    if left == right:
        return True
    if not (left and right):
        return False
    return _lookup_codec(left).name == _lookup_codec(right).name

_B80 = b'\x80'[0]
_U80 = u('\x80')
def is_ascii_safe(source):
    """Check if string (bytes or unicode) contains only 7-bit ascii"""
    r = _B80 if isinstance(source, bytes) else _U80
    return all(c < r for c in source)

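# Illustrative usage sketch (assumed examples)::
#
#   >>> is_same_codec("utf8", "UTF-8")
#   True
#   >>> is_ascii_safe(b"ascii only")
#   True
#   >>> is_ascii_safe(u"caf\u00e9")
#   False
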
def to_bytes(source, encoding="utf-8", param="value", source_encoding=None):
    """Helper to normalize input to bytes.

    :arg source:
        Source bytes/unicode to process.

    :arg encoding:
        Target encoding (defaults to ``"utf-8"``).

    :param param:
        Optional name of variable/noun to reference when raising errors

    :param source_encoding:
        If this is specified, and the source is bytes,
        the source will be transcoded from *source_encoding* to *encoding*
        (via unicode).

    :raises TypeError: if source is not unicode or bytes.

    :returns:
        * unicode strings will be encoded using *encoding*, and returned.
        * if *source_encoding* is not specified, byte strings will be
          returned unchanged.
        * if *source_encoding* is specified, byte strings will be transcoded
          to *encoding*.
    """
    assert encoding
    if isinstance(source, bytes):
        if source_encoding and not is_same_codec(source_encoding, encoding):
            return source.decode(source_encoding).encode(encoding)
        else:
            return source
    elif isinstance(source, unicode):
        return source.encode(encoding)
    else:
        raise ExpectedStringError(source, param)

def to_unicode(source, encoding="utf-8", param="value"):
    """Helper to normalize input to unicode.

    :arg source:
        source bytes/unicode to process.

    :arg encoding:
        encoding to use when decoding bytes instances.

    :param param:
        optional name of variable/noun to reference when raising errors.

    :raises TypeError: if source is not unicode or bytes.

    :returns:
        * returns unicode strings unchanged.
        * returns bytes strings decoded using *encoding*
    """
    assert encoding
    if isinstance(source, unicode):
        return source
    elif isinstance(source, bytes):
        return source.decode(encoding)
    else:
        raise ExpectedStringError(source, param)

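# Illustrative usage sketch (assumed examples):
#
#   to_bytes(u"caf\u00e9")                                    -> b'caf\xc3\xa9'
#   to_bytes(b"caf\xe9", "utf-8", source_encoding="latin-1")  -> b'caf\xc3\xa9'
#   to_unicode(b"caf\xc3\xa9")                                -> u"caf\u00e9"
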
if PY3:
    def to_native_str(source, encoding="utf-8", param="value"):
        if isinstance(source, bytes):
            return source.decode(encoding)
        elif isinstance(source, unicode):
            return source
        else:
            raise ExpectedStringError(source, param)
else:
    def to_native_str(source, encoding="utf-8", param="value"):
        if isinstance(source, bytes):
            return source
        elif isinstance(source, unicode):
            return source.encode(encoding)
        else:
            raise ExpectedStringError(source, param)

add_doc(to_native_str,
    """Take in unicode or bytes, return native string.

    Python 2: encodes unicode using specified encoding, leaves bytes alone.
    Python 3: leaves unicode alone, decodes bytes using specified encoding.

    :raises TypeError: if source is not unicode or bytes.

    :arg source:
        source unicode or bytes string.

    :arg encoding:
        encoding to use when encoding unicode or decoding bytes.
        this defaults to ``"utf-8"``.

    :param param:
        optional name of variable/noun to reference when raising errors.

    :returns: :class:`str` instance
    """)

@deprecated_function(deprecated="1.6", removed="1.7")
def to_hash_str(source, encoding="ascii"): # pragma: no cover -- deprecated & unused
    """deprecated, use to_native_str() instead"""
    return to_native_str(source, encoding, param="hash")

_true_set = set("true t yes y on 1 enable enabled".split())
_false_set = set("false f no n off 0 disable disabled".split())
_none_set = set(["", "none"])

def as_bool(value, none=None, param="boolean"):
    """
    helper to convert value to boolean.
    recognizes strings such as "true", "false"
    """
    assert none in [True, False, None]
    if isinstance(value, unicode_or_bytes_types):
        clean = value.lower().strip()
        if clean in _true_set:
            return True
        if clean in _false_set:
            return False
        if clean in _none_set:
            return none
        raise ValueError("unrecognized %s value: %r" % (param, value))
    elif isinstance(value, bool):
        return value
    elif value is None:
        return none
    else:
        return bool(value)

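# Illustrative usage sketch (assumed examples)::
#
#   >>> as_bool("yes"), as_bool("off"), as_bool("none")
#   (True, False, None)
#
# unrecognized strings such as ``as_bool("maybe")`` raise ValueError.
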
#=============================================================================
# host OS helpers
#=============================================================================

def is_safe_crypt_input(value):
    """
    UT helper --
    test if value is safe to pass to crypt.crypt();
    under PY3, can't pass non-UTF8 bytes to crypt.crypt.
    """
    if crypt_accepts_bytes or not isinstance(value, bytes):
        return True
    try:
        value.decode("utf-8")
        return True
    except UnicodeDecodeError:
        return False

try:
    from crypt import crypt as _crypt
except ImportError: # pragma: no cover
    _crypt = None
    has_crypt = False
    crypt_accepts_bytes = False
    crypt_needs_lock = False
    _safe_crypt_lock = None
    def safe_crypt(secret, hash):
        return None
else:
    has_crypt = True
    _NULL = '\x00'

    # XXX: replace this with lazy-evaluated bug detection?
    if threading and PYPY and (7, 2, 0) <= sys.pypy_version_info <= (7, 3, 3):
        #: internal lock used to wrap crypt() calls.
        #: WARNING: if non-passlib code invokes crypt(), this lock won't be enough!
        _safe_crypt_lock = threading.Lock()

        #: detect if crypt.crypt() needs a thread lock around calls.
        crypt_needs_lock = True

    else:
        from passlib.utils.compat import nullcontext
        _safe_crypt_lock = nullcontext()
        crypt_needs_lock = False

    # some crypt() variants will return various constant strings when
    # an invalid/unrecognized config string is passed in; instead of
    # returning NULL / None. examples include ":", ":0", "*0", etc.
    # safe_crypt() returns None for any string starting with one of the
    # chars in this string...
    _invalid_prefixes = u("*:!")

    if PY3:

        # * pypy3 (as of v7.3.1) has a crypt which accepts bytes, or ASCII-only unicode.
        # * whereas CPython3 (as of v3.9) has a crypt which doesn't take bytes,
        #   but accepts ANY unicode (which it always encodes to UTF8).
        crypt_accepts_bytes = True
        try:
            _crypt(b"\xEE", "xx")
        except TypeError:
            # CPython will throw TypeError
            crypt_accepts_bytes = False
        except:  # pragma: no cover
            # don't care about other errors this might throw,
            # just want to see if we get past initial type-coercion step.
            pass

        def safe_crypt(secret, hash):
            if crypt_accepts_bytes:
                # PyPy3 -- all bytes accepted, but unicode encoded to ASCII,
                # so handling that ourselves.
                if isinstance(secret, unicode):
                    secret = secret.encode("utf-8")
                if _BNULL in secret:
                    raise ValueError("null character in secret")
                if isinstance(hash, unicode):
                    hash = hash.encode("ascii")
            else:
                # CPython3's crypt() doesn't take bytes, only unicode, which it then
                # encodes using utf-8 before passing to the C-level crypt().
                # so we have to decode the secret.
                if isinstance(secret, bytes):
                    orig = secret
                    try:
                        secret = secret.decode("utf-8")
                    except UnicodeDecodeError:
                        return None
                    # sanity check it encodes back to original byte string,
                    # otherwise when crypt() does its encoding, it'll hash the wrong bytes!
                    assert secret.encode("utf-8") == orig, \
                        "utf-8 spec says this can't happen!"
                if _NULL in secret:
                    raise ValueError("null character in secret")
                if isinstance(hash, bytes):
                    hash = hash.decode("ascii")
            try:
                with _safe_crypt_lock:
                    result = _crypt(secret, hash)
            except OSError:
                # new in py39 -- per https://bugs.python.org/issue39289,
                # crypt() now throws OSError for various things, mainly unknown hash formats
                # translating that to None for now (may revise safe_crypt behavior in future)
                return None
            # NOTE: per issue 113, crypt() may return bytes in some odd cases.
            #       assuming it should still return an ASCII hash though,
            #       or there's a bigger issue at hand.
            if isinstance(result, bytes):
                result = result.decode("ascii")
            if not result or result[0] in _invalid_prefixes:
                return None
            return result
    else:

        #: see feature-detection in PY3 fork above
        crypt_accepts_bytes = True

        # Python 2 crypt handler
        def safe_crypt(secret, hash):
            if isinstance(secret, unicode):
                secret = secret.encode("utf-8")
            if _NULL in secret:
                raise ValueError("null character in secret")
            if isinstance(hash, unicode):
                hash = hash.encode("ascii")
            with _safe_crypt_lock:
                result = _crypt(secret, hash)
            if not result:
                return None
            result = result.decode("ascii")
            if result[0] in _invalid_prefixes:
                return None
            return result

add_doc(safe_crypt, """Wrapper around stdlib's crypt.

    This is a wrapper around stdlib's :func:`!crypt.crypt`, which attempts
    to provide uniform behavior across Python 2 and 3.

    :arg secret:
        password, as bytes or unicode (unicode will be encoded as ``utf-8``).

    :arg hash:
        hash or config string, as ascii bytes or unicode.

    :returns:
        resulting hash as ascii unicode; or ``None`` if the password
        couldn't be hashed due to one of the issues:

        * :func:`crypt()` not available on platform.

        * Under Python 3, if *secret* is specified as bytes,
          it must use ``utf-8`` or it can't be passed
          to :func:`crypt()`.

        * Some OSes will return ``None`` if they don't recognize
          the algorithm being used (though most will simply fall
          back to des-crypt).

        * Some OSes will return an error string if the input config
          is recognized but malformed; current code converts these to ``None``
          as well.
    """)

def test_crypt(secret, hash):
    """check if :func:`crypt.crypt` supports specific hash
    :arg secret: password to test
    :arg hash: known hash of password to use as reference
    :returns: True or False
    """
    # safe_crypt() always returns unicode, which means that for py3,
    # 'hash' can't be bytes, or "== hash" will never be True.
    # under py2 unicode & str(bytes) will compare fine;
    # so just enforcing "unicode_or_str" limitation
    assert isinstance(hash, unicode_or_str), \
        "hash must be unicode_or_str, got %s" % type(hash)
    assert hash, "hash must be non-empty"
    return safe_crypt(secret, hash) == hash

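# Illustrative usage sketch (assumed example; the result depends on the host's
# crypt() supporting des-crypt, and safe_crypt() returns None when it doesn't)::
#
#   hash = safe_crypt(u"password", u"ab")     # "ab" acts as a des-crypt config/salt
#   if hash is not None:
#       assert test_crypt(u"password", hash)
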
timer = timeit.default_timer
# legacy alias, will be removed in passlib 2.0
tick = timer

def parse_version(source):
    """helper to parse version string"""
    m = re.search(r"(\d+(?:\.\d+)+)", source)
    if m:
        return tuple(int(elem) for elem in m.group(1).split("."))
    return None

#=============================================================================
# randomness
#=============================================================================

#------------------------------------------------------------------------
# setup rng for generating salts
#------------------------------------------------------------------------

# NOTE:
# generating salts (e.g. h64_gensalt, below) doesn't require cryptographically
# strong randomness. it just requires enough range of possible outputs
# that making a rainbow table is too costly. so it should be ok to
# fall back on python's builtin mersenne twister prng, as long as it's seeded each time
# this module is imported, using a couple of minor entropy sources.

try:
    os.urandom(1)
    has_urandom = True
except NotImplementedError: # pragma: no cover
    has_urandom = False

def genseed(value=None):
    """generate prng seed value from system resources"""
    from hashlib import sha512
    if hasattr(value, "getstate") and hasattr(value, "getrandbits"):
        # caller passed in RNG as seed value
        try:
            value = value.getstate()
        except NotImplementedError:
            # this method throws error for e.g. SystemRandom instances,
            # so fall back to extracting 4k of state
            value = value.getrandbits(1 << 15)
    text = u("%s %s %s %.15f %.15f %s") % (
        # if caller specified a seed value, mix it in
        value,

        # add current process id
        # NOTE: not available in some environments, e.g. GAE
        os.getpid() if hasattr(os, "getpid") else None,

        # id of a freshly created object.
        # (at least 1 byte of which should be hard to predict)
        id(object()),

        # the current time, to whatever precision os uses
        time.time(),
        tick(),

        # if urandom available, might as well mix some bytes in.
        os.urandom(32).decode("latin-1") if has_urandom else 0,
        )
    # hash it all up and return it as int/long
    return int(sha512(text.encode("utf-8")).hexdigest(), 16)

if has_urandom:
    rng = random.SystemRandom()
else: # pragma: no cover -- runtime detection
    # NOTE: to reseed use ``rng.seed(genseed(rng))``
    # XXX: could reseed on every call
    rng = random.Random(genseed())

#------------------------------------------------------------------------
# some rng helpers
#------------------------------------------------------------------------
def getrandbytes(rng, count):
    """return byte-string containing *count* number of randomly generated bytes, using specified rng"""
    # NOTE: would be nice if this was present in stdlib Random class

    ###just in case rng provides this...
    ##meth = getattr(rng, "getrandbytes", None)
    ##if meth:
    ##    return meth(count)

    if not count:
        return _BEMPTY
    def helper():
        # XXX: break into chunks for large number of bits?
        value = rng.getrandbits(count<<3)
        i = 0
        while i < count:
            yield value & 0xff
            # shift out the byte just emitted, so successive bytes don't overlap
            value >>= 8
            i += 1
    return join_byte_values(helper())

def getrandstr(rng, charset, count):
    """return string containing *count* number of chars/bytes, whose elements are drawn from specified charset, using specified rng"""
    # NOTE: tests determined this is 4x faster than rng.sample(),
    # which is why that's not being used here.

    # check alphabet & count
    if count < 0:
        raise ValueError("count must be >= 0")
    letters = len(charset)
    if letters == 0:
        raise ValueError("alphabet must not be empty")
    if letters == 1:
        return charset * count

    # get random value, and write out to buffer
    def helper():
        # XXX: break into chunks for large number of letters?
        value = rng.randrange(0, letters**count)
        i = 0
        while i < count:
            yield charset[value % letters]
            value //= letters
            i += 1

    if isinstance(charset, unicode):
        return join_unicode(helper())
    else:
        return join_byte_elems(helper())

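# Illustrative usage sketch (assumed examples), using the module-level ``rng``::
#
#   >>> len(getrandbytes(rng, 16))
#   16
#   >>> salt = getrandstr(rng, u"abcdefghijklmnop", 8)   # 8 chars drawn from charset
#   >>> len(salt)
#   8
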
_52charset = '2346789ABCDEFGHJKMNPQRTUVWXYZabcdefghjkmnpqrstuvwxyz'

@deprecated_function(deprecated="1.7", removed="2.0",
                     replacement="passlib.pwd.genword() / passlib.pwd.genphrase()")
def generate_password(size=10, charset=_52charset):
    """generate random password using given length & charset

    :param size:
        size of password.

    :param charset:
        optional string specifying set of characters to draw from.

        the default charset contains all normal alphanumeric characters,
        except for the characters ``1IiLl0OoS5``, which were omitted
        due to their visual similarity.

    :returns: :class:`!str` containing randomly generated password.

    .. note::

        Using the default character set, on an OS with :class:`!SystemRandom` support,
        this function should generate passwords with 5.7 bits of entropy per character.
    """
    return getrandstr(rng, charset, size)

#=============================================================================
# object type / interface tests
#=============================================================================
_handler_attrs = (
        "name",
        "setting_kwds", "context_kwds",
        "verify", "hash", "identify",
        )

def is_crypt_handler(obj):
    """check if object follows the :ref:`password-hash-api`"""
    # XXX: change to use isinstance(obj, PasswordHash) under py26+?
    return all(hasattr(obj, name) for name in _handler_attrs)

_context_attrs = (
        "needs_update",
        "genconfig", "genhash",
        "verify", "encrypt", "identify",
        )

def is_crypt_context(obj):
    """check if object appears to be a :class:`~passlib.context.CryptContext` instance"""
    # XXX: change to use isinstance(obj, CryptContext)?
    return all(hasattr(obj, name) for name in _context_attrs)

##def has_many_backends(handler):
##    "check if handler provides multiple backends"
##    # NOTE: should also provide get_backend(), .has_backend(), and .backends attr
##    return hasattr(handler, "set_backend")

def has_rounds_info(handler):
    """check if handler provides the optional :ref:`rounds information <rounds-attributes>` attributes"""
    return ('rounds' in handler.setting_kwds and
            getattr(handler, "min_rounds", None) is not None)

def has_salt_info(handler):
    """check if handler provides the optional :ref:`salt information <salt-attributes>` attributes"""
    return ('salt' in handler.setting_kwds and
            getattr(handler, "min_salt_size", None) is not None)

##def has_raw_salt(handler):
##    "check if handler takes in encoded salt as unicode (False), or decoded salt as bytes (True)"
##    sc = getattr(handler, "salt_chars", None)
##    if sc is None:
##        return None
##    elif isinstance(sc, unicode):
##        return False
##    elif isinstance(sc, bytes):
##        return True
##    else:
##        raise TypeError("handler.salt_chars must be None/unicode/bytes")

#=============================================================================
# eof
#=============================================================================