diff --git a/Doc/bytes_mode.rst b/Doc/bytes_mode.rst index bbd83db0..d7c2aa4f 100644 --- a/Doc/bytes_mode.rst +++ b/Doc/bytes_mode.rst @@ -43,37 +43,51 @@ Encoding/decoding to other formats – text, images, etc. – is left to the cal The bytes mode -------------- -The behavior of python-ldap 3.0 in Python 2 is influenced by a ``bytes_mode`` -argument to :func:`ldap.initialize`. -The argument can take these values: +In Python 3, text values are represented as ``str``, the Unicode text type. -``bytes_mode=True``: backwards-compatible +In Python 2, the behavior of python-ldap 3.0 is influenced by a ``bytes_mode`` +argument to :func:`ldap.initialize`: - Text values returned from python-ldap are always bytes (``str``). - Text values supplied to python-ldap may be either bytes or Unicode. - The encoding for bytes is always assumed to be UTF-8. +``bytes_mode=True`` (backwards compatible): + Text values are represented as bytes (``str``) encoded using UTF-8. - Not available in Python 3. +``bytes_mode=False`` (future compatible): + Text values are represented as ``unicode``. -``bytes_mode=False``: strictly future-compatible +If not given explicitly, python-ldap will default to ``bytes_mode=True``, +but if a ``unicode`` value is supplied to it, it will warn and use that value. - Text values must be represented as ``unicode``. - An error is raised if python-ldap receives a text value as bytes (``str``). +Backwards-compatible behavior is not scheduled for removal until Python 2 +itself reaches end of life. -Unspecified: relaxed mode with warnings - Causes a warning on Python 2. +Errors, warnings, and automatic encoding +---------------------------------------- - Text values returned from python-ldap are always ``unicode``. - Text values supplied to python-ldap should be ``unicode``; - warnings are emitted when they are not. 
+While the type of values *returned* from python-ldap is always given by +``bytes_mode``, for Python 2 the behavior for “wrong-type” values *passed in* +can be controlled by the ``bytes_strictness`` argument to +:func:`ldap.initialize`: - The warnings are of type :class:`~ldap.LDAPBytesWarning`, which - is a subclass of :class:`BytesWarning` designed to be easily - :ref:`filtered out <filter-bytes-warning>` if needed. +``bytes_strictness='error'`` (default if ``bytes_mode`` is specified): + A ``TypeError`` is raised. -Backwards-compatible behavior is not scheduled for removal until Python 2 -itself reaches end of life. +``bytes_strictness='warn'`` (default when ``bytes_mode`` is not given explicitly): + A warning is raised, and the value is encoded/decoded + using the UTF-8 encoding. + + The warnings are of type :class:`~ldap.LDAPBytesWarning`, which + is a subclass of :class:`BytesWarning` designed to be easily + :ref:`filtered out <filter-bytes-warning>` if needed. + +``bytes_strictness='silent'``: + The value is automatically encoded/decoded using the UTF-8 encoding. + +On Python 3, ``bytes_strictness`` is ignored and a ``TypeError`` is always +raised. + +When setting ``bytes_strictness``, an explicit value for ``bytes_mode`` needs +to be given as well. Porting recommendations diff --git a/Doc/reference/ldap.rst b/Doc/reference/ldap.rst index 9cb1d520..5d15158e 100644 --- a/Doc/reference/ldap.rst +++ b/Doc/reference/ldap.rst @@ -29,7 +29,7 @@ Functions This module defines the following functions: -.. 
py:function:: initialize(uri [, trace_level=0 [, trace_file=sys.stdout [, trace_stack_limit=None, [bytes_mode=None, [bytes_strictness=None]]]]]) -> LDAPObject object Initializes a new connection object for accessing the given LDAP server, and return an LDAP object (see :ref:`ldap-objects`) used to perform operations @@ -53,7 +53,8 @@ This module defines the following functions: *trace_file* specifies a file-like object as target of the debug log and *trace_stack_limit* specifies the stack limit of tracebacks in debug log. - The *bytes_mode* argument specifies text/bytes behavior under Python 2. + The *bytes_mode* and *bytes_strictness* arguments specify text/bytes + behavior under Python 2. See :ref:`text-bytes` for a complete documentation. Possible values for *trace_level* are @@ -696,6 +697,9 @@ and wait for and return with the server's result, or with *serverctrls* and *clientctrls* like described in section :ref:`ldap-controls`. + The *dn* argument, and mod_type (second item) of *modlist* are text strings; + see :ref:`bytes_mode`. + .. py:method:: LDAPObject.bind(who, cred, method) -> int @@ -737,6 +741,8 @@ and wait for and return with the server's result, or with *serverctrls* and *clientctrls* like described in section :ref:`ldap-controls`. + The *dn* and *attr* arguments are text strings; see :ref:`bytes_mode`. + .. note:: A design fault in the LDAP API prevents *value* @@ -757,6 +763,8 @@ and wait for and return with the server's result, or with *serverctrls* and *clientctrls* like described in section :ref:`ldap-controls`. + The *dn* argument is a text string; see :ref:`bytes_mode`. + .. py:method:: LDAPObject.extop(extreq[,serverctrls=None[,clientctrls=None]]]) -> int @@ -810,6 +818,9 @@ and wait for and return with the server's result, or with You might want to look into sub-module :py:mod:`ldap.modlist` for generating *modlist*. + The *dn* argument, and mod_type (second item) of *modlist* are text strings; + see :ref:`bytes_mode`. + .. 
py:method:: LDAPObject.modrdn(dn, newrdn [, delold=1]) -> int @@ -826,6 +837,8 @@ and wait for and return with the server's result, or with This operation is emulated by :py:meth:`rename()` and :py:meth:`rename_s()` methods since the modrdn2* routines in the C library are deprecated. + The *dn* and *newrdn* arguments are text strings; see :ref:`bytes_mode`. + .. py:method:: LDAPObject.passwd(user, oldpw, newpw [, serverctrls=None [, clientctrls=None]]) -> int @@ -844,6 +857,8 @@ and wait for and return with the server's result, or with The asynchronous version returns the initiated message id. + The *user*, *oldpw* and *newpw* arguments are text strings; see :ref:`bytes_mode`. + .. seealso:: :rfc:`3062` - LDAP Password Modify Extended Operation @@ -865,6 +880,8 @@ and wait for and return with the server's result, or with *serverctrls* and *clientctrls* like described in section :ref:`ldap-controls`. + The *dn* and *newdn* arguments are text strings; see :ref:`bytes_mode`. + .. py:method:: LDAPObject.result([msgid=RES_ANY [, all=1 [, timeout=None]]]) -> 2-tuple @@ -1015,12 +1032,13 @@ and wait for and return with the server's result, or with *serverctrls* and *clientctrls* like described in section :ref:`ldap-controls`. + The *who* and *cred* arguments are text strings; see :ref:`bytes_mode`. + .. versionchanged:: 3.0 :meth:`~LDAPObject.simple_bind` and :meth:`~LDAPObject.simple_bind_s` now accept ``None`` for *who* and *cred*, too. - .. py:method:: LDAPObject.search(base, scope [,filterstr='(objectClass=*)' [, attrlist=None [, attrsonly=0]]]) ->int .. py:method:: LDAPObject.search_s(base, scope [,filterstr='(objectClass=*)' [, attrlist=None [, attrsonly=0]]]) ->list|None @@ -1073,6 +1091,9 @@ and wait for and return with the server's result, or with or :py:meth:`search_ext_s()` (client-side search limit). If non-zero not more than *sizelimit* results are returned by the server. 
+ The *base* and *filterstr* arguments, and *attrlist* contents, + are text strings; see :ref:`bytes_mode`. + .. versionchanged:: 3.0 ``filterstr=None`` is equivalent to ``filterstr='(objectClass=*)'``. diff --git a/Lib/ldap/ldapobject.py b/Lib/ldap/ldapobject.py index a0712a34..9e92ce66 100644 --- a/Lib/ldap/ldapobject.py +++ b/Lib/ldap/ldapobject.py @@ -93,7 +93,8 @@ class SimpleLDAPObject: def __init__( self,uri, - trace_level=0,trace_file=None,trace_stack_limit=5,bytes_mode=None + trace_level=0,trace_file=None,trace_stack_limit=5,bytes_mode=None, + bytes_strictness=None, ): self._trace_level = trace_level self._trace_file = trace_file or sys.stdout @@ -107,20 +108,26 @@ def __init__( # Bytes mode # ---------- - # By default, raise a TypeError when receiving invalid args - self.bytes_mode_hardfail = True - if bytes_mode is None and PY2: - _raise_byteswarning( - "Under Python 2, python-ldap uses bytes by default. " - "This will be removed in Python 3 (no bytes for DN/RDN/field names). " - "Please call initialize(..., bytes_mode=False) explicitly.") - bytes_mode = True - # Disable hard failure when running in backwards compatibility mode. - self.bytes_mode_hardfail = False - elif bytes_mode and not PY2: - raise ValueError("bytes_mode is *not* supported under Python 3.") - # On by default on Py2, off on Py3. + if PY2: + if bytes_mode is None: + bytes_mode = True + if bytes_strictness is None: + _raise_byteswarning( + "Under Python 2, python-ldap uses bytes by default. " + "This will be removed in Python 3 (no bytes for " + "DN/RDN/field names). 
" + "Please call initialize(..., bytes_mode=False) explicitly.") + bytes_strictness = 'warn' + else: + if bytes_strictness is None: + bytes_strictness = 'error' + else: + if bytes_mode: + raise ValueError("bytes_mode is *not* supported under Python 3.") + bytes_mode = False + bytes_strictness = 'error' self.bytes_mode = bytes_mode + self.bytes_strictness = bytes_strictness def _bytesify_input(self, arg_name, value): """Adapt a value following bytes_mode in Python 2. @@ -130,38 +137,46 @@ def _bytesify_input(self, arg_name, value): With bytes_mode ON, takes bytes or None and returns bytes or None. With bytes_mode OFF, takes unicode or None and returns bytes or None. - This function should be applied on all text inputs (distinguished names - and attribute names in modlists) to convert them to the bytes expected - by the C bindings. + For the wrong argument type (unicode or bytes, respectively), + behavior depends on the bytes_strictness setting. + In all cases, bytes or None are returned (or an exception is raised). 
""" if not PY2: return value - if value is None: return value + elif self.bytes_mode: if isinstance(value, bytes): return value + elif self.bytes_strictness == 'silent': + pass + elif self.bytes_strictness == 'warn': + _raise_byteswarning( + "Received non-bytes value for '{}' in bytes mode; " + "please choose an explicit " + "option for bytes_mode on your LDAP connection".format(arg_name)) else: - if self.bytes_mode_hardfail: raise TypeError( "All provided fields *must* be bytes when bytes mode is on; " "got type '{}' for '{}'.".format(type(value).__name__, arg_name) ) - else: - _raise_byteswarning( - "Received non-bytes value for '{}' with default (disabled) bytes mode; " - "please choose an explicit " - "option for bytes_mode on your LDAP connection".format(arg_name)) - return value.encode('utf-8') + return value.encode('utf-8') else: - if not isinstance(value, text_type): + if isinstance(value, unicode): + return value.encode('utf-8') + elif self.bytes_strictness == 'silent': + pass + elif self.bytes_strictness == 'warn': + _raise_byteswarning( + "Received non-text value for '{}' with bytes_mode off and " + "bytes_strictness='warn'".format(arg_name)) + else: raise TypeError( "All provided fields *must* be text when bytes mode is off; " "got type '{}' for '{}'.".format(type(value).__name__, arg_name) ) - assert not isinstance(value, bytes) - return value.encode('utf-8') + return value def _bytesify_modlist(self, arg_name, modlist, with_opcode): """Adapt a modlist according to bytes_mode. 
@@ -1064,7 +1079,7 @@ class ReconnectLDAPObject(SimpleLDAPObject): def __init__( self,uri, trace_level=0,trace_file=None,trace_stack_limit=5,bytes_mode=None, - retry_max=1,retry_delay=60.0 + bytes_strictness=None, retry_max=1, retry_delay=60.0 ): """ Parameters like SimpleLDAPObject.__init__() with these @@ -1078,7 +1093,9 @@ def __init__( self._uri = uri self._options = [] self._last_bind = None - SimpleLDAPObject.__init__(self,uri,trace_level,trace_file,trace_stack_limit,bytes_mode) + SimpleLDAPObject.__init__(self, uri, trace_level, trace_file, + trace_stack_limit, bytes_mode, + bytes_strictness=bytes_strictness) self._reconnect_lock = ldap.LDAPLock(desc='reconnect lock within %s' % (repr(self))) self._retry_max = retry_max self._retry_delay = retry_delay @@ -1097,6 +1114,11 @@ def __getstate__(self): def __setstate__(self,d): """set up the object from pickled data""" + hardfail = d.get('bytes_mode_hardfail') + if hardfail: + d.setdefault('bytes_strictness', 'error') + else: + d.setdefault('bytes_strictness', 'warn') self.__dict__.update(d) self._last_bind = getattr(SimpleLDAPObject, self._last_bind[0]), self._last_bind[1], self._last_bind[2] self._ldap_object_lock = self._ldap_lock() diff --git a/Tests/t_ldapobject.py b/Tests/t_ldapobject.py index 1c847428..0a8e78ef 100644 --- a/Tests/t_ldapobject.py +++ b/Tests/t_ldapobject.py @@ -162,9 +162,9 @@ def test_search_keys_are_text(self): for value in values: self.assertEqual(type(value), bytes) - def _get_bytes_ldapobject(self, explicit=True): + def _get_bytes_ldapobject(self, explicit=True, **kwargs): if explicit: - kwargs = {'bytes_mode': True} + kwargs.setdefault('bytes_mode', True) else: kwargs = {} return self._open_ldap_conn( @@ -231,6 +231,68 @@ def test_unset_bytesmode_search_warns_bytes(self): l.search_s(base.encode('utf-8'), ldap.SCOPE_SUBTREE, b'(cn=Foo*)', ['*']) l.search_s(base, ldap.SCOPE_SUBTREE, b'(cn=Foo*)', [b'*']) + def _search_wrong_type(self, bytes_mode, strictness): + if bytes_mode: + l = 
self._get_bytes_ldapobject(bytes_strictness=strictness) + else: + l = self._open_ldap_conn(bytes_mode=False, + bytes_strictness=strictness) + base = 'cn=Foo1,' + self.server.suffix + if not bytes_mode: + base = base.encode('utf-8') + result = l.search_s(base, scope=ldap.SCOPE_SUBTREE) + return result[0][-1]['cn'] + + @unittest.skipUnless(PY2, "no bytes_mode under Py3") + def test_bytesmode_silent(self): + with warnings.catch_warnings(record=True) as w: + warnings.resetwarnings() + warnings.simplefilter('always', ldap.LDAPBytesWarning) + self._search_wrong_type(bytes_mode=True, strictness='silent') + self.assertEqual(w, []) + + @unittest.skipUnless(PY2, "no bytes_mode under Py3") + def test_bytesmode_warn(self): + with warnings.catch_warnings(record=True) as w: + warnings.resetwarnings() + warnings.simplefilter('always', ldap.LDAPBytesWarning) + self._search_wrong_type(bytes_mode=True, strictness='warn') + self.assertEqual(len(w), 1) + + @unittest.skipUnless(PY2, "no bytes_mode under Py3") + def test_bytesmode_error(self): + with warnings.catch_warnings(record=True) as w: + warnings.resetwarnings() + warnings.simplefilter('always', ldap.LDAPBytesWarning) + with self.assertRaises(TypeError): + self._search_wrong_type(bytes_mode=True, strictness='error') + self.assertEqual(w, []) + + @unittest.skipUnless(PY2, "no bytes_mode under Py3") + def test_textmode_silent(self): + with warnings.catch_warnings(record=True) as w: + warnings.resetwarnings() + warnings.simplefilter('always', ldap.LDAPBytesWarning) + self._search_wrong_type(bytes_mode=True, strictness='silent') + self.assertEqual(w, []) + + @unittest.skipUnless(PY2, "no bytes_mode under Py3") + def test_textmode_warn(self): + with warnings.catch_warnings(record=True) as w: + warnings.resetwarnings() + warnings.simplefilter('always', ldap.LDAPBytesWarning) + self._search_wrong_type(bytes_mode=True, strictness='warn') + self.assertEqual(len(w), 1) + + @unittest.skipUnless(PY2, "no bytes_mode under Py3") + def 
test_textmode_error(self): + with warnings.catch_warnings(record=True) as w: + warnings.resetwarnings() + warnings.simplefilter('always', ldap.LDAPBytesWarning) + with self.assertRaises(TypeError): + self._search_wrong_type(bytes_mode=True, strictness='error') + self.assertEqual(w, []) + def test_search_accepts_unicode_dn(self): base = self.server.suffix l = self._ldap_conn @@ -470,7 +532,7 @@ def test_ldapbyteswarning(self): self.assertIs(msg.category, ldap.LDAPBytesWarning) self.assertEqual( text_type(msg.message), - "Received non-bytes value for 'base' with default (disabled) bytes " + "Received non-bytes value for 'base' in bytes " "mode; please choose an explicit option for bytes_mode on your " "LDAP connection" ) @@ -632,7 +694,7 @@ def test103_reconnect_get_state(self): str('_trace_stack_limit'): 5, str('_uri'): self.server.ldap_uri, str('bytes_mode'): l1.bytes_mode, - str('bytes_mode_hardfail'): l1.bytes_mode_hardfail, + str('bytes_strictness'): l1.bytes_strictness, str('timeout'): -1, }, )