diff --git a/Doc/library/binascii.rst b/Doc/library/binascii.rst index 1bab785684bbab..6e4dd23c8c62b0 100644 --- a/Doc/library/binascii.rst +++ b/Doc/library/binascii.rst @@ -77,6 +77,79 @@ The :mod:`binascii` module defines the following functions: Added the *newline* parameter. +.. function:: a2b_ascii85(string, /, *, fold_spaces=False, wrap=False, ignore=b"") + + Convert Ascii85 data back to binary and return the binary data. + + Valid Ascii85 data contains characters from the Ascii85 alphabet in groups + of five (except for the final group, which may have from two to five + characters). Each group encodes 32 bits of binary data in the range from + ``0`` to ``2 ** 32 - 1``, inclusive. The special character ``z`` is + accepted as a short form of the group ``!!!!!``, which encodes four + consecutive null bytes. + + If *fold_spaces* is true, the special character ``y`` is also accepted as a + short form of the group ``+<VdL``, which encodes four consecutive spaces. + + If *wrap* is true, the input is expected to be enclosed in ``<~`` and ``~>``, as in + the Adobe Ascii85 format. + + *ignore* is an optional bytes-like object that specifies characters to + ignore in the input. + + Invalid Ascii85 data will raise :exc:`binascii.Error`. + + +.. function:: b2a_ascii85(data, /, *, fold_spaces=False, wrap=False, width=0, pad=False) + + Convert binary data to a formatted sequence of ASCII characters in Ascii85 + coding. The return value is the converted data. + + If *fold_spaces* is true, four consecutive spaces are encoded as the + special character ``y`` instead of the sequence ``+<VdL``. + + If *wrap* is true, the output is enclosed in ``<~`` and ``~>``, as + in the Adobe Ascii85 format. + + If *width* is provided and greater than 0, the output is split into lines + of no more than the specified width separated by the ASCII newline + character. + + If *pad* is true, the input is padded to a multiple of 4 before encoding. + + +.. function:: a2b_base85(string, /, *, strict_mode=False, z85=False) + + Convert base85 data back to binary and return the binary data. + More than one line may be passed at a time. 
+ + If *strict_mode* is true, only valid base85 data will be converted. + Invalid base85 data will raise :exc:`binascii.Error`. + + If *z85* is true, the base85 data uses the Z85 alphabet. + See `Z85 specification <https://rfc.zeromq.org/spec/32/>`_ for more information. + + Valid base85 data contains characters from the base85 alphabet in groups + of five (except for the final group, which may have from two to five + characters). Each group encodes 32 bits of binary data in the range from + ``0`` to ``2 ** 32 - 1``, inclusive. + + +.. function:: b2a_base85(data, /, *, pad=False, newline=True, z85=False) + + Convert binary data to a line of ASCII characters in base85 coding. + The return value is the converted line. + + If *pad* is true, the input is padded to a multiple of 4 before encoding. + + If *newline* is true, a newline char is appended to the result. + + If *z85* is true, the Z85 alphabet is used for conversion. + See `Z85 specification <https://rfc.zeromq.org/spec/32/>`_ for more information. + + .. function:: a2b_qp(data, header=False) Convert a block of quoted-printable data back to binary and return the binary diff --git a/Include/internal/pycore_global_objects_fini_generated.h b/Include/internal/pycore_global_objects_fini_generated.h index 5485d0bd64f3f1..0bb6729eeabbe1 100644 --- a/Include/internal/pycore_global_objects_fini_generated.h +++ b/Include/internal/pycore_global_objects_fini_generated.h @@ -963,6 +963,7 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) { _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(flags)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(flush)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(fold)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(fold_spaces)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(follow_symlinks)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(format)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(format_spec)); @@ -1145,6 +1146,7 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) { _PyStaticObject_CheckRefcnt((PyObject 
*)&_Py_ID(outpath)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(overlapped)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(owner)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(pad)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(pages)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(parent)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(password)); @@ -1301,11 +1303,14 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) { _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(weekday)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(which)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(who)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(width)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(withdata)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(wrap)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(writable)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(write)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(write_through)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(year)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(z85)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(zdict)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).ascii[0]); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).ascii[1]); diff --git a/Include/internal/pycore_global_strings.h b/Include/internal/pycore_global_strings.h index 3ce192511e3879..f178ee5eb3ffe6 100644 --- a/Include/internal/pycore_global_strings.h +++ b/Include/internal/pycore_global_strings.h @@ -454,6 +454,7 @@ struct _Py_global_strings { STRUCT_FOR_ID(flags) STRUCT_FOR_ID(flush) STRUCT_FOR_ID(fold) + STRUCT_FOR_ID(fold_spaces) STRUCT_FOR_ID(follow_symlinks) STRUCT_FOR_ID(format) STRUCT_FOR_ID(format_spec) @@ -636,6 +637,7 @@ struct _Py_global_strings { STRUCT_FOR_ID(outpath) STRUCT_FOR_ID(overlapped) STRUCT_FOR_ID(owner) + STRUCT_FOR_ID(pad) STRUCT_FOR_ID(pages) STRUCT_FOR_ID(parent) STRUCT_FOR_ID(password) @@ -792,11 +794,14 @@ struct 
_Py_global_strings { STRUCT_FOR_ID(weekday) STRUCT_FOR_ID(which) STRUCT_FOR_ID(who) + STRUCT_FOR_ID(width) STRUCT_FOR_ID(withdata) + STRUCT_FOR_ID(wrap) STRUCT_FOR_ID(writable) STRUCT_FOR_ID(write) STRUCT_FOR_ID(write_through) STRUCT_FOR_ID(year) + STRUCT_FOR_ID(z85) STRUCT_FOR_ID(zdict) } identifiers; struct { diff --git a/Include/internal/pycore_runtime_init_generated.h b/Include/internal/pycore_runtime_init_generated.h index 5c95d0feddecba..0046d48ff9215a 100644 --- a/Include/internal/pycore_runtime_init_generated.h +++ b/Include/internal/pycore_runtime_init_generated.h @@ -961,6 +961,7 @@ extern "C" { INIT_ID(flags), \ INIT_ID(flush), \ INIT_ID(fold), \ + INIT_ID(fold_spaces), \ INIT_ID(follow_symlinks), \ INIT_ID(format), \ INIT_ID(format_spec), \ @@ -1143,6 +1144,7 @@ extern "C" { INIT_ID(outpath), \ INIT_ID(overlapped), \ INIT_ID(owner), \ + INIT_ID(pad), \ INIT_ID(pages), \ INIT_ID(parent), \ INIT_ID(password), \ @@ -1299,11 +1301,14 @@ extern "C" { INIT_ID(weekday), \ INIT_ID(which), \ INIT_ID(who), \ + INIT_ID(width), \ INIT_ID(withdata), \ + INIT_ID(wrap), \ INIT_ID(writable), \ INIT_ID(write), \ INIT_ID(write_through), \ INIT_ID(year), \ + INIT_ID(z85), \ INIT_ID(zdict), \ } diff --git a/Include/internal/pycore_unicodeobject_generated.h b/Include/internal/pycore_unicodeobject_generated.h index a1fc9736d66618..6bd14ec858f7d6 100644 --- a/Include/internal/pycore_unicodeobject_generated.h +++ b/Include/internal/pycore_unicodeobject_generated.h @@ -1604,6 +1604,10 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) { _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(fold_spaces); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); string = &_Py_ID(follow_symlinks); _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); @@ -2332,6 
+2336,10 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) { _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(pad); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); string = &_Py_ID(pages); _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); @@ -2956,10 +2964,18 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) { _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(width); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); string = &_Py_ID(withdata); _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(wrap); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); string = &_Py_ID(writable); _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); @@ -2976,6 +2992,10 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) { _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(z85); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); string = &_Py_ID(zdict); _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); diff --git a/Lib/_base64.py b/Lib/_base64.py new file mode 100644 index 00000000000000..d48577f36bb8ae --- /dev/null +++ b/Lib/_base64.py @@ -0,0 +1,79 @@ +"""C accelerator wrappers for originally 
pure-Python parts of base64.""" + +from binascii import Error, a2b_ascii85, a2b_base85, b2a_ascii85, b2a_base85 + + +# Base 85 functions in base64 silently convert input to bytes. +# Copy the conversion logic from base64 to avoid circular imports. + +bytes_types = (bytes, bytearray) # Types acceptable as binary data + + +def _bytes_from_decode_data(s): + if isinstance(s, str): + try: + return s.encode('ascii') + except UnicodeEncodeError: + raise ValueError('string argument should contain only ASCII characters') + if isinstance(s, bytes_types): + return s + try: + return memoryview(s).tobytes() + except TypeError: + raise TypeError("argument should be a bytes-like object or ASCII " + "string, not %r" % s.__class__.__name__) from None + + +def _bytes_from_encode_data(b): + return b if isinstance(b, bytes_types) else memoryview(b).tobytes() + + +# Functions in binascii raise binascii.Error instead of ValueError. + +def a85encode(b, *, foldspaces=False, wrapcol=0, pad=False, adobe=False): + b = _bytes_from_encode_data(b) + try: + return b2a_ascii85(b, fold_spaces=foldspaces, + wrap=adobe, width=wrapcol, pad=pad) + except Error as e: + raise ValueError(e) from None + + +def a85decode(b, *, foldspaces=False, adobe=False, ignorechars=b' \t\n\r\v'): + b = _bytes_from_decode_data(b) + try: + return a2b_ascii85(b, fold_spaces=foldspaces, + wrap=adobe, ignore=ignorechars) + except Error as e: + raise ValueError(e) from None + +def b85encode(b, pad=False): + b = _bytes_from_encode_data(b) + try: + return b2a_base85(b, pad=pad, newline=False) + except Error as e: + raise ValueError(e) from None + + +def b85decode(b): + b = _bytes_from_decode_data(b) + try: + return a2b_base85(b, strict_mode=True) + except Error as e: + raise ValueError(e) from None + + +def z85encode(s): + s = _bytes_from_encode_data(s) + try: + return b2a_base85(s, newline=False, z85=True) + except Error as e: + raise ValueError(e) from None + + +def z85decode(s): + s = _bytes_from_decode_data(s) + try: + 
return a2b_base85(s, strict_mode=True, z85=True) + except Error as e: + raise ValueError(e) from None diff --git a/Lib/base64.py b/Lib/base64.py index 5d78cc09f40cd3..602b890dec3010 100644 --- a/Lib/base64.py +++ b/Lib/base64.py @@ -576,6 +576,26 @@ def decodebytes(s): return binascii.a2b_base64(s) +# Use accelerated implementations of originally pure-Python parts if possible. +try: + from _base64 import (a85encode as _a85encode, a85decode as _a85decode, + b85encode as _b85encode, b85decode as _b85decode, + z85encode as _z85encode, z85decode as _z85decode) + # Avoid expensive import of update_wrapper() from functools. + def _copy_attributes(func, src_func): + func.__doc__ = src_func.__doc__ + func.__module__ = "base64" + return func + a85encode = _copy_attributes(_a85encode, a85encode) + a85decode = _copy_attributes(_a85decode, a85decode) + b85encode = _copy_attributes(_b85encode, b85encode) + b85decode = _copy_attributes(_b85decode, b85decode) + z85encode = _copy_attributes(_z85encode, z85encode) + z85decode = _copy_attributes(_z85decode, z85decode) +except ImportError: + pass + + # Usable as a script... 
def main(): """Small main program""" diff --git a/Lib/test/test_base64.py b/Lib/test/test_base64.py index 409c8c109e885f..6282da6b6bc8f0 100644 --- a/Lib/test/test_base64.py +++ b/Lib/test/test_base64.py @@ -1,13 +1,16 @@ import unittest -import base64 import binascii import os from array import array from test.support import os_helper from test.support import script_helper +from test.support.import_helper import import_fresh_module +py_base64 = import_fresh_module("base64", blocked=["_base64"]) +c_base64 = import_fresh_module("base64", fresh=["_base64"]) -class LegacyBase64TestCase(unittest.TestCase): + +class LegacyBase64TestCase: # Legacy API is not as permissive as the modern API def check_type_errors(self, f): @@ -19,6 +22,7 @@ def check_type_errors(self, f): self.assertRaises(TypeError, f, int_data) def test_encodebytes(self): + base64 = self.module eq = self.assertEqual eq(base64.encodebytes(b"www.python.org"), b"d3d3LnB5dGhvbi5vcmc=\n") eq(base64.encodebytes(b"a"), b"YQ==\n") @@ -40,6 +44,7 @@ def test_encodebytes(self): self.check_type_errors(base64.encodebytes) def test_decodebytes(self): + base64 = self.module eq = self.assertEqual eq(base64.decodebytes(b"d3d3LnB5dGhvbi5vcmc=\n"), b"www.python.org") eq(base64.decodebytes(b"YQ==\n"), b"a") @@ -61,6 +66,7 @@ def test_decodebytes(self): self.check_type_errors(base64.decodebytes) def test_encode(self): + base64 = self.module eq = self.assertEqual from io import BytesIO, StringIO infp = BytesIO(b'abcdefghijklmnopqrstuvwxyz' @@ -78,6 +84,7 @@ def test_encode(self): self.assertRaises(TypeError, base64.encode, StringIO('abc'), StringIO()) def test_decode(self): + base64 = self.module from io import BytesIO, StringIO infp = BytesIO(b'd3d3LnB5dGhvbi5vcmc=') outfp = BytesIO() @@ -89,7 +96,16 @@ def test_decode(self): self.assertRaises(TypeError, base64.encode, StringIO('YWJj\n'), StringIO()) -class BaseXYTestCase(unittest.TestCase): +class LegacyBase64TestCasePython(LegacyBase64TestCase, unittest.TestCase): + 
module = py_base64 + + +@unittest.skipUnless(c_base64, "requires _base64") +class LegacyBase64TestCaseC(LegacyBase64TestCase, unittest.TestCase): + module = c_base64 + + +class BaseXYTestCase: # Modern API completely ignores exported dimension and format data and # treats any buffer as a stream of bytes @@ -101,6 +117,7 @@ def check_decode_type_errors(self, f): self.assertRaises(TypeError, f, []) def check_other_types(self, f, bytes_data, expected): + base64 = self.module eq = self.assertEqual b = bytearray(bytes_data) eq(f(b), expected) @@ -127,6 +144,7 @@ def check_nonbyte_element_format(self, f, data): def test_b64encode(self): + base64 = self.module eq = self.assertEqual # Test default alphabet eq(base64.b64encode(b"www.python.org"), b"d3d3LnB5dGhvbi5vcmc=") @@ -177,6 +195,7 @@ def test_b64encode(self): self.check_encode_type_errors(base64.urlsafe_b64encode) def test_b64decode(self): + base64 = self.module eq = self.assertEqual tests = {b"d3d3LnB5dGhvbi5vcmc=": b"www.python.org", @@ -233,10 +252,12 @@ def test_b64decode(self): self.check_decode_type_errors(base64.urlsafe_b64decode) def test_b64decode_padding_error(self): + base64 = self.module self.assertRaises(binascii.Error, base64.b64decode, b'abc') self.assertRaises(binascii.Error, base64.b64decode, 'abc') def test_b64decode_invalid_chars(self): + base64 = self.module # issue 1466065: Test some invalid characters. 
tests = ((b'%3d==', b'\xdd'), (b'$3d==', b'\xdd'), @@ -269,6 +290,7 @@ def test_b64decode_invalid_chars(self): self.assertEqual(base64.urlsafe_b64decode(b'++--//__'), res) def test_b32encode(self): + base64 = self.module eq = self.assertEqual eq(base64.b32encode(b''), b'') eq(base64.b32encode(b'\x00'), b'AA======') @@ -282,6 +304,7 @@ def test_b32encode(self): self.check_encode_type_errors(base64.b32encode) def test_b32decode(self): + base64 = self.module eq = self.assertEqual tests = {b'': b'', b'AA======': b'\x00', @@ -299,6 +322,7 @@ def test_b32decode(self): self.check_decode_type_errors(base64.b32decode) def test_b32decode_casefold(self): + base64 = self.module eq = self.assertEqual tests = {b'': b'', b'ME======': b'a', @@ -340,6 +364,7 @@ def test_b32decode_casefold(self): self.assertRaises(binascii.Error, base64.b32decode, data_str) def test_b32decode_error(self): + base64 = self.module tests = [b'abc', b'ABCDEF==', b'==ABCDEF'] prefixes = [b'M', b'ME', b'MFRA', b'MFRGG', b'MFRGGZA', b'MFRGGZDF'] for i in range(0, 17): @@ -356,6 +381,7 @@ def test_b32decode_error(self): base64.b32decode(data.decode('ascii')) def test_b32hexencode(self): + base64 = self.module test_cases = [ # to_encode, expected (b'', b''), @@ -371,10 +397,12 @@ def test_b32hexencode(self): self.assertEqual(base64.b32hexencode(to_encode), expected) def test_b32hexencode_other_types(self): + base64 = self.module self.check_other_types(base64.b32hexencode, b'abcd', b'C5H66P0=') self.check_encode_type_errors(base64.b32hexencode) def test_b32hexdecode(self): + base64 = self.module test_cases = [ # to_decode, expected, casefold (b'', b'', False), @@ -405,10 +433,12 @@ def test_b32hexdecode(self): casefold), expected) def test_b32hexdecode_other_types(self): + base64 = self.module self.check_other_types(base64.b32hexdecode, b'C5H66===', b'abc') self.check_decode_type_errors(base64.b32hexdecode) def test_b32hexdecode_error(self): + base64 = self.module tests = [b'abc', b'ABCDEF==', b'==ABCDEF', 
b'c4======'] prefixes = [b'M', b'ME', b'MFRA', b'MFRGG', b'MFRGGZA', b'MFRGGZDF'] for i in range(0, 17): @@ -426,6 +456,7 @@ def test_b32hexdecode_error(self): def test_b16encode(self): + base64 = self.module eq = self.assertEqual eq(base64.b16encode(b'\x01\x02\xab\xcd\xef'), b'0102ABCDEF') eq(base64.b16encode(b'\x00'), b'00') @@ -435,6 +466,7 @@ def test_b16encode(self): self.check_encode_type_errors(base64.b16encode) def test_b16decode(self): + base64 = self.module eq = self.assertEqual eq(base64.b16decode(b'0102ABCDEF'), b'\x01\x02\xab\xcd\xef') eq(base64.b16decode('0102ABCDEF'), b'\x01\x02\xab\xcd\xef') @@ -462,6 +494,7 @@ def test_b16decode(self): self.assertRaises(binascii.Error, base64.b16decode, '010') def test_a85encode(self): + base64 = self.module eq = self.assertEqual tests = { @@ -512,6 +545,7 @@ def test_a85encode(self): eq(base64.a85encode(b' '*5, foldspaces=True, adobe=False), b'y+9') def test_b85encode(self): + base64 = self.module eq = self.assertEqual tests = { @@ -546,6 +580,7 @@ def test_b85encode(self): b'cXxL#aCvlSZ*DGca%T') def test_z85encode(self): + base64 = self.module eq = self.assertEqual tests = { @@ -580,6 +615,7 @@ def test_z85encode(self): b'CxXl-AcVLsz/dgCA+t') def test_a85decode(self): + base64 = self.module eq = self.assertEqual tests = { @@ -626,6 +662,7 @@ def test_a85decode(self): b"www.python.org") def test_b85decode(self): + base64 = self.module eq = self.assertEqual tests = { @@ -661,6 +698,7 @@ def test_b85decode(self): b"www.python.org") def test_z85decode(self): + base64 = self.module eq = self.assertEqual tests = { @@ -696,6 +734,7 @@ def test_z85decode(self): b'www.python.org') def test_a85_padding(self): + base64 = self.module eq = self.assertEqual eq(base64.a85encode(b"x", pad=True), b'GQ7^D') @@ -711,6 +750,7 @@ def test_a85_padding(self): eq(base64.a85decode(b'G^+IXGQ7^D'), b"xxxxx\x00\x00\x00") def test_b85_padding(self): + base64 = self.module eq = self.assertEqual eq(base64.b85encode(b"x", pad=True), b'cmMzZ') 
@@ -726,6 +766,7 @@ def test_b85_padding(self): eq(base64.b85decode(b'czAetcmMzZ'), b"xxxxx\x00\x00\x00") def test_a85decode_errors(self): + base64 = self.module illegal = (set(range(32)) | set(range(118, 256))) - set(b' \t\n\r\v') for c in illegal: with self.assertRaises(ValueError, msg=bytes([c])): @@ -763,6 +804,7 @@ def test_a85decode_errors(self): foldspaces=True) def test_b85decode_errors(self): + base64 = self.module illegal = list(range(33)) + \ list(b'"\',./:[\\]') + \ list(range(128, 256)) @@ -777,6 +819,7 @@ def test_b85decode_errors(self): self.assertRaises(ValueError, base64.b85decode, b'|NsC1') def test_z85decode_errors(self): + base64 = self.module illegal = list(range(33)) + \ list(b'"\',;_`|\\~') + \ list(range(128, 256)) @@ -792,6 +835,7 @@ def test_z85decode_errors(self): self.assertRaises(ValueError, base64.z85decode, b'%nSc1') def test_decode_nonascii_str(self): + base64 = self.module decode_funcs = (base64.b64decode, base64.standard_b64decode, base64.urlsafe_b64decode, @@ -807,6 +851,7 @@ def test_ErrorHeritage(self): self.assertTrue(issubclass(binascii.Error, ValueError)) def test_RFC4648_test_cases(self): + base64 = self.module # test cases from RFC 4648 section 10 b64encode = base64.b64encode b32hexencode = base64.b32hexencode @@ -846,6 +891,15 @@ def test_RFC4648_test_cases(self): self.assertEqual(b16encode(b"foobar"), b"666F6F626172") +class BaseXYTestCasePython(BaseXYTestCase, unittest.TestCase): + module = py_base64 + + +@unittest.skipUnless(c_base64, "requires _base64") +class BaseXYTestCaseC(BaseXYTestCase, unittest.TestCase): + module = c_base64 + + class TestMain(unittest.TestCase): def tearDown(self): if os.path.exists(os_helper.TESTFN): diff --git a/Lib/test/test_binascii.py b/Lib/test/test_binascii.py index 1f3b6746ce4a62..ac458ff8993094 100644 --- a/Lib/test/test_binascii.py +++ b/Lib/test/test_binascii.py @@ -9,9 +9,11 @@ # Note: "*_hex" functions are aliases for "(un)hexlify" -b2a_functions = ['b2a_base64', 'b2a_hex', 
'b2a_qp', 'b2a_uu', +b2a_functions = ['b2a_ascii85', 'b2a_base64', 'b2a_base85', + 'b2a_hex', 'b2a_qp', 'b2a_uu', 'hexlify'] -a2b_functions = ['a2b_base64', 'a2b_hex', 'a2b_qp', 'a2b_uu', +a2b_functions = ['a2b_ascii85', 'a2b_base64', 'a2b_base85', + 'a2b_hex', 'a2b_qp', 'a2b_uu', 'unhexlify'] all_functions = a2b_functions + b2a_functions + ['crc32', 'crc_hqx'] @@ -207,6 +209,432 @@ def assertInvalidLength(data): assertInvalidLength(b'a' * (4 * 87 + 1)) assertInvalidLength(b'A\tB\nC ??DE') # only 5 valid characters + def test_ascii85_valid(self): + # Test Ascii85 with valid data + ASCII85_PREFIX = b"<~" + ASCII85_SUFFIX = b"~>" + + # Interleave blocks of 4 null bytes and 4 spaces into test data + rawdata = bytearray() + rawlines, i = [], 0 + for k in range(1, len(self.rawdata) + 1): + b = b"\0\0\0\0" if k & 1 else b" " + b = b + self.rawdata[i:i + k] + b = b" " if k & 1 else b"\0\0\0\0" + rawdata += b + rawlines.append(b) + i += k + if i >= len(self.rawdata): + break + + # Test core parameter combinations + params = (False, False), (False, True), (True, False), (True, True) + for fold_spaces, wrap in params: + lines = [] + for rawline in rawlines: + b = self.type2test(rawline) + a = binascii.b2a_ascii85(b, fold_spaces=fold_spaces, wrap=wrap) + lines.append(a) + res = bytearray() + for line in lines: + a = self.type2test(line) + b = binascii.a2b_ascii85(a, fold_spaces=fold_spaces, wrap=wrap) + res += b + self.assertEqual(res, rawdata) + + # Test decoding inputs with length 1 mod 5 + params = [ + (b"a", False, False, b"", b""), + (b"xbw", False, False, b"wx", b""), + (b"<~c~>", False, True, b"", b""), + (b"{d ~>", False, True, b" {", b""), + (b"ye", True, False, b"", b" "), + (b"z\x01y\x00f", True, False, b"\x00\x01", b"\x00\x00\x00\x00 "), + (b"<~FCfN8yg~>", True, True, b"", b"test "), + (b"FE;\x03#8zFCf\x02N8yh~>", True, True, b"\x02\x03", b"tset\x00\x00\x00\x00test "), + ] + for a, fold_spaces, wrap, ignore, b in params: + kwargs = {"fold_spaces": fold_spaces, 
"wrap": wrap, "ignore": ignore} + self.assertEqual(binascii.a2b_ascii85(self.type2test(a), **kwargs), b) + + def test_ascii85_invalid(self): + # Test Ascii85 with invalid characters interleaved + lines, i = [], 0 + for k in range(1, len(self.rawdata) + 1): + b = self.type2test(self.rawdata[i:i + k]) + a = binascii.b2a_ascii85(b) + lines.append(a) + i += k + if i >= len(self.rawdata): + break + + fillers = bytearray() + valid = b"!\"#$%&'()*+,-./0123456789:;<=>?@" \ + b"ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstu" + b"z" + for i in range(256): + if i not in valid: + fillers.append(i) + def addnoise(line): + res = bytearray() + for i in range(len(line)): + res.append(line[i]) + for j in range(i, len(fillers), len(line)): + res.append(fillers[j]) + return res + res = bytearray() + for line in map(addnoise, lines): + a = self.type2test(line) + b = binascii.a2b_ascii85(a, ignore=fillers) + res += b + self.assertEqual(res, self.rawdata) + + # Test Ascii85 with only invalid characters + fillers = self.type2test(fillers) + b = binascii.a2b_ascii85(fillers, ignore=fillers) + self.assertEqual(b, b"") + + def test_ascii85_errors(self): + def _assertRegexTemplate(assert_regex, data, **kwargs): + with self.assertRaisesRegex(binascii.Error, assert_regex): + binascii.a2b_ascii85(self.type2test(data), **kwargs) + + def assertMissingDelimiter(data): + _assertRegexTemplate(r"(?i)end with b'~>'", data, wrap=True) + + def assertOverflow(data): + _assertRegexTemplate(r"(?i)Ascii85 overflow", data) + + def assertInvalidSpecial(data): + _assertRegexTemplate(r"(?i)'[yz]'.+5-tuple", data, fold_spaces=True) + + def assertInvalidChar(data, **kwargs): + _assertRegexTemplate(r"(?i)Non-Ascii85 digit", data, **kwargs) + + # Test Ascii85 with missing delimiters + assertMissingDelimiter(b"") + assertMissingDelimiter(b"a") + assertMissingDelimiter(b"<~") + assertMissingDelimiter(b"<~!~") + assertMissingDelimiter(b"<~abc>") + assertMissingDelimiter(b"<~has delimiter but not terminal~> !") 
+ + # Test Ascii85 with out-of-range encoded value + assertOverflow(b"t") + assertOverflow(b"s9") + assertOverflow(b"s8X") + assertOverflow(b"s8W.") + assertOverflow(b's8W-"') + assertOverflow(b"s8W-!u") + assertOverflow(b"s8W-!s8W-!zs8X") + + # Test Ascii85 with misplaced short form groups + assertInvalidSpecial(b"ay") + assertInvalidSpecial(b"az") + assertInvalidSpecial(b"aby") + assertInvalidSpecial(b"ayz") + assertInvalidSpecial(b"abcz") + assertInvalidSpecial(b"abcdy") + assertInvalidSpecial(b"y!and!z!then!!y") + + # Test Ascii85 with non-ignored invalid characters + assertInvalidChar(b"j\n") + assertInvalidChar(b" ", ignore=b"") + assertInvalidChar(b" valid\x02until\x03", ignore=b"\x00\x01\x02\x04") + assertInvalidChar(b"\tFCb", ignore=b"\n") + assertInvalidChar(b"xxxB\nP\thU'D v/F+", ignore=b" \n\tv") + + def test_ascii85_width(self): + # Test Ascii85 splitting lines by width + def assertEncode(a_expected, data, n, wrap=False): + b = self.type2test(data) + a = binascii.b2a_ascii85(b, wrap=wrap, width=n) + self.assertEqual(a, a_expected) + + def assertDecode(data, b_expected, wrap=False): + a = self.type2test(data) + b = binascii.a2b_ascii85(a, wrap=wrap, ignore=b"\n") + self.assertEqual(b, b_expected) + + tests = [ + (b"", 0, b"", b"<~~>"), + (b"", 1, b"", b"<~\n~>"), + (b"a", 0, b"@/", b"<~@/~>"), + (b"a", 1, b"@\n/", b"<~\n@/\n~>"), + (b"a", 2, b"@/", b"<~\n@/\n~>"), + (b"a", 3, b"@/", b"<~@\n/~>"), + (b"a", 4, b"@/", b"<~@/\n~>"), + (b"a", 5, b"@/", b"<~@/\n~>"), + (b"a", 6, b"@/", b"<~@/~>"), + (b"a", 7, b"@/", b"<~@/~>"), + (b"a", 123, b"@/", b"<~@/~>"), + (b"this is a test", 7, b"FD,B0+D\nGm>@3BZ\n'F*%", + b"<~FD,B0\n+DGm>@3\nBZ'F*%\n~>"), + (b"a test!!!!!!! 
", 11, b"@3BZ'F*&QK+\nX&!P+WqmM+9", + b"<~@3BZ'F*&Q\nK+X&!P+WqmM\n+9~>"), + (b"\0" * 56, 7, b"zzzzzzz\nzzzzzzz", b"<~zzzzz\nzzzzzzz\nzz~>"), + ] + for b, n, a, a_wrap in tests: + assertEncode(a, b, n) + assertEncode(a_wrap, b, n, wrap=True) + assertDecode(a, b) + assertDecode(a_wrap, b, wrap=True) + + def test_ascii85_pad(self): + # Test Ascii85 with encode padding + rawdata = b"n1n3tee\n ch@rAcTer$" + for i in range(1, len(rawdata) + 1): + padding = -i % 4 + b = rawdata[:i] + a_pad = binascii.b2a_ascii85(self.type2test(b), pad=True) + b_pad = binascii.a2b_ascii85(self.type2test(a_pad)) + b_pad_expected = b + b"\0" * padding + self.assertEqual(b_pad, b_pad_expected) + + # Test Ascii85 short form groups with encode padding + def assertShortPad(data, expected, **kwargs): + data = self.type2test(data) + res = binascii.b2a_ascii85(data, **kwargs) + self.assertEqual(res, expected) + + assertShortPad(b"\0", b"!!", pad=False) + assertShortPad(b"\0", b"z", pad=True) + assertShortPad(b"\0" * 2, b"z", pad=True) + assertShortPad(b"\0" * 3, b"z", pad=True) + assertShortPad(b"\0" * 4, b"z", pad=True) + assertShortPad(b"\0" * 5, b"zz", pad=True) + assertShortPad(b"\0" * 6, b"z!!!") + assertShortPad(b" " * 7, b"y+", + fold_spaces=True, wrap=True, pad=True) + assertShortPad(b"\0\0\0\0abcd \0\0", b"<~z@:E_Wy\nz~>", + fold_spaces=True, wrap=True, width=9, pad=True) + + def test_ascii85_ignore(self): + # Test Ascii85 with ignored characters + def assertIgnore(data, expected, ignore=b"", **kwargs): + data = self.type2test(data) + ignore = self.type2test(ignore) + with self.assertRaisesRegex(binascii.Error, r"(?i)Non-Ascii85 digit"): + binascii.a2b_ascii85(data, **kwargs) + res = binascii.a2b_ascii85(data, ignore=ignore, **kwargs) + self.assertEqual(res, expected) + + assertIgnore(b"\n", b"", ignore=b"\n") + assertIgnore(b"<~ ~>", b"", ignore=b" ", wrap=True) + assertIgnore(b"z|z", b"\0" * 8, ignore=b"|||") # repeats don't matter + assertIgnore(b"zz!!|", b"\0" * 9, ignore=b"|!z") # 
ignore only if invalid + assertIgnore(b"<~B P~@~>", b"hi", ignore=b" <~>", wrap=True) + assertIgnore(b"zy}", b"\0\0\0\0", ignore=b"zy}") + assertIgnore(b"zy}", b"\0\0\0\0 ", ignore=b"zy}", fold_spaces=True) + + def test_base85_valid(self): + # Test base85 with valid data + lines, i = [], 0 + for k in range(1, len(self.rawdata) + 1): + b = self.type2test(self.rawdata[i:i + k]) + a = binascii.b2a_base85(b) + lines.append(a) + i += k + if i >= len(self.rawdata): + break + res = bytes() + for line in lines: + a = self.type2test(line) + b = binascii.a2b_base85(a) + res += b + self.assertEqual(res, self.rawdata) + + # Test decoding inputs with length 1 mod 5 + self.assertEqual(binascii.a2b_base85(self.type2test(b"a")), b"") + self.assertEqual(binascii.a2b_base85(self.type2test(b" b ")), b"") + self.assertEqual(binascii.a2b_base85(self.type2test(b"b/Y\"*,j'Nc")), b"test") + + def test_base85_invalid(self): + # Test base85 with invalid characters interleaved + lines, i = [], 0 + for k in range(1, len(self.rawdata) + 1): + b = self.type2test(self.rawdata[i:i + k]) + a = binascii.b2a_base85(b) + lines.append(a) + i += k + if i >= len(self.rawdata): + break + + fillers = bytearray() + valid = b"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ" \ + b"abcdefghijklmnopqrstuvwxyz!#$%&()*+-;<=>?@^_`{|}~" + for i in range(256): + if i not in valid: + fillers.append(i) + def addnoise(line): + res = bytearray() + for i in range(len(line)): + res.append(line[i]) + for j in range(i, len(fillers), len(line)): + res.append(fillers[j]) + return res + res = bytearray() + for line in map(addnoise, lines): + a = self.type2test(line) + b = binascii.a2b_base85(a) + res += b + self.assertEqual(res, self.rawdata) + + def test_base85_errors(self): + def _assertRegexTemplate(assert_regex, data, **kwargs): + with self.assertRaisesRegex(binascii.Error, assert_regex): + binascii.a2b_base85(self.type2test(data), **kwargs) + + def assertOverflow(data): + _assertRegexTemplate(r"(?i)base85 overflow", data) + + # 
Test base85 with out-of-range encoded value + assertOverflow(b"}") + assertOverflow(b"|O") + assertOverflow(b"|Nt") + assertOverflow(b"|NsD") + assertOverflow(b"|NsC1") + assertOverflow(b"|NsC0~") + assertOverflow(b"|NsC0|NsC0|NsD0") + + def test_base85_pad(self): + # Test base85 with encode padding + rawdata = b"n1n3Tee\n ch@rAc\te\r$" + for i in range(1, len(rawdata) + 1): + padding = -i % 4 + b = rawdata[:i] + a_pad = binascii.b2a_base85(self.type2test(b), pad=True) + b_pad = binascii.a2b_base85(self.type2test(a_pad)) + b_pad_expected = b + b"\0" * padding + self.assertEqual(b_pad, b_pad_expected) + + def test_base85_strict_mode(self): + # Test base85 with strict mode on + def assertNonBase85Data(data, expected): + data = self.type2test(data) + with self.assertRaisesRegex(binascii.Error, r"(?i)bad base85 character"): + binascii.a2b_base85(data, strict_mode=True) + default_res = binascii.a2b_base85(data) + non_strict_res = binascii.a2b_base85(data, strict_mode=False) + self.assertEqual(default_res, non_strict_res) + self.assertEqual(non_strict_res, expected) + + assertNonBase85Data(b"\xda", b"") + assertNonBase85Data(b"00\0\0", b"\0") + assertNonBase85Data(b"Z )*", b"ok") + assertNonBase85Data(b"bY*jNb0Hyq\n", b"tests!!~") + + def test_base85_newline(self): + # Test base85 newline parameter + b = self.type2test(b"t3s\t ") + self.assertEqual(binascii.b2a_base85(b), b"bTe}aAO\n") + self.assertEqual(binascii.b2a_base85(b, newline=True), b"bTe}aAO\n") + self.assertEqual(binascii.b2a_base85(b, newline=False), b"bTe}aAO") + + def test_base85_z85_valid(self): + # Test base85 (Z85 alphabet) with valid data + lines, i = [], 0 + for k in range(1, len(self.rawdata) + 1): + b = self.type2test(self.rawdata[i:i + k]) + a = binascii.b2a_base85(b, z85=True) + lines.append(a) + i += k + if i >= len(self.rawdata): + break + res = bytes() + for line in lines: + a = self.type2test(line) + b = binascii.a2b_base85(a, z85=True) + res += b + self.assertEqual(res, self.rawdata) + + # 
Test decoding inputs with length 1 mod 5 + self.assertEqual(binascii.a2b_base85(self.type2test(b"a"), z85=True), b"") + self.assertEqual(binascii.a2b_base85(self.type2test(b" b "), z85=True), b"") + self.assertEqual(binascii.a2b_base85(self.type2test(b"B y,/;J_n\\c"), z85=True), b"test") + + def test_base85_z85_invalid(self): + # Test base85 (Z85 alphabet) with invalid characters interleaved + lines, i = [], 0 + for k in range(1, len(self.rawdata) + 1): + b = self.type2test(self.rawdata[i:i + k]) + a = binascii.b2a_base85(b, z85=True) + lines.append(a) + i += k + if i >= len(self.rawdata): + break + + fillers = bytearray() + valid = b"0123456789abcdefghijklmnopqrstuvwxyz" \ + b"ABCDEFGHIJKLMNOPQRSTUVWXYZ.-:+=^!/*?&<>()[]{}@%$#" + for i in range(256): + if i not in valid: + fillers.append(i) + def addnoise(line): + res = bytearray() + for i in range(len(line)): + res.append(line[i]) + for j in range(i, len(fillers), len(line)): + res.append(fillers[j]) + return res + res = bytearray() + for line in map(addnoise, lines): + a = self.type2test(line) + b = binascii.a2b_base85(a, z85=True) + res += b + self.assertEqual(res, self.rawdata) + + def test_base85_z85_errors(self): + def _assertRegexTemplate(assert_regex, data, **kwargs): + with self.assertRaisesRegex(binascii.Error, assert_regex): + binascii.a2b_base85(self.type2test(data), z85=True, **kwargs) + + def assertOverflow(data): + _assertRegexTemplate(r"(?i)z85 overflow", data) + + # Test base85 (Z85 alphabet) with out-of-range encoded value + assertOverflow(b"%") + assertOverflow(b"%n") + assertOverflow(b"%nS") + assertOverflow(b"%nSc") + assertOverflow(b"%nSc1") + assertOverflow(b"%nSc0$") + assertOverflow(b"%nSc0%nSc0%nSD0") + + def test_base85_z85_pad(self): + # Test base85 (Z85 alphabet) with encode padding + rawdata = b"n1n3Tee\n ch@rAc\te\r$" + for i in range(1, len(rawdata) + 1): + padding = -i % 4 + b = rawdata[:i] + a_pad = binascii.b2a_base85(self.type2test(b), pad=True, z85=True) + b_pad = 
binascii.a2b_base85(self.type2test(a_pad), z85=True) + b_pad_expected = b + b"\0" * padding + self.assertEqual(b_pad, b_pad_expected) + + def test_base85_z85_strict_mode(self): + # Test base85 (Z85 alphabet) with strict mode on + def assertNonZ85Data(data, expected): + data = self.type2test(data) + with self.assertRaisesRegex(binascii.Error, r"(?i)bad z85 character"): + binascii.a2b_base85(data, strict_mode=True, z85=True) + default_res = binascii.a2b_base85(data, z85=True) + non_strict_res = binascii.a2b_base85(data, strict_mode=False, z85=True) + self.assertEqual(default_res, non_strict_res) + self.assertEqual(non_strict_res, expected) + + assertNonZ85Data(b"\xda", b"") + assertNonZ85Data(b"00\0\0", b"\0") + assertNonZ85Data(b"z !/", b"ok") + assertNonZ85Data(b"By/JnB0hYQ\n", b"tests!!~") + + def test_base85_z85_newline(self): + # Test base85 (Z85 alphabet) newline parameter + b = self.type2test(b"t3s\t ") + self.assertEqual(binascii.b2a_base85(b, z85=True), b"BtE$Aao\n") + self.assertEqual(binascii.b2a_base85(b, newline=True, z85=True), b"BtE$Aao\n") + self.assertEqual(binascii.b2a_base85(b, newline=False, z85=True), b"BtE$Aao") + def test_uu(self): MAX_UU = 45 for backtick in (True, False): diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-04-20-21-54-51.gh-issue-101178.4k2Sib.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-04-20-21-54-51.gh-issue-101178.4k2Sib.rst new file mode 100644 index 00000000000000..fef1052b738a80 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-04-20-21-54-51.gh-issue-101178.4k2Sib.rst @@ -0,0 +1,2 @@ +Add Ascii85 and base85 support to :mod:`binascii` and improve the +performance of the base-85 converters in :mod:`base64`. 
diff --git a/Modules/binascii.c b/Modules/binascii.c index 6bb01d148b6faa..fb760d2c4ad5ae 100644 --- a/Modules/binascii.c +++ b/Modules/binascii.c @@ -105,6 +105,84 @@ static const unsigned char table_b2a_base64[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; +static const unsigned char table_a2b_base85[] = { + -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, + -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, + -1,62,-1,63, 64,65,66,-1, 67,68,69,70, -1,71,-1,-1, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,-1,72, 73,74,75,76, + 77,10,11,12, 13,14,15,16, 17,18,19,20, 21,22,23,24, + 25,26,27,28, 29,30,31,32, 33,34,35,-1, -1,-1,78,79, + 80,36,37,38, 39,40,41,42, 43,44,45,46, 47,48,49,50, + 51,52,53,54, 55,56,57,58, 59,60,61,81, 82,83,84,-1, + + -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, + -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, + -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, + -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, + -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, + -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, + -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, + -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, +}; + +static const unsigned char table_a2b_base85_a85[] = { + -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, + -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, + -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10, 11,12,13,14, + 15,16,17,18, 19,20,21,22, 23,24,25,26, 27,28,29,30, + 31,32,33,34, 35,36,37,38, 39,40,41,42, 43,44,45,46, + 47,48,49,50, 51,52,53,54, 55,56,57,58, 59,60,61,62, + 63,64,65,66, 67,68,69,70, 71,72,73,74, 75,76,77,78, + 79,80,81,82, 83,84,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, + + -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, + -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, + -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, + -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, + -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, + -1,-1,-1,-1, -1,-1,-1,-1, 
-1,-1,-1,-1, -1,-1,-1,-1, + -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, + -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, +}; + +static const unsigned char table_a2b_base85_z85[] = { + -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, + -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, + -1,68,-1,84, 83,82,72,-1, 75,76,70,65, -1,63,62,69, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,64,-1, 73,66,74,71, + 81,36,37,38, 39,40,41,42, 43,44,45,46, 47,48,49,50, + 51,52,53,54, 55,56,57,58, 59,60,61,77, -1,78,67,-1, + -1,10,11,12, 13,14,15,16, 17,18,19,20, 21,22,23,24, + 25,26,27,28, 29,30,31,32, 33,34,35,79, -1,80,-1,-1, + + -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, + -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, + -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, + -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, + -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, + -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, + -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, + -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, +}; + +static const unsigned char table_b2a_base85[] = + "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ" \ + "abcdefghijklmnopqrstuvwxyz!#$%&()*+-;<=>?@^_`{|}~"; + +static const unsigned char table_b2a_base85_a85[] = + "!\"#$%&\'()*+,-./0123456789:;<=>?@" \ + "ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstu"; + +static const unsigned char table_b2a_base85_z85[] = + "0123456789abcdefghijklmnopqrstuvwxyz" \ + "ABCDEFGHIJKLMNOPQRSTUVWXYZ.-:+=^!/\x2a?&<>()[]{}@%$#"; /* clinic doesn't like '/' followed by '*' */ + +#define BASE85_A85_PREFIX '<' +#define BASE85_A85_AFFIX '~' +#define BASE85_A85_SUFFIX '>' +#define BASE85_A85_Z 0x00000000 +#define BASE85_A85_Y 0x20202020 + static const unsigned short crctab_hqx[256] = { 0x0000, 0x1021, 0x2042, 0x3063, 0x4084, 0x50a5, 0x60c6, 0x70e7, 0x8108, 0x9129, 0xa14a, 0xb16b, 0xc18c, 0xd1ad, 0xe1ce, 0xf1ef, @@ -587,6 +665,447 @@ binascii_b2a_base64_impl(PyObject *module, Py_buffer *data, int 
newline) return _PyBytesWriter_Finish(&writer, ascii_data); } +/*[clinic input] +binascii.a2b_ascii85 + + data: ascii_buffer + / + * + fold_spaces: bool = False + Allow 'y' as a short form encoding four spaces. + wrap: bool = False + Expect data to be wrapped in '<~' and '~>' as in Adobe Ascii85. + ignore: Py_buffer(c_default="NULL", py_default="b''") = None + An optional bytes-like object with input characters to be ignored. + +Decode Ascii85 data. +[clinic start generated code]*/ + +static PyObject * +binascii_a2b_ascii85_impl(PyObject *module, Py_buffer *data, int fold_spaces, + int wrap, Py_buffer *ignore) +/*[clinic end generated code: output=6ab30f2a26d301a1 input=11c60c016d4f334b]*/ +{ + const unsigned char *ascii_data, *ignore_data; + unsigned char *bin_data; + int group_pos = 0; + unsigned char this_ch, this_digit; + unsigned char ignore_map[256] = {0}; + uint32_t leftchar = 0; + Py_ssize_t ascii_len, bin_len, chunk_len, ignore_len; + _PyBytesWriter writer; + binascii_state *state; + + ascii_data = data->buf; + ascii_len = data->len; + + assert(ascii_len >= 0); + + /* Consume Ascii85 prefix and suffix if present. */ + if (wrap) { + if (ascii_len < 2 || + ascii_data[ascii_len - 2] != BASE85_A85_AFFIX || + ascii_data[ascii_len - 1] != BASE85_A85_SUFFIX) { + state = get_binascii_state(module); + if (state != NULL) { + PyErr_SetString(state->Error, + "Ascii85 encoded byte sequences must end with b'~>'"); + } + return NULL; + } + ascii_len -= 2; + if (ascii_len >= 2 && + ascii_data[0] == BASE85_A85_PREFIX && + ascii_data[1] == BASE85_A85_AFFIX) { + ascii_data += 2; + ascii_len -= 2; + } + } + + /* Allocate output buffer. 
*/ + bin_len = ascii_len; + for (Py_ssize_t i = 0; i < ascii_len; i++) { + this_ch = ascii_data[i]; + if (this_ch == 'y' || this_ch == 'z') { + bin_len += 4; + } + } + bin_len = 4 * ((bin_len + 4) / 5); + + _PyBytesWriter_Init(&writer); + bin_data = _PyBytesWriter_Alloc(&writer, bin_len); + if (bin_data == NULL) { + return NULL; + } + + /* Build ignore map. */ + if (ignore->obj != NULL) { + ignore_data = ignore->buf; + ignore_len = ignore->len; + for (Py_ssize_t i = 0; i < ignore_len; i++) { + this_ch = ignore_data[i]; + ignore_map[this_ch] = -1; + } + } + + for (; ascii_len > 0 || group_pos != 0; ascii_len--, ascii_data++) { + /* Shift (in radix-85) data or padding into our buffer. */ + if (ascii_len > 0) { + this_ch = *ascii_data; + this_digit = table_a2b_base85_a85[this_ch]; + } else { + /* Pad with largest radix-85 digit when decoding. */ + this_digit = 84; + } + if (this_digit < 85) { + if (leftchar > UINT32_MAX / 85 || + (leftchar *= 85) > UINT32_MAX - this_digit) { + state = get_binascii_state(module); + if (state != NULL) { + PyErr_SetString(state->Error, "Ascii85 overflow"); + } + goto error_end; + } + leftchar += this_digit; + group_pos++; + } else if ((this_ch == 'y' && fold_spaces) || this_ch == 'z') { + if (group_pos != 0) { + state = get_binascii_state(module); + if (state != NULL) { + PyErr_Format(state->Error, + "'%c' inside Ascii85 5-tuple", this_ch); + } + goto error_end; + } + leftchar = this_ch == 'y' ? BASE85_A85_Y : BASE85_A85_Z; + group_pos = 5; + } else if (!ignore_map[this_ch]) { + state = get_binascii_state(module); + if (state != NULL) { + PyErr_Format(state->Error, + "Non-Ascii85 digit found: %c", this_ch); + } + goto error_end; + } + + /* Wait until buffer is full. */ + if (group_pos != 5) { + continue; + } + + /* Write current chunk. */ + chunk_len = ascii_len < 1 ? 
3 + ascii_len : 4; + for (Py_ssize_t i = 0; i < chunk_len; i++) { + *bin_data++ = (leftchar >> (24 - 8 * i)) & 0xff; + } + + group_pos = 0; + leftchar = 0; + } + + return _PyBytesWriter_Finish(&writer, bin_data); + +error_end: + _PyBytesWriter_Dealloc(&writer); + return NULL; +} + +/*[clinic input] +binascii.b2a_ascii85 + + data: Py_buffer + / + * + fold_spaces: bool = False + Emit 'y' as a short form encoding four spaces. + wrap: bool = False + Wrap result in '<~' and '~>' as in Adobe Ascii85. + width: unsigned_int(bitwise=True) = 0 + Split result into lines of provided width. + pad: bool = False + Pad input to a multiple of 4 before encoding. + +Ascii85-encode data. +[clinic start generated code]*/ + +static PyObject * +binascii_b2a_ascii85_impl(PyObject *module, Py_buffer *data, int fold_spaces, + int wrap, unsigned int width, int pad) +/*[clinic end generated code: output=78426392ad3fc75b input=d5122dbab4dbb9f2]*/ +{ + unsigned char *ascii_data; + const unsigned char *bin_data; + int chunk_pos = 0; + unsigned char this_group[5]; + uint32_t leftchar = 0; + unsigned int line_len = 0; + Py_ssize_t bin_len, group_len, out_len; + _PyBytesWriter writer; + + bin_data = data->buf; + bin_len = data->len; + + assert(bin_len >= 0); + + if (wrap && width == 1) { + width = 2; + } + + /* Allocate output buffer. + XXX: Do a pre-pass above some threshold estimate (cf. 'yz')? + */ + out_len = 5 * ((bin_len + 3) / 4); + if (wrap) out_len += 4; + if (!pad && (bin_len % 4)) out_len -= 4 - (bin_len % 4); + if (width && out_len) out_len += (out_len - 1) / width; + + _PyBytesWriter_Init(&writer); + ascii_data = _PyBytesWriter_Alloc(&writer, out_len); + if (ascii_data == NULL) { + return NULL; + } + + if (wrap) { + *ascii_data++ = BASE85_A85_PREFIX; + *ascii_data++ = BASE85_A85_AFFIX; + line_len = 2; + } + + for (; bin_len > 0 || chunk_pos != 0; bin_len--, bin_data++) { + /* Shift data or padding into our buffer. */ + leftchar <<= 8; /* Pad with zero when encoding. 
*/ + if (bin_len > 0) { + leftchar |= *bin_data; + } + + /* Wait until buffer is full. */ + if (++chunk_pos != 4) { + continue; + } + + /* Encode current chunk. */ + if (((bin_len > 0 || pad) && leftchar == BASE85_A85_Z) || + (fold_spaces && leftchar == BASE85_A85_Y)) { + this_group[0] = leftchar == BASE85_A85_Y ? 'y' : 'z'; + group_len = 1; + leftchar = 0; + } else { + group_len = bin_len > 0 || pad ? 5 : 4 + bin_len; + for (Py_ssize_t i = 4; i >= 0; i--) { + this_group[i] = table_b2a_base85_a85[leftchar % 85]; + leftchar /= 85; + } + } + + /* Write current group. */ + for (Py_ssize_t i = 0; i < group_len; i++) { + if (width && line_len == width) { + *ascii_data++ = '\n'; + line_len = 0; + } + *ascii_data++ = this_group[i]; + line_len++; + } + + chunk_pos = 0; + } + + if (wrap) { + if (width && line_len + 2 > width) { + *ascii_data++ = '\n'; + } + *ascii_data++ = BASE85_A85_AFFIX; + *ascii_data++ = BASE85_A85_SUFFIX; + } + + return _PyBytesWriter_Finish(&writer, ascii_data); +} + +/*[clinic input] +binascii.a2b_base85 + + data: ascii_buffer + / + * + strict_mode: bool = False + When set to True, bytes that are not in the base85 alphabet + (or the Z85 alphabet, if z85 is True) are not allowed. + z85: bool = False + When set to True, the Z85 alphabet is used instead of the standard + base85 alphabet. + +Decode a line of base85 data. +[clinic start generated code]*/ + +static PyObject * +binascii_a2b_base85_impl(PyObject *module, Py_buffer *data, int strict_mode, + int z85) +/*[clinic end generated code: output=c5b9118ffe77f1cb input=65c2a532ad64ebd5]*/ +{ + const unsigned char *ascii_data, *table_a2b; + unsigned char *bin_data; + int group_pos = 0; + unsigned char this_ch, this_digit; + uint32_t leftchar = 0; + Py_ssize_t ascii_len, bin_len, chunk_len; + _PyBytesWriter writer; + binascii_state *state; + + table_a2b = z85 ? 
table_a2b_base85_z85 : table_a2b_base85; + ascii_data = data->buf; + ascii_len = data->len; + + assert(ascii_len >= 0); + + /* Allocate output buffer. */ + bin_len = 4 * ((ascii_len + 4) / 5); + + _PyBytesWriter_Init(&writer); + bin_data = _PyBytesWriter_Alloc(&writer, bin_len); + if (bin_data == NULL) { + return NULL; + } + + for (; ascii_len > 0 || group_pos != 0; ascii_len--, ascii_data++) { + /* Shift (in radix-85) data or padding into our buffer. */ + if (ascii_len > 0) { + this_ch = *ascii_data; + this_digit = table_a2b[this_ch]; + } else { + /* Pad with largest radix-85 digit when decoding. */ + this_digit = 84; + } + if (this_digit < 85) { + if (leftchar > UINT32_MAX / 85 || + (leftchar *= 85) > UINT32_MAX - this_digit) { + state = get_binascii_state(module); + if (state != NULL) { + PyErr_Format(state->Error, + "%s overflow in hunk starting at byte %d", + z85 ? "z85" : "base85", + (data->len - ascii_len) / 5 * 5); + } + goto error_end; + } + leftchar += this_digit; + group_pos++; + } else if (strict_mode) { + state = get_binascii_state(module); + if (state != NULL) { + PyErr_Format(state->Error, "bad %s character at position %d", + z85 ? "z85" : "base85", data->len - ascii_len); + } + goto error_end; + } + + /* Wait until buffer is full. */ + if (group_pos != 5) { + continue; + } + + /* Write current chunk. */ + chunk_len = ascii_len < 1 ? 3 + ascii_len : 4; + for (Py_ssize_t i = 0; i < chunk_len; i++) { + *bin_data++ = (leftchar >> (24 - 8 * i)) & 0xff; + } + + group_pos = 0; + leftchar = 0; + } + + return _PyBytesWriter_Finish(&writer, bin_data); + +error_end: + _PyBytesWriter_Dealloc(&writer); + return NULL; +} + +/*[clinic input] +binascii.b2a_base85 + + data: Py_buffer + / + * + pad: bool = False + Pad input to a multiple of 4 before encoding. + newline: bool = True + Append a newline to the result. + z85: bool = False + Use Z85 alphabet instead of standard base85 alphabet. + +Base85-code line of data. 
+[clinic start generated code]*/ + +static PyObject * +binascii_b2a_base85_impl(PyObject *module, Py_buffer *data, int pad, + int newline, int z85) +/*[clinic end generated code: output=d3740e9a20c8e071 input=e4e07591f7a11ae4]*/ +{ + unsigned char *ascii_data; + const unsigned char *bin_data, *table_b2a; + uint32_t leftchar = 0; + Py_ssize_t bin_len, group_len, out_len; + _PyBytesWriter writer; + + table_b2a = z85 ? table_b2a_base85_z85 : table_b2a_base85; + bin_data = data->buf; + bin_len = data->len; + + assert(bin_len >= 0); + + /* Reject inputs whose encoded size (five characters per four bytes, plus an optional newline) would overflow Py_ssize_t. */ + if (bin_len > (PY_SSIZE_T_MAX - 1) / 5 * 4) { + return PyErr_NoMemory(); + } + + /* Allocate output buffer. */ + out_len = 5 * ((bin_len + 3) / 4); + if (!pad && (bin_len % 4)) out_len -= 4 - (bin_len % 4); + if (newline) out_len++; + + _PyBytesWriter_Init(&writer); + ascii_data = _PyBytesWriter_Alloc(&writer, out_len); + if (ascii_data == NULL) { + return NULL; + } + + /* Encode all full-length chunks. */ + for (; bin_len >= 4; bin_len -= 4, bin_data += 4) { + /* Widen each byte before shifting: an unsigned char promotes to int, and shifting a value >= 0x80 left by 24 overflows a signed int. */ + leftchar = ((uint32_t)bin_data[0] << 24) | ((uint32_t)bin_data[1] << 16) | + ((uint32_t)bin_data[2] << 8) | (uint32_t)bin_data[3]; + + ascii_data[4] = table_b2a[leftchar % 85]; + leftchar /= 85; + ascii_data[3] = table_b2a[leftchar % 85]; + leftchar /= 85; + ascii_data[2] = table_b2a[leftchar % 85]; + leftchar /= 85; + ascii_data[1] = table_b2a[leftchar % 85]; + leftchar /= 85; + ascii_data[0] = table_b2a[leftchar]; + + ascii_data += 5; + } + + /* Encode partial-length final chunk. */ + if (bin_len > 0) { + /* Four 8-bit shifts push any stale bits out of the 32-bit accumulator. */ + for (Py_ssize_t i = 0; i < 4; i++) { + leftchar <<= 8; /* Pad with zero when encoding. */ + if (i < bin_len) { + leftchar |= *bin_data++; + } + } + group_len = pad ?
5 : bin_len + 1; + for (Py_ssize_t i = 4; i >= 0; i--) { + if (i < group_len) { + ascii_data[i] = table_b2a[leftchar % 85]; + } + leftchar /= 85; + } + ascii_data += group_len; + } + + if (newline) { + *ascii_data++ = '\n'; + } + + return _PyBytesWriter_Finish(&writer, ascii_data); +} /*[clinic input] binascii.crc_hqx @@ -1246,6 +1765,10 @@ static struct PyMethodDef binascii_module_methods[] = { BINASCII_B2A_UU_METHODDEF BINASCII_A2B_BASE64_METHODDEF BINASCII_B2A_BASE64_METHODDEF + BINASCII_B2A_ASCII85_METHODDEF + BINASCII_A2B_ASCII85_METHODDEF + BINASCII_A2B_BASE85_METHODDEF + BINASCII_B2A_BASE85_METHODDEF BINASCII_A2B_HEX_METHODDEF BINASCII_B2A_HEX_METHODDEF BINASCII_HEXLIFY_METHODDEF diff --git a/Modules/clinic/binascii.c.h b/Modules/clinic/binascii.c.h index 602e42a4c1aaa4..c119c07bb6f22c 100644 --- a/Modules/clinic/binascii.c.h +++ b/Modules/clinic/binascii.c.h @@ -267,6 +267,423 @@ binascii_b2a_base64(PyObject *module, PyObject *const *args, Py_ssize_t nargs, P return return_value; } +PyDoc_STRVAR(binascii_a2b_ascii85__doc__, +"a2b_ascii85($module, data, /, *, fold_spaces=False, wrap=False,\n" +" ignore=b\'\')\n" +"--\n" +"\n" +"Decode Ascii85 data.\n" +"\n" +" fold_spaces\n" +" Allow \'y\' as a short form encoding four spaces.\n" +" wrap\n" +" Expect data to be wrapped in \'<~\' and \'~>\' as in Adobe Ascii85.\n" +" ignore\n" +" An optional bytes-like object with input characters to be ignored."); + +#define BINASCII_A2B_ASCII85_METHODDEF \ + {"a2b_ascii85", _PyCFunction_CAST(binascii_a2b_ascii85), METH_FASTCALL|METH_KEYWORDS, binascii_a2b_ascii85__doc__}, + +static PyObject * +binascii_a2b_ascii85_impl(PyObject *module, Py_buffer *data, int fold_spaces, + int wrap, Py_buffer *ignore); + +static PyObject * +binascii_a2b_ascii85(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +{ + PyObject *return_value = NULL; + #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) + + #define NUM_KEYWORDS 3 + static struct { + PyGC_Head 
_this_is_not_used; + PyObject_VAR_HEAD + Py_hash_t ob_hash; + PyObject *ob_item[NUM_KEYWORDS]; + } _kwtuple = { + .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) + .ob_hash = -1, + .ob_item = { &_Py_ID(fold_spaces), &_Py_ID(wrap), &_Py_ID(ignore), }, + }; + #undef NUM_KEYWORDS + #define KWTUPLE (&_kwtuple.ob_base.ob_base) + + #else // !Py_BUILD_CORE + # define KWTUPLE NULL + #endif // !Py_BUILD_CORE + + static const char * const _keywords[] = {"", "fold_spaces", "wrap", "ignore", NULL}; + static _PyArg_Parser _parser = { + .keywords = _keywords, + .fname = "a2b_ascii85", + .kwtuple = KWTUPLE, + }; + #undef KWTUPLE + PyObject *argsbuf[4]; + Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 1; + Py_buffer data = {NULL, NULL}; + int fold_spaces = 0; + int wrap = 0; + Py_buffer ignore = {NULL, NULL}; + + args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, + /*minpos*/ 1, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf); + if (!args) { + goto exit; + } + if (!ascii_buffer_converter(args[0], &data)) { + goto exit; + } + if (!noptargs) { + goto skip_optional_kwonly; + } + if (args[1]) { + fold_spaces = PyObject_IsTrue(args[1]); + if (fold_spaces < 0) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_kwonly; + } + } + if (args[2]) { + wrap = PyObject_IsTrue(args[2]); + if (wrap < 0) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_kwonly; + } + } + if (PyObject_GetBuffer(args[3], &ignore, PyBUF_SIMPLE) != 0) { + goto exit; + } +skip_optional_kwonly: + return_value = binascii_a2b_ascii85_impl(module, &data, fold_spaces, wrap, &ignore); + +exit: + /* Cleanup for data */ + if (data.obj) + PyBuffer_Release(&data); + /* Cleanup for ignore */ + if (ignore.obj) { + PyBuffer_Release(&ignore); + } + + return return_value; +} + +PyDoc_STRVAR(binascii_b2a_ascii85__doc__, +"b2a_ascii85($module, data, /, *, fold_spaces=False, wrap=False,\n" +" width=0, pad=False)\n" +"--\n" +"\n" +"Ascii85-encode data.\n" 
+"\n" +" fold_spaces\n" +" Emit \'y\' as a short form encoding four spaces.\n" +" wrap\n" +" Wrap result in \'<~\' and \'~>\' as in Adobe Ascii85.\n" +" width\n" +" Split result into lines of provided width.\n" +" pad\n" +" Pad input to a multiple of 4 before encoding."); + +#define BINASCII_B2A_ASCII85_METHODDEF \ + {"b2a_ascii85", _PyCFunction_CAST(binascii_b2a_ascii85), METH_FASTCALL|METH_KEYWORDS, binascii_b2a_ascii85__doc__}, + +static PyObject * +binascii_b2a_ascii85_impl(PyObject *module, Py_buffer *data, int fold_spaces, + int wrap, unsigned int width, int pad); + +static PyObject * +binascii_b2a_ascii85(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +{ + PyObject *return_value = NULL; + #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) + + #define NUM_KEYWORDS 4 + static struct { + PyGC_Head _this_is_not_used; + PyObject_VAR_HEAD + Py_hash_t ob_hash; + PyObject *ob_item[NUM_KEYWORDS]; + } _kwtuple = { + .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) + .ob_hash = -1, + .ob_item = { &_Py_ID(fold_spaces), &_Py_ID(wrap), &_Py_ID(width), &_Py_ID(pad), }, + }; + #undef NUM_KEYWORDS + #define KWTUPLE (&_kwtuple.ob_base.ob_base) + + #else // !Py_BUILD_CORE + # define KWTUPLE NULL + #endif // !Py_BUILD_CORE + + static const char * const _keywords[] = {"", "fold_spaces", "wrap", "width", "pad", NULL}; + static _PyArg_Parser _parser = { + .keywords = _keywords, + .fname = "b2a_ascii85", + .kwtuple = KWTUPLE, + }; + #undef KWTUPLE + PyObject *argsbuf[5]; + Py_ssize_t noptargs = nargs + (kwnames ? 
PyTuple_GET_SIZE(kwnames) : 0) - 1; + Py_buffer data = {NULL, NULL}; + int fold_spaces = 0; + int wrap = 0; + unsigned int width = 0; + int pad = 0; + + args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, + /*minpos*/ 1, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf); + if (!args) { + goto exit; + } + if (PyObject_GetBuffer(args[0], &data, PyBUF_SIMPLE) != 0) { + goto exit; + } + if (!noptargs) { + goto skip_optional_kwonly; + } + if (args[1]) { + fold_spaces = PyObject_IsTrue(args[1]); + if (fold_spaces < 0) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_kwonly; + } + } + if (args[2]) { + wrap = PyObject_IsTrue(args[2]); + if (wrap < 0) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_kwonly; + } + } + if (args[3]) { + width = (unsigned int)PyLong_AsUnsignedLongMask(args[3]); + if (width == (unsigned int)-1 && PyErr_Occurred()) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_kwonly; + } + } + pad = PyObject_IsTrue(args[4]); + if (pad < 0) { + goto exit; + } +skip_optional_kwonly: + return_value = binascii_b2a_ascii85_impl(module, &data, fold_spaces, wrap, width, pad); + +exit: + /* Cleanup for data */ + if (data.obj) { + PyBuffer_Release(&data); + } + + return return_value; +} + +PyDoc_STRVAR(binascii_a2b_base85__doc__, +"a2b_base85($module, data, /, *, strict_mode=False, z85=False)\n" +"--\n" +"\n" +"Decode a line of base85 data.\n" +"\n" +" strict_mode\n" +" When set to True, bytes that are not in the base85 alphabet\n" +" (or the Z85 alphabet, if z85 is True) are not allowed.\n" +" z85\n" +" When set to True, the Z85 alphabet is used instead of the standard\n" +" base85 alphabet."); + +#define BINASCII_A2B_BASE85_METHODDEF \ + {"a2b_base85", _PyCFunction_CAST(binascii_a2b_base85), METH_FASTCALL|METH_KEYWORDS, binascii_a2b_base85__doc__}, + +static PyObject * +binascii_a2b_base85_impl(PyObject *module, Py_buffer *data, int strict_mode, + int z85); + +static PyObject * +binascii_a2b_base85(PyObject *module, 
PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +{ + PyObject *return_value = NULL; + #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) + + #define NUM_KEYWORDS 2 + static struct { + PyGC_Head _this_is_not_used; + PyObject_VAR_HEAD + Py_hash_t ob_hash; + PyObject *ob_item[NUM_KEYWORDS]; + } _kwtuple = { + .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) + .ob_hash = -1, + .ob_item = { &_Py_ID(strict_mode), &_Py_ID(z85), }, + }; + #undef NUM_KEYWORDS + #define KWTUPLE (&_kwtuple.ob_base.ob_base) + + #else // !Py_BUILD_CORE + # define KWTUPLE NULL + #endif // !Py_BUILD_CORE + + static const char * const _keywords[] = {"", "strict_mode", "z85", NULL}; + static _PyArg_Parser _parser = { + .keywords = _keywords, + .fname = "a2b_base85", + .kwtuple = KWTUPLE, + }; + #undef KWTUPLE + PyObject *argsbuf[3]; + Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 1; + Py_buffer data = {NULL, NULL}; + int strict_mode = 0; + int z85 = 0; + + args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, + /*minpos*/ 1, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf); + if (!args) { + goto exit; + } + if (!ascii_buffer_converter(args[0], &data)) { + goto exit; + } + if (!noptargs) { + goto skip_optional_kwonly; + } + if (args[1]) { + strict_mode = PyObject_IsTrue(args[1]); + if (strict_mode < 0) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_kwonly; + } + } + z85 = PyObject_IsTrue(args[2]); + if (z85 < 0) { + goto exit; + } +skip_optional_kwonly: + return_value = binascii_a2b_base85_impl(module, &data, strict_mode, z85); + +exit: + /* Cleanup for data */ + if (data.obj) + PyBuffer_Release(&data); + + return return_value; +} + +PyDoc_STRVAR(binascii_b2a_base85__doc__, +"b2a_base85($module, data, /, *, pad=False, newline=True, z85=False)\n" +"--\n" +"\n" +"Base85-code line of data.\n" +"\n" +" pad\n" +" Pad input to a multiple of 4 before encoding.\n" +" newline\n" +" Append a newline to the result.\n" +" 
z85\n" +" Use Z85 alphabet instead of standard base85 alphabet."); + +#define BINASCII_B2A_BASE85_METHODDEF \ + {"b2a_base85", _PyCFunction_CAST(binascii_b2a_base85), METH_FASTCALL|METH_KEYWORDS, binascii_b2a_base85__doc__}, + +static PyObject * +binascii_b2a_base85_impl(PyObject *module, Py_buffer *data, int pad, + int newline, int z85); + +static PyObject * +binascii_b2a_base85(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +{ + PyObject *return_value = NULL; + #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) + + #define NUM_KEYWORDS 3 + static struct { + PyGC_Head _this_is_not_used; + PyObject_VAR_HEAD + Py_hash_t ob_hash; + PyObject *ob_item[NUM_KEYWORDS]; + } _kwtuple = { + .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) + .ob_hash = -1, + .ob_item = { &_Py_ID(pad), &_Py_ID(newline), &_Py_ID(z85), }, + }; + #undef NUM_KEYWORDS + #define KWTUPLE (&_kwtuple.ob_base.ob_base) + + #else // !Py_BUILD_CORE + # define KWTUPLE NULL + #endif // !Py_BUILD_CORE + + static const char * const _keywords[] = {"", "pad", "newline", "z85", NULL}; + static _PyArg_Parser _parser = { + .keywords = _keywords, + .fname = "b2a_base85", + .kwtuple = KWTUPLE, + }; + #undef KWTUPLE + PyObject *argsbuf[4]; + Py_ssize_t noptargs = nargs + (kwnames ? 
PyTuple_GET_SIZE(kwnames) : 0) - 1; + Py_buffer data = {NULL, NULL}; + int pad = 0; + int newline = 1; + int z85 = 0; + + args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, + /*minpos*/ 1, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf); + if (!args) { + goto exit; + } + if (PyObject_GetBuffer(args[0], &data, PyBUF_SIMPLE) != 0) { + goto exit; + } + if (!noptargs) { + goto skip_optional_kwonly; + } + if (args[1]) { + pad = PyObject_IsTrue(args[1]); + if (pad < 0) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_kwonly; + } + } + if (args[2]) { + newline = PyObject_IsTrue(args[2]); + if (newline < 0) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_kwonly; + } + } + z85 = PyObject_IsTrue(args[3]); + if (z85 < 0) { + goto exit; + } +skip_optional_kwonly: + return_value = binascii_b2a_base85_impl(module, &data, pad, newline, z85); + +exit: + /* Cleanup for data */ + if (data.obj) { + PyBuffer_Release(&data); + } + + return return_value; +} + PyDoc_STRVAR(binascii_crc_hqx__doc__, "crc_hqx($module, data, crc, /)\n" "--\n" @@ -788,4 +1205,4 @@ binascii_b2a_qp(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObj return return_value; } -/*[clinic end generated code: output=adb855a2797c3cad input=a9049054013a1b77]*/ +/*[clinic end generated code: output=95db68a6c51e7370 input=a9049054013a1b77]*/ diff --git a/Python/stdlib_module_names.h b/Python/stdlib_module_names.h index 26f6272ae9cfbc..b76ef2262d4de2 100644 --- a/Python/stdlib_module_names.h +++ b/Python/stdlib_module_names.h @@ -10,6 +10,7 @@ static const char* _Py_stdlib_module_names[] = { "_ast", "_ast_unparse", "_asyncio", +"_base64", "_bisect", "_blake2", "_bz2",