From d933210d960a6f9337e85753e9568619bbfd54ec Mon Sep 17 00:00:00 2001 From: Angus Gratton Date: Tue, 16 Apr 2024 17:07:05 +1000 Subject: [PATCH 1/2] py/misc: Move mp_clz and mp_ctz intrinsics into misc.h. Signed-off-by: Angus Gratton --- py/asmthumb.c | 18 +----------------- py/misc.h | 18 ++++++++++++++++++ 2 files changed, 19 insertions(+), 17 deletions(-) diff --git a/py/asmthumb.c b/py/asmthumb.c index 0df79e5fd6203..420815e80269a 100644 --- a/py/asmthumb.c +++ b/py/asmthumb.c @@ -35,23 +35,7 @@ #include "py/mpstate.h" #include "py/asmthumb.h" - -#ifdef _MSC_VER -#include <intrin.h> - -static uint32_t mp_clz(uint32_t x) { - unsigned long lz = 0; - return _BitScanReverse(&lz, x) ? (sizeof(x) * 8 - 1) - lz : 0; -} - -static uint32_t mp_ctz(uint32_t x) { - unsigned long tz = 0; - return _BitScanForward(&tz, x) ? tz : 0; -} -#else -#define mp_clz(x) __builtin_clz(x) -#define mp_ctz(x) __builtin_ctz(x) -#endif +#include "py/misc.h" #define UNSIGNED_FIT5(x) ((uint32_t)(x) < 32) #define UNSIGNED_FIT7(x) ((uint32_t)(x) < 128) diff --git a/py/misc.h b/py/misc.h index eea3e8b0fe7f5..9f8a8c1e13e53 100644 --- a/py/misc.h +++ b/py/misc.h @@ -334,4 +334,22 @@ typedef const char *mp_rom_error_text_t; // For now, forward directly to MP_COMPRESSED_ROM_TEXT. #define MP_ERROR_TEXT(x) (mp_rom_error_text_t)MP_COMPRESSED_ROM_TEXT(x) +// Portable implementations of CLZ and CTZ intrinsics +#ifdef _MSC_VER +#include <intrin.h> + +static uint32_t mp_clz(uint32_t x) { + unsigned long lz = 0; + return _BitScanReverse(&lz, x) ? (sizeof(x) * 8 - 1) - lz : 0; +} + +static uint32_t mp_ctz(uint32_t x) { + unsigned long tz = 0; + return _BitScanForward(&tz, x) ? tz : 0; +} +#else +#define mp_clz(x) __builtin_clz(x) +#define mp_ctz(x) __builtin_ctz(x) +#endif + #endif // MICROPY_INCLUDED_PY_MISC_H From 908ab1ceca15ee6fd0ef82ca4cba770a3ec41894 Mon Sep 17 00:00:00 2001 From: Angus Gratton Date: Wed, 29 Nov 2023 11:23:16 +1100 Subject: [PATCH 2/2] py/objint: Fix int.to_bytes() buffer size checks. 
Fixes and improvements to `int.to_bytes()` are: - No longer overflows if byte size is 0 (closes #13041). - Raises OverflowError in any case where number won't fit into byte length (now matches CPython, previously MicroPython would return a truncated bytes object). - Document that `micropython int.to_bytes()` doesn't implement the optional signed kwarg, but will behave as if `signed=True` when the integer is negative (this is the current behaviour). Add tests for this also. Requires changes for small ints, MPZ large ints, and "long long" large ints. Adds a new set of unit tests for ints between 32 and 64 bits to increase coverage of "long long" large ints, which are otherwise untested. Tested on unix port (64 bit small ints, MPZ long ints) and Zephyr STM32WB board (32 bit small ints, long long large ints). This work was funded through GitHub Sponsors. Signed-off-by: Angus Gratton --- docs/library/builtins.rst | 4 ++ py/misc.h | 33 +++++++++++++ py/mpz.c | 32 ++++++++----- py/mpz.h | 9 ++-- py/objint.c | 37 +++++++++++---- py/objint.h | 3 +- py/objint_longlong.c | 20 +++++++- py/objint_mpz.c | 4 +- tests/basics/int_bytes.py | 73 +++++++++++++++++++++++++++++ tests/basics/int_bytes_int64.py | 52 ++++++++++++++++++++ tests/basics/int_bytes_intbig.py | 48 +++++++++++++++++++ tests/cpydiff/types_int_to_bytes.py | 16 +++++++ 12 files changed, 302 insertions(+), 29 deletions(-) create mode 100644 tests/basics/int_bytes_int64.py create mode 100644 tests/cpydiff/types_int_to_bytes.py diff --git a/docs/library/builtins.rst b/docs/library/builtins.rst index 7a0229c2aaaf6..e489375b1f917 100644 --- a/docs/library/builtins.rst +++ b/docs/library/builtins.rst @@ -82,6 +82,10 @@ Functions and types In MicroPython, `byteorder` parameter must be positional (this is compatible with CPython). + .. note:: The optional ``signed`` kwarg from CPython is not supported. + MicroPython currently converts negative integers as signed, + and positive as unsigned. (:ref:`Details <cpydiff_types_int_to_bytes>`.) + .. 
function:: isinstance() .. function:: issubclass() diff --git a/py/misc.h b/py/misc.h index 9f8a8c1e13e53..cf1810d4e784b 100644 --- a/py/misc.h +++ b/py/misc.h @@ -343,13 +343,46 @@ static uint32_t mp_clz(uint32_t x) { return _BitScanReverse(&lz, x) ? (sizeof(x) * 8 - 1) - lz : 0; } +static uint32_t mp_clzl(unsigned long x) { + unsigned long lz = 0; + return _BitScanReverse(&lz, x) ? (sizeof(x) * 8 - 1) - lz : 0; +} + +#ifdef _WIN64 +static uint32_t mp_clzll(unsigned long long x) { + unsigned long lz = 0; + return _BitScanReverse64(&lz, x) ? (sizeof(x) * 8 - 1) - lz : 0; +} +#else +// Microsoft don't ship _BitScanReverse64 on Win32, so emulate it +static uint32_t mp_clzll(unsigned long long x) { + unsigned long h = x >> 32; + return h ? mp_clzl(h) : (mp_clzl(x) + 32); +} +#endif + static uint32_t mp_ctz(uint32_t x) { unsigned long tz = 0; return _BitScanForward(&tz, x) ? tz : 0; } #else #define mp_clz(x) __builtin_clz(x) +#define mp_clzl(x) __builtin_clzl(x) +#define mp_clzll(x) __builtin_clzll(x) #define mp_ctz(x) __builtin_ctz(x) #endif +// mp_int_t can be larger than long, i.e. 
Windows 64-bit, nan-box variants +static inline uint32_t mp_clz_mpi(mp_int_t x) { + MP_STATIC_ASSERT(sizeof(mp_int_t) == sizeof(long long) + || sizeof(mp_int_t) == sizeof(long)); + + // ugly, but should compile to single intrinsic unless O0 is set + if (sizeof(mp_int_t) == sizeof(long)) { + return mp_clzl(x); + } else { + return mp_clzll(x); + } +} + #endif // MICROPY_INCLUDED_PY_MISC_H diff --git a/py/mpz.c b/py/mpz.c index 502d4e1c138f0..750664ad9aaf8 100644 --- a/py/mpz.c +++ b/py/mpz.c @@ -1589,7 +1589,7 @@ bool mpz_as_uint_checked(const mpz_t *i, mp_uint_t *value) { return true; } -void mpz_as_bytes(const mpz_t *z, bool big_endian, size_t len, byte *buf) { +bool mpz_as_bytes(const mpz_t *z, bool big_endian, bool as_signed, size_t len, byte *buf) { byte *b = buf; if (big_endian) { b += len; @@ -1598,6 +1598,8 @@ void mpz_as_bytes(const mpz_t *z, bool big_endian, size_t len, byte *buf) { int bits = 0; mpz_dbl_dig_t d = 0; mpz_dbl_dig_t carry = 1; + size_t olen = len; // bytes in output buffer + bool ok = true; for (size_t zlen = z->len; zlen > 0; --zlen) { bits += DIG_SIZE; d = (d << DIG_SIZE) | *zdig++; @@ -1607,28 +1609,32 @@ void mpz_as_bytes(const mpz_t *z, bool big_endian, size_t len, byte *buf) { val = (~val & 0xff) + carry; carry = val >> 8; } + + if (!olen) { + // Buffer is full, only OK if all remaining bytes are zeroes + ok = ok && ((byte)val == 0); + continue; + } + if (big_endian) { *--b = val; - if (b == buf) { - return; - } } else { *b++ = val; - if (b == buf + len) { - return; - } } + olen--; } } - // fill remainder of buf with zero/sign extension of the integer - if (big_endian) { - len = b - buf; + if (as_signed && olen == 0 && len > 0) { + // If output exhausted then ensure there was enough space for the sign bit + byte most_sig = big_endian ? 
buf[0] : buf[len - 1]; + ok = ok && (bool)(most_sig & 0x80) == (bool)z->neg; } else { - len = buf + len - b; - buf = b; + // fill remainder of buf with zero/sign extension of the integer + memset(big_endian ? buf : b, z->neg ? 0xff : 0x00, olen); } - memset(buf, z->neg ? 0xff : 0x00, len); + + return ok; } #if MICROPY_PY_BUILTINS_FLOAT diff --git a/py/mpz.h b/py/mpz.h index d27f5724047ae..6f1ac930b0214 100644 --- a/py/mpz.h +++ b/py/mpz.h @@ -93,9 +93,9 @@ typedef int8_t mpz_dbl_dig_signed_t; typedef struct _mpz_t { // Zero has neg=0, len=0. Negative zero is not allowed. size_t neg : 1; - size_t fixed_dig : 1; - size_t alloc : (8 * sizeof(size_t) - 2); - size_t len; + size_t fixed_dig : 1; // flag, 'dig' buffer cannot be reallocated + size_t alloc : (8 * sizeof(size_t) - 2); // number of entries allocated in 'dig' + size_t len; // number of entries used in 'dig' mpz_dig_t *dig; } mpz_t; @@ -145,7 +145,8 @@ static inline size_t mpz_max_num_bits(const mpz_t *z) { mp_int_t mpz_hash(const mpz_t *z); bool mpz_as_int_checked(const mpz_t *z, mp_int_t *value); bool mpz_as_uint_checked(const mpz_t *z, mp_uint_t *value); -void mpz_as_bytes(const mpz_t *z, bool big_endian, size_t len, byte *buf); +// Returns true if 'z' fit into 'len' bytes of 'buf' without overflowing, 'buf' is truncated otherwise. 
+bool mpz_as_bytes(const mpz_t *z, bool big_endian, bool as_signed, size_t len, byte *buf); #if MICROPY_PY_BUILTINS_FLOAT mp_float_t mpz_as_float(const mpz_t *z); #endif diff --git a/py/objint.c b/py/objint.c index 6caa608f33035..467a4714ef197 100644 --- a/py/objint.c +++ b/py/objint.c @@ -421,29 +421,50 @@ static MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(int_from_bytes_fun_obj, 3, 4, int_fro static MP_DEFINE_CONST_CLASSMETHOD_OBJ(int_from_bytes_obj, MP_ROM_PTR(&int_from_bytes_fun_obj)); static mp_obj_t int_to_bytes(size_t n_args, const mp_obj_t *args) { - // TODO: Support signed param (assumes signed=False) + // TODO: Support signed (currently behaves as if signed=(val < 0)) (void)n_args; + bool overflow; - mp_int_t len = mp_obj_get_int(args[1]); - if (len < 0) { + mp_int_t dlen = mp_obj_get_int(args[1]); + if (dlen < 0) { mp_raise_ValueError(NULL); } bool big_endian = args[2] != MP_OBJ_NEW_QSTR(MP_QSTR_little); vstr_t vstr; - vstr_init_len(&vstr, len); + vstr_init_len(&vstr, dlen); byte *data = (byte *)vstr.buf; - memset(data, 0, len); #if MICROPY_LONGINT_IMPL != MICROPY_LONGINT_IMPL_NONE if (!mp_obj_is_small_int(args[0])) { - mp_obj_int_to_bytes_impl(args[0], big_endian, len, data); + overflow = !mp_obj_int_to_bytes_impl(args[0], big_endian, dlen, data); } else #endif { mp_int_t val = MP_OBJ_SMALL_INT_VALUE(args[0]); - size_t l = MIN((size_t)len, sizeof(val)); - mp_binary_set_int(l, big_endian, data + (big_endian ? (len - l) : 0), val); + int slen = 0; // Number of bytes to represent val + + // This logic has a twin in objint_longlong.c + if (val > 0) { + slen = (sizeof(mp_int_t) * 8 - mp_clz_mpi(val) + 7) / 8; + } else if (val < -1) { + slen = (sizeof(mp_int_t) * 8 - mp_clz_mpi(~val) + 8) / 8; + } else { + // clz of 0 is undefined, so 0 and -1 map to 0 and 1 + slen = -val; + } + + if (slen <= dlen) { + memset(data, val < 0 ? 0xFF : 0x00, dlen); + mp_binary_set_int(slen, big_endian, data + (big_endian ? 
(dlen - slen) : 0), val); + overflow = false; + } else { + overflow = true; + } + } + + if (overflow) { + mp_raise_msg(&mp_type_OverflowError, MP_ERROR_TEXT("buffer too small")); } return mp_obj_new_bytes_from_vstr(&vstr); diff --git a/py/objint.h b/py/objint.h index 5eed87705dedb..28930e35adb49 100644 --- a/py/objint.h +++ b/py/objint.h @@ -55,7 +55,8 @@ char *mp_obj_int_formatted_impl(char **buf, size_t *buf_size, size_t *fmt_size, int base, const char *prefix, char base_char, char comma); mp_int_t mp_obj_int_hash(mp_obj_t self_in); mp_obj_t mp_obj_int_from_bytes_impl(bool big_endian, size_t len, const byte *buf); -void mp_obj_int_to_bytes_impl(mp_obj_t self_in, bool big_endian, size_t len, byte *buf); +// Returns true if 'self_in' fit into 'len' bytes of 'buf' without overflowing, 'buf' is truncated otherwise. +bool mp_obj_int_to_bytes_impl(mp_obj_t self_in, bool big_endian, size_t len, byte *buf); int mp_obj_int_sign(mp_obj_t self_in); mp_obj_t mp_obj_int_unary_op(mp_unary_op_t op, mp_obj_t o_in); mp_obj_t mp_obj_int_binary_op(mp_binary_op_t op, mp_obj_t lhs_in, mp_obj_t rhs_in); diff --git a/py/objint_longlong.c b/py/objint_longlong.c index ee499e0265b32..00fe5636c1607 100644 --- a/py/objint_longlong.c +++ b/py/objint_longlong.c @@ -57,10 +57,27 @@ mp_obj_t mp_obj_int_from_bytes_impl(bool big_endian, size_t len, const byte *buf return mp_obj_new_int_from_ll(value); } -void mp_obj_int_to_bytes_impl(mp_obj_t self_in, bool big_endian, size_t len, byte *buf) { +bool mp_obj_int_to_bytes_impl(mp_obj_t self_in, bool big_endian, size_t len, byte *buf) { assert(mp_obj_is_exact_type(self_in, &mp_type_int)); mp_obj_int_t *self = self_in; long long val = self->val; + size_t slen; // Number of bytes to represent val + + // This logic has a twin in objint.c + if (val > 0) { + slen = (sizeof(long long) * 8 - mp_clzll(val) + 7) / 8; + } else if (val < -1) { + slen = (sizeof(long long) * 8 - mp_clzll(~val) + 8) / 8; + } else { + // clz of 0 is undefined, so 0 and -1 map to 0 and 
1 + slen = -val; + } + + if (slen > len) { + return false; // Would overflow + // TODO: Determine whether to copy and truncate, as some callers probably expect this...? + } + if (big_endian) { byte *b = buf + len; while (b > buf) { @@ -73,6 +90,7 @@ void mp_obj_int_to_bytes_impl(mp_obj_t self_in, bool big_endian, size_t len, byt val >>= 8; } } + return true; } int mp_obj_int_sign(mp_obj_t self_in) { diff --git a/py/objint_mpz.c b/py/objint_mpz.c index 600316a42abba..4a1a685bbd42e 100644 --- a/py/objint_mpz.c +++ b/py/objint_mpz.c @@ -112,10 +112,10 @@ mp_obj_t mp_obj_int_from_bytes_impl(bool big_endian, size_t len, const byte *buf return MP_OBJ_FROM_PTR(o); } -void mp_obj_int_to_bytes_impl(mp_obj_t self_in, bool big_endian, size_t len, byte *buf) { +bool mp_obj_int_to_bytes_impl(mp_obj_t self_in, bool big_endian, size_t len, byte *buf) { assert(mp_obj_is_exact_type(self_in, &mp_type_int)); mp_obj_int_t *self = MP_OBJ_TO_PTR(self_in); - mpz_as_bytes(&self->mpz, big_endian, len, buf); + return mpz_as_bytes(&self->mpz, big_endian, self->mpz.neg, len, buf); } int mp_obj_int_sign(mp_obj_t self_in) { diff --git a/tests/basics/int_bytes.py b/tests/basics/int_bytes.py index d1837ea75ca4a..15c12640e951b 100644 --- a/tests/basics/int_bytes.py +++ b/tests/basics/int_bytes.py @@ -1,3 +1,5 @@ +import sys + print((10).to_bytes(1, "little")) print((111111).to_bytes(4, "little")) print((100).to_bytes(10, "little")) @@ -20,3 +22,74 @@ (1).to_bytes(-1, "little") except ValueError: print("ValueError") + +# zero byte destination should also raise an error +try: + (1).to_bytes(0, "little") +except OverflowError: + print("OverflowError") + +# except for converting 0 to a zero-length byte array +print((0).to_bytes(0, "big")) + +# byte length can fit the integer directly +print((0xFF).to_bytes(1, "little")) +print((0xFF).to_bytes(1, "big")) +print((0xEFF).to_bytes(2, "little")) +print((0xEFF).to_bytes(2, "big")) +print((0xCDEFF).to_bytes(3, "little")) +print((0xCDEFF).to_bytes(3, "big")) 
+ +# OverflowError if not big enough + +try: + (0x123).to_bytes(1, "big") +except OverflowError: + print("OverflowError") + +try: + (0x12345).to_bytes(2, "big") +except OverflowError: + print("OverflowError") + +try: + (0x1234567).to_bytes(3, "big") +except OverflowError: + print("OverflowError") + + +# negative representations + +# MicroPython int.to_bytes() behaves as if signed=True for negative numbers +if "micropython" in repr(sys.implementation): + + def to_bytes_compat(i, l, e): + return i.to_bytes(l, e) +else: + # Implement MicroPython compatible behaviour for CPython + def to_bytes_compat(i, l, e): + return i.to_bytes(l, e, signed=i < 0) + + +print(to_bytes_compat(-1, 1, "little")) +print(to_bytes_compat(-1, 3, "little")) +print(to_bytes_compat(-1, 1, "big")) +print(to_bytes_compat(-1, 3, "big")) +print(to_bytes_compat(-128, 1, "big")) +print(to_bytes_compat(-32768, 2, "big")) +print(to_bytes_compat(-(1 << 23), 3, "big")) + +try: + print(to_bytes_compat(-129, 1, "big")) +except OverflowError: + print("OverflowError") + +try: + print(to_bytes_compat(-32769, 2, "big")) +except OverflowError: + print("OverflowError") + +try: + print(to_bytes_compat(-(1 << 23) - 1, 2, "big")) +except OverflowError: + print("OverflowError") diff --git a/tests/basics/int_bytes_int64.py b/tests/basics/int_bytes_int64.py new file mode 100644 index 0000000000000..032dbccc5b14e --- /dev/null +++ b/tests/basics/int_bytes_int64.py @@ -0,0 +1,52 @@ +import sys + +# Depending on the port, the numbers in this test may be implemented as "small" +# native 64 bit ints, arbitrary precision large ints, or large integers using 64-bit +# long longs. 
+ +try: + x = int.from_bytes(b"\x6F\xAB\xCD\x12\x34\x56\x78\xFB", "big") +except OverflowError: + print("SKIP") # Port can't represent this size of integer at all + raise SystemExit + +print(hex(x)) +b = x.to_bytes(8, "little") +print(b) +print(x.to_bytes(8, "big")) + +# padding in output +print(x.to_bytes(20, "little")) +print(x.to_bytes(20, "big")) + +# check that extra zero bytes don't change the internal int value +print(int.from_bytes(b + bytes(10), "little") == x) + +# can't write to a zero-length bytes object +try: + x.to_bytes(0, "little") +except OverflowError: + print("OverflowError") + +# or one that is too short +try: + x.to_bytes(7, "big") +except OverflowError: + print("OverflowError") + +# negative representations + +# MicroPython int.to_bytes() behaves as if signed=True for negative numbers +if "micropython" in repr(sys.implementation): + + def to_bytes_compat(i, l, e): + return i.to_bytes(l, e) +else: + # Implement MicroPython compatible behaviour for CPython + def to_bytes_compat(i, l, e): + return i.to_bytes(l, e, signed=i < 0) + + +print(to_bytes_compat(-x, 8, "little")) +print(to_bytes_compat(-x, 20, "big")) +print(to_bytes_compat(-x, 20, "little")) diff --git a/tests/basics/int_bytes_intbig.py b/tests/basics/int_bytes_intbig.py index 147362bef1378..13cf5d0085890 100644 --- a/tests/basics/int_bytes_intbig.py +++ b/tests/basics/int_bytes_intbig.py @@ -1,3 +1,5 @@ +import sys + print((2**64).to_bytes(9, "little")) print((2**64).to_bytes(9, "big")) @@ -10,5 +12,51 @@ print(il.to_bytes(20, "little")) print(ib.to_bytes(20, "big")) +# check padding comes out correctly +print(il.to_bytes(40, "little")) +print(ib.to_bytes(40, "big")) + # check that extra zero bytes don't change the internal int value print(int.from_bytes(b + bytes(10), "little") == int.from_bytes(b, "little")) + +# can't write to a zero-length bytes object +try: + ib.to_bytes(0, "little") +except OverflowError: + print("OverflowError") + +# or one that is too short +try: + 
ib.to_bytes(18, "big") +except OverflowError: + print("OverflowError") + +# negative representations + +# MicroPython int.to_bytes() behaves as if signed=True for negative numbers +if "micropython" in repr(sys.implementation): + + def to_bytes_compat(i, l, e): + return i.to_bytes(l, e) +else: + # Implement MicroPython compatible behaviour for CPython + def to_bytes_compat(i, l, e): + return i.to_bytes(l, e, signed=i < 0) + + +print(to_bytes_compat(-ib, 20, "big")) +print(to_bytes_compat(ib * -ib, 40, "big")) + +# case where an additional byte is needed for sign bit +ib = (2**64) - 1 +print(ib.to_bytes(8, "little")) + +ib *= -1 + +try: + print(to_bytes_compat(ib, 8, "little")) +except OverflowError: + print("OverflowError") + +print(to_bytes_compat(ib, 9, "little")) +print(to_bytes_compat(ib, 9, "big")) diff --git a/tests/cpydiff/types_int_to_bytes.py b/tests/cpydiff/types_int_to_bytes.py new file mode 100644 index 0000000000000..6530a2a32ecb7 --- /dev/null +++ b/tests/cpydiff/types_int_to_bytes.py @@ -0,0 +1,16 @@ +""" +categories: Types,int +description: ``to_bytes`` method doesn't implement signed parameter. +cause: The ``signed`` keyword-only parameter is not implemented for ``int.to_bytes()``. + +When the integer is negative, MicroPython behaves the same as CPython ``int.to_bytes(..., signed=True)`` + +When the integer is non-negative, MicroPython behaves the same as CPython ``int.to_bytes(..., signed=False)``. + +(The difference is subtle, but in CPython a positive integer converted with ``signed=True`` may require one byte more in the output length, in order to fit the 0 sign bit.) + +workaround: Take care when calling ``to_bytes()`` on an integer value which may be negative. +""" + +x = -1 +print(x.to_bytes(1, "big"))