Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions docs/library/builtins.rst
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,10 @@ Functions and types
In MicroPython, `byteorder` parameter must be positional (this is
compatible with CPython).

.. note:: The optional ``signed`` kwarg from CPython is not supported.
MicroPython currently converts negative integers as signed,
and positive as unsigned. (:ref:`Details <cpydiff_types_int_to_bytes>`.)

.. function:: isinstance()

.. function:: issubclass()
Expand Down
18 changes: 1 addition & 17 deletions py/asmthumb.c
Original file line number Diff line number Diff line change
Expand Up @@ -35,23 +35,7 @@

#include "py/mpstate.h"
#include "py/asmthumb.h"

// Count-leading-zeros / count-trailing-zeros helpers for 32-bit values.
#ifndef _MSC_VER
// GCC-compatible compilers expose both operations as builtins.
#define mp_clz(x) __builtin_clz(x)
#define mp_ctz(x) __builtin_ctz(x)
#else
// MSVC: build the helpers on top of the bit-scan intrinsics.
#include <intrin.h>

// Number of zero bits above the highest set bit of 'x'.
// Yields 0 when the intrinsic reports that no bit is set.
static uint32_t mp_clz(uint32_t x) {
    unsigned long msb_index = 0;
    if (!_BitScanReverse(&msb_index, x)) {
        return 0;
    }
    return (sizeof(x) * 8 - 1) - msb_index;
}

// Index of the lowest set bit of 'x'.
// Yields 0 when the intrinsic reports that no bit is set.
static uint32_t mp_ctz(uint32_t x) {
    unsigned long lsb_index = 0;
    if (!_BitScanForward(&lsb_index, x)) {
        return 0;
    }
    return lsb_index;
}
#endif
#include "py/misc.h"

#define UNSIGNED_FIT5(x) ((uint32_t)(x) < 32)
#define UNSIGNED_FIT7(x) ((uint32_t)(x) < 128)
Expand Down
51 changes: 51 additions & 0 deletions py/misc.h
Original file line number Diff line number Diff line change
Expand Up @@ -334,4 +334,55 @@ typedef const char *mp_rom_error_text_t;
// For now, forward directly to MP_COMPRESSED_ROM_TEXT.
#define MP_ERROR_TEXT(x) (mp_rom_error_text_t)MP_COMPRESSED_ROM_TEXT(x)

// Portable implementations of CLZ and CTZ intrinsics.
//
// mp_clz / mp_clzl / mp_clzll count the leading zero bits of an unsigned int,
// unsigned long and unsigned long long respectively; mp_ctz counts the
// trailing zero bits of a 32-bit value.  Callers must not rely on the result
// for an input of 0: the GCC builtins leave it undefined and the MSVC
// fallbacks below do not all agree on the value they return.
#ifdef _MSC_VER
#include <intrin.h>

// These helpers are defined in a header, so they are 'static inline' to avoid
// an unused-function warning in every translation unit that includes this
// file without calling them.

// Leading zero count of a 32-bit value (0 if no bit is set).
static inline uint32_t mp_clz(uint32_t x) {
    unsigned long lz = 0;
    return _BitScanReverse(&lz, x) ? (uint32_t)((sizeof(x) * 8 - 1) - lz) : 0;
}

// Leading zero count of an unsigned long (0 if no bit is set).
static inline uint32_t mp_clzl(unsigned long x) {
    unsigned long lz = 0;
    return _BitScanReverse(&lz, x) ? (uint32_t)((sizeof(x) * 8 - 1) - lz) : 0;
}

#ifdef _WIN64
// Leading zero count of an unsigned long long (0 if no bit is set).
static inline uint32_t mp_clzll(unsigned long long x) {
    unsigned long lz = 0;
    return _BitScanReverse64(&lz, x) ? (uint32_t)((sizeof(x) * 8 - 1) - lz) : 0;
}
#else
// Microsoft doesn't ship _BitScanReverse64 on Win32, so emulate it by scanning
// the high word first and falling back to the low word.
static inline uint32_t mp_clzll(unsigned long long x) {
    unsigned long h = (unsigned long)(x >> 32);
    // The cast deliberately keeps only the low word of 'x'.
    return h ? mp_clzl(h) : (mp_clzl((unsigned long)x) + 32);
}
#endif

// Trailing zero count of a 32-bit value (0 if no bit is set).
static inline uint32_t mp_ctz(uint32_t x) {
    unsigned long tz = 0;
    return _BitScanForward(&tz, x) ? (uint32_t)tz : 0;
}
#else
#define mp_clz(x) __builtin_clz(x)
#define mp_clzl(x) __builtin_clzl(x)
#define mp_clzll(x) __builtin_clzll(x)
#define mp_ctz(x) __builtin_ctz(x)
#endif

// Count the leading zero bits of an mp_int_t, using whichever intrinsic has a
// matching operand width.  mp_int_t can be larger than long (i.e. Windows
// 64-bit, nan-box variants), hence the size-based dispatch.
static inline uint32_t mp_clz_mpi(mp_int_t x) {
    MP_STATIC_ASSERT(sizeof(mp_int_t) == sizeof(long)
        || sizeof(mp_int_t) == sizeof(long long));

    // Both sizes are compile-time constants, so this should reduce to a single
    // intrinsic unless optimisation is disabled (O0).
    if (sizeof(mp_int_t) != sizeof(long)) {
        return mp_clzll(x);
    }
    return mp_clzl(x);
}
Copy link
Contributor Author

@projectgus projectgus May 1, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't really like this approach but it seems the least bad that works across all configurations and isn't too intrusive.

The other way I thought to do it was to only define mp_clz32() and mp_clz64() and then have each port define MP_SIZEOF_INT or a similar macro.


#endif // MICROPY_INCLUDED_PY_MISC_H
32 changes: 19 additions & 13 deletions py/mpz.c
Original file line number Diff line number Diff line change
Expand Up @@ -1589,7 +1589,7 @@ bool mpz_as_uint_checked(const mpz_t *i, mp_uint_t *value) {
return true;
}

void mpz_as_bytes(const mpz_t *z, bool big_endian, size_t len, byte *buf) {
bool mpz_as_bytes(const mpz_t *z, bool big_endian, bool as_signed, size_t len, byte *buf) {
byte *b = buf;
if (big_endian) {
b += len;
Expand All @@ -1598,6 +1598,8 @@ void mpz_as_bytes(const mpz_t *z, bool big_endian, size_t len, byte *buf) {
int bits = 0;
mpz_dbl_dig_t d = 0;
mpz_dbl_dig_t carry = 1;
size_t olen = len; // bytes in output buffer
bool ok = true;
for (size_t zlen = z->len; zlen > 0; --zlen) {
bits += DIG_SIZE;
d = (d << DIG_SIZE) | *zdig++;
Expand All @@ -1607,28 +1609,32 @@ void mpz_as_bytes(const mpz_t *z, bool big_endian, size_t len, byte *buf) {
val = (~val & 0xff) + carry;
carry = val >> 8;
}

if (!olen) {
// Buffer is full, only OK if all remaining bytes are zeroes
ok = ok && ((byte)val == 0);
continue;
}

if (big_endian) {
*--b = val;
if (b == buf) {
return;
}
} else {
*b++ = val;
if (b == buf + len) {
return;
}
}
olen--;
}
}

// fill remainder of buf with zero/sign extension of the integer
if (big_endian) {
len = b - buf;
if (as_signed && olen == 0 && len > 0) {
// If output exhausted then ensure there was enough space for the sign bit
byte most_sig = big_endian ? buf[0] : buf[len - 1];
ok = ok && (bool)(most_sig & 0x80) == (bool)z->neg;
} else {
len = buf + len - b;
buf = b;
// fill remainder of buf with zero/sign extension of the integer
memset(big_endian ? buf : b, z->neg ? 0xff : 0x00, olen);
}
memset(buf, z->neg ? 0xff : 0x00, len);

return ok;
}

#if MICROPY_PY_BUILTINS_FLOAT
Expand Down
9 changes: 5 additions & 4 deletions py/mpz.h
Original file line number Diff line number Diff line change
Expand Up @@ -93,9 +93,9 @@ typedef int8_t mpz_dbl_dig_signed_t;
typedef struct _mpz_t {
// Zero has neg=0, len=0. Negative zero is not allowed.
size_t neg : 1;
size_t fixed_dig : 1;
size_t alloc : (8 * sizeof(size_t) - 2);
size_t len;
size_t fixed_dig : 1; // flag, 'dig' buffer cannot be reallocated
size_t alloc : (8 * sizeof(size_t) - 2); // number of entries allocated in 'dig'
size_t len; // number of entries used in 'dig'
mpz_dig_t *dig;
} mpz_t;

Expand Down Expand Up @@ -145,7 +145,8 @@ static inline size_t mpz_max_num_bits(const mpz_t *z) {
mp_int_t mpz_hash(const mpz_t *z);
bool mpz_as_int_checked(const mpz_t *z, mp_int_t *value);
bool mpz_as_uint_checked(const mpz_t *z, mp_uint_t *value);
void mpz_as_bytes(const mpz_t *z, bool big_endian, size_t len, byte *buf);
// Returns true if 'z' fit into 'len' bytes of 'buf' without overflowing, 'buf' is truncated otherwise.
bool mpz_as_bytes(const mpz_t *z, bool big_endian, bool as_signed, size_t len, byte *buf);
#if MICROPY_PY_BUILTINS_FLOAT
mp_float_t mpz_as_float(const mpz_t *z);
#endif
Expand Down
37 changes: 29 additions & 8 deletions py/objint.c
Original file line number Diff line number Diff line change
Expand Up @@ -421,29 +421,50 @@ static MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(int_from_bytes_fun_obj, 3, 4, int_fro
static MP_DEFINE_CONST_CLASSMETHOD_OBJ(int_from_bytes_obj, MP_ROM_PTR(&int_from_bytes_fun_obj));

static mp_obj_t int_to_bytes(size_t n_args, const mp_obj_t *args) {
// TODO: Support signed param (assumes signed=False)
// TODO: Support signed (currently behaves as if signed=(val < 0))
(void)n_args;
bool overflow;

mp_int_t len = mp_obj_get_int(args[1]);
if (len < 0) {
mp_int_t dlen = mp_obj_get_int(args[1]);
if (dlen < 0) {
mp_raise_ValueError(NULL);
}
bool big_endian = args[2] != MP_OBJ_NEW_QSTR(MP_QSTR_little);

vstr_t vstr;
vstr_init_len(&vstr, len);
vstr_init_len(&vstr, dlen);
byte *data = (byte *)vstr.buf;
memset(data, 0, len);

#if MICROPY_LONGINT_IMPL != MICROPY_LONGINT_IMPL_NONE
if (!mp_obj_is_small_int(args[0])) {
mp_obj_int_to_bytes_impl(args[0], big_endian, len, data);
overflow = !mp_obj_int_to_bytes_impl(args[0], big_endian, dlen, data);
} else
#endif
{
mp_int_t val = MP_OBJ_SMALL_INT_VALUE(args[0]);
size_t l = MIN((size_t)len, sizeof(val));
mp_binary_set_int(l, big_endian, data + (big_endian ? (len - l) : 0), val);
int slen = 0; // Number of bytes to represent val

// This logic has a twin in objint_longlong.c
if (val > 0) {
slen = (sizeof(mp_int_t) * 8 - mp_clz_mpi(val) + 7) / 8;
} else if (val < -1) {
slen = (sizeof(mp_int_t) * 8 - mp_clz_mpi(~val) + 8) / 8;
} else {
// clz of 0 is undefined, so 0 and -1 are special-cased: they need 0 and 1 bytes
slen = -val;
}

if (slen <= dlen) {
memset(data, val < 0 ? 0xFF : 0x00, dlen);
mp_binary_set_int(slen, big_endian, data + (big_endian ? (dlen - slen) : 0), val);
Copy link
Contributor Author

@projectgus projectgus Apr 16, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For later consideration (not this PR): can mp_binary_set_val and this function be combined to save size?

overflow = false;
} else {
overflow = true;
}
}

if (overflow) {
mp_raise_msg(&mp_type_OverflowError, MP_ERROR_TEXT("buffer too small"));
}

return mp_obj_new_bytes_from_vstr(&vstr);
Expand Down
3 changes: 2 additions & 1 deletion py/objint.h
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,8 @@ char *mp_obj_int_formatted_impl(char **buf, size_t *buf_size, size_t *fmt_size,
int base, const char *prefix, char base_char, char comma);
mp_int_t mp_obj_int_hash(mp_obj_t self_in);
mp_obj_t mp_obj_int_from_bytes_impl(bool big_endian, size_t len, const byte *buf);
void mp_obj_int_to_bytes_impl(mp_obj_t self_in, bool big_endian, size_t len, byte *buf);
// Returns true if 'self_in' fit into 'len' bytes of 'buf' without overflowing, 'buf' is truncated otherwise.
bool mp_obj_int_to_bytes_impl(mp_obj_t self_in, bool big_endian, size_t len, byte *buf);
int mp_obj_int_sign(mp_obj_t self_in);
mp_obj_t mp_obj_int_unary_op(mp_unary_op_t op, mp_obj_t o_in);
mp_obj_t mp_obj_int_binary_op(mp_binary_op_t op, mp_obj_t lhs_in, mp_obj_t rhs_in);
Expand Down
20 changes: 19 additions & 1 deletion py/objint_longlong.c
Original file line number Diff line number Diff line change
Expand Up @@ -57,10 +57,27 @@ mp_obj_t mp_obj_int_from_bytes_impl(bool big_endian, size_t len, const byte *buf
return mp_obj_new_int_from_ll(value);
}

void mp_obj_int_to_bytes_impl(mp_obj_t self_in, bool big_endian, size_t len, byte *buf) {
bool mp_obj_int_to_bytes_impl(mp_obj_t self_in, bool big_endian, size_t len, byte *buf) {
assert(mp_obj_is_exact_type(self_in, &mp_type_int));
mp_obj_int_t *self = self_in;
long long val = self->val;
size_t slen; // Number of bytes to represent val

// This logic has a twin in objint.c
if (val > 0) {
slen = (sizeof(long long) * 8 - mp_clzll(val) + 7) / 8;
} else if (val < -1) {
slen = (sizeof(long long) * 8 - mp_clzll(~val) + 8) / 8;
} else {
// clz of 0 is undefined, so 0 and -1 are special-cased: they need 0 and 1 bytes
slen = -val;
}

if (slen > len) {
return false; // Would overflow
// TODO: Determine whether to copy and truncate, as some callers probably expect this...?
}

if (big_endian) {
byte *b = buf + len;
while (b > buf) {
Expand All @@ -73,6 +90,7 @@ void mp_obj_int_to_bytes_impl(mp_obj_t self_in, bool big_endian, size_t len, byt
val >>= 8;
}
}
return true;
}

int mp_obj_int_sign(mp_obj_t self_in) {
Expand Down
4 changes: 2 additions & 2 deletions py/objint_mpz.c
Original file line number Diff line number Diff line change
Expand Up @@ -112,10 +112,10 @@ mp_obj_t mp_obj_int_from_bytes_impl(bool big_endian, size_t len, const byte *buf
return MP_OBJ_FROM_PTR(o);
}

void mp_obj_int_to_bytes_impl(mp_obj_t self_in, bool big_endian, size_t len, byte *buf) {
bool mp_obj_int_to_bytes_impl(mp_obj_t self_in, bool big_endian, size_t len, byte *buf) {
assert(mp_obj_is_exact_type(self_in, &mp_type_int));
mp_obj_int_t *self = MP_OBJ_TO_PTR(self_in);
mpz_as_bytes(&self->mpz, big_endian, len, buf);
return mpz_as_bytes(&self->mpz, big_endian, self->mpz.neg, len, buf);
}

int mp_obj_int_sign(mp_obj_t self_in) {
Expand Down
73 changes: 73 additions & 0 deletions tests/basics/int_bytes.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import sys

print((10).to_bytes(1, "little"))
print((111111).to_bytes(4, "little"))
print((100).to_bytes(10, "little"))
Expand All @@ -20,3 +22,74 @@
(1).to_bytes(-1, "little")
except ValueError:
print("ValueError")

# zero byte destination should also raise an error
try:
    (1).to_bytes(0, "little")
except OverflowError:
    print("OverflowError")

# except for converting 0 to a zero-length byte array
print((0).to_bytes(0, "big"))

# byte length can fit the integer directly
print((0xFF).to_bytes(1, "little"))
print((0xFF).to_bytes(1, "big"))
print((0xEFF).to_bytes(2, "little"))
print((0xEFF).to_bytes(2, "big"))
print((0xCDEFF).to_bytes(3, "little"))
print((0xCDEFF).to_bytes(3, "big"))

# OverflowError if not big enough

try:
    (0x123).to_bytes(1, "big")
except OverflowError:
    print("OverflowError")

try:
    (0x12345).to_bytes(2, "big")
except OverflowError:
    print("OverflowError")

try:
    (0x1234567).to_bytes(3, "big")
except OverflowError:
    print("OverflowError")


# negative representations

# MicroPython int.to_bytes() behaves as if signed=True for negative numbers
if "micropython" in repr(sys.implementation):

    def to_bytes_compat(i, l, e):
        # Convert int 'i' to 'l' bytes with byte order 'e' using the native behaviour.
        return i.to_bytes(l, e)
else:
    # Implement MicroPython compatible behaviour for CPython
    def to_bytes_compat(i, l, e):
        # Negative values are encoded as signed, everything else as unsigned.
        return i.to_bytes(l, e, signed=i < 0)


# most negative values that still fit in 1, 2 and 3 bytes
print(to_bytes_compat(-1, 1, "little"))
print(to_bytes_compat(-1, 3, "little"))
print(to_bytes_compat(-1, 1, "big"))
print(to_bytes_compat(-1, 3, "big"))
print(to_bytes_compat(-128, 1, "big"))
print(to_bytes_compat(-32768, 2, "big"))
print(to_bytes_compat(-(1 << 23), 3, "big"))

# one below each of those limits must overflow the same byte length
try:
    print(to_bytes_compat(-129, 1, "big"))
except OverflowError:
    print("OverflowError")

try:
    print(to_bytes_compat(-32769, 2, "big"))
except OverflowError:
    print("OverflowError")

try:
    # 3 bytes (not 2) so that the signed 24-bit boundary is actually exercised
    print(to_bytes_compat(-(1 << 23) - 1, 3, "big"))
except OverflowError:
    print("OverflowError")
Loading