diff --git a/extmod/moductypes.c b/extmod/moductypes.c
index eb72f441bbbce..776df16a96d56 100644
--- a/extmod/moductypes.c
+++ b/extmod/moductypes.c
@@ -72,9 +72,12 @@ enum {
 #define TYPE2SMALLINT(x, nbits) ((((int)x) << (32 - nbits)) >> 1)
 #define GET_TYPE(x, nbits) (((x) >> (31 - nbits)) & ((1 << nbits) - 1))
 // Bit 0 is "is_signed"
-#define GET_SCALAR_SIZE(val_type) (1 << ((val_type) >> 1))
+#define GET_SCALAR_SIZE(val_type) (1 << (((val_type) & 7) >> 1))
 #define VALUE_MASK(type_nbits) ~((int)0x80000000 >> type_nbits)
 
+#define INT_TYPE_IS_SIGNED(TYPE) ((TYPE) & 1)
+#define INT_TYPE_TO_UNSIGNED(TYPE) ((TYPE) & 6)
+
 #define IS_SCALAR_ARRAY(tuple_desc) ((tuple_desc)->len == 2)
 // We cannot apply the below to INT8, as their range [-128, 127]
 #define IS_SCALAR_ARRAY_OF_BYTES(tuple_desc) (GET_TYPE(MP_OBJ_SMALL_INT_VALUE((tuple_desc)->items[1]), VAL_TYPE_BITS) == UINT8)
@@ -137,7 +140,7 @@ static inline mp_uint_t uctypes_struct_scalar_size(int val_type) {
     if (val_type == FLOAT32) {
         return 4;
     } else {
-        return GET_SCALAR_SIZE(val_type & 7);
+        return GET_SCALAR_SIZE(val_type);
     }
 }
 
@@ -305,6 +308,12 @@ static inline mp_uint_t get_aligned_basic(uint val_type, void *p) {
     return 0;
 }
 
+#if MICROPY_PREVIEW_VERSION_2
+static void raise_overflow_exception(void) {
+    mp_raise_msg(&mp_type_OverflowError, MP_ERROR_TEXT("value would truncate"));
+}
+#endif
+
 static inline void set_aligned_basic(uint val_type, void *p, mp_uint_t v) {
     switch (val_type) {
         case UINT8:
@@ -361,7 +370,58 @@ static void set_aligned(uint val_type, void *p, mp_int_t index, mp_obj_t val) {
             return;
     }
     #endif
+
+    // Special case where mp_int_t can't hold the target type, fall through
+    if (sizeof(mp_int_t) < 8 && (val_type == INT64 || val_type == UINT64)) {
+        // Doesn't offer atomic store semantics, but should at least try
+        set_unaligned(val_type, (void *)&((uint64_t *)p)[index], MP_ENDIANNESS_BIG, val);
+        return;
+    }
+
+    #if MICROPY_PREVIEW_VERSION_2
+    // V2 raises exception if setting int will truncate
+    // (mp_obj_get_int raises TypeError for non-integers, so 'v' is always initialised before the store)
+    mp_int_t v = mp_obj_get_int(val);
+    bool ok = true;
+    switch (val_type) {
+        case UINT8:
+            ok = (v == (uint8_t)v);
+            break;
+        case INT8:
+            ok = (v == (int8_t)v);
+            break;
+        case UINT16:
+            ok = (v == (uint16_t)v);
+            break;
+        case INT16:
+            ok = (v == (int16_t)v);
+            break;
+        case UINT32:
+            // Explicit sign test: 'v == (uint32_t)v' is always true when mp_int_t is 32 bits wide
+            ok = (v >= 0 && (mp_uint_t)v == (uint32_t)v);
+            break;
+        case INT32:
+            ok = (v == (int32_t)v);
+            break;
+        case UINT64:
+            assert(sizeof(mp_int_t) == 8);
+            ok = v >= 0;
+            break;
+        case INT64:
+            assert(sizeof(mp_int_t) == 8);
+            break;
+        default:
+            assert(0);
+            ok = false;
+    }
+    if (!ok) {
+        raise_overflow_exception();
+    }
+
+    #else
     mp_int_t v = mp_obj_get_int_truncated(val);
+    #endif
+
     switch (val_type) {
         case UINT8:
             ((uint8_t *)p)[index] = (uint8_t)v;
@@ -383,12 +443,8 @@ static void set_aligned(uint val_type, void *p, mp_int_t index, mp_obj_t val) {
             return;
         case INT64:
         case UINT64:
-            if (sizeof(mp_int_t) == 8) {
-                ((uint64_t *)p)[index] = (uint64_t)v;
-            } else {
-                // TODO: Doesn't offer atomic store semantics, but should at least try
-                set_unaligned(val_type, (void *)&((uint64_t *)p)[index], MP_ENDIANNESS_BIG, val);
-            }
+            assert(sizeof(mp_int_t) == 8);
+            ((uint64_t *)p)[index] = (uint64_t)v;
             return;
         default:
             assert(0);
@@ -430,29 +486,39 @@ static mp_obj_t uctypes_struct_attr_op(mp_obj_t self_in, qstr attr, mp_obj_t set
             offset &= (1 << OFFSET_BITS) - 1;
             mp_uint_t val;
             if (self->flags == LAYOUT_NATIVE) {
-                val = get_aligned_basic(val_type & 6, self->addr + offset);
+                val = get_aligned_basic(INT_TYPE_TO_UNSIGNED(val_type), self->addr + offset);
             } else {
-                val = mp_binary_get_int(GET_SCALAR_SIZE(val_type & 7), val_type & 1, self->flags, self->addr + offset);
+                val = mp_binary_get_int(GET_SCALAR_SIZE(val_type), INT_TYPE_IS_SIGNED(val_type),
+                    self->flags, self->addr + offset);
             }
             if (set_val == MP_OBJ_NULL) {
                 val >>= bit_offset;
                 val &= (1 << bit_len) - 1;
                 // TODO: signed
-                assert((val_type & 1) == 0);
+                assert(!INT_TYPE_IS_SIGNED(val_type));
                 return mp_obj_new_int(val);
             } else {
-                mp_uint_t set_val_int = (mp_uint_t)mp_obj_get_int(set_val);
                 mp_uint_t mask = (1 << bit_len) - 1;
+                // mp_obj_get_int raises TypeError if set_val is not an integer
+                mp_uint_t set_val_int = (mp_uint_t)mp_obj_get_int(set_val);
+
+                #if MICROPY_PREVIEW_VERSION_2
+                // V2 raises exception if the value doesn't fit in the bitfield
+                if ((set_val_int & mask) != set_val_int) {
+                    raise_overflow_exception();
+                }
+                #endif
+
                 set_val_int &= mask;
                 set_val_int <<= bit_offset;
                 mask <<= bit_offset;
                 val = (val & ~mask) | set_val_int;
 
                 if (self->flags == LAYOUT_NATIVE) {
-                    set_aligned_basic(val_type & 6, self->addr + offset, val);
+                    set_aligned_basic(INT_TYPE_TO_UNSIGNED(val_type), self->addr + offset, val);
                 } else {
-                    mp_binary_set_int(GET_SCALAR_SIZE(val_type & 7), self->flags == LAYOUT_BIG_ENDIAN,
-                        self->addr + offset, val);
+                    size_t item_size = GET_SCALAR_SIZE(val_type);
+                    mp_binary_set_int(item_size, self->addr + offset, item_size, val, self->flags == LAYOUT_BIG_ENDIAN);
                 }
                 return set_val; // just !MP_OBJ_NULL
             }
diff --git a/ports/stm32/adc.c b/ports/stm32/adc.c
index f47e9eaad7b35..d68b6427cdb53 100644
--- a/ports/stm32/adc.c
+++ b/ports/stm32/adc.c
@@ -695,7 +695,7 @@ static mp_obj_t adc_read_timed(mp_obj_t self_in, mp_obj_t buf_in, mp_obj_t freq_
             if (typesize == 1) {
                 value >>= 4;
             }
-            mp_binary_set_val_array_from_int(bufinfo.typecode, bufinfo.buf, index, value);
+            mp_binary_set_val_array(bufinfo.typecode, bufinfo.buf, index, MP_OBJ_NEW_SMALL_INT(value));
         }
 
         // turn the ADC off
@@ -803,7 +803,7 @@ static mp_obj_t adc_read_timed_multi(mp_obj_t adc_array_in, mp_obj_t buf_array_i
                 if (typesize == 1) {
                     value >>= 4;
                 }
-                mp_binary_set_val_array_from_int(bufinfo.typecode, bufptrs[array_index], elem_index, value);
+                mp_binary_set_val_array(bufinfo.typecode, bufptrs[array_index], elem_index, MP_OBJ_NEW_SMALL_INT(value));
             }
         }
 
diff --git a/ports/unix/coverage.c b/ports/unix/coverage.c
index 68340d7f239a8..6e7c18f4e2c28 100644
--- a/ports/unix/coverage.c
+++ b/ports/unix/coverage.c
@@ -627,19 +627,6 @@ static mp_obj_t extra_coverage(void) {
mp_printf(&mp_plat_print, "%s\n", buf2); } - // binary - { - mp_printf(&mp_plat_print, "# binary\n"); - - // call function with float and double typecodes - float far[1]; - double dar[1]; - mp_binary_set_val_array_from_int('f', far, 0, 123); - mp_printf(&mp_plat_print, "%.0f\n", (double)far[0]); - mp_binary_set_val_array_from_int('d', dar, 0, 456); - mp_printf(&mp_plat_print, "%.0lf\n", dar[0]); - } - // VM { mp_printf(&mp_plat_print, "# VM\n"); diff --git a/ports/unix/main.c b/ports/unix/main.c index 530e20a3863b4..a835b6f661946 100644 --- a/ports/unix/main.c +++ b/ports/unix/main.c @@ -480,8 +480,8 @@ int main(int argc, char **argv) { // Define a reasonable stack limit to detect stack overflow. mp_uint_t stack_size = 40000 * (sizeof(void *) / 4); - #if defined(__arm__) && !defined(__thumb2__) - // ARM (non-Thumb) architectures require more stack. + #if (defined(__arm__) && !defined(__thumb2__)) || defined(_MSC_VER) + // ARM (non-Thumb) architectures require more stack, as does Windows stack_size *= 2; #endif diff --git a/ports/unix/modffi.c b/ports/unix/modffi.c index b469e932e0d5a..c16d40ad3b485 100644 --- a/ports/unix/modffi.c +++ b/ports/unix/modffi.c @@ -446,7 +446,7 @@ static unsigned long long ffi_get_int_value(mp_obj_t o) { return MP_OBJ_SMALL_INT_VALUE(o); } else { unsigned long long res; - mp_obj_int_to_bytes_impl(o, MP_ENDIANNESS_BIG, sizeof(res), (byte *)&res); + mp_obj_int_to_bytes(o, sizeof(res), (byte *)&res, MP_ENDIANNESS_BIG, false, false); return res; } } diff --git a/py/binary.c b/py/binary.c index 48d3421bca963..62a5c8a8ff5c2 100644 --- a/py/binary.c +++ b/py/binary.c @@ -42,6 +42,10 @@ #define alignof(type) offsetof(struct { char c; type t; }, t) #endif +// MicroPython V1.x truncates integers when writing into arrays, +// MicroPython V2 will raise OverflowError in these cases, same as CPython +#define OVERFLOW_CHECKS MICROPY_PREVIEW_VERSION_2 + size_t mp_binary_get_size(char struct_type, char val_type, size_t *palign) { size_t size = 0; int 
align = 1; @@ -370,7 +374,21 @@ mp_obj_t mp_binary_get_val(char struct_type, char val_type, byte *p_base, byte * } } -void mp_binary_set_int(size_t val_sz, bool big_endian, byte *dest, mp_uint_t val) { +void mp_binary_set_int(size_t dest_sz, byte *dest, size_t val_sz, mp_uint_t val, bool big_endian) { + if (dest_sz > val_sz) { + // zero/sign extension if needed + int c = ((mp_int_t)val < 0) ? 0xff : 0x00; + memset(dest, c, dest_sz); + + // big endian: write val_sz bytes at end of 'dest' + if (big_endian) { + dest += dest_sz - val_sz; + } + } else if (dest_sz < val_sz) { + // truncate 'val' into 'dest' + val_sz = dest_sz; + } + if (MP_ENDIANNESS_LITTLE && !big_endian) { memcpy(dest, &val, val_sz); } else if (MP_ENDIANNESS_BIG && big_endian) { @@ -434,34 +452,21 @@ void mp_binary_set_val(char struct_type, char val_type, mp_obj_t val_in, byte *p val = fp_dp.i64; } else { int be = struct_type == '>'; - mp_binary_set_int(sizeof(uint32_t), be, p, fp_dp.i32[MP_ENDIANNESS_BIG ^ be]); + mp_binary_set_int(sizeof(uint32_t), p, sizeof(uint32_t), fp_dp.i32[MP_ENDIANNESS_BIG ^ be], be); + // Now fall through and copy the second word, below p += sizeof(uint32_t); + size = sizeof(uint32_t); val = fp_dp.i32[MP_ENDIANNESS_LITTLE ^ be]; } break; } #endif default: - #if MICROPY_LONGINT_IMPL != MICROPY_LONGINT_IMPL_NONE - if (mp_obj_is_exact_type(val_in, &mp_type_int)) { - mp_obj_int_to_bytes_impl(val_in, struct_type == '>', size, p); - return; - } - #endif - - val = mp_obj_get_int(val_in); - // zero/sign extend if needed - if (MP_BYTES_PER_OBJ_WORD < 8 && size > sizeof(val)) { - int c = (mp_int_t)val < 0 ? 
0xff : 0x00; - memset(p, c, size); - if (struct_type == '>') { - p += size - sizeof(val); - } - } - break; + mp_obj_int_to_bytes(val_in, size, p, struct_type == '>', is_signed(val_type), OVERFLOW_CHECKS); + return; } - mp_binary_set_int(MIN((size_t)size, sizeof(val)), struct_type == '>', p, val); + mp_binary_set_int(size, p, sizeof(val), val, struct_type == '>'); } void mp_binary_set_val_array(char typecode, void *p, size_t index, mp_obj_t val_in) { @@ -478,65 +483,11 @@ void mp_binary_set_val_array(char typecode, void *p, size_t index, mp_obj_t val_ case 'O': ((mp_obj_t *)p)[index] = val_in; break; - default: - #if MICROPY_LONGINT_IMPL != MICROPY_LONGINT_IMPL_NONE - if (mp_obj_is_exact_type(val_in, &mp_type_int)) { - size_t size = mp_binary_get_size('@', typecode, NULL); - mp_obj_int_to_bytes_impl(val_in, MP_ENDIANNESS_BIG, - size, (uint8_t *)p + index * size); - return; - } - #endif - mp_binary_set_val_array_from_int(typecode, p, index, mp_obj_get_int(val_in)); - } -} - -void mp_binary_set_val_array_from_int(char typecode, void *p, size_t index, mp_int_t val) { - switch (typecode) { - case 'b': - ((signed char *)p)[index] = val; - break; - case BYTEARRAY_TYPECODE: - case 'B': - ((unsigned char *)p)[index] = val; - break; - case 'h': - ((short *)p)[index] = val; - break; - case 'H': - ((unsigned short *)p)[index] = val; - break; - case 'i': - ((int *)p)[index] = val; - break; - case 'I': - ((unsigned int *)p)[index] = val; - break; - case 'l': - ((long *)p)[index] = val; - break; - case 'L': - ((unsigned long *)p)[index] = val; - break; - #if MICROPY_LONGINT_IMPL != MICROPY_LONGINT_IMPL_NONE - case 'q': - ((long long *)p)[index] = val; - break; - case 'Q': - ((unsigned long long *)p)[index] = val; - break; - #endif - #if MICROPY_PY_BUILTINS_FLOAT - case 'f': - ((float *)p)[index] = (float)val; - break; - case 'd': - ((double *)p)[index] = (double)val; - break; - #endif - // Extension to CPython: array of pointers - case 'P': - ((void **)p)[index] = (void 
*)(uintptr_t)val; - break; + default: { + size_t size = mp_binary_get_size('@', typecode, NULL); + p = (uint8_t *)p + index * size; + mp_obj_int_to_bytes(val_in, size, p, MP_ENDIANNESS_BIG, is_signed(typecode), OVERFLOW_CHECKS); + return; + } } } diff --git a/py/binary.h b/py/binary.h index 5c645bcaaa9c2..851dc50110eae 100644 --- a/py/binary.h +++ b/py/binary.h @@ -37,10 +37,9 @@ size_t mp_binary_get_size(char struct_type, char val_type, size_t *palign); mp_obj_t mp_binary_get_val_array(char typecode, void *p, size_t index); void mp_binary_set_val_array(char typecode, void *p, size_t index, mp_obj_t val_in); -void mp_binary_set_val_array_from_int(char typecode, void *p, size_t index, mp_int_t val); mp_obj_t mp_binary_get_val(char struct_type, char val_type, byte *p_base, byte **ptr); void mp_binary_set_val(char struct_type, char val_type, mp_obj_t val_in, byte *p_base, byte **ptr); long long mp_binary_get_int(size_t size, bool is_signed, bool big_endian, const byte *src); -void mp_binary_set_int(size_t val_sz, bool big_endian, byte *dest, mp_uint_t val); +void mp_binary_set_int(size_t dest_sz, byte *dest, size_t val_sz, mp_uint_t val, bool big_endian); #endif // MICROPY_INCLUDED_PY_BINARY_H diff --git a/py/mpz.c b/py/mpz.c index 5a4d7d27d94d5..6b4bee943694b 100644 --- a/py/mpz.c +++ b/py/mpz.c @@ -1592,7 +1592,7 @@ bool mpz_as_uint_checked(const mpz_t *i, mp_uint_t *value) { return true; } -bool mpz_as_bytes(const mpz_t *z, bool big_endian, bool as_signed, size_t len, byte *buf) { +void mpz_as_bytes(const mpz_t *z, bool big_endian, bool as_signed, size_t len, byte *buf) { byte *b = buf; if (big_endian) { b += len; @@ -1602,7 +1602,6 @@ bool mpz_as_bytes(const mpz_t *z, bool big_endian, bool as_signed, size_t len, b mpz_dbl_dig_t d = 0; mpz_dbl_dig_t carry = 1; size_t olen = len; // bytes in output buffer - bool ok = true; for (size_t zlen = z->len; zlen > 0; --zlen) { bits += DIG_SIZE; d = (d << DIG_SIZE) | *zdig++; @@ -1615,7 +1614,6 @@ bool mpz_as_bytes(const 
mpz_t *z, bool big_endian, bool as_signed, size_t len, b if (!olen) { // Buffer is full, only OK if all remaining bytes are zeroes - ok = ok && ((byte)val == 0); continue; } @@ -1628,16 +1626,10 @@ bool mpz_as_bytes(const mpz_t *z, bool big_endian, bool as_signed, size_t len, b } } - if (as_signed && olen == 0 && len > 0) { - // If output exhausted then ensure there was enough space for the sign bit - byte most_sig = big_endian ? buf[0] : buf[len - 1]; - ok = ok && (bool)(most_sig & 0x80) == (bool)z->neg; - } else { + if (!(as_signed && olen == 0 && len > 0)) { // fill remainder of buf with zero/sign extension of the integer memset(big_endian ? buf : b, z->neg ? 0xff : 0x00, olen); } - - return ok; } #if MICROPY_PY_BUILTINS_FLOAT diff --git a/py/mpz.h b/py/mpz.h index 6f1ac930b0214..d38c4b1c0c09d 100644 --- a/py/mpz.h +++ b/py/mpz.h @@ -145,8 +145,7 @@ static inline size_t mpz_max_num_bits(const mpz_t *z) { mp_int_t mpz_hash(const mpz_t *z); bool mpz_as_int_checked(const mpz_t *z, mp_int_t *value); bool mpz_as_uint_checked(const mpz_t *z, mp_uint_t *value); -// Returns true if 'z' fit into 'len' bytes of 'buf' without overflowing, 'buf' is truncated otherwise. 
-bool mpz_as_bytes(const mpz_t *z, bool big_endian, bool as_signed, size_t len, byte *buf); +void mpz_as_bytes(const mpz_t *z, bool big_endian, bool as_signed, size_t len, byte *buf); #if MICROPY_PY_BUILTINS_FLOAT mp_float_t mpz_as_float(const mpz_t *z); #endif diff --git a/py/obj.c b/py/obj.c index 586759460762b..080795d511526 100644 --- a/py/obj.c +++ b/py/obj.c @@ -338,7 +338,7 @@ long long mp_obj_get_ll(mp_const_obj_t arg) { return MP_OBJ_SMALL_INT_VALUE(arg); } else { long long res; - mp_obj_int_to_bytes_impl((mp_obj_t)arg, MP_ENDIANNESS_BIG, sizeof(res), (byte *)&res); + mp_obj_int_to_bytes((mp_obj_t)arg, sizeof(res), (byte *)&res, MP_ENDIANNESS_BIG, false, false); return res; } } diff --git a/py/objint.c b/py/objint.c index 87d8a27852d34..d7329316a7258 100644 --- a/py/objint.c +++ b/py/objint.c @@ -39,6 +39,11 @@ #include #endif +#if MICROPY_LONGINT_IMPL == MICROPY_LONGINT_IMPL_MPZ +// Generally this implementation lives in objint_mpz.c, but some small functions inlined here... +#include "py/mpz.h" +#endif + // This dispatcher function is expected to be independent of the implementation of long int static mp_obj_t mp_obj_int_make_new(const mp_obj_type_t *type_in, size_t n_args, size_t n_kw, const mp_obj_t *args) { (void)type_in; @@ -99,8 +104,8 @@ static mp_fp_as_int_class_t mp_classify_fp_as_int(mp_float_t val) { #elif MICROPY_FLOAT_IMPL == MICROPY_FLOAT_IMPL_DOUBLE e = u.i[MP_ENDIANNESS_LITTLE]; #endif -#define MP_FLOAT_SIGN_SHIFT_I32 ((MP_FLOAT_FRAC_BITS + MP_FLOAT_EXP_BITS) % 32) -#define MP_FLOAT_EXP_SHIFT_I32 (MP_FLOAT_FRAC_BITS % 32) + #define MP_FLOAT_SIGN_SHIFT_I32 ((MP_FLOAT_FRAC_BITS + MP_FLOAT_EXP_BITS) % 32) + #define MP_FLOAT_EXP_SHIFT_I32 (MP_FLOAT_FRAC_BITS % 32) if (e & (1U << MP_FLOAT_SIGN_SHIFT_I32)) { #if MICROPY_FLOAT_IMPL == MICROPY_FLOAT_IMPL_DOUBLE @@ -199,10 +204,10 @@ static const uint8_t log_base2_floor[] = { 3, 3, 3, 3, 3, 3, 3, 4, /* if needed, these are the values for higher bases - 4, 4, 4, 4, - 4, 4, 4, 4, - 4, 4, 4, 4, - 4, 
4, 4, 5 + 4, 4, 4, 4, + 4, 4, 4, 4, + 4, 4, 4, 4, + 4, 4, 4, 5 */ }; @@ -301,6 +306,185 @@ char *mp_obj_int_formatted(char **buf, size_t *buf_size, size_t *fmt_size, mp_co return b; } +#if MICROPY_LONGINT_IMPL == MICROPY_LONGINT_IMPL_MPZ + +static void mp_obj_int_buffer_overflow_check(mp_obj_t self_in, size_t nbytes, bool is_signed) { + if (is_signed) { + // edge = 1 << (nbytes * 8 - 1) + mp_obj_t edge = mp_binary_op(MP_BINARY_OP_INPLACE_LSHIFT, + mp_obj_new_int(1), + mp_obj_new_int(nbytes * 8 - 1)); + + // if self >= edge, we don't fit + if (mp_binary_op(MP_BINARY_OP_MORE_EQUAL, self_in, edge) == mp_const_true) { + goto raise; + } + + // edge = -edge + edge = mp_unary_op(MP_UNARY_OP_NEGATIVE, edge); + + // if self < edge, we don't fit + if (mp_binary_op(MP_BINARY_OP_LESS, self_in, edge) == mp_const_true) { + goto raise; + } + } else { + if (mp_obj_int_sign(self_in) < 0) { + // Negative numbers never fit in an unsigned value + goto raise; + } + + // edge = 1 << (nbytes * 8) + mp_obj_t edge = mp_binary_op(MP_BINARY_OP_INPLACE_LSHIFT, + mp_obj_new_int(1), + mp_obj_new_int(nbytes * 8)); + + // if self >= edge, we don't fit + if (mp_binary_op(MP_BINARY_OP_MORE_EQUAL, self_in, edge) == mp_const_true) { + goto raise; + } + } + + return; + +raise: + mp_raise_msg_varg(&mp_type_OverflowError, MP_ERROR_TEXT("value would overflow a %d byte buffer"), nbytes); +} +#endif // MICROPY_LONGINT_IMPL == MICROPY_LONGINT_IMPL_MPZ + +#if MICROPY_LONGINT_IMPL == MICROPY_LONGINT_IMPL_LONGLONG + +// Same as the general mp_small_int_buffer_overflow_check() below, but using 64-bit integers +static void longint_buffer_overflow_check(mp_longint_impl_t val, size_t nbytes, bool is_signed) { + // Fast path for zero. + if (val == 0) { + return; + } + // Trying to store negative values in unsigned bytes falls through to failure. + if (is_signed || val >= 0) { + + if (nbytes >= sizeof(val)) { + // All non-negative N bit signed integers fit in an unsigned N bit integer. 
+            // This case prevents shifting too far below.
+            return;
+        }
+
+        if (is_signed) {
+            mp_longint_impl_t edge = nbytes ? (1LL << (nbytes * 8 - 1)) : 0;
+            if (-edge <= val && val < edge) {
+                return;
+            }
+            // Out of range (or zero-length buffer), fall through to failure.
+        } else {
+            // Unsigned. We already know val >= 0.
+            mp_longint_impl_t edge = 1LL << (nbytes * 8);
+            if (val < edge) {
+                return;
+            }
+        }
+        // Fall through to failure.
+    }
+
+    mp_raise_msg_varg(&mp_type_OverflowError, MP_ERROR_TEXT("value would overflow a %d byte buffer"), (int)nbytes);
+}
+
+static void mp_obj_int_buffer_overflow_check(mp_obj_t self_in, size_t nbytes, bool is_signed) {
+    const mp_obj_int_t *self = MP_OBJ_TO_PTR(self_in);
+    mp_longint_impl_t val = self->val;
+    longint_buffer_overflow_check(val, nbytes, is_signed);
+}
+
+// save some code size by calling into the longint version for both sizes of int
+static void mp_small_int_buffer_overflow_check(mp_int_t val, size_t nbytes, bool is_signed) {
+    longint_buffer_overflow_check((mp_longint_impl_t)val, nbytes, is_signed);
+}
+
+// Placed here rather than objint_longlong.c for code size reasons
+static void longint_to_bytes(mp_obj_int_t *self, bool big_endian, size_t len, byte *buf) {
+    MP_STATIC_ASSERT(sizeof(mp_uint_t) == 4);
+    long long val = self->val;
+    mp_uint_t lower = val;
+    mp_uint_t upper = (val >> 32);
+
+    if (big_endian) {
+        if (len > 4) {
+            // write the least significant 4 bytes at the end
+            mp_binary_set_int(4, buf + len - 4, sizeof(lower), lower, true);
+        }
+        // write the remaining bytes at the start: 'upper' (extended if necessary), or truncated 'lower' if len <= 4
+        mp_binary_set_int(len > 4 ? len - 4 : len, buf, sizeof(upper), len > 4 ? upper : lower, true);
+    } else {
+        // write the least significant (up to) 4 bytes at the start
+        mp_binary_set_int(len > 4 ? 4 : len, buf, sizeof(lower), lower, false);
+        if (len > 4) {
+            // write the most significant bytes at the end, extending if necessary
+            mp_binary_set_int(len - 4, buf + 4, sizeof(upper), upper, false);
+        }
+    }
+}
+
+#else
+
+static void mp_small_int_buffer_overflow_check(mp_int_t val, size_t nbytes, bool is_signed) {
+    // Fast path for zero.
+    if (val == 0) {
+        return;
+    }
+    // Trying to store negative values in unsigned bytes falls through to failure.
+    if (is_signed || val >= 0) {
+
+        if (nbytes >= sizeof(val)) {
+            // All non-negative N bit signed integers fit in an unsigned N bit integer.
+            // This case prevents shifting too far below.
+            return;
+        }
+
+        if (is_signed) {
+            mp_int_t edge = nbytes ? ((mp_int_t)1 << (nbytes * 8 - 1)) : 0;
+            if (-edge <= val && val < edge) {
+                return;
+            }
+            // Out of range (or zero-length buffer), fall through to failure.
+        } else {
+            // Unsigned. We already know val >= 0.
+            mp_int_t edge = ((mp_int_t)1 << (nbytes * 8));
+            if (val < edge) {
+                return;
+            }
+        }
+        // Fall through to failure.
+    }
+
+    mp_raise_msg_varg(&mp_type_OverflowError, MP_ERROR_TEXT("value would overflow a %d byte buffer"), (int)nbytes);
+}
+
+#endif // MICROPY_LONGINT_IMPL == MICROPY_LONGINT_IMPL_LONGLONG
+
+void mp_obj_int_to_bytes(mp_obj_t self_in, size_t buf_len, byte *buf, bool big_endian, bool is_signed, bool overflow_check) {
+    #if MICROPY_LONGINT_IMPL != MICROPY_LONGINT_IMPL_NONE
+    if (mp_obj_is_exact_type(self_in, &mp_type_int)) {
+        if (overflow_check) {
+            mp_obj_int_buffer_overflow_check(self_in, buf_len, is_signed);
+        }
+        #if MICROPY_LONGINT_IMPL == MICROPY_LONGINT_IMPL_MPZ
+        mp_obj_int_t *self = MP_OBJ_TO_PTR(self_in);
+        mpz_as_bytes(&self->mpz, big_endian, self->mpz.neg, buf_len, buf);
+        #else // MICROPY_LONGINT_IMPL_LONGLONG
+        longint_to_bytes(MP_OBJ_TO_PTR(self_in), big_endian, buf_len, buf);
+        #endif
+        return;
+    }
+    #endif // MICROPY_LONGINT_IMPL != MICROPY_LONGINT_IMPL_NONE
+
+    // self_in is either a smallint, or another type convertible to mp_int_t (i.e.
bool) + + mp_int_t val = mp_obj_get_int(self_in); + if (overflow_check) { + mp_small_int_buffer_overflow_check(val, buf_len, is_signed); + } + mp_binary_set_int(buf_len, buf, sizeof(val), val, big_endian); +} + + #if MICROPY_LONGINT_IMPL == MICROPY_LONGINT_IMPL_NONE int mp_obj_int_sign(mp_obj_t self_in) { @@ -421,55 +605,35 @@ static mp_obj_t int_from_bytes(size_t n_args, const mp_obj_t *args) { static MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(int_from_bytes_fun_obj, 2, 4, int_from_bytes); static MP_DEFINE_CONST_CLASSMETHOD_OBJ(int_from_bytes_obj, MP_ROM_PTR(&int_from_bytes_fun_obj)); -static mp_obj_t int_to_bytes(size_t n_args, const mp_obj_t *args) { - // TODO: Support signed (currently behaves as if signed=(val < 0)) - bool overflow; +static mp_obj_t int_to_bytes(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw_args) { + enum { ARG_length, ARG_byteorder, ARG_signed }; + static const mp_arg_t allowed_args[] = { + { MP_QSTR_length, MP_ARG_INT, { .u_int = 1 } }, + { MP_QSTR_byteorder, MP_ARG_OBJ, { .u_rom_obj = MP_ROM_QSTR(MP_QSTR_big) } }, + { MP_QSTR_signed, MP_ARG_KW_ONLY | MP_ARG_BOOL, {.u_bool = false} }, + }; + mp_arg_val_t args[MP_ARRAY_SIZE(allowed_args)]; + mp_arg_parse_all(n_args - 1, pos_args + 1, kw_args, MP_ARRAY_SIZE(allowed_args), allowed_args, args); + + mp_obj_t self = pos_args[0]; - mp_int_t dlen = n_args < 2 ? 
1 : mp_obj_get_int(args[1]); + mp_int_t dlen = args[ARG_length].u_int; if (dlen < 0) { mp_raise_ValueError(NULL); } - bool big_endian = n_args < 3 || args[2] != MP_OBJ_NEW_QSTR(MP_QSTR_little); vstr_t vstr; vstr_init_len(&vstr, dlen); byte *data = (byte *)vstr.buf; - #if MICROPY_LONGINT_IMPL != MICROPY_LONGINT_IMPL_NONE - if (!mp_obj_is_small_int(args[0])) { - overflow = !mp_obj_int_to_bytes_impl(args[0], big_endian, dlen, data); - } else - #endif - { - mp_int_t val = MP_OBJ_SMALL_INT_VALUE(args[0]); - int slen = 0; // Number of bytes to represent val - - // This logic has a twin in objint_longlong.c - if (val > 0) { - slen = (sizeof(mp_int_t) * 8 - mp_clz_mpi(val) + 7) / 8; - } else if (val < -1) { - slen = (sizeof(mp_int_t) * 8 - mp_clz_mpi(~val) + 8) / 8; - } else { - // clz of 0 is defined, so 0 and -1 map to 0 and 1 - slen = -val; - } - - if (slen <= dlen) { - memset(data, val < 0 ? 0xFF : 0x00, dlen); - mp_binary_set_int(slen, big_endian, data + (big_endian ? (dlen - slen) : 0), val); - overflow = false; - } else { - overflow = true; - } - } + bool big_endian = args[ARG_byteorder].u_obj != MP_OBJ_NEW_QSTR(MP_QSTR_little); + bool signed_ = args[ARG_signed].u_bool; - if (overflow) { - mp_raise_msg(&mp_type_OverflowError, MP_ERROR_TEXT("buffer too small")); - } + mp_obj_int_to_bytes(self, dlen, data, big_endian, signed_, true); return mp_obj_new_bytes_from_vstr(&vstr); } -static MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(int_to_bytes_obj, 1, 4, int_to_bytes); +static MP_DEFINE_CONST_FUN_OBJ_KW(int_to_bytes_obj, 1, int_to_bytes); static const mp_rom_map_elem_t int_locals_dict_table[] = { { MP_ROM_QSTR(MP_QSTR_from_bytes), MP_ROM_PTR(&int_from_bytes_obj) }, diff --git a/py/objint.h b/py/objint.h index 28930e35adb49..f6a551ea96143 100644 --- a/py/objint.h +++ b/py/objint.h @@ -53,10 +53,12 @@ char *mp_obj_int_formatted(char **buf, size_t *buf_size, size_t *fmt_size, mp_co int base, const char *prefix, char base_char, char comma); char *mp_obj_int_formatted_impl(char 
**buf, size_t *buf_size, size_t *fmt_size, mp_const_obj_t self_in, int base, const char *prefix, char base_char, char comma); + mp_int_t mp_obj_int_hash(mp_obj_t self_in); mp_obj_t mp_obj_int_from_bytes_impl(bool big_endian, size_t len, const byte *buf); -// Returns true if 'self_in' fit into 'len' bytes of 'buf' without overflowing, 'buf' is truncated otherwise. -bool mp_obj_int_to_bytes_impl(mp_obj_t self_in, bool big_endian, size_t len, byte *buf); +// Write an integer to a byte sequence. +// If overflow_check is true, raises OverflowError if 'self_in' doesn't fit. If false, truncate to fit. +void mp_obj_int_to_bytes(mp_obj_t self_in, size_t buf_len, byte *buf, bool big_endian, bool is_signed, bool overflow_check); int mp_obj_int_sign(mp_obj_t self_in); mp_obj_t mp_obj_int_unary_op(mp_unary_op_t op, mp_obj_t o_in); mp_obj_t mp_obj_int_binary_op(mp_binary_op_t op, mp_obj_t lhs_in, mp_obj_t rhs_in); diff --git a/py/objint_longlong.c b/py/objint_longlong.c index 22ac0ba12efa3..b0e3f5ac740ff 100644 --- a/py/objint_longlong.c +++ b/py/objint_longlong.c @@ -62,42 +62,6 @@ mp_obj_t mp_obj_int_from_bytes_impl(bool big_endian, size_t len, const byte *buf return mp_obj_new_int_from_ll(value); } -bool mp_obj_int_to_bytes_impl(mp_obj_t self_in, bool big_endian, size_t len, byte *buf) { - assert(mp_obj_is_exact_type(self_in, &mp_type_int)); - mp_obj_int_t *self = self_in; - long long val = self->val; - size_t slen; // Number of bytes to represent val - - // This logic has a twin in objint.c - if (val > 0) { - slen = (sizeof(long long) * 8 - mp_clzll(val) + 7) / 8; - } else if (val < -1) { - slen = (sizeof(long long) * 8 - mp_clzll(~val) + 8) / 8; - } else { - // clz of 0 is defined, so 0 and -1 map to 0 and 1 - slen = -val; - } - - if (slen > len) { - return false; // Would overflow - // TODO: Determine whether to copy and truncate, as some callers probably expect this...? 
- } - - if (big_endian) { - byte *b = buf + len; - while (b > buf) { - *--b = val; - val >>= 8; - } - } else { - for (; len > 0; --len) { - *buf++ = val; - val >>= 8; - } - } - return true; -} - int mp_obj_int_sign(mp_obj_t self_in) { mp_longint_impl_t val; if (mp_obj_is_small_int(self_in)) { diff --git a/py/objint_mpz.c b/py/objint_mpz.c index 6f2ea616c779c..895ed17e5dc5b 100644 --- a/py/objint_mpz.c +++ b/py/objint_mpz.c @@ -112,12 +112,6 @@ mp_obj_t mp_obj_int_from_bytes_impl(bool big_endian, size_t len, const byte *buf return MP_OBJ_FROM_PTR(o); } -bool mp_obj_int_to_bytes_impl(mp_obj_t self_in, bool big_endian, size_t len, byte *buf) { - assert(mp_obj_is_exact_type(self_in, &mp_type_int)); - mp_obj_int_t *self = MP_OBJ_TO_PTR(self_in); - return mpz_as_bytes(&self->mpz, big_endian, self->mpz.neg, len, buf); -} - int mp_obj_int_sign(mp_obj_t self_in) { if (mp_obj_is_small_int(self_in)) { mp_int_t val = MP_OBJ_SMALL_INT_VALUE(self_in); diff --git a/tests/basics/array_int_repr.py b/tests/basics/array_int_repr.py new file mode 100644 index 0000000000000..95d5487a69c37 --- /dev/null +++ b/tests/basics/array_int_repr.py @@ -0,0 +1,80 @@ +# Test array integer representations in memory +# +# This has to be a unit test because correct internal representation depends on +# native endianness +# +# These test cases should pass on both CPython and MicroPython. 
+ +try: + from array import array + from sys import byteorder +except ImportError: + print("SKIP") + raise SystemExit + +import unittest + +# Ports without bigint support don't support typecode 'q' +try: + array('q', []) + array_has_typecode_q = True +except: + array_has_typecode_q = False + +class TestIntReprs(unittest.TestCase): + def _test_repr(self, typecode, values): + # create an array with the specified typecode and list of values + a = array(typecode, values) + a_hex = memoryview(a).hex() + print(a, a_hex) + + self.assertEqual(len(a_hex) % len(values), 0) + # no array.itemsize in MicroPython, so calculate item size + sz = len(a_hex) // 2 // len(values) + if hasattr(a, "itemsize"): + self.assertEqual(a.itemsize, sz) + + # build alternative hex representation of the array using int.to_bytes() + # on each value + values_hex = "" + for v in values: + v_bytes = v.to_bytes(sz, byteorder=byteorder, signed=typecode.islower()) + values_hex += v_bytes.hex() + + # compare with the raw array contents + self.assertEqual(a_hex, values_hex) + + def test_smaller_ints(self): + for (typecode, initialiser) in ( + ('b', [1, -1, 120, -120]), + ('B', [1, 5, 220]), + ('h', [5, -1, 32_000, -32_000]), + ('H', [5, 1, 32_000, 65_535]), + ('i', [5, -1, 32_000, -32_000]), # CPython only guarantees min 2 bytes, C style! 
+ ('I', [5, 1, 32_000, 65_535]), + ('l', [5, -1, 2_000_000, -2_000_000, 0x7FFF_FFFF]), + ('L', [5, 1, 65_536, 2_000_000, 0x7FFF_FFFF, 0xFFFF_FFFF]), + ): + self._test_repr(typecode, initialiser) + + @unittest.skipIf(not array_has_typecode_q, "port has no bigint support") + def test_bigints(self): + # Note: need to be careful not to write any literal expressions that can't be compiled on non-bigint MP + a = 0x1FFF_FFF + b = 62 + + try: + # this calculation will trigger OverflowError if bigint is set to long long + max_uint64 = [2 ** (b + 1)] + except OverflowError: + max_uint64 = [] + + for (typecode, initialiser) in ( + ('q', [a * 5, -a * 10, 2 ** b, (2 ** b) * -1]), + ('Q', [a * 5, a * 10, 2 ** b, (2 ** b) - 1, (2 ** b) + 1] + max_uint64) + ): + self._test_repr(typecode, initialiser) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/basics/array_limits_intbig.py b/tests/basics/array_limits_intbig.py new file mode 100644 index 0000000000000..50eb00a0ff613 --- /dev/null +++ b/tests/basics/array_limits_intbig.py @@ -0,0 +1,78 @@ +# Test behaviour when array module is provided out of bounds values +# +# This test is intended to also pass on CPython. 
+ +try: + from array import array +except ImportError: + print("SKIP") + raise SystemExit + +import unittest + +# MicroPython V2.0 will enforce bounds on items (same as CPython), V1.x truncates +# +# Note: once 1 +import unittest try: import uctypes @@ -13,14 +14,79 @@ print("SKIP") raise SystemExit +# MicroPython V2.0 will enforce bounds on uctypes items, V1.x truncates +import sys + +is_v2 = hasattr(sys.implementation, "_v2") + N = 5 +PLACEHOLDER = 99 + + +class Test(unittest.TestCase): + def test_native_endian(self): + self._test_endian("NATIVE") -for endian in ("NATIVE", "LITTLE_ENDIAN", "BIG_ENDIAN"): - for type_ in ("INT8", "UINT8", "INT16", "UINT16", "INT32", "UINT32", "INT64", "UINT64"): - desc = {"arr": (uctypes.ARRAY | 0, getattr(uctypes, type_) | N)} + def test_little_endian(self): + self._test_endian("LITTLE_ENDIAN") + + def test_big_endian(self): + self._test_endian("BIG_ENDIAN") + + def _test_endian(self, endian): + for item_type in ( + "INT8", + "UINT8", + "INT16", + "UINT16", + "INT32", + "UINT32", + "INT64", + "UINT64", + ): + print(endian, item_type) + self._test_endian_type(endian, item_type) + + def _test_endian_type(self, endian, item_type): + print("Testing array of", item_type, "with", endian, "endianness") + desc = {"arr": (uctypes.ARRAY | 0, getattr(uctypes, item_type) | N)} + print(repr(desc)) sz = uctypes.sizeof(desc) data = bytearray(sz) + print(sz, repr((uctypes.addressof(data), desc, getattr(uctypes, endian)))) s = uctypes.struct(uctypes.addressof(data), desc, getattr(uctypes, endian)) + is_unsigned = item_type.startswith("U") + item_sz = uctypes.sizeof({"": getattr(uctypes, item_type)}) + + # V2 enforces range limits when setting fields + item_min = 0 if is_unsigned else -(2 ** (item_sz * 8 - 1)) + 1 + item_max = 2 ** (item_sz * 8 if is_unsigned else (item_sz * 8 - 1)) - 1 + print("representable range", item_min, item_max) + + for i in range(N): + n = i - 2 + print(i, n) + if is_v2 and (n < item_min or n > item_max): + with 
self.assertRaises(OverflowError): + s.arr[i] = n + s.arr[i] = PLACEHOLDER + n = PLACEHOLDER + else: + s.arr[i] = n + + print(endian, item_type, sz, *(s.arr[i] for i in range(N))) + for i in range(N): - s.arr[i] = i - 2 - print(endian, type_, sz, *(s.arr[i] for i in range(N))) + n = i - 2 + if is_v2 and (n < item_min or n > item_max): + # V2 will raise OverflowError, test swaps in placeholder + n = PLACEHOLDER + elif is_unsigned and n < 0: + # V1 will mask and then load as unsigned int + n = n & ((1 << (item_sz * 8)) - 1) + + self.assertEqual(s.arr[i], n) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/extmod/uctypes_array_load_store.py.exp b/tests/extmod/uctypes_array_load_store.py.exp deleted file mode 100644 index 10de8046454ba..0000000000000 --- a/tests/extmod/uctypes_array_load_store.py.exp +++ /dev/null @@ -1,24 +0,0 @@ -NATIVE INT8 5 -2 -1 0 1 2 -NATIVE UINT8 5 254 255 0 1 2 -NATIVE INT16 10 -2 -1 0 1 2 -NATIVE UINT16 10 65534 65535 0 1 2 -NATIVE INT32 20 -2 -1 0 1 2 -NATIVE UINT32 20 4294967294 4294967295 0 1 2 -NATIVE INT64 40 -2 -1 0 1 2 -NATIVE UINT64 40 18446744073709551614 18446744073709551615 0 1 2 -LITTLE_ENDIAN INT8 5 -2 -1 0 1 2 -LITTLE_ENDIAN UINT8 5 254 255 0 1 2 -LITTLE_ENDIAN INT16 10 -2 -1 0 1 2 -LITTLE_ENDIAN UINT16 10 65534 65535 0 1 2 -LITTLE_ENDIAN INT32 20 -2 -1 0 1 2 -LITTLE_ENDIAN UINT32 20 4294967294 4294967295 0 1 2 -LITTLE_ENDIAN INT64 40 -2 -1 0 1 2 -LITTLE_ENDIAN UINT64 40 18446744073709551614 18446744073709551615 0 1 2 -BIG_ENDIAN INT8 5 -2 -1 0 1 2 -BIG_ENDIAN UINT8 5 254 255 0 1 2 -BIG_ENDIAN INT16 10 -2 -1 0 1 2 -BIG_ENDIAN UINT16 10 65534 65535 0 1 2 -BIG_ENDIAN INT32 20 -2 -1 0 1 2 -BIG_ENDIAN UINT32 20 4294967294 4294967295 0 1 2 -BIG_ENDIAN INT64 40 -2 -1 0 1 2 -BIG_ENDIAN UINT64 40 18446744073709551614 18446744073709551615 0 1 2 diff --git a/tests/ports/unix/extra_coverage.py.exp b/tests/ports/unix/extra_coverage.py.exp index e20871273d709..f4da30fe9e53c 100644 --- 
a/tests/ports/unix/extra_coverage.py.exp +++ b/tests/ports/unix/extra_coverage.py.exp @@ -131,9 +131,6 @@ Warning: test ? +1e+00 +1e+00 -# binary -123 -456 # VM 2 1 # scheduler diff --git a/tools/ci.sh b/tools/ci.sh index 510bb3a4d3c8c..b7e4dec2114af 100755 --- a/tools/ci.sh +++ b/tools/ci.sh @@ -630,9 +630,17 @@ function ci_unix_coverage_run_mpy_merge_tests { outdir=$(mktemp -d) allmpy=() + export MICROPYPATH="${mptop}/lib/micropython-lib/python-stdlib/unittest" + # Compile a selection of tests to .mpy and execute them, collecting the output. # None of the tests should SKIP. for inpy in $mptop/tests/basics/[acdel]*.py; do + if grep -q "import unittest" $inpy; then + # Merging >1 unittest-enabled module leads to unexpected + # results, as each file runs all previously registered unittest cases + echo "SKIPPING $inpy" + continue + fi test=$(basename $inpy .py) echo $test outmpy=$outdir/$test.mpy