Skip to content

Commit 77160cc

Browse files
committed
WIP: py/objint,py/binary: Reduce the code size of int to byte conversions.
Refactors similar code paths to a common mp_obj_int_to_bytes() function to reduce code size.

This commit should have no functional changes.

This work was funded through GitHub Sponsors.

Signed-off-by: Angus Gratton <angus@redyak.com.au>
1 parent 0419b66 commit 77160cc

File tree

10 files changed

+101
-146
lines changed

10 files changed

+101
-146
lines changed

extmod/moductypes.c

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -448,8 +448,8 @@ static mp_obj_t uctypes_struct_attr_op(mp_obj_t self_in, qstr attr, mp_obj_t set
448448
if (self->flags == LAYOUT_NATIVE) {
449449
set_aligned_basic(val_type & 6, self->addr + offset, val);
450450
} else {
451-
mp_binary_set_int(GET_SCALAR_SIZE(val_type & 7), self->flags == LAYOUT_BIG_ENDIAN,
452-
self->addr + offset, val);
451+
size_t item_size = GET_SCALAR_SIZE(val_type & 7);
452+
mp_binary_set_int(item_size, self->addr + offset, item_size, val, self->flags == LAYOUT_BIG_ENDIAN);
453453
}
454454
return set_val; // just !MP_OBJ_NULL
455455
}

ports/unix/modffi.c

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -446,7 +446,7 @@ static unsigned long long ffi_get_int_value(mp_obj_t o) {
446446
return MP_OBJ_SMALL_INT_VALUE(o);
447447
} else {
448448
unsigned long long res;
449-
mp_obj_int_to_bytes_impl(o, MP_ENDIANNESS_BIG, sizeof(res), (byte *)&res);
449+
mp_obj_int_to_bytes(o, sizeof(res), (byte *)&res, MP_ENDIANNESS_BIG, false, false);
450450
return res;
451451
}
452452
}

py/binary.c

Lines changed: 25 additions & 58 deletions
Original file line number | Diff line number | Diff line change
@@ -374,7 +374,21 @@ mp_obj_t mp_binary_get_val(char struct_type, char val_type, byte *p_base, byte *
374374
}
375375
}
376376

377-
void mp_binary_set_int(size_t val_sz, bool big_endian, byte *dest, mp_uint_t val) {
377+
void mp_binary_set_int(size_t dest_sz, byte *dest, size_t val_sz, mp_uint_t val, bool big_endian) {
378+
if (dest_sz > val_sz) {
379+
// zero/sign extension if needed
380+
int c = ((mp_int_t)val < 0) ? 0xff : 0x00;
381+
memset(dest, c, dest_sz);
382+
383+
// big endian: write val_sz bytes at end of 'dest'
384+
if (big_endian) {
385+
dest += dest_sz - val_sz;
386+
}
387+
} else if (dest_sz < val_sz) {
388+
// truncate 'val' into 'dest'
389+
val_sz = dest_sz;
390+
}
391+
378392
if (MP_ENDIANNESS_LITTLE && !big_endian) {
379393
memcpy(dest, &val, val_sz);
380394
} else if (MP_ENDIANNESS_BIG && big_endian) {
@@ -438,48 +452,21 @@ void mp_binary_set_val(char struct_type, char val_type, mp_obj_t val_in, byte *p
438452
val = fp_dp.i64;
439453
} else {
440454
int be = struct_type == '>';
441-
mp_binary_set_int(sizeof(uint32_t), be, p, fp_dp.i32[MP_ENDIANNESS_BIG ^ be]);
455+
mp_binary_set_int(sizeof(uint32_t), p, sizeof(uint32_t), fp_dp.i32[MP_ENDIANNESS_BIG ^ be], be);
456+
// Now fall through and copy the second word, below
442457
p += sizeof(uint32_t);
458+
size = sizeof(uint32_t);
443459
val = fp_dp.i32[MP_ENDIANNESS_LITTLE ^ be];
444460
}
445461
break;
446462
}
447463
#endif
448-
default: {
449-
#if OVERFLOW_CHECKS
450-
bool signed_type = is_signed(val_type);
451-
#endif
452-
#if MICROPY_LONGINT_IMPL != MICROPY_LONGINT_IMPL_NONE
453-
if (mp_obj_is_exact_type(val_in, &mp_type_int)) {
454-
// It's a longint.
455-
#if OVERFLOW_CHECKS
456-
mp_obj_int_buffer_overflow_check(val_in, size, signed_type);
457-
#endif
458-
mp_obj_int_to_bytes_impl(val_in, struct_type == '>', size, p);
459-
return;
460-
}
461-
#endif
462-
{
463-
val = mp_obj_get_int(val_in);
464-
465-
#if OVERFLOW_CHECKS
466-
// Small int checking is separate, to be fast.
467-
mp_small_int_buffer_overflow_check(val, size, signed_type);
468-
#endif
469-
// zero/sign extend if needed
470-
if (MP_BYTES_PER_OBJ_WORD < 8 && size > sizeof(val)) {
471-
int c = (mp_int_t)val < 0 ? 0xff : 0x00;
472-
memset(p, c, size);
473-
if (struct_type == '>') {
474-
p += size - sizeof(val);
475-
}
476-
}
477-
}
478-
break;
479-
}
464+
default:
465+
mp_obj_int_to_bytes(val_in, size, p, struct_type == '>', is_signed(val_type), OVERFLOW_CHECKS);
466+
return;
480467
}
481468

482-
mp_binary_set_int(MIN((size_t)size, sizeof(val)), struct_type == '>', p, val);
469+
mp_binary_set_int(size, p, sizeof(val), val, struct_type == '>');
483470
}
484471

485472
void mp_binary_set_val_array(char typecode, void *p, size_t index, mp_obj_t val_in) {
@@ -498,29 +485,9 @@ void mp_binary_set_val_array(char typecode, void *p, size_t index, mp_obj_t val_
498485
break;
499486
default: {
500487
size_t size = mp_binary_get_size('@', typecode, NULL);
501-
#if OVERFLOW_CHECKS
502-
bool signed_type = is_signed(typecode);
503-
#endif
504-
#if MICROPY_LONGINT_IMPL != MICROPY_LONGINT_IMPL_NONE
505-
if (mp_obj_is_exact_type(val_in, &mp_type_int)) {
506-
// It's a long int.
507-
#if OVERFLOW_CHECKS
508-
mp_obj_int_buffer_overflow_check(val_in, size, signed_type);
509-
#endif
510-
mp_obj_int_to_bytes_impl(val_in, MP_ENDIANNESS_BIG,
511-
size, (uint8_t *)p + index * size);
512-
return;
513-
}
514-
#endif
515-
mp_int_t val = mp_obj_get_int(val_in);
516-
if (val < 0 && typecode == BYTEARRAY_TYPECODE) {
517-
val = val & 0xFF;
518-
}
519-
#if OVERFLOW_CHECKS
520-
// Small int checking is separate, to be fast.
521-
mp_small_int_buffer_overflow_check(val, size, signed_type);
522-
#endif
523-
mp_binary_set_val_array_from_int(typecode, p, index, val);
488+
p = (uint8_t *)p + index * size;
489+
mp_obj_int_to_bytes(val_in, size, p, MP_ENDIANNESS_BIG, is_signed(typecode), OVERFLOW_CHECKS);
490+
return;
524491
}
525492
}
526493
}

py/binary.h

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -41,6 +41,6 @@ void mp_binary_set_val_array_from_int(char typecode, void *p, size_t index, mp_i
4141
mp_obj_t mp_binary_get_val(char struct_type, char val_type, byte *p_base, byte **ptr);
4242
void mp_binary_set_val(char struct_type, char val_type, mp_obj_t val_in, byte *p_base, byte **ptr);
4343
long long mp_binary_get_int(size_t size, bool is_signed, bool big_endian, const byte *src);
44-
void mp_binary_set_int(size_t val_sz, bool big_endian, byte *dest, mp_uint_t val);
44+
void mp_binary_set_int(size_t dest_sz, byte *dest, size_t val_sz, mp_uint_t val, bool big_endian);
4545

4646
#endif // MICROPY_INCLUDED_PY_BINARY_H

py/mpz.c

Lines changed: 2 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -1589,7 +1589,7 @@ bool mpz_as_uint_checked(const mpz_t *i, mp_uint_t *value) {
15891589
return true;
15901590
}
15911591

1592-
bool mpz_as_bytes(const mpz_t *z, bool big_endian, bool as_signed, size_t len, byte *buf) {
1592+
void mpz_as_bytes(const mpz_t *z, bool big_endian, bool as_signed, size_t len, byte *buf) {
15931593
byte *b = buf;
15941594
if (big_endian) {
15951595
b += len;
@@ -1599,7 +1599,6 @@ bool mpz_as_bytes(const mpz_t *z, bool big_endian, bool as_signed, size_t len, b
15991599
mpz_dbl_dig_t d = 0;
16001600
mpz_dbl_dig_t carry = 1;
16011601
size_t olen = len; // bytes in output buffer
1602-
bool ok = true;
16031602
for (size_t zlen = z->len; zlen > 0; --zlen) {
16041603
bits += DIG_SIZE;
16051604
d = (d << DIG_SIZE) | *zdig++;
@@ -1612,7 +1611,6 @@ bool mpz_as_bytes(const mpz_t *z, bool big_endian, bool as_signed, size_t len, b
16121611

16131612
if (!olen) {
16141613
// Buffer is full, only OK if all remaining bytes are zeroes
1615-
ok = ok && ((byte)val == 0);
16161614
continue;
16171615
}
16181616

@@ -1625,16 +1623,10 @@ bool mpz_as_bytes(const mpz_t *z, bool big_endian, bool as_signed, size_t len, b
16251623
}
16261624
}
16271625

1628-
if (as_signed && olen == 0 && len > 0) {
1629-
// If output exhausted then ensure there was enough space for the sign bit
1630-
byte most_sig = big_endian ? buf[0] : buf[len - 1];
1631-
ok = ok && (bool)(most_sig & 0x80) == (bool)z->neg;
1632-
} else {
1626+
if (!(as_signed && olen == 0 && len > 0)) {
16331627
// fill remainder of buf with zero/sign extension of the integer
16341628
memset(big_endian ? buf : b, z->neg ? 0xff : 0x00, olen);
16351629
}
1636-
1637-
return ok;
16381630
}
16391631

16401632
#if MICROPY_PY_BUILTINS_FLOAT

py/mpz.h

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -145,8 +145,7 @@ static inline size_t mpz_max_num_bits(const mpz_t *z) {
145145
mp_int_t mpz_hash(const mpz_t *z);
146146
bool mpz_as_int_checked(const mpz_t *z, mp_int_t *value);
147147
bool mpz_as_uint_checked(const mpz_t *z, mp_uint_t *value);
148-
// Returns true if 'z' fit into 'len' bytes of 'buf' without overflowing, 'buf' is truncated otherwise.
149-
bool mpz_as_bytes(const mpz_t *z, bool big_endian, bool as_signed, size_t len, byte *buf);
148+
void mpz_as_bytes(const mpz_t *z, bool big_endian, bool as_signed, size_t len, byte *buf);
150149
#if MICROPY_PY_BUILTINS_FLOAT
151150
mp_float_t mpz_as_float(const mpz_t *z);
152151
#endif

py/objint.c

Lines changed: 66 additions & 23 deletions
Original file line number | Diff line number | Diff line change
@@ -39,6 +39,11 @@
3939
#include <math.h>
4040
#endif
4141

42+
#if MICROPY_LONGINT_IMPL == MICROPY_LONGINT_IMPL_MPZ
43+
// Generally this implementation lives in objint_mpz.c, but some small functions inlined here...
44+
#include "py/mpz.h"
45+
#endif
46+
4247
// This dispatcher function is expected to be independent of the implementation of long int
4348
static mp_obj_t mp_obj_int_make_new(const mp_obj_type_t *type_in, size_t n_args, size_t n_kw, const mp_obj_t *args) {
4449
(void)type_in;
@@ -302,7 +307,7 @@ char *mp_obj_int_formatted(char **buf, size_t *buf_size, size_t *fmt_size, mp_co
302307

303308
#if MICROPY_LONGINT_IMPL != MICROPY_LONGINT_IMPL_NONE
304309

305-
void mp_obj_int_buffer_overflow_check(mp_obj_t self_in, size_t nbytes, bool is_signed) {
310+
static void mp_obj_int_buffer_overflow_check(mp_obj_t self_in, size_t nbytes, bool is_signed) {
306311
if (is_signed) {
307312
// edge = 1 << (nbytes * 8 - 1)
308313
mp_obj_t edge = mp_binary_op(MP_BINARY_OP_INPLACE_LSHIFT,
@@ -346,7 +351,34 @@ void mp_obj_int_buffer_overflow_check(mp_obj_t self_in, size_t nbytes, bool is_s
346351

347352
#endif // MICROPY_LONGINT_IMPL != MICROPY_LONGINT_IMPL_NONE
348353

349-
void mp_small_int_buffer_overflow_check(mp_int_t val, size_t nbytes, bool is_signed) {
354+
#if MICROPY_LONGINT_IMPL == MICROPY_LONGINT_IMPL_LONGLONG
355+
// Placed here rather than objint_longlong.c for code size reasons
356+
static void longint_to_bytes(mp_obj_int_t *self, bool big_endian, size_t len, byte *buf) {
357+
MP_STATIC_ASSERT(sizeof(mp_uint_t) == 4);
358+
long long val = self->val;
359+
mp_uint_t lower = val;
360+
mp_uint_t upper = (val >> 32);
361+
362+
if (big_endian) {
363+
if (len > 4) {
364+
// write the least significant 4 bytes at the end
365+
mp_binary_set_int(4, buf + len - 4, sizeof(lower), lower, true);
366+
}
367+
// write most significant bytes at the start, extending if necessary
368+
mp_binary_set_int(len > 4 ? len - 4 : len, buf, sizeof(upper), upper, true);
369+
} else {
370+
// write the least significant 4 bytes at the start
371+
mp_binary_set_int(len > 4 ? len - 4 : len, buf, sizeof(lower), lower, false);
372+
if (len > 4) {
373+
// write the most significant bytes at the end, extending if necessary
374+
mp_binary_set_int(len - 4, buf + 4, sizeof(upper), upper, false);
375+
}
376+
}
377+
}
378+
#endif // MICROPY_LONGINT_IMPL == MICROPY_LONGINT_IMPL_LONGLONG
379+
380+
381+
static void mp_small_int_buffer_overflow_check(mp_int_t val, size_t nbytes, bool is_signed) {
350382
// Fast path for zero.
351383
if (val == 0) {
352384
return;
@@ -379,6 +411,32 @@ void mp_small_int_buffer_overflow_check(mp_int_t val, size_t nbytes, bool is_sig
379411
mp_raise_msg_varg(&mp_type_OverflowError, MP_ERROR_TEXT("value would overflow a %d byte buffer"), nbytes);
380412
}
381413

414+
void mp_obj_int_to_bytes(mp_obj_t self_in, size_t buf_len, byte *buf, bool big_endian, bool is_signed, bool overflow_check) {
415+
#if MICROPY_LONGINT_IMPL != MICROPY_LONGINT_IMPL_NONE
416+
if (mp_obj_is_exact_type(self_in, &mp_type_int)) {
417+
mp_obj_int_t *self = MP_OBJ_TO_PTR(self_in);
418+
if (overflow_check) {
419+
mp_obj_int_buffer_overflow_check(self, buf_len, is_signed);
420+
}
421+
#if MICROPY_LONGINT_IMPL == MICROPY_LONGINT_IMPL_MPZ
422+
mpz_as_bytes(&self->mpz, big_endian, self->mpz.neg, buf_len, buf);
423+
#else // MICROPY_LONGINT_IMPL_LONGLONG
424+
longint_to_bytes(self, big_endian, buf_len, buf);
425+
#endif
426+
return;
427+
}
428+
#endif
429+
430+
// self_in is either a smallint, or another type convertible to mp_int_t (i.e. bool)
431+
432+
mp_int_t val = mp_obj_get_int(self_in);
433+
if (overflow_check) {
434+
mp_small_int_buffer_overflow_check(val, buf_len, is_signed);
435+
}
436+
mp_binary_set_int(buf_len, buf, sizeof(val), val, big_endian);
437+
}
438+
439+
382440
#if MICROPY_LONGINT_IMPL == MICROPY_LONGINT_IMPL_NONE
383441

384442
int mp_obj_int_sign(mp_obj_t self_in) {
@@ -509,36 +567,21 @@ static mp_obj_t int_to_bytes(size_t n_args, const mp_obj_t *pos_args, mp_map_t *
509567
mp_arg_val_t args[MP_ARRAY_SIZE(allowed_args)];
510568
mp_arg_parse_all(n_args - 1, pos_args + 1, kw_args, MP_ARRAY_SIZE(allowed_args), allowed_args, args);
511569

570+
mp_obj_t self = pos_args[0];
571+
512572
mp_int_t dlen = args[ARG_length].u_int;
513573
if (dlen < 0) {
514574
mp_raise_ValueError(NULL);
515575
}
516576

517-
mp_obj_t self = pos_args[0];
518-
bool big_endian = args[ARG_byteorder].u_obj != MP_OBJ_NEW_QSTR(MP_QSTR_little);
519-
bool signed_ = args[ARG_signed].u_bool;
520-
521577
vstr_t vstr;
522578
vstr_init_len(&vstr, dlen);
523579
byte *data = (byte *)vstr.buf;
524580

525-
#if MICROPY_LONGINT_IMPL != MICROPY_LONGINT_IMPL_NONE
526-
if (!mp_obj_is_small_int(self)) {
527-
mp_obj_int_buffer_overflow_check(self, dlen, signed_);
528-
mp_obj_int_to_bytes_impl(self, big_endian, dlen, data);
529-
} else
530-
#endif
531-
{
532-
mp_int_t val = MP_OBJ_SMALL_INT_VALUE(self);
533-
// Small int checking is separate, to be fast.
534-
mp_small_int_buffer_overflow_check(val, dlen, signed_);
535-
size_t l = MIN((size_t)dlen, sizeof(val));
536-
if (val < 0) {
537-
// Sign extend negative numbers.
538-
memset(data, -1, dlen);
539-
}
540-
mp_binary_set_int(l, big_endian, data + (big_endian ? (dlen - l) : 0), val);
541-
}
581+
bool big_endian = args[ARG_byteorder].u_obj != MP_OBJ_NEW_QSTR(MP_QSTR_little);
582+
bool signed_ = args[ARG_signed].u_bool;
583+
584+
mp_obj_int_to_bytes(self, dlen, data, big_endian, signed_, true);
542585

543586
return mp_obj_new_bytes_from_vstr(&vstr);
544587
}

py/objint.h

Lines changed: 3 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -54,15 +54,11 @@ char *mp_obj_int_formatted(char **buf, size_t *buf_size, size_t *fmt_size, mp_co
5454
char *mp_obj_int_formatted_impl(char **buf, size_t *buf_size, size_t *fmt_size, mp_const_obj_t self_in,
5555
int base, const char *prefix, char base_char, char comma);
5656

57-
#if MICROPY_LONGINT_IMPL != MICROPY_LONGINT_IMPL_NONE
58-
void mp_obj_int_buffer_overflow_check(mp_obj_t self_in, size_t nbytes, bool is_signed);
59-
#endif
60-
void mp_small_int_buffer_overflow_check(mp_int_t val, size_t nbytes, bool is_signed);
61-
6257
mp_int_t mp_obj_int_hash(mp_obj_t self_in);
6358
mp_obj_t mp_obj_int_from_bytes_impl(bool big_endian, size_t len, const byte *buf);
64-
// Returns true if 'self_in' fit into 'len' bytes of 'buf' without overflowing, 'buf' is truncated otherwise.
65-
bool mp_obj_int_to_bytes_impl(mp_obj_t self_in, bool big_endian, size_t len, byte *buf);
59+
// Write an integer to a byte sequence.
60+
// If overflow_check is true, raises OverflowError if 'self_in' doesn't fit. If false, truncate to fit.
61+
void mp_obj_int_to_bytes(mp_obj_t self_in, size_t buf_len, byte *buf, bool big_endian, bool is_signed, bool overflow_check);
6662
int mp_obj_int_sign(mp_obj_t self_in);
6763
mp_obj_t mp_obj_int_unary_op(mp_unary_op_t op, mp_obj_t o_in);
6864
mp_obj_t mp_obj_int_binary_op(mp_binary_op_t op, mp_obj_t lhs_in, mp_obj_t rhs_in);

py/objint_longlong.c

Lines changed: 0 additions & 36 deletions
Original file line number | Diff line number | Diff line change
@@ -57,42 +57,6 @@ mp_obj_t mp_obj_int_from_bytes_impl(bool big_endian, size_t len, const byte *buf
5757
return mp_obj_new_int_from_ll(value);
5858
}
5959

60-
bool mp_obj_int_to_bytes_impl(mp_obj_t self_in, bool big_endian, size_t len, byte *buf) {
61-
assert(mp_obj_is_exact_type(self_in, &mp_type_int));
62-
mp_obj_int_t *self = self_in;
63-
long long val = self->val;
64-
size_t slen; // Number of bytes to represent val
65-
66-
// This logic has a twin in objint.c
67-
if (val > 0) {
68-
slen = (sizeof(long long) * 8 - mp_clzll(val) + 7) / 8;
69-
} else if (val < -1) {
70-
slen = (sizeof(long long) * 8 - mp_clzll(~val) + 8) / 8;
71-
} else {
72-
// clz of 0 is defined, so 0 and -1 map to 0 and 1
73-
slen = -val;
74-
}
75-
76-
if (slen > len) {
77-
return false; // Would overflow
78-
// TODO: Determine whether to copy and truncate, as some callers probably expect this...?
79-
}
80-
81-
if (big_endian) {
82-
byte *b = buf + len;
83-
while (b > buf) {
84-
*--b = val;
85-
val >>= 8;
86-
}
87-
} else {
88-
for (; len > 0; --len) {
89-
*buf++ = val;
90-
val >>= 8;
91-
}
92-
}
93-
return true;
94-
}
95-
9660
int mp_obj_int_sign(mp_obj_t self_in) {
9761
mp_longint_impl_t val;
9862
if (mp_obj_is_small_int(self_in)) {

0 commit comments

Comments
 (0)