WIP: py/objint,py/binary: Reduce the code size of int to byte conversions.

projectgus · projectgus · commit 77160ccdaa76 · 2024-11-27T17:38:41.000+11:00
Refactors similar code paths to a common mp_obj_int_to_bytes() function
to reduce code size. This commit should have no functional changes.

This work was funded through GitHub Sponsors.

Signed-off-by: Angus Gratton <angus@redyak.com.au>
diff --git a/extmod/moductypes.c b/extmod/moductypes.c
@@ -448,8 +448,8 @@ static mp_obj_t uctypes_struct_attr_op(mp_obj_t self_in, qstr attr, mp_obj_t set
                 if (self->flags == LAYOUT_NATIVE) {
                     set_aligned_basic(val_type & 6, self->addr + offset, val);
                 } else {
-                    mp_binary_set_int(GET_SCALAR_SIZE(val_type & 7), self->flags == LAYOUT_BIG_ENDIAN,
-                        self->addr + offset, val);
+                    size_t item_size = GET_SCALAR_SIZE(val_type & 7);
+                    mp_binary_set_int(item_size, self->addr + offset, item_size, val, self->flags == LAYOUT_BIG_ENDIAN);
                 }
                 return set_val; // just !MP_OBJ_NULL
             }
diff --git a/ports/unix/modffi.c b/ports/unix/modffi.c
@@ -446,7 +446,7 @@ static unsigned long long ffi_get_int_value(mp_obj_t o) {
         return MP_OBJ_SMALL_INT_VALUE(o);
     } else {
         unsigned long long res;
-        mp_obj_int_to_bytes_impl(o, MP_ENDIANNESS_BIG, sizeof(res), (byte *)&res);
+        mp_obj_int_to_bytes(o, sizeof(res), (byte *)&res, MP_ENDIANNESS_BIG, false, false);
         return res;
     }
 }
diff --git a/py/binary.c b/py/binary.c
@@ -374,7 +374,21 @@ mp_obj_t mp_binary_get_val(char struct_type, char val_type, byte *p_base, byte *
     }
 }
 
-void mp_binary_set_int(size_t val_sz, bool big_endian, byte *dest, mp_uint_t val) {
+void mp_binary_set_int(size_t dest_sz, byte *dest, size_t val_sz, mp_uint_t val, bool big_endian) {
+    if (dest_sz > val_sz) {
+        // zero/sign extension if needed
+        int c = ((mp_int_t)val < 0) ? 0xff : 0x00;
+        memset(dest, c, dest_sz);
+
+        // big endian: write val_sz bytes at end of 'dest'
+        if (big_endian) {
+            dest += dest_sz - val_sz;
+        }
+    } else if (dest_sz < val_sz) {
+        // truncate 'val' into 'dest'
+        val_sz = dest_sz;
+    }
+
     if (MP_ENDIANNESS_LITTLE && !big_endian) {
         memcpy(dest, &val, val_sz);
     } else if (MP_ENDIANNESS_BIG && big_endian) {
@@ -438,48 +452,21 @@ void mp_binary_set_val(char struct_type, char val_type, mp_obj_t val_in, byte *p
                 val = fp_dp.i64;
             } else {
                 int be = struct_type == '>';
-                mp_binary_set_int(sizeof(uint32_t), be, p, fp_dp.i32[MP_ENDIANNESS_BIG ^ be]);
+                mp_binary_set_int(sizeof(uint32_t), p, sizeof(uint32_t), fp_dp.i32[MP_ENDIANNESS_BIG ^ be], be);
+                // Now fall through and copy the second word, below
                 p += sizeof(uint32_t);
+                size = sizeof(uint32_t);
                 val = fp_dp.i32[MP_ENDIANNESS_LITTLE ^ be];
             }
             break;
         }
         #endif
-        default: {
-            #if OVERFLOW_CHECKS
-            bool signed_type = is_signed(val_type);
-            #endif
-            #if MICROPY_LONGINT_IMPL != MICROPY_LONGINT_IMPL_NONE
-            if (mp_obj_is_exact_type(val_in, &mp_type_int)) {
-                // It's a longint.
-                #if OVERFLOW_CHECKS
-                mp_obj_int_buffer_overflow_check(val_in, size, signed_type);
-                #endif
-                mp_obj_int_to_bytes_impl(val_in, struct_type == '>', size, p);
-                return;
-            }
-            #endif
-            {
-                val = mp_obj_get_int(val_in);
-
-                #if OVERFLOW_CHECKS
-                // Small int checking is separate, to be fast.
-                mp_small_int_buffer_overflow_check(val, size, signed_type);
-                #endif
-                // zero/sign extend if needed
-                if (MP_BYTES_PER_OBJ_WORD < 8 && size > sizeof(val)) {
-                    int c = (mp_int_t)val < 0 ? 0xff : 0x00;
-                    memset(p, c, size);
-                    if (struct_type == '>') {
-                        p += size - sizeof(val);
-                    }
-                }
-            }
-            break;
-        }
+        default:
+            mp_obj_int_to_bytes(val_in, size, p, struct_type == '>', is_signed(val_type), OVERFLOW_CHECKS);
+            return;
     }
 
-    mp_binary_set_int(MIN((size_t)size, sizeof(val)), struct_type == '>', p, val);
+    mp_binary_set_int(size, p, sizeof(val), val, struct_type == '>');
 }
 
 void mp_binary_set_val_array(char typecode, void *p, size_t index, mp_obj_t val_in) {
@@ -498,29 +485,9 @@ void mp_binary_set_val_array(char typecode, void *p, size_t index, mp_obj_t val_
             break;
         default: {
             size_t size = mp_binary_get_size('@', typecode, NULL);
-            #if OVERFLOW_CHECKS
-            bool signed_type = is_signed(typecode);
-            #endif
-            #if MICROPY_LONGINT_IMPL != MICROPY_LONGINT_IMPL_NONE
-            if (mp_obj_is_exact_type(val_in, &mp_type_int)) {
-                // It's a long int.
-                #if OVERFLOW_CHECKS
-                mp_obj_int_buffer_overflow_check(val_in, size, signed_type);
-                #endif
-                mp_obj_int_to_bytes_impl(val_in, MP_ENDIANNESS_BIG,
-                    size, (uint8_t *)p + index * size);
-                return;
-            }
-            #endif
-            mp_int_t val = mp_obj_get_int(val_in);
-            if (val < 0 && typecode == BYTEARRAY_TYPECODE) {
-                val = val & 0xFF;
-            }
-            #if OVERFLOW_CHECKS
-            // Small int checking is separate, to be fast.
-            mp_small_int_buffer_overflow_check(val, size, signed_type);
-            #endif
-            mp_binary_set_val_array_from_int(typecode, p, index, val);
+            p = (uint8_t *)p + index * size;
+            mp_obj_int_to_bytes(val_in, size, p, MP_ENDIANNESS_BIG, is_signed(typecode), OVERFLOW_CHECKS);
+            return;
         }
     }
 }
diff --git a/py/binary.h b/py/binary.h
@@ -41,6 +41,6 @@ void mp_binary_set_val_array_from_int(char typecode, void *p, size_t index, mp_i
 mp_obj_t mp_binary_get_val(char struct_type, char val_type, byte *p_base, byte **ptr);
 void mp_binary_set_val(char struct_type, char val_type, mp_obj_t val_in, byte *p_base, byte **ptr);
 long long mp_binary_get_int(size_t size, bool is_signed, bool big_endian, const byte *src);
-void mp_binary_set_int(size_t val_sz, bool big_endian, byte *dest, mp_uint_t val);
+void mp_binary_set_int(size_t dest_sz, byte *dest, size_t val_sz, mp_uint_t val, bool big_endian);
 
 #endif // MICROPY_INCLUDED_PY_BINARY_H
diff --git a/py/mpz.c b/py/mpz.c
@@ -1589,7 +1589,7 @@ bool mpz_as_uint_checked(const mpz_t *i, mp_uint_t *value) {
     return true;
 }
 
-bool mpz_as_bytes(const mpz_t *z, bool big_endian, bool as_signed, size_t len, byte *buf) {
+void mpz_as_bytes(const mpz_t *z, bool big_endian, bool as_signed, size_t len, byte *buf) {
     byte *b = buf;
     if (big_endian) {
         b += len;
@@ -1599,7 +1599,6 @@ bool mpz_as_bytes(const mpz_t *z, bool big_endian, bool as_signed, size_t len, b
     mpz_dbl_dig_t d = 0;
     mpz_dbl_dig_t carry = 1;
     size_t olen = len; // bytes in output buffer
-    bool ok = true;
     for (size_t zlen = z->len; zlen > 0; --zlen) {
         bits += DIG_SIZE;
         d = (d << DIG_SIZE) | *zdig++;
@@ -1612,7 +1611,6 @@ bool mpz_as_bytes(const mpz_t *z, bool big_endian, bool as_signed, size_t len, b
 
             if (!olen) {
                 // Buffer is full, only OK if all remaining bytes are zeroes
-                ok = ok && ((byte)val == 0);
                 continue;
             }
 
@@ -1625,16 +1623,10 @@ bool mpz_as_bytes(const mpz_t *z, bool big_endian, bool as_signed, size_t len, b
         }
     }
 
-    if (as_signed && olen == 0 && len > 0) {
-        // If output exhausted then ensure there was enough space for the sign bit
-        byte most_sig = big_endian ? buf[0] : buf[len - 1];
-        ok = ok && (bool)(most_sig & 0x80) == (bool)z->neg;
-    } else {
+    if (!(as_signed && olen == 0 && len > 0)) {
         // fill remainder of buf with zero/sign extension of the integer
         memset(big_endian ? buf : b, z->neg ? 0xff : 0x00, olen);
     }
-
-    return ok;
 }
 
 #if MICROPY_PY_BUILTINS_FLOAT
diff --git a/py/mpz.h b/py/mpz.h
@@ -145,8 +145,7 @@ static inline size_t mpz_max_num_bits(const mpz_t *z) {
 mp_int_t mpz_hash(const mpz_t *z);
 bool mpz_as_int_checked(const mpz_t *z, mp_int_t *value);
 bool mpz_as_uint_checked(const mpz_t *z, mp_uint_t *value);
-// Returns true if 'z' fit into 'len' bytes of 'buf' without overflowing, 'buf' is truncated otherwise.
-bool mpz_as_bytes(const mpz_t *z, bool big_endian, bool as_signed, size_t len, byte *buf);
+void mpz_as_bytes(const mpz_t *z, bool big_endian, bool as_signed, size_t len, byte *buf);
 #if MICROPY_PY_BUILTINS_FLOAT
 mp_float_t mpz_as_float(const mpz_t *z);
 #endif
diff --git a/py/objint.c b/py/objint.c
@@ -39,6 +39,11 @@
 #include <math.h>
 #endif
 
+#if MICROPY_LONGINT_IMPL == MICROPY_LONGINT_IMPL_MPZ
+// Generally this implementation lives in objint_mpz.c, but some small functions inlined here...
+#include "py/mpz.h"
+#endif
+
 // This dispatcher function is expected to be independent of the implementation of long int
 static mp_obj_t mp_obj_int_make_new(const mp_obj_type_t *type_in, size_t n_args, size_t n_kw, const mp_obj_t *args) {
     (void)type_in;
@@ -302,7 +307,7 @@ char *mp_obj_int_formatted(char **buf, size_t *buf_size, size_t *fmt_size, mp_co
 
 #if MICROPY_LONGINT_IMPL != MICROPY_LONGINT_IMPL_NONE
 
-void mp_obj_int_buffer_overflow_check(mp_obj_t self_in, size_t nbytes, bool is_signed) {
+static void mp_obj_int_buffer_overflow_check(mp_obj_t self_in, size_t nbytes, bool is_signed) {
     if (is_signed) {
         // edge = 1 << (nbytes * 8 - 1)
         mp_obj_t edge = mp_binary_op(MP_BINARY_OP_INPLACE_LSHIFT,
@@ -346,7 +351,34 @@ void mp_obj_int_buffer_overflow_check(mp_obj_t self_in, size_t nbytes, bool is_s
 
 #endif // MICROPY_LONGINT_IMPL != MICROPY_LONGINT_IMPL_NONE
 
-void mp_small_int_buffer_overflow_check(mp_int_t val, size_t nbytes, bool is_signed) {
+#if MICROPY_LONGINT_IMPL == MICROPY_LONGINT_IMPL_LONGLONG
+// Write the low 'len' bytes of a long long int to 'buf', sign-extending when len > 8 (lives here, not objint_longlong.c, for code size).
+static void longint_to_bytes(mp_obj_int_t *self, bool big_endian, size_t len, byte *buf) {
+    MP_STATIC_ASSERT(sizeof(mp_uint_t) == 4);
+    long long val = self->val;
+    mp_uint_t lower = val;
+    mp_uint_t upper = (val >> 32);

+    if (big_endian) {
+        if (len > 4) {
+            // write the least significant 4 bytes at the end
+            mp_binary_set_int(4, buf + len - 4, sizeof(lower), lower, true);
+        }
+        // remaining leading bytes take 'upper' (sign-extended); a buffer of <= 4 bytes only holds 'lower'
+        mp_binary_set_int(len > 4 ? len - 4 : len, buf, sizeof(upper), len > 4 ? upper : lower, true);
+    } else {
+        // write (up to) the least significant 4 bytes at the start
+        mp_binary_set_int(len > 4 ? 4 : len, buf, sizeof(lower), lower, false);
+        if (len > 4) {
+            // write the most significant bytes at the end, extending if necessary
+            mp_binary_set_int(len - 4, buf + 4, sizeof(upper), upper, false);
+        }
+    }
+}
+#endif // MICROPY_LONGINT_IMPL == MICROPY_LONGINT_IMPL_LONGLONG
+
+
+static void mp_small_int_buffer_overflow_check(mp_int_t val, size_t nbytes, bool is_signed) {
     // Fast path for zero.
     if (val == 0) {
         return;
@@ -379,6 +411,32 @@ void mp_small_int_buffer_overflow_check(mp_int_t val, size_t nbytes, bool is_sig
     mp_raise_msg_varg(&mp_type_OverflowError, MP_ERROR_TEXT("value would overflow a %d byte buffer"), nbytes);
 }
 
+// Write int 'self_in' to 'buf' (buf_len bytes); 'is_signed' is only used by the overflow check, which raises OverflowError; else truncate.
+void mp_obj_int_to_bytes(mp_obj_t self_in, size_t buf_len, byte *buf, bool big_endian, bool is_signed, bool overflow_check) {
+    #if MICROPY_LONGINT_IMPL != MICROPY_LONGINT_IMPL_NONE
+    if (mp_obj_is_exact_type(self_in, &mp_type_int)) {
+        mp_obj_int_t *self = MP_OBJ_TO_PTR(self_in);
+        if (overflow_check) {
+            mp_obj_int_buffer_overflow_check(self_in, buf_len, is_signed);
+        }
+        #if MICROPY_LONGINT_IMPL == MICROPY_LONGINT_IMPL_MPZ
+        mpz_as_bytes(&self->mpz, big_endian, self->mpz.neg, buf_len, buf);
+        #else // MICROPY_LONGINT_IMPL_LONGLONG
+        longint_to_bytes(self, big_endian, buf_len, buf);
+        #endif
+        return;
+    }
+    #endif
+    // self_in is either a smallint, or another type convertible to mp_int_t (e.g. bool)

+    mp_int_t val = mp_obj_get_int(self_in);
+    if (overflow_check) {
+        mp_small_int_buffer_overflow_check(val, buf_len, is_signed);
+    }
+    mp_binary_set_int(buf_len, buf, sizeof(val), val, big_endian);
+}
+
+
 #if MICROPY_LONGINT_IMPL == MICROPY_LONGINT_IMPL_NONE
 
 int mp_obj_int_sign(mp_obj_t self_in) {
@@ -509,36 +567,21 @@ static mp_obj_t int_to_bytes(size_t n_args, const mp_obj_t *pos_args, mp_map_t *
     mp_arg_val_t args[MP_ARRAY_SIZE(allowed_args)];
     mp_arg_parse_all(n_args - 1, pos_args + 1, kw_args, MP_ARRAY_SIZE(allowed_args), allowed_args, args);
 
+    mp_obj_t self = pos_args[0];
+
     mp_int_t dlen = args[ARG_length].u_int;
     if (dlen < 0) {
         mp_raise_ValueError(NULL);
     }
 
-    mp_obj_t self = pos_args[0];
-    bool big_endian = args[ARG_byteorder].u_obj != MP_OBJ_NEW_QSTR(MP_QSTR_little);
-    bool signed_ = args[ARG_signed].u_bool;
-
     vstr_t vstr;
     vstr_init_len(&vstr, dlen);
     byte *data = (byte *)vstr.buf;
 
-    #if MICROPY_LONGINT_IMPL != MICROPY_LONGINT_IMPL_NONE
-    if (!mp_obj_is_small_int(self)) {
-        mp_obj_int_buffer_overflow_check(self, dlen, signed_);
-        mp_obj_int_to_bytes_impl(self, big_endian, dlen, data);
-    } else
-    #endif
-    {
-        mp_int_t val = MP_OBJ_SMALL_INT_VALUE(self);
-        // Small int checking is separate, to be fast.
-        mp_small_int_buffer_overflow_check(val, dlen, signed_);
-        size_t l = MIN((size_t)dlen, sizeof(val));
-        if (val < 0) {
-            // Sign extend negative numbers.
-            memset(data, -1, dlen);
-        }
-        mp_binary_set_int(l, big_endian, data + (big_endian ? (dlen - l) : 0), val);
-    }
+    bool big_endian = args[ARG_byteorder].u_obj != MP_OBJ_NEW_QSTR(MP_QSTR_little);
+    bool signed_ = args[ARG_signed].u_bool;
+
+    mp_obj_int_to_bytes(self, dlen, data, big_endian, signed_, true);
 
     return mp_obj_new_bytes_from_vstr(&vstr);
 }
diff --git a/py/objint.h b/py/objint.h
@@ -54,15 +54,11 @@ char *mp_obj_int_formatted(char **buf, size_t *buf_size, size_t *fmt_size, mp_co
 char *mp_obj_int_formatted_impl(char **buf, size_t *buf_size, size_t *fmt_size, mp_const_obj_t self_in,
     int base, const char *prefix, char base_char, char comma);
 
-#if MICROPY_LONGINT_IMPL != MICROPY_LONGINT_IMPL_NONE
-void mp_obj_int_buffer_overflow_check(mp_obj_t self_in, size_t nbytes, bool is_signed);
-#endif
-void mp_small_int_buffer_overflow_check(mp_int_t val, size_t nbytes, bool is_signed);
-
 mp_int_t mp_obj_int_hash(mp_obj_t self_in);
 mp_obj_t mp_obj_int_from_bytes_impl(bool big_endian, size_t len, const byte *buf);
-// Returns true if 'self_in' fit into 'len' bytes of 'buf' without overflowing, 'buf' is truncated otherwise.
-bool mp_obj_int_to_bytes_impl(mp_obj_t self_in, bool big_endian, size_t len, byte *buf);
+// Write an integer to a byte sequence.
+// If overflow_check is true, raises OverflowError if 'self_in' doesn't fit. If false, truncate to fit.
+void mp_obj_int_to_bytes(mp_obj_t self_in, size_t buf_len, byte *buf, bool big_endian, bool is_signed, bool overflow_check);
 int mp_obj_int_sign(mp_obj_t self_in);
 mp_obj_t mp_obj_int_unary_op(mp_unary_op_t op, mp_obj_t o_in);
 mp_obj_t mp_obj_int_binary_op(mp_binary_op_t op, mp_obj_t lhs_in, mp_obj_t rhs_in);
diff --git a/py/objint_longlong.c b/py/objint_longlong.c
@@ -57,42 +57,6 @@ mp_obj_t mp_obj_int_from_bytes_impl(bool big_endian, size_t len, const byte *buf
     return mp_obj_new_int_from_ll(value);
 }
 
-bool mp_obj_int_to_bytes_impl(mp_obj_t self_in, bool big_endian, size_t len, byte *buf) {
-    assert(mp_obj_is_exact_type(self_in, &mp_type_int));
-    mp_obj_int_t *self = self_in;
-    long long val = self->val;
-    size_t slen; // Number of bytes to represent val
-
-    // This logic has a twin in objint.c
-    if (val > 0) {
-        slen = (sizeof(long long) * 8 - mp_clzll(val) + 7) / 8;
-    } else if (val < -1) {
-        slen = (sizeof(long long) * 8 - mp_clzll(~val) + 8) / 8;
-    } else {
-        // clz of 0 is defined, so 0 and -1 map to 0 and 1
-        slen = -val;
-    }
-
-    if (slen > len) {
-        return false; // Would overflow
-        // TODO: Determine whether to copy and truncate, as some callers probably expect this...?
-    }
-
-    if (big_endian) {
-        byte *b = buf + len;
-        while (b > buf) {
-            *--b = val;
-            val >>= 8;
-        }
-    } else {
-        for (; len > 0; --len) {
-            *buf++ = val;
-            val >>= 8;
-        }
-    }
-    return true;
-}
-
 int mp_obj_int_sign(mp_obj_t self_in) {
     mp_longint_impl_t val;
     if (mp_obj_is_small_int(self_in)) {
diff --git a/py/objint_mpz.c b/py/objint_mpz.c

Original file line number	Diff line number	Diff line change
`@@ -448,8 +448,8 @@ static mp_obj_t uctypes_struct_attr_op(mp_obj_t self_in, qstr attr, mp_obj_t set`
`448`	`448`	`if (self->flags == LAYOUT_NATIVE) {`
`449`	`449`	`set_aligned_basic(val_type & 6, self->addr + offset, val);`
`450`	`450`	`} else {`
`451`		`- mp_binary_set_int(GET_SCALAR_SIZE(val_type & 7), self->flags == LAYOUT_BIG_ENDIAN,`
`452`		`- self->addr + offset, val);`
	`451`	`+ size_t item_size = GET_SCALAR_SIZE(val_type & 7);`
	`452`	`+ mp_binary_set_int(item_size, self->addr + offset, item_size, val, self->flags == LAYOUT_BIG_ENDIAN);`
`453`	`453`	`}`
`454`	`454`	`return set_val; // just !MP_OBJ_NULL`
`455`	`455`	`}`
Original file line number	Diff line number	Diff line change
`@@ -446,7 +446,7 @@ static unsigned long long ffi_get_int_value(mp_obj_t o) {`
`446`	`446`	`return MP_OBJ_SMALL_INT_VALUE(o);`
`447`	`447`	`} else {`
`448`	`448`	`unsigned long long res;`
`449`		`- mp_obj_int_to_bytes_impl(o, MP_ENDIANNESS_BIG, sizeof(res), (byte *)&res);`
	`449`	`+ mp_obj_int_to_bytes(o, sizeof(res), (byte *)&res, MP_ENDIANNESS_BIG, false, false);`
`450`	`450`	`return res;`
`451`	`451`	`}`
`452`	`452`	`}`