From 2b1ab561c028e0696064c854fd6a2f600e83f621 Mon Sep 17 00:00:00 2001 From: Jeff Epler Date: Sun, 1 Jun 2025 19:47:47 +0200 Subject: [PATCH 1/5] py: Introduce MICROPY_NONNULL_COMPLIANT. Signed-off-by: Jeff Epler --- .../unix/variants/coverage/mpconfigvariant.h | 3 +++ py/misc.h | 23 +++++++++++++++++++ py/mpconfig.h | 5 ++++ 3 files changed, 31 insertions(+) diff --git a/ports/unix/variants/coverage/mpconfigvariant.h b/ports/unix/variants/coverage/mpconfigvariant.h index 04b5b8ae1ae70..1732bebc6759e 100644 --- a/ports/unix/variants/coverage/mpconfigvariant.h +++ b/ports/unix/variants/coverage/mpconfigvariant.h @@ -51,3 +51,6 @@ #define MICROPY_HW_MCU_NAME MICROPY_PY_SYS_PLATFORM // Keep the standard banner message #define MICROPY_BANNER_MACHINE MICROPY_PY_SYS_PLATFORM " [" MICROPY_PLATFORM_COMPILER "] version" + +// Add extra checks to satisfy gcc -fsanitize-undefined +#define MICROPY_NONNULL_COMPLIANT (1) diff --git a/py/misc.h b/py/misc.h index 49f2f87111157..8cf4c476b0be8 100644 --- a/py/misc.h +++ b/py/misc.h @@ -421,4 +421,27 @@ static inline uint32_t mp_clz_mpi(mp_int_t x) { #endif } +/** NULL-protected mem* wrappers ****/ +// In ISO C, memcpy(dest, NULL, 0) and similar are undefined (the source +// and destination pointers are annotated as "non-null"). +// This doesn't matter much, as the implementation provided on embedded +// platforms never dereferences the source or destintaion pointer if the +// length is zero. However, on Linux and particularly under undefined +// behavior sanitizers, this is treated as an error. +// +// Use the "0" alternative where a NULL pointer and a zero length can +// occur together, and on systems where compliance with ISO C is +// important, ensure that MICROPY_NONNULL_COMPLIANT is defined to (1). +#if MICROPY_NONNULL_COMPLIANT +#define memcpy0(dest, src, n) ((n) != 0 ? memcpy((dest), (src), (n)) : (dest)) +#define memmove0(dest, src, n) ((n) != 0 ? memmove((dest), (src), (n)) : (dest)) +#define memset0(s, c, n) ((n) != 0 ? memset((s), (c), (n)) : (s)) +#define memcmp0(s1, s2, n) ((n) != 0 ? memcmp((s1), (s2), (n)) : (0)) +#else +#define memcpy0(dest, src, n) memcpy((dest), (src), (n)) +#define memmove0(dest, src, n) memmove((dest), (src), (n)) +#define memset0(s, c, n) memset((s), (c), (n)) +#define memcmp0(s1, s2, n) memcmp((s1), (s2), (n)) +#endif + #endif // MICROPY_INCLUDED_PY_MISC_H diff --git a/py/mpconfig.h b/py/mpconfig.h index 01712bd5b4d90..6772b704e39dd 100644 --- a/py/mpconfig.h +++ b/py/mpconfig.h @@ -2200,4 +2200,9 @@ typedef double mp_float_t; #define MP_WARN_CAT(x) (NULL) #endif +// If non-zero, introduce extra checks to ensure calls to functions like +// memcpy() are C99 compliant +#if !defined(MICROPY_NONNULL_COMPLIANT) +#define MICROPY_NONNULL_COMPLIANT (0) +#endif #endif // MICROPY_INCLUDED_PY_MPCONFIG_H From dd33e37897071a42dbf46f51d86c509975d1e107 Mon Sep 17 00:00:00 2001 From: Jeff Epler Date: Sun, 1 Jun 2025 20:06:56 +0200 Subject: [PATCH 2/5] py: Fix undefined left shift operations. .. by ensuring the value to be shifted is an unsigned of the appropriate type. This fixes several runtime diagnostics such as ``` ../../py/binary.c:199:28: runtime error: left shift of 32768 by 16 places cannot be represented in type 'int' ``` Signed-off-by: Jeff Epler --- py/binary.c | 2 +- py/persistentcode.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/py/binary.c b/py/binary.c index 4fc8f751ad39a..48d3421bca963 100644 --- a/py/binary.c +++ b/py/binary.c @@ -196,7 +196,7 @@ static float mp_decode_half_float(uint16_t hf) { ++e; } - fpu.i = ((hf & 0x8000) << 16) | (e << 23) | (m << 13); + fpu.i = ((hf & 0x8000u) << 16) | (e << 23) | (m << 13); return fpu.f; } diff --git a/py/persistentcode.c b/py/persistentcode.c index 43207a0cc8f66..6ec0717f948b4 100644 --- a/py/persistentcode.c +++ b/py/persistentcode.c @@ -759,7 +759,7 @@ static void bit_vector_clear(bit_vector_t *self) { static bool bit_vector_is_set(bit_vector_t *self, size_t index) { const size_t bits_size = sizeof(*self->bits) * MP_BITS_PER_BYTE; return index / bits_size < self->alloc - && (self->bits[index / bits_size] & (1 << (index % bits_size))) != 0; + && (self->bits[index / bits_size] & ((uintptr_t)1 << (index % bits_size))) != 0; } static void bit_vector_set(bit_vector_t *self, size_t index) { @@ -770,7 +770,7 @@ static void bit_vector_set(bit_vector_t *self, size_t index) { self->bits = m_renew(uintptr_t, self->bits, self->alloc, new_alloc); self->alloc = new_alloc; } - self->bits[index / bits_size] |= 1 << (index % bits_size); + self->bits[index / bits_size] |= (uintptr_t)1 << (index % bits_size); } typedef struct _mp_opcode_t { From 0bee51658b89c6e499885e12ad0c38522d42bce7 Mon Sep 17 00:00:00 2001 From: Jeff Epler Date: Sun, 1 Jun 2025 20:09:16 +0200 Subject: [PATCH 3/5] mpz: Fix invalid operation on NULL pointer. When an mpz number is empty, its digit pointer `dig` is NULL. If it is, then the result of decrementing it `d--` results in undefined behavior. When micropython is built to be "non-null compliant" (which is now a bit of a misnomer!), add an extra check for `d` to be non-NULL to prevent the diagnostic. Signed-off-by: Jeff Epler --- py/mpz.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/py/mpz.c b/py/mpz.c index 471bd1598188e..a1a3f38cf29f2 100644 --- a/py/mpz.c +++ b/py/mpz.c @@ -1552,7 +1552,11 @@ bool mpz_as_int_checked(const mpz_t *i, mp_int_t *value) { mp_uint_t val = 0; mpz_dig_t *d = i->dig + i->len; - while (d-- > i->dig) { + while ( + #if MICROPY_NONNULL_COMPLIANT + d && + #endif + d-- > i->dig) { if (val > (~(MP_OBJ_WORD_MSBIT_HIGH) >> DIG_SIZE)) { // will overflow return false; From 739434ad5817143b4a9397c07b31bea158251939 Mon Sep 17 00:00:00 2001 From: Jeff Epler Date: Sun, 1 Jun 2025 20:11:24 +0200 Subject: [PATCH 4/5] py: Use mem...0 variant where NULLs can appear. These sites all caused runtime diagnostics during the unix coverage test. Fix them by calling the "0" variant of the mem* function. Additionally, in `mp_seq_replace_slice_no_grow` a `do {} while(0)` construct is added to the macro, and a void cast is added to explicitly denote that the return value of memmove0 is unused. Signed-off-by: Jeff Epler --- py/obj.h | 11 ++++++----- py/objarray.c | 6 +++--- py/objclosure.c | 2 +- py/objdict.c | 2 +- py/objset.c | 2 +- py/sequence.c | 2 +- 6 files changed, 13 insertions(+), 12 deletions(-) diff --git a/py/obj.h b/py/obj.h index 07362522474d6..b874eb6deb8cb 100644 --- a/py/obj.h +++ b/py/obj.h @@ -1258,7 +1258,7 @@ void mp_seq_multiply(const void *items, size_t item_sz, size_t len, size_t times #if MICROPY_PY_BUILTINS_SLICE bool mp_seq_get_fast_slice_indexes(mp_uint_t len, mp_obj_t slice, mp_bound_slice_t *indexes); #endif -#define mp_seq_copy(dest, src, len, item_t) memcpy(dest, src, len * sizeof(item_t)) +#define mp_seq_copy(dest, src, len, item_t) memcpy0(dest, src, len * sizeof(item_t)) #define mp_seq_cat(dest, src1, len1, src2, len2, item_t) { memcpy(dest, src1, (len1) * sizeof(item_t)); memcpy(dest + (len1), src2, (len2) * sizeof(item_t)); } bool mp_seq_cmp_bytes(mp_uint_t op, const byte *data1, size_t len1, const byte *data2, size_t len2); bool mp_seq_cmp_objs(mp_uint_t op, const mp_obj_t *items1, size_t len1, const mp_obj_t *items2, size_t len2); @@ -1267,12 +1267,13 @@ mp_obj_t mp_seq_count_obj(const mp_obj_t *items, size_t len, mp_obj_t value); mp_obj_t mp_seq_extract_slice(const mp_obj_t *seq, mp_bound_slice_t *indexes); // Helper to clear stale pointers from allocated, but unused memory, to preclude GC problems -#define mp_seq_clear(start, len, alloc_len, item_sz) memset((byte *)(start) + (len) * (item_sz), 0, ((alloc_len) - (len)) * (item_sz)) +#define mp_seq_clear(start, len, alloc_len, item_sz) memset0((byte *)(start) + (len) * (item_sz), 0, ((alloc_len) - (len)) * (item_sz)) // Note: dest and slice regions may overlap -#define mp_seq_replace_slice_no_grow(dest, dest_len, beg, end, slice, slice_len, item_sz) \ - memmove(((char *)dest) + (beg) * (item_sz), slice, slice_len * (item_sz)); \ - memmove(((char *)dest) + (beg + slice_len) * (item_sz), ((char *)dest) + (end) * (item_sz), (dest_len - end) * (item_sz)); +#define mp_seq_replace_slice_no_grow(dest, dest_len, beg, end, slice, slice_len, item_sz) do { \ + (void)memmove0(((char *)dest) + (beg) * (item_sz), slice, slice_len * (item_sz)); \ + (void)memmove0(((char *)dest) + (beg + slice_len) * (item_sz), ((char *)dest) + (end) * (item_sz), (dest_len - end) * (item_sz)); \ +} while (0) // Note: dest and slice regions may overlap #define mp_seq_replace_slice_grow_inplace(dest, dest_len, beg, end, slice, slice_len, len_adj, item_sz) \ diff --git a/py/objarray.c b/py/objarray.c index 9d68aa70616ca..8f7fd6461e33c 100644 --- a/py/objarray.c +++ b/py/objarray.c @@ -128,7 +128,7 @@ static mp_obj_t array_construct(char typecode, mp_obj_t initializer) { size_t sz = mp_binary_get_size('@', typecode, NULL); size_t len = bufinfo.len / sz; mp_obj_array_t *o = array_new(typecode, len); - memcpy(o->items, bufinfo.buf, len * sz); + memcpy0(o->items, bufinfo.buf, len * sz); return MP_OBJ_FROM_PTR(o); } @@ -189,7 +189,7 @@ static mp_obj_t bytearray_make_new(const mp_obj_type_t *type_in, size_t n_args, // 1 arg, an integer: construct a blank bytearray of that length mp_uint_t len = mp_obj_get_int(args[0]); mp_obj_array_t *o = array_new(BYTEARRAY_TYPECODE, len); - memset(o->items, 0, len); + memset0(o->items, 0, len); return MP_OBJ_FROM_PTR(o); } else { // 1 arg: construct the bytearray from that @@ -670,7 +670,7 @@ size_t mp_obj_array_len(mp_obj_t self_in) { #if MICROPY_PY_BUILTINS_BYTEARRAY mp_obj_t mp_obj_new_bytearray(size_t n, const void *items) { mp_obj_array_t *o = array_new(BYTEARRAY_TYPECODE, n); - memcpy(o->items, items, n); + memcpy0(o->items, items, n); return MP_OBJ_FROM_PTR(o); } diff --git a/py/objclosure.c b/py/objclosure.c index 3ba507b959382..5ba017b6ac579 100644 --- a/py/objclosure.c +++ b/py/objclosure.c @@ -46,7 +46,7 @@ static mp_obj_t closure_call(mp_obj_t self_in, size_t n_args, size_t n_kw, const // use stack to allocate temporary args array mp_obj_t args2[5]; memcpy(args2, self->closed, self->n_closed * sizeof(mp_obj_t)); - memcpy(args2 + self->n_closed, args, (n_args + 2 * n_kw) * sizeof(mp_obj_t)); + memcpy0(args2 + self->n_closed, args, (n_args + 2 * n_kw) * sizeof(mp_obj_t)); return mp_call_function_n_kw(self->fun, self->n_closed + n_args, n_kw, args2); } else { // use heap to allocate temporary args array diff --git a/py/objdict.c b/py/objdict.c index cf64fa9555a28..d96facd9df0f9 100644 --- a/py/objdict.c +++ b/py/objdict.c @@ -269,7 +269,7 @@ mp_obj_t mp_obj_dict_copy(mp_obj_t self_in) { other->map.all_keys_are_qstrs = self->map.all_keys_are_qstrs; other->map.is_fixed = 0; other->map.is_ordered = self->map.is_ordered; - memcpy(other->map.table, self->map.table, self->map.alloc * sizeof(mp_map_elem_t)); + memcpy0(other->map.table, self->map.table, self->map.alloc * sizeof(mp_map_elem_t)); return other_out; } static MP_DEFINE_CONST_FUN_OBJ_1(dict_copy_obj, mp_obj_dict_copy); diff --git a/py/objset.c b/py/objset.c index c8fa12a7ec58f..7970cfa158c5f 100644 --- a/py/objset.c +++ b/py/objset.c @@ -177,7 +177,7 @@ static mp_obj_t set_copy(mp_obj_t self_in) { mp_obj_set_t *other = mp_obj_malloc(mp_obj_set_t, self->base.type); mp_set_init(&other->set, self->set.alloc); other->set.used = self->set.used; - memcpy(other->set.table, self->set.table, self->set.alloc * sizeof(mp_obj_t)); + memcpy0(other->set.table, self->set.table, self->set.alloc * sizeof(mp_obj_t)); return MP_OBJ_FROM_PTR(other); } static MP_DEFINE_CONST_FUN_OBJ_1(set_copy_obj, set_copy); diff --git a/py/sequence.c b/py/sequence.c index 9eb67ada6e82e..528399e1fa88b 100644 --- a/py/sequence.c +++ b/py/sequence.c @@ -103,7 +103,7 @@ bool mp_seq_cmp_bytes(mp_uint_t op, const byte *data1, size_t len1, const byte * } } size_t min_len = len1 < len2 ? len1 : len2; - int res = memcmp(data1, data2, min_len); + int res = memcmp0(data1, data2, min_len); if (op == MP_BINARY_OP_EQUAL) { // If we are checking for equality, here's the answer return res == 0; From f549cdd6b7d08c3033aece413a529669e4417472 Mon Sep 17 00:00:00 2001 From: Jeff Epler Date: Sun, 1 Jun 2025 20:11:55 +0200 Subject: [PATCH 5/5] unix: Check for UB during coverage build. Signed-off-by: Jeff Epler --- ports/unix/variants/coverage/mpconfigvariant.mk | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/ports/unix/variants/coverage/mpconfigvariant.mk b/ports/unix/variants/coverage/mpconfigvariant.mk index cc37ba1582470..2d2f0ee0a1ecb 100644 --- a/ports/unix/variants/coverage/mpconfigvariant.mk +++ b/ports/unix/variants/coverage/mpconfigvariant.mk @@ -1,13 +1,15 @@ # Disable optimisations and enable assert() on coverage builds. DEBUG ?= 1 +SANITIZER ?= -fsanitize=undefined + CFLAGS += \ -fprofile-arcs -ftest-coverage \ -Wformat -Wmissing-declarations -Wmissing-prototypes \ -Wold-style-definition -Wpointer-arith -Wshadow -Wuninitialized -Wunused-parameter \ - -DMICROPY_UNIX_COVERAGE + -DMICROPY_UNIX_COVERAGE $(SANITIZER) -LDFLAGS += -fprofile-arcs -ftest-coverage +LDFLAGS += -fprofile-arcs -ftest-coverage $(SANITIZER) FROZEN_MANIFEST ?= $(VARIANT_DIR)/manifest.py USER_C_MODULES = $(TOP)/examples/usercmodule