Skip to content

py/runtime: Avoid crash on calling members of uninitialized native type. #9997

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
83 changes: 83 additions & 0 deletions examples/usercmodule/cexample/examplemodule.c
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,88 @@ MP_DEFINE_CONST_OBJ_TYPE(
locals_dict, &example_Timer_locals_dict
);


// What follows is a *separate* class definition that demonstrates more
// advanced techniques to implement other Python-like features, such as:
//
// - A custom representation for __repr__ and __str__.
// - Custom attribute handling to create a read/write "property".
//
// It re-uses some of the elements of the basic Timer class. This is allowed
// because they both use example_Timer_obj_t as the instance structure.

// Handles AdvancedTimer.__repr__, AdvancedTimer.__str__.
STATIC void example_AdvancedTimer_print(const mp_print_t *print, mp_obj_t self_in, mp_print_kind_t kind) {

// Get the elapsed time. In this case, it's also a demonstration of calling
// the equivalent of self.time() in the C API. This is not usually very
// efficient, but it can sometimes be useful.
mp_uint_t elapsed = mp_obj_get_int(example_Timer_time(self_in));

// We'll make all representations print at least the class name.
mp_printf(print, "%q()", MP_QSTR_AdvancedTimer);

// Decide what else to print based on print kind.
if (kind == PRINT_STR) {
// For __str__, let's attempt to make it more readable.
mp_printf(print, " # created %d seconds ago", elapsed / 1000);
}
}

// Handles AdvancedTimer.seconds for reading and writing.
STATIC void example_AdvancedTimer_attribute_handler(mp_obj_t self_in, qstr attr, mp_obj_t *dest) {

// In this example, we only want to handle the .seconds attribute in a
// special way.
if (attr != MP_QSTR_seconds) {
// Attribute not found, continue lookup in locals dict. This way,
// methods like .time() will be handled normally.
dest[1] = MP_OBJ_SENTINEL;
return;
}

// Get reference to AdvancedTimer instance.
example_Timer_obj_t *self = MP_OBJ_TO_PTR(self_in);

// Check if this is a read operation.
if (dest[0] == MP_OBJ_NULL) {
// It's read, so "return" elapsed seconds by storing it in dest[0].
mp_uint_t elapsed = mp_hal_ticks_ms() - self->start_time;
dest[0] = mp_obj_new_int_from_uint(elapsed / 1000);
return;
}
// Check if this is a delete or store operation.
else if (dest[0] == MP_OBJ_SENTINEL) {
// It's delete or store. Now check which one.
if (dest[1] == MP_OBJ_NULL) {
// It's delete. But in this example we don't want to allow it
// so we just return.
return;
} else {
// It's write. First, get the value that the user is trying to set.
mp_uint_t desired_ms = mp_obj_get_int(dest[1]) * 1000;
// Use it to update the start time. This way, the next read will
// report the updated time.
self->start_time = mp_hal_ticks_ms() - desired_ms;

// Indicate successful store.
dest[0] = MP_OBJ_NULL;
return;
}
}
}

// This defines the type(AdvancedTimer) object.
MP_DEFINE_CONST_OBJ_TYPE(
example_type_AdvancedTimer,
MP_QSTR_AdvancedTimer,
MP_TYPE_FLAG_NONE,
attr, example_AdvancedTimer_attribute_handler,
print, example_AdvancedTimer_print,
make_new, example_Timer_make_new,
locals_dict, &example_Timer_locals_dict
);

// Define all properties of the module.
// Table entries are key/value pairs of the attribute name (a string)
// and the MicroPython object reference.
Expand All @@ -78,6 +160,7 @@ STATIC const mp_rom_map_elem_t example_module_globals_table[] = {
{ MP_ROM_QSTR(MP_QSTR___name__), MP_ROM_QSTR(MP_QSTR_cexample) },
{ MP_ROM_QSTR(MP_QSTR_add_ints), MP_ROM_PTR(&example_add_ints_obj) },
{ MP_ROM_QSTR(MP_QSTR_Timer), MP_ROM_PTR(&example_type_Timer) },
{ MP_ROM_QSTR(MP_QSTR_AdvancedTimer), MP_ROM_PTR(&example_type_AdvancedTimer) },
};
STATIC MP_DEFINE_CONST_DICT(example_module_globals, example_module_globals_table);

Expand Down
8 changes: 7 additions & 1 deletion py/objtype.c
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ STATIC int instance_count_native_bases(const mp_obj_type_t *type, const mp_obj_t
}
}

// This wrapper function is allows a subclass of a native type to call the
// This wrapper function allows a subclass of a native type to call the
// __init__() method (corresponding to type->make_new) of the native type.
STATIC mp_obj_t native_base_init_wrapper(size_t n_args, const mp_obj_t *args) {
mp_obj_instance_t *self = MP_OBJ_TO_PTR(args[0]);
Expand Down Expand Up @@ -170,6 +170,12 @@ STATIC void mp_obj_class_lookup(struct class_lookup_data *lookup, const mp_obj_t
if (obj != NULL && mp_obj_is_native_type(type) && type != &mp_type_object /* object is not a real type */) {
// If we're dealing with native base class, then it applies to native sub-object
obj_obj = obj->subobj[0];
#if MICROPY_BUILTIN_METHOD_CHECK_SELF_ARG
if (obj_obj == MP_OBJ_FROM_PTR(&native_base_init_wrapper_obj)) {
// But we shouldn't attempt lookups on object that is not yet instantiated.
mp_raise_msg(&mp_type_AttributeError, MP_ERROR_TEXT("call super().__init__() first"));
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It might be possible to just fix it and make it work like CPython (as much as that's possible without having both __new__ and __init__ in MicroPython) by doing this:

if (obj_obj == ...) {
    native_base_init_wrapper(0, NULL);
    obj_obj = obj->subobj[0];
}

That will call __init__ automatically and reload the newly-created native instance.

Then I wouldn't wrap it in MICROPY_BUILTIN_METHOD_CHECK_SELF_ARG, just do it unconditionally.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(if you do this, please don't remove the C example module or tests, they are good!)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Wouldn't that segfault (null dereference)? I started digging based on your suggestion. Perhaps did you mean something like this? This seems to work, with no AttributeError required, so closer to CPython indeed.

diff --git a/py/objtype.c b/py/objtype.c
index a202b023b..f5100e239 100644
--- a/py/objtype.c
+++ b/py/objtype.c
@@ -170,12 +170,12 @@ STATIC void mp_obj_class_lookup(struct class_lookup_data *lookup, const mp_obj_t
                     if (obj != NULL && mp_obj_is_native_type(type) && type != &mp_type_object /* object is not a real type */) {
                         // If we're dealing with native base class, then it applies to native sub-object
                         obj_obj = obj->subobj[0];
-                        #if MICROPY_BUILTIN_METHOD_CHECK_SELF_ARG
                         if (obj_obj == MP_OBJ_FROM_PTR(&native_base_init_wrapper_obj)) {
-                            // But we shouldn't attempt lookups on object that is not yet instantiated.
-                            mp_raise_msg(&mp_type_AttributeError, MP_ERROR_TEXT("call super().__init__() first"));
+                            // Attempted lookup on uninstantiated native object, so instantiate now.
+                            mp_obj_t args[] = {obj};
+                            native_base_init_wrapper(1, args);
+                            obj_obj = obj->subobj[0];
                         }
-                        #endif // MICROPY_BUILTIN_METHOD_CHECK_SELF_ARG
                     } else {
                         obj_obj = MP_OBJ_FROM_PTR(obj);
                     }

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yep, that's right!

(We had a fairly long discussion around this today and wrapped up the conclusion a bit quickly :) )

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for the instant feedback. While updating the tests, I noticed this would cause make_new to be called twice:

class SubTimer(AdvancedTimer):
    def __init__(self):

        # At this point, self does not yet represent a AdvancedTimer instance.
        print(self)  # <function>

        print(self.time())   # <--- with proposed variant, this will trigger make_new instead of raising AttributeError

        # Initialize base class.
        super().__init__(self)  # <---- But then this calls make_new again

watch = SubTimer()

Copy link
Contributor Author

@laurensvalk laurensvalk Dec 16, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The behavior is still a bit odd (and different from CPython). Take this slightly more elaborate class, which takes a parameter to __init__:

from pybricks.parameters import Port
from pybricks.iodevices import PUPDevice

class NewSensor(PUPDevice):

    def __init__(self, port):

        # This would now try to call make_new under the hood, but it expects
        # an argument, so raises TypeError: 'port' argument required
        print(self.info())

        # Never gets here.
        super().__init__(port)

sensor = NewSensor(Port.A)

I suppose I would OK with that (anything that doesn't segfault is fine), but I just wanted to make sure. We can add an updated CPython difference.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We discussed exactly this scenario... It makes sense to me that this should fail, but I agree that the error message is confusing because it looks like info() is missing the port argument.

What's the CPython behaviour that you'd expect here? (I'm struggling to think of an example where this could come up with a built-in type. It is different to the behaviour of a non-native base of course)

Note there's another CPython diff here which is that if you don't explicitly call super().__init__ and you have a native base, then by default we pass the derived type's __init__ args to the native base. (CPython only does this when the base is tuple and frozenset, perhaps others).

class MyList(list):
    def __init__(self, a):
        pass

class MyTuple(tuple):
    def __init__(self, a):
        pass

print(MyList([1,2,3]))  # CPython: []. MicroPython: [1,2,3]
print(MyTuple([1,2,3])) # Both: (1,2,3)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think MiroPython's make_new is really the equivalent of __new__ without __init__ in CPython. This is why immutable types like tuple have the different behavior as above - since they are immutable, all of the "init" has to be done in __new__. So I don't think we should be calling make_new more than once, otherwise it forces all implementations to have to be aware of which call it is (if first call treat as __new__, otherwise treat as __init__) or risk leaking resources or other unexpected behaviors (e.g. if make_new opens a file descriptor, then is called again, that file descriptor has to be closed, otherwise it is leaked with no way to close it later).

Copy link
Contributor

@dlech dlech Dec 16, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could we implement something like this?

  • If self is accessed in an __init__ overload of a native type and super.__init__() was not called yet, then call make_new with the args of the overload.
  • If super().__init__() is called after make_new was called because of the condition above, raise an exception.
  • Otherwise, if super.__init__() is called before something triggered the call to make_new, then call make_new with the args from super.__init__().
  • Finally, if nothing triggered make_new before the __init__ overload returns, call make_new with the args from the overload.

The recommendation in the MicroPython docs should be to call super().__init__() as the first line of any __init__ overload of a native type (or at least before using self). But at least in other cases, it might work as expected or at least not segfault.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That proposal just above does seem to cover all points raised here.

But it makes things complicated: implementation, documentation, behaviour, and what the user needs to learn and understand.

I think the simplest thing is to just prevent the crash if trying to access super before its initialised, and the user can initialise the native type it early if needed. That's much easier to understand.

(Also I don't even know how we would implement the first bullet point above, the part where the native make_new was called with the args of the overload, because those args are not available at that point in the runtime.)

}
#endif // MICROPY_BUILTIN_METHOD_CHECK_SELF_ARG
} else {
obj_obj = MP_OBJ_FROM_PTR(obj);
}
Expand Down
31 changes: 31 additions & 0 deletions tests/cpydiff/core_class_super_init.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
"""
categories: Core,Classes
description: When inheriting native types, calling a method in ``__init__(self, ...)`` before ``super().__init__()`` raises an ``AttributeError`` (or segfaults if ``MICROPY_BUILTIN_METHOD_CHECK_SELF_ARG`` is not enabled).
cause: MicroPython does not have separate ``__new__`` and ``__init__`` methods in native types.
workaround: Call ``super().__init__()`` first.
"""


class L1(list):
def __init__(self, a):
self.append(a)


try:
L1(1)
print("OK")
except AttributeError:
print("AttributeError")


class L2(list):
def __init__(self, a):
super().__init__()
self.append(a)


try:
L2(1)
print("OK")
except AttributeError:
print("AttributeError")
17 changes: 17 additions & 0 deletions tests/misc/cexample_class.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,3 +22,20 @@
print(timer)
print(0 <= t_start <= TOLERANCE_MS)
print(SLEEP_MS - TOLERANCE_MS <= t_end <= SLEEP_MS + TOLERANCE_MS)

advanced_timer = cexample.AdvancedTimer()

time.sleep_ms(100)

print(repr(advanced_timer))
print(str(advanced_timer))

print(advanced_timer.seconds)
advanced_timer.seconds = 123
print(advanced_timer.seconds)
print(advanced_timer.time() < 123000 + TOLERANCE_MS)

try:
advanced_timer.seconds = "bad input"
except TypeError:
print("TypeError")
6 changes: 6 additions & 0 deletions tests/misc/cexample_class.py.exp
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
<Timer>
True
True
AdvancedTimer()
AdvancedTimer() # created 0 seconds ago
0
123
True
TypeError
1 change: 1 addition & 0 deletions tests/misc/cexample_module.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,5 +12,6 @@
d = dir(cexample)
d.index("add_ints")
d.index("Timer")
d.index("AdvancedTimer")

print(cexample.add_ints(1, 3))
38 changes: 38 additions & 0 deletions tests/misc/cexample_subclass.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# test subclassing custom native class

try:
from cexample import AdvancedTimer
except ImportError:
print("SKIP")
raise SystemExit


class SubTimer(AdvancedTimer):
def __init__(self):

# At this point, self does not yet represent a AdvancedTimer instance.
print(self)

# So lookups via type.attr handler will fail.
try:
self.seconds
except AttributeError:
print("AttributeError")

# Also applies to builtin methods.
try:
self.time()
except AttributeError:
print("AttributeError")

# Initialize base class.
super().__init__(self)

# Now you can access methods and attributes normally.
self.time()
print(self.seconds)
self.seconds = 123
print(self.seconds)


watch = SubTimer()
5 changes: 5 additions & 0 deletions tests/misc/cexample_subclass.py.exp
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
<function>
AttributeError
AttributeError
0
123