Skip to content

gh-128762: Include inline values in sys.getsizeof() #128763

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 5 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions Lib/test/test_sys.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,10 @@ def requires_subinterpreters(meth):
return unittest.skipIf(interpreters is None,
'subinterpreters required')(meth)

class ObjectWithValue:
    """Plain Python object holding a single ``value`` attribute.

    Instances can be chained through ``value`` to form a linked list.
    """

    # Quoted so the self-reference is not evaluated while the class body is
    # still executing (the name is not bound yet at that point).
    value: "ObjectWithValue | None"

    def __init__(self, value):
        self.value = value

# Format string in struct-module notation; presumably mirrors the C layout of
# a dict keys object header — confirm at its use sites.
DICT_KEY_STRUCT_FORMAT = 'n2BI2n'

Expand Down Expand Up @@ -1475,6 +1479,16 @@ def test_gc_head_size(self):
# but lists are
self.assertEqual(sys.getsizeof([]), vsize('Pn') + gc_header_size)

def test_inline_values(self):
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I have overall low confidence that this is the right way to test this, and I'm open to suggestions.

Copy link
Contributor

@ariebovenberg ariebovenberg Jun 5, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure if it's what you're looking for, but here's the code I've been using to inspect the inline values from Python:

import ctypes

# Stand-in for C's Py_ssize_t. ctypes.c_ssize_t matches it on almost every
# platform, which is good enough for an inspection helper.
py_ssize_t = ctypes.c_ssize_t


class PyObject(ctypes.Structure):
    """ctypes overlay for an object header: a reference count and a type pointer.

    NOTE(review): assumes the header of a default (non-free-threaded) build —
    confirm before using on other builds.
    """

    _fields_ = [
        ("ob_refcnt", py_ssize_t),
        ("ob_type", ctypes.c_void_p),
    ]


class PyDictValues(ctypes.Structure):
    """ctypes overlay for the header of a block of dict values.

    Four one-byte counters/flags are followed by the value storage.
    """

    _fields_ = [
        ("capacity", ctypes.c_uint8),
        ("size", ctypes.c_uint8),
        ("embedded", ctypes.c_uint8),
        ("valid", ctypes.c_uint8),
        # NOTE(review): declared as a pointer here; in the C struct this is
        # presumably an inline array of object pointers — confirm before
        # dereferencing this field.
        ("values", ctypes.POINTER(ctypes.py_object)),
    ]

    def __repr__(self):
        # Only the four header bytes are shown; the value storage is omitted.
        shown = ("capacity", "size", "embedded", "valid")
        body = ", ".join(f"{name}={getattr(self, name)}" for name in shown)
        return f"DictValues({body})"


class PyDictKeysObject(ctypes.Structure):
    """ctypes overlay for the fixed-size header of a dict keys object."""

    _fields_ = [
        ("dk_refcnt", py_ssize_t),
        ("dk_log2_size", ctypes.c_uint8),
        ("dk_log2_index_bytes", ctypes.c_uint8),
        ("dk_kind", ctypes.c_uint8),
        ("dk_version", ctypes.c_uint32),
        ("dk_usable", py_ssize_t),
        ("dk_nentries", py_ssize_t),
        # Placeholder for indices
        ("dk_indices", ctypes.POINTER(ctypes.c_void_p)),
    ]

    def __repr__(self):
        # Pair each display label with its field so the output order is explicit.
        labelled = (
            ("refcount", self.dk_refcnt),
            ("log2_size", self.dk_log2_size),
            ("log2_index_bytes", self.dk_log2_index_bytes),
            ("kind", self.dk_kind),
            ("version", self.dk_version),
            ("usable", self.dk_usable),
            ("nentries", self.dk_nentries),
        )
        inner = ", ".join(f"{label}={value}" for label, value in labelled)
        return f"DictKeysObject({inner})"


class PyDict(PyObject):
    """ctypes overlay for a dict object: the object header plus dict fields."""

    _fields_ = [
        ("ma_used", py_ssize_t),
        ("ma_version_tag", ctypes.c_uint64),
        ("ma_keys", ctypes.POINTER(PyDictKeysObject)),
        ("ma_values", ctypes.POINTER(PyDictValues)),
    ]

    def __repr__(self):
        # Dereferencing a NULL ctypes pointer raises ValueError; show that
        # case as ``None`` instead of failing.
        try:
            values = self.ma_values.contents
        except ValueError:
            values = None
        summary = ", ".join(
            (
                f"size={self.ma_used}",
                f"refcount={self.ob_refcnt}",
                f"version={self.ma_version_tag}",
                f"keys={self.ma_keys.contents}",
                f"values={values}",
            )
        )
        return f"DictStruct({summary})"


# Bits of ``type.__flags__`` (Py_TPFLAGS_INLINE_VALUES / Py_TPFLAGS_MANAGED_DICT).
FLAG_INLINE_VALUES = 1 << 2
_FLAG_MANAGED_DICT = 1 << 4

# The managed ``__dict__`` pointer sits three pointer-widths before the object
# (24 bytes with 8-byte pointers).
# NOTE(review): free-threaded builds presumably use a different offset — confirm.
_MANAGED_DICT_OFFSET = -3 * ctypes.sizeof(ctypes.c_void_p)


def dict_and_inline_values(obj):
    """Return ``(managed_dict, values)`` ctypes views for *obj*.

    ``values`` is a ``PyDictValues`` overlay of the inline values stored right
    after the object's basic size, or ``None`` when the type does not use
    inline values.

    ``managed_dict`` is a ``PyDict`` overlay of the instance ``__dict__``, or
    ``None`` when the type has no managed dict or the dict has not been
    materialized yet (the slot holds a NULL pointer).

    The returned structures alias live interpreter memory; they are only
    meaningful while *obj* is alive.
    """
    cls = type(obj)
    address = id(obj)
    flags = cls.__flags__

    values = None
    if flags & FLAG_INLINE_VALUES:
        # Layout of a pure-python object as of python 3.13
        # see https://github.com/python/cpython/blob/main/Objects/object_layout.md
        values = PyDictValues.from_address(address + cls.__basicsize__)

    managed_dict = None
    # Only read the pre-header slot when the type actually has one; for other
    # types that memory does not hold a dict pointer.
    if flags & _FLAG_MANAGED_DICT:
        slot = ctypes.POINTER(PyDict).from_address(address + _MANAGED_DICT_OFFSET)
        try:
            managed_dict = slot.contents
        except ValueError:
            # The instance __dict__ hasn't been "materialized" yet (null pointer)
            managed_dict = None
    return (managed_dict, values)

vsize = test.support.calcvobjsize
gc_header_size = self.gc_headsize
inline_values_size = vsize('P')

linked_list = None
for i in range(28):
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I find that it needs this many iterations to "stabilize". Is there a right way to do this?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I happened to encounter the same issue. What seems to happen is that the size of the inline values starts at 30, and then decreases by 1 for each new instance—until it reaches the minimum possible (i.e. the number of unique keys encountered so far). If more than 30 different keys are encountered, values are no longer inlined.

I wasn't able to find the exact reason why, but it seems to be a simple way of 'right-sizing' the inline values. You can imagine that when creating 10k instances, it all amortizes to the 'right' size to cover the attributes set in the __init__.

Relevant source:

shared_keys_usable_size(PyDictKeysObject *keys)
{
// dk_usable will decrease for each instance that is created and each
// value that is added. dk_nentries will increase for each value that
// is added. We want to always return the right value or larger.
// We therefore increase dk_nentries first and we decrease dk_usable
// second, and conversely here we read dk_usable first and dk_nentries
// second (to avoid the case where we read entries before the increment
// and read usable after the decrement)
Py_ssize_t dk_usable = FT_ATOMIC_LOAD_SSIZE_ACQUIRE(keys->dk_usable);
Py_ssize_t dk_nentries = FT_ATOMIC_LOAD_SSIZE_ACQUIRE(keys->dk_nentries);
return dk_nentries + dk_usable;
}

#define SHARED_KEYS_MAX_SIZE 30

linked_list = ObjectWithValue(linked_list)
self.assertEqual(sys.getsizeof(linked_list), vsize('P') + gc_header_size + inline_values_size)

def test_errors(self):
class BadSizeof:
def __sizeof__(self):
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
:func:`sys.getsizeof` now accounts for inline values stored alongside the object.
9 changes: 7 additions & 2 deletions Python/sysmodule.c
Original file line number Diff line number Diff line change
Expand Up @@ -1842,10 +1842,11 @@ _PySys_GetSizeOf(PyObject *o)
PyObject *res = NULL;
PyObject *method;
Py_ssize_t size;
PyTypeObject *type = Py_TYPE(o);
PyThreadState *tstate = _PyThreadState_GET();

/* Make sure the type is initialized. float gets initialized late */
if (PyType_Ready(Py_TYPE(o)) < 0) {
if (PyType_Ready(type) < 0) {
return (size_t)-1;
}

Expand All @@ -1854,7 +1855,7 @@ _PySys_GetSizeOf(PyObject *o)
if (!_PyErr_Occurred(tstate)) {
_PyErr_Format(tstate, PyExc_TypeError,
"Type %.100s doesn't define __sizeof__",
Py_TYPE(o)->tp_name);
type->tp_name);
}
}
else {
Expand All @@ -1876,6 +1877,10 @@ _PySys_GetSizeOf(PyObject *o)
return (size_t)-1;
}

if (type->tp_flags & Py_TPFLAGS_INLINE_VALUES) {
size += _PyInlineValuesSize(type);
}

size_t presize = 0;
if (!Py_IS_TYPE(o, &PyType_Type) ||
PyType_HasFeature((PyTypeObject *)o, Py_TPFLAGS_HEAPTYPE))
Expand Down
Loading