Skip to content

gh-128137: Update PyASCIIObject to handle interned field with the atomic operation #128196

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 19 commits into from
Jan 5, 2025
Merged
20 changes: 13 additions & 7 deletions Include/cpython/unicodeobject.h
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ typedef struct {
3: Interned, Immortal, and Static
This categorization allows the runtime to determine the right
cleanup mechanism at runtime shutdown. */
unsigned int interned:2;
uint16_t interned;
/* Character size:

- PyUnicode_1BYTE_KIND (1):
Expand All @@ -132,21 +132,23 @@ typedef struct {
* all characters are in the range U+0000-U+10FFFF
* at least one character is in the range U+10000-U+10FFFF
*/
unsigned int kind:3;
unsigned short kind:3;
/* Compact is with respect to the allocation scheme. Compact unicode
objects only require one memory block while non-compact objects use
one block for the PyUnicodeObject struct and another for its data
buffer. */
unsigned int compact:1;
unsigned short compact:1;
/* The string only contains characters in the range U+0000-U+007F (ASCII)
and the kind is PyUnicode_1BYTE_KIND. If ascii is set and compact is
set, use the PyASCIIObject structure. */
unsigned int ascii:1;
unsigned short ascii:1;
/* The object is statically allocated. */
unsigned int statically_allocated:1;
unsigned short statically_allocated:1;
/* Padding to ensure that PyUnicode_DATA() is always aligned to
4 bytes (see issue #19537 on m68k). */
unsigned int :24;
4 bytes (see issue #19537 on m68k) and we use unsigned short to avoid
the extra four bytes on 32-bit Windows. This is restricted features
for specific compilers including GCC, MSVC, Clang and IBM's XL compiler. */
unsigned short :10;
} state;
} PyASCIIObject;

Expand Down Expand Up @@ -195,7 +197,11 @@ typedef struct {

/* Use only if you know it's a string */
static inline unsigned int PyUnicode_CHECK_INTERNED(PyObject *op) {
#ifdef Py_GIL_DISABLED
return _Py_atomic_load_uint16_relaxed(&_PyASCIIObject_CAST(op)->state.interned);
#else
return _PyASCIIObject_CAST(op)->state.interned;
#endif
}
#define PyUnicode_CHECK_INTERNED(op) PyUnicode_CHECK_INTERNED(_PyObject_CAST(op))

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Update :c:type:`PyASCIIObject` layout to handle interned field with the
atomic operation. Patch by Donghee Na.
6 changes: 3 additions & 3 deletions Objects/unicodeobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -15726,7 +15726,7 @@ immortalize_interned(PyObject *s)
_Py_DecRefTotal(_PyThreadState_GET());
}
#endif
_PyUnicode_STATE(s).interned = SSTATE_INTERNED_IMMORTAL;
FT_ATOMIC_STORE_UINT16_RELAXED(_PyUnicode_STATE(s).interned, SSTATE_INTERNED_IMMORTAL);
_Py_SetImmortal(s);
}

Expand Down Expand Up @@ -15845,7 +15845,7 @@ intern_common(PyInterpreterState *interp, PyObject *s /* stolen */,
_Py_DecRefTotal(_PyThreadState_GET());
#endif
}
_PyUnicode_STATE(s).interned = SSTATE_INTERNED_MORTAL;
FT_ATOMIC_STORE_UINT16_RELAXED(_PyUnicode_STATE(s).interned, SSTATE_INTERNED_MORTAL);

/* INTERNED_MORTAL -> INTERNED_IMMORTAL (if needed) */

Expand Down Expand Up @@ -15981,7 +15981,7 @@ _PyUnicode_ClearInterned(PyInterpreterState *interp)
Py_UNREACHABLE();
}
if (!shared) {
_PyUnicode_STATE(s).interned = SSTATE_NOT_INTERNED;
FT_ATOMIC_STORE_UINT16_RELAXED(_PyUnicode_STATE(s).interned, SSTATE_NOT_INTERNED);
}
}
#ifdef INTERNED_STATS
Expand Down
Loading