From 64f07da0fad4bee7b3a74eaab2b884a9252a0df5 Mon Sep 17 00:00:00 2001 From: AN Long Date: Sun, 23 Jun 2024 23:45:30 +0800 Subject: [PATCH 1/5] Fix data races reported by TSAN in some set methods --- Objects/setobject.c | 6 +++--- Tools/tsan/suppressions_free_threading.txt | 1 - 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/Objects/setobject.c b/Objects/setobject.c index 68986bb6a6b557..e5d84ddf4acc47 100644 --- a/Objects/setobject.c +++ b/Objects/setobject.c @@ -368,7 +368,7 @@ set_add_key(PySetObject *so, PyObject *key) Py_hash_t hash; if (!PyUnicode_CheckExact(key) || - (hash = _PyASCIIObject_CAST(key)->hash) == -1) { + (hash = FT_ATOMIC_LOAD_SSIZE_RELAXED(_PyASCIIObject_CAST(key)->hash)) == -1) { hash = PyObject_Hash(key); if (hash == -1) return -1; @@ -382,7 +382,7 @@ set_contains_key(PySetObject *so, PyObject *key) Py_hash_t hash; if (!PyUnicode_CheckExact(key) || - (hash = _PyASCIIObject_CAST(key)->hash) == -1) { + (hash = FT_ATOMIC_LOAD_SSIZE_RELAXED(_PyASCIIObject_CAST(key)->hash)) == -1) { hash = PyObject_Hash(key); if (hash == -1) return -1; @@ -396,7 +396,7 @@ set_discard_key(PySetObject *so, PyObject *key) Py_hash_t hash; if (!PyUnicode_CheckExact(key) || - (hash = _PyASCIIObject_CAST(key)->hash) == -1) { + (hash = FT_ATOMIC_LOAD_SSIZE_RELAXED(_PyASCIIObject_CAST(key)->hash)) == -1) { hash = PyObject_Hash(key); if (hash == -1) return -1; diff --git a/Tools/tsan/suppressions_free_threading.txt b/Tools/tsan/suppressions_free_threading.txt index 2986efe6774157..d6e439883e2d3c 100644 --- a/Tools/tsan/suppressions_free_threading.txt +++ b/Tools/tsan/suppressions_free_threading.txt @@ -30,7 +30,6 @@ race_top:assign_version_tag race_top:insertdict race_top:lookup_tp_dict race_top:new_reference -race_top:set_contains_key # https://gist.github.com/colesbury/d13d033f413b4ad07929d044bed86c35 race_top:set_discard_entry race_top:_PyDict_CheckConsistency From 1571d018edafd714d431a0d0e2802a6b1c81c524 Mon Sep 17 00:00:00 2001 From: AN Long Date: Tue, 2 Jul 2024 01:17:18 +0800 Subject: [PATCH 2/5] Introduce _PyObject_HashFast and apply it to the codebase --- Include/internal/pycore_object.h | 14 ++++++++++++++ Modules/_collectionsmodule.c | 9 +++------ Objects/setobject.c | 30 +++++++++--------------------- Objects/typeobject.c | 13 ++++--------- 4 files changed, 30 insertions(+), 36 deletions(-) diff --git a/Include/internal/pycore_object.h b/Include/internal/pycore_object.h index 9c963d8970d665..e269ea6d272a09 100644 --- a/Include/internal/pycore_object.h +++ b/Include/internal/pycore_object.h @@ -613,6 +613,20 @@ _PyObject_IS_GC(PyObject *obj) && (type->tp_is_gc == NULL || type->tp_is_gc(obj))); } +// Fast inlined version of PyObject_Hash() +static inline Py_hash_t +_PyObject_HashFast(PyObject *op) { + Py_hash_t hash; + if (!PyUnicode_CheckExact(op) || + (hash = FT_ATOMIC_LOAD_SSIZE_RELAXED(_PyASCIIObject_CAST(op)->hash)) == -1) { + hash = PyObject_Hash(op); + if (hash == -1) { + return -1; + } + } + return hash; +} + // Fast inlined version of PyType_IS_GC() #define _PyType_IS_GC(t) _PyType_HasFeature((t), Py_TPFLAGS_HAVE_GC) diff --git a/Modules/_collectionsmodule.c b/Modules/_collectionsmodule.c index 641d57a64c8357..0bc61db4117c5d 100644 --- a/Modules/_collectionsmodule.c +++ b/Modules/_collectionsmodule.c @@ -2537,12 +2537,9 @@ _collections__count_elements_impl(PyObject *module, PyObject *mapping, if (key == NULL) break; - if (!PyUnicode_CheckExact(key) || - (hash = _PyASCIIObject_CAST(key)->hash) == -1) - { - hash = PyObject_Hash(key); - if (hash == -1) - goto done; + hash = _PyObject_HashFast(key); + if (hash == -1) { + goto done; } oldval = _PyDict_GetItem_KnownHash(mapping, key, hash); diff --git a/Objects/setobject.c b/Objects/setobject.c index e5d84ddf4acc47..eb0c404bf6b8e0 100644 --- a/Objects/setobject.c +++ b/Objects/setobject.c @@ -365,13 +365,9 @@ set_discard_entry(PySetObject *so, PyObject *key, Py_hash_t hash) static int set_add_key(PySetObject *so, PyObject *key) { - Py_hash_t hash; - - if (!PyUnicode_CheckExact(key) || - (hash = FT_ATOMIC_LOAD_SSIZE_RELAXED(_PyASCIIObject_CAST(key)->hash)) == -1) { - hash = PyObject_Hash(key); - if (hash == -1) - return -1; + Py_hash_t hash = _PyObject_HashFast(key); + if (hash == -1) { + return -1; } return set_add_entry(so, key, hash); } @@ -379,13 +375,9 @@ set_add_key(PySetObject *so, PyObject *key) static int set_contains_key(PySetObject *so, PyObject *key) { - Py_hash_t hash; - - if (!PyUnicode_CheckExact(key) || - (hash = FT_ATOMIC_LOAD_SSIZE_RELAXED(_PyASCIIObject_CAST(key)->hash)) == -1) { - hash = PyObject_Hash(key); - if (hash == -1) - return -1; + Py_hash_t hash = _PyObject_HashFast(key); + if (hash == -1) { + return -1; } return set_contains_entry(so, key, hash); } @@ -393,13 +385,9 @@ set_contains_key(PySetObject *so, PyObject *key) static int set_discard_key(PySetObject *so, PyObject *key) { - Py_hash_t hash; - - if (!PyUnicode_CheckExact(key) || - (hash = FT_ATOMIC_LOAD_SSIZE_RELAXED(_PyASCIIObject_CAST(key)->hash)) == -1) { - hash = PyObject_Hash(key); - if (hash == -1) - return -1; + Py_hash_t hash = _PyObject_HashFast(key); + if (hash == -1) { + return -1; } return set_discard_entry(so, key, hash); } diff --git a/Objects/typeobject.c b/Objects/typeobject.c index d374a8e6393176..b042e64a188d9d 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -5251,15 +5251,10 @@ find_name_in_mro(PyTypeObject *type, PyObject *name, int *error) { ASSERT_TYPE_LOCK_HELD(); - Py_hash_t hash; - if (!PyUnicode_CheckExact(name) || - (hash = _PyASCIIObject_CAST(name)->hash) == -1) - { - hash = PyObject_Hash(name); - if (hash == -1) { - *error = -1; - return NULL; - } + Py_hash_t hash = _PyObject_HashFast(name); + if (hash == -1) { + *error = -1; + return NULL; } /* Look in tp_dict of types in MRO */ From fefc8103e842ef97c82ff563d28e77ff2e29a580 Mon Sep 17 00:00:00 2001 From: AN Long Date: Tue, 2 Jul 2024 01:58:38 +0800 Subject: [PATCH 3/5] apply _PyObject_HashFast on dictobject.c --- Objects/dictobject.c | 135 ++++++++++++++++--------------------------- 1 file changed, 51 insertions(+), 84 deletions(-) diff --git a/Objects/dictobject.c b/Objects/dictobject.c index 5d325465608f99..2b11a01595b0bc 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -433,7 +433,7 @@ static inline Py_hash_t unicode_get_hash(PyObject *o) { assert(PyUnicode_CheckExact(o)); - return _PyASCIIObject_CAST(o)->hash; + return FT_ATOMIC_LOAD_SSIZE_RELAXED(_PyASCIIObject_CAST(o)->hash); } /* Print summary info about the state of the optimized allocator */ @@ -2177,13 +2177,10 @@ dict_getitem(PyObject *op, PyObject *key, const char *warnmsg) } PyDictObject *mp = (PyDictObject *)op; - Py_hash_t hash; - if (!PyUnicode_CheckExact(key) || (hash = unicode_get_hash(key)) == -1) { - hash = PyObject_Hash(key); - if (hash == -1) { - PyErr_FormatUnraisable(warnmsg); - return NULL; - } + Py_hash_t hash = _PyObject_HashFast(key); + if (hash == -1) { + PyErr_FormatUnraisable(warnmsg); + return NULL; } PyThreadState *tstate = _PyThreadState_GET(); @@ -2232,12 +2229,9 @@ _PyDict_LookupIndex(PyDictObject *mp, PyObject *key) assert(PyDict_CheckExact((PyObject*)mp)); assert(PyUnicode_CheckExact(key)); - Py_hash_t hash = unicode_get_hash(key); + Py_hash_t hash = _PyObject_HashFast(key); if (hash == -1) { - hash = PyObject_Hash(key); - if (hash == -1) { - return -1; - } + return -1; } return _Py_dict_lookup(mp, key, hash, &value); @@ -2308,14 +2302,10 @@ PyDict_GetItemRef(PyObject *op, PyObject *key, PyObject **result) return -1; } - Py_hash_t hash; - if (!PyUnicode_CheckExact(key) || (hash = unicode_get_hash(key)) == -1) - { - hash = PyObject_Hash(key); - if (hash == -1) { - *result = NULL; - return -1; - } + Py_hash_t hash = _PyObject_HashFast(key); + if (hash == -1) { + *result = NULL; + return -1; } return _PyDict_GetItemRef_KnownHash((PyDictObject *)op, key, hash, result); @@ -2327,13 +2317,10 @@ _PyDict_GetItemRef_Unicode_LockHeld(PyDictObject *op, PyObject *key, PyObject ** ASSERT_DICT_LOCKED(op); assert(PyUnicode_CheckExact(key)); - Py_hash_t hash; - if ((hash = unicode_get_hash(key)) == -1) { - hash = PyObject_Hash(key); - if (hash == -1) { - *result = NULL; - return -1; - } + Py_hash_t hash = _PyObject_HashFast(key); + if (hash == -1) { + *result = NULL; + return -1; } PyObject *value; @@ -2367,12 +2354,9 @@ PyDict_GetItemWithError(PyObject *op, PyObject *key) PyErr_BadInternalCall(); return NULL; } - if (!PyUnicode_CheckExact(key) || (hash = unicode_get_hash(key)) == -1) - { - hash = PyObject_Hash(key); - if (hash == -1) { - return NULL; - } + hash = _PyObject_HashFast(key); + if (hash == -1) { + return NULL; } #ifdef Py_GIL_DISABLED @@ -2440,10 +2424,9 @@ _PyDict_LoadGlobal(PyDictObject *globals, PyDictObject *builtins, PyObject *key) Py_hash_t hash; PyObject *value; - if (!PyUnicode_CheckExact(key) || (hash = unicode_get_hash(key)) == -1) { - hash = PyObject_Hash(key); - if (hash == -1) - return NULL; + hash = _PyObject_HashFast(key); + if (hash == -1) { + return NULL; } /* namespace 1: globals */ @@ -2468,14 +2451,11 @@ setitem_take2_lock_held(PyDictObject *mp, PyObject *key, PyObject *value) assert(key); assert(value); assert(PyDict_Check(mp)); - Py_hash_t hash; - if (!PyUnicode_CheckExact(key) || (hash = unicode_get_hash(key)) == -1) { - hash = PyObject_Hash(key); - if (hash == -1) { - Py_DECREF(key); - Py_DECREF(value); - return -1; - } + Py_hash_t hash = _PyObject_HashFast(key); + if (hash == -1) { + Py_DECREF(key); + Py_DECREF(value); + return -1; } PyInterpreterState *interp = _PyInterpreterState_GET(); @@ -2624,12 +2604,10 @@ delitem_common(PyDictObject *mp, Py_hash_t hash, Py_ssize_t ix, int PyDict_DelItem(PyObject *op, PyObject *key) { - Py_hash_t hash; assert(key); - if (!PyUnicode_CheckExact(key) || (hash = unicode_get_hash(key)) == -1) { - hash = PyObject_Hash(key); - if (hash == -1) - return -1; + Py_hash_t hash = _PyObject_HashFast(key); + if (hash == -1) { + return -1; } return _PyDict_DelItem_KnownHash(op, key, hash); @@ -2953,15 +2931,12 @@ pop_lock_held(PyObject *op, PyObject *key, PyObject **result) return 0; } - Py_hash_t hash; - if (!PyUnicode_CheckExact(key) || (hash = unicode_get_hash(key)) == -1) { - hash = PyObject_Hash(key); - if (hash == -1) { - if (result) { - *result = NULL; - } - return -1; + Py_hash_t hash = _PyObject_HashFast(key); + if (hash == -1) { + if (result) { + *result = NULL; } + return -1; } return _PyDict_Pop_KnownHash(dict, key, hash, result); } @@ -3293,10 +3268,9 @@ dict_subscript(PyObject *self, PyObject *key) Py_hash_t hash; PyObject *value; - if (!PyUnicode_CheckExact(key) || (hash = unicode_get_hash(key)) == -1) { - hash = PyObject_Hash(key); - if (hash == -1) - return NULL; + hash = _PyObject_HashFast(key); + if (hash == -1) { + return NULL; } ix = _Py_dict_lookup_threadsafe(mp, key, hash, &value); if (ix == DKIX_ERROR) @@ -4183,10 +4157,9 @@ dict_get_impl(PyDictObject *self, PyObject *key, PyObject *default_value) Py_hash_t hash; Py_ssize_t ix; - if (!PyUnicode_CheckExact(key) || (hash = unicode_get_hash(key)) == -1) { - hash = PyObject_Hash(key); - if (hash == -1) - return NULL; + hash = _PyObject_HashFast(key); + if (hash == -1) { + return NULL; } ix = _Py_dict_lookup_threadsafe(self, key, hash, &val); if (ix == DKIX_ERROR) @@ -4216,14 +4189,12 @@ dict_setdefault_ref_lock_held(PyObject *d, PyObject *key, PyObject *default_valu return -1; } - if (!PyUnicode_CheckExact(key) || (hash = unicode_get_hash(key)) == -1) { - hash = PyObject_Hash(key); - if (hash == -1) { - if (result) { - *result = NULL; - } - return -1; + hash = _PyObject_HashFast(key); + if (hash == -1) { + if (result) { + *result = NULL; } + return -1; } if (mp->ma_keys == Py_EMPTY_KEYS) { @@ -4655,12 +4626,10 @@ static PyMethodDef mapp_methods[] = { int PyDict_Contains(PyObject *op, PyObject *key) { - Py_hash_t hash; + Py_hash_t hash = _PyObject_HashFast(key); - if (!PyUnicode_CheckExact(key) || (hash = unicode_get_hash(key)) == -1) { - hash = PyObject_Hash(key); - if (hash == -1) - return -1; + if (hash == -1) { + return -1; } return _PyDict_Contains_KnownHash(op, key, hash); @@ -6743,11 +6712,9 @@ int _PyDict_SetItem_LockHeld(PyDictObject *dict, PyObject *name, PyObject *value) { if (value == NULL) { - Py_hash_t hash; - if (!PyUnicode_CheckExact(name) || (hash = unicode_get_hash(name)) == -1) { - hash = PyObject_Hash(name); - if (hash == -1) - return -1; + Py_hash_t hash = _PyObject_HashFast(name); + if (hash == -1) { + return -1; } return delitem_knownhash_lock_held((PyObject *)dict, name, hash); } else { From ad6f28edba991dea96cf6b799db8d0a6307d4d38 Mon Sep 17 00:00:00 2001 From: AN Long Date: Tue, 2 Jul 2024 02:25:31 +0800 Subject: [PATCH 4/5] Update Include/internal/pycore_object.h Co-authored-by: Sam Gross --- Include/internal/pycore_object.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Include/internal/pycore_object.h b/Include/internal/pycore_object.h index e269ea6d272a09..699b56f1983975 100644 --- a/Include/internal/pycore_object.h +++ b/Include/internal/pycore_object.h @@ -615,7 +615,8 @@ _PyObject_IS_GC(PyObject *obj) // Fast inlined version of PyObject_Hash() static inline Py_hash_t -_PyObject_HashFast(PyObject *op) { +_PyObject_HashFast(PyObject *op) +{ Py_hash_t hash; if (!PyUnicode_CheckExact(op) || (hash = FT_ATOMIC_LOAD_SSIZE_RELAXED(_PyASCIIObject_CAST(op)->hash)) == -1) { From ef3d43835cb376ccc4ead6316e2a7a1f9d9f7236 Mon Sep 17 00:00:00 2001 From: AN Long Date: Tue, 2 Jul 2024 02:37:00 +0800 Subject: [PATCH 5/5] Refactor _PyObject_HashFast --- Include/internal/pycore_object.h | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/Include/internal/pycore_object.h b/Include/internal/pycore_object.h index 699b56f1983975..fa789611133a6f 100644 --- a/Include/internal/pycore_object.h +++ b/Include/internal/pycore_object.h @@ -617,15 +617,14 @@ _PyObject_IS_GC(PyObject *obj) static inline Py_hash_t _PyObject_HashFast(PyObject *op) { - Py_hash_t hash; - if (!PyUnicode_CheckExact(op) || - (hash = FT_ATOMIC_LOAD_SSIZE_RELAXED(_PyASCIIObject_CAST(op)->hash)) == -1) { - hash = PyObject_Hash(op); - if (hash == -1) { - return -1; + if (PyUnicode_CheckExact(op)) { + Py_hash_t hash = FT_ATOMIC_LOAD_SSIZE_RELAXED( + _PyASCIIObject_CAST(op)->hash); + if (hash != -1) { + return hash; } } - return hash; + return PyObject_Hash(op); } // Fast inlined version of PyType_IS_GC()