Skip to content

Commit d608fb3

Browse files
committed
Less duplication, improved comments, better handling of new keys during resize
1 parent 6a584a2 commit d608fb3

File tree

1 file changed

+42
-96
lines changed

1 file changed

+42
-96
lines changed

Objects/dictobject.c

Lines changed: 42 additions & 96 deletions
Original file line numberDiff line numberDiff line change
@@ -167,7 +167,8 @@ set_keys(PyDictObject *mp, PyDictKeysObject *keys)
167167
}
168168

169169
static inline void
170-
set_values(PyDictObject *mp, PyDictValues *values) {
170+
set_values(PyDictObject *mp, PyDictValues *values)
171+
{
171172
ASSERT_OWNED_OR_SHARED(mp);
172173
_Py_atomic_store_ptr_release(&mp->ma_values, values);
173174
}
@@ -332,8 +333,6 @@ static int dictresize(PyInterpreterState *interp, PyDictObject *mp,
332333

333334
static PyObject* dict_iter(PyObject *dict);
334335

335-
static int
336-
contains_known_hash(PyDictObject *mp, PyObject *key, Py_ssize_t hash);
337336
static int
338337
setitem_lock_held(PyDictObject *mp, PyObject *key, PyObject *value);
339338
static int
@@ -817,9 +816,9 @@ new_values(size_t size)
817816
}
818817

819818
static inline void
820-
free_values(PyDictValues *values, int use_qsbr)
819+
free_values(PyDictValues *values, bool use_qsbr)
821820
{
822-
int prefix_size = ((uint8_t *)values)[-1];
821+
int prefix_size = DICT_VALUES_SIZE(values);
823822
#ifdef Py_GIL_DISABLED
824823
if (use_qsbr) {
825824
_PyMem_FreeQsbr(((char *)values)-prefix_size);
@@ -1219,12 +1218,10 @@ ensure_shared_on_read(PyDictObject *mp)
12191218
{
12201219
#ifdef Py_GIL_DISABLED
12211220
if (!_Py_IsOwnedByCurrentThread((PyObject *)mp) && !IS_DICT_SHARED(mp)) {
1222-
// We are accessing the dict from another thread then owns
1223-
// it and we haven't marked it as shared which will ensure
1224-
// that when we re-size ma_keys or ma_values that we will
1225-
// free using QSBR. We need to lock the dictionary to
1226-
// contend with writes from the owning thread, mark it as
1227-
// shared, and then we can continue with lock-free reads.
1221+
// The first time we access a dict from a non-owning thread we mark it
1222+
// as shared. This ensures that a concurrent resize operation will
1223+
// delay freeing the old keys or values using QSBR, which is necessary
1224+
// to safely allow concurrent reads without locking...
12281225
Py_BEGIN_CRITICAL_SECTION(mp);
12291226
if (!IS_DICT_SHARED(mp)) {
12301227
SET_DICT_SHARED(mp);
@@ -1241,7 +1238,7 @@ ensure_shared_on_resize(PyDictObject *mp)
12411238
_Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED(mp);
12421239

12431240
if (!_Py_IsOwnedByCurrentThread((PyObject *)mp) && !IS_DICT_SHARED(mp)) {
1244-
// We are writing to the dict from another thread then owns
1241+
// We are writing to the dict from another thread that owns
12451242
// it and we haven't marked it as shared which will ensure
12461243
// that when we re-size ma_keys or ma_values that we will
12471244
// free using QSBR. We need to lock the dictionary to
@@ -1299,8 +1296,8 @@ unicodekeys_lookup_generic_threadsafe(PyDictObject *mp, PyDictKeysObject* dk, Py
12991296
return do_lookup(mp, dk, key, hash, compare_unicode_generic_threadsafe);
13001297
}
13011298

1302-
static inline Py_ALWAYS_INLINE
1303-
Py_ssize_t compare_unicode_unicode_threadsafe(PyDictObject *mp, PyDictKeysObject *dk,
1299+
static inline Py_ALWAYS_INLINE Py_ssize_t
1300+
compare_unicode_unicode_threadsafe(PyDictObject *mp, PyDictKeysObject *dk,
13041301
void *ep0, Py_ssize_t ix, PyObject *key, Py_hash_t hash)
13051302
{
13061303
PyDictUnicodeEntry *ep = &((PyDictUnicodeEntry *)ep0)[ix];
@@ -1758,7 +1755,7 @@ insertdict(PyInterpreterState *interp, PyDictObject *mp,
17581755
return -1;
17591756
}
17601757

1761-
// Same to insertdict but specialized for ma_keys == Py_EMPTY_KEYS.
1758+
// Same as insertdict but specialized for ma_keys == Py_EMPTY_KEYS.
17621759
// Consumes key and value references.
17631760
static int
17641761
insert_to_emptydict(PyInterpreterState *interp, PyDictObject *mp,
@@ -1803,7 +1800,7 @@ insert_to_emptydict(PyInterpreterState *interp, PyDictObject *mp,
18031800
// We store the keys last so no one can see them in a partially inconsistent
18041801
// state so that we don't need to switch the keys to being shared yet for
18051802
// the case where we're inserting from the non-owner thread. We don't use
1806-
// store_keys here because the transition from empty to non-empty is safe
1803+
// set_keys here because the transition from empty to non-empty is safe
18071804
// as the empty keys will never be freed.
18081805
#ifdef Py_GIL_DISABLED
18091806
_Py_atomic_store_ptr_release(&mp->ma_keys, newkeys);
@@ -1865,7 +1862,7 @@ static int
18651862
dictresize(PyInterpreterState *interp, PyDictObject *mp,
18661863
uint8_t log2_newsize, int unicode)
18671864
{
1868-
PyDictKeysObject *oldkeys;
1865+
PyDictKeysObject *oldkeys, *newkeys;
18691866
PyDictValues *oldvalues;
18701867

18711868
ASSERT_DICT_LOCKED(mp);
@@ -1890,13 +1887,12 @@ dictresize(PyInterpreterState *interp, PyDictObject *mp,
18901887
*/
18911888

18921889
/* Allocate a new table. */
1893-
set_keys(mp, new_keys_object(interp, log2_newsize, unicode));
1894-
if (mp->ma_keys == NULL) {
1895-
set_keys(mp, oldkeys);
1890+
newkeys = new_keys_object(interp, log2_newsize, unicode);
1891+
if (newkeys == NULL) {
18961892
return -1;
18971893
}
18981894
// New table must be large enough.
1899-
assert(mp->ma_keys->dk_usable >= mp->ma_used);
1895+
assert(newkeys->dk_usable >= mp->ma_used);
19001896

19011897
Py_ssize_t numentries = mp->ma_used;
19021898

@@ -1906,9 +1902,9 @@ dictresize(PyInterpreterState *interp, PyDictObject *mp,
19061902
/* Convert split table into new combined table.
19071903
* We must incref keys; we can transfer values.
19081904
*/
1909-
if (mp->ma_keys->dk_kind == DICT_KEYS_GENERAL) {
1905+
if (newkeys->dk_kind == DICT_KEYS_GENERAL) {
19101906
// split -> generic
1911-
PyDictKeyEntry *newentries = DK_ENTRIES(mp->ma_keys);
1907+
PyDictKeyEntry *newentries = DK_ENTRIES(newkeys);
19121908

19131909
for (Py_ssize_t i = 0; i < numentries; i++) {
19141910
int index = get_index_from_order(mp, i);
@@ -1918,10 +1914,10 @@ dictresize(PyInterpreterState *interp, PyDictObject *mp,
19181914
newentries[i].me_hash = unicode_get_hash(ep->me_key);
19191915
newentries[i].me_value = oldvalues->values[index];
19201916
}
1921-
build_indices_generic(mp->ma_keys, newentries, numentries);
1917+
build_indices_generic(newkeys, newentries, numentries);
19221918
}
19231919
else { // split -> combined unicode
1924-
PyDictUnicodeEntry *newentries = DK_UNICODE_ENTRIES(mp->ma_keys);
1920+
PyDictUnicodeEntry *newentries = DK_UNICODE_ENTRIES(newkeys);
19251921

19261922
for (Py_ssize_t i = 0; i < numentries; i++) {
19271923
int index = get_index_from_order(mp, i);
@@ -1930,19 +1926,20 @@ dictresize(PyInterpreterState *interp, PyDictObject *mp,
19301926
newentries[i].me_key = Py_NewRef(ep->me_key);
19311927
newentries[i].me_value = oldvalues->values[index];
19321928
}
1933-
build_indices_unicode(mp->ma_keys, newentries, numentries);
1929+
build_indices_unicode(newkeys, newentries, numentries);
19341930
}
19351931
UNLOCK_KEYS(oldkeys);
1932+
set_keys(mp, newkeys);
19361933
dictkeys_decref(interp, oldkeys, IS_DICT_SHARED(mp));
19371934
set_values(mp, NULL);
19381935
free_values(oldvalues, IS_DICT_SHARED(mp));
19391936
}
19401937
else { // oldkeys is combined.
19411938
if (oldkeys->dk_kind == DICT_KEYS_GENERAL) {
19421939
// generic -> generic
1943-
assert(mp->ma_keys->dk_kind == DICT_KEYS_GENERAL);
1940+
assert(newkeys->dk_kind == DICT_KEYS_GENERAL);
19441941
PyDictKeyEntry *oldentries = DK_ENTRIES(oldkeys);
1945-
PyDictKeyEntry *newentries = DK_ENTRIES(mp->ma_keys);
1942+
PyDictKeyEntry *newentries = DK_ENTRIES(newkeys);
19461943
if (oldkeys->dk_nentries == numentries) {
19471944
memcpy(newentries, oldentries, numentries * sizeof(PyDictKeyEntry));
19481945
}
@@ -1954,12 +1951,12 @@ dictresize(PyInterpreterState *interp, PyDictObject *mp,
19541951
newentries[i] = *ep++;
19551952
}
19561953
}
1957-
build_indices_generic(mp->ma_keys, newentries, numentries);
1954+
build_indices_generic(newkeys, newentries, numentries);
19581955
}
19591956
else { // oldkeys is combined unicode
19601957
PyDictUnicodeEntry *oldentries = DK_UNICODE_ENTRIES(oldkeys);
19611958
if (unicode) { // combined unicode -> combined unicode
1962-
PyDictUnicodeEntry *newentries = DK_UNICODE_ENTRIES(mp->ma_keys);
1959+
PyDictUnicodeEntry *newentries = DK_UNICODE_ENTRIES(newkeys);
19631960
if (oldkeys->dk_nentries == numentries && mp->ma_keys->dk_kind == DICT_KEYS_UNICODE) {
19641961
memcpy(newentries, oldentries, numentries * sizeof(PyDictUnicodeEntry));
19651962
}
@@ -1971,10 +1968,10 @@ dictresize(PyInterpreterState *interp, PyDictObject *mp,
19711968
newentries[i] = *ep++;
19721969
}
19731970
}
1974-
build_indices_unicode(mp->ma_keys, newentries, numentries);
1971+
build_indices_unicode(newkeys, newentries, numentries);
19751972
}
19761973
else { // combined unicode -> generic
1977-
PyDictKeyEntry *newentries = DK_ENTRIES(mp->ma_keys);
1974+
PyDictKeyEntry *newentries = DK_ENTRIES(newkeys);
19781975
PyDictUnicodeEntry *ep = oldentries;
19791976
for (Py_ssize_t i = 0; i < numentries; i++) {
19801977
while (ep->me_value == NULL)
@@ -1984,10 +1981,12 @@ dictresize(PyInterpreterState *interp, PyDictObject *mp,
19841981
newentries[i].me_value = ep->me_value;
19851982
ep++;
19861983
}
1987-
build_indices_generic(mp->ma_keys, newentries, numentries);
1984+
build_indices_generic(newkeys, newentries, numentries);
19881985
}
19891986
}
19901987

1988+
set_keys(mp, newkeys);
1989+
19911990
if (oldkeys != Py_EMPTY_KEYS) {
19921991
#ifdef Py_REF_DEBUG
19931992
_Py_DecRefTotal(_PyInterpreterState_GET());
@@ -3640,7 +3639,7 @@ dict_dict_merge(PyInterpreterState *interp, PyDictObject *mp, PyDictObject *othe
36403639
Py_NewRef(key), hash, Py_NewRef(value));
36413640
}
36423641
else {
3643-
err = contains_known_hash(mp, key, hash);
3642+
err = _PyDict_Contains_KnownHash((PyObject *)mp, key, hash);
36443643
if (err == 0) {
36453644
err = insertdict(interp, mp,
36463645
Py_NewRef(key), hash, Py_NewRef(value));
@@ -4033,29 +4032,14 @@ static PyObject *
40334032
dict___contains__(PyDictObject *self, PyObject *key)
40344033
/*[clinic end generated code: output=a3d03db709ed6e6b input=fe1cb42ad831e820]*/
40354034
{
4036-
register PyDictObject *mp = self;
4037-
Py_hash_t hash;
4038-
Py_ssize_t ix;
4039-
PyObject *value;
4040-
4041-
if (!PyUnicode_CheckExact(key) || (hash = unicode_get_hash(key)) == -1) {
4042-
hash = PyObject_Hash(key);
4043-
if (hash == -1)
4044-
return NULL;
4045-
}
4046-
#ifdef Py_GIL_DISABLED
4047-
ix = _Py_dict_lookup_threadsafe(mp, key, hash, &value);
4048-
#else
4049-
ix = _Py_dict_lookup(mp, key, hash, &value);
4050-
#endif
4051-
if (ix == DKIX_ERROR)
4035+
int contains = PyDict_Contains((PyObject *)self, key);
4036+
if (contains < 0) {
40524037
return NULL;
4053-
if (ix == DKIX_EMPTY || value == NULL)
4054-
Py_RETURN_FALSE;
4055-
#ifdef Py_GIL_DISABLED
4056-
Py_DECREF(value);
4057-
#endif
4058-
Py_RETURN_TRUE;
4038+
}
4039+
if (contains) {
4040+
Py_RETURN_TRUE;
4041+
}
4042+
Py_RETURN_FALSE;
40594043
}
40604044

40614045
/*[clinic input]
@@ -4551,57 +4535,19 @@ static PyMethodDef mapp_methods[] = {
45514535
{NULL, NULL} /* sentinel */
45524536
};
45534537

4554-
static int
4555-
contains_known_hash(PyDictObject *mp, PyObject *key, Py_ssize_t hash)
4556-
{
4557-
Py_ssize_t ix;
4558-
PyObject *value;
4559-
4560-
#ifdef Py_GIL_DISABLED
4561-
ix = _Py_dict_lookup_threadsafe(mp, key, hash, &value);
4562-
#else
4563-
ix = _Py_dict_lookup(mp, key, hash, &value);
4564-
#endif
4565-
if (ix == DKIX_ERROR)
4566-
return -1;
4567-
4568-
if (ix != DKIX_EMPTY && value != NULL) {
4569-
#ifdef Py_GIL_DISABLED
4570-
Py_DECREF(value);
4571-
#endif
4572-
return 1;
4573-
}
4574-
return 0;
4575-
}
4576-
45774538
/* Return 1 if `key` is in dict `op`, 0 if not, and -1 on error. */
45784539
int
45794540
PyDict_Contains(PyObject *op, PyObject *key)
45804541
{
45814542
Py_hash_t hash;
4582-
Py_ssize_t ix;
4583-
PyObject *value;
4584-
PyDictObject *mp = (PyDictObject *)op;
45854543

45864544
if (!PyUnicode_CheckExact(key) || (hash = unicode_get_hash(key)) == -1) {
45874545
hash = PyObject_Hash(key);
45884546
if (hash == -1)
45894547
return -1;
45904548
}
4591-
#ifdef Py_GIL_DISABLED
4592-
ix = _Py_dict_lookup_threadsafe(mp, key, hash, &value);
4593-
#else
4594-
ix = _Py_dict_lookup(mp, key, hash, &value);
4595-
#endif
4596-
if (ix == DKIX_ERROR)
4597-
return -1;
4598-
if (ix != DKIX_EMPTY && value != NULL) {
4599-
#ifdef Py_GIL_DISABLED
4600-
Py_DECREF(value);
4601-
#endif
4602-
return 1;
4603-
}
4604-
return 0;
4549+
4550+
return _PyDict_Contains_KnownHash(op, key, hash);
46054551
}
46064552

46074553
int

0 commit comments

Comments
 (0)