Skip to content

Improve code clarity for the set lookup logic #20028

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
May 10, 2020
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
Improve code clarity for the set lookup logic
Cuts the code volume in half and substantially improves readability
while keeping the algorithm unchanged (there is no change to the
probe pattern).  Performance is the same, showing no measurable
change.
  • Loading branch information
rhettinger committed May 10, 2020
commit 37f079eb76e922580582fc76442a0f02dcba24a6
176 changes: 53 additions & 123 deletions Objects/setobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -57,77 +57,43 @@ set_lookkey(PySetObject *so, PyObject *key, Py_hash_t hash)
{
setentry *table;
setentry *entry;
size_t perturb;
size_t perturb = hash;
size_t mask = so->mask;
size_t i = (size_t)hash & mask; /* Unsigned for defined overflow behavior */
size_t j;
int cmp;

entry = &so->table[i];
if (entry->key == NULL)
return entry;

perturb = hash;

while (1) {
if (entry->hash == hash) {
PyObject *startkey = entry->key;
/* startkey cannot be a dummy because the dummy hash field is -1 */
assert(startkey != dummy);
if (startkey == key)
return entry;
if (PyUnicode_CheckExact(startkey)
&& PyUnicode_CheckExact(key)
&& _PyUnicode_EQ(startkey, key))
return entry;
table = so->table;
Py_INCREF(startkey);
cmp = PyObject_RichCompareBool(startkey, key, Py_EQ);
Py_DECREF(startkey);
if (cmp < 0) /* unlikely */
return NULL;
if (table != so->table || entry->key != startkey) /* unlikely */
return set_lookkey(so, key, hash);
if (cmp > 0) /* likely */
entry = &so->table[i];
j = (i + LINEAR_PROBES <= mask) ? LINEAR_PROBES: 0;
do {
if (entry->hash == 0 && entry->key == NULL)
return entry;
mask = so->mask; /* help avoid a register spill */
}

if (i + LINEAR_PROBES <= mask) {
for (j = 0 ; j < LINEAR_PROBES ; j++) {
entry++;
if (entry->hash == 0 && entry->key == NULL)
if (entry->hash == hash) {
PyObject *startkey = entry->key;
assert(startkey != dummy);
if (startkey == key)
return entry;
if (entry->hash == hash) {
PyObject *startkey = entry->key;
assert(startkey != dummy);
if (startkey == key)
return entry;
if (PyUnicode_CheckExact(startkey)
&& PyUnicode_CheckExact(key)
&& _PyUnicode_EQ(startkey, key))
return entry;
table = so->table;
Py_INCREF(startkey);
cmp = PyObject_RichCompareBool(startkey, key, Py_EQ);
Py_DECREF(startkey);
if (cmp < 0)
return NULL;
if (table != so->table || entry->key != startkey)
return set_lookkey(so, key, hash);
if (cmp > 0)
return entry;
mask = so->mask;
}
if (PyUnicode_CheckExact(startkey)
&& PyUnicode_CheckExact(key)
&& _PyUnicode_EQ(startkey, key))
return entry;
table = so->table;
Py_INCREF(startkey);
cmp = PyObject_RichCompareBool(startkey, key, Py_EQ);
Py_DECREF(startkey);
if (cmp < 0)
return NULL;
if (table != so->table || entry->key != startkey)
return set_lookkey(so, key, hash);
if (cmp > 0)
return entry;
mask = so->mask;
}
}

entry++;
} while (j--);
perturb >>= PERTURB_SHIFT;
i = (i * 5 + 1 + perturb) & mask;

entry = &so->table[i];
if (entry->key == NULL)
return entry;
}
}

Expand All @@ -152,75 +118,39 @@ set_add_entry(PySetObject *so, PyObject *key, Py_hash_t hash)

mask = so->mask;
i = (size_t)hash & mask;

entry = &so->table[i];
if (entry->key == NULL)
goto found_unused;

perturb = hash;

while (1) {
if (entry->hash == hash) {
PyObject *startkey = entry->key;
/* startkey cannot be a dummy because the dummy hash field is -1 */
assert(startkey != dummy);
if (startkey == key)
goto found_active;
if (PyUnicode_CheckExact(startkey)
&& PyUnicode_CheckExact(key)
&& _PyUnicode_EQ(startkey, key))
goto found_active;
table = so->table;
Py_INCREF(startkey);
cmp = PyObject_RichCompareBool(startkey, key, Py_EQ);
Py_DECREF(startkey);
if (cmp > 0) /* likely */
goto found_active;
if (cmp < 0)
goto comparison_error;
/* Continuing the search from the current entry only makes
sense if the table and entry are unchanged; otherwise,
we have to restart from the beginning */
if (table != so->table || entry->key != startkey)
goto restart;
mask = so->mask; /* help avoid a register spill */
}

if (i + LINEAR_PROBES <= mask) {
for (j = 0 ; j < LINEAR_PROBES ; j++) {
entry++;
if (entry->hash == 0 && entry->key == NULL)
goto found_unused;
if (entry->hash == hash) {
PyObject *startkey = entry->key;
assert(startkey != dummy);
if (startkey == key)
goto found_active;
if (PyUnicode_CheckExact(startkey)
&& PyUnicode_CheckExact(key)
&& _PyUnicode_EQ(startkey, key))
goto found_active;
table = so->table;
Py_INCREF(startkey);
cmp = PyObject_RichCompareBool(startkey, key, Py_EQ);
Py_DECREF(startkey);
if (cmp > 0)
goto found_active;
if (cmp < 0)
goto comparison_error;
if (table != so->table || entry->key != startkey)
goto restart;
mask = so->mask;
}
entry = &so->table[i];
j = (i + LINEAR_PROBES <= mask) ? LINEAR_PROBES: 0;
do {
if (entry->hash == 0 && entry->key == NULL)
goto found_unused;
if (entry->hash == hash) {
PyObject *startkey = entry->key;
assert(startkey != dummy);
if (startkey == key)
goto found_active;
if (PyUnicode_CheckExact(startkey)
&& PyUnicode_CheckExact(key)
&& _PyUnicode_EQ(startkey, key))
goto found_active;
table = so->table;
Py_INCREF(startkey);
cmp = PyObject_RichCompareBool(startkey, key, Py_EQ);
Py_DECREF(startkey);
if (cmp > 0)
goto found_active;
if (cmp < 0)
goto comparison_error;
if (table != so->table || entry->key != startkey)
goto restart;
mask = so->mask;
}
}

entry++;
} while (j--);
perturb >>= PERTURB_SHIFT;
i = (i * 5 + 1 + perturb) & mask;

entry = &so->table[i];
if (entry->key == NULL)
goto found_unused;
}

found_unused:
Expand Down