Skip to content

Commit a07da09

Browse files
authored
bpo-43475: Fix worst case collision behavior for NaN instances (pythonGH-25493)
1 parent accea7d commit a07da09

File tree

10 files changed

+25
-21
lines changed

10 files changed

+25
-21
lines changed

Doc/library/stdtypes.rst

+4-5
Original file line numberDiff line numberDiff line change
@@ -692,10 +692,9 @@ Here are the rules in detail:
692692
as ``-hash(-x)``. If the resulting hash is ``-1``, replace it with
693693
``-2``.
694694

695-
- The particular values ``sys.hash_info.inf``, ``-sys.hash_info.inf``
696-
and ``sys.hash_info.nan`` are used as hash values for positive
697-
infinity, negative infinity, or nans (respectively). (All hashable
698-
nans have the same hash value.)
695+
- The particular values ``sys.hash_info.inf`` and ``-sys.hash_info.inf``
696+
are used as hash values for positive
697+
infinity or negative infinity (respectively).
699698

700699
- For a :class:`complex` number ``z``, the hash values of the real
701700
and imaginary parts are combined by computing ``hash(z.real) +
@@ -740,7 +739,7 @@ number, :class:`float`, or :class:`complex`::
740739
"""Compute the hash of a float x."""
741740

742741
if math.isnan(x):
743-
return sys.hash_info.nan
742+
return super().__hash__()
744743
elif math.isinf(x):
745744
return sys.hash_info.inf if x > 0 else -sys.hash_info.inf
746745
else:

Doc/library/sys.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -855,7 +855,7 @@ always available.
855855
+---------------------+--------------------------------------------------+
856856
| :const:`inf` | hash value returned for a positive infinity |
857857
+---------------------+--------------------------------------------------+
858-
| :const:`nan` | hash value returned for a nan |
858+
| :const:`nan` | (this attribute is no longer used) |
859859
+---------------------+--------------------------------------------------+
860860
| :const:`imag` | multiplier used for the imaginary part of a |
861861
| | complex number |

Include/pyhash.h

+1-2
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ extern "C" {
77

88
/* Helpers for hash functions */
99
#ifndef Py_LIMITED_API
10-
PyAPI_FUNC(Py_hash_t) _Py_HashDouble(double);
10+
PyAPI_FUNC(Py_hash_t) _Py_HashDouble(PyObject *, double);
1111
PyAPI_FUNC(Py_hash_t) _Py_HashPointer(const void*);
1212
// Similar to _Py_HashPointer(), but don't replace -1 with -2
1313
PyAPI_FUNC(Py_hash_t) _Py_HashPointerRaw(const void*);
@@ -29,7 +29,6 @@ PyAPI_FUNC(Py_hash_t) _Py_HashBytes(const void*, Py_ssize_t);
2929

3030
#define _PyHASH_MODULUS (((size_t)1 << _PyHASH_BITS) - 1)
3131
#define _PyHASH_INF 314159
32-
#define _PyHASH_NAN 0
3332
#define _PyHASH_IMAG _PyHASH_MULTIPLIER
3433

3534

Lib/_pydecimal.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -951,7 +951,7 @@ def __hash__(self):
951951
if self.is_snan():
952952
raise TypeError('Cannot hash a signaling NaN value.')
953953
elif self.is_nan():
954-
return _PyHASH_NAN
954+
return super().__hash__()
955955
else:
956956
if self._sign:
957957
return -_PyHASH_INF
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
Hashes of NaN values now depend on object identity. Formerly, they always
2+
hashed to 0 even though NaN values are not equal to one another. Having the
3+
same hash for unequal values caused pile-ups in hash tables.

Modules/_decimal/_decimal.c

+1-4
Original file line numberDiff line numberDiff line change
@@ -4536,7 +4536,6 @@ _dec_hash(PyDecObject *v)
45364536
#error "No valid combination of CONFIG_64, CONFIG_32 and _PyHASH_BITS"
45374537
#endif
45384538
const Py_hash_t py_hash_inf = 314159;
4539-
const Py_hash_t py_hash_nan = 0;
45404539
mpd_uint_t ten_data[1] = {10};
45414540
mpd_t ten = {MPD_POS|MPD_STATIC|MPD_CONST_DATA,
45424541
0, 2, 1, 1, ten_data};
@@ -4555,7 +4554,7 @@ _dec_hash(PyDecObject *v)
45554554
return -1;
45564555
}
45574556
else if (mpd_isnan(MPD(v))) {
4558-
return py_hash_nan;
4557+
return _Py_HashPointer(v);
45594558
}
45604559
else {
45614560
return py_hash_inf * mpd_arith_sign(MPD(v));
@@ -5939,5 +5938,3 @@ PyInit__decimal(void)
59395938

59405939
return NULL; /* GCOV_NOT_REACHED */
59415940
}
5942-
5943-

Objects/complexobject.c

+2-2
Original file line numberDiff line numberDiff line change
@@ -412,10 +412,10 @@ static Py_hash_t
412412
complex_hash(PyComplexObject *v)
413413
{
414414
Py_uhash_t hashreal, hashimag, combined;
415-
hashreal = (Py_uhash_t)_Py_HashDouble(v->cval.real);
415+
hashreal = (Py_uhash_t)_Py_HashDouble((PyObject *) v, v->cval.real);
416416
if (hashreal == (Py_uhash_t)-1)
417417
return -1;
418-
hashimag = (Py_uhash_t)_Py_HashDouble(v->cval.imag);
418+
hashimag = (Py_uhash_t)_Py_HashDouble((PyObject *)v, v->cval.imag);
419419
if (hashimag == (Py_uhash_t)-1)
420420
return -1;
421421
/* Note: if the imaginary part is 0, hashimag is 0 now,

Objects/floatobject.c

+1-1
Original file line numberDiff line numberDiff line change
@@ -556,7 +556,7 @@ float_richcompare(PyObject *v, PyObject *w, int op)
556556
static Py_hash_t
557557
float_hash(PyFloatObject *v)
558558
{
559-
return _Py_HashDouble(v->ob_fval);
559+
return _Py_HashDouble((PyObject *)v, v->ob_fval);
560560
}
561561

562562
static PyObject *

Python/pyhash.c

+10-4
Original file line numberDiff line numberDiff line change
@@ -56,8 +56,12 @@ static Py_ssize_t hashstats[Py_HASH_STATS_MAX + 1] = {0};
5656
If the result of the reduction is infinity (this is impossible for
5757
integers, floats and Decimals) then use the predefined hash value
5858
_PyHASH_INF for x >= 0, or -_PyHASH_INF for x < 0, instead.
59-
_PyHASH_INF, -_PyHASH_INF and _PyHASH_NAN are also used for the
60-
hashes of float and Decimal infinities and nans.
59+
_PyHASH_INF and -_PyHASH_INF are also used for the
60+
hashes of float and Decimal infinities.
61+
62+
NaNs hash with a pointer hash. Having distinct hash values prevents
63+
catastrophic pileups from distinct NaN instances which used to always
64+
have the same hash value but would compare unequal.
6165
6266
A selling point for the above strategy is that it makes it possible
6367
to compute hashes of decimal and binary floating-point numbers
@@ -82,8 +86,10 @@ static Py_ssize_t hashstats[Py_HASH_STATS_MAX + 1] = {0};
8286
8387
*/
8488

89+
Py_hash_t _Py_HashPointer(const void *);
90+
8591
Py_hash_t
86-
_Py_HashDouble(double v)
92+
_Py_HashDouble(PyObject *inst, double v)
8793
{
8894
int e, sign;
8995
double m;
@@ -93,7 +99,7 @@ _Py_HashDouble(double v)
9399
if (Py_IS_INFINITY(v))
94100
return v > 0 ? _PyHASH_INF : -_PyHASH_INF;
95101
else
96-
return _PyHASH_NAN;
102+
return _Py_HashPointer(inst);
97103
}
98104

99105
m = frexp(v, &e);

Python/sysmodule.c

+1-1
Original file line numberDiff line numberDiff line change
@@ -1405,7 +1405,7 @@ get_hash_info(PyThreadState *tstate)
14051405
PyStructSequence_SET_ITEM(hash_info, field++,
14061406
PyLong_FromLong(_PyHASH_INF));
14071407
PyStructSequence_SET_ITEM(hash_info, field++,
1408-
PyLong_FromLong(_PyHASH_NAN));
1408+
PyLong_FromLong(0)); // This is no longer used
14091409
PyStructSequence_SET_ITEM(hash_info, field++,
14101410
PyLong_FromLong(_PyHASH_IMAG));
14111411
PyStructSequence_SET_ITEM(hash_info, field++,

0 commit comments

Comments
 (0)