diff --git a/Include/internal/pycore_pyhash.h b/Include/internal/pycore_pyhash.h index 34dfa53771288e..59e416ca18a729 100644 --- a/Include/internal/pycore_pyhash.h +++ b/Include/internal/pycore_pyhash.h @@ -1,10 +1,86 @@ -#ifndef Py_INTERNAL_HASH_H -#define Py_INTERNAL_HASH_H +#ifndef Py_INTERNAL_PYHASH_H +#define Py_INTERNAL_PYHASH_H #ifndef Py_BUILD_CORE # error "this header requires Py_BUILD_CORE define" #endif +/* Helpers for hash functions */ +extern Py_hash_t _Py_HashDouble(PyObject *, double); +// _decimal shared extension uses _Py_HashPointer() +PyAPI_FUNC(Py_hash_t) _Py_HashPointer(const void*); +// Similar to _Py_HashPointer(), but don't replace -1 with -2 +extern Py_hash_t _Py_HashPointerRaw(const void*); +// _datetime shared extension uses _Py_HashBytes() +PyAPI_FUNC(Py_hash_t) _Py_HashBytes(const void*, Py_ssize_t); + +/* Prime multiplier used in string and various other hashes. */ +#define _PyHASH_MULTIPLIER 1000003UL /* 0xf4243 */ + +/* Parameters used for the numeric hash implementation. See notes for + _Py_HashDouble in Python/pyhash.c. Numeric hashes are based on + reduction modulo the prime 2**_PyHASH_BITS - 1. */ + +#if SIZEOF_VOID_P >= 8 +# define _PyHASH_BITS 61 +#else +# define _PyHASH_BITS 31 +#endif + +#define _PyHASH_MODULUS (((size_t)1 << _PyHASH_BITS) - 1) +#define _PyHASH_INF 314159 +#define _PyHASH_IMAG _PyHASH_MULTIPLIER + +/* Hash secret + * + * memory layout on 64 bit systems + * cccccccc cccccccc cccccccc uc -- unsigned char[24] + * pppppppp ssssssss ........ fnv -- two Py_hash_t + * k0k0k0k0 k1k1k1k1 ........ siphash -- two uint64_t + * ........ ........ ssssssss djbx33a -- 16 bytes padding + one Py_hash_t + * ........ ........ eeeeeeee pyexpat XML hash salt + * + * memory layout on 32 bit systems + * cccccccc cccccccc cccccccc uc + * ppppssss ........ ........ fnv -- two Py_hash_t + * k0k0k0k0 k1k1k1k1 ........ siphash -- two uint64_t (*) + * ........ ........ ssss.... djbx33a -- 16 bytes padding + one Py_hash_t + * ........ 
........ eeee.... pyexpat XML hash salt + * + * (*) The siphash member may not be available on 32 bit platforms without + * an unsigned int64 data type. + */ +typedef union { + /* ensure 24 bytes */ + unsigned char uc[24]; + /* two Py_hash_t for FNV */ + struct { + Py_hash_t prefix; + Py_hash_t suffix; + } fnv; + /* two uint64 for SipHash24 */ + struct { + uint64_t k0; + uint64_t k1; + } siphash; + /* a different (!) Py_hash_t for small string optimization */ + struct { + unsigned char padding[16]; + Py_hash_t suffix; + } djbx33a; + struct { + unsigned char padding[16]; + Py_hash_t hashsalt; + } expat; +} _Py_HashSecret_t; + +// _elementtree shared extension uses _Py_HashSecret.expat +PyAPI_DATA(_Py_HashSecret_t) _Py_HashSecret; + +#ifdef Py_DEBUG +extern int _Py_HashSecret_Initialized; +#endif + struct pyhash_runtime_state { struct { @@ -34,7 +110,6 @@ struct pyhash_runtime_state { } -uint64_t _Py_KeyedHash(uint64_t, const char *, Py_ssize_t); - +extern uint64_t _Py_KeyedHash(uint64_t key, const void *src, Py_ssize_t src_sz); -#endif // Py_INTERNAL_HASH_H +#endif // !Py_INTERNAL_PYHASH_H diff --git a/Include/pyhash.h b/Include/pyhash.h index 182d223fab1cac..6e969f86fa2625 100644 --- a/Include/pyhash.h +++ b/Include/pyhash.h @@ -1,87 +1,10 @@ #ifndef Py_HASH_H - #define Py_HASH_H #ifdef __cplusplus extern "C" { #endif -/* Helpers for hash functions */ #ifndef Py_LIMITED_API -PyAPI_FUNC(Py_hash_t) _Py_HashDouble(PyObject *, double); -PyAPI_FUNC(Py_hash_t) _Py_HashPointer(const void*); -// Similar to _Py_HashPointer(), but don't replace -1 with -2 -PyAPI_FUNC(Py_hash_t) _Py_HashPointerRaw(const void*); -PyAPI_FUNC(Py_hash_t) _Py_HashBytes(const void*, Py_ssize_t); -#endif - -/* Prime multiplier used in string and various other hashes. */ -#define _PyHASH_MULTIPLIER 1000003UL /* 0xf4243 */ - -/* Parameters used for the numeric hash implementation. See notes for - _Py_HashDouble in Python/pyhash.c. 
Numeric hashes are based on - reduction modulo the prime 2**_PyHASH_BITS - 1. */ - -#if SIZEOF_VOID_P >= 8 -# define _PyHASH_BITS 61 -#else -# define _PyHASH_BITS 31 -#endif - -#define _PyHASH_MODULUS (((size_t)1 << _PyHASH_BITS) - 1) -#define _PyHASH_INF 314159 -#define _PyHASH_IMAG _PyHASH_MULTIPLIER - - -/* hash secret - * - * memory layout on 64 bit systems - * cccccccc cccccccc cccccccc uc -- unsigned char[24] - * pppppppp ssssssss ........ fnv -- two Py_hash_t - * k0k0k0k0 k1k1k1k1 ........ siphash -- two uint64_t - * ........ ........ ssssssss djbx33a -- 16 bytes padding + one Py_hash_t - * ........ ........ eeeeeeee pyexpat XML hash salt - * - * memory layout on 32 bit systems - * cccccccc cccccccc cccccccc uc - * ppppssss ........ ........ fnv -- two Py_hash_t - * k0k0k0k0 k1k1k1k1 ........ siphash -- two uint64_t (*) - * ........ ........ ssss.... djbx33a -- 16 bytes padding + one Py_hash_t - * ........ ........ eeee.... pyexpat XML hash salt - * - * (*) The siphash member may not be available on 32 bit platforms without - * an unsigned int64 data type. - */ -#ifndef Py_LIMITED_API -typedef union { - /* ensure 24 bytes */ - unsigned char uc[24]; - /* two Py_hash_t for FNV */ - struct { - Py_hash_t prefix; - Py_hash_t suffix; - } fnv; - /* two uint64 for SipHash24 */ - struct { - uint64_t k0; - uint64_t k1; - } siphash; - /* a different (!) Py_hash_t for small string optimization */ - struct { - unsigned char padding[16]; - Py_hash_t suffix; - } djbx33a; - struct { - unsigned char padding[16]; - Py_hash_t hashsalt; - } expat; -} _Py_HashSecret_t; -PyAPI_DATA(_Py_HashSecret_t) _Py_HashSecret; - -#ifdef Py_DEBUG -PyAPI_DATA(int) _Py_HashSecret_Initialized; -#endif - - /* hash function definition */ typedef struct { Py_hash_t (*const hash)(const void *, Py_ssize_t); @@ -94,7 +17,7 @@ PyAPI_FUNC(PyHash_FuncDef*) PyHash_GetFuncDef(void); #endif -/* cutoff for small string DJBX33A optimization in range [1, cutoff). 
+/* Cutoff for small string DJBX33A optimization in range [1, cutoff). * * About 50% of the strings in a typical Python application are smaller than * 6 to 7 chars. However DJBX33A is vulnerable to hash collision attacks. @@ -112,7 +35,7 @@ PyAPI_FUNC(PyHash_FuncDef*) PyHash_GetFuncDef(void); #endif /* Py_HASH_CUTOFF */ -/* hash algorithm selection +/* Hash algorithm selection * * The values for Py_HASH_* are hard-coded in the * configure script. @@ -140,5 +63,4 @@ PyAPI_FUNC(PyHash_FuncDef*) PyHash_GetFuncDef(void); #ifdef __cplusplus } #endif - -#endif /* !Py_HASH_H */ +#endif // !Py_HASH_H diff --git a/Modules/_elementtree.c b/Modules/_elementtree.c index a8d68d68420d36..30327e0b6fc31f 100644 --- a/Modules/_elementtree.c +++ b/Modules/_elementtree.c @@ -17,6 +17,7 @@ #include "Python.h" #include "pycore_import.h" // _PyImport_GetModuleAttrString() +#include "pycore_pyhash.h" // _Py_HashSecret #include "structmember.h" // PyMemberDef #include "expat.h" #include "pyexpat.h" diff --git a/Modules/_hashopenssl.c b/Modules/_hashopenssl.c index 246eea74098820..6d60037cd2054e 100644 --- a/Modules/_hashopenssl.c +++ b/Modules/_hashopenssl.c @@ -24,8 +24,9 @@ #include "Python.h" #include "pycore_hashtable.h" -#include "hashlib.h" +#include "pycore_pyhash.h" // _Py_HashBytes() #include "pycore_strhex.h" // _Py_strhex() +#include "hashlib.h" /* EVP is the preferred interface to hashing in OpenSSL */ #include <openssl/evp.h> diff --git a/Modules/_xxtestfuzz/fuzzer.c b/Modules/_xxtestfuzz/fuzzer.c index 37d402824853f0..54f8a42273401f 100644 --- a/Modules/_xxtestfuzz/fuzzer.c +++ b/Modules/_xxtestfuzz/fuzzer.c @@ -10,7 +10,12 @@ See the source code for LLVMFuzzerTestOneInput for details. 
*/ +#ifndef Py_BUILD_CORE +# define Py_BUILD_CORE 1 +#endif + #include <Python.h> +#include "pycore_pyhash.h" // _Py_HashBytes() #include <stdlib.h> #include <inttypes.h> diff --git a/Modules/pyexpat.c b/Modules/pyexpat.c index bd8a98a46579a3..33a56c57a876ec 100644 --- a/Modules/pyexpat.c +++ b/Modules/pyexpat.c @@ -4,6 +4,7 @@ #include "Python.h" #include "pycore_import.h" // _PyImport_SetModule() +#include "pycore_pyhash.h" // _Py_HashSecret #include <ctype.h> #include "structmember.h" // PyMemberDef diff --git a/Python/hashtable.c b/Python/hashtable.c index 09501de199b0e6..0f07cd20b1a95e 100644 --- a/Python/hashtable.c +++ b/Python/hashtable.c @@ -46,6 +46,7 @@ #include "Python.h" #include "pycore_hashtable.h" +#include "pycore_pyhash.h" // _Py_HashPointerRaw() #define HASHTABLE_MIN_SIZE 16 #define HASHTABLE_HIGH 0.50 diff --git a/Python/pyhash.c b/Python/pyhash.c index d5ac9f83be61cc..b2bdab5099d86a 100644 --- a/Python/pyhash.c +++ b/Python/pyhash.c @@ -4,6 +4,7 @@ All the utility functions (_Py_Hash*()) return "-1" to signify an error. */ #include "Python.h" +#include "pycore_pyhash.h" // _Py_HashSecret_t #ifdef __APPLE__ # include <libkern/OSByteOrder.h>