Skip to content

Commit aee45fd

Browse files
authored
gh-134531: refactor _hashlib logic for handling NIDs and EVP_MDs (#135254)
1 parent 158e516 commit aee45fd

File tree

1 file changed

+106
-62
lines changed

1 file changed

+106
-62
lines changed

Modules/_hashopenssl.c

Lines changed: 106 additions & 62 deletions
Original file line number | Diff line number | Diff line change
@@ -368,41 +368,83 @@ notify_ssl_error_occurred(void)
368368
}
369369
/* LCOV_EXCL_STOP */
370370

371-
static const char *
372-
get_openssl_evp_md_utf8name(const EVP_MD *md)
373-
{
374-
assert(md != NULL);
375-
int nid = EVP_MD_nid(md);
376-
const char *name = NULL;
377-
const py_hashentry_t *h;
371+
/*
372+
* OpenSSL provides a way to go from NIDs to digest names for hash functions
373+
* but lacks this granularity for MAC objects where it is not possible to get
374+
* the underlying digest name (only the block size and digest size are allowed
375+
* to be recovered).
376+
*
377+
* In addition, OpenSSL aliases pollute the list of known digest names
378+
* as OpenSSL appears to have its own definition of alias. In particular,
379+
* the resulting list still contains duplicate and alternate names for several
380+
* algorithms.
381+
*
382+
* Therefore, digest names, whether they are used by hash functions or HMAC,
383+
* are handled through EVP_MD objects or directly by using some NID.
384+
*/
378385

379-
for (h = py_hashes; h->py_name != NULL; h++) {
386+
/* Get the py_hashes entry whose OpenSSL NID is 'nid', or NULL if none. */
387+
static const py_hashentry_t *
388+
get_hashentry_by_nid(int nid)
389+
{
390+
for (const py_hashentry_t *h = py_hashes; h->py_name != NULL; h++) {
380391
if (h->ossl_nid == nid) {
381-
name = h->py_name;
382-
break;
392+
return h;
383393
}
384394
}
395+
return NULL;
396+
}
397+
398+
/*
399+
* Convert the NID to a string via OBJ_nid2*() functions.
400+
*
401+
* If 'nid' cannot be resolved, set an exception and return NULL.
402+
*/
403+
static const char *
404+
get_asn1_utf8name_by_nid(int nid)
405+
{
406+
const char *name = OBJ_nid2ln(nid);
385407
if (name == NULL) {
386-
/* Ignore aliased names and only use long, lowercase name. The aliases
387-
* pollute the list and OpenSSL appears to have its own definition of
388-
* alias as the resulting list still contains duplicate and alternate
389-
* names for several algorithms.
390-
*/
391-
name = OBJ_nid2ln(nid);
392-
if (name == NULL)
393-
name = OBJ_nid2sn(nid);
408+
// In OpenSSL 3.0 and later, OBJ_nid2*() are thread-safe and may queue an error.
409+
assert(ERR_peek_last_error() != 0);
410+
if (ERR_GET_REASON(ERR_peek_last_error()) != OBJ_R_UNKNOWN_NID) {
411+
notify_ssl_error_occurred();
412+
return NULL;
413+
}
414+
// fall back to the short name and unconditionally propagate errors
415+
name = OBJ_nid2sn(nid);
416+
if (name == NULL) {
417+
raise_ssl_error(PyExc_ValueError, "cannot resolve NID %d", nid);
418+
}
394419
}
395420
return name;
396421
}
397422

398-
static PyObject *
399-
get_openssl_evp_md_name(const EVP_MD *md)
423+
/*
424+
* Convert the NID to its Python (py_hashes) digest name, else to its OpenSSL name.
425+
*
426+
* On error, set an exception and return NULL.
427+
*/
428+
static const char *
429+
get_hashlib_utf8name_by_nid(int nid)
430+
{
431+
const py_hashentry_t *e = get_hashentry_by_nid(nid);
432+
return e ? e->py_name : get_asn1_utf8name_by_nid(nid);
433+
}
434+
435+
/* Same as get_hashlib_utf8name_by_nid() but using an EVP_MD object. */
436+
static const char *
437+
get_hashlib_utf8name_by_evp_md(const EVP_MD *md)
400438
{
401-
const char *name = get_openssl_evp_md_utf8name(md);
402-
return PyUnicode_FromString(name);
439+
assert(md != NULL);
440+
return get_hashlib_utf8name_by_nid(EVP_MD_nid(md));
403441
}
404442

405-
/* Get EVP_MD by HID and purpose */
443+
/*
444+
* Get a new reference to an EVP_MD object described by name and purpose.
445+
*
446+
* If 'name' is an OpenSSL indexed name, the return value is cached.
447+
*/
406448
static PY_EVP_MD *
407449
get_openssl_evp_md_by_utf8name(PyObject *module, const char *name,
408450
Py_hash_type py_ht)
@@ -471,42 +513,46 @@ get_openssl_evp_md_by_utf8name(PyObject *module, const char *name,
471513
return digest;
472514
}
473515

474-
/* Get digest EVP_MD from object
516+
/*
517+
* Raise an exception indicating that 'digestmod' is not supported.
518+
*/
519+
static void
520+
raise_unsupported_digestmod_error(PyObject *module, PyObject *digestmod)
521+
{
522+
_hashlibstate *state = get_hashlib_state(module);
523+
PyErr_Format(state->unsupported_digestmod_error,
524+
"Unsupported digestmod %R", digestmod);
525+
}
526+
527+
/*
528+
* Get a new reference to an EVP_MD described by 'digestmod' and purpose.
529+
*
530+
* On error, set an exception and return NULL.
475531
*
476-
* * string
477-
* * _hashopenssl builtin function
532+
* Parameters
478533
*
479-
* on error returns NULL with exception set.
534+
* digestmod A digest name or a _hashopenssl builtin function
535+
* py_ht The message digest purpose.
480536
*/
481537
static PY_EVP_MD *
482-
get_openssl_evp_md(PyObject *module, PyObject *digestmod,
483-
Py_hash_type py_ht)
538+
get_openssl_evp_md(PyObject *module, PyObject *digestmod, Py_hash_type py_ht)
484539
{
485-
PyObject *name_obj = NULL;
486540
const char *name;
487-
488541
if (PyUnicode_Check(digestmod)) {
489-
name_obj = digestmod;
490-
} else {
491-
_hashlibstate *state = get_hashlib_state(module);
492-
// borrowed ref
493-
name_obj = PyDict_GetItemWithError(state->constructs, digestmod);
542+
name = PyUnicode_AsUTF8(digestmod);
494543
}
495-
if (name_obj == NULL) {
496-
if (!PyErr_Occurred()) {
497-
_hashlibstate *state = get_hashlib_state(module);
498-
PyErr_Format(
499-
state->unsupported_digestmod_error,
500-
"Unsupported digestmod %R", digestmod);
501-
}
502-
return NULL;
544+
else {
545+
PyObject *dict = get_hashlib_state(module)->constructs;
546+
assert(dict != NULL);
547+
PyObject *borrowed_ref = PyDict_GetItemWithError(dict, digestmod);
548+
name = borrowed_ref == NULL ? NULL : PyUnicode_AsUTF8(borrowed_ref);
503549
}
504-
505-
name = PyUnicode_AsUTF8(name_obj);
506550
if (name == NULL) {
551+
if (!PyErr_Occurred()) {
552+
raise_unsupported_digestmod_error(module, digestmod);
553+
}
507554
return NULL;
508555
}
509-
510556
return get_openssl_evp_md_by_utf8name(module, name, py_ht);
511557
}
512558

@@ -745,7 +791,9 @@ _hashlib_HASH_get_name(PyObject *op, void *Py_UNUSED(closure))
745791
notify_ssl_error_occurred();
746792
return NULL;
747793
}
748-
return get_openssl_evp_md_name(md);
794+
const char *name = get_hashlib_utf8name_by_evp_md(md);
795+
assert(name != NULL || PyErr_Occurred());
796+
return name == NULL ? NULL : PyUnicode_FromString(name);
749797
}
750798

751799
static PyGetSetDef HASH_getsets[] = {
@@ -1775,20 +1823,15 @@ _hmac_dealloc(PyObject *op)
17751823
static PyObject *
17761824
_hmac_repr(PyObject *op)
17771825
{
1826+
const char *digest_name;
17781827
HMACobject *self = HMACobject_CAST(op);
17791828
const EVP_MD *md = _hashlib_hmac_get_md(self);
1780-
if (md == NULL) {
1781-
return NULL;
1782-
}
1783-
PyObject *digest_name = get_openssl_evp_md_name(md);
1829+
digest_name = md == NULL ? NULL : get_hashlib_utf8name_by_evp_md(md);
17841830
if (digest_name == NULL) {
1831+
assert(PyErr_Occurred());
17851832
return NULL;
17861833
}
1787-
PyObject *repr = PyUnicode_FromFormat(
1788-
"<%U HMAC object @ %p>", digest_name, self
1789-
);
1790-
Py_DECREF(digest_name);
1791-
return repr;
1834+
return PyUnicode_FromFormat("<%s HMAC object @ %p>", digest_name, self);
17921835
}
17931836

17941837
/*[clinic input]
@@ -1900,13 +1943,12 @@ _hashlib_hmac_get_name(PyObject *op, void *Py_UNUSED(closure))
19001943
if (md == NULL) {
19011944
return NULL;
19021945
}
1903-
PyObject *digest_name = get_openssl_evp_md_name(md);
1946+
const char *digest_name = get_hashlib_utf8name_by_evp_md(md);
19041947
if (digest_name == NULL) {
1948+
assert(PyErr_Occurred());
19051949
return NULL;
19061950
}
1907-
PyObject *name = PyUnicode_FromFormat("hmac-%U", digest_name);
1908-
Py_DECREF(digest_name);
1909-
return name;
1951+
return PyUnicode_FromFormat("hmac-%s", digest_name);
19101952
}
19111953

19121954
static PyMethodDef HMAC_methods[] = {
@@ -1982,7 +2024,9 @@ _openssl_hash_name_mapper(const EVP_MD *md, const char *from,
19822024
return;
19832025
}
19842026

1985-
py_name = get_openssl_evp_md_name(md);
2027+
const char *name = get_hashlib_utf8name_by_evp_md(md);
2028+
assert(name != NULL || PyErr_Occurred());
2029+
py_name = name == NULL ? NULL : PyUnicode_FromString(name);
19862030
if (py_name == NULL) {
19872031
state->error = 1;
19882032
} else {

0 commit comments

Comments
 (0)