From 18c020cbd569b11c1579641326695a3f4f9c696b Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Fri, 13 Jan 2023 09:41:04 +0200 Subject: [PATCH 1/3] gh-101006: Improve error handling when read marshal data * EOFError no longer overrides other errors such as MemoryError or OSError at the start of the object. * Raise more relevant error when the NULL object occurs as a code object component. * Minimize an overhead of calling PyErr_Occurred(). --- ...-01-13-11-37-41.gh-issue-101006.fuLvn2.rst | 1 + Python/marshal.c | 122 +++++++++--------- 2 files changed, 62 insertions(+), 61 deletions(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2023-01-13-11-37-41.gh-issue-101006.fuLvn2.rst diff --git a/Misc/NEWS.d/next/Core and Builtins/2023-01-13-11-37-41.gh-issue-101006.fuLvn2.rst b/Misc/NEWS.d/next/Core and Builtins/2023-01-13-11-37-41.gh-issue-101006.fuLvn2.rst new file mode 100644 index 00000000000000..c98670d8c4963d --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2023-01-13-11-37-41.gh-issue-101006.fuLvn2.rst @@ -0,0 +1 @@ +Improve error handling when read :mod:`marshal` data. diff --git a/Python/marshal.c b/Python/marshal.c index 5f392d9e1ecfff..c9b087b1dcb790 100644 --- a/Python/marshal.c +++ b/Python/marshal.c @@ -746,23 +746,28 @@ r_string(Py_ssize_t n, RFILE *p) static int r_byte(RFILE *p) { - int c = EOF; - if (p->ptr != NULL) { - if (p->ptr < p->end) - c = (unsigned char) *p->ptr++; - return c; + if (p->ptr < p->end) { + return (unsigned char) *p->ptr++; + } } - if (!p->readable) { + else if (!p->readable) { assert(p->fp); - c = getc(p->fp); + int c = getc(p->fp); + if (c != EOF) { + return c; + } } else { const char *ptr = r_string(1, p); - if (ptr != NULL) - c = *(const unsigned char *) ptr; + if (ptr != NULL) { + return *(const unsigned char *) ptr; + } + return EOF; } - return c; + PyErr_SetString(PyExc_EOFError, + "EOF read where not expected"); + return EOF; } static int @@ -823,10 +828,10 @@ r_PyLong(RFILE *p) digit d; n = r_long(p); - if (PyErr_Occurred()) - return NULL; if (n == 0) return (PyObject *)_PyLong_New(0); + if (n == -1 && PyErr_Occurred()) + return NULL; if (n < -SIZE32_MAX || n > SIZE32_MAX) { PyErr_SetString(PyExc_ValueError, "bad marshal data (long size out of range)"); @@ -845,10 +850,6 @@ r_PyLong(RFILE *p) d = 0; for (j=0; j < PyLong_MARSHAL_RATIO; j++) { md = r_short(p); - if (PyErr_Occurred()) { - Py_DECREF(ob); - return NULL; - } if (md < 0 || md > PyLong_MARSHAL_BASE) goto bad_digit; d += (digit)md << j*PyLong_MARSHAL_SHIFT; @@ -859,10 +860,6 @@ r_PyLong(RFILE *p) d = 0; for (j=0; j < shorts_in_top_digit; j++) { md = r_short(p); - if (PyErr_Occurred()) { - Py_DECREF(ob); - return NULL; - } if (md < 0 || md > PyLong_MARSHAL_BASE) goto bad_digit; /* topmost marshal digit should be nonzero */ @@ -874,18 +871,16 @@ r_PyLong(RFILE *p) } d += (digit)md << j*PyLong_MARSHAL_SHIFT; } - if (PyErr_Occurred()) { - Py_DECREF(ob); - return NULL; - } /* top digit should be nonzero, else the resulting PyLong won't be normalized */ ob->ob_digit[size-1] = d; return (PyObject *)ob; bad_digit: Py_DECREF(ob); - PyErr_SetString(PyExc_ValueError, - "bad marshal data (digit out of range in long)"); + if (!PyErr_Occurred()) { + PyErr_SetString(PyExc_ValueError, + "bad marshal data (digit out of range in long)"); + } return NULL; } @@ -908,8 +903,6 @@ r_float_str(RFILE *p) const char *ptr; n = r_byte(p); if (n == EOF) { - PyErr_SetString(PyExc_EOFError, - "EOF read where object expected"); return -1; } ptr = r_string(n, p); @@ -987,8 +980,10 @@ r_object(RFILE *p) PyObject *retval = NULL; if (code == EOF) { - PyErr_SetString(PyExc_EOFError, - "EOF read where object expected"); + if (PyErr_ExceptionMatches(PyExc_EOFError)) { + PyErr_SetString(PyExc_EOFError, + "EOF read where object expected"); + } return NULL; } @@ -1035,7 +1030,9 @@ r_object(RFILE *p) case TYPE_INT: n = r_long(p); - retval = PyErr_Occurred() ? NULL : PyLong_FromLong(n); + if (n == -1 && PyErr_Occurred()) + break; + retval = PyLong_FromLong(n); R_REF(retval); break; @@ -1101,10 +1098,10 @@ r_object(RFILE *p) { const char *ptr; n = r_long(p); - if (PyErr_Occurred()) - break; if (n < 0 || n > SIZE32_MAX) { - PyErr_SetString(PyExc_ValueError, "bad marshal data (bytes object size out of range)"); + if (!PyErr_Occurred()) { + PyErr_SetString(PyExc_ValueError, "bad marshal data (bytes object size out of range)"); + } break; } v = PyBytes_FromStringAndSize((char *)NULL, n); @@ -1126,10 +1123,10 @@ r_object(RFILE *p) /* fall through */ case TYPE_ASCII: n = r_long(p); - if (PyErr_Occurred()) - break; if (n < 0 || n > SIZE32_MAX) { - PyErr_SetString(PyExc_ValueError, "bad marshal data (string size out of range)"); + if (!PyErr_Occurred()) { + PyErr_SetString(PyExc_ValueError, "bad marshal data (string size out of range)"); + } break; } goto _read_ascii; @@ -1140,8 +1137,6 @@ r_object(RFILE *p) case TYPE_SHORT_ASCII: n = r_byte(p); if (n == EOF) { - PyErr_SetString(PyExc_EOFError, - "EOF read where object expected"); break; } _read_ascii: @@ -1168,10 +1163,10 @@ r_object(RFILE *p) const char *buffer; n = r_long(p); - if (PyErr_Occurred()) - break; if (n < 0 || n > SIZE32_MAX) { - PyErr_SetString(PyExc_ValueError, "bad marshal data (string size out of range)"); + if (!PyErr_Occurred()) { + PyErr_SetString(PyExc_ValueError, "bad marshal data (string size out of range)"); + } break; } if (n != 0) { @@ -1193,16 +1188,17 @@ r_object(RFILE *p) } case TYPE_SMALL_TUPLE: - n = (unsigned char) r_byte(p); - if (PyErr_Occurred()) + n = r_byte(p); + if (n == EOF) { break; + } goto _read_tuple; case TYPE_TUPLE: n = r_long(p); - if (PyErr_Occurred()) - break; if (n < 0 || n > SIZE32_MAX) { - PyErr_SetString(PyExc_ValueError, "bad marshal data (tuple size out of range)"); + if (!PyErr_Occurred()) { + PyErr_SetString(PyExc_ValueError, "bad marshal data (tuple size out of range)"); + } break; } _read_tuple: @@ -1227,10 +1223,10 @@ r_object(RFILE *p) case TYPE_LIST: n = r_long(p); - if (PyErr_Occurred()) - break; if (n < 0 || n > SIZE32_MAX) { - PyErr_SetString(PyExc_ValueError, "bad marshal data (list size out of range)"); + if (!PyErr_Occurred()) { + PyErr_SetString(PyExc_ValueError, "bad marshal data (list size out of range)"); + } break; } v = PyList_New(n); @@ -1283,10 +1279,10 @@ r_object(RFILE *p) case TYPE_SET: case TYPE_FROZENSET: n = r_long(p); - if (PyErr_Occurred()) - break; if (n < 0 || n > SIZE32_MAX) { - PyErr_SetString(PyExc_ValueError, "bad marshal data (set size out of range)"); + if (!PyErr_Occurred()) { + PyErr_SetString(PyExc_ValueError, "bad marshal data (set size out of range)"); + } break; } @@ -1363,20 +1359,20 @@ r_object(RFILE *p) /* XXX ignore long->int overflows for now */ argcount = (int)r_long(p); - if (PyErr_Occurred()) + if (argcount == -1 && PyErr_Occurred()) goto code_error; posonlyargcount = (int)r_long(p); - if (PyErr_Occurred()) { + if (posonlyargcount == -1 && PyErr_Occurred()) { goto code_error; } kwonlyargcount = (int)r_long(p); - if (PyErr_Occurred()) + if (kwonlyargcount == -1 && PyErr_Occurred()) goto code_error; stacksize = (int)r_long(p); - if (PyErr_Occurred()) + if (stacksize == -1 && PyErr_Occurred()) goto code_error; flags = (int)r_long(p); - if (PyErr_Occurred()) + if (flags == -1 && PyErr_Occurred()) goto code_error; code = r_object(p); if (code == NULL) @@ -1449,6 +1445,10 @@ r_object(RFILE *p) v = r_ref_insert(v, idx, flag, p); code_error: + if (v == NULL && !PyErr_Occurred()) { + PyErr_SetString(PyExc_TypeError, + "NULL object in marshal data for code object"); + } Py_XDECREF(code); Py_XDECREF(consts); Py_XDECREF(names); @@ -1466,9 +1466,9 @@ r_object(RFILE *p) case TYPE_REF: n = r_long(p); if (n < 0 || n >= PyList_GET_SIZE(p->refs)) { - if (n == -1 && PyErr_Occurred()) - break; - PyErr_SetString(PyExc_ValueError, "bad marshal data (invalid reference)"); + if (!PyErr_Occurred()) { + PyErr_SetString(PyExc_ValueError, "bad marshal data (invalid reference)"); + } break; } v = PyList_GET_ITEM(p->refs, n); From baa83b35563116c5b27cfc14cd36233345b6fa0a Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Sun, 21 May 2023 23:23:19 +0300 Subject: [PATCH 2/3] Apply suggestions from code review Co-authored-by: Erlend E. Aasland --- Python/marshal.c | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/Python/marshal.c b/Python/marshal.c index 88ca32e4dd1e5b..2bef42f9b7ed93 100644 --- a/Python/marshal.c +++ b/Python/marshal.c @@ -831,8 +831,9 @@ r_PyLong(RFILE *p) n = r_long(p); if (n == 0) return (PyObject *)_PyLong_New(0); - if (n == -1 && PyErr_Occurred()) + if (n == -1 && PyErr_Occurred()) { return NULL; + } if (n < -SIZE32_MAX || n > SIZE32_MAX) { PyErr_SetString(PyExc_ValueError, "bad marshal data (long size out of range)"); @@ -1031,8 +1032,9 @@ r_object(RFILE *p) case TYPE_INT: n = r_long(p); - if (n == -1 && PyErr_Occurred()) + if (n == -1 && PyErr_Occurred()) { break; + } retval = PyLong_FromLong(n); R_REF(retval); break; @@ -1101,7 +1103,8 @@ r_object(RFILE *p) n = r_long(p); if (n < 0 || n > SIZE32_MAX) { if (!PyErr_Occurred()) { - PyErr_SetString(PyExc_ValueError, "bad marshal data (bytes object size out of range)"); + PyErr_SetString(PyExc_ValueError, + "bad marshal data (bytes object size out of range)"); } break; } @@ -1126,7 +1129,8 @@ r_object(RFILE *p) n = r_long(p); if (n < 0 || n > SIZE32_MAX) { if (!PyErr_Occurred()) { - PyErr_SetString(PyExc_ValueError, "bad marshal data (string size out of range)"); + PyErr_SetString(PyExc_ValueError, + "bad marshal data (string size out of range)"); } break; } @@ -1166,7 +1170,8 @@ r_object(RFILE *p) n = r_long(p); if (n < 0 || n > SIZE32_MAX) { if (!PyErr_Occurred()) { - PyErr_SetString(PyExc_ValueError, "bad marshal data (string size out of range)"); + PyErr_SetString(PyExc_ValueError, + "bad marshal data (string size out of range)"); } break; } @@ -1198,7 +1203,8 @@ r_object(RFILE *p) n = r_long(p); if (n < 0 || n > SIZE32_MAX) { if (!PyErr_Occurred()) { - PyErr_SetString(PyExc_ValueError, "bad marshal data (tuple size out of range)"); + PyErr_SetString(PyExc_ValueError, + "bad marshal data (tuple size out of range)"); } break; } @@ -1226,7 +1232,8 @@ r_object(RFILE *p) n = r_long(p); if (n < 0 || n > SIZE32_MAX) { if (!PyErr_Occurred()) { - PyErr_SetString(PyExc_ValueError, "bad marshal data (list size out of range)"); + PyErr_SetString(PyExc_ValueError, + "bad marshal data (list size out of range)"); } break; } @@ -1282,7 +1289,8 @@ r_object(RFILE *p) n = r_long(p); if (n < 0 || n > SIZE32_MAX) { if (!PyErr_Occurred()) { - PyErr_SetString(PyExc_ValueError, "bad marshal data (set size out of range)"); + PyErr_SetString(PyExc_ValueError, + "bad marshal data (set size out of range)"); } break; } @@ -1468,7 +1476,8 @@ r_object(RFILE *p) n = r_long(p); if (n < 0 || n >= PyList_GET_SIZE(p->refs)) { if (!PyErr_Occurred()) { - PyErr_SetString(PyExc_ValueError, "bad marshal data (invalid reference)"); + PyErr_SetString(PyExc_ValueError, + "bad marshal data (invalid reference)"); } break; } From a55545a83d656c8e29b99570a87d296934ae352a Mon Sep 17 00:00:00 2001 From: Oleg Iarygin Date: Mon, 22 May 2023 07:13:32 +0400 Subject: [PATCH 3/3] Return `assert(!PyErr_Occurred());` back --- Python/marshal.c | 1 + 1 file changed, 1 insertion(+) diff --git a/Python/marshal.c b/Python/marshal.c index 2bef42f9b7ed93..efa92eecefaac1 100644 --- a/Python/marshal.c +++ b/Python/marshal.c @@ -873,6 +873,7 @@ r_PyLong(RFILE *p) } d += (digit)md << j*PyLong_MARSHAL_SHIFT; } + assert(!PyErr_Occurred()); /* top digit should be nonzero, else the resulting PyLong won't be normalized */ ob->long_value.ob_digit[size-1] = d;