Skip to content

Commit c9b9555

Browse files
[3.12] gh-101006: Improve error handling when reading marshal data (GH-101007) (GH-106226)
* EOFError no longer overrides other errors, such as MemoryError or OSError, at the start of the object.
* Raise a more relevant error when the NULL object occurs as a code object component.
* Minimize the overhead of calling PyErr_Occurred().

(cherry picked from commit 8bf6904)

Co-authored-by: Serhiy Storchaka <storchaka@gmail.com>
1 parent e0fa531 commit c9b9555

File tree

2 files changed

+72
-61
lines changed

2 files changed

+72
-61
lines changed
Original file line number | Diff line number | Diff line change
@@ -0,0 +1 @@
1+
Improve error handling when reading :mod:`marshal` data.

Python/marshal.c

+71-61
Original file line number | Diff line number | Diff line change
@@ -751,23 +751,28 @@ r_string(Py_ssize_t n, RFILE *p)
751751
static int
752752
r_byte(RFILE *p)
753753
{
754-
int c = EOF;
755-
756754
if (p->ptr != NULL) {
757-
if (p->ptr < p->end)
758-
c = (unsigned char) *p->ptr++;
759-
return c;
755+
if (p->ptr < p->end) {
756+
return (unsigned char) *p->ptr++;
757+
}
760758
}
761-
if (!p->readable) {
759+
else if (!p->readable) {
762760
assert(p->fp);
763-
c = getc(p->fp);
761+
int c = getc(p->fp);
762+
if (c != EOF) {
763+
return c;
764+
}
764765
}
765766
else {
766767
const char *ptr = r_string(1, p);
767-
if (ptr != NULL)
768-
c = *(const unsigned char *) ptr;
768+
if (ptr != NULL) {
769+
return *(const unsigned char *) ptr;
770+
}
771+
return EOF;
769772
}
770-
return c;
773+
PyErr_SetString(PyExc_EOFError,
774+
"EOF read where not expected");
775+
return EOF;
771776
}
772777

773778
static int
@@ -828,10 +833,11 @@ r_PyLong(RFILE *p)
828833
digit d;
829834

830835
n = r_long(p);
831-
if (PyErr_Occurred())
832-
return NULL;
833836
if (n == 0)
834837
return (PyObject *)_PyLong_New(0);
838+
if (n == -1 && PyErr_Occurred()) {
839+
return NULL;
840+
}
835841
if (n < -SIZE32_MAX || n > SIZE32_MAX) {
836842
PyErr_SetString(PyExc_ValueError,
837843
"bad marshal data (long size out of range)");
@@ -850,10 +856,6 @@ r_PyLong(RFILE *p)
850856
d = 0;
851857
for (j=0; j < PyLong_MARSHAL_RATIO; j++) {
852858
md = r_short(p);
853-
if (PyErr_Occurred()) {
854-
Py_DECREF(ob);
855-
return NULL;
856-
}
857859
if (md < 0 || md > PyLong_MARSHAL_BASE)
858860
goto bad_digit;
859861
d += (digit)md << j*PyLong_MARSHAL_SHIFT;
@@ -864,10 +866,6 @@ r_PyLong(RFILE *p)
864866
d = 0;
865867
for (j=0; j < shorts_in_top_digit; j++) {
866868
md = r_short(p);
867-
if (PyErr_Occurred()) {
868-
Py_DECREF(ob);
869-
return NULL;
870-
}
871869
if (md < 0 || md > PyLong_MARSHAL_BASE)
872870
goto bad_digit;
873871
/* topmost marshal digit should be nonzero */
@@ -879,18 +877,17 @@ r_PyLong(RFILE *p)
879877
}
880878
d += (digit)md << j*PyLong_MARSHAL_SHIFT;
881879
}
882-
if (PyErr_Occurred()) {
883-
Py_DECREF(ob);
884-
return NULL;
885-
}
880+
assert(!PyErr_Occurred());
886881
/* top digit should be nonzero, else the resulting PyLong won't be
887882
normalized */
888883
ob->long_value.ob_digit[size-1] = d;
889884
return (PyObject *)ob;
890885
bad_digit:
891886
Py_DECREF(ob);
892-
PyErr_SetString(PyExc_ValueError,
893-
"bad marshal data (digit out of range in long)");
887+
if (!PyErr_Occurred()) {
888+
PyErr_SetString(PyExc_ValueError,
889+
"bad marshal data (digit out of range in long)");
890+
}
894891
return NULL;
895892
}
896893

@@ -913,8 +910,6 @@ r_float_str(RFILE *p)
913910
const char *ptr;
914911
n = r_byte(p);
915912
if (n == EOF) {
916-
PyErr_SetString(PyExc_EOFError,
917-
"EOF read where object expected");
918913
return -1;
919914
}
920915
ptr = r_string(n, p);
@@ -992,8 +987,10 @@ r_object(RFILE *p)
992987
PyObject *retval = NULL;
993988

994989
if (code == EOF) {
995-
PyErr_SetString(PyExc_EOFError,
996-
"EOF read where object expected");
990+
if (PyErr_ExceptionMatches(PyExc_EOFError)) {
991+
PyErr_SetString(PyExc_EOFError,
992+
"EOF read where object expected");
993+
}
997994
return NULL;
998995
}
999996

@@ -1040,7 +1037,10 @@ r_object(RFILE *p)
10401037

10411038
case TYPE_INT:
10421039
n = r_long(p);
1043-
retval = PyErr_Occurred() ? NULL : PyLong_FromLong(n);
1040+
if (n == -1 && PyErr_Occurred()) {
1041+
break;
1042+
}
1043+
retval = PyLong_FromLong(n);
10441044
R_REF(retval);
10451045
break;
10461046

@@ -1106,10 +1106,11 @@ r_object(RFILE *p)
11061106
{
11071107
const char *ptr;
11081108
n = r_long(p);
1109-
if (PyErr_Occurred())
1110-
break;
11111109
if (n < 0 || n > SIZE32_MAX) {
1112-
PyErr_SetString(PyExc_ValueError, "bad marshal data (bytes object size out of range)");
1110+
if (!PyErr_Occurred()) {
1111+
PyErr_SetString(PyExc_ValueError,
1112+
"bad marshal data (bytes object size out of range)");
1113+
}
11131114
break;
11141115
}
11151116
v = PyBytes_FromStringAndSize((char *)NULL, n);
@@ -1131,10 +1132,11 @@ r_object(RFILE *p)
11311132
/* fall through */
11321133
case TYPE_ASCII:
11331134
n = r_long(p);
1134-
if (PyErr_Occurred())
1135-
break;
11361135
if (n < 0 || n > SIZE32_MAX) {
1137-
PyErr_SetString(PyExc_ValueError, "bad marshal data (string size out of range)");
1136+
if (!PyErr_Occurred()) {
1137+
PyErr_SetString(PyExc_ValueError,
1138+
"bad marshal data (string size out of range)");
1139+
}
11381140
break;
11391141
}
11401142
goto _read_ascii;
@@ -1145,8 +1147,6 @@ r_object(RFILE *p)
11451147
case TYPE_SHORT_ASCII:
11461148
n = r_byte(p);
11471149
if (n == EOF) {
1148-
PyErr_SetString(PyExc_EOFError,
1149-
"EOF read where object expected");
11501150
break;
11511151
}
11521152
_read_ascii:
@@ -1173,10 +1173,11 @@ r_object(RFILE *p)
11731173
const char *buffer;
11741174

11751175
n = r_long(p);
1176-
if (PyErr_Occurred())
1177-
break;
11781176
if (n < 0 || n > SIZE32_MAX) {
1179-
PyErr_SetString(PyExc_ValueError, "bad marshal data (string size out of range)");
1177+
if (!PyErr_Occurred()) {
1178+
PyErr_SetString(PyExc_ValueError,
1179+
"bad marshal data (string size out of range)");
1180+
}
11801181
break;
11811182
}
11821183
if (n != 0) {
@@ -1198,16 +1199,18 @@ r_object(RFILE *p)
11981199
}
11991200

12001201
case TYPE_SMALL_TUPLE:
1201-
n = (unsigned char) r_byte(p);
1202-
if (PyErr_Occurred())
1202+
n = r_byte(p);
1203+
if (n == EOF) {
12031204
break;
1205+
}
12041206
goto _read_tuple;
12051207
case TYPE_TUPLE:
12061208
n = r_long(p);
1207-
if (PyErr_Occurred())
1208-
break;
12091209
if (n < 0 || n > SIZE32_MAX) {
1210-
PyErr_SetString(PyExc_ValueError, "bad marshal data (tuple size out of range)");
1210+
if (!PyErr_Occurred()) {
1211+
PyErr_SetString(PyExc_ValueError,
1212+
"bad marshal data (tuple size out of range)");
1213+
}
12111214
break;
12121215
}
12131216
_read_tuple:
@@ -1232,10 +1235,11 @@ r_object(RFILE *p)
12321235

12331236
case TYPE_LIST:
12341237
n = r_long(p);
1235-
if (PyErr_Occurred())
1236-
break;
12371238
if (n < 0 || n > SIZE32_MAX) {
1238-
PyErr_SetString(PyExc_ValueError, "bad marshal data (list size out of range)");
1239+
if (!PyErr_Occurred()) {
1240+
PyErr_SetString(PyExc_ValueError,
1241+
"bad marshal data (list size out of range)");
1242+
}
12391243
break;
12401244
}
12411245
v = PyList_New(n);
@@ -1288,10 +1292,11 @@ r_object(RFILE *p)
12881292
case TYPE_SET:
12891293
case TYPE_FROZENSET:
12901294
n = r_long(p);
1291-
if (PyErr_Occurred())
1292-
break;
12931295
if (n < 0 || n > SIZE32_MAX) {
1294-
PyErr_SetString(PyExc_ValueError, "bad marshal data (set size out of range)");
1296+
if (!PyErr_Occurred()) {
1297+
PyErr_SetString(PyExc_ValueError,
1298+
"bad marshal data (set size out of range)");
1299+
}
12951300
break;
12961301
}
12971302

@@ -1368,20 +1373,20 @@ r_object(RFILE *p)
13681373

13691374
/* XXX ignore long->int overflows for now */
13701375
argcount = (int)r_long(p);
1371-
if (PyErr_Occurred())
1376+
if (argcount == -1 && PyErr_Occurred())
13721377
goto code_error;
13731378
posonlyargcount = (int)r_long(p);
1374-
if (PyErr_Occurred()) {
1379+
if (posonlyargcount == -1 && PyErr_Occurred()) {
13751380
goto code_error;
13761381
}
13771382
kwonlyargcount = (int)r_long(p);
1378-
if (PyErr_Occurred())
1383+
if (kwonlyargcount == -1 && PyErr_Occurred())
13791384
goto code_error;
13801385
stacksize = (int)r_long(p);
1381-
if (PyErr_Occurred())
1386+
if (stacksize == -1 && PyErr_Occurred())
13821387
goto code_error;
13831388
flags = (int)r_long(p);
1384-
if (PyErr_Occurred())
1389+
if (flags == -1 && PyErr_Occurred())
13851390
goto code_error;
13861391
code = r_object(p);
13871392
if (code == NULL)
@@ -1454,6 +1459,10 @@ r_object(RFILE *p)
14541459
v = r_ref_insert(v, idx, flag, p);
14551460

14561461
code_error:
1462+
if (v == NULL && !PyErr_Occurred()) {
1463+
PyErr_SetString(PyExc_TypeError,
1464+
"NULL object in marshal data for code object");
1465+
}
14571466
Py_XDECREF(code);
14581467
Py_XDECREF(consts);
14591468
Py_XDECREF(names);
@@ -1471,9 +1480,10 @@ r_object(RFILE *p)
14711480
case TYPE_REF:
14721481
n = r_long(p);
14731482
if (n < 0 || n >= PyList_GET_SIZE(p->refs)) {
1474-
if (n == -1 && PyErr_Occurred())
1475-
break;
1476-
PyErr_SetString(PyExc_ValueError, "bad marshal data (invalid reference)");
1483+
if (!PyErr_Occurred()) {
1484+
PyErr_SetString(PyExc_ValueError,
1485+
"bad marshal data (invalid reference)");
1486+
}
14771487
break;
14781488
}
14791489
v = PyList_GET_ITEM(p->refs, n);

0 commit comments

Comments
 (0)