Skip to content

Commit b937ca0

Browse files
[3.11] gh-101006: Improve error handling when reading marshal data (GH-101007) (GH-106227)
* EOFError no longer overrides other errors, such as MemoryError or OSError, at the start of the object.
* Raise a more relevant error when a NULL object occurs as a code object component.
* Minimize the overhead of calling PyErr_Occurred().
(cherry picked from commit 8bf6904)
Co-authored-by: Serhiy Storchaka <storchaka@gmail.com>
1 parent ce091c9 commit b937ca0

File tree

2 files changed

+72
-61
lines changed

2 files changed

+72
-61
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Improve error handling when reading :mod:`marshal` data.

Python/marshal.c

+71-61
Original file line numberDiff line numberDiff line change
@@ -750,23 +750,28 @@ r_string(Py_ssize_t n, RFILE *p)
750750
static int
751751
r_byte(RFILE *p)
752752
{
753-
int c = EOF;
754-
755753
if (p->ptr != NULL) {
756-
if (p->ptr < p->end)
757-
c = (unsigned char) *p->ptr++;
758-
return c;
754+
if (p->ptr < p->end) {
755+
return (unsigned char) *p->ptr++;
756+
}
759757
}
760-
if (!p->readable) {
758+
else if (!p->readable) {
761759
assert(p->fp);
762-
c = getc(p->fp);
760+
int c = getc(p->fp);
761+
if (c != EOF) {
762+
return c;
763+
}
763764
}
764765
else {
765766
const char *ptr = r_string(1, p);
766-
if (ptr != NULL)
767-
c = *(const unsigned char *) ptr;
767+
if (ptr != NULL) {
768+
return *(const unsigned char *) ptr;
769+
}
770+
return EOF;
768771
}
769-
return c;
772+
PyErr_SetString(PyExc_EOFError,
773+
"EOF read where not expected");
774+
return EOF;
770775
}
771776

772777
static int
@@ -827,10 +832,11 @@ r_PyLong(RFILE *p)
827832
digit d;
828833

829834
n = r_long(p);
830-
if (PyErr_Occurred())
831-
return NULL;
832835
if (n == 0)
833836
return (PyObject *)_PyLong_New(0);
837+
if (n == -1 && PyErr_Occurred()) {
838+
return NULL;
839+
}
834840
if (n < -SIZE32_MAX || n > SIZE32_MAX) {
835841
PyErr_SetString(PyExc_ValueError,
836842
"bad marshal data (long size out of range)");
@@ -849,10 +855,6 @@ r_PyLong(RFILE *p)
849855
d = 0;
850856
for (j=0; j < PyLong_MARSHAL_RATIO; j++) {
851857
md = r_short(p);
852-
if (PyErr_Occurred()) {
853-
Py_DECREF(ob);
854-
return NULL;
855-
}
856858
if (md < 0 || md > PyLong_MARSHAL_BASE)
857859
goto bad_digit;
858860
d += (digit)md << j*PyLong_MARSHAL_SHIFT;
@@ -863,10 +865,6 @@ r_PyLong(RFILE *p)
863865
d = 0;
864866
for (j=0; j < shorts_in_top_digit; j++) {
865867
md = r_short(p);
866-
if (PyErr_Occurred()) {
867-
Py_DECREF(ob);
868-
return NULL;
869-
}
870868
if (md < 0 || md > PyLong_MARSHAL_BASE)
871869
goto bad_digit;
872870
/* topmost marshal digit should be nonzero */
@@ -878,18 +876,17 @@ r_PyLong(RFILE *p)
878876
}
879877
d += (digit)md << j*PyLong_MARSHAL_SHIFT;
880878
}
881-
if (PyErr_Occurred()) {
882-
Py_DECREF(ob);
883-
return NULL;
884-
}
879+
assert(!PyErr_Occurred());
885880
/* top digit should be nonzero, else the resulting PyLong won't be
886881
normalized */
887882
ob->ob_digit[size-1] = d;
888883
return (PyObject *)ob;
889884
bad_digit:
890885
Py_DECREF(ob);
891-
PyErr_SetString(PyExc_ValueError,
892-
"bad marshal data (digit out of range in long)");
886+
if (!PyErr_Occurred()) {
887+
PyErr_SetString(PyExc_ValueError,
888+
"bad marshal data (digit out of range in long)");
889+
}
893890
return NULL;
894891
}
895892

@@ -912,8 +909,6 @@ r_float_str(RFILE *p)
912909
const char *ptr;
913910
n = r_byte(p);
914911
if (n == EOF) {
915-
PyErr_SetString(PyExc_EOFError,
916-
"EOF read where object expected");
917912
return -1;
918913
}
919914
ptr = r_string(n, p);
@@ -992,8 +987,10 @@ r_object(RFILE *p)
992987
PyObject *retval = NULL;
993988

994989
if (code == EOF) {
995-
PyErr_SetString(PyExc_EOFError,
996-
"EOF read where object expected");
990+
if (PyErr_ExceptionMatches(PyExc_EOFError)) {
991+
PyErr_SetString(PyExc_EOFError,
992+
"EOF read where object expected");
993+
}
997994
return NULL;
998995
}
999996

@@ -1045,7 +1042,10 @@ r_object(RFILE *p)
10451042

10461043
case TYPE_INT:
10471044
n = r_long(p);
1048-
retval = PyErr_Occurred() ? NULL : PyLong_FromLong(n);
1045+
if (n == -1 && PyErr_Occurred()) {
1046+
break;
1047+
}
1048+
retval = PyLong_FromLong(n);
10491049
R_REF(retval);
10501050
break;
10511051

@@ -1111,10 +1111,11 @@ r_object(RFILE *p)
11111111
{
11121112
const char *ptr;
11131113
n = r_long(p);
1114-
if (PyErr_Occurred())
1115-
break;
11161114
if (n < 0 || n > SIZE32_MAX) {
1117-
PyErr_SetString(PyExc_ValueError, "bad marshal data (bytes object size out of range)");
1115+
if (!PyErr_Occurred()) {
1116+
PyErr_SetString(PyExc_ValueError,
1117+
"bad marshal data (bytes object size out of range)");
1118+
}
11181119
break;
11191120
}
11201121
v = PyBytes_FromStringAndSize((char *)NULL, n);
@@ -1136,10 +1137,11 @@ r_object(RFILE *p)
11361137
/* fall through */
11371138
case TYPE_ASCII:
11381139
n = r_long(p);
1139-
if (PyErr_Occurred())
1140-
break;
11411140
if (n < 0 || n > SIZE32_MAX) {
1142-
PyErr_SetString(PyExc_ValueError, "bad marshal data (string size out of range)");
1141+
if (!PyErr_Occurred()) {
1142+
PyErr_SetString(PyExc_ValueError,
1143+
"bad marshal data (string size out of range)");
1144+
}
11431145
break;
11441146
}
11451147
goto _read_ascii;
@@ -1150,8 +1152,6 @@ r_object(RFILE *p)
11501152
case TYPE_SHORT_ASCII:
11511153
n = r_byte(p);
11521154
if (n == EOF) {
1153-
PyErr_SetString(PyExc_EOFError,
1154-
"EOF read where object expected");
11551155
break;
11561156
}
11571157
_read_ascii:
@@ -1178,10 +1178,11 @@ r_object(RFILE *p)
11781178
const char *buffer;
11791179

11801180
n = r_long(p);
1181-
if (PyErr_Occurred())
1182-
break;
11831181
if (n < 0 || n > SIZE32_MAX) {
1184-
PyErr_SetString(PyExc_ValueError, "bad marshal data (string size out of range)");
1182+
if (!PyErr_Occurred()) {
1183+
PyErr_SetString(PyExc_ValueError,
1184+
"bad marshal data (string size out of range)");
1185+
}
11851186
break;
11861187
}
11871188
if (n != 0) {
@@ -1203,16 +1204,18 @@ r_object(RFILE *p)
12031204
}
12041205

12051206
case TYPE_SMALL_TUPLE:
1206-
n = (unsigned char) r_byte(p);
1207-
if (PyErr_Occurred())
1207+
n = r_byte(p);
1208+
if (n == EOF) {
12081209
break;
1210+
}
12091211
goto _read_tuple;
12101212
case TYPE_TUPLE:
12111213
n = r_long(p);
1212-
if (PyErr_Occurred())
1213-
break;
12141214
if (n < 0 || n > SIZE32_MAX) {
1215-
PyErr_SetString(PyExc_ValueError, "bad marshal data (tuple size out of range)");
1215+
if (!PyErr_Occurred()) {
1216+
PyErr_SetString(PyExc_ValueError,
1217+
"bad marshal data (tuple size out of range)");
1218+
}
12161219
break;
12171220
}
12181221
_read_tuple:
@@ -1238,10 +1241,11 @@ r_object(RFILE *p)
12381241

12391242
case TYPE_LIST:
12401243
n = r_long(p);
1241-
if (PyErr_Occurred())
1242-
break;
12431244
if (n < 0 || n > SIZE32_MAX) {
1244-
PyErr_SetString(PyExc_ValueError, "bad marshal data (list size out of range)");
1245+
if (!PyErr_Occurred()) {
1246+
PyErr_SetString(PyExc_ValueError,
1247+
"bad marshal data (list size out of range)");
1248+
}
12451249
break;
12461250
}
12471251
v = PyList_New(n);
@@ -1296,10 +1300,11 @@ r_object(RFILE *p)
12961300
case TYPE_SET:
12971301
case TYPE_FROZENSET:
12981302
n = r_long(p);
1299-
if (PyErr_Occurred())
1300-
break;
13011303
if (n < 0 || n > SIZE32_MAX) {
1302-
PyErr_SetString(PyExc_ValueError, "bad marshal data (set size out of range)");
1304+
if (!PyErr_Occurred()) {
1305+
PyErr_SetString(PyExc_ValueError,
1306+
"bad marshal data (set size out of range)");
1307+
}
13031308
break;
13041309
}
13051310

@@ -1377,20 +1382,20 @@ r_object(RFILE *p)
13771382

13781383
/* XXX ignore long->int overflows for now */
13791384
argcount = (int)r_long(p);
1380-
if (PyErr_Occurred())
1385+
if (argcount == -1 && PyErr_Occurred())
13811386
goto code_error;
13821387
posonlyargcount = (int)r_long(p);
1383-
if (PyErr_Occurred()) {
1388+
if (posonlyargcount == -1 && PyErr_Occurred()) {
13841389
goto code_error;
13851390
}
13861391
kwonlyargcount = (int)r_long(p);
1387-
if (PyErr_Occurred())
1392+
if (kwonlyargcount == -1 && PyErr_Occurred())
13881393
goto code_error;
13891394
stacksize = (int)r_long(p);
1390-
if (PyErr_Occurred())
1395+
if (stacksize == -1 && PyErr_Occurred())
13911396
goto code_error;
13921397
flags = (int)r_long(p);
1393-
if (PyErr_Occurred())
1398+
if (flags == -1 && PyErr_Occurred())
13941399
goto code_error;
13951400
code = r_object(p);
13961401
if (code == NULL)
@@ -1463,6 +1468,10 @@ r_object(RFILE *p)
14631468
v = r_ref_insert(v, idx, flag, p);
14641469

14651470
code_error:
1471+
if (v == NULL && !PyErr_Occurred()) {
1472+
PyErr_SetString(PyExc_TypeError,
1473+
"NULL object in marshal data for code object");
1474+
}
14661475
Py_XDECREF(code);
14671476
Py_XDECREF(consts);
14681477
Py_XDECREF(names);
@@ -1480,9 +1489,10 @@ r_object(RFILE *p)
14801489
case TYPE_REF:
14811490
n = r_long(p);
14821491
if (n < 0 || n >= PyList_GET_SIZE(p->refs)) {
1483-
if (n == -1 && PyErr_Occurred())
1484-
break;
1485-
PyErr_SetString(PyExc_ValueError, "bad marshal data (invalid reference)");
1492+
if (!PyErr_Occurred()) {
1493+
PyErr_SetString(PyExc_ValueError,
1494+
"bad marshal data (invalid reference)");
1495+
}
14861496
break;
14871497
}
14881498
v = PyList_GET_ITEM(p->refs, n);

0 commit comments

Comments
 (0)