Skip to content

Commit 9b8de84

Browse files
committed
Backported r55839 and r61350
Issue python#4469: Prevent expandtabs() on string and unicode objects from causing a segfault when a large width is passed on 32-bit platforms.
1 parent aec20a6 commit 9b8de84

File tree

5 files changed

+91
-19
lines changed

5 files changed

+91
-19
lines changed

Lib/test/test_str.py

+11
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
1+
12
import unittest
3+
import sys
24
from test import test_support, string_tests
35

46

@@ -19,6 +21,15 @@ def test_formatting(self):
1921
string_tests.MixinStrUnicodeUserStringTest.test_formatting(self)
2022
self.assertRaises(OverflowError, '%c'.__mod__, 0x1234)
2123

24+
def test_expandtabs_overflows_gracefully(self):
25+
# This test is only meaningful on 32-bit platforms because expandtabs parses
26+
# its tab size as a C int, not a 64-bit C long. If expandtabs is changed
27+
# to take a 64-bit long, this test should apply to all platforms.
28+
if sys.maxint > (1 << 32):
29+
return
30+
self.assertRaises(OverflowError, 't\tt\t'.expandtabs, sys.maxint)
31+
32+
2233
def test_main():
2334
test_support.run_unittest(StrTest)
2435

Lib/test/test_unicode.py

+8
Original file line numberDiff line numberDiff line change
@@ -756,6 +756,14 @@ def __repr__(self):
756756
self.assertEqual(repr(s1()), '\\n')
757757
self.assertEqual(repr(s2()), '\\n')
758758

759+
def test_expandtabs_overflows_gracefully(self):
760+
# This test is only meaningful on 32-bit platforms because expandtabs parses
761+
# its tab size as a C int, not a 64-bit C long. If expandtabs is changed
762+
# to take a 64-bit long, this test should apply to all platforms.
763+
if sys.maxint > (1 << 32):
764+
return
765+
self.assertRaises(OverflowError, u't\tt\t'.expandtabs, sys.maxint)
766+
759767
def test_main():
760768
test_support.run_unittest(UnicodeTest)
761769

Misc/NEWS

+4
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,10 @@ What's New in Python 2.4.6c1?
1212
Core and builtins
1313
-----------------
1414

15+
- Issue #4469: Prevent expandtabs() on string and unicode
16+
objects from causing a segfault when a large width is passed
17+
on 32-bit platforms.
18+
1519
- Issue #4317: Fixed a crash in the imageop.rgb2rgb8() function.
1620

1721
- Issue #4230: Fix a crash when a class has a custom __getattr__ and an

Objects/stringobject.c

+34-10
Original file line numberDiff line numberDiff line change
@@ -2759,56 +2759,80 @@ If tabsize is not given, a tab size of 8 characters is assumed.");
27592759
static PyObject*
27602760
string_expandtabs(PyStringObject *self, PyObject *args)
27612761
{
2762-
const char *e, *p;
2762+
const char *e, *p, *qe;
27632763
char *q;
2764-
int i, j;
2764+
int i, j, incr;
27652765
PyObject *u;
27662766
int tabsize = 8;
27672767

27682768
if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
27692769
return NULL;
27702770

27712771
/* First pass: determine size of output string */
2772-
i = j = 0;
2773-
e = PyString_AS_STRING(self) + PyString_GET_SIZE(self);
2772+
i = 0; /* chars up to and including most recent \n or \r */
2773+
j = 0; /* chars since most recent \n or \r (use in tab calculations) */
2774+
e = PyString_AS_STRING(self) + PyString_GET_SIZE(self); /* end of input */
27742775
for (p = PyString_AS_STRING(self); p < e; p++)
27752776
if (*p == '\t') {
2776-
if (tabsize > 0)
2777-
j += tabsize - (j % tabsize);
2777+
if (tabsize > 0) {
2778+
incr = tabsize - (j % tabsize);
2779+
if (j > INT_MAX - incr)
2780+
goto overflow1;
2781+
j += incr;
2782+
}
27782783
}
27792784
else {
2785+
if (j > INT_MAX - 1)
2786+
goto overflow1;
27802787
j++;
27812788
if (*p == '\n' || *p == '\r') {
2789+
if (i > INT_MAX - j)
2790+
goto overflow1;
27822791
i += j;
27832792
j = 0;
27842793
}
27852794
}
27862795

2796+
if (i > INT_MAX - j)
2797+
goto overflow1;
2798+
27872799
/* Second pass: create output string and fill it */
27882800
u = PyString_FromStringAndSize(NULL, i + j);
27892801
if (!u)
27902802
return NULL;
27912803

2792-
j = 0;
2793-
q = PyString_AS_STRING(u);
2804+
j = 0; /* same as in first pass */
2805+
q = PyString_AS_STRING(u); /* next output char */
2806+
qe = PyString_AS_STRING(u) + PyString_GET_SIZE(u); /* end of output */
27942807

27952808
for (p = PyString_AS_STRING(self); p < e; p++)
27962809
if (*p == '\t') {
27972810
if (tabsize > 0) {
27982811
i = tabsize - (j % tabsize);
27992812
j += i;
2800-
while (i--)
2813+
while (i--) {
2814+
if (q >= qe)
2815+
goto overflow2;
28012816
*q++ = ' ';
2817+
}
28022818
}
28032819
}
28042820
else {
2805-
j++;
2821+
if (q >= qe)
2822+
goto overflow2;
28062823
*q++ = *p;
2824+
j++;
28072825
if (*p == '\n' || *p == '\r')
28082826
j = 0;
28092827
}
28102828

28112829
return u;
2830+
2831+
overflow2:
2832+
Py_DECREF(u);
2833+
overflow1:
2834+
PyErr_SetString(PyExc_OverflowError, "new string is too long");
2835+
return NULL;
28122836
}
28132837

28142838
static PyObject *

Objects/unicodeobject.c

+34-9
Original file line numberDiff line numberDiff line change
@@ -5207,54 +5207,79 @@ unicode_expandtabs(PyUnicodeObject *self, PyObject *args)
52075207
Py_UNICODE *e;
52085208
Py_UNICODE *p;
52095209
Py_UNICODE *q;
5210-
int i, j;
5210+
Py_UNICODE *qe;
5211+
int i, j, incr;
52115212
PyUnicodeObject *u;
52125213
int tabsize = 8;
52135214

52145215
if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
52155216
return NULL;
52165217

52175218
/* First pass: determine size of output string */
5218-
i = j = 0;
5219-
e = self->str + self->length;
5219+
i = 0; /* chars up to and including most recent \n or \r */
5220+
j = 0; /* chars since most recent \n or \r (use in tab calculations) */
5221+
e = self->str + self->length; /* end of input */
52205222
for (p = self->str; p < e; p++)
52215223
if (*p == '\t') {
5222-
if (tabsize > 0)
5223-
j += tabsize - (j % tabsize);
5224+
if (tabsize > 0) {
5225+
incr = tabsize - (j % tabsize); /* cannot overflow */
5226+
if (j > INT_MAX - incr)
5227+
goto overflow1;
5228+
j += incr;
5229+
}
52245230
}
52255231
else {
5232+
if (j > INT_MAX - 1)
5233+
goto overflow1;
52265234
j++;
52275235
if (*p == '\n' || *p == '\r') {
5236+
if (i > INT_MAX - j)
5237+
goto overflow1;
52285238
i += j;
52295239
j = 0;
52305240
}
52315241
}
52325242

5243+
if (i > INT_MAX - j)
5244+
goto overflow1;
5245+
52335246
/* Second pass: create output string and fill it */
52345247
u = _PyUnicode_New(i + j);
52355248
if (!u)
52365249
return NULL;
52375250

5238-
j = 0;
5239-
q = u->str;
5251+
j = 0; /* same as in first pass */
5252+
q = u->str; /* next output char */
5253+
qe = u->str + u->length; /* end of output */
52405254

52415255
for (p = self->str; p < e; p++)
52425256
if (*p == '\t') {
52435257
if (tabsize > 0) {
52445258
i = tabsize - (j % tabsize);
52455259
j += i;
5246-
while (i--)
5260+
while (i--) {
5261+
if (q >= qe)
5262+
goto overflow2;
52475263
*q++ = ' ';
5264+
}
52485265
}
52495266
}
52505267
else {
5251-
j++;
5268+
if (q >= qe)
5269+
goto overflow2;
52525270
*q++ = *p;
5271+
j++;
52535272
if (*p == '\n' || *p == '\r')
52545273
j = 0;
52555274
}
52565275

52575276
return (PyObject*) u;
5277+
5278+
overflow2:
5279+
Py_DECREF(u);
5280+
overflow1:
5281+
PyErr_SetString(PyExc_OverflowError, "new string is too long");
5282+
return NULL;
52585283
}
52595284

52605285
PyDoc_STRVAR(find__doc__,

0 commit comments

Comments
 (0)