Skip to content

Commit 3f95292

Browse files
Issue python#23055: Fixed a buffer overflow in PyUnicode_FromFormatV. Analysis
and fix by Guido Vranken.
1 parent f18bf6f commit 3f95292

File tree

3 files changed

+161
-33
lines changed

3 files changed

+161
-33
lines changed

Lib/test/test_unicode.py

Lines changed: 129 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1661,7 +1661,10 @@ def __str__(self):
16611661
# Test PyUnicode_FromFormat()
16621662
def test_from_format(self):
16631663
support.import_module('ctypes')
1664-
from ctypes import pythonapi, py_object, c_int
1664+
from ctypes import (
1665+
pythonapi, py_object, sizeof,
1666+
c_int, c_long, c_longlong, c_ssize_t,
1667+
c_uint, c_ulong, c_ulonglong, c_size_t, c_void_p)
16651668
if sys.maxunicode == 65535:
16661669
name = "PyUnicodeUCS2_FromFormat"
16671670
else:
@@ -1675,9 +1678,13 @@ def PyUnicode_FromFormat(format, *args):
16751678
for arg in args)
16761679
return _PyUnicode_FromFormat(format, *cargs)
16771680

1681+
def check_format(expected, format, *args):
1682+
text = PyUnicode_FromFormat(format, *args)
1683+
self.assertEqual(expected, text)
1684+
16781685
# ascii format, non-ascii argument
1679-
text = PyUnicode_FromFormat(b'ascii\x7f=%U', 'unicode\xe9')
1680-
self.assertEqual(text, 'ascii\x7f=unicode\xe9')
1686+
check_format('ascii\x7f=unicode\xe9',
1687+
b'ascii\x7f=%U', 'unicode\xe9')
16811688

16821689
# non-ascii format, ascii argument: ensure that PyUnicode_FromFormatV()
16831690
# raises an error
@@ -1686,25 +1693,131 @@ def PyUnicode_FromFormat(format, *args):
16861693
'string, got a non-ASCII byte: 0xe9$',
16871694
PyUnicode_FromFormat, b'unicode\xe9=%s', 'ascii')
16881695

1689-
self.assertEqual(PyUnicode_FromFormat(b'%c', c_int(0xabcd)), '\uabcd')
1690-
self.assertEqual(PyUnicode_FromFormat(b'%c', c_int(0x10ffff)), '\U0010ffff')
1691-
1692-
# other tests
1693-
text = PyUnicode_FromFormat(b'%%A:%A', 'abc\xe9\uabcd\U0010ffff')
1694-
self.assertEqual(text, r"%A:'abc\xe9\uabcd\U0010ffff'")
1695-
1696-
text = PyUnicode_FromFormat(b'repr=%V', 'abc', b'xyz')
1697-
self.assertEqual(text, 'repr=abc')
1696+
# test "%c"
1697+
check_format('\uabcd',
1698+
b'%c', c_int(0xabcd))
1699+
check_format('\U0010ffff',
1700+
b'%c', c_int(0x10ffff))
1701+
with self.assertRaises(OverflowError):
1702+
PyUnicode_FromFormat(b'%c', c_int(0x110000))
1703+
# Issue #18183
1704+
check_format('\U00010000\U00100000',
1705+
b'%c%c', c_int(0x10000), c_int(0x100000))
1706+
1707+
# test "%"
1708+
check_format('%',
1709+
b'%')
1710+
check_format('%',
1711+
b'%%')
1712+
check_format('%s',
1713+
b'%%s')
1714+
check_format('[%]',
1715+
b'[%%]')
1716+
check_format('%abc',
1717+
b'%%%s', b'abc')
1718+
1719+
# test %S
1720+
check_format("repr=\u20acABC",
1721+
b'repr=%S', '\u20acABC')
1722+
1723+
# test %R
1724+
check_format("repr='\u20acABC'",
1725+
b'repr=%R', '\u20acABC')
1726+
1727+
# test integer formats (%i, %d, %u)
1728+
check_format('010',
1729+
b'%03i', c_int(10))
1730+
check_format('0010',
1731+
b'%0.4i', c_int(10))
1732+
check_format('-123',
1733+
b'%i', c_int(-123))
1734+
1735+
check_format('-123',
1736+
b'%d', c_int(-123))
1737+
check_format('-123',
1738+
b'%ld', c_long(-123))
1739+
check_format('-123',
1740+
b'%lld', c_longlong(-123))
1741+
check_format('-123',
1742+
b'%zd', c_ssize_t(-123))
1743+
1744+
check_format('123',
1745+
b'%u', c_uint(123))
1746+
check_format('123',
1747+
b'%lu', c_ulong(123))
1748+
check_format('123',
1749+
b'%llu', c_ulonglong(123))
1750+
check_format('123',
1751+
b'%zu', c_size_t(123))
1752+
1753+
# test long output
1754+
min_longlong = -(2 ** (8 * sizeof(c_longlong) - 1))
1755+
max_longlong = -min_longlong - 1
1756+
check_format(str(min_longlong),
1757+
b'%lld', c_longlong(min_longlong))
1758+
check_format(str(max_longlong),
1759+
b'%lld', c_longlong(max_longlong))
1760+
max_ulonglong = 2 ** (8 * sizeof(c_ulonglong)) - 1
1761+
check_format(str(max_ulonglong),
1762+
b'%llu', c_ulonglong(max_ulonglong))
1763+
PyUnicode_FromFormat(b'%p', c_void_p(-1))
1764+
1765+
# test padding (width and/or precision)
1766+
check_format('123'.rjust(10, '0'),
1767+
b'%010i', c_int(123))
1768+
check_format('123'.rjust(100),
1769+
b'%100i', c_int(123))
1770+
check_format('123'.rjust(100, '0'),
1771+
b'%.100i', c_int(123))
1772+
check_format('123'.rjust(80, '0').rjust(100),
1773+
b'%100.80i', c_int(123))
1774+
1775+
check_format('123'.rjust(10, '0'),
1776+
b'%010u', c_uint(123))
1777+
check_format('123'.rjust(100),
1778+
b'%100u', c_uint(123))
1779+
check_format('123'.rjust(100, '0'),
1780+
b'%.100u', c_uint(123))
1781+
check_format('123'.rjust(80, '0').rjust(100),
1782+
b'%100.80u', c_uint(123))
1783+
1784+
check_format('123'.rjust(10, '0'),
1785+
b'%010x', c_int(0x123))
1786+
check_format('123'.rjust(100),
1787+
b'%100x', c_int(0x123))
1788+
check_format('123'.rjust(100, '0'),
1789+
b'%.100x', c_int(0x123))
1790+
check_format('123'.rjust(80, '0').rjust(100),
1791+
b'%100.80x', c_int(0x123))
1792+
1793+
# test %A
1794+
check_format(r"%A:'abc\xe9\uabcd\U0010ffff'",
1795+
b'%%A:%A', 'abc\xe9\uabcd\U0010ffff')
1796+
1797+
# test %V
1798+
check_format('repr=abc',
1799+
b'repr=%V', 'abc', b'xyz')
16981800

16991801
# Test string decode from parameter of %s using utf-8.
17001802
# b'\xe4\xba\xba\xe6\xb0\x91' is utf-8 encoded byte sequence of
17011803
# '\u4eba\u6c11'
1702-
text = PyUnicode_FromFormat(b'repr=%V', None, b'\xe4\xba\xba\xe6\xb0\x91')
1703-
self.assertEqual(text, 'repr=\u4eba\u6c11')
1804+
check_format('repr=\u4eba\u6c11',
1805+
b'repr=%V', None, b'\xe4\xba\xba\xe6\xb0\x91')
17041806

17051807
# Test replace error handler.
1706-
text = PyUnicode_FromFormat(b'repr=%V', None, b'abc\xff')
1707-
self.assertEqual(text, 'repr=abc\ufffd')
1808+
check_format('repr=abc\ufffd',
1809+
b'repr=%V', None, b'abc\xff')
1810+
1811+
# not supported: copy the raw format string. these tests are just here
1812+
# to check for crashes and should not be considered as specifications
1813+
check_format('%s',
1814+
b'%1%s', b'abc')
1815+
check_format('%1abc',
1816+
b'%1abc')
1817+
check_format('%+i',
1818+
b'%+i', c_int(10))
1819+
check_format('%s',
1820+
b'%.%s', b'abc')
17081821

17091822
# Test PyUnicode_AsWideChar()
17101823
def test_aswidechar(self):

Misc/NEWS

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,18 @@
22
Python News
33
+++++++++++
44

5+
What's New in Python 3.2.7?
6+
============================
7+
8+
*Release date: XXXX-XX-XX*
9+
10+
Core and Builtins
11+
-----------------
12+
13+
- Issue #23055: Fixed a buffer overflow in PyUnicode_FromFormatV. Analysis
14+
and fix by Guido Vranken.
15+
16+
517
What's New in Python 3.2.6?
618
===========================
719

Objects/unicodeobject.c

Lines changed: 20 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -759,15 +759,10 @@ PyUnicode_FromFormatV(const char *format, va_list vargs)
759759
* result in an array) */
760760
for (f = format; *f; f++) {
761761
if (*f == '%') {
762-
if (*(f+1)=='%')
763-
continue;
764-
if (*(f+1)=='S' || *(f+1)=='R' || *(f+1)=='A' || *(f+1) == 'V')
765-
++callcount;
766-
while (Py_ISDIGIT((unsigned)*f))
767-
width = (width*10) + *f++ - '0';
768-
while (*++f && *f != '%' && !Py_ISALPHA((unsigned)*f))
769-
;
770-
if (*f == 's')
762+
f++;
763+
while (*f && *f != '%' && !Py_ISALPHA((unsigned)*f))
764+
f++;
765+
if (*f == 's' || *f=='S' || *f=='R' || *f=='A' || *f=='V')
771766
++callcount;
772767
}
773768
else if (128 <= (unsigned char)*f) {
@@ -794,12 +789,16 @@ PyUnicode_FromFormatV(const char *format, va_list vargs)
794789
#ifdef HAVE_LONG_LONG
795790
int longlongflag = 0;
796791
#endif
797-
const char* p = f;
792+
const char* p = f++;
798793
width = 0;
799794
while (Py_ISDIGIT((unsigned)*f))
800795
width = (width*10) + *f++ - '0';
801-
while (*++f && *f != '%' && !Py_ISALPHA((unsigned)*f))
802-
;
796+
precision = 0;
797+
if (*f == '.') {
798+
f++;
799+
while (Py_ISDIGIT((unsigned)*f))
800+
precision = (precision*10) + *f++ - '0';
801+
}
803802

804803
/* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
805804
* they don't affect the amount of space we reserve.
@@ -823,23 +822,27 @@ PyUnicode_FromFormatV(const char *format, va_list vargs)
823822
switch (*f) {
824823
case 'c':
825824
{
826-
#ifndef Py_UNICODE_WIDE
827825
int ordinal = va_arg(count, int);
826+
if (ordinal < 0 || ordinal > 0x10ffff) {
827+
PyErr_SetString(PyExc_OverflowError,
828+
"%c arg not in range(0x110000)");
829+
goto fail;
830+
}
831+
#ifndef Py_UNICODE_WIDE
828832
if (ordinal > 0xffff)
829833
n += 2;
830834
else
831-
n++;
832-
#else
833-
(void)va_arg(count, int);
834-
n++;
835835
#endif
836+
n++;
836837
break;
837838
}
838839
case '%':
839840
n++;
840841
break;
841842
case 'd': case 'u': case 'i': case 'x':
842843
(void) va_arg(count, int);
844+
if (width < precision)
845+
width = precision;
843846
#ifdef HAVE_LONG_LONG
844847
if (longlongflag) {
845848
if (width < MAX_LONG_LONG_CHARS)

0 commit comments

Comments
 (0)