@@ -1888,7 +1888,28 @@ PyObject *unicodeescape_string(const Py_UNICODE *s,
1888
1888
1889
1889
static const char * hexdigit = "0123456789abcdef" ;
1890
1890
1891
- repr = PyString_FromStringAndSize (NULL , 2 + 6 * size + 1 );
1891
+ /* Initial allocation is based on the longest-possible unichr
1892
+ escape.
1893
+
1894
+ In wide (UTF-32) builds '\U00xxxxxx' is 10 chars per source
1895
+ unichr, so in this case it's the longest unichr escape. In
1896
+ narrow (UTF-16) builds '\U00xxxxxx' is five chars per source unichr
1897
+ since there are two unichrs in the surrogate pair, so in narrow
1898
+ (UTF-16) builds it's not the longest unichr escape.
1899
+
1900
+ In wide or narrow builds '\uxxxx' is 6 chars per source unichr,
1901
+ so in the narrow (UTF-16) build case it's the longest unichr
1902
+ escape.
1903
+ */
1904
+
1905
+ repr = PyString_FromStringAndSize (NULL ,
1906
+ 2
1907
+ #ifdef Py_UNICODE_WIDE
1908
+ + 10 * size
1909
+ #else
1910
+ + 6 * size
1911
+ #endif
1912
+ + 1 );
1892
1913
if (repr == NULL )
1893
1914
return NULL ;
1894
1915
@@ -1913,15 +1934,6 @@ PyObject *unicodeescape_string(const Py_UNICODE *s,
1913
1934
#ifdef Py_UNICODE_WIDE
1914
1935
/* Map 21-bit characters to '\U00xxxxxx' */
1915
1936
else if (ch >= 0x10000 ) {
1916
- int offset = p - PyString_AS_STRING (repr );
1917
-
1918
- /* Resize the string if necessary */
1919
- if (offset + 12 > PyString_GET_SIZE (repr )) {
1920
- if (_PyString_Resize (& repr , PyString_GET_SIZE (repr ) + 100 ))
1921
- return NULL ;
1922
- p = PyString_AS_STRING (repr ) + offset ;
1923
- }
1924
-
1925
1937
* p ++ = '\\' ;
1926
1938
* p ++ = 'U' ;
1927
1939
* p ++ = hexdigit [(ch >> 28 ) & 0x0000000F ];
@@ -1934,8 +1946,8 @@ PyObject *unicodeescape_string(const Py_UNICODE *s,
1934
1946
* p ++ = hexdigit [ch & 0x0000000F ];
1935
1947
continue ;
1936
1948
}
1937
- #endif
1938
- /* Map UTF-16 surrogate pairs to Unicode \UXXXXXXXX escapes */
1949
+ #else
1950
+ /* Map UTF-16 surrogate pairs to '\U00xxxxxx' */
1939
1951
else if (ch >= 0xD800 && ch < 0xDC00 ) {
1940
1952
Py_UNICODE ch2 ;
1941
1953
Py_UCS4 ucs ;
@@ -1960,6 +1972,7 @@ PyObject *unicodeescape_string(const Py_UNICODE *s,
1960
1972
s -- ;
1961
1973
size ++ ;
1962
1974
}
1975
+ #endif
1963
1976
1964
1977
/* Map 16-bit characters to '\uxxxx' */
1965
1978
if (ch >= 256 ) {
0 commit comments