From 574e0b6dc9a0295d3fbdb7876602b8ca525b3405 Mon Sep 17 00:00:00 2001 From: Kumar Aditya <59607654+kumaraditya303@users.noreply.github.com> Date: Thu, 3 Mar 2022 08:17:37 +0000 Subject: [PATCH 1/8] static latin1 --- Include/internal/pycore_global_strings.h | 8 + Include/internal/pycore_runtime_init.h | 278 ++++++++++++++++++++++- Include/internal/pycore_unicodeobject.h | 3 - Objects/unicodeobject.c | 56 +---- Tools/scripts/generate_global_objects.py | 13 ++ 5 files changed, 304 insertions(+), 54 deletions(-) diff --git a/Include/internal/pycore_global_strings.h b/Include/internal/pycore_global_strings.h index 2a42dc16ce1b55..8b93295da4fc4a 100644 --- a/Include/internal/pycore_global_strings.h +++ b/Include/internal/pycore_global_strings.h @@ -348,6 +348,14 @@ struct _Py_global_strings { STRUCT_FOR_ID(write) STRUCT_FOR_ID(zipimporter) } identifiers; + struct { + PyASCIIObject _ascii; + uint8_t _data[2]; + } ascii[128]; + struct { + PyCompactUnicodeObject _latin1; + uint8_t _data[2]; + } latin1[128]; }; /* End auto-generated code */ diff --git a/Include/internal/pycore_runtime_init.h b/Include/internal/pycore_runtime_init.h index 2f2bc65cd71116..355498a8210633 100644 --- a/Include/internal/pycore_runtime_init.h +++ b/Include/internal/pycore_runtime_init.h @@ -112,7 +112,23 @@ extern "C" { ._ ## NAME = _PyASCIIObject_INIT(LITERAL) #define INIT_ID(NAME) \ ._ ## NAME = _PyASCIIObject_INIT(#NAME) - +#define _PyCompactUnicodeObject_INIT(LITERAL) \ + { \ + ._latin1 = { \ + ._base = { \ + .ob_base = _PyObject_IMMORTAL_INIT(&PyUnicode_Type), \ + .length = sizeof(LITERAL) - 1, \ + .hash = -1, \ + .state = { \ + .kind = 1, \ + .compact = 1, \ + .ascii = 0, \ + .ready = 1, \ + }, \ + }, \ + }, \ + ._data = LITERAL, \ + } /* The following is auto-generated by Tools/scripts/generate_global_objects.py. */ #define _Py_global_objects_INIT { \ @@ -963,6 +979,266 @@ extern "C" { INIT_ID(write), \ INIT_ID(zipimporter), \ }, \ + .ascii = { \ + _PyASCIIObject_INIT("\x00"), \ + _PyASCIIObject_INIT("\x01"), \ + _PyASCIIObject_INIT("\x02"), \ + _PyASCIIObject_INIT("\x03"), \ + _PyASCIIObject_INIT("\x04"), \ + _PyASCIIObject_INIT("\x05"), \ + _PyASCIIObject_INIT("\x06"), \ + _PyASCIIObject_INIT("\x07"), \ + _PyASCIIObject_INIT("\x08"), \ + _PyASCIIObject_INIT("\x09"), \ + _PyASCIIObject_INIT("\x0a"), \ + _PyASCIIObject_INIT("\x0b"), \ + _PyASCIIObject_INIT("\x0c"), \ + _PyASCIIObject_INIT("\x0d"), \ + _PyASCIIObject_INIT("\x0e"), \ + _PyASCIIObject_INIT("\x0f"), \ + _PyASCIIObject_INIT("\x10"), \ + _PyASCIIObject_INIT("\x11"), \ + _PyASCIIObject_INIT("\x12"), \ + _PyASCIIObject_INIT("\x13"), \ + _PyASCIIObject_INIT("\x14"), \ + _PyASCIIObject_INIT("\x15"), \ + _PyASCIIObject_INIT("\x16"), \ + _PyASCIIObject_INIT("\x17"), \ + _PyASCIIObject_INIT("\x18"), \ + _PyASCIIObject_INIT("\x19"), \ + _PyASCIIObject_INIT("\x1a"), \ + _PyASCIIObject_INIT("\x1b"), \ + _PyASCIIObject_INIT("\x1c"), \ + _PyASCIIObject_INIT("\x1d"), \ + _PyASCIIObject_INIT("\x1e"), \ + _PyASCIIObject_INIT("\x1f"), \ + _PyASCIIObject_INIT("\x20"), \ + _PyASCIIObject_INIT("\x21"), \ + _PyASCIIObject_INIT("\x22"), \ + _PyASCIIObject_INIT("\x23"), \ + _PyASCIIObject_INIT("\x24"), \ + _PyASCIIObject_INIT("\x25"), \ + _PyASCIIObject_INIT("\x26"), \ + _PyASCIIObject_INIT("\x27"), \ + _PyASCIIObject_INIT("\x28"), \ + _PyASCIIObject_INIT("\x29"), \ + _PyASCIIObject_INIT("\x2a"), \ + _PyASCIIObject_INIT("\x2b"), \ + _PyASCIIObject_INIT("\x2c"), \ + _PyASCIIObject_INIT("\x2d"), \ + _PyASCIIObject_INIT("\x2e"), \ + _PyASCIIObject_INIT("\x2f"), \ + _PyASCIIObject_INIT("\x30"), \ + _PyASCIIObject_INIT("\x31"), \ + _PyASCIIObject_INIT("\x32"), \ + _PyASCIIObject_INIT("\x33"), \ + _PyASCIIObject_INIT("\x34"), \ + _PyASCIIObject_INIT("\x35"), \ + _PyASCIIObject_INIT("\x36"), \ + _PyASCIIObject_INIT("\x37"), \ + _PyASCIIObject_INIT("\x38"), \ + _PyASCIIObject_INIT("\x39"), \ + _PyASCIIObject_INIT("\x3a"), \ + _PyASCIIObject_INIT("\x3b"), \ + _PyASCIIObject_INIT("\x3c"), \ + _PyASCIIObject_INIT("\x3d"), \ + _PyASCIIObject_INIT("\x3e"), \ + _PyASCIIObject_INIT("\x3f"), \ + _PyASCIIObject_INIT("\x40"), \ + _PyASCIIObject_INIT("\x41"), \ + _PyASCIIObject_INIT("\x42"), \ + _PyASCIIObject_INIT("\x43"), \ + _PyASCIIObject_INIT("\x44"), \ + _PyASCIIObject_INIT("\x45"), \ + _PyASCIIObject_INIT("\x46"), \ + _PyASCIIObject_INIT("\x47"), \ + _PyASCIIObject_INIT("\x48"), \ + _PyASCIIObject_INIT("\x49"), \ + _PyASCIIObject_INIT("\x4a"), \ + _PyASCIIObject_INIT("\x4b"), \ + _PyASCIIObject_INIT("\x4c"), \ + _PyASCIIObject_INIT("\x4d"), \ + _PyASCIIObject_INIT("\x4e"), \ + _PyASCIIObject_INIT("\x4f"), \ + _PyASCIIObject_INIT("\x50"), \ + _PyASCIIObject_INIT("\x51"), \ + _PyASCIIObject_INIT("\x52"), \ + _PyASCIIObject_INIT("\x53"), \ + _PyASCIIObject_INIT("\x54"), \ + _PyASCIIObject_INIT("\x55"), \ + _PyASCIIObject_INIT("\x56"), \ + _PyASCIIObject_INIT("\x57"), \ + _PyASCIIObject_INIT("\x58"), \ + _PyASCIIObject_INIT("\x59"), \ + _PyASCIIObject_INIT("\x5a"), \ + _PyASCIIObject_INIT("\x5b"), \ + _PyASCIIObject_INIT("\x5c"), \ + _PyASCIIObject_INIT("\x5d"), \ + _PyASCIIObject_INIT("\x5e"), \ + _PyASCIIObject_INIT("\x5f"), \ + _PyASCIIObject_INIT("\x60"), \ + _PyASCIIObject_INIT("\x61"), \ + _PyASCIIObject_INIT("\x62"), \ + _PyASCIIObject_INIT("\x63"), \ + _PyASCIIObject_INIT("\x64"), \ + _PyASCIIObject_INIT("\x65"), \ + _PyASCIIObject_INIT("\x66"), \ + _PyASCIIObject_INIT("\x67"), \ + _PyASCIIObject_INIT("\x68"), \ + _PyASCIIObject_INIT("\x69"), \ + _PyASCIIObject_INIT("\x6a"), \ + _PyASCIIObject_INIT("\x6b"), \ + _PyASCIIObject_INIT("\x6c"), \ + _PyASCIIObject_INIT("\x6d"), \ + _PyASCIIObject_INIT("\x6e"), \ + _PyASCIIObject_INIT("\x6f"), \ + _PyASCIIObject_INIT("\x70"), \ + _PyASCIIObject_INIT("\x71"), \ + _PyASCIIObject_INIT("\x72"), \ + _PyASCIIObject_INIT("\x73"), \ + _PyASCIIObject_INIT("\x74"), \ + _PyASCIIObject_INIT("\x75"), \ + _PyASCIIObject_INIT("\x76"), \ + _PyASCIIObject_INIT("\x77"), \ + _PyASCIIObject_INIT("\x78"), \ + _PyASCIIObject_INIT("\x79"), \ + _PyASCIIObject_INIT("\x7a"), \ + _PyASCIIObject_INIT("\x7b"), \ + _PyASCIIObject_INIT("\x7c"), \ + _PyASCIIObject_INIT("\x7d"), \ + _PyASCIIObject_INIT("\x7e"), \ + _PyASCIIObject_INIT("\x7f"), \ + }, \ + .latin1 = { \ + _PyCompactUnicodeObject_INIT("\x80"), \ + _PyCompactUnicodeObject_INIT("\x81"), \ + _PyCompactUnicodeObject_INIT("\x82"), \ + _PyCompactUnicodeObject_INIT("\x83"), \ + _PyCompactUnicodeObject_INIT("\x84"), \ + _PyCompactUnicodeObject_INIT("\x85"), \ + _PyCompactUnicodeObject_INIT("\x86"), \ + _PyCompactUnicodeObject_INIT("\x87"), \ + _PyCompactUnicodeObject_INIT("\x88"), \ + _PyCompactUnicodeObject_INIT("\x89"), \ + _PyCompactUnicodeObject_INIT("\x8a"), \ + _PyCompactUnicodeObject_INIT("\x8b"), \ + _PyCompactUnicodeObject_INIT("\x8c"), \ + _PyCompactUnicodeObject_INIT("\x8d"), \ + _PyCompactUnicodeObject_INIT("\x8e"), \ + _PyCompactUnicodeObject_INIT("\x8f"), \ + _PyCompactUnicodeObject_INIT("\x90"), \ + _PyCompactUnicodeObject_INIT("\x91"), \ + _PyCompactUnicodeObject_INIT("\x92"), \ + _PyCompactUnicodeObject_INIT("\x93"), \ + _PyCompactUnicodeObject_INIT("\x94"), \ + _PyCompactUnicodeObject_INIT("\x95"), \ + _PyCompactUnicodeObject_INIT("\x96"), \ + _PyCompactUnicodeObject_INIT("\x97"), \ + _PyCompactUnicodeObject_INIT("\x98"), \ + _PyCompactUnicodeObject_INIT("\x99"), \ + _PyCompactUnicodeObject_INIT("\x9a"), \ + _PyCompactUnicodeObject_INIT("\x9b"), \ + _PyCompactUnicodeObject_INIT("\x9c"), \ + _PyCompactUnicodeObject_INIT("\x9d"), \ + _PyCompactUnicodeObject_INIT("\x9e"), \ + _PyCompactUnicodeObject_INIT("\x9f"), \ + _PyCompactUnicodeObject_INIT("\xa0"), \ + _PyCompactUnicodeObject_INIT("\xa1"), \ + _PyCompactUnicodeObject_INIT("\xa2"), \ + _PyCompactUnicodeObject_INIT("\xa3"), \ + _PyCompactUnicodeObject_INIT("\xa4"), \ + _PyCompactUnicodeObject_INIT("\xa5"), \ + _PyCompactUnicodeObject_INIT("\xa6"), \ + _PyCompactUnicodeObject_INIT("\xa7"), \ + _PyCompactUnicodeObject_INIT("\xa8"), \ + _PyCompactUnicodeObject_INIT("\xa9"), \ + _PyCompactUnicodeObject_INIT("\xaa"), \ + _PyCompactUnicodeObject_INIT("\xab"), \ + _PyCompactUnicodeObject_INIT("\xac"), \ + _PyCompactUnicodeObject_INIT("\xad"), \ + _PyCompactUnicodeObject_INIT("\xae"), \ + _PyCompactUnicodeObject_INIT("\xaf"), \ + _PyCompactUnicodeObject_INIT("\xb0"), \ + _PyCompactUnicodeObject_INIT("\xb1"), \ + _PyCompactUnicodeObject_INIT("\xb2"), \ + _PyCompactUnicodeObject_INIT("\xb3"), \ + _PyCompactUnicodeObject_INIT("\xb4"), \ + _PyCompactUnicodeObject_INIT("\xb5"), \ + _PyCompactUnicodeObject_INIT("\xb6"), \ + _PyCompactUnicodeObject_INIT("\xb7"), \ + _PyCompactUnicodeObject_INIT("\xb8"), \ + _PyCompactUnicodeObject_INIT("\xb9"), \ + _PyCompactUnicodeObject_INIT("\xba"), \ + _PyCompactUnicodeObject_INIT("\xbb"), \ + _PyCompactUnicodeObject_INIT("\xbc"), \ + _PyCompactUnicodeObject_INIT("\xbd"), \ + _PyCompactUnicodeObject_INIT("\xbe"), \ + _PyCompactUnicodeObject_INIT("\xbf"), \ + _PyCompactUnicodeObject_INIT("\xc0"), \ + _PyCompactUnicodeObject_INIT("\xc1"), \ + _PyCompactUnicodeObject_INIT("\xc2"), \ + _PyCompactUnicodeObject_INIT("\xc3"), \ + _PyCompactUnicodeObject_INIT("\xc4"), \ + _PyCompactUnicodeObject_INIT("\xc5"), \ + _PyCompactUnicodeObject_INIT("\xc6"), \ + _PyCompactUnicodeObject_INIT("\xc7"), \ + _PyCompactUnicodeObject_INIT("\xc8"), \ + _PyCompactUnicodeObject_INIT("\xc9"), \ + _PyCompactUnicodeObject_INIT("\xca"), \ + _PyCompactUnicodeObject_INIT("\xcb"), \ + _PyCompactUnicodeObject_INIT("\xcc"), \ + _PyCompactUnicodeObject_INIT("\xcd"), \ + _PyCompactUnicodeObject_INIT("\xce"), \ + _PyCompactUnicodeObject_INIT("\xcf"), \ + _PyCompactUnicodeObject_INIT("\xd0"), \ + _PyCompactUnicodeObject_INIT("\xd1"), \ + _PyCompactUnicodeObject_INIT("\xd2"), \ + _PyCompactUnicodeObject_INIT("\xd3"), \ + _PyCompactUnicodeObject_INIT("\xd4"), \ + _PyCompactUnicodeObject_INIT("\xd5"), \ + _PyCompactUnicodeObject_INIT("\xd6"), \ + _PyCompactUnicodeObject_INIT("\xd7"), \ + _PyCompactUnicodeObject_INIT("\xd8"), \ + _PyCompactUnicodeObject_INIT("\xd9"), \ + _PyCompactUnicodeObject_INIT("\xda"), \ + _PyCompactUnicodeObject_INIT("\xdb"), \ + _PyCompactUnicodeObject_INIT("\xdc"), \ + _PyCompactUnicodeObject_INIT("\xdd"), \ + _PyCompactUnicodeObject_INIT("\xde"), \ + _PyCompactUnicodeObject_INIT("\xdf"), \ + _PyCompactUnicodeObject_INIT("\xe0"), \ + _PyCompactUnicodeObject_INIT("\xe1"), \ + _PyCompactUnicodeObject_INIT("\xe2"), \ + _PyCompactUnicodeObject_INIT("\xe3"), \ + _PyCompactUnicodeObject_INIT("\xe4"), \ + _PyCompactUnicodeObject_INIT("\xe5"), \ + _PyCompactUnicodeObject_INIT("\xe6"), \ + _PyCompactUnicodeObject_INIT("\xe7"), \ + _PyCompactUnicodeObject_INIT("\xe8"), \ + _PyCompactUnicodeObject_INIT("\xe9"), \ + _PyCompactUnicodeObject_INIT("\xea"), \ + _PyCompactUnicodeObject_INIT("\xeb"), \ + _PyCompactUnicodeObject_INIT("\xec"), \ + _PyCompactUnicodeObject_INIT("\xed"), \ + _PyCompactUnicodeObject_INIT("\xee"), \ + _PyCompactUnicodeObject_INIT("\xef"), \ + _PyCompactUnicodeObject_INIT("\xf0"), \ + _PyCompactUnicodeObject_INIT("\xf1"), \ + _PyCompactUnicodeObject_INIT("\xf2"), \ + _PyCompactUnicodeObject_INIT("\xf3"), \ + _PyCompactUnicodeObject_INIT("\xf4"), \ + _PyCompactUnicodeObject_INIT("\xf5"), \ + _PyCompactUnicodeObject_INIT("\xf6"), \ + _PyCompactUnicodeObject_INIT("\xf7"), \ + _PyCompactUnicodeObject_INIT("\xf8"), \ + _PyCompactUnicodeObject_INIT("\xf9"), \ + _PyCompactUnicodeObject_INIT("\xfa"), \ + _PyCompactUnicodeObject_INIT("\xfb"), \ + _PyCompactUnicodeObject_INIT("\xfc"), \ + _PyCompactUnicodeObject_INIT("\xfd"), \ + _PyCompactUnicodeObject_INIT("\xfe"), \ + _PyCompactUnicodeObject_INIT("\xff"), \ + }, \ }, \ \ .tuple_empty = { \ diff --git a/Include/internal/pycore_unicodeobject.h b/Include/internal/pycore_unicodeobject.h index 977bbeb1917120..4394ce939b5673 100644 --- a/Include/internal/pycore_unicodeobject.h +++ b/Include/internal/pycore_unicodeobject.h @@ -44,9 +44,6 @@ struct _Py_unicode_ids { }; struct _Py_unicode_state { - /* Single character Unicode strings in the Latin-1 range are being - shared as well. */ - PyObject *latin1[256]; struct _Py_unicode_fs_codec fs_codec; // Unicode identifiers (_Py_Identifier): see _PyUnicode_FromId() diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 908ad514925999..38ce81e59c9623 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -206,6 +206,7 @@ extern "C" { *_to++ = (to_type) *_iter++; \ } while (0) +#define LATIN1(ch) (ch < 128 ? (PyObject*)&_Py_SINGLETON(strings).ascii[ch] : (PyObject*)&_Py_SINGLETON(strings).latin1[ch - 128]) #ifdef MS_WINDOWS /* On Windows, overallocate by 50% is the best factor */ # define OVERALLOCATE_FACTOR 2 @@ -249,14 +250,6 @@ static int unicode_is_singleton(PyObject *unicode); #endif -static struct _Py_unicode_state* -get_unicode_state(void) -{ - PyInterpreterState *interp = _PyInterpreterState_GET(); - return &interp->unicode; -} - - // Return a borrowed reference to the empty string singleton. static inline PyObject* unicode_get_empty(void) { @@ -680,24 +673,10 @@ unicode_result_ready(PyObject *unicode) if (kind == PyUnicode_1BYTE_KIND) { const Py_UCS1 *data = PyUnicode_1BYTE_DATA(unicode); Py_UCS1 ch = data[0]; - struct _Py_unicode_state *state = get_unicode_state(); - PyObject *latin1_char = state->latin1[ch]; - if (latin1_char != NULL) { - if (unicode != latin1_char) { - Py_INCREF(latin1_char); - Py_DECREF(unicode); - } - return latin1_char; + if (unicode != LATIN1(ch)) { + Py_DECREF(unicode); } - else { - assert(_PyUnicode_CheckConsistency(unicode, 1)); - Py_INCREF(unicode); - state->latin1[ch] = unicode; - return unicode; - } - } - else { - assert(PyUnicode_READ_CHAR(unicode, 0) >= 256); + return get_latin1_char(ch); } } @@ -1990,11 +1969,10 @@ unicode_is_singleton(PyObject *unicode) return 1; } - struct _Py_unicode_state *state = get_unicode_state(); PyASCIIObject *ascii = (PyASCIIObject *)unicode; if (ascii->state.kind != PyUnicode_WCHAR_KIND && ascii->length == 1) { Py_UCS4 ch = PyUnicode_READ_CHAR(unicode, 0); - if (ch < 256 && state->latin1[ch] == unicode) { + if (ch < 256 && LATIN1(ch) == unicode) { return 1; } } @@ -2137,25 +2115,7 @@ unicode_write_cstr(PyObject *unicode, Py_ssize_t index, static PyObject* get_latin1_char(Py_UCS1 ch) { - struct _Py_unicode_state *state = get_unicode_state(); - - PyObject *unicode = state->latin1[ch]; - if (unicode) { - Py_INCREF(unicode); - return unicode; - } - - unicode = PyUnicode_New(1, ch); - if (!unicode) { - return NULL; - } - - PyUnicode_1BYTE_DATA(unicode)[0] = ch; - assert(_PyUnicode_CheckConsistency(unicode, 1)); - - Py_INCREF(unicode); - state->latin1[ch] = unicode; - return unicode; + return Py_NewRef(LATIN1(ch)); } static PyObject* @@ -16113,10 +16073,6 @@ _PyUnicode_Fini(PyInterpreterState *interp) _PyUnicode_FiniEncodings(&state->fs_codec); unicode_clear_identifiers(state); - - for (Py_ssize_t i = 0; i < 256; i++) { - Py_CLEAR(state->latin1[i]); - } } diff --git a/Tools/scripts/generate_global_objects.py b/Tools/scripts/generate_global_objects.py index 867358cda89194..15b7296ee351a1 100644 --- a/Tools/scripts/generate_global_objects.py +++ b/Tools/scripts/generate_global_objects.py @@ -196,6 +196,13 @@ def generate_global_strings(identifiers, strings): for name in sorted(identifiers): assert name.isidentifier(), name printer.write(f'STRUCT_FOR_ID({name})') + with printer.block('struct', ' ascii[128];'): + printer.write("PyASCIIObject _ascii;") + printer.write("uint8_t _data[2];") + with printer.block('struct', ' latin1[128];'): + printer.write("PyCompactUnicodeObject _latin1;") + printer.write("uint8_t _data[2];") + printer.write(END) printer.write(after) @@ -252,6 +259,12 @@ def generate_runtime_init(identifiers, strings): for name in sorted(identifiers): assert name.isidentifier(), name printer.write(f'INIT_ID({name}),') + with printer.block('.ascii =', ','): + for i in range(128): + printer.write(f'_PyASCIIObject_INIT("\\x{i:02x}"),') + with printer.block('.latin1 =', ','): + for i in range(128, 256): + printer.write(f'_PyCompactUnicodeObject_INIT("\\x{i:02x}"),') printer.write('') with printer.block('.tuple_empty =', ','): printer.write('.ob_base = _PyVarObject_IMMORTAL_INIT(&PyTuple_Type, 0)') From 62720ffc87f507c3f468c69cf3ffd4a8d313507e Mon Sep 17 00:00:00 2001 From: "blurb-it[bot]" <43283697+blurb-it[bot]@users.noreply.github.com> Date: Thu, 3 Mar 2022 09:08:17 +0000 Subject: [PATCH 2/8] =?UTF-8?q?=F0=9F=93=9C=F0=9F=A4=96=20Added=20by=20blu?= =?UTF-8?q?rb=5Fit.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../Core and Builtins/2022-03-03-09-08-17.bpo-46881.ckD4tT.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2022-03-03-09-08-17.bpo-46881.ckD4tT.rst diff --git a/Misc/NEWS.d/next/Core and Builtins/2022-03-03-09-08-17.bpo-46881.ckD4tT.rst b/Misc/NEWS.d/next/Core and Builtins/2022-03-03-09-08-17.bpo-46881.ckD4tT.rst new file mode 100644 index 00000000000000..88434dd1dba97c --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2022-03-03-09-08-17.bpo-46881.ckD4tT.rst @@ -0,0 +1 @@ +Statically allocate and initialize the latin1 characters. From 7bf2ac520dc26b99b230c5b682ddc828db191ab2 Mon Sep 17 00:00:00 2001 From: Kumar Aditya <59607654+kumaraditya303@users.noreply.github.com> Date: Thu, 3 Mar 2022 10:41:05 +0000 Subject: [PATCH 3/8] fmt macro --- Objects/unicodeobject.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 38ce81e59c9623..a8fa850ef0f863 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -206,7 +206,11 @@ extern "C" { *_to++ = (to_type) *_iter++; \ } while (0) -#define LATIN1(ch) (ch < 128 ? (PyObject*)&_Py_SINGLETON(strings).ascii[ch] : (PyObject*)&_Py_SINGLETON(strings).latin1[ch - 128]) +#define LATIN1(ch) \ + (ch < 128 ? \ + (PyObject*)&_Py_SINGLETON(strings).ascii[ch] \ + : (PyObject*)&_Py_SINGLETON(strings).latin1[ch - 128]) + #ifdef MS_WINDOWS /* On Windows, overallocate by 50% is the best factor */ # define OVERALLOCATE_FACTOR 2 From 366810f4d8ed37f5453b2a0bcbe2a33d4e994df4 Mon Sep 17 00:00:00 2001 From: Kumar Aditya <59607654+kumaraditya303@users.noreply.github.com> Date: Fri, 4 Mar 2022 13:27:44 +0530 Subject: [PATCH 4/8] Update Objects/unicodeobject.c Co-authored-by: Eric Snow --- Objects/unicodeobject.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index a8fa850ef0f863..bbc9cd1b752731 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -207,8 +207,8 @@ extern "C" { } while (0) #define LATIN1(ch) \ - (ch < 128 ? \ - (PyObject*)&_Py_SINGLETON(strings).ascii[ch] \ + (ch < 128 \ + ? (PyObject*)&_Py_SINGLETON(strings).ascii[ch] \ : (PyObject*)&_Py_SINGLETON(strings).latin1[ch - 128]) #ifdef MS_WINDOWS From fd3676bc84ed92ac8156dfd6f8d78c3f3cc0e3c1 Mon Sep 17 00:00:00 2001 From: Kumar Aditya <59607654+kumaraditya303@users.noreply.github.com> Date: Fri, 4 Mar 2022 08:03:19 +0000 Subject: [PATCH 5/8] add assertions for _PyUnicode_CheckConsistency for latin1 characters --- Objects/unicodeobject.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index bbc9cd1b752731..9052c53f11b8e2 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -15499,6 +15499,10 @@ _PyUnicode_InitGlobalObjects(PyInterpreterState *interp) #ifdef Py_DEBUG assert(_PyUnicode_CheckConsistency(&_Py_STR(empty), 1)); + + for (int i = 0; i < 256; i++) { + assert(_PyUnicode_CheckConsistency(LATIN1(i), 1)); + } #endif return _PyStatus_OK(); From be04d68c3755482a90b916d7bd9999525ef8ec4f Mon Sep 17 00:00:00 2001 From: Kumar Aditya <59607654+kumaraditya303@users.noreply.github.com> Date: Fri, 4 Mar 2022 08:09:59 +0000 Subject: [PATCH 6/8] refactor macro --- Include/internal/pycore_runtime_init.h | 38 ++++++++++---------------- 1 file changed, 15 insertions(+), 23 deletions(-) diff --git a/Include/internal/pycore_runtime_init.h b/Include/internal/pycore_runtime_init.h index 355498a8210633..370b48b9e5cc44 100644 --- a/Include/internal/pycore_runtime_init.h +++ b/Include/internal/pycore_runtime_init.h @@ -93,20 +93,22 @@ extern "C" { _PyBytes_SIMPLE_INIT(CH, 1) \ } -#define _PyASCIIObject_INIT(LITERAL) \ +#define _PyASCIIObjectBase_INIT(LITERAL, ASCII) \ { \ - ._ascii = { \ - .ob_base = _PyObject_IMMORTAL_INIT(&PyUnicode_Type), \ - .length = sizeof(LITERAL) - 1, \ - .hash = -1, \ - .state = { \ - .kind = 1, \ - .compact = 1, \ - .ascii = 1, \ - .ready = 1, \ - }, \ + .ob_base = _PyObject_IMMORTAL_INIT(&PyUnicode_Type), \ + .length = sizeof(LITERAL) - 1, \ + .hash = -1, \ + .state = { \ + .kind = 1, \ + .compact = 1, \ + .ascii = ASCII, \ + .ready = 1, \ }, \ - ._data = LITERAL, \ + } +#define _PyASCIIObject_INIT(LITERAL) \ + { \ + ._ascii = _PyASCIIObjectBase_INIT(LITERAL, 1), \ + ._data = LITERAL \ } #define INIT_STR(NAME, LITERAL) \ ._ ## NAME = _PyASCIIObject_INIT(LITERAL) @@ -115,17 +117,7 @@ extern "C" { #define _PyCompactUnicodeObject_INIT(LITERAL) \ { \ ._latin1 = { \ - ._base = { \ - .ob_base = _PyObject_IMMORTAL_INIT(&PyUnicode_Type), \ - .length = sizeof(LITERAL) - 1, \ - .hash = -1, \ - .state = { \ - .kind = 1, \ - .compact = 1, \ - .ascii = 0, \ - .ready = 1, \ - }, \ - }, \ + ._base = _PyASCIIObjectBase_INIT(LITERAL, 0), \ }, \ ._data = LITERAL, \ } From 7f885196a3ac0c7ebcd5e2d557751bd42e8c9708 Mon Sep 17 00:00:00 2001 From: Kumar Aditya <59607654+kumaraditya303@users.noreply.github.com> Date: Sat, 5 Mar 2022 06:36:42 +0000 Subject: [PATCH 7/8] code review --- Include/internal/pycore_runtime_init.h | 264 +++++++++++------------ Tools/scripts/generate_global_objects.py | 2 +- 2 files changed, 133 insertions(+), 133 deletions(-) diff --git a/Include/internal/pycore_runtime_init.h b/Include/internal/pycore_runtime_init.h index 370b48b9e5cc44..dd5e2d1046f7d6 100644 --- a/Include/internal/pycore_runtime_init.h +++ b/Include/internal/pycore_runtime_init.h @@ -93,7 +93,7 @@ extern "C" { _PyBytes_SIMPLE_INIT(CH, 1) \ } -#define _PyASCIIObjectBase_INIT(LITERAL, ASCII) \ +#define _PyUnicode_ASCII_BASE_INIT(LITERAL, ASCII) \ { \ .ob_base = _PyObject_IMMORTAL_INIT(&PyUnicode_Type), \ .length = sizeof(LITERAL) - 1, \ @@ -107,17 +107,17 @@ extern "C" { } #define _PyASCIIObject_INIT(LITERAL) \ { \ - ._ascii = _PyASCIIObjectBase_INIT(LITERAL, 1), \ + ._ascii = _PyUnicode_ASCII_BASE_INIT(LITERAL, 1), \ ._data = LITERAL \ } #define INIT_STR(NAME, LITERAL) \ ._ ## NAME = _PyASCIIObject_INIT(LITERAL) #define INIT_ID(NAME) \ ._ ## NAME = _PyASCIIObject_INIT(#NAME) -#define _PyCompactUnicodeObject_INIT(LITERAL) \ +#define _PyUnicode_LATIN1_BASE_INIT(LITERAL) \ { \ ._latin1 = { \ - ._base = _PyASCIIObjectBase_INIT(LITERAL, 0), \ + ._base = _PyUnicode_ASCII_BASE_INIT(LITERAL, 0), \ }, \ ._data = LITERAL, \ } @@ -1102,134 +1102,134 @@ extern "C" { _PyASCIIObject_INIT("\x7f"), \ }, \ .latin1 = { \ - _PyCompactUnicodeObject_INIT("\x80"), \ - _PyCompactUnicodeObject_INIT("\x81"), \ - _PyCompactUnicodeObject_INIT("\x82"), \ - _PyCompactUnicodeObject_INIT("\x83"), \ - _PyCompactUnicodeObject_INIT("\x84"), \ - _PyCompactUnicodeObject_INIT("\x85"), \ - _PyCompactUnicodeObject_INIT("\x86"), \ - _PyCompactUnicodeObject_INIT("\x87"), \ - _PyCompactUnicodeObject_INIT("\x88"), \ - _PyCompactUnicodeObject_INIT("\x89"), \ - _PyCompactUnicodeObject_INIT("\x8a"), \ - _PyCompactUnicodeObject_INIT("\x8b"), \ - _PyCompactUnicodeObject_INIT("\x8c"), \ - _PyCompactUnicodeObject_INIT("\x8d"), \ - _PyCompactUnicodeObject_INIT("\x8e"), \ - _PyCompactUnicodeObject_INIT("\x8f"), \ - _PyCompactUnicodeObject_INIT("\x90"), \ - _PyCompactUnicodeObject_INIT("\x91"), \ - _PyCompactUnicodeObject_INIT("\x92"), \ - _PyCompactUnicodeObject_INIT("\x93"), \ - _PyCompactUnicodeObject_INIT("\x94"), \ - _PyCompactUnicodeObject_INIT("\x95"), \ - _PyCompactUnicodeObject_INIT("\x96"), \ - _PyCompactUnicodeObject_INIT("\x97"), \ - _PyCompactUnicodeObject_INIT("\x98"), \ - _PyCompactUnicodeObject_INIT("\x99"), \ - _PyCompactUnicodeObject_INIT("\x9a"), \ - _PyCompactUnicodeObject_INIT("\x9b"), \ - _PyCompactUnicodeObject_INIT("\x9c"), \ - _PyCompactUnicodeObject_INIT("\x9d"), \ - _PyCompactUnicodeObject_INIT("\x9e"), \ - _PyCompactUnicodeObject_INIT("\x9f"), \ - _PyCompactUnicodeObject_INIT("\xa0"), \ - _PyCompactUnicodeObject_INIT("\xa1"), \ - _PyCompactUnicodeObject_INIT("\xa2"), \ - _PyCompactUnicodeObject_INIT("\xa3"), \ - _PyCompactUnicodeObject_INIT("\xa4"), \ - _PyCompactUnicodeObject_INIT("\xa5"), \ - _PyCompactUnicodeObject_INIT("\xa6"), \ - _PyCompactUnicodeObject_INIT("\xa7"), \ - _PyCompactUnicodeObject_INIT("\xa8"), \ - _PyCompactUnicodeObject_INIT("\xa9"), \ - _PyCompactUnicodeObject_INIT("\xaa"), \ - _PyCompactUnicodeObject_INIT("\xab"), \ - _PyCompactUnicodeObject_INIT("\xac"), \ - _PyCompactUnicodeObject_INIT("\xad"), \ - _PyCompactUnicodeObject_INIT("\xae"), \ - _PyCompactUnicodeObject_INIT("\xaf"), \ - _PyCompactUnicodeObject_INIT("\xb0"), \ - _PyCompactUnicodeObject_INIT("\xb1"), \ - _PyCompactUnicodeObject_INIT("\xb2"), \ - _PyCompactUnicodeObject_INIT("\xb3"), \ - _PyCompactUnicodeObject_INIT("\xb4"), \ - _PyCompactUnicodeObject_INIT("\xb5"), \ - _PyCompactUnicodeObject_INIT("\xb6"), \ - _PyCompactUnicodeObject_INIT("\xb7"), \ - _PyCompactUnicodeObject_INIT("\xb8"), \ - _PyCompactUnicodeObject_INIT("\xb9"), \ - _PyCompactUnicodeObject_INIT("\xba"), \ - _PyCompactUnicodeObject_INIT("\xbb"), \ - _PyCompactUnicodeObject_INIT("\xbc"), \ - _PyCompactUnicodeObject_INIT("\xbd"), \ - _PyCompactUnicodeObject_INIT("\xbe"), \ - _PyCompactUnicodeObject_INIT("\xbf"), \ - _PyCompactUnicodeObject_INIT("\xc0"), \ - _PyCompactUnicodeObject_INIT("\xc1"), \ - _PyCompactUnicodeObject_INIT("\xc2"), \ - _PyCompactUnicodeObject_INIT("\xc3"), \ - _PyCompactUnicodeObject_INIT("\xc4"), \ - _PyCompactUnicodeObject_INIT("\xc5"), \ - _PyCompactUnicodeObject_INIT("\xc6"), \ - _PyCompactUnicodeObject_INIT("\xc7"), \ - _PyCompactUnicodeObject_INIT("\xc8"), \ - _PyCompactUnicodeObject_INIT("\xc9"), \ - _PyCompactUnicodeObject_INIT("\xca"), \ - _PyCompactUnicodeObject_INIT("\xcb"), \ - _PyCompactUnicodeObject_INIT("\xcc"), \ - _PyCompactUnicodeObject_INIT("\xcd"), \ - _PyCompactUnicodeObject_INIT("\xce"), \ - _PyCompactUnicodeObject_INIT("\xcf"), \ - _PyCompactUnicodeObject_INIT("\xd0"), \ - _PyCompactUnicodeObject_INIT("\xd1"), \ - _PyCompactUnicodeObject_INIT("\xd2"), \ - _PyCompactUnicodeObject_INIT("\xd3"), \ - _PyCompactUnicodeObject_INIT("\xd4"), \ - _PyCompactUnicodeObject_INIT("\xd5"), \ - _PyCompactUnicodeObject_INIT("\xd6"), \ - _PyCompactUnicodeObject_INIT("\xd7"), \ - _PyCompactUnicodeObject_INIT("\xd8"), \ - _PyCompactUnicodeObject_INIT("\xd9"), \ - _PyCompactUnicodeObject_INIT("\xda"), \ - _PyCompactUnicodeObject_INIT("\xdb"), \ - _PyCompactUnicodeObject_INIT("\xdc"), \ - _PyCompactUnicodeObject_INIT("\xdd"), \ - _PyCompactUnicodeObject_INIT("\xde"), \ - _PyCompactUnicodeObject_INIT("\xdf"), \ - _PyCompactUnicodeObject_INIT("\xe0"), \ - _PyCompactUnicodeObject_INIT("\xe1"), \ - _PyCompactUnicodeObject_INIT("\xe2"), \ - _PyCompactUnicodeObject_INIT("\xe3"), \ - _PyCompactUnicodeObject_INIT("\xe4"), \ - _PyCompactUnicodeObject_INIT("\xe5"), \ - _PyCompactUnicodeObject_INIT("\xe6"), \ - _PyCompactUnicodeObject_INIT("\xe7"), \ - _PyCompactUnicodeObject_INIT("\xe8"), \ - _PyCompactUnicodeObject_INIT("\xe9"), \ - _PyCompactUnicodeObject_INIT("\xea"), \ - _PyCompactUnicodeObject_INIT("\xeb"), \ - _PyCompactUnicodeObject_INIT("\xec"), \ - _PyCompactUnicodeObject_INIT("\xed"), \ - _PyCompactUnicodeObject_INIT("\xee"), \ - _PyCompactUnicodeObject_INIT("\xef"), \ - _PyCompactUnicodeObject_INIT("\xf0"), \ - _PyCompactUnicodeObject_INIT("\xf1"), \ - _PyCompactUnicodeObject_INIT("\xf2"), \ - _PyCompactUnicodeObject_INIT("\xf3"), \ - _PyCompactUnicodeObject_INIT("\xf4"), \ - _PyCompactUnicodeObject_INIT("\xf5"), \ - _PyCompactUnicodeObject_INIT("\xf6"), \ - _PyCompactUnicodeObject_INIT("\xf7"), \ - _PyCompactUnicodeObject_INIT("\xf8"), \ - _PyCompactUnicodeObject_INIT("\xf9"), \ - _PyCompactUnicodeObject_INIT("\xfa"), \ - _PyCompactUnicodeObject_INIT("\xfb"), \ - _PyCompactUnicodeObject_INIT("\xfc"), \ - _PyCompactUnicodeObject_INIT("\xfd"), \ - _PyCompactUnicodeObject_INIT("\xfe"), \ - _PyCompactUnicodeObject_INIT("\xff"), \ + _PyUnicode_LATIN1_BASE_INIT("\x80"), \ + _PyUnicode_LATIN1_BASE_INIT("\x81"), \ + _PyUnicode_LATIN1_BASE_INIT("\x82"), \ + _PyUnicode_LATIN1_BASE_INIT("\x83"), \ + _PyUnicode_LATIN1_BASE_INIT("\x84"), \ + _PyUnicode_LATIN1_BASE_INIT("\x85"), \ + _PyUnicode_LATIN1_BASE_INIT("\x86"), \ + _PyUnicode_LATIN1_BASE_INIT("\x87"), \ + _PyUnicode_LATIN1_BASE_INIT("\x88"), \ + _PyUnicode_LATIN1_BASE_INIT("\x89"), \ + _PyUnicode_LATIN1_BASE_INIT("\x8a"), \ + _PyUnicode_LATIN1_BASE_INIT("\x8b"), \ + _PyUnicode_LATIN1_BASE_INIT("\x8c"), \ + _PyUnicode_LATIN1_BASE_INIT("\x8d"), \ + _PyUnicode_LATIN1_BASE_INIT("\x8e"), \ + _PyUnicode_LATIN1_BASE_INIT("\x8f"), \ + _PyUnicode_LATIN1_BASE_INIT("\x90"), \ + _PyUnicode_LATIN1_BASE_INIT("\x91"), \ + _PyUnicode_LATIN1_BASE_INIT("\x92"), \ + _PyUnicode_LATIN1_BASE_INIT("\x93"), \ + _PyUnicode_LATIN1_BASE_INIT("\x94"), \ + _PyUnicode_LATIN1_BASE_INIT("\x95"), \ + _PyUnicode_LATIN1_BASE_INIT("\x96"), \ + _PyUnicode_LATIN1_BASE_INIT("\x97"), \ + _PyUnicode_LATIN1_BASE_INIT("\x98"), \ + _PyUnicode_LATIN1_BASE_INIT("\x99"), \ + _PyUnicode_LATIN1_BASE_INIT("\x9a"), \ + _PyUnicode_LATIN1_BASE_INIT("\x9b"), \ + _PyUnicode_LATIN1_BASE_INIT("\x9c"), \ + _PyUnicode_LATIN1_BASE_INIT("\x9d"), \ + _PyUnicode_LATIN1_BASE_INIT("\x9e"), \ + _PyUnicode_LATIN1_BASE_INIT("\x9f"), \ + _PyUnicode_LATIN1_BASE_INIT("\xa0"), \ + _PyUnicode_LATIN1_BASE_INIT("\xa1"), \ + _PyUnicode_LATIN1_BASE_INIT("\xa2"), \ + _PyUnicode_LATIN1_BASE_INIT("\xa3"), \ + _PyUnicode_LATIN1_BASE_INIT("\xa4"), \ + _PyUnicode_LATIN1_BASE_INIT("\xa5"), \ + _PyUnicode_LATIN1_BASE_INIT("\xa6"), \ + _PyUnicode_LATIN1_BASE_INIT("\xa7"), \ + _PyUnicode_LATIN1_BASE_INIT("\xa8"), \ + _PyUnicode_LATIN1_BASE_INIT("\xa9"), \ + _PyUnicode_LATIN1_BASE_INIT("\xaa"), \ + _PyUnicode_LATIN1_BASE_INIT("\xab"), \ + _PyUnicode_LATIN1_BASE_INIT("\xac"), \ + _PyUnicode_LATIN1_BASE_INIT("\xad"), \ + _PyUnicode_LATIN1_BASE_INIT("\xae"), \ + _PyUnicode_LATIN1_BASE_INIT("\xaf"), \ + _PyUnicode_LATIN1_BASE_INIT("\xb0"), \ + _PyUnicode_LATIN1_BASE_INIT("\xb1"), \ + _PyUnicode_LATIN1_BASE_INIT("\xb2"), \ + _PyUnicode_LATIN1_BASE_INIT("\xb3"), \ + _PyUnicode_LATIN1_BASE_INIT("\xb4"), \ + _PyUnicode_LATIN1_BASE_INIT("\xb5"), \ + _PyUnicode_LATIN1_BASE_INIT("\xb6"), \ + _PyUnicode_LATIN1_BASE_INIT("\xb7"), \ + _PyUnicode_LATIN1_BASE_INIT("\xb8"), \ + _PyUnicode_LATIN1_BASE_INIT("\xb9"), \ + _PyUnicode_LATIN1_BASE_INIT("\xba"), \ + _PyUnicode_LATIN1_BASE_INIT("\xbb"), \ + _PyUnicode_LATIN1_BASE_INIT("\xbc"), \ + _PyUnicode_LATIN1_BASE_INIT("\xbd"), \ + _PyUnicode_LATIN1_BASE_INIT("\xbe"), \ + _PyUnicode_LATIN1_BASE_INIT("\xbf"), \ + _PyUnicode_LATIN1_BASE_INIT("\xc0"), \ + _PyUnicode_LATIN1_BASE_INIT("\xc1"), \ + _PyUnicode_LATIN1_BASE_INIT("\xc2"), \ + _PyUnicode_LATIN1_BASE_INIT("\xc3"), \ + _PyUnicode_LATIN1_BASE_INIT("\xc4"), \ + _PyUnicode_LATIN1_BASE_INIT("\xc5"), \ + _PyUnicode_LATIN1_BASE_INIT("\xc6"), \ + _PyUnicode_LATIN1_BASE_INIT("\xc7"), \ + _PyUnicode_LATIN1_BASE_INIT("\xc8"), \ + _PyUnicode_LATIN1_BASE_INIT("\xc9"), \ + _PyUnicode_LATIN1_BASE_INIT("\xca"), \ + _PyUnicode_LATIN1_BASE_INIT("\xcb"), \ + _PyUnicode_LATIN1_BASE_INIT("\xcc"), \ + _PyUnicode_LATIN1_BASE_INIT("\xcd"), \ + _PyUnicode_LATIN1_BASE_INIT("\xce"), \ + _PyUnicode_LATIN1_BASE_INIT("\xcf"), \ + _PyUnicode_LATIN1_BASE_INIT("\xd0"), \ + _PyUnicode_LATIN1_BASE_INIT("\xd1"), \ + _PyUnicode_LATIN1_BASE_INIT("\xd2"), \ + _PyUnicode_LATIN1_BASE_INIT("\xd3"), \ + _PyUnicode_LATIN1_BASE_INIT("\xd4"), \ + _PyUnicode_LATIN1_BASE_INIT("\xd5"), \ + _PyUnicode_LATIN1_BASE_INIT("\xd6"), \ + _PyUnicode_LATIN1_BASE_INIT("\xd7"), \ + _PyUnicode_LATIN1_BASE_INIT("\xd8"), \ + _PyUnicode_LATIN1_BASE_INIT("\xd9"), \ + _PyUnicode_LATIN1_BASE_INIT("\xda"), \ + _PyUnicode_LATIN1_BASE_INIT("\xdb"), \ + _PyUnicode_LATIN1_BASE_INIT("\xdc"), \ + _PyUnicode_LATIN1_BASE_INIT("\xdd"), \ + _PyUnicode_LATIN1_BASE_INIT("\xde"), \ + _PyUnicode_LATIN1_BASE_INIT("\xdf"), \ + _PyUnicode_LATIN1_BASE_INIT("\xe0"), \ + _PyUnicode_LATIN1_BASE_INIT("\xe1"), \ + _PyUnicode_LATIN1_BASE_INIT("\xe2"), \ + _PyUnicode_LATIN1_BASE_INIT("\xe3"), \ + _PyUnicode_LATIN1_BASE_INIT("\xe4"), \ + _PyUnicode_LATIN1_BASE_INIT("\xe5"), \ + _PyUnicode_LATIN1_BASE_INIT("\xe6"), \ + _PyUnicode_LATIN1_BASE_INIT("\xe7"), \ + _PyUnicode_LATIN1_BASE_INIT("\xe8"), \ + _PyUnicode_LATIN1_BASE_INIT("\xe9"), \ + _PyUnicode_LATIN1_BASE_INIT("\xea"), \ + _PyUnicode_LATIN1_BASE_INIT("\xeb"), \ + _PyUnicode_LATIN1_BASE_INIT("\xec"), \ + _PyUnicode_LATIN1_BASE_INIT("\xed"), \ + _PyUnicode_LATIN1_BASE_INIT("\xee"), \ + _PyUnicode_LATIN1_BASE_INIT("\xef"), \ + _PyUnicode_LATIN1_BASE_INIT("\xf0"), \ + _PyUnicode_LATIN1_BASE_INIT("\xf1"), \ + _PyUnicode_LATIN1_BASE_INIT("\xf2"), \ + _PyUnicode_LATIN1_BASE_INIT("\xf3"), \ + _PyUnicode_LATIN1_BASE_INIT("\xf4"), \ + _PyUnicode_LATIN1_BASE_INIT("\xf5"), \ + _PyUnicode_LATIN1_BASE_INIT("\xf6"), \ + _PyUnicode_LATIN1_BASE_INIT("\xf7"), \ + _PyUnicode_LATIN1_BASE_INIT("\xf8"), \ + _PyUnicode_LATIN1_BASE_INIT("\xf9"), \ + _PyUnicode_LATIN1_BASE_INIT("\xfa"), \ + _PyUnicode_LATIN1_BASE_INIT("\xfb"), \ + _PyUnicode_LATIN1_BASE_INIT("\xfc"), \ + _PyUnicode_LATIN1_BASE_INIT("\xfd"), \ + _PyUnicode_LATIN1_BASE_INIT("\xfe"), \ + _PyUnicode_LATIN1_BASE_INIT("\xff"), \ }, \ }, \ \ diff --git a/Tools/scripts/generate_global_objects.py b/Tools/scripts/generate_global_objects.py index 15b7296ee351a1..552fdb8be7a108 100644 --- a/Tools/scripts/generate_global_objects.py +++ b/Tools/scripts/generate_global_objects.py @@ -264,7 +264,7 @@ def generate_runtime_init(identifiers, strings): printer.write(f'_PyASCIIObject_INIT("\\x{i:02x}"),') with printer.block('.latin1 =', ','): for i in range(128, 256): - printer.write(f'_PyCompactUnicodeObject_INIT("\\x{i:02x}"),') + printer.write(f'_PyUnicode_LATIN1_BASE_INIT("\\x{i:02x}"),') printer.write('') with printer.block('.tuple_empty =', ','): printer.write('.ob_base = _PyVarObject_IMMORTAL_INIT(&PyTuple_Type, 0)') From f9d1a87863ed9e46491e75f2e995c9db9e6a6162 Mon Sep 17 00:00:00 2001 From: Kumar Aditya <59607654+kumaraditya303@users.noreply.github.com> Date: Sat, 5 Mar 2022 06:42:06 +0000 Subject: [PATCH 8/8] rename macro --- Include/internal/pycore_runtime_init.h | 258 +++++++++++------------ Tools/scripts/generate_global_objects.py | 2 +- 2 files changed, 130 insertions(+), 130 deletions(-) diff --git a/Include/internal/pycore_runtime_init.h b/Include/internal/pycore_runtime_init.h index dd5e2d1046f7d6..94a5ed6a1cfa98 100644 --- a/Include/internal/pycore_runtime_init.h +++ b/Include/internal/pycore_runtime_init.h @@ -114,7 +114,7 @@ extern "C" { ._ ## NAME = _PyASCIIObject_INIT(LITERAL) #define INIT_ID(NAME) \ ._ ## NAME = _PyASCIIObject_INIT(#NAME) -#define _PyUnicode_LATIN1_BASE_INIT(LITERAL) \ +#define _PyUnicode_LATIN1_INIT(LITERAL) \ { \ ._latin1 = { \ ._base = _PyUnicode_ASCII_BASE_INIT(LITERAL, 0), \ @@ -1102,134 +1102,134 @@ extern "C" { _PyASCIIObject_INIT("\x7f"), \ }, \ .latin1 = { \ - _PyUnicode_LATIN1_BASE_INIT("\x80"), \ - _PyUnicode_LATIN1_BASE_INIT("\x81"), \ - _PyUnicode_LATIN1_BASE_INIT("\x82"), \ - _PyUnicode_LATIN1_BASE_INIT("\x83"), \ - _PyUnicode_LATIN1_BASE_INIT("\x84"), \ - _PyUnicode_LATIN1_BASE_INIT("\x85"), \ - _PyUnicode_LATIN1_BASE_INIT("\x86"), \ - _PyUnicode_LATIN1_BASE_INIT("\x87"), \ - _PyUnicode_LATIN1_BASE_INIT("\x88"), \ - _PyUnicode_LATIN1_BASE_INIT("\x89"), \ - _PyUnicode_LATIN1_BASE_INIT("\x8a"), \ - _PyUnicode_LATIN1_BASE_INIT("\x8b"), \ - _PyUnicode_LATIN1_BASE_INIT("\x8c"), \ - _PyUnicode_LATIN1_BASE_INIT("\x8d"), \ - _PyUnicode_LATIN1_BASE_INIT("\x8e"), \ - _PyUnicode_LATIN1_BASE_INIT("\x8f"), \ - _PyUnicode_LATIN1_BASE_INIT("\x90"), \ - _PyUnicode_LATIN1_BASE_INIT("\x91"), \ - _PyUnicode_LATIN1_BASE_INIT("\x92"), \ - _PyUnicode_LATIN1_BASE_INIT("\x93"), \ - _PyUnicode_LATIN1_BASE_INIT("\x94"), \ - _PyUnicode_LATIN1_BASE_INIT("\x95"), \ - _PyUnicode_LATIN1_BASE_INIT("\x96"), \ - _PyUnicode_LATIN1_BASE_INIT("\x97"), \ - _PyUnicode_LATIN1_BASE_INIT("\x98"), \ - _PyUnicode_LATIN1_BASE_INIT("\x99"), \ - _PyUnicode_LATIN1_BASE_INIT("\x9a"), \ - _PyUnicode_LATIN1_BASE_INIT("\x9b"), \ - _PyUnicode_LATIN1_BASE_INIT("\x9c"), \ - _PyUnicode_LATIN1_BASE_INIT("\x9d"), \ - _PyUnicode_LATIN1_BASE_INIT("\x9e"), \ - _PyUnicode_LATIN1_BASE_INIT("\x9f"), \ - _PyUnicode_LATIN1_BASE_INIT("\xa0"), \ - _PyUnicode_LATIN1_BASE_INIT("\xa1"), \ - _PyUnicode_LATIN1_BASE_INIT("\xa2"), \ - _PyUnicode_LATIN1_BASE_INIT("\xa3"), \ - _PyUnicode_LATIN1_BASE_INIT("\xa4"), \ - _PyUnicode_LATIN1_BASE_INIT("\xa5"), \ - _PyUnicode_LATIN1_BASE_INIT("\xa6"), \ - _PyUnicode_LATIN1_BASE_INIT("\xa7"), \ - _PyUnicode_LATIN1_BASE_INIT("\xa8"), \ - _PyUnicode_LATIN1_BASE_INIT("\xa9"), \ - _PyUnicode_LATIN1_BASE_INIT("\xaa"), \ - _PyUnicode_LATIN1_BASE_INIT("\xab"), \ - _PyUnicode_LATIN1_BASE_INIT("\xac"), \ - _PyUnicode_LATIN1_BASE_INIT("\xad"), \ - _PyUnicode_LATIN1_BASE_INIT("\xae"), \ - _PyUnicode_LATIN1_BASE_INIT("\xaf"), \ - _PyUnicode_LATIN1_BASE_INIT("\xb0"), \ - _PyUnicode_LATIN1_BASE_INIT("\xb1"), \ - _PyUnicode_LATIN1_BASE_INIT("\xb2"), \ - _PyUnicode_LATIN1_BASE_INIT("\xb3"), \ - _PyUnicode_LATIN1_BASE_INIT("\xb4"), \ - _PyUnicode_LATIN1_BASE_INIT("\xb5"), \ - _PyUnicode_LATIN1_BASE_INIT("\xb6"), \ - _PyUnicode_LATIN1_BASE_INIT("\xb7"), \ - _PyUnicode_LATIN1_BASE_INIT("\xb8"), \ - _PyUnicode_LATIN1_BASE_INIT("\xb9"), \ - _PyUnicode_LATIN1_BASE_INIT("\xba"), \ - _PyUnicode_LATIN1_BASE_INIT("\xbb"), \ - _PyUnicode_LATIN1_BASE_INIT("\xbc"), \ - _PyUnicode_LATIN1_BASE_INIT("\xbd"), \ - _PyUnicode_LATIN1_BASE_INIT("\xbe"), \ - _PyUnicode_LATIN1_BASE_INIT("\xbf"), \ - _PyUnicode_LATIN1_BASE_INIT("\xc0"), \ - _PyUnicode_LATIN1_BASE_INIT("\xc1"), \ - _PyUnicode_LATIN1_BASE_INIT("\xc2"), \ - _PyUnicode_LATIN1_BASE_INIT("\xc3"), \ - _PyUnicode_LATIN1_BASE_INIT("\xc4"), \ - _PyUnicode_LATIN1_BASE_INIT("\xc5"), \ - _PyUnicode_LATIN1_BASE_INIT("\xc6"), \ - _PyUnicode_LATIN1_BASE_INIT("\xc7"), \ - _PyUnicode_LATIN1_BASE_INIT("\xc8"), \ - _PyUnicode_LATIN1_BASE_INIT("\xc9"), \ - _PyUnicode_LATIN1_BASE_INIT("\xca"), \ - _PyUnicode_LATIN1_BASE_INIT("\xcb"), \ - _PyUnicode_LATIN1_BASE_INIT("\xcc"), \ - _PyUnicode_LATIN1_BASE_INIT("\xcd"), \ - _PyUnicode_LATIN1_BASE_INIT("\xce"), \ - _PyUnicode_LATIN1_BASE_INIT("\xcf"), \ - _PyUnicode_LATIN1_BASE_INIT("\xd0"), \ - _PyUnicode_LATIN1_BASE_INIT("\xd1"), \ - _PyUnicode_LATIN1_BASE_INIT("\xd2"), \ - _PyUnicode_LATIN1_BASE_INIT("\xd3"), \ - _PyUnicode_LATIN1_BASE_INIT("\xd4"), \ - _PyUnicode_LATIN1_BASE_INIT("\xd5"), \ - _PyUnicode_LATIN1_BASE_INIT("\xd6"), \ - _PyUnicode_LATIN1_BASE_INIT("\xd7"), \ - _PyUnicode_LATIN1_BASE_INIT("\xd8"), \ - _PyUnicode_LATIN1_BASE_INIT("\xd9"), \ - _PyUnicode_LATIN1_BASE_INIT("\xda"), \ - _PyUnicode_LATIN1_BASE_INIT("\xdb"), \ - _PyUnicode_LATIN1_BASE_INIT("\xdc"), \ - _PyUnicode_LATIN1_BASE_INIT("\xdd"), \ - _PyUnicode_LATIN1_BASE_INIT("\xde"), \ - _PyUnicode_LATIN1_BASE_INIT("\xdf"), \ - _PyUnicode_LATIN1_BASE_INIT("\xe0"), \ - _PyUnicode_LATIN1_BASE_INIT("\xe1"), \ - _PyUnicode_LATIN1_BASE_INIT("\xe2"), \ - _PyUnicode_LATIN1_BASE_INIT("\xe3"), \ - _PyUnicode_LATIN1_BASE_INIT("\xe4"), \ - _PyUnicode_LATIN1_BASE_INIT("\xe5"), \ - _PyUnicode_LATIN1_BASE_INIT("\xe6"), \ - _PyUnicode_LATIN1_BASE_INIT("\xe7"), \ - _PyUnicode_LATIN1_BASE_INIT("\xe8"), \ - _PyUnicode_LATIN1_BASE_INIT("\xe9"), \ - _PyUnicode_LATIN1_BASE_INIT("\xea"), \ - _PyUnicode_LATIN1_BASE_INIT("\xeb"), \ - _PyUnicode_LATIN1_BASE_INIT("\xec"), \ - _PyUnicode_LATIN1_BASE_INIT("\xed"), \ - _PyUnicode_LATIN1_BASE_INIT("\xee"), \ - _PyUnicode_LATIN1_BASE_INIT("\xef"), \ - _PyUnicode_LATIN1_BASE_INIT("\xf0"), \ - _PyUnicode_LATIN1_BASE_INIT("\xf1"), \ - _PyUnicode_LATIN1_BASE_INIT("\xf2"), \ - _PyUnicode_LATIN1_BASE_INIT("\xf3"), \ - _PyUnicode_LATIN1_BASE_INIT("\xf4"), \ - _PyUnicode_LATIN1_BASE_INIT("\xf5"), \ - _PyUnicode_LATIN1_BASE_INIT("\xf6"), \ - _PyUnicode_LATIN1_BASE_INIT("\xf7"), \ - _PyUnicode_LATIN1_BASE_INIT("\xf8"), \ - _PyUnicode_LATIN1_BASE_INIT("\xf9"), \ - _PyUnicode_LATIN1_BASE_INIT("\xfa"), \ - _PyUnicode_LATIN1_BASE_INIT("\xfb"), \ - _PyUnicode_LATIN1_BASE_INIT("\xfc"), \ - _PyUnicode_LATIN1_BASE_INIT("\xfd"), \ - _PyUnicode_LATIN1_BASE_INIT("\xfe"), \ - _PyUnicode_LATIN1_BASE_INIT("\xff"), \ + _PyUnicode_LATIN1_INIT("\x80"), \ + _PyUnicode_LATIN1_INIT("\x81"), \ + _PyUnicode_LATIN1_INIT("\x82"), \ + _PyUnicode_LATIN1_INIT("\x83"), \ + _PyUnicode_LATIN1_INIT("\x84"), \ + _PyUnicode_LATIN1_INIT("\x85"), \ + _PyUnicode_LATIN1_INIT("\x86"), \ + _PyUnicode_LATIN1_INIT("\x87"), \ + _PyUnicode_LATIN1_INIT("\x88"), \ + _PyUnicode_LATIN1_INIT("\x89"), \ + _PyUnicode_LATIN1_INIT("\x8a"), \ + _PyUnicode_LATIN1_INIT("\x8b"), \ + _PyUnicode_LATIN1_INIT("\x8c"), \ + _PyUnicode_LATIN1_INIT("\x8d"), \ + _PyUnicode_LATIN1_INIT("\x8e"), \ + _PyUnicode_LATIN1_INIT("\x8f"), \ + _PyUnicode_LATIN1_INIT("\x90"), \ + _PyUnicode_LATIN1_INIT("\x91"), \ + _PyUnicode_LATIN1_INIT("\x92"), \ + _PyUnicode_LATIN1_INIT("\x93"), \ + _PyUnicode_LATIN1_INIT("\x94"), \ + _PyUnicode_LATIN1_INIT("\x95"), \ + _PyUnicode_LATIN1_INIT("\x96"), \ + _PyUnicode_LATIN1_INIT("\x97"), \ + _PyUnicode_LATIN1_INIT("\x98"), \ + _PyUnicode_LATIN1_INIT("\x99"), \ + _PyUnicode_LATIN1_INIT("\x9a"), \ + _PyUnicode_LATIN1_INIT("\x9b"), \ + _PyUnicode_LATIN1_INIT("\x9c"), \ + _PyUnicode_LATIN1_INIT("\x9d"), \ + _PyUnicode_LATIN1_INIT("\x9e"), \ + _PyUnicode_LATIN1_INIT("\x9f"), \ + _PyUnicode_LATIN1_INIT("\xa0"), \ + _PyUnicode_LATIN1_INIT("\xa1"), \ + _PyUnicode_LATIN1_INIT("\xa2"), \ + _PyUnicode_LATIN1_INIT("\xa3"), \ + _PyUnicode_LATIN1_INIT("\xa4"), \ + _PyUnicode_LATIN1_INIT("\xa5"), \ + _PyUnicode_LATIN1_INIT("\xa6"), \ + _PyUnicode_LATIN1_INIT("\xa7"), \ + _PyUnicode_LATIN1_INIT("\xa8"), \ + _PyUnicode_LATIN1_INIT("\xa9"), \ + _PyUnicode_LATIN1_INIT("\xaa"), \ + _PyUnicode_LATIN1_INIT("\xab"), \ + _PyUnicode_LATIN1_INIT("\xac"), \ + _PyUnicode_LATIN1_INIT("\xad"), \ + _PyUnicode_LATIN1_INIT("\xae"), \ + _PyUnicode_LATIN1_INIT("\xaf"), \ + _PyUnicode_LATIN1_INIT("\xb0"), \ + _PyUnicode_LATIN1_INIT("\xb1"), \ + _PyUnicode_LATIN1_INIT("\xb2"), \ + _PyUnicode_LATIN1_INIT("\xb3"), \ + _PyUnicode_LATIN1_INIT("\xb4"), \ + _PyUnicode_LATIN1_INIT("\xb5"), \ + _PyUnicode_LATIN1_INIT("\xb6"), \ + _PyUnicode_LATIN1_INIT("\xb7"), \ + _PyUnicode_LATIN1_INIT("\xb8"), \ + _PyUnicode_LATIN1_INIT("\xb9"), \ + _PyUnicode_LATIN1_INIT("\xba"), \ + _PyUnicode_LATIN1_INIT("\xbb"), \ + _PyUnicode_LATIN1_INIT("\xbc"), \ + _PyUnicode_LATIN1_INIT("\xbd"), \ + _PyUnicode_LATIN1_INIT("\xbe"), \ + _PyUnicode_LATIN1_INIT("\xbf"), \ + _PyUnicode_LATIN1_INIT("\xc0"), \ + _PyUnicode_LATIN1_INIT("\xc1"), \ + _PyUnicode_LATIN1_INIT("\xc2"), \ + _PyUnicode_LATIN1_INIT("\xc3"), \ + _PyUnicode_LATIN1_INIT("\xc4"), \ + _PyUnicode_LATIN1_INIT("\xc5"), \ + _PyUnicode_LATIN1_INIT("\xc6"), \ + _PyUnicode_LATIN1_INIT("\xc7"), \ + _PyUnicode_LATIN1_INIT("\xc8"), \ + _PyUnicode_LATIN1_INIT("\xc9"), \ + _PyUnicode_LATIN1_INIT("\xca"), \ + _PyUnicode_LATIN1_INIT("\xcb"), \ + _PyUnicode_LATIN1_INIT("\xcc"), \ + _PyUnicode_LATIN1_INIT("\xcd"), \ + _PyUnicode_LATIN1_INIT("\xce"), \ + _PyUnicode_LATIN1_INIT("\xcf"), \ + _PyUnicode_LATIN1_INIT("\xd0"), \ + _PyUnicode_LATIN1_INIT("\xd1"), \ + _PyUnicode_LATIN1_INIT("\xd2"), \ + _PyUnicode_LATIN1_INIT("\xd3"), \ + _PyUnicode_LATIN1_INIT("\xd4"), \ + _PyUnicode_LATIN1_INIT("\xd5"), \ + _PyUnicode_LATIN1_INIT("\xd6"), \ + _PyUnicode_LATIN1_INIT("\xd7"), \ + _PyUnicode_LATIN1_INIT("\xd8"), \ + _PyUnicode_LATIN1_INIT("\xd9"), \ + _PyUnicode_LATIN1_INIT("\xda"), \ + _PyUnicode_LATIN1_INIT("\xdb"), \ + _PyUnicode_LATIN1_INIT("\xdc"), \ + _PyUnicode_LATIN1_INIT("\xdd"), \ + _PyUnicode_LATIN1_INIT("\xde"), \ + _PyUnicode_LATIN1_INIT("\xdf"), \ + _PyUnicode_LATIN1_INIT("\xe0"), \ + _PyUnicode_LATIN1_INIT("\xe1"), \ + _PyUnicode_LATIN1_INIT("\xe2"), \ + _PyUnicode_LATIN1_INIT("\xe3"), \ + _PyUnicode_LATIN1_INIT("\xe4"), \ + _PyUnicode_LATIN1_INIT("\xe5"), \ + _PyUnicode_LATIN1_INIT("\xe6"), \ + _PyUnicode_LATIN1_INIT("\xe7"), \ + _PyUnicode_LATIN1_INIT("\xe8"), \ + _PyUnicode_LATIN1_INIT("\xe9"), \ + _PyUnicode_LATIN1_INIT("\xea"), \ + _PyUnicode_LATIN1_INIT("\xeb"), \ + _PyUnicode_LATIN1_INIT("\xec"), \ + _PyUnicode_LATIN1_INIT("\xed"), \ + _PyUnicode_LATIN1_INIT("\xee"), \ + _PyUnicode_LATIN1_INIT("\xef"), \ + _PyUnicode_LATIN1_INIT("\xf0"), \ + _PyUnicode_LATIN1_INIT("\xf1"), \ + _PyUnicode_LATIN1_INIT("\xf2"), \ + _PyUnicode_LATIN1_INIT("\xf3"), \ + _PyUnicode_LATIN1_INIT("\xf4"), \ + _PyUnicode_LATIN1_INIT("\xf5"), \ + _PyUnicode_LATIN1_INIT("\xf6"), \ + _PyUnicode_LATIN1_INIT("\xf7"), \ + _PyUnicode_LATIN1_INIT("\xf8"), \ + _PyUnicode_LATIN1_INIT("\xf9"), \ + _PyUnicode_LATIN1_INIT("\xfa"), \ + _PyUnicode_LATIN1_INIT("\xfb"), \ + _PyUnicode_LATIN1_INIT("\xfc"), \ + _PyUnicode_LATIN1_INIT("\xfd"), \ + _PyUnicode_LATIN1_INIT("\xfe"), \ + _PyUnicode_LATIN1_INIT("\xff"), \ }, \ }, \ \ diff --git a/Tools/scripts/generate_global_objects.py b/Tools/scripts/generate_global_objects.py index 552fdb8be7a108..17ddb8b324a0b6 100644 --- a/Tools/scripts/generate_global_objects.py +++ b/Tools/scripts/generate_global_objects.py @@ -264,7 +264,7 @@ def generate_runtime_init(identifiers, strings): printer.write(f'_PyASCIIObject_INIT("\\x{i:02x}"),') with printer.block('.latin1 =', ','): for i in range(128, 256): - printer.write(f'_PyUnicode_LATIN1_BASE_INIT("\\x{i:02x}"),') + printer.write(f'_PyUnicode_LATIN1_INIT("\\x{i:02x}"),') printer.write('') with printer.block('.tuple_empty =', ','): printer.write('.ob_base = _PyVarObject_IMMORTAL_INIT(&PyTuple_Type, 0)')