From fbbf5f307a60a88be0a7b13d9f8ef6374b409566 Mon Sep 17 00:00:00 2001 From: "Erlend E. Aasland" Date: Wed, 10 Apr 2024 20:58:36 +0200 Subject: [PATCH 01/17] PoC: str vectorcall --- Objects/unicodeobject.c | 129 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 129 insertions(+) diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 5f15071d7d54ef..0211ab49bc56dc 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -14617,6 +14617,134 @@ unicode_new_impl(PyTypeObject *type, PyObject *x, const char *encoding, return unicode; } +static const char * +as_const_char(PyObject *obj) +{ + if (!PyUnicode_Check(obj)) { + _PyArg_BadArgument("str", "argument", "str", obj); + return NULL; + } + Py_ssize_t sz; + const char *str = PyUnicode_AsUTF8AndSize(obj, &sz); + if (str == NULL) { + return NULL; + } + if (strlen(str) != (size_t)sz) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + return NULL; + } + return str; +} + +static PyObject * +fallback_to_tp_call(PyObject *type, Py_ssize_t nargs, Py_ssize_t nkwargs, + PyObject *const *args, PyObject *kwnames) +{ + PyObject *tuple = PyTuple_New(nargs); + if (tuple == NULL) { + return NULL; + } + for (Py_ssize_t i = 0; i < nargs; i++) { + PyObject *value = Py_NewRef(args[i]); + PyTuple_SET_ITEM(tuple, i, value); + } + PyObject *dict = PyDict_New(); + if (dict == NULL) { + Py_DECREF(tuple); + return NULL; + } + for (Py_ssize_t j = 0; j < nkwargs; j++) { + PyObject *key = PyTuple_GET_ITEM(kwnames, j); + PyObject *value = args[nargs + j]; + if (PyDict_SetItem(dict, key, value) < 0) { + Py_DECREF(tuple); + Py_DECREF(dict); + return NULL; + } + } + return unicode_new(_PyType_CAST(type), tuple, dict); +} + +static PyObject * +unicode_vectorcall(PyObject *type, PyObject *const *args, + size_t nargsf, PyObject *kwnames) +{ + Py_ssize_t nargs = PyVectorcall_NARGS(nargsf); + Py_ssize_t nkwargs = (kwnames) ? PyTuple_GET_SIZE(kwnames) : 0; + if (nargs == 0 && nkwargs == 0) { + return unicode_get_empty(); + } + PyObject *object = args[0]; + if (nargs == 1 && nkwargs == 0) { + return unicode_new_impl(_PyType_CAST(type), object, NULL, NULL); + } + if (nargs + nkwargs == 2) { + const char *encoding; + const char *errors; + if (nkwargs == 1) { + PyObject *kw0 = PyTuple_GET_ITEM(kwnames, 0); + if (_PyUnicode_EqualToASCIIString(kw0, "encoding")) { + encoding = as_const_char(args[1]); + if (encoding == NULL) { + return NULL; + } + errors = NULL; + } + else if (_PyUnicode_EqualToASCIIString(kw0, "errors")) { + errors = as_const_char(args[1]); + if (errors == NULL) { + return NULL; + } + encoding = NULL; + } + else { + PyErr_Format(PyExc_TypeError, + "'%S' is an invalid keyword argument for str()", kw0); + return NULL; + } + } + else if (nkwargs == 0) { + encoding = as_const_char(args[1]); + errors = NULL; + } + else { + return fallback_to_tp_call(type, nargs, nkwargs, args, kwnames); + } + PyObject *object = args[0]; + return unicode_new_impl(_PyType_CAST(type), object, encoding, errors); + } + if (nargs + nkwargs == 3) { + if (nkwargs == 1) { + PyObject *kw0 = PyTuple_GET_ITEM(kwnames, 0); + if (!_PyUnicode_EqualToASCIIString(kw0, "errors")) { + PyErr_Format(PyExc_TypeError, + "'%S' is an invalid keyword argument for str()", kw0); + return NULL; + } + } + else if (nkwargs != 0) { + return fallback_to_tp_call(type, nargs, nkwargs, args, kwnames); + } + PyObject *object = args[0]; + const char *encoding = as_const_char(args[1]); + if (encoding == NULL) { + return NULL; + } + const char *errors = as_const_char(args[2]); + if (errors == NULL) { + return NULL; + } + return unicode_new_impl(_PyType_CAST(type), object, encoding, errors); + } + if (nargs > 3) { + PyErr_Format(PyExc_TypeError, + "str() takes at most 3 arguments (%d given)", nargs + nkwargs); + return NULL; + } + + return fallback_to_tp_call(type, nargs, nkwargs, args, kwnames); +} + static PyObject * unicode_subtype_new(PyTypeObject *type, PyObject *unicode) { @@ -14758,6 +14886,7 @@ PyTypeObject PyUnicode_Type = { 0, /* tp_alloc */ unicode_new, /* tp_new */ PyObject_Del, /* tp_free */ + .tp_vectorcall = unicode_vectorcall, }; /* Initialize the Unicode implementation */ From 2a94bfa7da458364a4dee2a6a6c1a695aa9cee26 Mon Sep 17 00:00:00 2001 From: "Erlend E. Aasland" Date: Wed, 10 Apr 2024 22:15:01 +0200 Subject: [PATCH 02/17] Nicer error message --- Objects/unicodeobject.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 0211ab49bc56dc..aac8bfed672db6 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -14618,10 +14618,12 @@ unicode_new_impl(PyTypeObject *type, PyObject *x, const char *encoding, } static const char * -as_const_char(PyObject *obj) +as_const_char(PyObject *obj, const char *name) { if (!PyUnicode_Check(obj)) { - _PyArg_BadArgument("str", "argument", "str", obj); + PyErr_Format(PyExc_TypeError, + "str() argument '%s' must be str, not %T", + name, obj); return NULL; } Py_ssize_t sz; @@ -14684,14 +14686,14 @@ unicode_vectorcall(PyObject *type, PyObject *const *args, if (nkwargs == 1) { PyObject *kw0 = PyTuple_GET_ITEM(kwnames, 0); if (_PyUnicode_EqualToASCIIString(kw0, "encoding")) { - encoding = as_const_char(args[1]); + encoding = as_const_char(args[1], "encoding"); if (encoding == NULL) { return NULL; } errors = NULL; } else if (_PyUnicode_EqualToASCIIString(kw0, "errors")) { - errors = as_const_char(args[1]); + errors = as_const_char(args[1], "errors"); if (errors == NULL) { return NULL; } @@ -14704,7 +14706,7 @@ unicode_vectorcall(PyObject *type, PyObject *const *args, } } else if (nkwargs == 0) { - encoding = as_const_char(args[1]); + encoding = as_const_char(args[1], "encoding"); errors = NULL; } else { @@ -14726,11 +14728,11 @@ unicode_vectorcall(PyObject *type, PyObject *const *args, return fallback_to_tp_call(type, nargs, nkwargs, args, kwnames); } PyObject *object = args[0]; - const char *encoding = as_const_char(args[1]); + const char *encoding = as_const_char(args[1], "encoding"); if (encoding == NULL) { return NULL; } - const char *errors = as_const_char(args[2]); + const char *errors = as_const_char(args[2], "errors"); if (errors == NULL) { return NULL; } From 1a29140ddb0ae26a8e79fe6bb700de7aaf243bb1 Mon Sep 17 00:00:00 2001 From: "Erlend E. Aasland" Date: Wed, 10 Apr 2024 22:16:27 +0200 Subject: [PATCH 03/17] NEWS --- .../2024-04-10-22-16-18.gh-issue-117709.-_1YL0.rst | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2024-04-10-22-16-18.gh-issue-117709.-_1YL0.rst diff --git a/Misc/NEWS.d/next/Core and Builtins/2024-04-10-22-16-18.gh-issue-117709.-_1YL0.rst b/Misc/NEWS.d/next/Core and Builtins/2024-04-10-22-16-18.gh-issue-117709.-_1YL0.rst new file mode 100644 index 00000000000000..1877ac5818cb69 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2024-04-10-22-16-18.gh-issue-117709.-_1YL0.rst @@ -0,0 +1,2 @@ +Speed up calls to :func:`str` by using the :pep:`590` ``vectorcall`` calling +convention. Patch by Erlend Aasland. From 311aa3cdca0199b620447425fda90e47feaf16dd Mon Sep 17 00:00:00 2001 From: "Erlend E. Aasland" Date: Wed, 10 Apr 2024 22:44:03 +0200 Subject: [PATCH 04/17] Don't call unicode_get_empty() directly; remove some unneeded assignments --- Objects/unicodeobject.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index aac8bfed672db6..01111c47b01abc 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -14674,7 +14674,7 @@ unicode_vectorcall(PyObject *type, PyObject *const *args, Py_ssize_t nargs = PyVectorcall_NARGS(nargsf); Py_ssize_t nkwargs = (kwnames) ? PyTuple_GET_SIZE(kwnames) : 0; if (nargs == 0 && nkwargs == 0) { - return unicode_get_empty(); + return unicode_new_impl(_PyType_CAST(type), NULL, NULL, NULL); } PyObject *object = args[0]; if (nargs == 1 && nkwargs == 0) { @@ -14712,7 +14712,6 @@ unicode_vectorcall(PyObject *type, PyObject *const *args, else { return fallback_to_tp_call(type, nargs, nkwargs, args, kwnames); } - PyObject *object = args[0]; return unicode_new_impl(_PyType_CAST(type), object, encoding, errors); } if (nargs + nkwargs == 3) { @@ -14727,7 +14726,6 @@ unicode_vectorcall(PyObject *type, PyObject *const *args, else if (nkwargs != 0) { return fallback_to_tp_call(type, nargs, nkwargs, args, kwnames); } - PyObject *object = args[0]; const char *encoding = as_const_char(args[1], "encoding"); if (encoding == NULL) { return NULL; From c59478c0e32c51b7ae0b6743a6aff13a57fa5e2a Mon Sep 17 00:00:00 2001 From: "Erlend E. Aasland" Date: Wed, 10 Apr 2024 23:01:52 +0200 Subject: [PATCH 05/17] type is always PyUnicode_Type? --- Objects/unicodeobject.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 01111c47b01abc..f6d1c46ac799cd 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -14671,14 +14671,16 @@ static PyObject * unicode_vectorcall(PyObject *type, PyObject *const *args, size_t nargsf, PyObject *kwnames) { + assert(Py_Is(_PyType_CAST(type), &PyUnicode_Type)); + Py_ssize_t nargs = PyVectorcall_NARGS(nargsf); Py_ssize_t nkwargs = (kwnames) ? PyTuple_GET_SIZE(kwnames) : 0; if (nargs == 0 && nkwargs == 0) { - return unicode_new_impl(_PyType_CAST(type), NULL, NULL, NULL); + return unicode_get_empty(); } PyObject *object = args[0]; if (nargs == 1 && nkwargs == 0) { - return unicode_new_impl(_PyType_CAST(type), object, NULL, NULL); + return PyObject_Str(object); } if (nargs + nkwargs == 2) { const char *encoding; @@ -14712,7 +14714,7 @@ unicode_vectorcall(PyObject *type, PyObject *const *args, else { return fallback_to_tp_call(type, nargs, nkwargs, args, kwnames); } - return unicode_new_impl(_PyType_CAST(type), object, encoding, errors); + return PyUnicode_FromEncodedObject(object, encoding, errors); } if (nargs + nkwargs == 3) { if (nkwargs == 1) { @@ -14734,7 +14736,7 @@ unicode_vectorcall(PyObject *type, PyObject *const *args, if (errors == NULL) { return NULL; } - return unicode_new_impl(_PyType_CAST(type), object, encoding, errors); + return PyUnicode_FromEncodedObject(object, encoding, errors); } if (nargs > 3) { PyErr_Format(PyExc_TypeError, From 068de64f5343e222f73bfc8ef4c1b0e863cb8414 Mon Sep 17 00:00:00 2001 From: "Erlend E. Aasland" Date: Thu, 11 Apr 2024 10:08:08 +0200 Subject: [PATCH 06/17] Address review: variable naming --- Objects/unicodeobject.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index f6d1c46ac799cd..9d8838eb5f6d9f 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -14686,15 +14686,15 @@ unicode_vectorcall(PyObject *type, PyObject *const *args, const char *encoding; const char *errors; if (nkwargs == 1) { - PyObject *kw0 = PyTuple_GET_ITEM(kwnames, 0); - if (_PyUnicode_EqualToASCIIString(kw0, "encoding")) { + PyObject *key = PyTuple_GET_ITEM(kwnames, 0); + if (_PyUnicode_EqualToASCIIString(key, "encoding")) { encoding = as_const_char(args[1], "encoding"); if (encoding == NULL) { return NULL; } errors = NULL; } - else if (_PyUnicode_EqualToASCIIString(kw0, "errors")) { + else if (_PyUnicode_EqualToASCIIString(key, "errors")) { errors = as_const_char(args[1], "errors"); if (errors == NULL) { return NULL; @@ -14703,7 +14703,7 @@ unicode_vectorcall(PyObject *type, PyObject *const *args, } else { PyErr_Format(PyExc_TypeError, - "'%S' is an invalid keyword argument for str()", kw0); + "'%S' is an invalid keyword argument for str()", key); return NULL; } } @@ -14718,10 +14718,10 @@ unicode_vectorcall(PyObject *type, PyObject *const *args, } if (nargs + nkwargs == 3) { if (nkwargs == 1) { - PyObject *kw0 = PyTuple_GET_ITEM(kwnames, 0); - if (!_PyUnicode_EqualToASCIIString(kw0, "errors")) { + PyObject *key = PyTuple_GET_ITEM(kwnames, 0); + if (!_PyUnicode_EqualToASCIIString(key, "errors")) { PyErr_Format(PyExc_TypeError, - "'%S' is an invalid keyword argument for str()", kw0); + "'%S' is an invalid keyword argument for str()", key); return NULL; } } From 57c01a4f57e79407058dad119d7a2159cff3b507 Mon Sep 17 00:00:00 2001 From: "Erlend E. Aasland" Date: Thu, 11 Apr 2024 10:09:27 +0200 Subject: [PATCH 07/17] Align exception messages with tp_call --- Objects/unicodeobject.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 9d8838eb5f6d9f..dc3934233f1542 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -14703,7 +14703,7 @@ unicode_vectorcall(PyObject *type, PyObject *const *args, } else { PyErr_Format(PyExc_TypeError, - "'%S' is an invalid keyword argument for str()", key); + "str() got an unexpected keyword argument %R", key); return NULL; } } @@ -14721,7 +14721,7 @@ unicode_vectorcall(PyObject *type, PyObject *const *args, PyObject *key = PyTuple_GET_ITEM(kwnames, 0); if (!_PyUnicode_EqualToASCIIString(key, "errors")) { PyErr_Format(PyExc_TypeError, - "'%S' is an invalid keyword argument for str()", key); + "str() got an unexpected keyword argument %R", key); return NULL; } } From 37b45ed8c5e69a97b911db20adaca46474646413 Mon Sep 17 00:00:00 2001 From: "Erlend E. Aasland" Date: Thu, 11 Apr 2024 10:13:39 +0200 Subject: [PATCH 08/17] Address review: use better internal APIs --- Objects/unicodeobject.c | 24 +++--------------------- 1 file changed, 3 insertions(+), 21 deletions(-) diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index dc3934233f1542..5de4e0e8fa560e 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -14626,15 +14626,10 @@ as_const_char(PyObject *obj, const char *name) name, obj); return NULL; } - Py_ssize_t sz; - const char *str = PyUnicode_AsUTF8AndSize(obj, &sz); + const char *str = _PyUnicode_AsUTF8NoNUL(obj); if (str == NULL) { return NULL; } - if (strlen(str) != (size_t)sz) { - PyErr_SetString(PyExc_ValueError, "embedded null character"); - return NULL; - } return str; } @@ -14642,28 +14637,15 @@ static PyObject * fallback_to_tp_call(PyObject *type, Py_ssize_t nargs, Py_ssize_t nkwargs, PyObject *const *args, PyObject *kwnames) { - PyObject *tuple = PyTuple_New(nargs); + PyObject *tuple = _PyTuple_FromArray(args, nargs); if (tuple == NULL) { return NULL; } - for (Py_ssize_t i = 0; i < nargs; i++) { - PyObject *value = Py_NewRef(args[i]); - PyTuple_SET_ITEM(tuple, i, value); - } - PyObject *dict = PyDict_New(); + PyObject *dict = _PyStack_AsDict(args + nargs, kwnames); if (dict == NULL) { Py_DECREF(tuple); return NULL; } - for (Py_ssize_t j = 0; j < nkwargs; j++) { - PyObject *key = PyTuple_GET_ITEM(kwnames, j); - PyObject *value = args[nargs + j]; - if (PyDict_SetItem(dict, key, value) < 0) { - Py_DECREF(tuple); - Py_DECREF(dict); - return NULL; - } - } return unicode_new(_PyType_CAST(type), tuple, dict); } From 7e141e792b5f733e4636958f5e5ef289c6a770e9 Mon Sep 17 00:00:00 2001 From: "Erlend E. Aasland" Date: Thu, 11 Apr 2024 10:14:47 +0200 Subject: [PATCH 09/17] Address review: explcitly initialise variables --- Objects/unicodeobject.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 5de4e0e8fa560e..2a6f05d59baecd 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -14665,8 +14665,8 @@ unicode_vectorcall(PyObject *type, PyObject *const *args, return PyObject_Str(object); } if (nargs + nkwargs == 2) { - const char *encoding; - const char *errors; + const char *encoding = NULL; + const char *errors = NULL; if (nkwargs == 1) { PyObject *key = PyTuple_GET_ITEM(kwnames, 0); if (_PyUnicode_EqualToASCIIString(key, "encoding")) { @@ -14674,14 +14674,12 @@ unicode_vectorcall(PyObject *type, PyObject *const *args, if (encoding == NULL) { return NULL; } - errors = NULL; } else if (_PyUnicode_EqualToASCIIString(key, "errors")) { errors = as_const_char(args[1], "errors"); if (errors == NULL) { return NULL; } - encoding = NULL; } else { PyErr_Format(PyExc_TypeError, @@ -14691,7 +14689,6 @@ unicode_vectorcall(PyObject *type, PyObject *const *args, } else if (nkwargs == 0) { encoding = as_const_char(args[1], "encoding"); - errors = NULL; } else { return fallback_to_tp_call(type, nargs, nkwargs, args, kwnames); From b09a3fa1d22fb382afbe42c48448629223f886c3 Mon Sep 17 00:00:00 2001 From: "Erlend E. Aasland" Date: Thu, 11 Apr 2024 10:28:16 +0200 Subject: [PATCH 10/17] Only optimise for positional-only arguments --- Objects/unicodeobject.c | 98 +++++++++++------------------------------ 1 file changed, 25 insertions(+), 73 deletions(-) diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 2a6f05d59baecd..307c0b30e57538 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -14633,22 +14633,6 @@ as_const_char(PyObject *obj, const char *name) return str; } -static PyObject * -fallback_to_tp_call(PyObject *type, Py_ssize_t nargs, Py_ssize_t nkwargs, - PyObject *const *args, PyObject *kwnames) -{ - PyObject *tuple = _PyTuple_FromArray(args, nargs); - if (tuple == NULL) { - return NULL; - } - PyObject *dict = _PyStack_AsDict(args + nargs, kwnames); - if (dict == NULL) { - Py_DECREF(tuple); - return NULL; - } - return unicode_new(_PyType_CAST(type), tuple, dict); -} - static PyObject * unicode_vectorcall(PyObject *type, PyObject *const *args, size_t nargsf, PyObject *kwnames) @@ -14657,73 +14641,41 @@ unicode_vectorcall(PyObject *type, PyObject *const *args, Py_ssize_t nargs = PyVectorcall_NARGS(nargsf); Py_ssize_t nkwargs = (kwnames) ? PyTuple_GET_SIZE(kwnames) : 0; - if (nargs == 0 && nkwargs == 0) { - return unicode_get_empty(); - } - PyObject *object = args[0]; - if (nargs == 1 && nkwargs == 0) { - return PyObject_Str(object); - } - if (nargs + nkwargs == 2) { - const char *encoding = NULL; - const char *errors = NULL; - if (nkwargs == 1) { - PyObject *key = PyTuple_GET_ITEM(kwnames, 0); - if (_PyUnicode_EqualToASCIIString(key, "encoding")) { - encoding = as_const_char(args[1], "encoding"); - if (encoding == NULL) { - return NULL; - } - } - else if (_PyUnicode_EqualToASCIIString(key, "errors")) { - errors = as_const_char(args[1], "errors"); - if (errors == NULL) { - return NULL; - } - } - else { - PyErr_Format(PyExc_TypeError, - "str() got an unexpected keyword argument %R", key); - return NULL; - } - } - else if (nkwargs == 0) { - encoding = as_const_char(args[1], "encoding"); - } - else { - return fallback_to_tp_call(type, nargs, nkwargs, args, kwnames); - } - return PyUnicode_FromEncodedObject(object, encoding, errors); - } - if (nargs + nkwargs == 3) { - if (nkwargs == 1) { - PyObject *key = PyTuple_GET_ITEM(kwnames, 0); - if (!_PyUnicode_EqualToASCIIString(key, "errors")) { - PyErr_Format(PyExc_TypeError, - "str() got an unexpected keyword argument %R", key); - return NULL; - } - } - else if (nkwargs != 0) { - return fallback_to_tp_call(type, nargs, nkwargs, args, kwnames); - } - const char *encoding = as_const_char(args[1], "encoding"); - if (encoding == NULL) { + if (nkwargs) { + // Fallback to tp_call() + PyObject *tuple = _PyTuple_FromArray(args, nargs); + if (tuple == NULL) { return NULL; } - const char *errors = as_const_char(args[2], "errors"); - if (errors == NULL) { + PyObject *dict = _PyStack_AsDict(args + nargs, kwnames); + if (dict == NULL) { + Py_DECREF(tuple); return NULL; } - return PyUnicode_FromEncodedObject(object, encoding, errors); + return unicode_new(_PyType_CAST(type), tuple, dict); } if (nargs > 3) { PyErr_Format(PyExc_TypeError, "str() takes at most 3 arguments (%d given)", nargs + nkwargs); return NULL; } - - return fallback_to_tp_call(type, nargs, nkwargs, args, kwnames); + if (nargs == 0) { + return unicode_get_empty(); + } + PyObject *object = args[0]; + if (nargs == 1) { + return PyObject_Str(object); + } + const char *encoding = NULL; + const char *errors = NULL; + if (nargs == 2) { + encoding = as_const_char(args[1], "encoding"); + } + if (nargs == 3) { + encoding = as_const_char(args[1], "encoding"); + errors = as_const_char(args[1], "errors"); + } + return PyUnicode_FromEncodedObject(object, encoding, errors); } static PyObject * From 9319c963295a53f70f5e455d1beb4769b844ace5 Mon Sep 17 00:00:00 2001 From: "Erlend E. Aasland" Date: Thu, 11 Apr 2024 10:32:55 +0200 Subject: [PATCH 11/17] Update Objects/unicodeobject.c --- Objects/unicodeobject.c | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 307c0b30e57538..055fff74a3c59b 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -14666,15 +14666,8 @@ unicode_vectorcall(PyObject *type, PyObject *const *args, if (nargs == 1) { return PyObject_Str(object); } - const char *encoding = NULL; - const char *errors = NULL; - if (nargs == 2) { - encoding = as_const_char(args[1], "encoding"); - } - if (nargs == 3) { - encoding = as_const_char(args[1], "encoding"); - errors = as_const_char(args[1], "errors"); - } + const char *encoding = as_const_char(args[1], "encoding"); + const char *errors = (nargs == 2) ? NULL : as_const_char(args[2], "errors"); return PyUnicode_FromEncodedObject(object, encoding, errors); } From b069905911d093173ec111e2d8ea389d1d29aaf0 Mon Sep 17 00:00:00 2001 From: "Erlend E. Aasland" Date: Thu, 11 Apr 2024 10:34:13 +0200 Subject: [PATCH 12/17] Update Misc/NEWS.d/next/Core and Builtins/2024-04-10-22-16-18.gh-issue-117709.-_1YL0.rst --- .../2024-04-10-22-16-18.gh-issue-117709.-_1YL0.rst | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/Misc/NEWS.d/next/Core and Builtins/2024-04-10-22-16-18.gh-issue-117709.-_1YL0.rst b/Misc/NEWS.d/next/Core and Builtins/2024-04-10-22-16-18.gh-issue-117709.-_1YL0.rst index 1877ac5818cb69..2216b53688c378 100644 --- a/Misc/NEWS.d/next/Core and Builtins/2024-04-10-22-16-18.gh-issue-117709.-_1YL0.rst +++ b/Misc/NEWS.d/next/Core and Builtins/2024-04-10-22-16-18.gh-issue-117709.-_1YL0.rst @@ -1,2 +1,3 @@ -Speed up calls to :func:`str` by using the :pep:`590` ``vectorcall`` calling -convention. Patch by Erlend Aasland. +Speed up calls to :func:`str` with positional-only argument, +by using the :pep:`590` ``vectorcall`` calling convention. +Patch by Erlend Aasland. From 519e7949bb292354b7cf33f91e44fd378c922328 Mon Sep 17 00:00:00 2001 From: "Erlend E. Aasland" Date: Thu, 11 Apr 2024 13:11:46 +0200 Subject: [PATCH 13/17] Address review --- Objects/unicodeobject.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 055fff74a3c59b..045515294c3190 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -14618,7 +14618,7 @@ unicode_new_impl(PyTypeObject *type, PyObject *x, const char *encoding, } static const char * -as_const_char(PyObject *obj, const char *name) +arg_as_utf8(PyObject *obj, const char *name) { if (!PyUnicode_Check(obj)) { PyErr_Format(PyExc_TypeError, @@ -14640,9 +14640,8 @@ unicode_vectorcall(PyObject *type, PyObject *const *args, assert(Py_Is(_PyType_CAST(type), &PyUnicode_Type)); Py_ssize_t nargs = PyVectorcall_NARGS(nargsf); - Py_ssize_t nkwargs = (kwnames) ? PyTuple_GET_SIZE(kwnames) : 0; - if (nkwargs) { - // Fallback to tp_call() + if (kwnames != NULL && PyTuple_GET_SIZE(kwnames) != 0) { + // Fallback to unicode_new() PyObject *tuple = _PyTuple_FromArray(args, nargs); if (tuple == NULL) { return NULL; @@ -14652,11 +14651,14 @@ unicode_vectorcall(PyObject *type, PyObject *const *args, Py_DECREF(tuple); return NULL; } - return unicode_new(_PyType_CAST(type), tuple, dict); + PyObject *ret = unicode_new(_PyType_CAST(type), tuple, dict); + Py_DECREF(tuple); + Py_DECREF(dict); + return ret; } if (nargs > 3) { PyErr_Format(PyExc_TypeError, - "str() takes at most 3 arguments (%d given)", nargs + nkwargs); + "str() takes at most 3 arguments (%d given)", nargs); return NULL; } if (nargs == 0) { @@ -14666,8 +14668,8 @@ unicode_vectorcall(PyObject *type, PyObject *const *args, if (nargs == 1) { return PyObject_Str(object); } - const char *encoding = as_const_char(args[1], "encoding"); - const char *errors = (nargs == 2) ? NULL : as_const_char(args[2], "errors"); + const char *encoding = arg_as_utf8(args[1], "encoding"); + const char *errors = (nargs == 2) ? NULL : arg_as_utf8(args[2], "errors"); return PyUnicode_FromEncodedObject(object, encoding, errors); } From 8138db40dd06d639d443baeddf09e709e6b6494b Mon Sep 17 00:00:00 2001 From: "Erlend E. Aasland" Date: Thu, 11 Apr 2024 14:42:57 +0200 Subject: [PATCH 14/17] Add more tests --- Lib/test/test_str.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/Lib/test/test_str.py b/Lib/test/test_str.py index b4927113db44e3..ea37eb5d96457d 100644 --- a/Lib/test/test_str.py +++ b/Lib/test/test_str.py @@ -2651,6 +2651,24 @@ def test_check_encoding_errors(self): proc = assert_python_failure('-X', 'dev', '-c', code) self.assertEqual(proc.rc, 10, proc) + def test_str_invalid_call(self): + check = lambda *a, **kw: self.assertRaises(TypeError, str, *a, **kw) + + # too many args + check(1, "", "", 1) + + # no such kw arg + check(test=1) + + # 'encoding' must be str + check(1, encoding=1) + check(1, 1) + + # 'errors' must be str + check(1, errors=1) + check(1, "", errors=1) + check(1, 1, 1) + class StringModuleTest(unittest.TestCase): def test_formatter_parser(self): From 906f2d1d4e18eb1bbab85d97ef81105118bb3cb8 Mon Sep 17 00:00:00 2001 From: "Erlend E. Aasland" Date: Thu, 11 Apr 2024 14:54:36 +0200 Subject: [PATCH 15/17] Update Objects/unicodeobject.c --- Objects/unicodeobject.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 045515294c3190..860e0df427782b 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -14669,7 +14669,7 @@ unicode_vectorcall(PyObject *type, PyObject *const *args, return PyObject_Str(object); } const char *encoding = arg_as_utf8(args[1], "encoding"); - const char *errors = (nargs == 2) ? NULL : arg_as_utf8(args[2], "errors"); + const char *errors = (nargs == 3) ? arg_as_utf8(args[2], "errors") : NULL; return PyUnicode_FromEncodedObject(object, encoding, errors); } From 4a533c0ca6fd49296719256990aa5c20dfe0dbcc Mon Sep 17 00:00:00 2001 From: "Erlend E. Aasland" Date: Thu, 11 Apr 2024 15:12:07 +0200 Subject: [PATCH 16/17] Address review: use _PyArg_CheckPositional --- Objects/unicodeobject.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 860e0df427782b..0be82d7d9f3fbe 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -14656,9 +14656,7 @@ unicode_vectorcall(PyObject *type, PyObject *const *args, Py_DECREF(dict); return ret; } - if (nargs > 3) { - PyErr_Format(PyExc_TypeError, - "str() takes at most 3 arguments (%d given)", nargs); + if (!_PyArg_CheckPositional("str", nargs, 0, 3)) { return NULL; } if (nargs == 0) { From 162a7ef0d29f643b168e9b9b89a4bdc807cdbaa1 Mon Sep 17 00:00:00 2001 From: "Erlend E. Aasland" Date: Thu, 11 Apr 2024 15:26:15 +0200 Subject: [PATCH 17/17] Update Objects/unicodeobject.c Co-authored-by: Victor Stinner --- Objects/unicodeobject.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 0be82d7d9f3fbe..2c259b7e869efe 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -14626,11 +14626,7 @@ arg_as_utf8(PyObject *obj, const char *name) name, obj); return NULL; } - const char *str = _PyUnicode_AsUTF8NoNUL(obj); - if (str == NULL) { - return NULL; - } - return str; + return _PyUnicode_AsUTF8NoNUL(obj); } static PyObject *