Skip to content

Commit 6537bc9

Browse files
gh-94526: getpath_dirname() no longer encodes the path (GH-97645)
Fix the Python path configuration used to initialize sys.path at Python startup. Paths are no longer encoded to UTF-8/strict to avoid encoding errors if they contain surrogate characters (bytes paths are decoded with the surrogateescape error handler). getpath_basename() and getpath_dirname() functions no longer encode the path to UTF-8/strict, but work directly on Unicode strings. These functions now use PyUnicode_FindChar() and PyUnicode_Substring() on the Unicode path, rather than strrchr() on the encoded bytes string. (cherry picked from commit 9f2f1dd) Co-authored-by: Victor Stinner <vstinner@python.org>
1 parent 0fbee30 commit 6537bc9

File tree

2 files changed

+18
-9
lines changed

2 files changed

+18
-9
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
Fix the Python path configuration used to initialize :data:`sys.path` at
2+
Python startup. Paths are no longer encoded to UTF-8/strict to avoid encoding
3+
errors if they contain surrogate characters (bytes paths are decoded with the
4+
surrogateescape error handler). Patch by Victor Stinner.

Modules/getpath.c

+14-9
Original file line numberDiff line numberDiff line change
@@ -82,27 +82,32 @@ getpath_abspath(PyObject *Py_UNUSED(self), PyObject *args)
8282
static PyObject *
8383
getpath_basename(PyObject *Py_UNUSED(self), PyObject *args)
8484
{
85-
const char *path;
86-
if (!PyArg_ParseTuple(args, "s", &path)) {
85+
PyObject *path;
86+
if (!PyArg_ParseTuple(args, "U", &path)) {
8787
return NULL;
8888
}
89-
const char *name = strrchr(path, SEP);
90-
return PyUnicode_FromString(name ? name + 1 : path);
89+
Py_ssize_t end = PyUnicode_GET_LENGTH(path);
90+
Py_ssize_t pos = PyUnicode_FindChar(path, SEP, 0, end, -1);
91+
if (pos < 0) {
92+
return Py_NewRef(path);
93+
}
94+
return PyUnicode_Substring(path, pos + 1, end);
9195
}
9296

9397

9498
static PyObject *
9599
getpath_dirname(PyObject *Py_UNUSED(self), PyObject *args)
96100
{
97-
const char *path;
98-
if (!PyArg_ParseTuple(args, "s", &path)) {
101+
PyObject *path;
102+
if (!PyArg_ParseTuple(args, "U", &path)) {
99103
return NULL;
100104
}
101-
const char *name = strrchr(path, SEP);
102-
if (!name) {
105+
Py_ssize_t end = PyUnicode_GET_LENGTH(path);
106+
Py_ssize_t pos = PyUnicode_FindChar(path, SEP, 0, end, -1);
107+
if (pos < 0) {
103108
return PyUnicode_FromStringAndSize(NULL, 0);
104109
}
105-
return PyUnicode_FromStringAndSize(path, (name - path));
110+
return PyUnicode_Substring(path, 0, pos);
106111
}
107112

108113

0 commit comments

Comments
 (0)