From 4615f6d5c45e99ca659c1ca702e3e6daa74f3958 Mon Sep 17 00:00:00 2001 From: Hood Chatham Date: Mon, 2 Dec 2024 19:43:57 +0100 Subject: [PATCH 01/10] gh-127527: Improve error handling in time_strftime1 --- Modules/timemodule.c | 28 +++++++++++++++++----------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/Modules/timemodule.c b/Modules/timemodule.c index 340011fc08b551..7d6efbd93c8d73 100644 --- a/Modules/timemodule.c +++ b/Modules/timemodule.c @@ -837,20 +837,26 @@ time_strftime1(time_char **outbuf, size_t *bufsize, return NULL; } #endif - if (buflen == 0 && *bufsize < 256 * fmtlen) { - *bufsize += *bufsize; - continue; - } - /* If the buffer is 256 times as long as the format, - it's probably not failing for lack of room! - More likely, the format yields an empty result, - e.g. an empty format, or %Z when the timezone - is unknown. */ + if ((*outbuf)[buflen] == '\0') { #ifdef HAVE_WCSFTIME - return PyUnicode_FromWideChar(*outbuf, buflen); + return PyUnicode_FromWideChar(*outbuf, buflen); #else - return PyUnicode_DecodeLocaleAndSize(*outbuf, buflen, "surrogateescape"); + return PyUnicode_DecodeLocaleAndSize(*outbuf, buflen, "surrogateescape"); #endif + } + if (buflen != 0) { + PyErr_SetString(PyExc_SystemError, "Unexpected behavior from strftime"); + return NULL; + } + if (*bufsize >= 256 * fmtlen) { + /* If the buffer is 256 times as long as the format, it's probably not + failing for lack of room! More likely, `format_time` doesn't like the + format string. For instance we end up here with musl if the format + string ends with a '%'. + */ + PyErr_SetString(PyExc_ValueError, "Invalid format string"); + return NULL; + } } } From a7cf1b3e11a8dee08b7987a0de00c1fada55d25c Mon Sep 17 00:00:00 2001 From: Hood Chatham Date: Tue, 3 Dec 2024 09:59:38 +0100 Subject: [PATCH 02/10] Debug --- Modules/timemodule.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Modules/timemodule.c b/Modules/timemodule.c index 7d6efbd93c8d73..66d7c0fc6bddb1 100644 --- a/Modules/timemodule.c +++ b/Modules/timemodule.c @@ -813,7 +813,9 @@ time_strftime1(time_char **outbuf, size_t *bufsize, /* I hate these functions that presume you know how big the output * will be ahead of time... */ + int attempts = 0; while (1) { + attempts ++; if (*bufsize > PY_SSIZE_T_MAX/sizeof(time_char)) { PyErr_NoMemory(); return NULL; @@ -854,7 +856,7 @@ time_strftime1(time_char **outbuf, size_t *bufsize, format string. For instance we end up here with musl if the format string ends with a '%'. */ - PyErr_SetString(PyExc_ValueError, "Invalid format string"); + PyErr_Format(PyExc_ValueError, "Invalid format string attempts: %d *outbuf: '%.8s'", attempts, *outbuf); return NULL; } } From ca016d75162b0bfafdb863e3d26155134c3af78a Mon Sep 17 00:00:00 2001 From: Hood Chatham Date: Tue, 3 Dec 2024 10:14:56 +0100 Subject: [PATCH 03/10] Double buffer size --- Modules/timemodule.c | 1 + 1 file changed, 1 insertion(+) diff --git a/Modules/timemodule.c b/Modules/timemodule.c index 66d7c0fc6bddb1..b5f35f8da9d5c6 100644 --- a/Modules/timemodule.c +++ b/Modules/timemodule.c @@ -859,6 +859,7 @@ time_strftime1(time_char **outbuf, size_t *bufsize, PyErr_Format(PyExc_ValueError, "Invalid format string attempts: %d *outbuf: '%.8s'", attempts, *outbuf); return NULL; } + *bufsize += *bufsize; } } From d3b0056939028f88686b3a4a3d0d11644e18c398 Mon Sep 17 00:00:00 2001 From: Hood Chatham Date: Tue, 3 Dec 2024 10:53:29 +0100 Subject: [PATCH 04/10] Restore backwards compatibility --- Modules/timemodule.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/Modules/timemodule.c b/Modules/timemodule.c index b5f35f8da9d5c6..0b5aa045e8ff84 100644 --- a/Modules/timemodule.c +++ b/Modules/timemodule.c @@ -813,9 +813,7 @@ time_strftime1(time_char **outbuf, size_t *bufsize, /* I hate these functions that presume you know how big the output * will be ahead of time... */ - int attempts = 0; while (1) { - attempts ++; if (*bufsize > PY_SSIZE_T_MAX/sizeof(time_char)) { PyErr_NoMemory(); return NULL; @@ -851,13 +849,15 @@ time_strftime1(time_char **outbuf, size_t *bufsize, return NULL; } if (*bufsize >= 256 * fmtlen) { - /* If the buffer is 256 times as long as the format, it's probably not - failing for lack of room! More likely, `format_time` doesn't like the - format string. For instance we end up here with musl if the format - string ends with a '%'. + /* If the buffer is 256 times as long as the format, it's probably + not failing for lack of room! More likely, `format_time` doesn't + like the format string. For instance we end up here with musl if + the format string ends with a '%'. + + Ideally we should raise ValueError("Invalid format string") + here. For backwards compatibility, return empty string instead. */ - PyErr_Format(PyExc_ValueError, "Invalid format string attempts: %d *outbuf: '%.8s'", attempts, *outbuf); - return NULL; + return PyUnicode_FromStringAndSize(NULL, 0); } *bufsize += *bufsize; } From 95c7213e2bfa1cc0b33a3d37e63472ba57e3021c Mon Sep 17 00:00:00 2001 From: Hood Chatham Date: Tue, 3 Dec 2024 11:36:43 +0100 Subject: [PATCH 05/10] Update comment --- Modules/timemodule.c | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/Modules/timemodule.c b/Modules/timemodule.c index 0b5aa045e8ff84..0f03abcc89c459 100644 --- a/Modules/timemodule.c +++ b/Modules/timemodule.c @@ -845,19 +845,28 @@ time_strftime1(time_char **outbuf, size_t *bufsize, #endif } if (buflen != 0) { + // I believe this is unreachable in glibc, musl, and BSD. PyErr_SetString(PyExc_SystemError, "Unexpected behavior from strftime"); return NULL; } if (*bufsize >= 256 * fmtlen) { - /* If the buffer is 256 times as long as the format, it's probably - not failing for lack of room! More likely, `format_time` doesn't - like the format string. For instance we end up here with musl if - the format string ends with a '%'. - - Ideally we should raise ValueError("Invalid format string") - here. For backwards compatibility, return empty string instead. - */ + // If the buffer is 256 times as long as the format, it's probably + // not failing for lack of room! More likely, `format_time` doesn't + // like the format string. + // I believe that this is unreachable in glibc, but both musl and + // BSD can end up here. +#ifdef HAVE_WCSFTIME + // For backwards compatibility, return empty string instead of + // raising a ValueError. return PyUnicode_FromStringAndSize(NULL, 0); +#else + // Previously we raised ValueError("embedded null byte") here, so + // this is backwards compatible as long as we are concerned only + // about error type. + PyErr_SetString(PyExc_ValueError, "Invalid format string"); + return NULL; +#endif + } *bufsize += *bufsize; } From d0bd2470532fe4d8aede28e6727f0406c252306f Mon Sep 17 00:00:00 2001 From: Hood Chatham Date: Tue, 3 Dec 2024 11:42:24 +0100 Subject: [PATCH 06/10] Update documentation --- Doc/library/time.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Doc/library/time.rst b/Doc/library/time.rst index 6265c2214eaa0d..c2c967c5dc000d 100644 --- a/Doc/library/time.rst +++ b/Doc/library/time.rst @@ -607,6 +607,9 @@ Functions and thus does not necessarily support all directives available that are not documented as supported. + If a format directive is unrecognized, the behavior is platform-dependent. + glibc will return the format string unmodified, Windows will raise a + :exc:`ValueError`, and macOS, musl, and BSD will return an empty string. .. class:: struct_time From a2daaf472089acb83bceb301519065e73c043b04 Mon Sep 17 00:00:00 2001 From: Hood Chatham Date: Tue, 3 Dec 2024 11:46:22 +0100 Subject: [PATCH 07/10] Whitespace --- Doc/library/time.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/Doc/library/time.rst b/Doc/library/time.rst index c2c967c5dc000d..c25dfa953b38c3 100644 --- a/Doc/library/time.rst +++ b/Doc/library/time.rst @@ -611,6 +611,7 @@ Functions glibc will return the format string unmodified, Windows will raise a :exc:`ValueError`, and macOS, musl, and BSD will return an empty string. + .. class:: struct_time The type of the time value sequence returned by :func:`gmtime`, From c3955c56cbf5dd5d6c5714a2b07149237a6a6d06 Mon Sep 17 00:00:00 2001 From: Hood Chatham Date: Tue, 3 Dec 2024 11:54:01 +0100 Subject: [PATCH 08/10] Add news entry --- .../next/Library/2024-12-03-11-53-56.gh-issue-127527.hoM7oD.rst | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 Misc/NEWS.d/next/Library/2024-12-03-11-53-56.gh-issue-127527.hoM7oD.rst diff --git a/Misc/NEWS.d/next/Library/2024-12-03-11-53-56.gh-issue-127527.hoM7oD.rst b/Misc/NEWS.d/next/Library/2024-12-03-11-53-56.gh-issue-127527.hoM7oD.rst new file mode 100644 index 00000000000000..2d48a76445b25f --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-12-03-11-53-56.gh-issue-127527.hoM7oD.rst @@ -0,0 +1,2 @@ +Made minor improvements to the error handling in `time.strftime` and +documented the behavior when an unknown directive is seen From 5e5e243606259cb5a0c175e06dc3c5fda10c6e40 Mon Sep 17 00:00:00 2001 From: Hood Chatham Date: Tue, 3 Dec 2024 12:19:41 +0100 Subject: [PATCH 09/10] More docs updates --- Doc/library/datetime.rst | 14 +++++++++----- Doc/library/time.rst | 2 +- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/Doc/library/datetime.rst b/Doc/library/datetime.rst index 2f81080d525f86..44f2737f1272e0 100644 --- a/Doc/library/datetime.rst +++ b/Doc/library/datetime.rst @@ -2638,11 +2638,15 @@ For :class:`date` objects, the format codes for hours, minutes, seconds, and microseconds should not be used, as :class:`date` objects have no such values. If they're used anyway, 0 is substituted for them. -For the same reason, handling of format strings containing Unicode code points -that can't be represented in the charset of the current locale is also -platform-dependent. On some platforms such code points are preserved intact in -the output, while on others ``strftime`` may raise :exc:`UnicodeError` or return -an empty string instead. +If a format directive is unrecognized, the behavior is platform-dependent. glibc +will return the directive unmodified, Windows will raise a :exc:`ValueError`, +and macOS, musl, and BSD will return an empty string. + +Handling of format strings containing Unicode code points that can't be +represented in the charset of the current locale is also platform-dependent. On +some platforms such code points are preserved intact in the output, while on +others ``strftime`` may raise :exc:`UnicodeError` or return an empty string +instead. Notes: diff --git a/Doc/library/time.rst b/Doc/library/time.rst index c25dfa953b38c3..187561af765555 100644 --- a/Doc/library/time.rst +++ b/Doc/library/time.rst @@ -608,7 +608,7 @@ Functions documented as supported. If a format directive is unrecognized, the behavior is platform-dependent. - glibc will return the format string unmodified, Windows will raise a + glibc will return the directive unmodified, Windows will raise a :exc:`ValueError`, and macOS, musl, and BSD will return an empty string. From 4d67df4415edb920973a3ef284403cf2134b231e Mon Sep 17 00:00:00 2001 From: Hood Chatham Date: Tue, 3 Dec 2024 12:21:20 +0100 Subject: [PATCH 10/10] Fix lint --- .../next/Library/2024-12-03-11-53-56.gh-issue-127527.hoM7oD.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Misc/NEWS.d/next/Library/2024-12-03-11-53-56.gh-issue-127527.hoM7oD.rst b/Misc/NEWS.d/next/Library/2024-12-03-11-53-56.gh-issue-127527.hoM7oD.rst index 2d48a76445b25f..727cda7245453a 100644 --- a/Misc/NEWS.d/next/Library/2024-12-03-11-53-56.gh-issue-127527.hoM7oD.rst +++ b/Misc/NEWS.d/next/Library/2024-12-03-11-53-56.gh-issue-127527.hoM7oD.rst @@ -1,2 +1,2 @@ -Made minor improvements to the error handling in `time.strftime` and +Made minor improvements to the error handling in ``time.strftime`` and documented the behavior when an unknown directive is seen