diff --git a/Doc/library/datetime.rst b/Doc/library/datetime.rst index 64510a77c67c11..417d17b5fa35db 100644 --- a/Doc/library/datetime.rst +++ b/Doc/library/datetime.rst @@ -2648,6 +2648,17 @@ Notes: example, "month/day/year" versus "day/month/year"), and the output may contain non-ASCII characters. + .. versionchanged:: 3.14 + The :meth:`~.datetime.strptime` method, if used with the ``%c`` + or ``%x`` format code, no longer requires the *year* part of the + input to be zero-padded to the usual width (which is either 4 or + 2 digits, depending on the format code and current locale). In + previous versions, a :exc:`ValueError` was raised if a narrower + (i.e., not zero-padded) year representation was part of the input + (and it is worth noting that, depending on the platform/locale, + such inputs may be produced by using :meth:`~.datetime.strftime` + with ``%c`` or ``%x``). + (2) The :meth:`~.datetime.strptime` method can parse years in the full [1, 9999] range, but years < 1000 must be zero-filled to 4-digit width. diff --git a/Lib/_strptime.py b/Lib/_strptime.py index a3f8bb544d518d..d03feb7a810a9c 100644 --- a/Lib/_strptime.py +++ b/Lib/_strptime.py @@ -199,8 +199,6 @@ def __init__(self, locale_time=None): 'V': r"(?P5[0-3]|0[1-9]|[1-4]\d|\d)", # W is set below by using 'U' 'y': r"(?P\d\d)", - #XXX: Does 'Y' need to worry about having less or more than - # 4 digits? 'Y': r"(?P\d\d\d\d)", 'z': r"(?P[+-]\d\d:?[0-5]\d(:?[0-5]\d(\.\d{1,6})?)?|(?-i:Z))", 'A': self.__seqToRE(self.locale_time.f_weekday, 'A'), @@ -213,8 +211,10 @@ def __init__(self, locale_time=None): 'Z'), '%': '%'}) base.__setitem__('W', base.__getitem__('U').replace('U', 'W')) - base.__setitem__('c', self.pattern(self.locale_time.LC_date_time)) - base.__setitem__('x', self.pattern(self.locale_time.LC_date)) + base.__setitem__( + 'c', self.__pattern_with_lax_year(self.locale_time.LC_date_time)) + base.__setitem__( + 'x', self.__pattern_with_lax_year(self.locale_time.LC_date)) base.__setitem__('X', self.pattern(self.locale_time.LC_time)) def __seqToRE(self, to_convert, directive): @@ -236,6 +236,27 @@ def __seqToRE(self, to_convert, directive): regex = '(?P<%s>%s' % (directive, regex) return '%s)' % regex + def __pattern_with_lax_year(self, format): + """Like pattern(), but making %Y and %y accept also fewer digits. + + Necessary to ensure that strptime() is able to parse strftime()'s + output when %c or %x is used -- considering that for some locales + and platforms (e.g., 'C.UTF-8' on Linux) formatting with %c or %x + may produce a year number representation which is not zero-padded + and, consequently (if the number is small enough), shorter than + the usual four or two digits (e.g., '999' rather than `0999', or + '9' rather than of '09'). + + Note that this helper is intended to be used to prepare regex + patterns *only* for the %c and %x format codes (and *not* for + %y, %Y or %G). + + """ + pattern = self.pattern(format) + pattern = pattern.replace(self['Y'], r"(?P\d{1,4})") + pattern = pattern.replace(self['y'], r"(?P\d{1,2})") + return pattern + def pattern(self, format): """Return regex pattern for the format string. @@ -374,6 +395,7 @@ def _strptime(data_string, format="%a %b %d %H:%M:%S %Y"): # U, W # worthless without day of the week if group_key == 'y': + # 1 or 2 digits (1 only for directive c or x; see TimeRE.__init__) year = int(found_dict['y']) # Open Group specification for strptime() states that a %y #value in the range of [00, 68] is in the century 2000, while @@ -383,6 +405,7 @@ def _strptime(data_string, format="%a %b %d %H:%M:%S %Y"): else: year += 1900 elif group_key == 'Y': + # 1-4 digits (1-3 only for directive c or x; see TimeRE.__init__) year = int(found_dict['Y']) elif group_key == 'G': iso_year = int(found_dict['G']) diff --git a/Lib/test/datetimetester.py b/Lib/test/datetimetester.py index c81408b344968d..76d6783a6de2e4 100644 --- a/Lib/test/datetimetester.py +++ b/Lib/test/datetimetester.py @@ -2124,6 +2124,84 @@ def test_fromisocalendar_type_errors(self): with self.assertRaises(TypeError): self.theclass.fromisocalendar(*isocal) + def test_strftime_strptime_roundtrip_concerning_locale_specific_year(self): + # gh-124529 + concerned_formats = '%c', '%x' + + def run_subtest(): + input_obj = sample.replace(year=year) + reason = (f"test strftime/strptime roundtrip concerning " + f"locale-specific year representation " + f"- for {fmt=} and {input_obj=}") + fail_msg = f"{reason} - failed" + with self.subTest(reason=reason): + formatted = input_obj.strftime(fmt) + try: + parsed = self.theclass.strptime(formatted, fmt) + except ValueError as exc: + self.fail(f"{fail_msg}; parsing error: {exc!r}") + self.assertEqual(parsed, input_obj, fail_msg) + + sample = self.theclass.strptime('1999-03-17', '%Y-%m-%d') + for fmt in concerned_formats: + with self.subTest(fmt=fmt): + sample_str = sample.strftime(fmt) + if '1999' in sample_str: + for year in [ + 1000, 1410, 1989, 2024, 2095, 9999, + # gh-124529: + 1, 9, 10, 99, 100, 999, + ]: + run_subtest() + elif '99' in sample_str: + for year in [ + 1969, 1999, 2068, + # gh-124529: + 2000, 2001, 2009, + ]: + run_subtest() + else: + self.fail(f"{sample!r}.strftime({fmt!r})={sample_str!r} " + f"does not include year={sample.year!r} in " + f"any expected format (is there something " + f"severely wrong with the current locale?)") + + def test_strptime_accepting_locale_specific_year_with_fewer_digits(self): + # gh-124529 + concerned_formats = '%c', '%x' + + def run_subtest(): + input_str = sample_str.replace(sample_year_digits, year_digits) + reason = (f"test strptime accepting locale-specific " + f"year representation with fewer digits " + f"- for {fmt=} and {input_str=} ({year=})") + fail_msg = f"{reason} - failed" + expected = sample.replace(year=year) + with self.subTest(reason=reason): + try: + parsed = self.theclass.strptime(input_str, fmt) + except ValueError as exc: + self.fail(f"{fail_msg}; parsing error: {exc!r}") + self.assertEqual(parsed, expected, fail_msg) + + sample = self.theclass.strptime('1999-03-17', '%Y-%m-%d') + for fmt in concerned_formats: + with self.subTest(fmt=fmt): + sample_str = sample.strftime(fmt) + if (sample_year_digits := '1999') in sample_str: + for year in [1, 9, 10, 99, 100, 999]: + year_digits = str(year) + run_subtest() + elif (sample_year_digits := '99') in sample_str: + for year in [2000, 2001, 2009]: + year_digits = str(year - 2000) + run_subtest() + else: + self.fail(f"{sample!r}.strftime({fmt!r})={sample_str!r} " + f"does not include year={sample.year!r} in " + f"any expected format (is there something " + f"severely wrong with the current locale?)") + ############################################################################# # datetime tests diff --git a/Lib/test/test_strptime.py b/Lib/test/test_strptime.py index 038746e26c24ad..85bedcad41cc76 100644 --- a/Lib/test/test_strptime.py +++ b/Lib/test/test_strptime.py @@ -161,11 +161,42 @@ def test_compile(self): for directive in ('a','A','b','B','c','d','G','H','I','j','m','M','p', 'S','u','U','V','w','W','x','X','y','Y','Z','%'): fmt = "%d %Y" if directive == 'd' else "%" + directive + input_string = time.strftime(fmt) compiled = self.time_re.compile(fmt) - found = compiled.match(time.strftime(fmt)) - self.assertTrue(found, "Matching failed on '%s' using '%s' regex" % - (time.strftime(fmt), - compiled.pattern)) + found = compiled.match(input_string) + self.assertTrue(found, + (f"Matching failed on '{input_string}' " + f"using '{compiled.pattern}' regex")) + for directive in ('c', 'x'): + # gh-124529 + fmt = "%" + directive + with self.subTest(f"{fmt!r} should match input containing " + f"year with fewer digits than usual"): + try: + (input_string, + _) = _get_data_to_test_strptime_with_few_digits_year(fmt) + except AssertionError as exc: + self.fail(str(exc)) + compiled = self.time_re.compile(fmt) + found = compiled.match(input_string) + self.assertTrue(found, + (f"Matching failed on '{input_string}' " + f"using '{compiled.pattern}' regex")) + for directive in ('y', 'Y', 'G'): + fmt = "%" + directive + with self.subTest(f"{fmt!r} should not match input containing " + f"year with fewer digits than usual"): + try: + (input_string, + _) = _get_data_to_test_strptime_with_few_digits_year(fmt) + except AssertionError as exc: + self.fail(str(exc)) + compiled = self.time_re.compile(fmt) + found = compiled.match(input_string) + self.assertFalse(found, + (f"Matching unexpectedly succeeded " + f"on '{input_string}' using " + f"'{compiled.pattern}' regex")) def test_blankpattern(self): # Make sure when tuple or something has no values no regex is generated. @@ -299,6 +330,28 @@ def helper(self, directive, position): (directive, strf_output, strp_output[position], self.time_tuple[position])) + def helper_for_directives_accepting_few_digits_year(self, directive): + fmt = "%" + directive + + try: + (input_string, + expected_year, + ) = _get_data_to_test_strptime_with_few_digits_year(fmt) + except AssertionError as exc: + self.fail(str(exc)) + + try: + output_year = _strptime._strptime(input_string, fmt)[0][0] + except ValueError as exc: + # See: gh-124529 + self.fail(f"testing of {directive!r} directive failed; " + f"{input_string!r} -> exception: {exc!r}") + else: + self.assertEqual(output_year, expected_year, + (f"testing of {directive!r} directive failed; " + f"{input_string!r} -> output including year " + f"{output_year!r} != {expected_year!r}")) + def test_year(self): # Test that the year is handled properly for directive in ('y', 'Y'): @@ -312,6 +365,17 @@ def test_year(self): "'y' test failed; passed in '%s' " "and returned '%s'" % (bound, strp_output[0])) + def test_bad_year(self): + for directive, bad_inputs in ( + ('y', ('9', '100', 'ni')), + ('Y', ('7', '42', '999', '10000', 'SPAM')), + ): + fmt = "%" + directive + for input_val in bad_inputs: + with self.subTest(directive=directive, input_val=input_val): + with self.assertRaises(ValueError): + _strptime._strptime_time(input_val, fmt) + def test_month(self): # Test for month directives for directive in ('B', 'b', 'm'): @@ -454,11 +518,21 @@ def test_date_time(self): for position in range(6): self.helper('c', position) + def test_date_time_accepting_few_digits_year(self): # gh-124529 + # Test %c directive with input containing year + # number consisting of fewer digits than usual + self.helper_for_directives_accepting_few_digits_year('c') + def test_date(self): # Test %x directive for position in range(0,3): self.helper('x', position) + def test_date_accepting_few_digits_year(self): # gh-124529 + # Test %x directive with input containing year + # number consisting of fewer digits than usual + self.helper_for_directives_accepting_few_digits_year('x') + def test_time(self): # Test %X directive for position in range(3,6): @@ -769,5 +843,59 @@ def test_TimeRE_recreation_timezone(self): _strptime._strptime_time(oldtzname[1], '%Z') +def _get_data_to_test_strptime_with_few_digits_year(fmt): + # This helper, for the given format string (fmt), returns a 2-tuple: + # (, ) + # where: + # * -- is a `strftime(fmt)`-result-like str + # containing a year number which is *shorter* than the usual four + # or two digits, because the number is small and *not* 0-padded + # (namely: here the contained year number consist of one digit: 7 + # -- that's an arbitrary choice); + # * -- is an int representing the year number that + # is expected to be part of the result of a `strptime(, fmt)` call (namely: either 7 or 2007, depending + # on the given format string and current locale...). + # Note: AssertionError (with an appropriate failure message) is + # raised if does *not* contain the year + # part (for the given format string and current locale). + + # 1. Prepare auxiliary sample time data (note that the magic values + # we use here are guaranteed to be compatible with `time.strftime()`, + # and are also intended to be well distinguishable within formatted + # strings, thanks to the fact that the amount of overloaded numbers + # is minimized, as in `_strptime.LocaleTime.__calc_date_time()`): + sample_year = 1999 + sample_tt = (sample_year, 3, 17, 22, 44, 55, 2, 76, 0) + sample_string = time.strftime(fmt, sample_tt) + sample_year_4digits = str(sample_year) + sample_year_2digits = str(sample_year)[-2:] + + # 2. Pick an arbitrary year number representation that + # is always *shorter* than the usual four or two digits: + year_1digit = '7' + + # 3. Obtain the resultant 2-tuple: + if sample_year_4digits in sample_string: + input_string = sample_string.replace(sample_year_4digits, year_1digit) + if sample_year_2digits in input_string: + raise RuntimeError(f"the {fmt!r} format is not supported by " + f"this helper (a {fmt!r}-formatted string, " + f"{sample_string!r}, seems to include both a " + f"2-digit and 4-digit year number)") + expected_year = int(year_1digit) + elif sample_year_2digits in sample_string: + input_string = sample_string.replace(sample_year_2digits, year_1digit) + expected_year = 2000 + int(year_1digit) + else: + raise AssertionError(f"time.strftime({fmt!r}, ...)={sample_string!r} " + f"does not include the year {sample_year!r} in " + f"any expected format (are {fmt!r}-formatted " + f"strings supposed to include a year number? " + f"if they are, isn't there something severely " + f"wrong with the current locale?)") + return input_string, expected_year + + if __name__ == '__main__': unittest.main() diff --git a/Lib/test/test_time.py b/Lib/test/test_time.py index 530c317a852e77..b81c5201e93ac1 100644 --- a/Lib/test/test_time.py +++ b/Lib/test/test_time.py @@ -307,6 +307,42 @@ def test_strptime_leap_year(self): r'.*day of month without a year.*'): time.strptime('02-07 18:28', '%m-%d %H:%M') + def test_strptime_accepting_locale_specific_year_with_fewer_digits(self): + # GH-124529 + concerned_formats = '%c', '%x' + + def run_subtest(): + input_str = sample_str.replace(sample_year_digits, year_digits) + reason = (f"test strptime accepting locale-specific " + f"year representation with fewer digits " + f"- for {fmt=} and {input_str=} ({year=})") + fail_msg = f"{reason} - failed" + expected = (year,) + sample_tt[1:6] + with self.subTest(reason=reason): + try: + parsed_tt = time.strptime(input_str, fmt) + except ValueError as exc: + self.fail(f"{fail_msg}; parsing error: {exc!r}") + self.assertEqual(parsed_tt[:6], expected, fail_msg) + + sample_tt = (1999, 3, 17) + (0,) * 6 + for fmt in concerned_formats: + with self.subTest(fmt=fmt): + sample_str = time.strftime(fmt, sample_tt) + if (sample_year_digits := '1999') in sample_str: + for year in [1, 9, 10, 99, 100, 999]: + year_digits = str(year) + run_subtest() + elif (sample_year_digits := '99') in sample_str: + for year in [2000, 2001, 2009]: + year_digits = str(year - 2000) + run_subtest() + else: + self.fail(f"time.strftime({fmt!r}, ...)={sample_str!r} " + f"does not include year={sample_tt[0]!r} in " + f"any expected format (is there something " + f"severely wrong with the current locale?)") + def test_asctime(self): time.asctime(time.gmtime(self.t)) diff --git a/Misc/NEWS.d/next/Library/2024-09-30-04-49-22.gh-issue-124529.hDDlMH.rst b/Misc/NEWS.d/next/Library/2024-09-30-04-49-22.gh-issue-124529.hDDlMH.rst new file mode 100644 index 00000000000000..cd6525ffdf2850 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-09-30-04-49-22.gh-issue-124529.hDDlMH.rst @@ -0,0 +1,5 @@ +Fix :meth:`datetime.datetime.strptime`, :meth:`datetime.date.strptime` +and :func:`time.strptime` to make ``%c`` and ``%x`` accept year numbers +with fewer digits than the usual 4 or 2 (not zero-padded). This prevents +:exc:`ValueError` in certain cases of ``strftime/strptime`` round trips. +Patch by Jan Kaliszewski.