Skip to content

[3.11] gh-86826: Fix parsing TZ strings in zoneinfo module (GH-23825) (GH-110882) #110889

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
125 changes: 118 additions & 7 deletions Lib/test/test_zoneinfo/test_zoneinfo.py
Original file line number Diff line number Diff line change
Expand Up @@ -988,23 +988,114 @@ def test_tzstr_from_utc(self):

self.assertEqual(dt_act, dt_utc)

def test_extreme_tzstr(self):
    # Boundary-value TZ strings. Each one is handed to
    # self.zone_from_tzstr() inside its own subTest; nothing is asserted
    # about the result, so the test passes as long as no call raises.
    offset_cases = (
        # Extreme offset hour
        "AAA24",
        "AAA+24",
        "AAA-24",
        "AAA24BBB,J60/2,J300/2",
        "AAA+24BBB,J60/2,J300/2",
        "AAA-24BBB,J60/2,J300/2",
        "AAA4BBB24,J60/2,J300/2",
        "AAA4BBB+24,J60/2,J300/2",
        "AAA4BBB-24,J60/2,J300/2",
        # Extreme offset minutes
        "AAA4:00BBB,J60/2,J300/2",
        "AAA4:59BBB,J60/2,J300/2",
        "AAA4BBB5:00,J60/2,J300/2",
        "AAA4BBB5:59,J60/2,J300/2",
        # Extreme offset seconds
        "AAA4:00:00BBB,J60/2,J300/2",
        "AAA4:00:59BBB,J60/2,J300/2",
        "AAA4BBB5:00:00,J60/2,J300/2",
        "AAA4BBB5:00:59,J60/2,J300/2",
        # Extreme total offset
        "AAA24:59:59BBB5,J60/2,J300/2",
        "AAA-24:59:59BBB5,J60/2,J300/2",
        "AAA4BBB24:59:59,J60/2,J300/2",
        "AAA4BBB-24:59:59,J60/2,J300/2",
    )

    transition_date_cases = (
        # Extreme months
        "AAA4BBB,M12.1.1/2,M1.1.1/2",
        "AAA4BBB,M1.1.1/2,M12.1.1/2",
        # Extreme weeks
        "AAA4BBB,M1.5.1/2,M1.1.1/2",
        "AAA4BBB,M1.1.1/2,M1.5.1/2",
        # Extreme weekday
        "AAA4BBB,M1.1.6/2,M2.1.1/2",
        "AAA4BBB,M1.1.1/2,M2.1.6/2",
        # Extreme numeric offset
        "AAA4BBB,0/2,20/2",
        "AAA4BBB,0/2,0/14",
        "AAA4BBB,20/2,365/2",
        "AAA4BBB,365/2,365/14",
        # Extreme julian offset
        "AAA4BBB,J1/2,J20/2",
        "AAA4BBB,J1/2,J1/14",
        "AAA4BBB,J20/2,J365/2",
        "AAA4BBB,J365/2,J365/14",
    )

    transition_time_cases = (
        # Extreme transition hour
        "AAA4BBB,J60/167,J300/2",
        "AAA4BBB,J60/+167,J300/2",
        "AAA4BBB,J60/-167,J300/2",
        "AAA4BBB,J60/2,J300/167",
        "AAA4BBB,J60/2,J300/+167",
        "AAA4BBB,J60/2,J300/-167",
        # Extreme transition minutes
        "AAA4BBB,J60/2:00,J300/2",
        "AAA4BBB,J60/2:59,J300/2",
        "AAA4BBB,J60/2,J300/2:00",
        "AAA4BBB,J60/2,J300/2:59",
        # Extreme transition seconds
        "AAA4BBB,J60/2:00:00,J300/2",
        "AAA4BBB,J60/2:00:59,J300/2",
        "AAA4BBB,J60/2,J300/2:00:00",
        "AAA4BBB,J60/2,J300/2:00:59",
        # Extreme total transition time
        "AAA4BBB,J60/167:59:59,J300/2",
        "AAA4BBB,J60/-167:59:59,J300/2",
        "AAA4BBB,J60/2,J300/167:59:59",
        "AAA4BBB,J60/2,J300/-167:59:59",
    )

    # Concatenation keeps the original ordering: offsets, then transition
    # dates, then transition times.
    boundary_tzstrs = (
        offset_cases + transition_date_cases + transition_time_cases
    )

    for candidate in boundary_tzstrs:
        with self.subTest(tzstr=candidate):
            self.zone_from_tzstr(candidate)

def test_invalid_tzstr(self):
invalid_tzstrs = [
"PST8PDT", # DST but no transition specified
"+11", # Unquoted alphanumeric
"GMT,M3.2.0/2,M11.1.0/3", # Transition rule but no DST
"GMT0+11,M3.2.0/2,M11.1.0/3", # Unquoted alphanumeric in DST
"PST8PDT,M3.2.0/2", # Only one transition rule
# Invalid offsets
"STD+25",
"STD-25",
"STD+374",
"STD+374DST,M3.2.0/2,M11.1.0/3",
"STD+23DST+25,M3.2.0/2,M11.1.0/3",
"STD-23DST-25,M3.2.0/2,M11.1.0/3",
# Invalid offset hours
"AAA168",
"AAA+168",
"AAA-168",
"AAA168BBB,J60/2,J300/2",
"AAA+168BBB,J60/2,J300/2",
"AAA-168BBB,J60/2,J300/2",
"AAA4BBB168,J60/2,J300/2",
"AAA4BBB+168,J60/2,J300/2",
"AAA4BBB-168,J60/2,J300/2",
# Invalid offset minutes
"AAA4:0BBB,J60/2,J300/2",
"AAA4:100BBB,J60/2,J300/2",
"AAA4BBB5:0,J60/2,J300/2",
"AAA4BBB5:100,J60/2,J300/2",
# Invalid offset seconds
"AAA4:00:0BBB,J60/2,J300/2",
"AAA4:00:100BBB,J60/2,J300/2",
"AAA4BBB5:00:0,J60/2,J300/2",
"AAA4BBB5:00:100,J60/2,J300/2",
# Completely invalid dates
"AAA4BBB,M1443339,M11.1.0/3",
"AAA4BBB,M3.2.0/2,0349309483959c",
"AAA4BBB,,J300/2",
"AAA4BBB,z,J300/2",
"AAA4BBB,J60/2,",
"AAA4BBB,J60/2,z",
# Invalid months
"AAA4BBB,M13.1.1/2,M1.1.1/2",
"AAA4BBB,M1.1.1/2,M13.1.1/2",
Expand All @@ -1024,6 +1115,26 @@ def test_invalid_tzstr(self):
# Invalid julian offset
"AAA4BBB,J0/2,J20/2",
"AAA4BBB,J20/2,J366/2",
# Invalid transition time
"AAA4BBB,J60/2/3,J300/2",
"AAA4BBB,J60/2,J300/2/3",
# Invalid transition hour
"AAA4BBB,J60/168,J300/2",
"AAA4BBB,J60/+168,J300/2",
"AAA4BBB,J60/-168,J300/2",
"AAA4BBB,J60/2,J300/168",
"AAA4BBB,J60/2,J300/+168",
"AAA4BBB,J60/2,J300/-168",
# Invalid transition minutes
"AAA4BBB,J60/2:0,J300/2",
"AAA4BBB,J60/2:100,J300/2",
"AAA4BBB,J60/2,J300/2:0",
"AAA4BBB,J60/2,J300/2:100",
# Invalid transition seconds
"AAA4BBB,J60/2:00:0,J300/2",
"AAA4BBB,J60/2:00:100,J300/2",
"AAA4BBB,J60/2,J300/2:00:0",
"AAA4BBB,J60/2,J300/2:00:100",
]

for invalid_tzstr in invalid_tzstrs:
Expand Down
86 changes: 53 additions & 33 deletions Lib/zoneinfo/_zoneinfo.py
Original file line number Diff line number Diff line change
Expand Up @@ -517,8 +517,8 @@ class _DayOffset:
__slots__ = ["d", "julian", "hour", "minute", "second"]

def __init__(self, d, julian, hour=2, minute=0, second=0):
if not (0 + julian) <= d <= 365:
min_day = 0 + julian
min_day = 0 + julian # convert bool to int
if not min_day <= d <= 365:
raise ValueError(f"d must be in [{min_day}, 365], not: {d}")

self.d = d
Expand Down Expand Up @@ -560,11 +560,11 @@ class _CalendarOffset:
)

def __init__(self, m, w, d, hour=2, minute=0, second=0):
if not 0 < m <= 12:
raise ValueError("m must be in (0, 12]")
if not 1 <= m <= 12:
raise ValueError("m must be in [1, 12]")

if not 0 < w <= 5:
raise ValueError("w must be in (0, 5]")
if not 1 <= w <= 5:
raise ValueError("w must be in [1, 5]")

if not 0 <= d <= 6:
raise ValueError("d must be in [0, 6]")
Expand Down Expand Up @@ -634,18 +634,21 @@ def _parse_tz_str(tz_str):

offset_str, *start_end_str = tz_str.split(",", 1)

# fmt: off
parser_re = re.compile(
r"(?P<std>[^<0-9:.+-]+|<[a-zA-Z0-9+\-]+>)" +
r"((?P<stdoff>[+-]?\d{1,2}(:\d{2}(:\d{2})?)?)" +
r"((?P<dst>[^0-9:.+-]+|<[a-zA-Z0-9+\-]+>)" +
r"((?P<dstoff>[+-]?\d{1,2}(:\d{2}(:\d{2})?)?))?" +
r")?" + # dst
r")?$" # stdoff
r"""
(?P<std>[^<0-9:.+-]+|<[a-zA-Z0-9+-]+>)
(?:
(?P<stdoff>[+-]?\d{1,3}(?::\d{2}(?::\d{2})?)?)
(?:
(?P<dst>[^0-9:.+-]+|<[a-zA-Z0-9+-]+>)
(?P<dstoff>[+-]?\d{1,3}(?::\d{2}(?::\d{2})?)?)?
)? # dst
)? # stdoff
""",
re.ASCII|re.VERBOSE
)
# fmt: on

m = parser_re.match(offset_str)
m = parser_re.fullmatch(offset_str)

if m is None:
raise ValueError(f"{tz_str} is not a valid TZ string")
Expand Down Expand Up @@ -696,16 +699,17 @@ def _parse_tz_str(tz_str):


def _parse_dst_start_end(dststr):
date, *time = dststr.split("/")
if date[0] == "M":
date, *time = dststr.split("/", 1)
type = date[:1]
if type == "M":
n_is_julian = False
m = re.match(r"M(\d{1,2})\.(\d).(\d)$", date)
m = re.fullmatch(r"M(\d{1,2})\.(\d).(\d)", date, re.ASCII)
if m is None:
raise ValueError(f"Invalid dst start/end date: {dststr}")
date_offset = tuple(map(int, m.groups()))
offset = _CalendarOffset(*date_offset)
else:
if date[0] == "J":
if type == "J":
n_is_julian = True
date = date[1:]
else:
Expand All @@ -715,38 +719,54 @@ def _parse_dst_start_end(dststr):
offset = _DayOffset(doy, n_is_julian)

if time:
time_components = list(map(int, time[0].split(":")))
n_components = len(time_components)
if n_components < 3:
time_components.extend([0] * (3 - n_components))
offset.hour, offset.minute, offset.second = time_components
offset.hour, offset.minute, offset.second = _parse_transition_time(time[0])

return offset


def _parse_transition_time(time_str):
    """Parse the time portion of a TZ-string transition rule.

    The accepted form is ``[+-]h[:mm[:ss]]`` where the hour field is one
    to three ASCII digits and the optional minute and second fields are
    exactly two ASCII digits each.

    Returns an ``(hour, minute, second)`` tuple of ints. Omitted minute or
    second fields are 0, and a leading ``-`` negates all three components.

    Raises ValueError if *time_str* does not match the format in full or
    if the hour field is greater than 167.
    """
    parsed = re.fullmatch(
        r"(?P<sign>[+-])?(?P<h>\d{1,3})(:(?P<m>\d{2})(:(?P<s>\d{2}))?)?",
        time_str,
        re.ASCII,
    )
    if parsed is None:
        raise ValueError(f"Invalid time: {time_str}")

    fields = parsed.groupdict()
    hour = int(fields["h"])
    # Groups that did not participate in the match are None -> treat as 0.
    minute = int(fields["m"] or 0)
    second = int(fields["s"] or 0)

    # The bound is checked on the unsigned hour field, before any negation.
    if hour > 167:
        raise ValueError(f"Hour must be in [0, 167]: {time_str}")

    if fields["sign"] == "-":
        return -hour, -minute, -second
    return hour, minute, second


def _parse_tz_delta(tz_delta):
match = re.match(
r"(?P<sign>[+-])?(?P<h>\d{1,2})(:(?P<m>\d{2})(:(?P<s>\d{2}))?)?",
match = re.fullmatch(
r"(?P<sign>[+-])?(?P<h>\d{1,3})(:(?P<m>\d{2})(:(?P<s>\d{2}))?)?",
tz_delta,
re.ASCII
)
# Anything passed to this function should already have hit an equivalent
# regular expression to find the section to parse.
assert match is not None, tz_delta

h, m, s = (
int(v) if v is not None else 0
for v in map(match.group, ("h", "m", "s"))
)
h, m, s = (int(v or 0) for v in match.group("h", "m", "s"))

total = h * 3600 + m * 60 + s

if not -86400 < total < 86400:
if h > 24:
raise ValueError(
f"Offset must be strictly between -24h and +24h: {tz_delta}"
f"Offset hours must be in [0, 24]: {tz_delta}"
)

# Yes, +5 maps to an offset of -5h
if match.group("sign") != "-":
total *= -1
total = -total

return total
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
:mod:`zoneinfo` now supports the full range of values in the TZ string
determined by RFC 8536 and detects all invalid formats.
Both Python and C implementations now raise exceptions of the same
type on invalid data.
Loading