diff --git a/Lib/test/test_urlparse.py b/Lib/test/test_urlparse.py index b2bde5a9b1d696..79c68ae844ba3a 100644 --- a/Lib/test/test_urlparse.py +++ b/Lib/test/test_urlparse.py @@ -3,6 +3,7 @@ import unittest import urllib.parse from test import support +from string import ascii_letters, digits RFC1808_BASE = "http://a/b/c/d;p?q#f" RFC2396_BASE = "http://a/b/c/d;p?q" @@ -1419,6 +1420,15 @@ def test_invalid_bracketed_hosts(self): self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://prefix]v6a.ip[suffix') self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://prefix]v6a.ip') self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://v6a.ip[suffix') + # unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" + unreserved = ascii_letters + digits + "-" + "." + "_" + "~" + zoneid_authorized_characters = unreserved + removed_characters = "\t\n\r" + for character in range(256): + character = chr(character) + if character in zoneid_authorized_characters or character in removed_characters: + continue + self.assertRaises(ValueError, urllib.parse.urlsplit, f'scheme://[::1%invalid{character}invalid]/') def test_splitting_bracketed_hosts(self): p1 = urllib.parse.urlsplit('scheme://user@[v6a.ip]:1234/path?query') diff --git a/Lib/urllib/parse.py b/Lib/urllib/parse.py index 67d9bbea0d3150..3f10077966b6de 100644 --- a/Lib/urllib/parse.py +++ b/Lib/urllib/parse.py @@ -466,6 +466,8 @@ def _check_bracketed_host(hostname): ip = ipaddress.ip_address(hostname) # Throws Value Error if not IPv6 or IPv4 if isinstance(ip, ipaddress.IPv4Address): raise ValueError(f"An IPv4 address cannot be in brackets") + if "%" in hostname and not re.match(r"\A(%[a-fA-F0-9]{2}|[\w\.~-])+\z", hostname.split("%", 1)[1], flags=re.ASCII): + raise ValueError(f"IPv6 ZoneID is invalid") # typed=True avoids BytesWarnings being emitted during cache key # comparison since this API supports both bytes and str input. 
diff --git a/Misc/NEWS.d/next/Library/2025-07-27-15-23-32.gh-issue-137146.BE_ylT.rst b/Misc/NEWS.d/next/Library/2025-07-27-15-23-32.gh-issue-137146.BE_ylT.rst new file mode 100644 index 00000000000000..ae91bffebcbeb6 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-07-27-15-23-32.gh-issue-137146.BE_ylT.rst @@ -0,0 +1 @@ +Validate IPv6 ZoneID characters in bracketed hostnames to match :rfc:`6874`. :func:`urllib.parse.urlsplit` and :func:`urllib.parse.urlparse` now raise :exc:`ValueError` for ZoneIDs containing invalid or unsafe characters, that is, anything other than ASCII letters, digits, ``-``, ``.``, ``_``, ``~`` or percent-encoded octets.