Skip to content

Commit 98a4dce

Browse files
authored
bpo-36742: Fixes handling of pre-normalization characters in urlsplit() (GH-13017)
1 parent 3e5c4a7 commit 98a4dce

File tree

3 files changed: 14 additions and 4 deletions.

Lib/test/test_urlparse.py

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -641,6 +641,12 @@ def test_urlsplit_normalization(self):
641641
self.assertIn(u'\u2100', denorm_chars)
642642
self.assertIn(u'\uFF03', denorm_chars)
643643

644+
# bpo-36742: Verify port separators are ignored when they
645+
# existed prior to decomposition
646+
urlparse.urlsplit(u'http://\u30d5\u309a:80')
647+
with self.assertRaises(ValueError):
648+
urlparse.urlsplit(u'http://\u30d5\u309a\ufe1380')
649+
644650
for scheme in [u"http", u"https", u"ftp"]:
645651
for c in denorm_chars:
646652
url = u"{}://netloc{}false.netloc/path".format(scheme, c)

Lib/urlparse.py

Lines changed: 7 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -171,13 +171,16 @@ def _checknetloc(netloc):
171171
# looking for characters like \u2100 that expand to 'a/c'
172172
# IDNA uses NFKC equivalence, so normalize for this check
173173
import unicodedata
174-
netloc2 = unicodedata.normalize('NFKC', netloc)
175-
if netloc == netloc2:
174+
n = netloc.rpartition('@')[2] # ignore anything to the left of '@'
175+
n = n.replace(':', '') # ignore characters already included
176+
n = n.replace('#', '') # but not the surrounding text
177+
n = n.replace('?', '')
178+
netloc2 = unicodedata.normalize('NFKC', n)
179+
if n == netloc2:
176180
return
177-
_, _, netloc = netloc.rpartition('@') # anything to the left of '@' is okay
178181
for c in '/?#@:':
179182
if c in netloc2:
180-
raise ValueError("netloc '" + netloc2 + "' contains invalid " +
183+
raise ValueError("netloc '" + netloc + "' contains invalid " +
181184
"characters under NFKC normalization")
182185

183186
def urlsplit(url, scheme='', allow_fragments=True):
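(File name not captured in this view; the hunk header below shows a newly added one-line file.)

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change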
@@ -0,0 +1 @@
1+
Fixes mishandling of pre-normalization characters in urlsplit().

0 commit comments

Comments
 (0)