From 7a4406034750d0fe1f9b3bf2e020c7b7a6936066 Mon Sep 17 00:00:00 2001 From: Senthil Kumaran Date: Mon, 3 May 2021 06:56:58 -0700 Subject: [PATCH 1/2] Remove the newline, and tab early. From query and fragments. --- Lib/test/test_urlparse.py | 24 ++++++++++++++++-------- Lib/urllib/parse.py | 8 +++++--- 2 files changed, 21 insertions(+), 11 deletions(-) diff --git a/Lib/test/test_urlparse.py b/Lib/test/test_urlparse.py index 67341fecef94cd..a5ee2812ee02d7 100644 --- a/Lib/test/test_urlparse.py +++ b/Lib/test/test_urlparse.py @@ -614,32 +614,40 @@ def test_urlsplit_attributes(self): def test_urlsplit_remove_unsafe_bytes(self): # Remove ASCII tabs and newlines from input - url = "http://www.python.org/java\nscript:\talert('msg\r\n')/#frag" + url = "http\t://www.python.org/java\nscript:\talert('msg\r\n')/?query\n=\tsomething#frag\nment" p = urllib.parse.urlsplit(url) self.assertEqual(p.scheme, "http") self.assertEqual(p.netloc, "www.python.org") self.assertEqual(p.path, "/javascript:alert('msg')/") - self.assertEqual(p.query, "") - self.assertEqual(p.fragment, "frag") + self.assertEqual(p.query, "query=something") + self.assertEqual(p.fragment, "fragment") self.assertEqual(p.username, None) self.assertEqual(p.password, None) self.assertEqual(p.hostname, "www.python.org") self.assertEqual(p.port, None) - self.assertEqual(p.geturl(), "http://www.python.org/javascript:alert('msg')/#frag") + self.assertEqual(p.geturl(), "http://www.python.org/javascript:alert('msg')/?query=something#fragment") # Remove ASCII tabs and newlines from input as bytes. - url = b"http://www.python.org/java\nscript:\talert('msg\r\n')/#frag" + url = b"http\t://www.python.org/java\nscript:\talert('msg\r\n')/?query\n=\tsomething#frag\nment" p = urllib.parse.urlsplit(url) self.assertEqual(p.scheme, b"http") self.assertEqual(p.netloc, b"www.python.org") self.assertEqual(p.path, b"/javascript:alert('msg')/") - self.assertEqual(p.query, b"") - self.assertEqual(p.fragment, b"frag") + self.assertEqual(p.query, b"query=something") + self.assertEqual(p.fragment, b"fragment") self.assertEqual(p.username, None) self.assertEqual(p.password, None) self.assertEqual(p.hostname, b"www.python.org") self.assertEqual(p.port, None) - self.assertEqual(p.geturl(), b"http://www.python.org/javascript:alert('msg')/#frag") + self.assertEqual(p.geturl(), b"http://www.python.org/javascript:alert('msg')/?query=something#fragment") + + # with scheme as cache-key + url = "http://www.python.org/java\nscript:\talert('msg\r\n')/?query\n=\tsomething#frag\nment" + scheme = "ht\ntp" + for _ in range(2): + p = urllib.parse.urlsplit(url) + self.assertEqual(p.scheme, "http") + self.assertEqual(p.geturl(), "http://www.python.org/javascript:alert('msg')/?query=something#fragment") def test_attributes_bad_port(self): """Check handling of invalid ports.""" diff --git a/Lib/urllib/parse.py b/Lib/urllib/parse.py index c11c695a741c8a..b7965fe3d2b195 100644 --- a/Lib/urllib/parse.py +++ b/Lib/urllib/parse.py @@ -456,6 +456,11 @@ def urlsplit(url, scheme='', allow_fragments=True): """ url, scheme, _coerce_result = _coerce_args(url, scheme) + + for b in _UNSAFE_URL_BYTES_TO_REMOVE: + url = url.replace(b, "") + scheme = scheme.replace(b, "") + allow_fragments = bool(allow_fragments) key = url, scheme, allow_fragments, type(url), type(scheme) cached = _parse_cache.get(key, None) @@ -472,9 +477,6 @@ def urlsplit(url, scheme='', allow_fragments=True): else: scheme, url = url[:i].lower(), url[i+1:] - for b in _UNSAFE_URL_BYTES_TO_REMOVE: - url = url.replace(b, "") - if url[:2] == '//': netloc, url = _splitnetloc(url, 2) if (('[' in netloc and ']' not in netloc) or From f184bef4f66cb98bd4008a3137fee6af4d92bb33 Mon Sep 17 00:00:00 2001 From: Senthil Kumaran Date: Mon, 3 May 2021 11:49:13 -0700 Subject: [PATCH 2/2] Address Review Comments. --- Lib/test/test_urlparse.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Lib/test/test_urlparse.py b/Lib/test/test_urlparse.py index a5ee2812ee02d7..31943f357f49f3 100644 --- a/Lib/test/test_urlparse.py +++ b/Lib/test/test_urlparse.py @@ -614,7 +614,7 @@ def test_urlsplit_attributes(self): def test_urlsplit_remove_unsafe_bytes(self): # Remove ASCII tabs and newlines from input - url = "http\t://www.python.org/java\nscript:\talert('msg\r\n')/?query\n=\tsomething#frag\nment" + url = "http\t://www.python\n.org\t/java\nscript:\talert('msg\r\n')/?query\n=\tsomething#frag\nment" p = urllib.parse.urlsplit(url) self.assertEqual(p.scheme, "http") self.assertEqual(p.netloc, "www.python.org") @@ -628,7 +628,7 @@ def test_urlsplit_remove_unsafe_bytes(self): self.assertEqual(p.geturl(), "http://www.python.org/javascript:alert('msg')/?query=something#fragment") # Remove ASCII tabs and newlines from input as bytes. - url = b"http\t://www.python.org/java\nscript:\talert('msg\r\n')/?query\n=\tsomething#frag\nment" + url = b"http\t://www.python\n.org\t/java\nscript:\talert('msg\r\n')/?query\n=\tsomething#frag\nment" p = urllib.parse.urlsplit(url) self.assertEqual(p.scheme, b"http") self.assertEqual(p.netloc, b"www.python.org") @@ -645,7 +645,7 @@ def test_urlsplit_remove_unsafe_bytes(self): url = "http://www.python.org/java\nscript:\talert('msg\r\n')/?query\n=\tsomething#frag\nment" scheme = "ht\ntp" for _ in range(2): - p = urllib.parse.urlsplit(url) + p = urllib.parse.urlsplit(url, scheme=scheme) self.assertEqual(p.scheme, "http") self.assertEqual(p.geturl(), "http://www.python.org/javascript:alert('msg')/?query=something#fragment")