Skip to content

Commit 0985409

Browse files
committed
Add script used to generate urls test file.
Thank you @woxcab for the script.
1 parent dd88661 commit 0985409

File tree

1 file changed

+50
-0
lines changed

1 file changed

+50
-0
lines changed

tests/generate_urls.py

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
from urllib.parse import urljoin, urlparse
2+
from itertools import product
3+
import csv
4+
import posixpath
5+
6+
7+
def resolveComponents(url):
    """Return *url* with the ``.`` and ``..`` segments of its path collapsed.

    Only the path component is rewritten; scheme, authority, params, query
    and fragment are passed through unchanged. A trailing slash on the
    original path is preserved, and an empty path stays empty.

    >>> resolveComponents('http://www.example.com/foo/bar/../../baz/bux/')
    'http://www.example.com/baz/bux/'
    >>> resolveComponents('http://www.example.com/some/path/../file.ext')
    'http://www.example.com/some/file.ext'
    >>> resolveComponents('http://www.example.com')
    'http://www.example.com'
    """
    parsed = urlparse(url)
    if not parsed.path:
        # posixpath.normpath('') returns '.', which would turn
        # 'http://host' into 'http://host/.'; leave an empty path alone.
        return parsed.geturl()

    new_path = posixpath.normpath(parsed.path)
    if parsed.path.endswith('/'):
        # normpath drops a trailing slash (Python issue1707768); put it back.
        new_path += '/'
    if new_path.startswith('//'):
        # Either the slash re-added above doubled up a bare '/' path, or
        # normpath kept a leading '//' as-is; reduce it by one slash.
        new_path = new_path[1:]
    cleaned = parsed._replace(path=new_path)
    return cleaned.geturl()
24+
25+
26+
# Building blocks for the generated URL pairs. Every combination of
# authority x path x query x fragment is emitted for both the "first" (base)
# URL and the "second" (reference) URL.
# NOTE(review): the '...@user:pass:port' authorities put the user info after
# the host rather than the usual 'user:pass@host:port' order -- presumably
# deliberate odd input for the tests; confirm before "fixing" them.
first_authorities = ['http://example.com@user:pass:7152', 'https://example.com']
second_authorities = ['', 'https://www.example.org', 'http://example.com@user:pass:1111',
                      'file://example.com', 'file://']
first_paths = ['', '/', '/foobar/bazz', 'foobar/bazz/']
second_paths = ['', '/', '/foo/bar', 'foo/bar/', './foo/../bar', 'foo/./.././bar']
first_queries = ['', '?a=1', '?a=647&b=s564']
second_queries = ['', '?a=sdf', '?a=cvb&b=987']
fragments = ['', '#foo', '#bar']

# Write one CSV row per (first_url, second_url) pair together with the
# expected result of joining them. newline='' is what the csv module requires
# of files it writes to; without it, platforms that translate '\n' would
# produce '\r\r\n' line endings.
with open('urls.csv', 'wt', newline='') as f:
    csvwriter = csv.writer(f, quotechar='"', quoting=csv.QUOTE_ALL)
    csvwriter.writerow(['first_url', 'second_url', 'expected'])
    for first_domain, second_domain in product(first_authorities, second_authorities):
        for first_path, second_path in product(first_paths, second_paths):
            # The first URL always has an authority, so its path must be
            # absolute.
            if not first_path.startswith('/'):
                first_path = '/' + first_path
            # The second path may stay relative only when there is no
            # authority in front of it.
            if second_domain and not second_path.startswith('/'):
                second_path = '/' + second_path
            for first_query, second_query in product(first_queries, second_queries):
                for first_fragment, second_fragment in product(fragments, fragments):
                    first_url = first_domain + first_path + first_query + first_fragment
                    second_url = second_domain + second_path + second_query + second_fragment
                    # Identical URLs are skipped.
                    if first_url != second_url:
                        csvwriter.writerow([first_url, second_url,
                                            resolveComponents(urljoin(first_url, second_url))])

0 commit comments

Comments
 (0)