|
| 1 | +from urllib.parse import urljoin, urlparse |
| 2 | +from itertools import product |
| 3 | +import csv |
| 4 | +import posixpath |
| 5 | + |
| 6 | + |
def resolveComponents(url):
    """
    Return *url* with the ``.`` and ``..`` segments of its path collapsed.

    Only the path component is rewritten; the other components are passed
    through as ``urlparse`` split them.  A trailing slash on the original
    path is kept, and an empty path stays empty.

    >>> resolveComponents('http://www.example.com/foo/bar/../../baz/bux/')
    'http://www.example.com/baz/bux/'
    >>> resolveComponents('http://www.example.com/some/path/../file.ext')
    'http://www.example.com/some/file.ext'
    >>> resolveComponents('http://www.example.com')
    'http://www.example.com'
    """
    parsed = urlparse(url)
    if not parsed.path:
        # posixpath.normpath('') returns '.', which would come back out of
        # geturl() as a bogus '/.' path.  Nothing to normalise here.
        return parsed.geturl()

    new_path = posixpath.normpath(parsed.path)
    if parsed.path.endswith('/'):
        # Compensate for issue1707768: normpath drops the trailing slash.
        new_path += '/'
    if new_path.startswith('//'):
        # normpath('/') is already '/', so the re-added slash above doubles
        # it; normpath also leaves exactly two leading slashes in place.
        new_path = new_path[1:]
    return parsed._replace(path=new_path).geturl()
| 24 | + |
| 25 | + |
# Building blocks for the generated URL pairs.  Every combination of
# authority, path, query and fragment below is emitted as one CSV row.
first_authorities = ['http://example.com@user:pass:7152', 'https://example.com']
second_authorities = ['', 'https://www.example.org', 'http://example.com@user:pass:1111',
                      'file://example.com', 'file://']
first_paths = ['', '/', '/foobar/bazz', 'foobar/bazz/']
second_paths = ['', '/', '/foo/bar', 'foo/bar/', './foo/../bar', 'foo/./.././bar']
first_queries = ['', '?a=1', '?a=647&b=s564']
second_queries = ['', '?a=sdf', '?a=cvb&b=987']
fragments = ['', '#foo', '#bar']

# Write urls.csv: one row per (first_url, second_url) pair plus the result of
# joining the second onto the first and normalising the joined path.
# newline='' is what the csv module expects; without it the writer's own
# '\r\n' row terminator is translated again on Windows.
with open('urls.csv', 'wt', newline='') as f:
    csvwriter = csv.writer(f, quotechar='"', quoting=csv.QUOTE_ALL)
    csvwriter.writerow(['first_url', 'second_url', 'expected'])
    for first_domain, second_domain in product(first_authorities, second_authorities):
        for first_path, second_path in product(first_paths, second_paths):
            # The first URL always has an authority, so its path must be
            # rooted.  ('' and '/' both become '/', so some rows repeat.)
            if not first_path.startswith('/'):
                first_path = '/' + first_path
            # The second URL's path is only rooted when it has an authority;
            # without one it is left as-is so relative references are kept.
            if second_domain and not second_path.startswith('/'):
                second_path = '/' + second_path
            for first_query, second_query in product(first_queries, second_queries):
                for first_fragment, second_fragment in product(fragments, fragments):
                    first_url = first_domain + first_path + first_query + first_fragment
                    second_url = second_domain + second_path + second_query + second_fragment
                    # Skip pairs where both URLs are the same string.
                    if first_url != second_url:
                        expected = resolveComponents(urljoin(first_url, second_url))
                        csvwriter.writerow([first_url, second_url, expected])
0 commit comments