Skip to content

Commit 5d69a7d

Browse files
authored
Merge pull request package-url#17 from tdruez/url2purl
Add a url2purl module for Package URL creation from arbitrary URLs
2 parents 254876f + 1ca9954 commit 5d69a7d

File tree

8 files changed

+617
-3
lines changed

8 files changed

+617
-3
lines changed

.gitignore

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,3 +5,10 @@
55
/.eggs/
66
/.cache/
77
/src/packageurl_python.egg-info/
8+
lib
9+
bin
10+
.idea
11+
include
12+
pip-selfcheck.json
13+
pyvenv.cfg
14+
.Python

MANIFEST.in

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
graft src
2+
graft tests
23

34
include mit.LICENSE
45
include setup.py
@@ -8,7 +9,5 @@ include MANIFEST.in
89
include requirements_tests.txt
910
include CHANGELOG.rst
1011
include CONTRIBUTING.rst
11-
include test-suite-data.json
12-
include test_purl.py
1312

1413
global-exclude *.py[co] __pycache__

src/packageurl/contrib/route.py

Lines changed: 227 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,227 @@
1+
# -*- coding: utf-8 -*-
2+
#
3+
# Copyright (c) the purl authors
4+
#
5+
# Permission is hereby granted, free of charge, to any person obtaining a copy
6+
# of this software and associated documentation files (the "Software"), to deal
7+
# in the Software without restriction, including without limitation the rights
8+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9+
# copies of the Software, and to permit persons to whom the Software is
10+
# furnished to do so, subject to the following conditions:
11+
#
12+
# The above copyright notice and this permission notice shall be included in all
13+
# copies or substantial portions of the Software.
14+
#
15+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21+
# SOFTWARE.
22+
23+
# Visit https://github.com/package-url/packageurl-python for support and
24+
# download.
25+
26+
27+
from __future__ import absolute_import
28+
from __future__ import print_function
29+
from __future__ import unicode_literals
30+
31+
from collections import OrderedDict
32+
from functools import wraps
33+
import inspect
34+
import re
35+
36+
37+
"""
38+
Given a URI regex (or some string), this module can route execution to a
39+
callable.
40+
41+
There are several routing implementations available in Rails, Django, Flask,
42+
Paste, etc. However, these all assume that the routed processing is to craft a
43+
response to an incoming external HTTP request.
44+
45+
Here we are instead doing the opposite: given a URI (and no request yet) we are
46+
routing the processing to emit a request externally (HTTP or other protocol)
47+
and handling its response.
48+
49+
Also we crawl a lot and not only HTTP: git, svn, ftp, rsync and more.
50+
This simple library supports this kind of arbitrary URI routing.
51+
52+
This is inspired by Guido's http://www.artima.com/weblogs/viewpost.jsp?thread=101605
53+
and Django, Flask, Werkzeug and other url dispatch and routing design from web
54+
frameworks.
55+
https://github.com/douban/brownant has a similar approach, using
56+
Werkzeug with the limitation that it does not route based on URI scheme and is
57+
limited to HTTP.
58+
"""
59+
60+
61+
class Rule(object):
    """
    A rule is a mapping between a pattern (typically a URI) and a callable
    (typically a function).

    The pattern is a regex string pattern and must match entirely a string
    (typically a URI) for the rule to be considered, i.e. for the endpoint to
    be resolved and eventually invoked for a given string (typically a URI).

    Attributes:
    - pattern: the regex text with its leading `^` and trailing `$` anchors
      removed.
    - pattern_match: the bound `match` method of the compiled, fully
      anchored regex.
    - endpoint: the callable (function, callable instance or class) this
      rule routes to.
    """
    def __init__(self, pattern, endpoint):
        """
        Build a rule routing strings matching `pattern` to `endpoint`.

        `pattern` is a regex string; `endpoint` must be callable. When
        `endpoint` is a class, it is instantiated once without arguments to
        check that its instances are callable too.
        """
        # To ensure the pattern will match entirely, we wrap the pattern
        # with start of line ^ and end of line $.
        self.pattern = self._strip_anchors(pattern)
        self.pattern_match = re.compile('^' + self.pattern + '$').match

        # ensure the endpoint is callable
        assert callable(endpoint)
        # classes are not always callable, make an extra check
        if inspect.isclass(endpoint):
            obj = endpoint()
            assert callable(obj)

        self.endpoint = endpoint

    @staticmethod
    def _strip_anchors(pattern):
        """
        Return `pattern` without its leading `^` and trailing `$` anchors.

        A trailing `$` preceded by an odd number of backslashes is an escaped
        literal dollar sign, not an anchor, and is left in place: stripping
        it would leave a dangling backslash that escapes the `$` anchor added
        back when the pattern is compiled.
        """
        pattern = pattern.lstrip('^')
        while pattern.endswith('$'):
            head = pattern[:-1]
            backslashes = len(head) - len(head.rstrip('\\'))
            if backslashes % 2:
                # escaped literal dollar: keep it
                break
            pattern = head
        return pattern

    def __repr__(self):
        endpoint = self.endpoint
        name = getattr(endpoint, '__name__', None)
        if name is None:
            # callable instances and partials have no __name__: fall back
            # to their own repr rather than failing
            target = repr(endpoint)
        else:
            target = '{}.{}'.format(
                getattr(endpoint, '__module__', None), name)
        return 'Rule(r"""{}""", {})'.format(self.pattern, target)

    def match(self, string):
        """
        Match a string with the rule pattern. Return a truthy regex match
        object if the whole string matches, or None otherwise.
        """
        return self.pattern_match(string)
93+
94+
95+
class RouteAlreadyDefined(TypeError):
    """
    Error raised when a Rule for this pattern is already registered in the
    route map.
    """
99+
100+
101+
class NoRouteAvailable(TypeError):
    """
    Error raised when no route is available.
    """
105+
106+
107+
class MultipleRoutesDefined(TypeError):
    """
    Error raised when more than one route is possible.
    """
111+
112+
113+
class Router(object):
    """
    A router is:
    - a container for a route map, consisting of several rules, stored in an
      ordered dictionary keyed by pattern text
    - a way to process a route, i.e. given a string (typically a URI), find the
      correct rule and invoke its callable endpoint
    - and a convenience decorator for routed callables (either a function or
      something with a __call__ method)

    Multiple routers can co-exist as needed, such as a router to collect,
    another to fetch, etc.
    """
    def __init__(self, route_map=None):
        """
        'route_map' is an ordered mapping of pattern -> Rule.
        A new empty OrderedDict is created when it is not provided.
        """
        # Test against None rather than truthiness so that an empty mapping
        # supplied by the caller is kept and shared, not silently replaced.
        self.route_map = OrderedDict() if route_map is None else route_map
        # lazy cached pre-compiled regex match() for all route patterns
        self._is_routable = None
        # the route patterns the cached regex above was built from
        self._routable_patterns = None

    def __repr__(self):
        return repr(self.route_map)

    def __iter__(self):
        """
        Iterate over the (pattern, Rule) pairs of the route map.
        """
        return iter(self.route_map.items())

    def keys(self):
        """
        Return the patterns of the route map.
        """
        return self.route_map.keys()

    def append(self, pattern, endpoint):
        """
        Append a new pattern and endpoint Rule at the end of the map.
        Use this as an alternative to the route decorator.

        Raise RouteAlreadyDefined if `pattern` is already in the map.
        """
        if pattern in self.route_map:
            raise RouteAlreadyDefined(pattern)
        self.route_map[pattern] = Rule(pattern, endpoint)

    def route(self, *patterns):
        """
        Decorator to make a callable 'endpoint' routed to one or more patterns.

        The decorated name is bound to a wrapper that dispatches through
        this router's process(), not to the endpoint directly.

        Example:
        >>> my_router = Router()
        >>> @my_router.route('http://nexb.com', 'http://deja.com')
        ... def somefunc(uri):
        ...   pass
        """
        def decorator(endpoint):
            assert patterns
            for pat in patterns:
                self.append(pat, endpoint)

            @wraps(endpoint)
            def decorated(*args, **kwargs):
                return self.process(*args, **kwargs)
            return decorated

        return decorator

    def process(self, string, *args, **kwargs):
        """
        Given a string (typically a URI), resolve this string to an endpoint
        by searching available rules then execute the endpoint callable for
        that string passing down all arguments to the endpoint invocation.

        Return whatever the endpoint returns. Errors raised by resolve() are
        not caught here.
        """
        endpoint = self.resolve(string)
        if inspect.isclass(endpoint):
            # instantiate a class, that must define a __call__ method
            # TODO: consider passing args to the constructor?
            endpoint = endpoint()
        # call the callable
        return endpoint(string, *args, **kwargs)

    def resolve(self, string):
        """
        Resolve a string: given a string (typically a URI) resolve and
        return the best endpoint function for that string.

        Raise NoRouteAvailable if no rule matches the string.

        Ambiguous resolution is not allowed in order to keep things in
        check when there are hundreds rules: if multiple routes are
        possible for a string (typically a URI), a MultipleRoutesDefined
        TypeError is raised.
        """
        # TODO: we could improve the performance of this by using a single
        # regex and named groups if this ever becomes a bottleneck.
        candidates = [r for r in self.route_map.values() if r.match(string)]

        if not candidates:
            raise NoRouteAvailable(string)

        if len(candidates) > 1:
            # this can happen when multiple patterns match the same string
            # we raise an exception with enough debugging information
            pats = repr([r.pattern for r in candidates])
            msg = '%r matches multiple patterns %r' % (string, pats)
            raise MultipleRoutesDefined(msg)

        return candidates[0].endpoint

    def is_routable(self, string):
        """
        Return True if `string` is routable by this router, e.g. if it
        matches any of the route patterns. Return False otherwise, including
        for an empty or None `string`.
        """
        if not string:
            return False

        # Rebuild the cached regex whenever the set of patterns has changed
        # since it was compiled, so that routes added after a first call are
        # taken into account.
        patterns = tuple(self.route_map)
        if not self._is_routable or patterns != self._routable_patterns:
            # build an alternation regex
            routables = '^(' + '|'.join(patterns) + ')$'
            self._is_routable = re.compile(routables, re.UNICODE).match
            self._routable_patterns = patterns

        return bool(self._is_routable(string))

0 commit comments

Comments
 (0)