1
+ # -*- coding: utf-8 -*-
2
+ #
1
3
# Copyright (c) the purl authors
2
4
#
3
5
# Permission is hereby granted, free of charge, to any person obtaining a copy
21
23
# Visit https://github.com/package-url/packageurl-python for support and
22
24
# download.
23
25
26
+
24
27
from __future__ import absolute_import
25
28
from __future__ import print_function
26
29
from __future__ import unicode_literals
27
30
28
31
from collections import namedtuple
29
- from collections import OrderedDict
32
+ import string
30
33
31
34
# Python 2 and 3 support
32
35
try :
33
36
# Python 2
34
- from urlparse import urlsplit
35
- from urllib import quote as percent_quote
36
- from urllib import unquote as percent_unquote
37
+ from urlparse import urlsplit as _urlsplit
38
+ from urllib import quote as _percent_quote
39
+ from urllib import unquote as _percent_unquote
37
40
except ImportError :
38
41
# Python 3
39
- from urllib .parse import urlsplit
40
- from urllib .parse import quote as percent_quote
41
- from urllib .parse import unquote as percent_unquote
42
+ from urllib .parse import urlsplit as _urlsplit
43
+ from urllib .parse import quote as _percent_quote
44
+ from urllib .parse import unquote as _percent_unquote
42
45
43
46
# Python 2 and 3 support
44
47
try :
45
48
# Python 2
46
49
unicode
47
- str = unicode # NOQA
48
50
basestring = basestring # NOQA
51
+ bytes = str # NOQA
52
+ str = unicode # NOQA
49
53
except NameError :
50
54
# Python 3
51
55
unicode = str # NOQA
59
63
60
64
def quote(s):
    """
    Return a percent-encoded unicode string, except for colon :, given an `s`
    byte or unicode string.

    A unicode `s` is encoded to UTF-8 bytes before being quoted. The result
    is always a unicode string in which any '%3A' produced by the quoting is
    turned back into a literal colon.
    """
    if isinstance(s, unicode):
        s = s.encode('utf-8')
    quoted = _percent_quote(s)
    if not isinstance(quoted, unicode):
        # the quoting function may hand back a byte string (Python 2)
        quoted = quoted.decode('utf-8')
    # a colon is never percent-encoded in the returned string
    return quoted.replace('%3A', ':')
76
+
77
+
78
def unquote(s):
    """
    Return a percent-decoded unicode string, given an `s` byte or unicode
    string.

    `s` is passed unchanged to the underlying unquoting function; if that
    function returns a byte string, it is decoded as UTF-8.
    """
    unquoted = _percent_unquote(s)
    if not isinstance(unquoted, unicode):
        # the unquoting function may hand back a byte string (Python 2)
        unquoted = unquoted.decode('utf-8')
    return unquoted
66
87
67
88
68
89
def get_quoter (encode = True ):
@@ -72,96 +93,146 @@ def get_quoter(encode=True):
72
93
if encode is True :
73
94
return quote
74
95
elif encode is False :
75
- return percent_unquote
96
+ return unquote
76
97
elif encode is None :
77
98
return lambda x : x
78
99
79
100
80
- def normalize_qualifiers (qualifiers , encode = True ):
81
- """
82
- Return normalized qualifiers.
101
def normalize_type(type, encode=True):  # NOQA
    """
    Return a normalized purl `type` as a stripped, lowercased unicode string,
    or None if `type` is empty. `type` is a byte or unicode string.

    `encode` selects the quoting callable returned by get_quoter(). When
    `encode` is True the value is normalized first and percent-encoded last;
    otherwise the quoting callable is applied first and the result is
    normalized.
    """
    if not type:
        return
    if not isinstance(type, unicode):
        type = type.decode('utf-8')  # NOQA

    quoter = get_quoter(encode)
    if encode is not True:
        # decode (or pass through) first so that stripping and lowercasing
        # apply to the plain text
        type = quoter(type)  # NOQA

    type = type.strip().lower()  # NOQA

    if encode is True:
        # encode last: quoting first would turn surrounding whitespace into
        # '%20' and lowercasing would then alter the percent-escapes
        type = quoter(type)  # NOQA
    return type or None
86
110
87
- If `qualifiers` is a string of qualfiers, formatted to the purl specifications, and `encode`
88
- is false, the string is then converted to a dictionary of qualifiers and their values.
89
- """
90
- quoting = get_quoter (encode )
91
-
92
- if qualifiers :
93
- if isinstance (qualifiers , basestring ):
94
- # decode string to dict
95
- qualifiers = qualifiers .split ('&' )
96
- qualifiers = [kv .partition ('=' ) for kv in qualifiers ]
97
- if qualifiers :
98
- qualifiers = [(k , v ) for k , _ , v in qualifiers ]
99
- else :
100
- qualifiers = []
101
- elif isinstance (qualifiers , (dict , OrderedDict ,)):
102
- qualifiers = qualifiers .items ()
103
- else :
104
- raise ValueError (
105
- 'Invalid qualifier. '
106
- 'Must be a string or dict:{}' .format (repr (qualifiers )))
107
111
108
- if qualifiers :
109
- qualifiers = {
110
- k .strip ().lower (): quoting (v )
111
- for k , v in qualifiers
112
- if k and k .strip () and v and v .strip ()
113
- }
112
def normalize_namespace(namespace, ptype, encode=True):  # NOQA
    """
    Return a normalized purl `namespace` as a unicode string of '/'-joined
    segments, or None if nothing is left. `namespace` is a byte or unicode
    string and `ptype` is the purl type.

    Leading and trailing whitespace and slashes are removed and blank
    segments are dropped. Segments are lowercased for the bitbucket, github
    and pypi types. Each segment goes through the quoting callable selected
    by `encode` with get_quoter().
    """
    if not namespace:
        return
    if not isinstance(namespace, unicode):
        namespace = namespace.decode('utf-8')

    quoter = get_quoter(encode)
    lowercase = ptype in ('bitbucket', 'github', 'pypi')

    segments = []
    for seg in namespace.strip().strip('/').split('/'):
        if not seg.strip():
            continue
        if encode is True:
            # lowercase the plain text, then percent-encode it
            if lowercase:
                seg = seg.lower()
            seg = quoter(seg)
        else:
            # decode first: lowercasing a still-encoded segment would only
            # change the escapes and leave the decoded characters untouched
            seg = quoter(seg)
            if lowercase:
                seg = seg.lower()
        segments.append(seg)
    return '/'.join(segments) or None
120
124
121
- return qualifiers or None
122
125
126
def normalize_name(name, ptype, encode=True):  # NOQA
    """
    Return a normalized purl `name` as a unicode string, or None if nothing
    is left. `name` is a byte or unicode string and `ptype` is the purl type.

    Surrounding whitespace and slashes are removed. The name is lowercased
    for the bitbucket, github and pypi types, and underscores are replaced
    by dashes for the pypi type. `encode` selects the quoting callable
    returned by get_quoter(): when `encode` is True the name is normalized
    first and percent-encoded last; otherwise the quoting callable is applied
    first and the result is normalized.
    """
    if not name:
        return
    if not isinstance(name, unicode):
        name = name.decode('utf-8')

    quoter = get_quoter(encode)
    if encode is not True:
        # decode (or pass through) first so that the normalization below
        # applies to the plain text
        name = quoter(name)

    name = name.strip().strip('/')
    if ptype in ('bitbucket', 'github', 'pypi',):
        name = name.lower()
    if ptype in ('pypi',):
        name = name.replace('_', '-')

    if encode is True:
        # encode last: quoting first would turn surrounding whitespace into
        # '%20' and lowercasing would then alter the percent-escapes instead
        # of the characters they stand for
        name = quoter(name)
    return name or None
140
+
141
+
142
def normalize_version(version, encode=True):  # NOQA
    """
    Return a normalized purl `version` as a unicode string, or None if
    nothing is left. `version` is a byte or unicode string.

    Surrounding whitespace is removed and the result goes through the
    quoting callable selected by `encode` with get_quoter().
    """
    if not version:
        return
    if not isinstance(version, unicode):
        version = version.decode('utf-8')

    quoter = get_quoter(encode)
    version = quoter(version.strip())
    return version or None
151
+
152
+
153
def normalize_qualifiers(qualifiers, encode=True):  # NOQA
    """
    Return normalized `qualifiers` as a mapping (or as a string if `encode` is
    true). The `qualifiers` arg is either a mapping or a string of
    '&'-separated key=value pairs.

    Keys are stripped and lowercased; values go through the quoting callable
    selected by `encode` with get_quoter(). Pairs with a blank key or a blank
    value are dropped.

    If `encode` is true, return a string of sorted key=value pairs joined with
    '&', or None if there is no qualifier left. Otherwise always return a
    mapping (and never None).

    Raise ValueError if `qualifiers` is neither a string nor a dict, or if a
    key is not valid.
    """
    if not qualifiers:
        return None if encode else {}

    if isinstance(qualifiers, basestring):
        if not isinstance(qualifiers, unicode):
            qualifiers = qualifiers.decode('utf-8')
        # decode string to list of (key, value) tuples
        pairs = [kv.partition('=') for kv in qualifiers.split('&')]
        pairs = [(k, v) for k, _, v in pairs]
    elif isinstance(qualifiers, dict):
        pairs = qualifiers.items()
    else:
        raise ValueError(
            'Invalid qualifier. '
            'Must be a string or dict:{}'.format(repr(qualifiers)))

    quoter = get_quoter(encode)
    qualifiers = {k.strip().lower(): quoter(v)
                  for k, v in pairs if k and k.strip() and v and v.strip()}

    valid_chars = string.ascii_letters + string.digits + '.-_'
    # keys cannot be empty here: blank keys were dropped just above
    for key in qualifiers:
        if '%' in key:
            raise ValueError(
                "A qualifier key cannot be percent encoded: {}".format(repr(key)))

        if ' ' in key:
            raise ValueError(
                "A qualifier key cannot contain spaces: {}".format(repr(key)))

        if not all(c in valid_chars for c in key):
            raise ValueError(
                "A qualifier key must be composed only of ASCII letters and numbers, "
                "period, dash and underscore: {}".format(repr(key)))

        if key[0] in string.digits:
            raise ValueError(
                "A qualifier key cannot start with a number: {}".format(repr(key)))

    if encode:
        # encode the mapping as a string of sorted key=value pairs
        items = sorted(qualifiers.items())
        encoded = '&'.join('{}={}'.format(k, v) for k, v in items)
        return encoded or None
    return qualifiers or {}
149
210
150
- name = name or None
151
211
152
- if version :
153
- version = quoting (version .strip ())
212
def normalize_subpath(subpath, encode=True):  # NOQA
    """
    Return a normalized purl `subpath` as a unicode string of '/'-joined
    segments, or None if nothing is left. `subpath` is a byte or unicode
    string.

    Blank segments and the '.' and '..' segments are dropped. Each remaining
    segment goes through the quoting callable selected by `encode` with
    get_quoter().
    """
    if not subpath:
        return None
    if not isinstance(subpath, unicode):
        subpath = subpath.decode('utf-8')

    quoter = get_quoter(encode)
    segments = subpath.split('/')
    segments = [quoter(s) for s in segments if s.strip() and s not in ('.', '..')]
    subpath = '/'.join(segments)
    return subpath or None
156
223
157
- if subpath :
158
- segments = subpath .split ('/' )
159
- segments = [quoting (s ) for s in segments if s and s .strip ()
160
- and s not in ('.' , '..' )]
161
- subpath = '/' .join (segments )
162
224
163
- return (type or None , namespace or None , name or None , version or None ,
164
- qualifiers or None , subpath or None )
225
def normalize(type, namespace, name, version, qualifiers, subpath, encode=True):  # NOQA
    """
    Return normalized purl components as a tuple of
    (type, namespace, name, version, qualifiers, subpath).

    Each component is normalized by its own normalize_* function and `encode`
    is passed to each of them. The namespace and the name are normalized
    using the already normalized type.
    """
    type = normalize_type(type, encode)  # NOQA
    namespace = normalize_namespace(namespace, type, encode)
    name = normalize_name(name, type, encode)
    version = normalize_version(version, encode)
    qualifiers = normalize_qualifiers(qualifiers, encode)
    subpath = normalize_subpath(subpath, encode)
    return type, namespace, name, version, qualifiers, subpath
165
236
166
237
167
238
_components = ['type' , 'namespace' , 'name' , 'version' , 'qualifiers' , 'subpath' ]
@@ -191,7 +262,7 @@ def __new__(self, type=None, namespace=None, name=None, # NOQA
191
262
raise ValueError ('Invalid purl: {} argument must be a string: {}.'
192
263
.format (key , repr (value )))
193
264
194
- if qualifiers and not isinstance (qualifiers , (basestring , dict , OrderedDict , )):
265
+ if qualifiers and not isinstance (qualifiers , (basestring , dict ,)):
195
266
raise ValueError ('Invalid purl: {} argument must be a dict or a string: {}.'
196
267
.format ('qualifiers' , repr (qualifiers )))
197
268
@@ -268,7 +339,7 @@ def from_string(cls, purl):
268
339
'purl is missing the required '
269
340
'type component: {}.' .format (repr (purl )))
270
341
271
- scheme , authority , path , qualifiers , subpath = urlsplit (
342
+ scheme , authority , path , qualifiers , subpath = _urlsplit (
272
343
url = remainder , scheme = '' , allow_fragments = True )
273
344
274
345
if scheme or authority :
0 commit comments