Skip to content

Commit 8500aaf

Browse files
committed
add markupsafe
1 parent 06ff8f8 commit 8500aaf

File tree

4 files changed

+637
-0
lines changed

4 files changed

+637
-0
lines changed

src/buildlib/markupsafe/__init__.py

Lines changed: 298 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,298 @@
1+
# -*- coding: utf-8 -*-
2+
"""
3+
markupsafe
4+
~~~~~~~~~~
5+
6+
Implements a Markup string.
7+
8+
:copyright: (c) 2010 by Armin Ronacher.
9+
:license: BSD, see LICENSE for more details.
10+
"""
11+
import re
import string
try:
    # ``Mapping`` lives in ``collections.abc`` on Python 3; the alias in
    # ``collections`` was removed in Python 3.10.
    from collections.abc import Mapping
except ImportError:  # Python 2
    from collections import Mapping
from markupsafe._compat import text_type, string_types, int_types, \
     unichr, iteritems, PY2
16+
17+
18+
# Public names of the package.  ``soft_str`` is appended at the bottom of the
# module when not running on Python 2.
__all__ = ['Markup', 'soft_unicode', 'escape', 'escape_silent']


# Matches an HTML comment or any ``<...>`` tag; used by ``Markup.striptags``.
_striptags_re = re.compile(r'(<!--.*?-->|<[^>]*>)')
# Matches ``&...;`` and captures the text between ``&`` and ``;``; used by
# ``Markup.unescape``.
_entity_re = re.compile(r'&([^;]+);')
23+
24+
25+
class Markup(text_type):
    r"""Marks a string as being safe for inclusion in HTML/XML output without
    needing to be escaped.  This implements the `__html__` interface a couple
    of frameworks and web applications use.  :class:`Markup` is a direct
    subclass of `unicode` and provides all the methods of `unicode` just that
    it escapes arguments passed and always returns `Markup`.

    The `escape` function returns markup objects so that double escaping can't
    happen.

    The constructor of the :class:`Markup` class can be used for three
    different things:  When passed a unicode object it's assumed to be safe,
    when passed an object with an HTML representation (has an `__html__`
    method) that representation is used, otherwise the object passed is
    converted into a unicode string and then assumed to be safe:

    >>> Markup("Hello <em>World</em>!")
    Markup(u'Hello <em>World</em>!')
    >>> class Foo(object):
    ...     def __html__(self):
    ...         return '<a href="#">foo</a>'
    ...
    >>> Markup(Foo())
    Markup(u'<a href="#">foo</a>')

    If you want the object passed to always be treated as unsafe you can use
    the :meth:`escape` classmethod to create a :class:`Markup` object:

    >>> Markup.escape("Hello <em>World</em>!")
    Markup(u'Hello &lt;em&gt;World&lt;/em&gt;!')

    Operations on a markup string are markup aware which means that all
    arguments are passed through the :func:`escape` function:

    >>> em = Markup("<em>%s</em>")
    >>> em % "foo & bar"
    Markup(u'<em>foo &amp; bar</em>')
    >>> strong = Markup("<strong>%(text)s</strong>")
    >>> strong % {'text': '<blink>hacker here</blink>'}
    Markup(u'<strong>&lt;blink&gt;hacker here&lt;/blink&gt;</strong>')
    >>> Markup("<em>Hello</em> ") + "<foo>"
    Markup(u'<em>Hello</em> &lt;foo&gt;')
    """
    __slots__ = ()

    def __new__(cls, base=u'', encoding=None, errors='strict'):
        """Create the markup string from `base`.

        If `base` has an `__html__` method its return value is used instead
        of `base`.  `encoding` and `errors` are only forwarded to the
        underlying text type when `encoding` is given.
        """
        if hasattr(base, '__html__'):
            base = base.__html__()
        if encoding is None:
            return text_type.__new__(cls, base)
        return text_type.__new__(cls, base, encoding, errors)

    def __html__(self):
        """Return the object itself; it already is its HTML representation."""
        return self

    def __add__(self, other):
        """Concatenate with the escaped `other` and return the same class."""
        if isinstance(other, string_types) or hasattr(other, '__html__'):
            return self.__class__(super(Markup, self).__add__(self.escape(other)))
        return NotImplemented

    def __radd__(self, other):
        """Reflected concatenation: escape `other`, then append this string."""
        if hasattr(other, '__html__') or isinstance(other, string_types):
            return self.escape(other).__add__(self)
        return NotImplemented

    def __mul__(self, num):
        """Repeat the string `num` times, keeping the markup class."""
        if isinstance(num, int_types):
            return self.__class__(text_type.__mul__(self, num))
        return NotImplemented
    __rmul__ = __mul__

    def __mod__(self, arg):
        """%-format the string; every argument is wrapped in a
        `_MarkupEscapeHelper` so it is escaped when it is rendered.
        """
        if isinstance(arg, tuple):
            arg = tuple(_MarkupEscapeHelper(x, self.escape) for x in arg)
        else:
            arg = _MarkupEscapeHelper(arg, self.escape)
        return self.__class__(text_type.__mod__(self, arg))

    def __repr__(self):
        """Return ``ClassName(<repr of the underlying text>)``."""
        return '%s(%s)' % (
            self.__class__.__name__,
            text_type.__repr__(self)
        )

    # The following wrappers take their docstring from the wrapped text
    # method (see the ``__doc__`` assignments), so they carry comments only.

    # Escapes every item of ``seq`` before joining.
    def join(self, seq):
        return self.__class__(text_type.join(self, map(self.escape, seq)))
    join.__doc__ = text_type.join.__doc__

    # The split family returns lists whose items are of the markup class.
    def split(self, *args, **kwargs):
        return list(map(self.__class__, text_type.split(self, *args, **kwargs)))
    split.__doc__ = text_type.split.__doc__

    def rsplit(self, *args, **kwargs):
        return list(map(self.__class__, text_type.rsplit(self, *args, **kwargs)))
    rsplit.__doc__ = text_type.rsplit.__doc__

    def splitlines(self, *args, **kwargs):
        return list(map(self.__class__, text_type.splitlines(
            self, *args, **kwargs)))
    splitlines.__doc__ = text_type.splitlines.__doc__

    def unescape(self):
        r"""Unescape markup again into a text_type string.  This also resolves
        known HTML4 and XHTML entities:

        >>> Markup("Main &raquo; <em>About</em>").unescape()
        u'Main \xbb <em>About</em>'

        Named entities are looked up in ``HTML_ENTITIES``; ``&#...;`` and
        ``&#x...;`` references are converted numerically.  Every other match
        of the entity pattern, including numeric references that cannot be
        converted to a character, is replaced by the empty string.
        """
        from markupsafe._constants import HTML_ENTITIES

        def handle_match(m):
            name = m.group(1)
            if name in HTML_ENTITIES:
                return unichr(HTML_ENTITIES[name])
            try:
                if name[:2] in ('#x', '#X'):
                    return unichr(int(name[2:], 16))
                elif name.startswith('#'):
                    return unichr(int(name[1:]))
            except (ValueError, OverflowError):
                # ValueError: malformed digits or a code point outside the
                # valid range.  OverflowError: the number is too large for
                # the character conversion (e.g. ``&#99999999999999999999;``).
                pass
            return u''
        return _entity_re.sub(handle_match, text_type(self))

    def striptags(self):
        r"""Unescape markup into a text_type string and strip all tags.  This
        also resolves known HTML4 and XHTML entities.  Runs of whitespace are
        collapsed into a single space:

        >>> Markup("Main &raquo; <em>About</em>").striptags()
        u'Main \xbb About'
        """
        stripped = u' '.join(_striptags_re.sub('', self).split())
        return Markup(stripped).unescape()

    @classmethod
    def escape(cls, s):
        """Escape the string.  Works like :func:`escape` with the difference
        that for subclasses of :class:`Markup` this function would return the
        correct subclass.
        """
        rv = escape(s)
        if rv.__class__ is not cls:
            return cls(rv)
        return rv

    # Class-body helper (deleted again below): builds a method that escapes
    # its string / ``__html__`` arguments, calls the text method ``name`` and
    # wraps the result in the markup class.
    def make_simple_escaping_wrapper(name):
        orig = getattr(text_type, name)

        def func(self, *args, **kwargs):
            args = _escape_argspec(list(args), enumerate(args), self.escape)
            _escape_argspec(kwargs, iteritems(kwargs), self.escape)
            return self.__class__(orig(self, *args, **kwargs))
        func.__name__ = orig.__name__
        func.__doc__ = orig.__doc__
        return func

    # Install the wrappers into the class namespace under their own names.
    for method in '__getitem__', 'capitalize', \
                  'title', 'lower', 'upper', 'replace', 'ljust', \
                  'rjust', 'lstrip', 'rstrip', 'center', 'strip', \
                  'translate', 'expandtabs', 'swapcase', 'zfill':
        locals()[method] = make_simple_escaping_wrapper(method)

    # new in python 2.5
    if hasattr(text_type, 'partition'):
        def partition(self, sep):
            """Like the text method, with `sep` escaped and markup parts."""
            return tuple(map(self.__class__,
                             text_type.partition(self, self.escape(sep))))

        def rpartition(self, sep):
            """Like the text method, with `sep` escaped and markup parts."""
            return tuple(map(self.__class__,
                             text_type.rpartition(self, self.escape(sep))))

    # new in python 2.6
    if hasattr(text_type, 'format'):
        def format(*args, **kwargs):
            """Format the string with `EscapeFormatter`, escaping the values
            that are substituted into it.
            """
            # ``self`` is taken from ``args`` rather than declared as a
            # parameter -- presumably so that a keyword argument named
            # ``self`` can still be passed to the format call.
            self, args = args[0], args[1:]
            formatter = EscapeFormatter(self.escape)
            kwargs = _MagicFormatMapping(args, kwargs)
            return self.__class__(formatter.vformat(self, args, kwargs))

        def __html_format__(self, format_spec):
            """Return the object itself.

            :raises ValueError: if a non-empty `format_spec` is given.
            """
            if format_spec:
                raise ValueError('Unsupported format specification '
                                 'for Markup.')
            return self

    # not in python 3
    if hasattr(text_type, '__getslice__'):
        __getslice__ = make_simple_escaping_wrapper('__getslice__')

    # Keep the loop variable and the helper out of the class namespace.
    del method, make_simple_escaping_wrapper
214+
215+
216+
class _MagicFormatMapping(Mapping):
217+
"""This class implements a dummy wrapper to fix a bug in the Python
218+
standard library for string formatting.
219+
220+
See http://bugs.python.org/issue13598 for information about why
221+
this is necessary.
222+
"""
223+
224+
def __init__(self, args, kwargs):
225+
self._args = args
226+
self._kwargs = kwargs
227+
self._last_index = 0
228+
229+
def __getitem__(self, key):
230+
if key == '':
231+
idx = self._last_index
232+
self._last_index += 1
233+
try:
234+
return self._args[idx]
235+
except LookupError:
236+
pass
237+
key = str(idx)
238+
return self._kwargs[key]
239+
240+
def __iter__(self):
241+
return iter(self._kwargs)
242+
243+
def __len__(self):
244+
return len(self._kwargs)
245+
246+
247+
if hasattr(text_type, 'format'):
    class EscapeFormatter(string.Formatter):
        """``string.Formatter`` that passes every formatted field through the
        `escape` callable it was constructed with.
        """

        def __init__(self, escape):
            self.escape = escape

        def format_field(self, value, format_spec):
            """Render one field and return it escaped, as ``text_type``.

            ``__html_format__`` is preferred when the value has it, then
            ``__html__`` (which accepts no format spec); anything else is
            rendered by the stock formatter.
            """
            if hasattr(value, '__html_format__'):
                rendered = value.__html_format__(format_spec)
            elif not hasattr(value, '__html__'):
                rendered = string.Formatter.format_field(
                    self, value, format_spec)
            elif format_spec:
                raise ValueError('No format specification allowed '
                                 'when formatting an object with '
                                 'its __html__ method.')
            else:
                rendered = value.__html__()
            return text_type(self.escape(rendered))
265+
266+
267+
def _escape_argspec(obj, iterable, escape):
268+
"""Helper for various string-wrapped functions."""
269+
for key, value in iterable:
270+
if hasattr(value, '__html__') or isinstance(value, string_types):
271+
obj[key] = escape(value)
272+
return obj
273+
274+
275+
class _MarkupEscapeHelper(object):
276+
"""Helper for Markup.__mod__"""
277+
278+
def __init__(self, obj, escape):
279+
self.obj = obj
280+
self.escape = escape
281+
282+
__getitem__ = lambda s, x: _MarkupEscapeHelper(s.obj[x], s.escape)
283+
__unicode__ = __str__ = lambda s: text_type(s.escape(s.obj))
284+
__repr__ = lambda s: str(s.escape(repr(s.obj)))
285+
__int__ = lambda s: int(s.obj)
286+
__float__ = lambda s: float(s.obj)
287+
288+
289+
# we have to import it down here as the speedups and native
290+
# modules import the Markup type which is defined above.
291+
try:
292+
from markupsafe._speedups import escape, escape_silent, soft_unicode
293+
except ImportError:
294+
from markupsafe._native import escape, escape_silent, soft_unicode
295+
296+
# Outside Python 2, additionally export ``soft_unicode`` as ``soft_str``.
if not PY2:
    __all__ += ['soft_str']
    soft_str = soft_unicode

src/buildlib/markupsafe/_compat.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
# -*- coding: utf-8 -*-
2+
"""
3+
markupsafe._compat
4+
~~~~~~~~~~~~~~~~~~
5+
6+
Compatibility module for different Python versions.
7+
8+
:copyright: (c) 2013 by Armin Ronacher.
9+
:license: BSD, see LICENSE for more details.
10+
"""
11+
import sys
12+
13+
# True when the running interpreter's major version is 2.
PY2 = sys.version_info[0] == 2

if PY2:
    text_type = unicode
    string_types = (str, unicode)
    # bind the builtin as a module attribute so it can be imported from here
    unichr = unichr
    int_types = (int, long)

    def iteritems(x):
        """Return an iterator over the ``(key, value)`` pairs of `x`."""
        return x.iteritems()
else:
    text_type = str
    string_types = (str,)
    unichr = chr
    int_types = (int,)

    def iteritems(x):
        """Return an iterator over the ``(key, value)`` pairs of `x`."""
        return iter(x.items())

0 commit comments

Comments
 (0)