Skip to content

Commit 718c821

Browse files
committed
Fix the domain regex being too greedy. It parsed URLs like "http://example.com/test/bla.net_foo_123.jpg" as the domain "example.com/test/bla.net". Now it's fixed ^.^"
1 parent 0de01a4 commit 718c821

File tree

2 files changed

+11
-1
lines changed

2 files changed

+11
-1
lines changed

tests.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,16 @@ def test_url_parentheses(self):
5757
self.assertEqual(result.html, u'text (<a href="http://example.com">http://example.com</a>)')
5858
self.assertEqual(result.urls, [u'http://example.com'])
5959

60+
def test_url_underscore(self):
61+
result = self.parser.parse(u'text http://example.com/test/foo_123.jpg')
62+
self.assertEqual(result.html, u'text <a href="http://example.com/test/foo_123.jpg">http://example.com/test/foo...</a>')
63+
self.assertEqual(result.urls, [u'http://example.com/test/foo_123.jpg'])
64+
65+
def test_url_underscore_dot(self):
66+
result = self.parser.parse(u'text http://example.com/test/bla.net_foo_123.jpg')
67+
self.assertEqual(result.html, u'text <a href="http://example.com/test/bla.net_foo_123.jpg">http://example.com/test/bla...</a>')
68+
self.assertEqual(result.urls, [u'http://example.com/test/bla.net_foo_123.jpg'])
69+
6070
def test_url_amp_lang_equals(self):
6171
result = self.parser.parse(u'Check out http://search.twitter.com/search?q=avro&lang=en')
6272
self.assertEqual(result.html, u'Check out <a href="http://search.twitter.com/search?q=avro&amp;lang=en">http://search.twitter.com/s...</a>')

ttp.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@
4545

4646
# URLs
4747
PRE_CHARS = ur'(?:[^/"\':!=]|^|\:)'
48-
DOMAIN_CHARS = ur'([\.-]|[^\s_\!\.])+\.[a-z]{2,}(?::[0-9]+)?'
48+
DOMAIN_CHARS = ur'([\.-]|[^\s_\!\.\/])+\.[a-z]{2,}(?::[0-9]+)?'
4949
PATH_CHARS = ur'(?:[\.,]?[%s!\*\'\(\);:=\+\$/%s#\[\]\-_,~@])' % (UTF_CHARS, '%')
5050
QUERY_CHARS = ur'[a-z0-9!\*\'\(\);:&=\+\$/%#\[\]\-_\.,~]'
5151

0 commit comments

Comments
 (0)