Skip to content

Commit e5c250c

Browse files
committed
complete 0008
1 parent 7611cec commit e5c250c

File tree

1 file changed

+37
-0
lines changed

1 file changed

+37
-0
lines changed

Jimmy66/0008/0008.py

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
#!/usr/bin/env python
2+
# -*- coding: utf-8 -*-
3+
4+
import urllib2
5+
from pyquery import PyQuery as pq
6+
from lxml import etree
7+
import re
8+
9+
10+
def get_html(url):
    """Download ``url`` and return the raw response body.

    Args:
        url: Address of the page to fetch; passed straight to
            ``urllib2.urlopen``.

    Returns:
        Whatever ``read()`` on the response yields (the undecoded body).

    Raises:
        Any exception raised by ``urllib2.urlopen`` or ``read()`` is
        propagated unchanged.
    """
    # Local import keeps this function self-contained.
    from contextlib import closing

    # urllib2 responses are not context managers in Python 2; closing()
    # guarantees the connection is released even when read() raises.
    with closing(urllib2.urlopen(url)) as response:
        return response.read()
13+
14+
def get_content(html):
    """Extract the article markup from a page's HTML.

    Args:
        html: Page source to search.

    Returns:
        The text found between the opening ``<div class="content">`` tag and
        the first ``<div class="article-copyright">`` tag that follows it.

    Raises:
        ValueError: If the two marker tags do not appear in that order.
    """
    # ``.`` under DOTALL accepts every character, newlines included, so no
    # character in the article body (such as '$') can defeat the match.
    # The non-greedy quantifier stops at the first closing marker.
    match = re.search(
        r'<div class="content">(.*?)<div class="article-copyright">',
        html,
        re.DOTALL
    )
    if match is None:
        raise ValueError(
            'could not locate the article content between the '
            '"content" and "article-copyright" divs'
        )
    return match.group(1)
18+
19+
def get_result(content):
    """Return the text of every ``<p>`` element found in ``content``.

    Args:
        content: HTML fragment as a UTF-8 encoded byte string; it is decoded
            before being parsed with PyQuery.

    Returns:
        A list with one entry per ``<p>`` element, each entry being the
        result of PyQuery's ``text()`` for that element.
    """
    document = pq(content.decode('utf-8'))
    return [pq(paragraph).text() for paragraph in document('p')]
27+
28+
29+
def main():
30+
url = 'http://jimmy66.com/164.html'
31+
html = get_html(url)
32+
content = get_content(html)
33+
result = get_result(content)
34+
for line in result:
35+
print line
36+
if __name__ == '__main__':
37+
main()

0 commit comments

Comments
 (0)