Skip to content

Commit 5543ef5

Browse files
committed
sort
1 parent 1d56a5c commit 5543ef5

File tree

7 files changed

+387
-321
lines changed

7 files changed

+387
-321
lines changed

.idea/Python.iml

Lines changed: 2 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

.idea/modules.xml

Lines changed: 2 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

.idea/workspace.xml

Lines changed: 218 additions & 248 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Python基础代码/异步IO学习.py

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
from lxml import etree
2+
import aiohttp, asyncio
3+
import time
4+
5+
# Pages of the Douban list to crawl: the `start` query offset runs from 0
# through 100 in steps of 25.
list_url = [
    "https://www.douban.com/doulist/41691053/?start={}&sort=seq&sub_type=4".format(number)
    for number in range(0, 125, 25)
]
7+
8+
9+
async def fetch(url, session=None):
    """Download *url* and return the response body decoded as UTF-8 text.

    Args:
        url: Address to request with HTTP GET.
        session: Optional ``aiohttp.ClientSession`` to reuse. When omitted, a
            temporary session is opened for this single request and closed
            afterwards, which is what every existing caller gets.

    Returns:
        The response body as a ``str``.
    """
    if session is not None:
        # Caller owns the session; only the response is scoped here.
        async with session.get(url) as html:
            return await html.text(encoding="utf-8")

    async with aiohttp.ClientSession() as own_session:
        async with own_session.get(url) as html:
            return await html.text(encoding="utf-8")
14+
15+
16+
async def parser(url):
    """Fetch one list page and print the title of every entry found on it.

    Args:
        url: Address of the page to download and scrape.
    """
    response = await fetch(url)
    dom = etree.HTML(response) if response else None
    if dom is None:
        # Empty body, or nothing the parser could build a tree from.
        return

    # Every entry is a <div> whose id attribute starts with "item".
    for item in dom.xpath('//div[starts-with(@id,"item")]'):
        # The title link is looked up at a fixed relative path below the entry.
        titles = item.xpath('div/div[2]/div[@class="title"]/a/text()')
        if not titles:
            # An entry without a title link would otherwise raise IndexError
            # and abort the remaining entries of this page.
            continue
        # strip() already removes newlines along with other whitespace.
        print(titles[0].strip())
23+
24+
25+
# Adding the `async` keyword to a function turns it into a coroutine function.
# The coroutines are handed to an event loop's run_until_complete(), which
# schedules them and blocks until all of them have finished.
# The loop is created explicitly (rather than via asyncio.get_event_loop(),
# which is deprecated when no loop is running) and is always closed afterwards.


async def main():
    """Scrape every page in ``list_url`` concurrently."""
    await asyncio.gather(*(parser(url) for url in list_url))


# start = time.time()
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)
try:
    loop.run_until_complete(main())
finally:
    # Release the loop's resources even when one of the pages fails.
    loop.close()
# end = time.time()
# print(end - start)

Python爬取斗鱼房间信息和数据分析——学习中/利用网址构造爬取斗鱼全部房间信息到Mongodb.py

Lines changed: 0 additions & 73 deletions
This file was deleted.
Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
# 这个抓取弹幕,然后把用户的uid,昵称,等级,弹幕内容都保存到mongodb中
2+
__author__ = '布咯咯_rieuse'
3+
__time__ = '2017.6.2'
4+
__github__ = 'https://github.com/rieuse'
5+
6+
import multiprocessing
7+
import re
8+
import socket
9+
import time
10+
11+
import pymongo
12+
import requests
13+
from bs4 import BeautifulSoup
14+
15+
# MongoDB handle: records are written to the "info" collection of the
# "DouyuTV_danmu" database on the local server.
clients = pymongo.MongoClient('localhost')
db = clients["DouyuTV_danmu"]
col = db["info"]

# One TCP connection to the barrage server, opened when the module is loaded
# and used by sendmsg() and start().
port = 8601
host = socket.gethostbyname("openbarrage.douyutv.com")
client = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
client.connect((host, port))
23+
24+
# Byte patterns that pull individual fields out of a raw barrage message.
# Each one captures the text between two neighbouring field markers.
danmu_path = re.compile(b'txt@=(.+?)/cid@')
uid_path = re.compile(b'uid@=(.+?)/nn@')
nickname_path = re.compile(b'nn@=(.+?)/txt@')
# Any run of digits is accepted: the earlier [1-9][0-9]? form could not match
# level 0 or levels of 100 and above.
level_path = re.compile(b'level@=([0-9]+)/sahf')
28+
29+
30+
def sendmsg(msgstr):
    """Frame *msgstr* and write it to the module-level ``client`` socket.

    The frame is a 12-byte little-endian header followed by the UTF-8 encoded
    payload: the length (payload size + 8) written twice as 4-byte integers,
    then the message code 689 as a 4-byte integer.

    Args:
        msgstr: Message text to send. Callers in this file end it with a NUL
            character.

    Raises:
        OSError: If the socket write fails.
    """
    msg = msgstr.encode('utf-8')
    data_length = len(msg) + 8
    code = 689
    length_field = data_length.to_bytes(4, 'little')
    msgHead = length_field + length_field + code.to_bytes(4, 'little')
    # sendall() keeps writing until the whole frame is out. The header used to
    # go through a single send(), which may write only part of its argument.
    client.sendall(msgHead + msg)
41+
42+
43+
def _parse_danmu(packet):
    """Build the record for one raw message, or return None if it is not a chat line."""
    danmu = danmu_path.search(packet)
    uid = uid_path.search(packet)
    nickname = nickname_path.search(packet)
    if danmu is None or uid is None or nickname is None:
        return None
    level = level_path.search(packet)
    return {
        'uid': uid.group(1).decode(encoding='utf-8'),
        'nickname': nickname.group(1).decode(encoding='utf-8'),
        # Messages without a level field are stored as level "0".
        'level': level.group(1).decode(encoding='utf-8') if level else '0',
        'danmu': danmu.group(1).decode(encoding='utf-8'),
    }


def start(roomid):
    """Log in to a room, join its barrage group and store every chat message.

    Sends the login and join-group requests, then reads from the module-level
    ``client`` socket until the peer closes the connection. Each chat message
    is printed and inserted into the ``col`` MongoDB collection as a document
    with the keys ``uid``, ``nickname``, ``level`` and ``danmu``.

    Args:
        roomid: Identifier of the room to join.
    """
    msg = 'type@=loginreq/username@=rieuse/password@=douyu/roomid@={}/\0'.format(roomid)
    sendmsg(msg)
    msg_more = 'type@=joingroup/rid@={}/gid@=-9999/\0'.format(roomid)
    sendmsg(msg_more)

    print('---------------欢迎连接到{}的直播间---------------'.format(get_name(roomid)))

    # Bytes received so far that do not yet end in a NUL terminator.
    # NOTE(review): assumes server messages are NUL-terminated like the ones
    # this client sends - confirm against the protocol description.
    pending = b''
    while True:
        data = client.recv(1024)
        if not data:
            # Peer closed the connection.
            break
        pending += data
        # Everything before the last NUL is complete; the tail waits for more.
        *packets, pending = pending.split(b'\0')
        for packet in packets:
            # Fields are extracted per message so that uid, nickname, level and
            # text always belong to the same chat line.
            try:
                product = _parse_danmu(packet)
                if product is None:
                    continue
                print(product)
                col.insert_one(product)
                print('成功导入mongodb')
            except Exception as e:
                # Best effort: one bad message or failed insert must not stop
                # the receive loop.
                print(e)
74+
75+
76+
def keeplive():
    """Send a keep-alive message every 15 seconds, forever.

    Each message carries the current Unix time, truncated to whole seconds,
    as its tick value.
    """
    while True:
        tick = int(time.time())
        sendmsg('type@=keeplive/tick@={}/\0'.format(tick))
        time.sleep(15)
81+
82+
83+
def get_name(roomid):
    """Look up the streamer name shown on a room's web page.

    Args:
        roomid: Room identifier; it is appended to the site URL.

    Returns:
        The text of the first ``<a class="zb-name">`` element, or the room id
        as a string when the page has no such element.

    Raises:
        requests.RequestException: If the page cannot be downloaded in time.
    """
    # format() accepts a non-str id as well; the timeout keeps a stalled
    # server from blocking the caller forever.
    r = requests.get("http://www.douyu.com/{}".format(roomid), timeout=10)
    soup = BeautifulSoup(r.text, 'lxml')
    # The attribute filter is a mapping; it used to be written as the set
    # literal {'class', 'zb-name'}.
    anchor = soup.find('a', {'class': 'zb-name'})
    if anchor is None or anchor.string is None:
        # Page layout differs or the room does not exist: fall back to the id.
        return str(roomid)
    return anchor.string
87+
88+
89+
if __name__ == '__main__':
    import threading

    # Prompt wording fixed: the original said 出入 where 输入 ("enter") was meant.
    room_id = input('请输入房间ID: ')

    # Threads share the one socket opened when the module is loaded. Separate
    # processes do not under the "spawn" start method: each child re-imports
    # the module and opens its own connection, so the heartbeats would never
    # reach the connection that logged in.
    receiver = threading.Thread(target=start, args=(room_id,), daemon=True)
    heartbeat = threading.Thread(target=keeplive, daemon=True)
    receiver.start()
    heartbeat.start()
    # Exit once the receive loop ends; the daemon heartbeat stops with it.
    receiver.join()
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
# 抓取弹幕后保存为text文档,然后词云分析,此部分是词云部分
2+
__author__ = '布咯咯_rieuse'
3+
__time__ = '2017.6.2'
4+
__github__ = 'https://github.com/rieuse'
5+
6+
import jieba
7+
from wordcloud import WordCloud, ImageColorGenerator
8+
import matplotlib.pyplot as plt
9+
import os
10+
import PIL.Image as Image
11+
import numpy as np
12+
13+
# Read the saved barrage text and segment it with jieba; the tokens are joined
# with spaces so that word boundaries are explicit.
with open('大司马上课后.txt', 'r', encoding='utf-8') as f:
    text = f.read()
cut_text = " ".join(jieba.cut(text))

# The mask image is read from, and the result written to, the script's own
# directory.
d = os.path.dirname(__file__)
color_mask = np.array(Image.open(os.path.join(d, 'img.jpg')))
my_wordcloud = WordCloud(
    background_color='#F0F8FF',  # background colour
    font_path="FZLTKHK--GBK1-0.ttf",  # a font with Chinese glyphs so the words render
    max_words=8000,
    font_step=20,  # a large step means fewer words are shown
    mask=color_mask,
    random_state=15,  # NOTE(review): presumably the layout's random seed - confirm
    min_font_size=15,
    max_font_size=202,
)
my_wordcloud.generate(cut_text)

# Colour the words from the mask image. recolor() is called on its own: its
# result used to be passed to plt.show(), which takes no image argument and
# only opened an empty figure.
image_colors = ImageColorGenerator(color_mask)
my_wordcloud.recolor(color_func=image_colors)

plt.imshow(my_wordcloud)  # draw the cloud as an image
plt.axis('off')  # hide the axes
plt.show()  # display the figure

my_wordcloud.to_file(os.path.join(d, 'pic.jpg'))

0 commit comments

Comments
 (0)