Skip to content

Commit d37bafd

Browse files
committed
add
1 parent a836253 commit d37bafd

File tree

9 files changed

+126
-85
lines changed

9 files changed

+126
-85
lines changed

.idea/workspace.xml

Lines changed: 89 additions & 78 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

爬虫/Include/网易云/cc.jpg

10.9 KB
Loading

爬虫/Include/网易云/commentSpider.py

Lines changed: 37 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,10 @@
11
import requests
22
import urllib.parse
33
import base64
4+
from wordcloud import WordCloud
5+
import jieba.analyse
6+
import matplotlib.pyplot as plt
7+
from bs4 import BeautifulSoup
48
from Crypto.Cipher import AES
59
header={'User-Agent':'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.90 Safari/537.36',
610
#'Postman-Token':'4cbfd1e6-63bf-4136-a041-e2678695b419',
@@ -52,7 +56,7 @@ def encrypt(key, content):
5256
# 重新编码
5357
result = str(base64.b64encode(encrypt_bytes), encoding='utf-8')
5458
return result
55-
def getcomment(songid):
59+
def getcomment(songid,page):
5660
url="https://music.163.com/weapi/v1/resource/comments/R_SO_4_"+songid+"?csrf_token="
5761
print(url)
5862
formdata = {
@@ -63,7 +67,7 @@ def getcomment(songid):
6367
aes_key = '0CoJUm6Qyw8W8jud'## 不变的
6468
print('aes_key:' + aes_key)
6569
# 对英文加密
66-
source_en = '{"rid":"R_SO_4_'+songid+'","offset":"20","total":"false","limit":"20","csrf_token":""}'
70+
source_en = '{"rid":"R_SO_4_'+songid+'","offset":"'+str(page*20)+'","total":"false","limit":"20","csrf_token":""}'
6771

6872
# offset: change this value yourself to fetch a different page of comments
6973
print(source_en)
@@ -79,8 +83,34 @@ def getcomment(songid):
7983
req = requests.post(url=url, data=formdata, headers=header)
8084
return req.json()
8185
if __name__ == '__main__':
82-
songid='487885426'
83-
comment=getcomment(songid)
84-
comment=comment['comments']
85-
for va in comment:
86-
print (va['content'])
86+
songid='346576'
87+
page=0
88+
text=''
89+
for page in range(10):
90+
comment=getcomment(songid,page)
91+
comment=comment['comments']
92+
for va in comment:
93+
print (va['content'])
94+
text+=va['content']
95+
ags = jieba.analyse.extract_tags(text, topK=50) # jieba keyword extraction: keep the top 50 keywords
96+
print(ags)
97+
text = " ".join(ags)
98+
backgroud_Image = plt.imread('tt.jpg') # 如果需要个性化词云
99+
wc = WordCloud(background_color="white",
100+
width=1200, height=900,
101+
mask=backgroud_Image, # 设置背景图片
102+
103+
#min_font_size=50,
104+
font_path="simhei.ttf",
105+
max_font_size=200, # 设置字体最大值
106+
random_state=50, # seed for the random generator, so the generated layout and colours are reproducible
107+
) # Font pitfall: font_path must be set to a font with Chinese glyphs, otherwise the words render as small empty boxes. Here it is "simhei.ttf" (SimHei).
108+
# wc.font_path="simhei.ttf"
109+
my_wordcloud = wc.generate(text)
110+
plt.imshow(my_wordcloud)
111+
plt.axis("off")
112+
plt.show() # 如果展示的话需要一个个点
113+
file = 'image/' + str("aita") + '.png'
114+
wc.to_file(file)
115+
116+
79.7 KB
Loading
74.1 KB
Loading
79.8 KB
Loading
82.8 KB
Loading
76.3 KB
Loading

爬虫/Include/网易云/tt.jpg

12.4 KB
Loading

0 commit comments

Comments
 (0)