Skip to content

Commit 1a3fd29

Browse files
committed
提交代码
1 parent 9c26c1a commit 1a3fd29

File tree

1 file changed

+82
-0
lines changed

1 file changed

+82
-0
lines changed

bdindex/bdindexneed.py

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
#!/usr/bin/env python3
2+
# -*- coding: utf-8 -*-
3+
"""
4+
@author: 闲欢
5+
"""
6+
import requests
7+
import json
8+
from wordcloud import WordCloud
9+
from matplotlib import pyplot as plt
10+
11+
12+
class bdindex:
    """Fetch related-word data from index.baidu.com and render it as word clouds.

    ``check_word`` asks the keyword-check endpoint about a keyword,
    ``get_index`` downloads the word graph for a keyword and draws one word
    cloud for the ``pv`` values and one for the ``ratio`` values, and
    ``gen_wc_tags`` renders, shows and saves a single word cloud.

    Args:
        cookie: Optional ``Cookie`` header value to use instead of the
            hard-coded class default. When omitted, the class-level
            ``headers`` are used unchanged.
    """

    # Search-index URL; ``{keyword}`` is filled in by ``get_index``.
    data_url = 'http://index.baidu.com/api/WordGraph/multi?wordlist[]={keyword}'
    # Keyword-check URL; ``%s`` is filled in by ``check_word``.
    check_url = 'http://index.baidu.com/api/AddWordApi/checkWordsExists?word=%s'
    # NOTE(review): the Cookie below is a browser session (it includes a BDUSS
    # token) committed to source. It is presumably expired and should not live
    # in the repository -- pass a fresh value via ``bdindex(cookie=...)``.
    headers = {
        "User-Agent": 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.106 Safari/537.36',
        "Cookie": 'PSTM=1579955530; BAIDUID=C98F0EF9DCB3FC7E06D3B0FA63695787:FG=1; BIDUPSID=1FB86823BF26D806A0117921DBD66135; BDSFRCVID=bpFOJeC62ZTm5dnuEvqKKASNJe3SOxnTH6aoprlQ5IIcI75XA-7tEG0P_U8g0KubIXdfogKKLgOTHPIF_2uxOjjg8UtVJeC6EG0Ptf8g0M5; H_BDCLCKID_SF=tJkf_D8XtK83fP36q470htFjMfQXetJyaR3UWpQvWJ5TMC_whlOFK-I0XHLjWUPf-eOW3C5dLxQ8ShPC-tnZ56Lv5tRT-xb83JbnbxO83l02VM7ae-t2ynLVbNJ324RMW23r0h7mWUJzsxA45J7cM4IseboJLfT-0bc4KKJxbnLWeIJIjjCajTcQjN_qq-JQa5TbstbHaJOqD4-k-PnVHPKXhUce2bQHKKI_0-3LK-0_hC_lD6LKjI6XDGLHJ6DfHJuHoC_htD0tftbzBPcqb-F0hHc2bP0hb6nLMbTeqR3bJRO6q6KKDjjLDGtXJjDDtJCH_5u-tDDKhD_6eTONjbtpbtbmhU-e56vQ3-5SWfK2sKTn0qjTD5v3hh6aaTv45J7ZVDKbtI8MbDLrMRoVK-A0hxLXt6kXKKOLVb6Eb4OkeqOJ2Mt5bjFihp_O0PrXB6bCQCoTKlvRjPbzX4Oo0jtpeG_DtjFqtJksL-35HtnheJ54KPu_-P4DeU8eaMRZ5mAqoqOoyI_bO45ODtD2yU_9X467K5btX5rnaIQqabIMeMJFbnOIjqDNbbPtafc43bRT0xKy5KJvfjCx-UAMhP-UyPvMWh37Lg5lMKoaMp78jR093JO4y4Ldj4oxJpOJ5JbMopCafD_2MCD6DTLhen-W5gTEaPoX5Kj-WjrJabCQHnnph4Tqhh4ShUO-f6_jtnuf8JOSKRr_eJR3MPoB5P4XbacKJT3-5RPt3RLKfnD5MD89epDh0btpbtbmhU-e3TrOb45vK-oGbKjCKqo-2t0F-xbW2PkfaR7ZVD_ytCL-bK_GenJb5ICEbfreanLXKK_s3tJIBhcqEIL4WlOVjt0H5toqbxni0G7waJKbLh7WDxbSj4QoKbDj0HoAB4JAJbTv56C5bp5nhMJ33j7JDMP0-4rvKP5y523i2n3vQpnmOqQ3DRoWXPIqbN7P-p5Z5mAqKl0MLPbtbb0xXj_0-nDSHHuOJjOP; BDUSS=UJsNmwzSnVwLWJ6eGJiTGtBMXRxVkNVVHFYOEgzZ0NMemo0V2o4dG9RaH5xbmxlRVFBQUFBJCQAAAAAAAAAAAEAAAArVO4Kzt7D-3ZpcGVyAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAH8dUl5~HVJee; BDORZ=B490B5EBF6F3CD402E515D22BCDA1598; Hm_lvt_d101ea4d2a5c67dab98251f0b5de24dc=1582632851; bdshare_firstime=1582719699670; bdindexid=lbhlaubfjakm0eklbjbislhal1; Hm_lpvt_d101ea4d2a5c67dab98251f0b5de24dc=1582940553; delPer=0; PSINO=6; H_PS_PSSID=1445_21119_30790_30905_30823_26350; RT="sl=2&ss=k771w9qf&tt=1yz&bcn=https%3A%2F%2Ffclog.baidu.com%2Flog%2Fweirwood%3Ftype%3Dperf&z=1&dm=baidu.com&si=0pgwidvcjf8&ld=1ab9"',
        "Host": "index.baidu.com",
        "Referer": "http://index.baidu.com/v2/main/index.html"
    }
    # Seconds to wait for each HTTP response; requests waits indefinitely
    # when no timeout is given.
    timeout = 10
    # Font used for rendering. This is a macOS system path; on other
    # platforms pass ``font_path`` to ``gen_wc_tags`` explicitly.
    default_font_path = '/System/Library/Fonts/PingFang.ttc'

    def __init__(self, cookie=None):
        if cookie is not None:
            # Copy so the shared class-level dict is never mutated.
            self.headers = dict(self.headers, Cookie=cookie)

    @staticmethod
    def _quote_params(params):
        """Return a copy of ``params`` with every value percent-encoded."""
        from urllib.parse import quote

        # safe='' also encodes '/', '&' and '#', which would otherwise change
        # the meaning of the query string.
        return {key: quote(str(value), safe='') for key, value in params.items()}

    @staticmethod
    def _split_word_graph(data):
        """Split the first word graph in ``data`` into ``(pv, ratio)`` dicts keyed by word.

        Returns two empty dicts when ``data`` is not a dict or carries no
        word list / word graph.
        """
        word_lists = data.get('wordlist') if isinstance(data, dict) else None
        if not word_lists:
            return {}, {}

        graph = word_lists[0].get('wordGraph') or []
        pv_dict = {item['word']: item['pv'] for item in graph}
        ratio_dict = {item['word']: item['ratio'] for item in graph}
        return pv_dict, ratio_dict

    def _fetch_data(self, url):
        """GET ``url`` with the instance headers and return the JSON ``data`` member.

        Raises:
            requests.HTTPError: if the server answers with an error status.
        """
        response = requests.get(url, headers=self.headers, timeout=self.timeout)
        response.raise_for_status()
        return json.loads(response.text)['data']

    # Fetch the index data
    def get_index(self, params):
        """Download the word graph for ``params`` and draw its word clouds.

        Args:
            params: Mapping used to fill ``data_url``; it must contain
                ``'keyword'``. Values are percent-encoded before use.

        Returns:
            ``(pv_dict, ratio_dict)``: word -> pv and word -> ratio mappings
            (both empty when the response has no word graph). The pv cloud is
            saved to ``./gzbd_wc_pv.png`` and the ratio cloud to
            ``./gzbd_wc.png``, so the second no longer overwrites the first.
        """
        url = self.data_url.format(**self._quote_params(params))
        data = self._fetch_data(url)
        print(data)

        pv_dict, ratio_dict = self._split_word_graph(data)
        if not pv_dict:
            # Nothing to draw; a word cloud cannot be built from zero words.
            return pv_dict, ratio_dict

        # Generate the word clouds
        self.gen_wc_tags(pv_dict, filename='./gzbd_wc_pv.png')
        self.gen_wc_tags(ratio_dict)
        return pv_dict, ratio_dict

    # Check whether the keyword exists
    def check_word(self, kw):
        """Return True when the check endpoint reports an empty ``result`` for ``kw``.

        NOTE(review): presumably ``result`` lists the words that were NOT
        found, so an empty list means the keyword exists -- confirm against
        the API.
        """
        from urllib.parse import quote

        url = self.check_url % quote(str(kw), safe='')
        data = self._fetch_data(url)
        return not data['result']

    # Generate a word cloud
    def gen_wc_tags(self, tags, filename='./gzbd_wc.png', font_path=None):
        """Render ``tags`` as a word cloud, display it, then save it to ``filename``.

        Args:
            tags: Mapping of word -> frequency/weight.
            filename: Path of the image file to write.
            font_path: Font file used for rendering; defaults to
                ``default_font_path``.
        """
        if font_path is None:
            font_path = self.default_font_path

        # A mask image could be supplied here instead of None, e.g.
        # np.array(Image.open('./bf.jpg')).
        wordcloud = WordCloud(
            background_color='black',
            mask=None,
            max_words=100,
            max_font_size=100,
            width=800,
            height=600,
            # Without a Chinese-capable font the words may render as garbage.
            font_path=font_path,
        ).generate_from_frequencies(tags)

        # Show the word cloud
        plt.imshow(wordcloud, interpolation='bilinear')
        plt.axis('off')
        plt.show()

        # Save the word cloud
        wordcloud.to_file(filename)
69+
70+
if __name__ == '__main__':
    # Script entry point: check the keyword first, then fetch its index data
    # and draw the word clouds.
    # The instance is named ``client`` so it does not rebind (and hide) the
    # ``bdindex`` class itself.
    client = bdindex()
    # Other sample keywords that were tried: '股市', '新冠状病毒'
    keyword = '特朗普'
    if client.check_word(keyword):
        client.get_index({'keyword': keyword})
    else:
        print('keyword is not found')

0 commit comments

Comments
 (0)