Skip to content

Commit 127b363

Browse files
committed
update
1 parent f50cec1 commit 127b363

File tree

7 files changed

+11
-3
lines changed

7 files changed

+11
-3
lines changed

爬虫/Include/豆瓣2/analyse.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import matplotlib.pyplot as plt
22
import matplotlib
33
import jieba
4+
import jieba.analyse
45
import xlwt
56
import xlrd
67
from wordcloud import WordCloud
@@ -88,6 +89,9 @@ def getciyun_most(map):
8889
plt.axis("off")
8990

9091
def anylaseword(comment):
92+
list=['这个','一个','不少','起来','没有','就是','不是','那个','还是','剧情','这样','那样','这种','那种','故事','人物']
93+
list.append("这个")
94+
print(list)
9195
commnetstr=''
9296
c = Counter()
9397
low=Counter()
@@ -103,7 +107,7 @@ def anylaseword(comment):
103107
continue
104108
commnetstr+=va[3]
105109
for (k, v) in c.most_common():
106-
if v<5:
110+
if v<5 or k in list:
107111
c.pop(k)
108112
continue
109113
#print(k,v)

爬虫/Include/豆瓣2/get_comment.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -42,10 +42,13 @@ def getcomment(cookies):
4242
ws = w.add_sheet('sheet1')
4343
index=1
4444
while True:
45+
header = {
46+
'user-agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36',
47+
}
4548
try:
4649
url = 'https://movie.douban.com/subject/26794435/comments?start='+str(start)+'&limit=20&sort=new_score&status=P&comments_only=1'
4750
start+=20
48-
req = requests.get(url,cookies=cookies)
51+
req = requests.get(url,cookies=cookies,headers=header)
4952
res = req.json()
5053
res=res['html']
5154
soup = BeautifulSoup(res, 'lxml')
@@ -72,7 +75,7 @@ def getcomment(cookies):
7275

7376
if __name__ == '__main__':
7477

75-
cookies=login('15751512041','52cuihuini')
78+
cookies=login('15751512041','52cuihuini!')
7679
getcomment(cookies)
7780

7881

爬虫/Include/豆瓣2/img.jpg

-83 Bytes
Loading

爬虫/Include/豆瓣2/img2.jpg

4.32 KB
Loading

爬虫/Include/豆瓣2/login.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,3 +31,4 @@ def login(username,password):
3131
cookies = requests.utils.dict_from_cookiejar(req.cookies)
3232
print(cookies)
3333
return cookies
34+
login()

爬虫/Include/豆瓣2/nezha.xls

4 KB
Binary file not shown.

爬虫/Include/豆瓣2/score.png

330 Bytes
Loading

0 commit comments

Comments
 (0)