Skip to content

Commit bab301c

Browse files
authored
Update Python抓取喜马拉雅电台音频.py
1 parent 3604c27 commit bab301c

File tree

1 file changed

+3
-9
lines changed

1 file changed

+3
-9
lines changed

Python抓取喜马拉雅电台音频/Python抓取喜马拉雅电台音频.py

Lines changed: 3 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -10,10 +10,7 @@
1010
db = clients["XiMaLaYa"]
1111
col1 = db["album2"]
1212
col2 = db["detaile2"]
13-
# cookies = {
14-
# 'Cookie': '_xmLog=xm_1497536646561_j3yinjq9yfbobs; trackType=web; x_xmly_traffic=utm_source%3A%26utm_medium%3A%26utm_campaign%3A%26utm_content%3A%26utm_term%3A%26utm_from%3A; _ga=GA1.2.1494610706.1497536647',
15-
# }
16-
# start_url = 'http://www.ximalaya.com/dq/all/'
13+
1714
UA_LIST = [
1815
"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/22.0.1207.1 Safari/537.1",
1916
"Mozilla/5.0 (X11; CrOS i686 2268.111.0) AppleWebKit/536.11 (KHTML, like Gecko) Chrome/20.0.1132.57 Safari/536.11",
@@ -72,9 +69,6 @@
7269
}
7370

7471

75-
# content = {}
76-
77-
7872
def get_url():
7973
start_urls = ['http://www.ximalaya.com/dq/all/{}'.format(num) for num in range(1, 85)]
8074
for start_url in start_urls:
@@ -116,7 +110,7 @@ def get_m4a(url):
116110
dic = json.loads(html)
117111
col2.insert(dic)
118112
print(murl + '中的数据已被成功插入mongodb')
119-
# print(dic)
113+
120114

121115
if __name__ == '__main__':
122-
get_url()
116+
get_url()

0 commit comments

Comments
 (0)