
Commit 96be773

Batch download of bilibili videos and danmaku
1 parent 9091c03 commit 96be773

File tree: 1 file changed, +191 −0 lines


bilibili/bilibili.py

Lines changed: 191 additions & 0 deletions
@@ -0,0 +1,191 @@
# -*- coding: utf-8 -*-
# Website: http://cuijiahua.com
# Author: Jack Cui
# Date: 2018.6.9

import requests, json, re, sys, os, argparse, time
from contextlib import closing
from urllib import parse
import xml2ass

class BiliBili:
    def __init__(self, dirname, keyword):
        # Headers for video/danmaku downloads; the Referer must look like a bilibili search page.
        self.dn_headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.167 Safari/537.36',
                           'Accept': '*/*',
                           'Accept-Encoding': 'gzip, deflate, br',
                           'Accept-Language': 'zh-CN,zh;q=0.9',
                           'Referer': 'https://search.bilibili.com/all?keyword=%s' % parse.quote(keyword)}

        # Headers for the search API (returns JSON).
        self.search_headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.167 Safari/537.36',
                               'Accept-Language': 'zh-CN,zh;q=0.9',
                               'Accept-Encoding': 'gzip, deflate, br',
                               'Accept': 'application/json, text/plain, */*'}

        # Headers for fetching the video playback page (HTML).
        self.video_headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.167 Safari/537.36',
                              'Accept-Language': 'zh-CN,zh;q=0.9',
                              'Accept-Encoding': 'gzip, deflate, br',
                              'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8'}

        # Headers for downloading the danmaku XML.
        self.danmu_header = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.167 Safari/537.36',
                             'Accept': '*/*',
                             'Accept-Encoding': 'gzip, deflate, br',
                             'Accept-Language': 'zh-CN,zh;q=0.9'}

        self.sess = requests.Session()

        self.dir = dirname

    def video_downloader(self, video_url, video_name):
        """
        Download a video.
        Parameters:
            video_url: direct (watermarked) video URL
            video_name: file name to save the video as
        Returns:
            None
        """
        size = 0
        with closing(self.sess.get(video_url, headers=self.dn_headers, stream=True, verify=False)) as response:
            chunk_size = 1024
            content_size = int(response.headers['content-length'])
            if response.status_code == 200:
                sys.stdout.write(' [File size]: %0.2f MB\n' % (content_size / chunk_size / 1024))
                video_name = os.path.join(self.dir, video_name)
                with open(video_name, 'wb') as file:
                    for data in response.iter_content(chunk_size=chunk_size):
                        file.write(data)
                        size += len(data)
                        file.flush()

                        sys.stdout.write(' [Progress]: %.2f%%' % float(size / content_size * 100) + '\r')
                        # sys.stdout.flush()
                        if size / content_size == 1:
                            print('\n')
            else:
                print('Bad response for this link')

    def search_video(self, search_url):
        """
        Query the search API.
        Parameters:
            search_url: search API URL
        Returns:
            titles: list of video titles
            arcurls: list of video playback page URLs
        """
        req = self.sess.get(url=search_url, headers=self.search_headers, verify=False)
        html = json.loads(req.text)
        videos = html['result']
        titles = []
        arcurls = []
        for video in videos:
            # Strip the <em> highlight tags the search API wraps around the keyword.
            titles.append(video['title'].replace('<em class="keyword">', '').replace('</em>', ''))
            arcurls.append(video['arcurl'])
        return titles, arcurls

    def get_download_url(self, arcurl):
        """
        Get the direct download URL for a video.
        Parameters:
            arcurl: video playback page URL
        Returns:
            download_url: direct video download URL
            oid: danmaku oid parsed from the download URL
        """
        req = self.sess.get(url=arcurl, headers=self.video_headers, verify=False)
        # The playback page embeds the stream info as JSON in window.__playinfo__.
        pattern = '.__playinfo__=(.*)</script><script>window.__INITIAL_STATE__='
        try:
            infos = re.findall(pattern, req.text)[0]
        except IndexError:
            return '', ''
        html = json.loads(infos)
        durl = html['durl']
        # print(durl)
        download_url = durl[0]['url']
        # The oid sits at a different path depth depending on the CDN mirror.
        if 'mirrork' in download_url:
            oid = download_url.split('/')[6]
        else:
            oid = download_url.split('/')[7]
        if len(oid) >= 10:
            oid = download_url.split('/')[6]
        return download_url, oid

    def download_xml(self, danmu_url, danmu_name):
        """
        Download the raw danmaku XML for a video.
        Parameters:
            danmu_url: danmaku API URL
            danmu_name: path to save the XML file
        Returns:
            None
        """
        with closing(self.sess.get(danmu_url, headers=self.danmu_header, stream=True, verify=False)) as response:
            if response.status_code == 200:
                with open(danmu_name, 'wb') as file:
                    for data in response.iter_content():
                        file.write(data)
                        file.flush()
            else:
                print('Bad response for this link')

    def get_danmu(self, oid, filename):
        """
        Download the danmaku and convert it to an ASS subtitle file.
        Parameters:
            oid: danmaku oid
            filename: file name prefix for the saved danmaku
        Returns:
            None
        """
        danmu_url = 'https://api.bilibili.com/x/v1/dm/list.so?oid={}'.format(oid)
        danmu_name = os.path.join(self.dir, filename + '.xml')
        danmu_ass = os.path.join(self.dir, filename + '.ass')
        self.download_xml(danmu_url, danmu_name)
        time.sleep(0.5)
        # Convert the XML danmaku into a 1280x720 ASS subtitle file.
        xml2ass.Danmaku2ASS(danmu_name, danmu_ass, 1280, 720)
        # os.remove(danmu_name)

    def search_videos(self, keyword, pages):
        """
        Search for videos and download each one together with its danmaku.
        Parameters:
            keyword: search keyword
            pages: number of result pages to download
        Returns:
            None
        """
        if not os.path.isdir(self.dir):
            os.mkdir(self.dir)
        for page in range(1, pages + 1):
            search_url = 'https://search.bilibili.com/api/search?search_type=video&keyword={}&order=totalrank&duration=1&tids=0&page={}'.format(keyword, page)
            titles, arcurls = self.search_video(search_url)
            for index, arcurl in enumerate(arcurls):
                title = titles[index]
                # Strip characters that are not allowed in file names.
                for c in '´☆❤◦\\/:*?"<>|':
                    title = title.replace(c, '')
                if title + '.flv' not in os.listdir(self.dir):
                    download_url, oid = self.get_download_url(arcurl)
                    if download_url != '' and oid != '':
                        print('Page [ %d ]: downloading video [ %s ]:' % (page, title))
                        self.video_downloader(download_url, title + '.flv')
                        print('Video downloaded!')
                        self.get_danmu(oid, title)
                        print('Danmaku downloaded!')


if __name__ == '__main__':

    if len(sys.argv) == 1:
        sys.argv.append('--help')

    parser = argparse.ArgumentParser()
    parser.add_argument('-d', '--dir', required=True, help='download path')
    parser.add_argument('-k', '--keyword', required=True, help='search keyword')
    parser.add_argument('-p', '--pages', required=True, help='number of result pages to download', type=int, default=1)

    args = parser.parse_args()
    B = BiliBili(args.dir, args.keyword)
    B.search_videos(args.keyword, args.pages)

    print('All downloads finished!')
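
For reference, a minimal usage sketch. It assumes the file is saved as bilibili/bilibili.py next to the xml2ass module it imports; the flags come from the argparse block above, and the directory name "videos" and the keyword are example values only, not part of the commit.

# Command-line usage (flags defined by the argparse block above):
#   python bilibili.py -d videos -k 猫 -p 1
#
# Equivalent programmatic usage of the class (assumes bilibili.py is importable
# from the working directory):
from bilibili import BiliBili

downloader = BiliBili('videos', '猫')     # download directory, search keyword
downloader.search_videos('猫', pages=1)   # crawl one page of results; saves .flv, .xml and .ass files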
