Skip to content

Commit 1b0a13f

Browse files
committed
[youtube:tab] Pass innertube context and x-goog-visitor-id header along with continuation requests (closes ytdl-org#28702)
1 parent 27e5a44 commit 1b0a13f

File tree

1 file changed

+27
-15
lines changed

1 file changed

+27
-15
lines changed

youtube_dl/extractor/youtube.py

Lines changed: 27 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -306,7 +306,7 @@ def _extract_ytcfg(self, video_id, webpage):
306306
return self._parse_json(
307307
self._search_regex(
308308
r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
309-
default='{}'), video_id, fatal=False)
309+
default='{}'), video_id, fatal=False) or {}
310310

311311
def _extract_video(self, renderer):
312312
video_id = renderer['videoId']
@@ -2475,7 +2475,7 @@ def _extract_continuation(cls, renderer):
24752475
ctp = continuation_ep.get('clickTrackingParams')
24762476
return YoutubeTabIE._build_continuation_query(continuation, ctp)
24772477

2478-
def _entries(self, tab, identity_token):
2478+
def _entries(self, tab, item_id, webpage):
24792479
tab_content = try_get(tab, lambda x: x['content'], dict)
24802480
if not tab_content:
24812481
return
@@ -2535,26 +2535,37 @@ def _entries(self, tab, identity_token):
25352535
yield entry
25362536
continuation = self._extract_continuation(rich_grid_renderer)
25372537

2538+
ytcfg = self._extract_ytcfg(item_id, webpage)
2539+
client_version = try_get(
2540+
ytcfg, lambda x: x['INNERTUBE_CLIENT_VERSION'], compat_str) or '2.20210407.08.00'
2541+
25382542
headers = {
25392543
'x-youtube-client-name': '1',
2540-
'x-youtube-client-version': '2.20201112.04.01',
2544+
'x-youtube-client-version': client_version,
25412545
'content-type': 'application/json',
25422546
}
2547+
2548+
context = try_get(ytcfg, lambda x: x['INNERTUBE_CONTEXT'], dict) or {
2549+
'client': {
2550+
'clientName': 'WEB',
2551+
'clientVersion': client_version,
2552+
}
2553+
}
2554+
visitor_data = try_get(context, lambda x: x['client']['visitorData'], compat_str)
2555+
2556+
identity_token = self._extract_identity_token(ytcfg, webpage)
25432557
if identity_token:
25442558
headers['x-youtube-identity-token'] = identity_token
25452559

25462560
data = {
2547-
'context': {
2548-
'client': {
2549-
'clientName': 'WEB',
2550-
'clientVersion': '2.20201021.03.00',
2551-
}
2552-
},
2561+
'context': context,
25532562
}
25542563

25552564
for page_num in itertools.count(1):
25562565
if not continuation:
25572566
break
2567+
if visitor_data:
2568+
headers['x-goog-visitor-id'] = visitor_data
25582569
data['continuation'] = continuation['continuation']
25592570
data['clickTracking'] = {
25602571
'clickTrackingParams': continuation['itct']
@@ -2579,6 +2590,9 @@ def _entries(self, tab, identity_token):
25792590
if not response:
25802591
break
25812592

2593+
visitor_data = try_get(
2594+
response, lambda x: x['responseContext']['visitorData'], compat_str) or visitor_data
2595+
25822596
continuation_contents = try_get(
25832597
response, lambda x: x['continuationContents'], dict)
25842598
if continuation_contents:
@@ -2687,7 +2701,7 @@ def _extract_alert(data):
26872701
alerts.append(text)
26882702
return '\n'.join(alerts)
26892703

2690-
def _extract_from_tabs(self, item_id, webpage, data, tabs, identity_token):
2704+
def _extract_from_tabs(self, item_id, webpage, data, tabs):
26912705
selected_tab = self._extract_selected_tab(tabs)
26922706
renderer = try_get(
26932707
data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
@@ -2712,7 +2726,7 @@ def _extract_from_tabs(self, item_id, webpage, data, tabs, identity_token):
27122726
if renderer:
27132727
title = try_get(renderer, lambda x: x['hashtag']['simpleText'])
27142728
playlist = self.playlist_result(
2715-
self._entries(selected_tab, identity_token),
2729+
self._entries(selected_tab, item_id, webpage),
27162730
playlist_id=playlist_id, playlist_title=title,
27172731
playlist_description=description)
27182732
playlist.update(self._extract_uploader(data))
@@ -2736,8 +2750,7 @@ def _extract_from_playlist(self, item_id, url, data, playlist):
27362750
self._playlist_entries(playlist), playlist_id=playlist_id,
27372751
playlist_title=title)
27382752

2739-
def _extract_identity_token(self, webpage, item_id):
2740-
ytcfg = self._extract_ytcfg(item_id, webpage)
2753+
def _extract_identity_token(self, ytcfg, webpage):
27412754
if ytcfg:
27422755
token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str)
27432756
if token:
@@ -2760,12 +2773,11 @@ def _real_extract(self, url):
27602773
return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
27612774
self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
27622775
webpage = self._download_webpage(url, item_id)
2763-
identity_token = self._extract_identity_token(webpage, item_id)
27642776
data = self._extract_yt_initial_data(item_id, webpage)
27652777
tabs = try_get(
27662778
data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
27672779
if tabs:
2768-
return self._extract_from_tabs(item_id, webpage, data, tabs, identity_token)
2780+
return self._extract_from_tabs(item_id, webpage, data, tabs)
27692781
playlist = try_get(
27702782
data, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
27712783
if playlist:

0 commit comments

Comments
 (0)