Skip to content

Commit 7c52395

Browse files
committed
[youtube:tab] Improve grid extraction (closes ytdl-org#28725)
1 parent ea87ed8 commit 7c52395

File tree

1 file changed

+21
-17
lines changed

1 file changed

+21
-17
lines changed

youtube_dl/extractor/youtube.py

Lines changed: 21 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -2320,10 +2320,13 @@ def _extract_channel_id(self, webpage):
23202320

23212321
@staticmethod
23222322
def _extract_grid_item_renderer(item):
2323-
for item_kind in ('Playlist', 'Video', 'Channel', 'Show'):
2324-
renderer = item.get('grid%sRenderer' % item_kind)
2325-
if renderer:
2326-
return renderer
2323+
assert isinstance(item, dict)
2324+
for key, renderer in item.items():
2325+
if not key.startswith('grid') or not key.endswith('Renderer'):
2326+
continue
2327+
if not isinstance(renderer, dict):
2328+
continue
2329+
return renderer
23272330

23282331
def _grid_entries(self, grid_renderer):
23292332
for item in grid_renderer['items']:
@@ -2333,18 +2336,21 @@ def _grid_entries(self, grid_renderer):
23332336
if not isinstance(renderer, dict):
23342337
continue
23352338
title = try_get(
2336-
renderer, lambda x: x['title']['runs'][0]['text'], compat_str)
2339+
renderer, (lambda x: x['title']['runs'][0]['text'],
2340+
lambda x: x['title']['simpleText']), compat_str)
23372341
# playlist
23382342
playlist_id = renderer.get('playlistId')
23392343
if playlist_id:
23402344
yield self.url_result(
23412345
'https://www.youtube.com/playlist?list=%s' % playlist_id,
23422346
ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
23432347
video_title=title)
2348+
continue
23442349
# video
23452350
video_id = renderer.get('videoId')
23462351
if video_id:
23472352
yield self._extract_video(renderer)
2353+
continue
23482354
# channel
23492355
channel_id = renderer.get('channelId')
23502356
if channel_id:
@@ -2353,19 +2359,17 @@ def _grid_entries(self, grid_renderer):
23532359
yield self.url_result(
23542360
'https://www.youtube.com/channel/%s' % channel_id,
23552361
ie=YoutubeTabIE.ie_key(), video_title=title)
2356-
# show
2357-
if playlist_id is None: # needs to check for playlist_id, or non-series playlists are recognized twice
2358-
show_playlist_url = try_get(
2359-
renderer, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
2360-
compat_str)
2361-
if show_playlist_url:
2362-
playlist_id = self._search_regex(r'/playlist\?list=([0-9a-zA-Z-_]+)', show_playlist_url,
2363-
'playlist id', default=None)
2364-
if playlist_id:
2365-
title = try_get(renderer, lambda x: x['title']['simpleText'], compat_str)
2362+
continue
2363+
# generic endpoint URL support
2364+
ep_url = urljoin('https://www.youtube.com/', try_get(
2365+
renderer, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
2366+
compat_str))
2367+
if ep_url:
2368+
for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE):
2369+
if ie.suitable(ep_url):
23662370
yield self.url_result(
2367-
"https://www.youtube.com/playlist?list=%s" % playlist_id,
2368-
ie=YoutubeTabIE.ie_key(), video_id=playlist_id, video_title=title)
2371+
ep_url, ie=ie.ie_key(), video_id=ie._match_id(ep_url), video_title=title)
2372+
break
23692373

23702374
def _shelf_entries_from_content(self, shelf_renderer):
23712375
content = shelf_renderer.get('content')

0 commit comments

Comments
 (0)