Skip to content

Commit 41920fc

Browse files
authored
[bbc] Extract description and timestamp from __INITIAL_DATA__ (ytdl-org#28774)
1 parent 9f6c03a commit 41920fc

File tree

1 file changed

+24
-1
lines changed

1 file changed

+24
-1
lines changed

youtube_dl/extractor/bbc.py

Lines changed: 24 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -11,6 +11,7 @@
1111
compat_etree_Element,
1212
compat_HTTPError,
1313
compat_parse_qs,
14+
compat_str,
1415
compat_urllib_parse_urlparse,
1516
compat_urlparse,
1617
)
@@ -25,8 +26,10 @@
2526
js_to_json,
2627
parse_duration,
2728
parse_iso8601,
29+
strip_or_none,
2830
try_get,
2931
unescapeHTML,
32+
unified_timestamp,
3033
url_or_none,
3134
urlencode_postdata,
3235
urljoin,
@@ -761,8 +764,17 @@ class BBCIE(BBCCoUkIE):
761764
'only_matching': True,
762765
}, {
763766
# custom redirection to www.bbc.com
767+
# also, video with window.__INITIAL_DATA__
764768
'url': 'http://www.bbc.co.uk/news/science-environment-33661876',
765-
'only_matching': True,
769+
'info_dict': {
770+
'id': 'p02xzws1',
771+
'ext': 'mp4',
772+
'title': "Pluto may have 'nitrogen glaciers'",
773+
'description': "Pluto could have glaciers of nitrogen ice, new photographs from Nasa's New Horizons probe suggest.",
774+
'thumbnail': r're:https?://.+/.+\.jpg',
775+
'timestamp': 1437785037,
776+
'upload_date': '20150725',
777+
},
766778
}, {
767779
# single video article embedded with data-media-vpid
768780
'url': 'http://www.bbc.co.uk/sport/rowing/35908187',
@@ -1164,12 +1176,23 @@ def parse_media(media):
11641176
continue
11651177
formats, subtitles = self._download_media_selector(item_id)
11661178
self._sort_formats(formats)
1179+
item_desc = try_get(
1180+
media,
1181+
lambda x: x['summary']['blocks'][0]['model']['text'],
1182+
compat_str)
1183+
item_time = None
1184+
for meta in try_get(media, lambda x: x['metadata']['items'], list) or []:
1185+
if try_get(meta, lambda x: x['label']) == 'Published':
1186+
item_time = unified_timestamp(meta.get('timestamp'))
1187+
break
11671188
entries.append({
11681189
'id': item_id,
11691190
'title': item_title,
11701191
'thumbnail': item.get('holdingImageUrl'),
11711192
'formats': formats,
11721193
'subtitles': subtitles,
1194+
'timestamp': item_time,
1195+
'description': strip_or_none(item_desc),
11731196
})
11741197
for resp in (initial_data.get('data') or {}).values():
11751198
name = resp.get('name')

0 commit comments

Comments
 (0)