|
1 | 1 | from __future__ import unicode_literals
|
2 | 2 |
|
3 |
| -import re |
| 3 | +from .zdf import ZDFIE |
4 | 4 |
|
5 |
| -from .common import InfoExtractor |
6 |
| -from ..utils import ( |
7 |
| - int_or_none, |
8 |
| - unified_strdate, |
9 |
| - xpath_text, |
10 |
| - determine_ext, |
11 |
| - float_or_none, |
12 |
| - ExtractorError, |
13 |
| -) |
14 | 5 |
|
15 |
| - |
16 |
| -class DreiSatIE(InfoExtractor): |
| 6 | +class DreiSatIE(ZDFIE): |
17 | 7 | IE_NAME = '3sat'
|
18 |
| - _GEO_COUNTRIES = ['DE'] |
19 |
| - _VALID_URL = r'https?://(?:www\.)?3sat\.de/mediathek/(?:(?:index|mediathek)\.php)?\?(?:(?:mode|display)=[^&]+&)*obj=(?P<id>[0-9]+)' |
20 |
| - _TESTS = [ |
21 |
| - { |
22 |
| - 'url': 'http://www.3sat.de/mediathek/index.php?mode=play&obj=45918', |
23 |
| - 'md5': 'be37228896d30a88f315b638900a026e', |
24 |
| - 'info_dict': { |
25 |
| - 'id': '45918', |
26 |
| - 'ext': 'mp4', |
27 |
| - 'title': 'Waidmannsheil', |
28 |
| - 'description': 'md5:cce00ca1d70e21425e72c86a98a56817', |
29 |
| - 'uploader': 'SCHWEIZWEIT', |
30 |
| - 'uploader_id': '100000210', |
31 |
| - 'upload_date': '20140913' |
32 |
| - }, |
33 |
| - 'params': { |
34 |
| - 'skip_download': True, # m3u8 downloads |
35 |
| - } |
| 8 | + _VALID_URL = r'https?://(?:www\.)?3sat\.de/(?:[^/]+/)*(?P<id>[^/?#&]+)\.html' |
| 9 | + _TESTS = [{ |
| 10 | + # Same as https://www.zdf.de/dokumentation/ab-18/10-wochen-sommer-102.html |
| 11 | + 'url': 'https://www.3sat.de/film/ab-18/10-wochen-sommer-108.html', |
| 12 | + 'md5': '0aff3e7bc72c8813f5e0fae333316a1d', |
| 13 | + 'info_dict': { |
| 14 | + 'id': '141007_ab18_10wochensommer_film', |
| 15 | + 'ext': 'mp4', |
| 16 | + 'title': 'Ab 18! - 10 Wochen Sommer', |
| 17 | + 'description': 'md5:8253f41dc99ce2c3ff892dac2d65fe26', |
| 18 | + 'duration': 2660, |
| 19 | + 'timestamp': 1608604200, |
| 20 | + 'upload_date': '20201222', |
36 | 21 | },
|
37 |
| - { |
38 |
| - 'url': 'http://www.3sat.de/mediathek/mediathek.php?mode=play&obj=51066', |
39 |
| - 'only_matching': True, |
| 22 | + }, { |
| 23 | + 'url': 'https://www.3sat.de/gesellschaft/schweizweit/waidmannsheil-100.html', |
| 24 | + 'info_dict': { |
| 25 | + 'id': '140913_sendung_schweizweit', |
| 26 | + 'ext': 'mp4', |
| 27 | + 'title': 'Waidmannsheil', |
| 28 | + 'description': 'md5:cce00ca1d70e21425e72c86a98a56817', |
| 29 | + 'timestamp': 1410623100, |
| 30 | + 'upload_date': '20140913' |
40 | 31 | },
|
41 |
| - ] |
42 |
| - |
43 |
| - def _parse_smil_formats(self, smil, smil_url, video_id, namespace=None, f4m_params=None, transform_rtmp_url=None): |
44 |
| - param_groups = {} |
45 |
| - for param_group in smil.findall(self._xpath_ns('./head/paramGroup', namespace)): |
46 |
| - group_id = param_group.get(self._xpath_ns( |
47 |
| - 'id', 'http://www.w3.org/XML/1998/namespace')) |
48 |
| - params = {} |
49 |
| - for param in param_group: |
50 |
| - params[param.get('name')] = param.get('value') |
51 |
| - param_groups[group_id] = params |
52 |
| - |
53 |
| - formats = [] |
54 |
| - for video in smil.findall(self._xpath_ns('.//video', namespace)): |
55 |
| - src = video.get('src') |
56 |
| - if not src: |
57 |
| - continue |
58 |
| - bitrate = int_or_none(self._search_regex(r'_(\d+)k', src, 'bitrate', None)) or float_or_none(video.get('system-bitrate') or video.get('systemBitrate'), 1000) |
59 |
| - group_id = video.get('paramGroup') |
60 |
| - param_group = param_groups[group_id] |
61 |
| - for proto in param_group['protocols'].split(','): |
62 |
| - formats.append({ |
63 |
| - 'url': '%s://%s' % (proto, param_group['host']), |
64 |
| - 'app': param_group['app'], |
65 |
| - 'play_path': src, |
66 |
| - 'ext': 'flv', |
67 |
| - 'format_id': '%s-%d' % (proto, bitrate), |
68 |
| - 'tbr': bitrate, |
69 |
| - }) |
70 |
| - self._sort_formats(formats) |
71 |
| - return formats |
72 |
| - |
73 |
| - def extract_from_xml_url(self, video_id, xml_url): |
74 |
| - doc = self._download_xml( |
75 |
| - xml_url, video_id, |
76 |
| - note='Downloading video info', |
77 |
| - errnote='Failed to download video info') |
78 |
| - |
79 |
| - status_code = xpath_text(doc, './status/statuscode') |
80 |
| - if status_code and status_code != 'ok': |
81 |
| - if status_code == 'notVisibleAnymore': |
82 |
| - message = 'Video %s is not available' % video_id |
83 |
| - else: |
84 |
| - message = '%s returned error: %s' % (self.IE_NAME, status_code) |
85 |
| - raise ExtractorError(message, expected=True) |
86 |
| - |
87 |
| - title = xpath_text(doc, './/information/title', 'title', True) |
88 |
| - |
89 |
| - urls = [] |
90 |
| - formats = [] |
91 |
| - for fnode in doc.findall('.//formitaeten/formitaet'): |
92 |
| - video_url = xpath_text(fnode, 'url') |
93 |
| - if not video_url or video_url in urls: |
94 |
| - continue |
95 |
| - urls.append(video_url) |
96 |
| - |
97 |
| - is_available = 'http://www.metafilegenerator' not in video_url |
98 |
| - geoloced = 'static_geoloced_online' in video_url |
99 |
| - if not is_available or geoloced: |
100 |
| - continue |
101 |
| - |
102 |
| - format_id = fnode.attrib['basetype'] |
103 |
| - format_m = re.match(r'''(?x) |
104 |
| - (?P<vcodec>[^_]+)_(?P<acodec>[^_]+)_(?P<container>[^_]+)_ |
105 |
| - (?P<proto>[^_]+)_(?P<index>[^_]+)_(?P<indexproto>[^_]+) |
106 |
| - ''', format_id) |
107 |
| - |
108 |
| - ext = determine_ext(video_url, None) or format_m.group('container') |
109 |
| - |
110 |
| - if ext == 'meta': |
111 |
| - continue |
112 |
| - elif ext == 'smil': |
113 |
| - formats.extend(self._extract_smil_formats( |
114 |
| - video_url, video_id, fatal=False)) |
115 |
| - elif ext == 'm3u8': |
116 |
| - # the certificates are misconfigured (see |
117 |
| - # https://github.com/ytdl-org/youtube-dl/issues/8665) |
118 |
| - if video_url.startswith('https://'): |
119 |
| - continue |
120 |
| - formats.extend(self._extract_m3u8_formats( |
121 |
| - video_url, video_id, 'mp4', 'm3u8_native', |
122 |
| - m3u8_id=format_id, fatal=False)) |
123 |
| - elif ext == 'f4m': |
124 |
| - formats.extend(self._extract_f4m_formats( |
125 |
| - video_url, video_id, f4m_id=format_id, fatal=False)) |
126 |
| - else: |
127 |
| - quality = xpath_text(fnode, './quality') |
128 |
| - if quality: |
129 |
| - format_id += '-' + quality |
130 |
| - |
131 |
| - abr = int_or_none(xpath_text(fnode, './audioBitrate'), 1000) |
132 |
| - vbr = int_or_none(xpath_text(fnode, './videoBitrate'), 1000) |
133 |
| - |
134 |
| - tbr = int_or_none(self._search_regex( |
135 |
| - r'_(\d+)k', video_url, 'bitrate', None)) |
136 |
| - if tbr and vbr and not abr: |
137 |
| - abr = tbr - vbr |
138 |
| - |
139 |
| - formats.append({ |
140 |
| - 'format_id': format_id, |
141 |
| - 'url': video_url, |
142 |
| - 'ext': ext, |
143 |
| - 'acodec': format_m.group('acodec'), |
144 |
| - 'vcodec': format_m.group('vcodec'), |
145 |
| - 'abr': abr, |
146 |
| - 'vbr': vbr, |
147 |
| - 'tbr': tbr, |
148 |
| - 'width': int_or_none(xpath_text(fnode, './width')), |
149 |
| - 'height': int_or_none(xpath_text(fnode, './height')), |
150 |
| - 'filesize': int_or_none(xpath_text(fnode, './filesize')), |
151 |
| - 'protocol': format_m.group('proto').lower(), |
152 |
| - }) |
153 |
| - |
154 |
| - geolocation = xpath_text(doc, './/details/geolocation') |
155 |
| - if not formats and geolocation and geolocation != 'none': |
156 |
| - self.raise_geo_restricted(countries=self._GEO_COUNTRIES) |
157 |
| - |
158 |
| - self._sort_formats(formats) |
159 |
| - |
160 |
| - thumbnails = [] |
161 |
| - for node in doc.findall('.//teaserimages/teaserimage'): |
162 |
| - thumbnail_url = node.text |
163 |
| - if not thumbnail_url: |
164 |
| - continue |
165 |
| - thumbnail = { |
166 |
| - 'url': thumbnail_url, |
167 |
| - } |
168 |
| - thumbnail_key = node.get('key') |
169 |
| - if thumbnail_key: |
170 |
| - m = re.match('^([0-9]+)x([0-9]+)$', thumbnail_key) |
171 |
| - if m: |
172 |
| - thumbnail['width'] = int(m.group(1)) |
173 |
| - thumbnail['height'] = int(m.group(2)) |
174 |
| - thumbnails.append(thumbnail) |
175 |
| - |
176 |
| - upload_date = unified_strdate(xpath_text(doc, './/details/airtime')) |
177 |
| - |
178 |
| - return { |
179 |
| - 'id': video_id, |
180 |
| - 'title': title, |
181 |
| - 'description': xpath_text(doc, './/information/detail'), |
182 |
| - 'duration': int_or_none(xpath_text(doc, './/details/lengthSec')), |
183 |
| - 'thumbnails': thumbnails, |
184 |
| - 'uploader': xpath_text(doc, './/details/originChannelTitle'), |
185 |
| - 'uploader_id': xpath_text(doc, './/details/originChannelId'), |
186 |
| - 'upload_date': upload_date, |
187 |
| - 'formats': formats, |
| 32 | + 'params': { |
| 33 | + 'skip_download': True, |
188 | 34 | }
|
189 |
| - |
190 |
| - def _real_extract(self, url): |
191 |
| - video_id = self._match_id(url) |
192 |
| - details_url = 'http://www.3sat.de/mediathek/xmlservice/web/beitragsDetails?id=%s' % video_id |
193 |
| - return self.extract_from_xml_url(video_id, details_url) |
| 35 | + }, { |
| 36 | + # Same as https://www.zdf.de/filme/filme-sonstige/der-hauptmann-112.html |
| 37 | + 'url': 'https://www.3sat.de/film/spielfilm/der-hauptmann-100.html', |
| 38 | + 'only_matching': True, |
| 39 | + }, { |
| 40 | + # Same as https://www.zdf.de/wissen/nano/nano-21-mai-2019-102.html, equal media ids |
| 41 | + 'url': 'https://www.3sat.de/wissen/nano/nano-21-mai-2019-102.html', |
| 42 | + 'only_matching': True, |
| 43 | + }] |
0 commit comments