Skip to content

Commit aa9118a

Browse files
committed
[apa] Improve extraction (closes ytdl-org#27750)
1 parent 36abc16 commit aa9118a

File tree

1 file changed

+25
-13
lines changed

1 file changed

+25
-13
lines changed

youtube_dl/extractor/apa.py

Lines changed: 25 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -6,12 +6,13 @@
66
from .common import InfoExtractor
77
from ..utils import (
88
determine_ext,
9-
js_to_json,
9+
int_or_none,
10+
url_or_none,
1011
)
1112

1213

1314
class APAIE(InfoExtractor):
14-
_VALID_URL = r'https?://[^/]+\.apa\.at/embed/(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'
15+
_VALID_URL = r'(?P<base_url>https?://[^/]+\.apa\.at)/embed/(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'
1516
_TESTS = [{
1617
'url': 'http://uvp.apa.at/embed/293f6d17-692a-44e3-9fd5-7b178f3a1029',
1718
'md5': '2b12292faeb0a7d930c778c7a5b4759b',
@@ -41,9 +42,11 @@ def _extract_urls(webpage):
4142
webpage)]
4243

4344
def _real_extract(self, url):
44-
video_id = self._match_id(url)
45+
mobj = re.match(self._VALID_URL, url)
46+
video_id, base_url = mobj.group('id', 'base_url')
4547

46-
webpage = self._download_webpage('https://uvp.apa.at/player/%s' % video_id, video_id)
48+
webpage = self._download_webpage(
49+
'%s/player/%s' % (base_url, video_id), video_id)
4750

4851
jwplatform_id = self._search_regex(
4952
r'media[iI]d\s*:\s*["\'](?P<id>[a-zA-Z0-9]{8})', webpage,
@@ -54,30 +57,39 @@ def _real_extract(self, url):
5457
'jwplatform:' + jwplatform_id, ie='JWPlatform',
5558
video_id=video_id)
5659

57-
sources = self._parse_json("{" + self._search_regex(
58-
r'("hls"\s*:\s*"[^"]+"\s*,\s*"progressive"\s*:\s*"[^"]+")', webpage, 'sources')
59-
+ "}", video_id, transform_source=js_to_json)
60+
def extract(field, name=None):
61+
return self._search_regex(
62+
r'\b%s["\']\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1' % field,
63+
webpage, name or field, default=None, group='value')
64+
65+
title = extract('title') or video_id
66+
description = extract('description')
67+
thumbnail = extract('poster', 'thumbnail')
6068

6169
formats = []
62-
for (format, source_url) in sources.items():
70+
for format_id in ('hls', 'progressive'):
71+
source_url = url_or_none(extract(format_id))
72+
if not source_url:
73+
continue
6374
ext = determine_ext(source_url)
6475
if ext == 'm3u8':
6576
formats.extend(self._extract_m3u8_formats(
6677
source_url, video_id, 'mp4', entry_protocol='m3u8_native',
6778
m3u8_id='hls', fatal=False))
6879
else:
80+
height = int_or_none(self._search_regex(
81+
r'(\d+)\.mp4', source_url, 'height', default=None))
6982
formats.append({
7083
'url': source_url,
84+
'format_id': format_id,
85+
'height': height,
7186
})
7287
self._sort_formats(formats)
7388

74-
thumbnail = self._search_regex(
75-
r'"poster"\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
76-
'thumbnail', fatal=False, group='url')
77-
7889
return {
7990
'id': video_id,
80-
'title': video_id,
91+
'title': title,
92+
'description': description,
8193
'thumbnail': thumbnail,
8294
'formats': formats,
8395
}

0 commit comments

Comments
 (0)