4
4
import re
5
5
6
6
from .common import InfoExtractor
7
- from ..compat import compat_urllib_parse_urlparse
8
7
from ..utils import (
9
8
ExtractorError ,
9
+ float_or_none ,
10
+ int_or_none ,
10
11
parse_iso8601 ,
11
12
qualities ,
13
+ try_get ,
12
14
)
13
15
14
16
15
17
class SRGSSRIE (InfoExtractor ):
16
# Matches either a tp.srgssr.ch player URL carrying a `?urn=urn` query or the
# internal `srgssr` scheme, followed by `:<bu>:[<extra>:]<type>:<id>`.
# `bu` is the business unit, `type` is `video` or `audio`, and `id` is either
# a 36-character hex/dash string or a purely numeric identifier.
_VALID_URL = r'''(?x)
                (?:
                    https?://tp\.srgssr\.ch/p(?:/[^/]+)+\?urn=urn|
                    srgssr
                ):
                (?P<bu>
                    srf|rts|rsi|rtr|swi
                ):(?:[^:]+:)?
                (?P<type>
                    video|audio
                ):
                (?P<id>
                    [0-9a-f\-]{36}|\d+
                )
                '''
17
33
_GEO_BYPASS = False
18
34
_GEO_COUNTRIES = ['CH' ]
19
35
@@ -25,25 +41,39 @@ class SRGSSRIE(InfoExtractor):
25
41
'LEGAL' : 'The video cannot be transmitted for legal reasons.' ,
26
42
'STARTDATE' : 'This video is not yet available. Please try again later.' ,
27
43
}
44
# Fallback subtitle language per business unit, used by `_real_extract`
# when a subtitle entry does not provide its own `locale`.
_DEFAULT_LANGUAGE_CODES = {
    'srf': 'de',
    'rts': 'fr',
    'rsi': 'it',
    'rtr': 'rm',
    'swi': 'en',
}
28
51
29
52
def _get_tokenized_src(self, url, video_id, format_id):
    """Return `url` with the token service's auth parameters appended.

    The token is requested non-fatally: when the download fails, or the
    response does not contain `token.authparams`, `url` is returned
    unchanged.

    :param url: stream URL to tokenize
    :param video_id: id passed to `_download_json`
    :param format_id: format label, used only in the download note
    :return: the (possibly) tokenized URL
    """
    token = self._download_json(
        'http://tp.srgssr.ch/akahd/token?acl=*',
        video_id, 'Downloading %s token' % format_id, fatal=False) or {}
    # The key must be exactly 'authparams'; a key with a stray leading space
    # would never match and the URL would silently stay untokenized.
    auth_params = try_get(token, lambda x: x['token']['authparams'])
    if auth_params:
        # Respect a query string that is already present on the URL.
        url += ('?' if '?' not in url else '&') + auth_params
    return url
38
60
39
- def get_media_data (self , bu , media_type , media_id ):
40
- media_data = self ._download_json (
41
- 'http://il.srgssr.ch/integrationlayer/1.0/ue/%s/%s/play/%s.json' % (bu , media_type , media_id ),
42
- media_id )[media_type .capitalize ()]
43
-
44
- if media_data .get ('block' ) and media_data ['block' ] in self ._ERRORS :
45
- message = self ._ERRORS [media_data ['block' ]]
46
- if media_data ['block' ] == 'GEOBLOCK' :
61
+ def _get_media_data (self , bu , media_type , media_id ):
62
+ query = {'onlyChapters' : True } if media_type == 'video' else {}
63
+ full_media_data = self ._download_json (
64
+ 'https://il.srgssr.ch/integrationlayer/2.0/%s/mediaComposition/%s/%s.json'
65
+ % (bu , media_type , media_id ),
66
+ media_id , query = query )['chapterList' ]
67
+ try :
68
+ media_data = next (
69
+ x for x in full_media_data if x .get ('id' ) == media_id )
70
+ except StopIteration :
71
+ raise ExtractorError ('No media information found' )
72
+
73
+ block_reason = media_data .get ('blockReason' )
74
+ if block_reason and block_reason in self ._ERRORS :
75
+ message = self ._ERRORS [block_reason ]
76
+ if block_reason == 'GEOBLOCK' :
47
77
self .raise_geo_restricted (
48
78
msg = message , countries = self ._GEO_COUNTRIES )
49
79
raise ExtractorError (
@@ -53,53 +83,75 @@ def get_media_data(self, bu, media_type, media_id):
53
83
54
84
def _real_extract(self, url):
    """Build the info dict for one SRG SSR media item.

    The business unit, media type and media id are taken from the URL,
    the media description is fetched through `_get_media_data`, and the
    formats are collected from its `resourceList` plus, where applicable,
    its podcast URLs. Subtitles are only gathered for videos.
    """
    bu, media_type, media_id = re.match(self._VALID_URL, url).groups()
    media_data = self._get_media_data(bu, media_type, media_id)
    title = media_data['title']

    quality_rank = qualities(['SD', 'HD'])
    formats = []
    for resource in media_data.get('resourceList') or []:
        resource_url = resource.get('url')
        if not resource_url:
            continue
        protocol = resource.get('protocol')
        quality = resource.get('quality')
        # Join whichever of protocol / encoding / quality are present.
        format_id = '-'.join(
            part
            for part in (protocol, resource.get('encoding'), quality)
            if part)

        if protocol in ('HTTP', 'HTTPS'):
            formats.append({
                'format_id': format_id,
                'url': resource_url,
                'quality': quality_rank(quality),
            })
            continue
        if protocol not in ('HDS', 'HLS'):
            continue

        if resource.get('tokenType') == 'AKAMAI':
            resource_url = self._get_tokenized_src(
                resource_url, media_id, format_id)
            formats.extend(self._extract_akamai_formats(
                resource_url, media_id))
        elif protocol == 'HLS':
            formats.extend(self._extract_m3u8_formats(
                resource_url, media_id, 'mp4', 'm3u8_native',
                m3u8_id=format_id, fatal=False))

    # Audio media usually include the podcast URL even when the item is
    # only an audio segment and not the whole episode, so the podcast
    # URLs are only used when `position` is 0.
    if int_or_none(media_data.get('position')) == 0:
        for prefix in ('S', 'H'):
            podcast_url = media_data.get('podcast%sdUrl' % prefix)
            if podcast_url:
                podcast_quality = prefix + 'D'
                formats.append({
                    'format_id': 'PODCAST-' + podcast_quality,
                    'url': podcast_url,
                    'quality': quality_rank(podcast_quality),
                })
    self._sort_formats(formats)

    subtitles = {}
    if media_type == 'video':
        for entry in media_data.get('subtitleList') or []:
            subtitle_url = entry.get('url')
            if subtitle_url:
                # Fall back to the business unit's default language when
                # the entry carries no locale.
                language = (
                    entry.get('locale')
                    or self._DEFAULT_LANGUAGE_CODES[bu])
                subtitles.setdefault(language, []).append({
                    'url': subtitle_url,
                })

    return {
        'id': media_id,
        'title': title,
        'description': media_data.get('description'),
        'timestamp': parse_iso8601(media_data.get('date')),
        'thumbnail': media_data.get('imageUrl'),
        # NOTE(review): scaled by 1000, presumably milliseconds to seconds.
        'duration': float_or_none(media_data.get('duration'), 1000),
        'subtitles': subtitles,
        'formats': formats,
    }
105
157
@@ -119,34 +171,26 @@ class SRGSSRPlayIE(InfoExtractor):
119
171
120
172
_TESTS = [{
121
173
'url' : 'http://www.srf.ch/play/tv/10vor10/video/snowden-beantragt-asyl-in-russland?id=28e1a57d-5b76-4399-8ab3-9097f071e6c5' ,
122
- 'md5' : 'da6b5b3ac9fa4761a942331cef20fcb3 ' ,
174
+ 'md5' : '6db2226ba97f62ad42ce09783680046c ' ,
123
175
'info_dict' : {
124
176
'id' : '28e1a57d-5b76-4399-8ab3-9097f071e6c5' ,
125
177
'ext' : 'mp4' ,
126
178
'upload_date' : '20130701' ,
127
179
'title' : 'Snowden beantragt Asyl in Russland' ,
128
- 'timestamp' : 1372713995 ,
129
- }
130
- }, {
131
- # No Speichern (Save) button
132
- 'url' : 'http://www.srf.ch/play/tv/top-gear/video/jaguar-xk120-shadow-und-tornado-dampflokomotive?id=677f5829-e473-4823-ac83-a1087fe97faa' ,
133
- 'md5' : '0a274ce38fda48c53c01890651985bc6' ,
134
- 'info_dict' : {
135
- 'id' : '677f5829-e473-4823-ac83-a1087fe97faa' ,
136
- 'ext' : 'flv' ,
137
- 'upload_date' : '20130710' ,
138
- 'title' : 'Jaguar XK120, Shadow und Tornado-Dampflokomotive' ,
139
- 'description' : 'md5:88604432b60d5a38787f152dec89cd56' ,
140
- 'timestamp' : 1373493600 ,
180
+ 'timestamp' : 1372708215 ,
181
+ 'duration' : 113.827 ,
182
+ 'thumbnail' : r're:^https?://.*1383719781\.png$' ,
141
183
},
184
+ 'expected_warnings' : ['Unable to download f4m manifest' ],
142
185
}, {
143
186
'url' : 'http://www.rtr.ch/play/radio/actualitad/audio/saira-tujetsch-tuttina-cuntinuar-cun-sedrun-muster-turissem?id=63cb0778-27f8-49af-9284-8c7a8c6d15fc' ,
144
187
'info_dict' : {
145
188
'id' : '63cb0778-27f8-49af-9284-8c7a8c6d15fc' ,
146
189
'ext' : 'mp3' ,
147
190
'upload_date' : '20151013' ,
148
191
'title' : 'Saira: Tujetsch - tuttina cuntinuar cun Sedrun Mustér Turissem' ,
149
- 'timestamp' : 1444750398 ,
192
+ 'timestamp' : 1444709160 ,
193
+ 'duration' : 336.816 ,
150
194
},
151
195
'params' : {
152
196
# rtmp download
@@ -159,19 +203,32 @@ class SRGSSRPlayIE(InfoExtractor):
159
203
'id' : '6348260' ,
160
204
'display_id' : '6348260' ,
161
205
'ext' : 'mp4' ,
162
- 'duration' : 1796 ,
206
+ 'duration' : 1796.76 ,
163
207
'title' : 'Le 19h30' ,
164
- 'description' : '' ,
165
- 'uploader' : '19h30' ,
166
208
'upload_date' : '20141201' ,
167
209
'timestamp' : 1417458600 ,
168
210
'thumbnail' : r're:^https?://.*\.image' ,
169
- 'view_count' : int ,
170
211
},
171
212
'params' : {
172
213
# m3u8 download
173
214
'skip_download' : True ,
174
215
}
216
+ }, {
217
+ 'url' : 'http://play.swissinfo.ch/play/tv/business/video/why-people-were-against-tax-reforms?id=42960270' ,
218
+ 'info_dict' : {
219
+ 'id' : '42960270' ,
220
+ 'ext' : 'mp4' ,
221
+ 'title' : 'Why people were against tax reforms' ,
222
+ 'description' : 'md5:7ac442c558e9630e947427469c4b824d' ,
223
+ 'duration' : 94.0 ,
224
+ 'upload_date' : '20170215' ,
225
+ 'timestamp' : 1487173560 ,
226
+ 'thumbnail' : r're:https?://www\.swissinfo\.ch/srgscalableimage/42961964' ,
227
+ 'subtitles' : 'count:9' ,
228
+ },
229
+ 'params' : {
230
+ 'skip_download' : True ,
231
+ }
175
232
}, {
176
233
'url' : 'https://www.srf.ch/play/tv/popupvideoplayer?id=c4dba0ca-e75b-43b2-a34f-f708a4932e01' ,
177
234
'only_matching' : True ,
@@ -181,12 +238,15 @@ class SRGSSRPlayIE(InfoExtractor):
181
238
}, {
182
239
'url' : 'https://www.rts.ch/play/tv/19h30/video/le-19h30?urn=urn:rts:video:6348260' ,
183
240
'only_matching' : True ,
241
+ }, {
242
+ # audio segment, has podcastSdUrl of the full episode
243
+ 'url' : 'https://www.srf.ch/play/radio/popupaudioplayer?id=50b20dc8-f05b-4972-bf03-e438ff2833eb' ,
244
+ 'only_matching' : True ,
184
245
}]
185
246
186
247
def _real_extract (self , url ):
187
248
mobj = re .match (self ._VALID_URL , url )
188
249
bu = mobj .group ('bu' )
189
250
media_type = mobj .group ('type' ) or mobj .group ('type_2' )
190
251
media_id = mobj .group ('id' )
191
- # other info can be extracted from url + '&layout=json'
192
252
return self .url_result ('srgssr:%s:%s:%s' % (bu [:3 ], media_type , media_id ), 'SRGSSR' )
0 commit comments