|
| 1 | +# coding: utf-8 |
| 2 | +from __future__ import unicode_literals |
| 3 | + |
| 4 | +import re |
| 5 | + |
| 6 | +from .common import InfoExtractor |
| 7 | +from ..utils import ( |
| 8 | + clean_podcast_url, |
| 9 | + int_or_none, |
| 10 | + parse_iso8601, |
| 11 | + strip_or_none, |
| 12 | + try_get, |
| 13 | + urlencode_postdata, |
| 14 | +) |
| 15 | + |
| 16 | + |
class SimplecastBaseIE(InfoExtractor):
    """Shared API helpers and episode parsing for the Simplecast extractors."""

    # Canonical lowercase hexadecimal UUID (8-4-4-4-12).
    _UUID_REGEX = r'[\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12}'
    _API_BASE = 'https://api.simplecast.com/'

    def _call_api(self, path_tmpl, video_id):
        """Download and return the JSON found at ``_API_BASE + path_tmpl % video_id``."""
        return self._download_json(
            self._API_BASE + path_tmpl % video_id, video_id)

    def _call_search_api(self, resource, resource_id, resource_url):
        """Look up a resource by its public URL via ``<resource>s/search``.

        ``resource_url`` is sent as url-encoded form data under the ``url``
        key; ``resource_id`` is only passed on as the download's video id.
        """
        # Built from _API_BASE so both API helpers share one host definition.
        return self._download_json(
            self._API_BASE + '%ss/search' % resource, resource_id,
            data=urlencode_postdata({'url': resource_url}))

    def _parse_episode(self, episode):
        """Convert an episode JSON object into an info dict.

        Raises KeyError when ``id`` or ``title`` is missing, or when none of
        ``audio_file.url``, ``audio_file_url`` and ``enclosure_url`` is set.
        All other fields are optional and default to None.
        """
        episode_id = episode['id']
        title = episode['title'].strip()
        audio_file = episode.get('audio_file') or {}
        audio_file_url = audio_file.get('url') or episode.get('audio_file_url') or episode['enclosure_url']

        season = episode.get('season') or {}
        season_href = season.get('href')
        season_id = None
        if season_href:
            # Dots are escaped so that only the literal API host is accepted.
            season_id = self._search_regex(
                r'https?://api\.simplecast\.com/seasons/(%s)' % self._UUID_REGEX,
                season_href, 'season id', default=None)

        webpage_url = episode.get('episode_url')
        channel_url = None
        if webpage_url:
            # The channel is the scheme + host part of the episode page URL.
            channel_url = self._search_regex(
                r'(https?://[^/]+\.simplecast\.com)',
                webpage_url, 'channel url', default=None)

        return {
            'id': episode_id,
            'display_id': episode.get('slug'),
            'title': title,
            'url': clean_podcast_url(audio_file_url),
            'webpage_url': webpage_url,
            'channel_url': channel_url,
            'series': try_get(episode, lambda x: x['podcast']['title']),
            'season_number': int_or_none(season.get('number')),
            'season_id': season_id,
            'thumbnail': episode.get('image_url'),
            'episode_id': episode_id,
            'episode_number': int_or_none(episode.get('number')),
            'description': strip_or_none(episode.get('description')),
            'timestamp': parse_iso8601(episode.get('published_at')),
            'duration': int_or_none(episode.get('duration')),
            'filesize': int_or_none(audio_file.get('size') or episode.get('audio_file_size')),
        }
| 69 | + |
| 70 | + |
class SimplecastIE(SimplecastBaseIE):
    """Extractor for episodes addressed by UUID (API and player URLs)."""

    IE_NAME = 'simplecast'
    _VALID_URL = r'https?://(?:api\.simplecast\.com/episodes|player\.simplecast\.com)/(?P<id>%s)' % SimplecastBaseIE._UUID_REGEX
    # Expected metadata for the reference episode; also used by the
    # slug-based episode extractor's test.
    _COMMON_TEST_INFO = {
        'display_id': 'errant-signal-chris-franklin-new-wave-video-essays',
        'id': 'b6dc49a2-9404-4853-9aa9-9cfc097be876',
        'ext': 'mp3',
        'title': 'Errant Signal - Chris Franklin & New Wave Video Essays',
        'episode_number': 1,
        'episode_id': 'b6dc49a2-9404-4853-9aa9-9cfc097be876',
        'description': 'md5:34752789d3d2702e2d2c975fbd14f357',
        'season_number': 1,
        'season_id': 'e23df0da-bae4-4531-8bbf-71364a88dc13',
        'series': 'The RE:BIND.io Podcast',
        'duration': 5343,
        'timestamp': 1580979475,
        'upload_date': '20200206',
        'webpage_url': r're:^https?://the-re-bind-io-podcast\.simplecast\.com/episodes/errant-signal-chris-franklin-new-wave-video-essays',
        'channel_url': r're:^https?://the-re-bind-io-podcast\.simplecast\.com$',
    }
    _TESTS = [{
        'url': 'https://api.simplecast.com/episodes/b6dc49a2-9404-4853-9aa9-9cfc097be876',
        'md5': '8c93be7be54251bf29ee97464eabe61c',
        'info_dict': _COMMON_TEST_INFO,
    }, {
        'url': 'https://player.simplecast.com/b6dc49a2-9404-4853-9aa9-9cfc097be876',
        'only_matching': True,
    }]

    @staticmethod
    def _extract_urls(webpage):
        """Return the Simplecast embed/player URLs used as iframe sources in ``webpage``."""
        iframe_src_re = r'''(?x)<iframe[^>]+src=["\']
                (
                    https?://(?:embed\.simplecast\.com/[0-9a-f]{8}|
                    player\.simplecast\.com/%s
                ))''' % SimplecastBaseIE._UUID_REGEX
        return re.findall(iframe_src_re, webpage)

    def _real_extract(self, url):
        """Fetch the episode named by the UUID in ``url`` and parse it."""
        return self._parse_episode(
            self._call_api('episodes/%s', self._match_id(url)))
| 113 | + |
| 114 | + |
class SimplecastEpisodeIE(SimplecastBaseIE):
    """Extractor for episode pages addressed by slug on a podcast subdomain."""

    IE_NAME = 'simplecast:episode'
    _VALID_URL = r'https?://(?!api\.)[^/]+\.simplecast\.com/episodes/(?P<id>[^/?&#]+)'
    _TEST = {
        'url': 'https://the-re-bind-io-podcast.simplecast.com/episodes/errant-signal-chris-franklin-new-wave-video-essays',
        'md5': '8c93be7be54251bf29ee97464eabe61c',
        'info_dict': SimplecastIE._COMMON_TEST_INFO,
    }

    def _real_extract(self, url):
        """Resolve the episode page through the search API and parse the result."""
        match = re.match(self._VALID_URL, url)
        # Only the matched part of the URL is searched for, so anything
        # after the slug (query string, fragment) is left out.
        matched_url = match.group(0)
        slug = match.group('id')
        return self._parse_episode(
            self._call_search_api('episode', slug, matched_url))
| 129 | + |
| 130 | + |
class SimplecastPodcastIE(SimplecastBaseIE):
    """Extractor turning a podcast subdomain into a playlist of its episodes."""

    IE_NAME = 'simplecast:podcast'
    _VALID_URL = r'https?://(?!(?:api|cdn|embed|feeds|player)\.)(?P<id>[^/]+)\.simplecast\.com(?!/episodes/[^/?&#]+)'
    _TESTS = [{
        'url': 'https://the-re-bind-io-podcast.simplecast.com',
        'playlist_mincount': 33,
        'info_dict': {
            'id': '07d28d26-7522-42eb-8c53-2bdcfc81c43c',
            'title': 'The RE:BIND.io Podcast',
        },
    }, {
        'url': 'https://the-re-bind-io-podcast.simplecast.com/episodes',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Resolve the site to its podcast and return a playlist of its episodes."""
        site = self._call_search_api('site', self._match_id(url), url)
        podcast = site['podcast']
        podcast_id, podcast_title = podcast['id'], podcast.get('title')

        def iter_episodes():
            # The episode listing is requested only once the playlist is
            # actually iterated.
            listing = self._call_api('podcasts/%s/episodes', podcast_id)
            for item in listing.get('collection') or ():
                # The series is always set from the podcast title obtained
                # above, replacing whatever the episode parser filled in.
                yield dict(self._parse_episode(item), series=podcast_title)

        return self.playlist_result(iter_episodes(), podcast_id, podcast_title)
0 commit comments