Skip to content

Commit 21e872b

Browse files
Isaac-the-Man and remitamine
authored and committed
[samplefocus] Add new extractor (closes ytdl-org#27763)
1 parent cf2dbec commit 21e872b

File tree

2 files changed

+101
-0
lines changed

2 files changed

+101
-0
lines changed

youtube_dl/extractor/extractors.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1029,6 +1029,7 @@
10291029
SafariApiIE,
10301030
SafariCourseIE,
10311031
)
1032+
from .samplefocus import SampleFocusIE
10321033
from .sapo import SapoIE
10331034
from .savefrom import SaveFromIE
10341035
from .sbs import SBSIE

youtube_dl/extractor/samplefocus.py

Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
# coding: utf-8
2+
from __future__ import unicode_literals
3+
4+
import re
5+
6+
from .common import InfoExtractor
7+
from ..utils import (
8+
extract_attributes,
9+
get_element_by_attribute,
10+
int_or_none,
11+
)
12+
13+
14+
class SampleFocusIE(InfoExtractor):
    """Extractor for individual sample pages on samplefocus.com.

    Handles URLs of the form ``https://samplefocus.com/samples/<slug>`` and
    scrapes the audio URL and metadata from the sample's HTML page.
    """

    _VALID_URL = r'https?://(?:www\.)?samplefocus\.com/samples/(?P<id>[^/?&#]+)'
    _TESTS = [{
        'url': 'https://samplefocus.com/samples/lil-peep-sad-emo-guitar',
        'md5': '48c8d62d60be467293912e0e619a5120',
        'info_dict': {
            'id': '40316',
            'display_id': 'lil-peep-sad-emo-guitar',
            'ext': 'mp3',
            'title': 'Lil Peep Sad Emo Guitar',
            'thumbnail': r're:^https?://.+\.png',
            'license': 'Standard License',
            'uploader': 'CapsCtrl',
            'uploader_id': 'capsctrl',
            'like_count': int,
            'comment_count': int,
            'categories': ['Samples', 'Guitar', 'Electric guitar'],
        },
    }, {
        'url': 'https://samplefocus.com/samples/dababy-style-bass-808',
        'only_matching': True
    }, {
        'url': 'https://samplefocus.com/samples/young-chop-kick',
        'only_matching': True
    }]

    def _real_extract(self, url):
        """Download the sample page at *url* and build its info dict.

        The numeric sample id, the title and the mp3 URL are looked up with
        fatal searches (the last fallback of each is fatal); every other
        field is best-effort and is ``None`` or an empty list when the
        corresponding markup is not found.
        """
        # The URL slug doubles as the display id; the numeric id is only
        # available from a form input inside the page.
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        sample_id = self._search_regex(
            r'<input[^>]+id=(["\'])sample_id\1[^>]+value=(?:["\'])(?P<id>\d+)',
            webpage, 'sample id', group='id')

        # Prefer the OpenGraph title, fall back to the first <h1>.
        title = self._og_search_title(webpage, fatal=False) or self._html_search_regex(
            r'<h1>(.+?)</h1>', webpage, 'title')

        # Primary source is the "sample_mp3" input; otherwise read the
        # "content" attribute of the <meta itemprop="contentUrl"> tag.
        # NOTE(review): the fallback indexes ['content'] directly, so a
        # matching tag without that attribute raises KeyError.
        mp3_url = self._search_regex(
            r'<input[^>]+id=(["\'])sample_mp3\1[^>]+value=(["\'])(?P<url>(?:(?!\2).)+)',
            webpage, 'mp3', fatal=False, group='url') or extract_attributes(self._search_regex(
                r'<meta[^>]+itemprop=(["\'])contentUrl\1[^>]*>',
                webpage, 'mp3 url', group=0))['content']

        # The fallback field name used to be 'mp3' (copy-paste), which made
        # a missing thumbnail show up as a failed mp3 extraction.
        thumbnail = self._og_search_thumbnail(webpage) or self._html_search_regex(
            r'<img[^>]+class=(?:["\'])waveform responsive-img[^>]+src=(["\'])(?P<url>(?:(?!\1).)+)',
            webpage, 'thumbnail', fatal=False, group='url')

        # Each match yields (author id, author name, comment body).
        comments = [{
            'author': author,
            'author_id': author_id,
            'text': body,
        } for author_id, author, body in re.findall(
            r'(?s)<p[^>]+class="comment-author"><a[^>]+href="/users/([^"]+)">([^"]+)</a>'
            r'.+?<p[^>]+class="comment-body">([^>]+)</p>',
            webpage)]

        uploader_id = uploader = None
        mobj = re.search(r'>By <a[^>]+href="/users/([^"]+)"[^>]*>([^<]+)', webpage)
        if mobj:
            uploader_id, uploader = mobj.groups()

        # Categories are the names listed in the BreadcrumbList element.
        breadcrumb = get_element_by_attribute('typeof', 'BreadcrumbList', webpage)
        categories = []
        if breadcrumb:
            # The first group only captures the quote character; drop it.
            categories = [
                name for _, name in re.findall(
                    r'<span[^>]+property=(["\'])name\1[^>]*>([^<]+)', breadcrumb)]

        def extract_count(klass):
            """Return the integer shown in the ``<span class="<klass>-count">``
            element, or None when it is not found."""
            return int_or_none(self._html_search_regex(
                r'<span[^>]+class=(?:["\'])?%s-count[^>]*>(\d+)' % klass,
                webpage, klass, fatal=False))

        return {
            'id': sample_id,
            'title': title,
            'url': mp3_url,
            'display_id': display_id,
            'thumbnail': thumbnail,
            'uploader': uploader,
            'license': self._html_search_regex(
                r'<a[^>]+href=(["\'])/license\1[^>]*>(?P<license>[^<]+)<',
                webpage, 'license', fatal=False, group='license'),
            'uploader_id': uploader_id,
            'like_count': extract_count('sample-%s-favorites' % sample_id),
            'comment_count': extract_count('comments'),
            'comments': comments,
            'categories': categories,
        }

0 commit comments

Comments
 (0)