1
+ import requests
2
+ from bs4 import BeautifulSoup as bs
3
+
4
+
5
def _parse_count(text):
    """Convert a comma-grouped count such as ``"1,234"`` into an int."""
    return int(text.replace(",", ""))


def get_video_info(url, timeout=30):
    """Scrape metadata about a YouTube video from its watch-page HTML.

    Args:
        url: URL of the video's watch page.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests`` waits indefinitely on a stalled
            connection.

    Returns:
        A dict with the keys ``title``, ``views`` (int), ``description``,
        ``date_published``, ``likes`` (int), ``dislikes`` (int) and
        ``channel``; ``channel`` is itself a dict with the keys ``name``,
        ``url`` and ``subscribers`` (kept as a string).

    Raises:
        requests.exceptions.RequestException: If the download fails or
            times out.
        AttributeError: If an expected element is not present in the page
            (the lookup returned ``None``).
        ValueError: If a counter's text cannot be parsed as an integer.
    """
    # download HTML code
    response = requests.get(url, timeout=timeout)
    # create beautiful soup object to parse HTML
    soup = bs(response.content, "html.parser")
    # initialize the result
    result = {}
    # video title
    result['title'] = soup.find("span", attrs={"class": "watch-title"}).text.strip()
    # video views (converted to integer); the counter is expected to read
    # like "1,234 views", so take the leading number token instead of
    # slicing off a fixed six characters, which mangles "1 view" or text
    # followed by trailing whitespace
    view_tokens = soup.find("div", attrs={"class": "watch-view-count"}).text.split()
    result['views'] = _parse_count(view_tokens[0] if view_tokens else "")
    # video description
    result['description'] = soup.find("p", attrs={"id": "eow-description"}).text
    # date published
    result['date_published'] = soup.find("strong", attrs={"class": "watch-time-text"}).text
    # number of likes as integer
    result['likes'] = _parse_count(soup.find("button", attrs={"title": "I like this"}).text)
    # number of dislikes as integer
    result['dislikes'] = _parse_count(soup.find("button", attrs={"title": "I dislike this"}).text)
    # channel details
    channel_tag = soup.find("div", attrs={"class": "yt-user-info"}).find("a")
    # channel name
    channel_name = channel_tag.text
    # channel URL: the href is appended to the site root; no trailing
    # whitespace, which would make the URL invalid
    channel_url = f"https://www.youtube.com{channel_tag['href']}"
    # number of subscribers as str
    channel_subscribers = soup.find("span", attrs={"class": "yt-subscriber-count"}).text.strip()
    result['channel'] = {'name': channel_name, 'url': channel_url, 'subscribers': channel_subscribers}
    return result
34
+
35
if __name__ == "__main__":
    import argparse

    # the video URL is the only (positional) command-line argument
    cli = argparse.ArgumentParser(description="YouTube Video Data Extractor")
    cli.add_argument("url", help="URL of the YouTube video")
    video_url = cli.parse_args().url

    # scrape the watch page
    info = get_video_info(video_url)

    # report the video details, one field per print call
    print(f"Title: {info['title']} ")
    print(f"Views: {info['views']} ")
    print(f"\n Description: {info['description']} \n ")
    print(info['date_published'])
    print(f"Likes: {info['likes']} ")
    print(f"Dislikes: {info['dislikes']} ")

    # report the channel details
    channel = info['channel']
    print(f"\n Channel Name: {channel['name']} ")
    print(f"Channel URL: {channel['url']} ")
    print(f"Channel Subscribers: {channel['subscribers']} ")
0 commit comments