added youtube extractor tutorial

x4nth055 · x4nth055 · commit 74d573e2fe20 · 2019-09-15T14:02:02.000+02:00
diff --git a/README.md b/README.md
@@ -34,4 +34,5 @@ This is a repository of all the tutorials of [The Python Code](https://www.thepy
     - [How to Make a Process Monitor in Python](https://www.thepythoncode.com/article/make-process-monitor-python). ([code](general/process-monitor))
     - [How to Make a Screen Recorder in Python](https://www.thepythoncode.com/article/make-screen-recorder-python). ([code](general/screen-recorder))
     - [How to Access Wikipedia in Python](https://www.thepythoncode.com/article/access-wikipedia-python). ([code](general/wikipedia-extractor))
+    - [How to Extract YouTube Data in Python](https://www.thepythoncode.com/article/get-youtube-data-python). ([code](general/youtube-extractor))
 
diff --git a/general/youtube-extractor/README.md b/general/youtube-extractor/README.md
@@ -0,0 +1,22 @@
+# [How to Extract YouTube Data in Python](https://www.thepythoncode.com/article/get-youtube-data-python)
+To run this:
+- `pip3 install -r requirements.txt`
+- Pass the URL of a YouTube video to the script:
+    ```
+    python extract_video_info.py https://www.youtube.com/watch?v=jNQXAC9IVRw
+    ```
+    **Output:**
+    ```
+    Title: Me at the zoo
+    Views: 75910120
+
+    Description: The first video on YouTube. Maybe it's time to go back to the zoo?sub2sub kthxbai -- fast and loyal if not i get a subs back i will unsubs your cahnnel(Credit: The name of the music playing in the background is Darude - Sandstorm)
+
+    Published on Apr 23, 2005
+    Likes: 2337841
+    Dislikes: 81211
+
+    Channel Name: jawed
+    Channel URL: https://www.youtube.com/channel/UC4QobU6STFB0P71PMvOGN5A
+    Channel Subscribers: 616K
+    ```
diff --git a/general/youtube-extractor/extract_video_info.py b/general/youtube-extractor/extract_video_info.py
@@ -0,0 +1,55 @@
+import requests
+from bs4 import BeautifulSoup as bs
+
+
+def get_video_info(url):
+    # download HTML code
+    content = requests.get(url)
+    # create beautiful soup object to parse HTML
+    soup = bs(content.content, "html.parser")
+    # initialize the result
+    result = {}
+    # video title
+    result['title'] = soup.find("span", attrs={"class": "watch-title"}).text.strip()
+    # video views (converted to integer)
+    result['views'] = int(soup.find("div", attrs={"class": "watch-view-count"}).text[:-6].replace(",", ""))
+    # video description
+    result['description'] = soup.find("p", attrs={"id": "eow-description"}).text
+    # date published
+    result['date_published'] = soup.find("strong", attrs={"class": "watch-time-text"}).text
+    # number of likes as integer
+    result['likes'] = int(soup.find("button", attrs={"title": "I like this"}).text.replace(",", ""))
+    # number of dislikes as integer
+    result['dislikes'] = int(soup.find("button", attrs={"title": "I dislike this"}).text.replace(",", ""))
+    # channel details
+    channel_tag = soup.find("div", attrs={"class": "yt-user-info"}).find("a")
+    # channel name
+    channel_name = channel_tag.text
+    # channel URL
+    channel_url = f"https://www.youtube.com{channel_tag['href']}"
+    # number of subscribers as str
+    channel_subscribers = soup.find("span", attrs={"class": "yt-subscriber-count"}).text.strip()
+    result['channel'] = {'name': channel_name, 'url': channel_url, 'subscribers': channel_subscribers}
+    return result
+
+if __name__ == "__main__":
+    import argparse
+    parser = argparse.ArgumentParser(description="YouTube Video Data Extractor")
+    parser.add_argument("url", help="URL of the YouTube video")
+
+    args = parser.parse_args()
+    # parse the video URL from command line
+    url = args.url
+    
+    data = get_video_info(url)
+
+    # print in nice format
+    print(f"Title: {data['title']}")
+    print(f"Views: {data['views']}")
+    print(f"\nDescription: {data['description']}\n")
+    print(data['date_published'])
+    print(f"Likes: {data['likes']}")
+    print(f"Dislikes: {data['dislikes']}")
+    print(f"\nChannel Name: {data['channel']['name']}")
+    print(f"Channel URL: {data['channel']['url']}")
+    print(f"Channel Subscribers: {data['channel']['subscribers']}")
diff --git a/general/youtube-extractor/requirements.txt b/general/youtube-extractor/requirements.txt
@@ -0,0 +1,2 @@
+requests
+beautifulsoup4