From 0850c76e320b8ea9363417f4053d186668dab571 Mon Sep 17 00:00:00 2001 From: Matt Popovich Date: Wed, 28 Apr 2021 00:20:00 -0600 Subject: [PATCH] Fixing ValueError thrown by int('') YT can return 'No likes' / 'No dislikes'. The digits from this string give ''. Casting '' to an int gives ValueError. --- web-scraping/youtube-extractor/extract_video_info.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/web-scraping/youtube-extractor/extract_video_info.py b/web-scraping/youtube-extractor/extract_video_info.py index d2fb03df..081dc20b 100644 --- a/web-scraping/youtube-extractor/extract_video_info.py +++ b/web-scraping/youtube-extractor/extract_video_info.py @@ -29,9 +29,11 @@ def get_video_info(url): result["tags"] = ', '.join([ meta.attrs.get("content") for meta in soup.find_all("meta", {"property": "og:video:tag"}) ]) # number of likes text_yt_formatted_strings = soup.find_all("yt-formatted-string", {"id": "text", "class": "ytd-toggle-button-renderer"}) - result["likes"] = int(''.join([ c for c in text_yt_formatted_strings[0].attrs.get("aria-label") if c.isdigit() ])) + result["likes"] = ''.join([ c for c in text_yt_formatted_strings[0].attrs.get("aria-label") if c.isdigit() ]) + result["likes"] = 0 if result['likes'] == '' else int(result['likes']) # number of dislikes - result["dislikes"] = int(''.join([ c for c in text_yt_formatted_strings[1].attrs.get("aria-label") if c.isdigit() ])) + result["dislikes"] = ''.join([ c for c in text_yt_formatted_strings[1].attrs.get("aria-label") if c.isdigit() ]) + result['dislikes'] = 0 if result['dislikes'] == '' else int(result['dislikes']) # channel details channel_tag = soup.find("yt-formatted-string", {"class": "ytd-channel-name"}).find("a") @@ -66,4 +68,4 @@ def get_video_info(url): print(f"\nDescription: {data['description']}\n") print(f"\nChannel Name: {data['channel']['name']}") print(f"Channel URL: {data['channel']['url']}") - print(f"Channel Subscribers: {data['channel']['subscribers']}") \ No newline at end of file + print(f"Channel Subscribers: {data['channel']['subscribers']}")