This repository was archived by the owner on Nov 30, 2022. It is now read-only.

Added hacktoberfest events scraper #283

Merged 1 commit on Sep 29, 2020
17 changes: 17 additions & 0 deletions Web-Scraping/Hacktoberfest_Events/README.md
@@ -0,0 +1,17 @@
## Hacktoberfest Events

### This script scrapes all the events from the Hacktoberfest events website and stores them in a CSV file.

### How to use this script?

1. Make sure all the requirements for the script are installed by running:

   `pip install -r requirements.txt`

2. Run the script (a sketch for inspecting the resulting CSV follows these steps):

   `python hacktoberfest_events.py`
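The run writes `hacktoberfest_events.csv` next to the script, with the columns `Name`, `Date`, `Location`, and `Link` (the column names set in `hacktoberfest_events.py`). A minimal sketch for loading and inspecting the file with pandas, assuming it sits in the current working directory:

```python
import pandas as pd

# Load the CSV written by hacktoberfest_events.py; the first column is the
# numeric index that DataFrame.to_csv writes by default.
events = pd.read_csv("hacktoberfest_events.csv", index_col=0)

print(events.columns.tolist())  # expected: ['Name', 'Date', 'Location', 'Link']
print(events.head())            # first few scraped events
```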

### Author

[Aditya Jetely](https://github.com/AdityaJ7)
44 changes: 44 additions & 0 deletions Web-Scraping/Hacktoberfest_Events/hacktoberfest_events.py
@@ -0,0 +1,44 @@
import requests
from bs4 import BeautifulSoup as bs
import pandas as pd


def scrape_tablerows():
    """This function scrapes the table rows related to our target elements.
    Our target elements here are the events of Hacktoberfest.

    Returns:
        tablerows (list): A list of table rows for our target elements.
    """
    hacktoberfest_events_url = "https://hacktoberfest.digitalocean.com/events"
    response = requests.get(hacktoberfest_events_url)
    soup = bs(response.content, 'html.parser')
    mydivs = soup.find_all("tbody", {"class": "list"})
    tablerows = mydivs[0].find_all("tr")
    return tablerows


def hacktoberfest_events(tablerows):
    """This function takes the list of table rows as input, scrapes the
    required elements, and stores the scraped data in a CSV file.

    Args:
        tablerows (list): List of table rows for the target elements.
    """
    events = {}
    for i, tablerow in enumerate(tablerows):
        location = tablerow.find("td", {"class": "location"}).text
        link = tablerow.find("a")['href']
        name = tablerow.find("td", {"class": "event_name"}).text.strip()
        date = tablerow.find("td", {"class": "date is-hidden"}).text.strip()
        events[i] = [name, date, location, link]
    df1 = pd.DataFrame.from_dict(events, orient='index')
    df1.columns = ['Name', 'Date', 'Location', 'Link']
    df1.to_csv('hacktoberfest_events.csv')


if __name__ == "__main__":
    tablerows = scrape_tablerows()
    hacktoberfest_events(tablerows)
    print("The events have been stored successfully.")
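The request in `scrape_tablerows` assumes the events page is always reachable and that the events table is present. A hedged variant with a timeout, an explicit HTTP error check, and a guard for a missing table might look like the sketch below; the helper name `scrape_tablerows_safely` and the 10-second timeout are illustrative choices, not part of the script above.

```python
import requests
from bs4 import BeautifulSoup as bs


def scrape_tablerows_safely(url="https://hacktoberfest.digitalocean.com/events", timeout=10):
    """Variant of scrape_tablerows that fails loudly on network or parsing problems."""
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()  # raises requests.HTTPError on 4xx/5xx responses
    soup = bs(response.content, "html.parser")
    tbody = soup.find("tbody", {"class": "list"})
    if tbody is None:
        raise ValueError("No events table found; the page layout may have changed.")
    return tbody.find_all("tr")
```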
8 changes: 8 additions & 0 deletions Web-Scraping/Hacktoberfest_Events/requirements.txt
@@ -0,0 +1,8 @@
beautifulsoup4==4.9.1
certifi==2020.6.20
chardet==3.0.4
idna==2.10
requests==2.24.0
soupsieve==2.0.1
urllib3==1.25.10
pandas==1.1.2