File tree Expand file tree Collapse file tree 1 file changed +29
-0
lines changed Expand file tree Collapse file tree 1 file changed +29
-0
lines changed Original file line number Diff line number Diff line change
1
+ from bs4 import BeautifulSoup
2
+ import requests
3
+ import json
4
+ base_url = "https://www.goodreads.com/quotes/tag/{0}?page={1}" # URL template of the site the quotes are scraped from;
4
+ # {0} is filled with the emotion tag and {1} with the page number later via str.format
6
+
7
def process(content, emotion):
    """Extract the quotes contained in the HTML of one quotes page.

    Args:
        content: HTML text of the page to parse.
        emotion: Tag the page was requested for; stored unchanged in the
            "emotion" field of every returned entry.

    Returns:
        A list of dicts with the keys "quote", "author" and "emotion",
        one for each quote block that has both a text element and an
        author element. Blocks missing either element are skipped.
    """
    soup = BeautifulSoup(content, features="html5lib")
    quotes = []
    # attrs is given as a dict mapping attribute name to value; the earlier
    # {"class", "quote"} form was a set literal, not a mapping.
    for div in soup.find_all("div", attrs={"class": "quote"}):
        q_text = div.find("div", attrs={"class": "quoteText"})
        if q_text is None:
            # find() returns None when nothing matches; skip the block
            # instead of failing on the attribute access below.
            continue
        author_tag = q_text.find("span", attrs={"class": "authorOrTitle"})
        if author_tag is None:
            continue
        # Keep only what precedes the first newline-plus-space in the
        # stripped text (presumably the attribution follows it).
        quote = q_text.text.strip().split('\n ')[0]
        quotes.append({
            "quote": quote,
            "author": author_tag.text.strip(),
            "emotion": emotion,
        })
    return quotes
18
+
19
emotions = ['friend', 'sad']  # any other tag can be selected here
quotes = []
for emotion in emotions:
    # range() excludes its stop value, so 6 is required to take pages 1-5;
    # the earlier range(1, 5) stopped after page 4.
    for index in range(1, 6):
        final_url = base_url.format(emotion, index)
        # Without a timeout, requests waits indefinitely on a stalled server.
        page = requests.get(final_url, timeout=10)
        if not page.ok:
            # Skip error responses instead of parsing them
            # (presumably they contain no quote blocks).
            continue
        content = page.text
        quotes.extend(process(content, emotion))

# Dump all collected quotes into a JSON file.
with open('quote.json', 'w', encoding='utf-8') as file:
    json.dump(quotes, file)
You can’t perform that action at this time.
0 commit comments