From 8e24cce4e68311225304e743cd06e7a6d9eae159 Mon Sep 17 00:00:00 2001 From: Kiese Diangebeni Reagan <48117141+Rekidiang2@users.noreply.github.com> Date: Sat, 31 Dec 2022 09:42:20 +0100 Subject: [PATCH] Update 1-Data-Cleaning.ipynb --- 1-Data-Cleaning.ipynb | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/1-Data-Cleaning.ipynb b/1-Data-Cleaning.ipynb index 2689063..24f3685 100644 --- a/1-Data-Cleaning.ipynb +++ b/1-Data-Cleaning.ipynb @@ -80,7 +80,9 @@ " '''Returns transcript data specifically from scrapsfromtheloft.com.'''\n", " page = requests.get(url).text\n", " soup = BeautifulSoup(page, \"lxml\")\n", - " text = [p.text for p in soup.find(class_=\"ast-container\").find_all('p')]\n", + " #text = [p.text for p in soup.find(class_=\"ast-container\").find_all('p')]\n", + " # The page's HTML was modified, so this line can be written like this instead.\n", + " text = [p.text for p in soup.find_all('p')]\n", " print(url)\n", " return text\n", "\n",