diff --git a/1-Data-Cleaning.ipynb b/1-Data-Cleaning.ipynb index 7bcb2cf..2689063 100644 --- a/1-Data-Cleaning.ipynb +++ b/1-Data-Cleaning.ipynb @@ -80,7 +80,7 @@ " '''Returns transcript data specifically from scrapsfromtheloft.com.'''\n", " page = requests.get(url).text\n", " soup = BeautifulSoup(page, \"lxml\")\n", - " text = [p.text for p in soup.find(class_=\"post-content\").find_all('p')]\n", + " text = [p.text for p in soup.find(class_=\"ast-container\").find_all('p')]\n", " print(url)\n", " return text\n", "\n", @@ -88,11 +88,11 @@ "urls = ['http://scrapsfromtheloft.com/2017/05/06/louis-ck-oh-my-god-full-transcript/',\n", " 'http://scrapsfromtheloft.com/2017/04/11/dave-chappelle-age-spin-2017-full-transcript/',\n", " 'http://scrapsfromtheloft.com/2018/03/15/ricky-gervais-humanity-transcript/',\n", - " 'http://scrapsfromtheloft.com/2017/08/07/bo-burnham-2013-full-transcript/',\n", + " 'https://scrapsfromtheloft.com/comedy/bo-burnham-what-transcript/',\n", " 'http://scrapsfromtheloft.com/2017/05/24/bill-burr-im-sorry-feel-way-2014-full-transcript/',\n", " 'http://scrapsfromtheloft.com/2017/04/21/jim-jefferies-bare-2014-full-transcript/',\n", " 'http://scrapsfromtheloft.com/2017/08/02/john-mulaney-comeback-kid-2015-full-transcript/',\n", - " 'http://scrapsfromtheloft.com/2017/10/21/hasan-minhaj-homecoming-king-2017-full-transcript/',\n", + " 'https://scrapsfromtheloft.com/comedy/hasan-minhaj-homecoming-king-transcript/',\n", " 'http://scrapsfromtheloft.com/2017/09/19/ali-wong-baby-cobra-2016-full-transcript/',\n", " 'http://scrapsfromtheloft.com/2017/08/03/anthony-jeselnik-thoughts-prayers-2015-full-transcript/',\n", " 'http://scrapsfromtheloft.com/2018/03/03/mike-birbiglia-my-girlfriends-boyfriend-2013-full-transcript/',\n", diff --git a/2-Exploratory-Data-Analysis.ipynb b/2-Exploratory-Data-Analysis.ipynb index 51c9d5c..18ae583 100644 --- a/2-Exploratory-Data-Analysis.ipynb +++ b/2-Exploratory-Data-Analysis.ipynb @@ -247,7 +247,7 @@ "# Identify the non-zero items in the document-term matrix, meaning that the word occurs at least once\n", "unique_list = []\n", "for comedian in data.columns:\n", - " uniques = data[comedian].nonzero()[0].size\n", + " uniques = data[comedian].to_numpy().nonzero()[0].size\n", " unique_list.append(uniques)\n", "\n", "# Create a new dataframe that contains this unique word count\n", diff --git a/3-Sentiment-Analysis.ipynb b/3-Sentiment-Analysis.ipynb index 31e9d67..9998c1a 100644 --- a/3-Sentiment-Analysis.ipynb +++ b/3-Sentiment-Analysis.ipynb @@ -227,7 +227,7 @@ " plt.plot(polarity_transcript[index])\n", " plt.plot(np.arange(0,10), np.zeros(10))\n", " plt.title(data['full_name'][index])\n", - " plt.ylim(ymin=-.2, ymax=.3)\n", + " plt.ylim(bottom=-.2, top=.3)\n", " \n", "plt.show()" ] @@ -285,7 +285,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.2" + "version": "3.8.5" }, "toc": { "nav_menu": {}, diff --git a/README.md b/README.md index 953fbd1..7e1f9a2 100644 --- a/README.md +++ b/README.md @@ -5,7 +5,7 @@ We will be going through several Jupyter Notebooks during the tutorial and use a Here are the steps you’ll need to take before the start of the tutorial: ### 1. Download Anaconda -I highly recommend that you download [the Python 3.6 version](https://www.anaconda.com/download/). +I highly recommend that you download [the Python 3.7 version](https://www.anaconda.com/download/). ### 2. Download the Jupyter Notebooks Clone or download this [Github repository](https://github.com/adashofdata/nlp-in-python-tutorial), so you have access to all the Jupyter Notebooks (.ipynb extension) in the tutorial. **Note the green button on the right side of the screen that says `Clone or download`.** If you know how to use Github, go ahead and clone the repo. If you don't know how to use Github, you can also just download the zip file and unzip it on your laptop. @@ -28,4 +28,4 @@ Open the Anaconda Prompt program. You should see a black window pop up. Type `co *Mac/Linux:* Your terminal should already be open. Type command-t to open a new tab. Type `conda install -c conda-forge wordcloud` to download wordcloud. You will be asked whether you want to proceed or not. Type `y` for yes. Once that is done, type `conda install -c conda-forge textblob` to download textblob and `y` to proceed, and type `conda install -c conda-forge gensim` to download gensim and `y` to proceed. -If you have any issues, please email me at adashofdata@gmail.com or come talk to me before the start of the tutorial on Saturday. +If you have any issues, please email me at adashofdata@gmail.com or come talk to me before the start of the tutorial.