diff --git a/Python/text_preprocessing_nlp/Readme.md b/Python/text_preprocessing_nlp/Readme.md new file mode 100644 index 0000000..cbc1f54 --- /dev/null +++ b/Python/text_preprocessing_nlp/Readme.md @@ -0,0 +1,33 @@ +## Script to preprocess text using NLP +The bag-of-words technique is used. + +## Libraries imported +# nltk + +# from nltk.corpus import brown +The Brown corpus is used. +The humor category of the Brown corpus is taken as sample data. + +# contractions +Install contractions using the following command: +`pip install contractions` +This library is used for expanding contractions. + +# nltk.download("punkt") +Used to download the Punkt Sentence Tokenizer. This tokenizer divides a text into a list of sentences by using an unsupervised algorithm to build a model for abbreviation words, collocations, and words that start sentences. + +# from nltk.corpus import stopwords +The stopwords corpus provides lists of stopwords. English stopwords are removed from the text. + +# from nltk.stem import PorterStemmer +PorterStemmer is used for stemming. + +# from sklearn.feature_extraction.text import CountVectorizer + +## Run script +textpreprocessing.ipynb is a Jupyter notebook. +You can run each cell by clicking on Run, +or +you can also open it in Google Colab. 
+ + diff --git a/Python/text_preprocessing_nlp/textpreprocessing.ipynb b/Python/text_preprocessing_nlp/textpreprocessing.ipynb new file mode 100644 index 0000000..d6bcf04 --- /dev/null +++ b/Python/text_preprocessing_nlp/textpreprocessing.ipynb @@ -0,0 +1,403 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "R1mYlKuvzGcR", + "outputId": "5d5b3b37-ba18-4cc0-d4b8-c7b2dc3382eb" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[nltk_data] Downloading package brown to /root/nltk_data...\n", + "[nltk_data] Unzipping corpora/brown.zip.\n" + ] + }, + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 2, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "import nltk\n", + "#Downloading corpus \"brown\"\n", + "nltk.download('brown')\n" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "z8acyegYzNvW", + "outputId": "badd8d39-3ce8-48e4-a5ee-38251ff32e50" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "['adventure',\n", + " 'belles_lettres',\n", + " 'editorial',\n", + " 'fiction',\n", + " 'government',\n", + " 'hobbies',\n", + " 'humor',\n", + " 'learned',\n", + " 'lore',\n", + " 'mystery',\n", + " 'news',\n", + " 'religion',\n", + " 'reviews',\n", + " 'romance',\n", + " 'science_fiction']" + ] + }, + "execution_count": 5, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "from nltk.corpus import brown\n", + "brown.categories()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "id": "-kE9JhsHzku_" + }, + "outputs": [], + "source": [ + "trainingdata = brown.sents(categories=[\"humor\"])" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "colab": { + "base_uri": 
"https://localhost:8080/" + }, + "id": "z77bNcS4zpiW", + "outputId": "fd6565e7-e6e5-40bf-cd3a-582f2c6accc5" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[nltk_data] Downloading package punkt to /root/nltk_data...\n", + "[nltk_data] Unzipping tokenizers/punkt.zip.\n" + ] + }, + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 7, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "nltk.download(\"punkt\")" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "id": "i4YCO0WqzvWw" + }, + "outputs": [], + "source": [ + "# remove punctuation and special character\n", + "from nltk.tokenize import word_tokenize\n", + "sentences = []\n", + "for sentence in trainingdata:\n", + " \n", + " words = nltk.word_tokenize(\" \".join(sentence))\n", + " new_words = [word for word in words if word.isalnum()]\n", + " sentences.append(new_words)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "id": "_7WfGjkIzz3h" + }, + "outputs": [], + "source": [ + "#lowercasing \n", + "lowercased_data = []\n", + "for sent in sentences:\n", + " text = [w.lower() for w in sent]\n", + " lowercased_data.append(text)" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "gSz05fRc0HLO", + "outputId": "6ce717be-d237-4b65-c636-aa341ef627ae" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[nltk_data] Downloading package stopwords to /root/nltk_data...\n", + "[nltk_data] Unzipping corpora/stopwords.zip.\n" + ] + }, + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 11, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "nltk.download('stopwords')" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "id": "h6GJbyIGz3hi" + }, + "outputs": 
[], + "source": [ + "#removing stopwords\n", + "from nltk.corpus import stopwords\n", + "stopword = set(stopwords.words('english'))\n", + "def remove_stopwords(text,stopword):\n", + " data = []\n", + " for w in text:\n", + " if w not in stopword:\n", + " data.append(w)\n", + " return data\n", + "\n", + "trained_data = []\n", + "for sentence in lowercased_data:\n", + " trained_data.append(remove_stopwords(sentence,stopword))\n" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "IQqJabKk0bDl", + "outputId": "e79ddf83-f7c7-448b-cc13-ae845eff251d" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Collecting contractions\n", + " Downloading https://files.pythonhosted.org/packages/00/92/a05b76a692ac08d470ae5c23873cf1c9a041532f1ee065e74b374f218306/contractions-0.0.25-py2.py3-none-any.whl\n", + "Collecting textsearch\n", + " Downloading https://files.pythonhosted.org/packages/42/a8/03407021f9555043de5492a2bd7a35c56cc03c2510092b5ec018cae1bbf1/textsearch-0.0.17-py2.py3-none-any.whl\n", + "Collecting pyahocorasick\n", + "\u001b[?25l Downloading https://files.pythonhosted.org/packages/f4/9f/f0d8e8850e12829eea2e778f1c90e3c53a9a799b7f412082a5d21cd19ae1/pyahocorasick-1.4.0.tar.gz (312kB)\n", + "\u001b[K |████████████████████████████████| 317kB 4.3MB/s \n", + "\u001b[?25hCollecting Unidecode\n", + "\u001b[?25l Downloading https://files.pythonhosted.org/packages/d0/42/d9edfed04228bacea2d824904cae367ee9efd05e6cce7ceaaedd0b0ad964/Unidecode-1.1.1-py2.py3-none-any.whl (238kB)\n", + "\u001b[K |████████████████████████████████| 245kB 7.5MB/s \n", + "\u001b[?25hBuilding wheels for collected packages: pyahocorasick\n", + " Building wheel for pyahocorasick (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n", + " Created wheel for pyahocorasick: filename=pyahocorasick-1.4.0-cp36-cp36m-linux_x86_64.whl size=81697 sha256=efe569e7b70075867d423d566e87bd60d3b171403e91c1b7c2415b398ce9b79a\n", + " Stored in directory: /root/.cache/pip/wheels/0a/90/61/87a55f5b459792fbb2b7ba6b31721b06ff5cf6bde541b40994\n", + "Successfully built pyahocorasick\n", + "Installing collected packages: pyahocorasick, Unidecode, textsearch, contractions\n", + "Successfully installed Unidecode-1.1.1 contractions-0.0.25 pyahocorasick-1.4.0 textsearch-0.0.17\n" + ] + } + ], + "source": [ + "!pip install contractions" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": { + "id": "ts80ZTnfz_R_" + }, + "outputs": [], + "source": [ + "#expanding the contractions\n", + "import contractions\n", + "\n", + "expanded_word = []\n", + "for t in trained_data:\n", + " x = [contractions.fix(word) for word in t]\n", + " expanded_word.append(x)" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": { + "id": "cMB9fbq30mjz" + }, + "outputs": [], + "source": [ + "#stemming \n", + "from nltk.stem import PorterStemmer\n", + "ps = PorterStemmer()\n", + "useful_data = []\n", + "for t in expanded_word:\n", + " x = [ps.stem(w) for w in t]\n", + " useful_data.append(x)" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": { + "id": "KhlcIMWi0zs_" + }, + "outputs": [], + "source": [ + "#building vocabulary\n", + "from sklearn.feature_extraction.text import CountVectorizer\n", + "cv = CountVectorizer()\n", + "corpus =[]\n", + "for x in useful_data:\n", + " t = \" \".join(x)\n", + " corpus.append(t)\n", + "vc = cv.fit_transform(corpus)" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "BiSRBXSy1KGi", + "outputId": "20c9051c-4223-43ff-e502-2b4bd534d6fd" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " (0, 
110)\t1\n", + " (0, 1477)\t1\n", + " (0, 1540)\t1\n", + " (0, 2281)\t1\n", + " (0, 259)\t1\n", + " (1, 259)\t1\n", + " (1, 2744)\t1\n", + " (1, 1139)\t1\n", + " (1, 1734)\t1\n", + " (1, 1881)\t1\n", + " (1, 2768)\t1\n", + " (1, 1903)\t1\n", + " (2, 1814)\t1\n", + " (2, 2018)\t1\n", + " (2, 279)\t2\n", + " (2, 2630)\t1\n", + " (2, 1627)\t1\n", + " (2, 1243)\t1\n", + " (2, 2041)\t1\n", + " (2, 3368)\t1\n", + " (2, 2672)\t1\n", + " (2, 787)\t1\n", + " (2, 2179)\t1\n", + " (2, 2032)\t1\n", + " (3, 2281)\t1\n", + " :\t:\n", + " (1051, 3442)\t1\n", + " (1051, 1812)\t1\n", + " (1051, 920)\t1\n", + " (1051, 1991)\t1\n", + " (1051, 253)\t1\n", + " (1051, 540)\t1\n", + " (1051, 324)\t1\n", + " (1051, 543)\t1\n", + " (1051, 1962)\t1\n", + " (1051, 1807)\t1\n", + " (1051, 2119)\t1\n", + " (1051, 479)\t1\n", + " (1051, 1851)\t1\n", + " (1051, 3232)\t1\n", + " (1051, 3390)\t1\n", + " (1052, 952)\t1\n", + " (1052, 10)\t1\n", + " (1052, 1173)\t1\n", + " (1052, 1333)\t1\n", + " (1052, 1307)\t1\n", + " (1052, 3077)\t1\n", + " (1052, 1377)\t1\n", + " (1052, 1618)\t1\n", + " (1052, 380)\t1\n", + " (1052, 3034)\t1\n", + "{'among': 110, 'hinkl': 1477, 'identifi': 1540, 'photograph': 2281, 'barco': 259, 'seem': 2744, 'fanci': 1139, 'ladi': 1734, 'man': 1881, 'seven': 2768, 'marriag': 1903, 'list': 1814, 'mormon': 2018, 'beard': 279, 'role': 2630, 'instig': 1627, 'fourth': 1243, 'murder': 2041, 'victim': 3368, 'said': 2672, 'dear': 787, 'ought': 2179, 'movi': 2032, 'mill': 1967, 'secur': 2740, 'gentleman': 1315, 'charg': 531, 'rush': 2663, 'hollywood': 1489, 'polic': 2328, 'station': 2978, 'report': 2564, 'theft': 3133, 'less': 1789, 'five': 1191, 'minut': 1976, 'later': 1757, 'detect': 850, 'pictur': 2289, 'hand': 1417, 'trail': 3215, 'cal': 455, 'way': 3413, 'stop': 3003, 'everi': 1074, 'ga': 1284, 'along': 92, 'main': 1875, 'boulevard': 382, 'question': 2453, 'attend': 208, 'final': 1177, 'ye': 3511, 'old': 2154, 'gass': 1305, 'fill': 1174, 'avocado': 226, 'avenu': 224, 'learn': 1774, 
'paus': 2244, 'get': 1321, 'oil': 2153, 'car': 473, 'ask': 187, 'rout': 2650, 'san': 2687, 'diego': 868, 'head': 1443, 'direct': 882, 'juan': 1695, 'capistrano': 470, 'came': 462, 'upon': 3324, 'sit': 2849, 'quaint': 2444, 'spanish': 2922, 'mission': 1990, 'eat': 980, 'hot': 1511, 'tamal': 3092, 'moment': 2003, 'back': 237, 'road': 2620, 'see': 2742, 'close': 591, 'convert': 686, 'quest': 2452, 'stolen': 2998, 'lap': 1745, 'rug': 2657, 'proceed': 2392, 'search': 2734, 'robe': 2625, 'howev': 1521, 'miss': 1989, 'time': 3177, 'dispos': 904, 'pawnshop': 2247, 'glendal': 1336, 'place': 2302, 'arrest': 180, 'without': 3466, 'inform': 1602, 'natur': 2059, 'took': 3196, 'thu': 3165, 'apprehend': 154, 'mere': 1947, 'larceni': 1748, 'began': 295, 'suspect': 3067, 'one': 2159, 'anoth': 131, 'uncov': 3270, 'return': 2596, 'trip': 3233, 'kept': 1710, 'mutter': 2047, 'meaningless': 1930, 'phrase': 2282, 'sand': 2689, 'dune': 968, 'better': 325, 'tell': 3112, 'save': 2705, 'soul': 2915, 'comment': 623, 'behavior': 301, 'felt': 1164, 'belong': 310, 'myriad': 2048, 'citizen': 572, 'commun': 631, 'mental': 1945, 'unhing': 3292, 'harmless': 1434, 'nut': 2124, 'cell': 507, 'await': 228, 'trial': 3228, 'fit': 1190, 'apprehens': 155, 'made': 1868, 'attempt': 207, 'take': 3087, 'life': 1796, 'fail': 1128, 'endeavor': 1021, 'cut': 762, 'wrist': 3500, 'women': 3472, 'faint': 1130, 'sight': 2824, 'blood': 352, 'jail': 1668, 'author': 220, 'attach': 204, 'particular': 2229, 'signific': 2825, 'episod': 1049, 'offer': 2147, 'whiski': 3436, 'reviv': 2600, 'fellow': 1163, 'lifelong': 1797, 'teetotal': 3109, 'refus': 2524, 'thought': 3152, 'matter': 1920, 'district': 912, 'attorney': 212, 'welch': 3420, 'enter': 1038, 'case': 489, 'vault': 3354, 'ambit': 105, 'eye': 1123, 'mayorship': 1927, 'lo': 1827, 'angel': 121, 'noth': 2112, 'ever': 1073, 'escap': 1059, 'might': 1963, 'possibl': 2346, 'lead': 1770, 'person': 2270, 'public': 2428, 'offic': 2148, 'thief': 3141, 'citi': 571, 'suicid': 3051, 
'want': 3403, 'know': 1727, 'knew': 1724, 'pet': 2273, 'theori': 3134, 'everyon': 1077, 'guilti': 1402, 'break': 397, 'law': 1765, 'caught': 500, 'look': 1841, 'like': 1801, 'opportun': 2167, 'put': 2441, 'test': 3126, 'paid': 2202, 'call': 459, 'chat': 535, 'state': 2976, 'bluntli': 357, 'got': 1355, 'good': 1348, 'lot': 1846, 'come': 616, 'clean': 582, 'first': 1187, 'evas': 1069, 'shifti': 2793, 'relentless': 2534, 'pursuit': 2439, 'subject': 3038, 'broke': 412, 'start': 2974, 'confess': 655, 'reach': 2490, 'count': 705, 'three': 3156, 'situat': 2851, 'almost': 90, 'true': 3242, 'dream': 949, 'would': 3494, 'throw': 3162, 'headlin': 1445, 'america': 107, 'hero': 1468, 'great': 1374, 'summon': 3055, 'offici': 2149, 'chagrin': 517, 'captain': 472, 'credul': 728, 'clearli': 584, 'requir': 2572, 'strength': 3014, 'bravado': 394, 'dare': 773, 'commit': 626, 'worm': 3487, 'ridicul': 2613, 'sinc': 2840, 'glint': 1337, 'spirit': 2941, 'lit': 1816, 'manhood': 1885, 'attack': 205, 'stiffen': 2993, 'rose': 2642, 'feet': 1158, 'show': 2810, 'queri': 2451, 'well': 3422, 'ten': 3115, 'year': 3512, 'bodi': 361, 'ai': 73, 'four': 1241, 'six': 2853, 'next': 2088, 'day': 781, 'deris': 835, 'organ': 2172, 'term': 3122, 'wild': 3446, 'goos': 1349, 'chase': 534, 'inde': 1577, 'incred': 1575, 'anyon': 140, 'could': 703, 'go': 1341, 'even': 1071, 'parti': 2228, 'consist': 665, 'policemen': 2330, 'shovel': 2809, 'newspap': 2087, 'cameramen': 464, 'gone': 1347, 'never': 2082, 'assum': 194, 'remain': 2544, 'princip': 2382, 'attitud': 211, 'corteg': 696, 'auto': 221, 'near': 2061, 'santa': 2694, 'monica': 2010, 'fork': 1227, 'pace': 2196, 'certain': 514, 'distanc': 906, 'spot': 2949, 'two': 3254, 'hill': 1476, 'indic': 1587, 'locat': 1833, 'order': 2169, 'given': 1331, 'dig': 873, 'found': 1240, 'worri': 3489, 'chortl': 557, 'boy': 388, 'crack': 719, 'joke': 1687, 'pride': 2380, 'arous': 178, 'greater': 1375, 'precis': 2362, 'littl': 1822, 'left': 1779, 'quib': 2455, 'gibe': 1322, 'edg': 
986, 'remark': 2546, 'nice': 2089, 'picnic': 2288, 'struck': 3021, 'object': 2128, 'someth': 2903, 'digger': 875, 'gather': 1306, 'around': 177, 'thrust': 3164, 'rais': 2473, 'view': 3372, 'packag': 2199, 'crude': 741, 'wrap': 3496, 'sport': 2948, 'shirt': 2797, 'although': 98, 'fragment': 1245, 'enough': 1035, 'delight': 818, 'elat': 1000, 'grew': 1382, 'disclosur': 892, 'brought': 418, 'light': 1799, 'reward': 2604, 'truli': 3243, 'becom': 289, 'hour': 1515, 'everyth': 1078, 'follow': 1208, 'procedur': 2391, 'justic': 1703, 'new': 2084, 'triumph': 3235, 'went': 3423, 'increas': 1573, 'appar': 147, 'cahoot': 453, 'marshal': 1904, 'use': 3333, 'power': 2352, 'drag': 942, 'sensat': 2752, 'scandal': 2711, 'provid': 2420, 'copi': 691, 'pen': 2255, 'mean': 1929, 'cynic': 765, 'wait': 3395, 'impati': 1555, 'dramat': 944, 'viola': 3378, 'stand': 2967, 'devilish': 856, 'clever': 587, 'multipl': 2039, 'romanc': 2633, 'lake': 1739, 'addict': 43, 'downfal': 937, 'film': 1175, 'idol': 1543, 'fine': 1181, 'will': 3448, 'walk': 3399, 'mayor': 1926, 'throne': 3160, 'broken': 413, 'reput': 2570, 'helpless': 1461, 'girl': 1329, 'studi': 3027, 'progress': 2401, 'hint': 1478, 'give': 1330, 'thwart': 3169, 'long': 1838, 'sens': 2751, 'deeper': 802, 'overvault': 2193, 'desir': 842, 'destruct': 848, 'bitter': 336, 'resent': 2576, 'toward': 3205, 'dread': 948, 'fear': 1153, 'conscious': 661, 'perhap': 2264, 'entertain': 1040, 'aspir': 190, 'favor': 1151, 'seriou': 2761, 'attain': 206, 'share': 2782, 'supersed': 3060, 'luckier': 1856, 'chap': 527, 'rest': 2583, 'track': 3209, 'mysteri': 2049, 'cocktail': 599, 'apart': 143, 'downtown': 940, 'hotel': 1513, 'urg': 3330, 'telephon': 3110, 'host': 1508, 'charact': 530, 'arriv': 181, 'late': 1756, 'note': 2111, 'gentlemen': 1316, 'major': 1876, 'air': 75, 'thick': 3140, 'smoke': 2883, 'empti': 1015, 'bottl': 377, 'evid': 1079, 'sever': 2770, 'guest': 1397, 'somewhat': 2905, 'wors': 3490, 'liquor': 1813, 'wish': 3461, 'leav': 1777, 'soon': 
2909, 'polit': 2332, 'happen': 1424, 'adjac': 47, 'couch': 701, 'taken': 3088, 'refug': 2523, 'small': 2876, 'tabl': 3084, 'vase': 3352, 'red': 2515, 'rosebud': 2643, 'project': 2403, 'beneath': 315, 'pair': 2207, 'fate': 1148, 'sat': 2699, 'play': 2308, 'impuls': 1566, 'overcam': 2187, 'remov': 2552, 'shoe': 2800, 'sock': 2895, 'unidentifi': 3293, 'prank': 2355, 'insert': 1617, 'toe': 3188, 'woke': 3470, 'emerg': 1010, 'watch': 3410, 'realiz': 2499, 'engag': 1029, 'address': 45, 'meet': 1934, 'cultur': 750, 'forum': 1238, 'civic': 573, 'southland': 2919, 'byzantin': 444, 'room': 2637, 'wife': 3444, 'presid': 2372, 'bedroom': 291, 'care': 476, 'brush': 421, 'neatli': 2065, 'arrang': 179, 'hair': 1413, 'painstakingli': 2205, 'select': 2748, 'hat': 1437, 'mani': 1886, 'bed': 290, 'neither': 2074, 'absenc': 12, 'footwear': 1213, 'presenc': 2369, 'usual': 3334, 'digniti': 876, 'mount': 2027, 'rostrum': 2646, 'effect': 990, 'intellectu': 1634, 'audienc': 216, 'may': 1924, 'imagin': 1549, 'incid': 1570, 'asid': 186, 'reflect': 2520, 'career': 477, 'wreck': 3497, 'home': 1490, 'discov': 894, 'culprit': 747, 'vow': 3392, 'vengeanc': 3357, 'chanc': 523, 'innoc': 1611, 'action': 35, 'human': 1526, 'tragedi': 3212, 'sometim': 2904, 'set': 2767, 'motion': 2024, 'much': 2034, 'commiser': 625, 'difficult': 871, 'us': 3332, 'seen': 2746, 'togeth': 3189, 'tongu': 3194, 'readi': 2496, 'link': 1809, 'name': 2054, 'find': 1180, 'thing': 3144, 'say': 2708, 'associ': 192, 'invit': 1657, 'hous': 1516, 'mother': 2022, 'snobbishli': 2887, 'receiv': 2507, 'czarship': 766, 'affect': 58, 'social': 2893, 'valu': 3343, 'glori': 1339, 'open': 2163, 'door': 926, 'societi': 2894, 'scion': 2722, 'aristocrat': 170, 'line': 1806, 'domin': 922, 'budweis': 425, 'beer': 293, 'deriv': 836, 'chalmer': 520, 'underwear': 3279, 'origin': 2174, 'heinz': 1454, 'whose': 3440, 'forbear': 1214, 'nation': 2057, 'famou': 1138, 'trade': 3210, 'pickl': 2287, 'hate': 1438, 'salon': 2681, 'insist': 1620, 'seldom': 
2747, 'remot': 2551, 'connect': 659, 'cinema': 566, 'exclus': 1092, 'midst': 1961, 'fact': 1127, 'king': 1720, 'spain': 2921, 'visit': 3383, 'pickfair': 2286, 'mari': 1897, 'doug': 933, 'beckon': 288, 'cross': 739, 'sacr': 2666, 'barrier': 266, 'separ': 2757, 'pasadena': 2233, 'far': 1141, 'trump': 3244, 'matrimoni': 1919, 'deb': 791, 'educ': 989, 'abroad': 11, 'lenient': 1787, 'area': 164, 'debutant': 794, 'high': 1472, 'school': 2718, 'last': 1755, 'away': 230, 'spend': 2938, 'cosi': 697, 'chaperon': 528, 'hotb': 1512, 'gossip': 1354, 'grown': 1392, 'precaut': 2359, 'keep': 1709, 'whisper': 3437, 'world': 3486, 'ala': 78, 'shown': 2811, 'surpris': 3065, 'kitchen': 1722, 'greet': 1381, 'check': 538, 'apron': 157, 'ladl': 1735, 'explain': 1107, 'cook': 688, 'night': 2091, 'prepar': 2368, 'dinner': 881, 'roast': 2622, 'chicken': 545, 'dress': 952, 'giblet': 1323, 'gravi': 1372, 'asparagu': 188, 'pea': 2249, 'sprig': 2953, 'mint': 1975, 'cream': 724, 'onion': 2160, 'mash': 1910, 'potato': 2349, 'chosen': 559, 'hamlet': 1415, 'face': 1125, 'entir': 1042, 'differ': 870, 'problem': 2389, 'agoni': 67, 'accomplish': 26, 'ad': 40, 'mr': 2033, 'beig': 304, 'tray': 3223, 'dish': 898, 'pile': 2292, 'repeat': 2558, 'threat': 3154, 'ascend': 184, 'scale': 2710, 'fashion': 1145, 'make': 1877, 'decis': 797, 'phone': 2279, 'rang': 2479, 'instantli': 1625, 'household': 1518, 'ca': 445, 'children': 548, 'tire': 3179, 'pick': 2285, 'grandmoth': 1367, 'field': 1170, 'distant': 907, 'magic': 1870, 'within': 3465, 'second': 2736, 'jenni': 1675, 'miranda': 1978, 'twine': 3252, 'murmur': 2042, 'endear': 1020, 'louis': 1848, 'climb': 588, 'onto': 2161, 'stool': 3002, 'clutch': 597, 'tri': 3227, 'hold': 1486, 'claim': 576, 'immedi': 1550, 'attent': 209, 'ground': 1389, 'extrem': 1121, 'somehow': 2900, 'manag': 1882, 'cool': 689, 'pois': 2324, 'wo': 3468, 'pleas': 2312, 'cover': 714, 'mouthpiec': 2029, 'warmth': 3407, 'gave': 1310, 'quick': 2456, 'lectur': 1778, 'crime': 732, 'punish': 2434, 
'mostli': 2021, 'latter': 1759, 'includ': 1571, 'devil': 855, 'island': 1663, 'siberia': 2817, 'promis': 2406, 'illustr': 1548, 'breath': 401, 'till': 3175, 'complet': 638, 'speak': 2924, 'recogn': 2509, 'caller': 461, 'resum': 2589, 'everyday': 1076, 'voic': 3387, 'deep': 801, 'convers': 685, 'interrupt': 1641, 'pretend': 2376, 'black': 339, 'dump': 967, 'content': 674, 'sugar': 3049, 'bowl': 386, 'variou': 3351, 'screech': 2727, 'thud': 3166, 'giggl': 1326, 'circumst': 570, 'difficulti': 872, 'hung': 1530, 'reason': 2502, 'francesca': 1250, 'remind': 2548, 'town': 3207, 'import': 1562, 'hank': 1423, 'shrewdli': 2812, 'vacat': 3339, 'quietli': 2460, 'live': 1823, 'gainer': 1293, 'accept': 22, 'peac': 2250, 'quiet': 2459, 'instead': 1626, 'carri': 481, 'resolv': 2579, 'done': 924, 'right': 2615, 'meant': 1931, 'sink': 2845, 'formid': 1232, 'demand': 822, 'stare': 2972, 'dismay': 902, 'woolgath': 3479, 'husband': 1535, 'herbert': 1467, 'peopl': 2258, 'catatonia': 495, 'month': 2012, 'move': 2030, 'met': 1954, 'fran': 1248, 'pta': 2427, 'kind': 1718, 'indebt': 1578, 'pleasant': 2313, 'shop': 2804, 'dentist': 829, 'doctor': 918, 'plumber': 2319, 'sitter': 2850, 'choic': 556, 'villag': 3374, 'yellow': 3516, 'page': 2200, 'book': 367, 'amazingli': 103, 'thin': 3143, 'herb': 1466, 'expert': 1105, 'narrow': 2055, 'tie': 3172, 'lapel': 1746, 'swatch': 3072, 'men': 1943, 'editor': 987, 'parvenu': 2232, 'weekli': 3417, 'magazin': 1869, 'tremend': 3226, 'circul': 569, 'manhattan': 1884, 'nebraska': 2066, 'wyom': 3507, 'daughter': 779, 'sole': 2897, 'heiress': 1455, 'either': 999, 'cattl': 499, 'baron': 263, 'millionair': 1969, 'york': 3520, 'big': 327, 'bank': 255, 'roll': 2631, 'becam': 286, 'dabbler': 767, 'patron': 2242, 'greenwich': 1380, 'artist': 183, 'awhil': 231, 'money': 2008, 'broadway': 411, 'success': 3044, 'terribl': 3123, 'write': 3501, 'period': 2265, 'decid': 796, 'love': 1849, 'marri': 1902, 'lavish': 1764, 'ceremoni': 513, 'duli': 965, 'record': 2512, 
'honeymoon': 1495, 'bermuda': 318, 'style': 3036, 'rome': 2634, 'bought': 381, 'investig': 1655, 'region': 2528, 'suburbia': 3042, 'surround': 3066, 'excel': 1087, 'beach': 277, 'abund': 17, 'bound': 383, 'involv': 1659, 'told': 3190, 'dreari': 950, 'housew': 1520, 'dull': 966, 'admir': 50, 'easi': 978, 'uneasi': 3283, 'alway': 100, 'coloni': 612, 'restor': 2586, 'guid': 1399, 'emin': 1012, 'architect': 163, 'updat': 3323, 'gourmet': 1358, 'corner': 693, 'breakfast': 399, 'luncheon': 1860, 'dine': 880, 'everybodi': 1075, 'mud': 2035, 'dust': 970, 'anywher': 142, 'color': 613, 'paint': 2206, 'block': 350, 'proper': 2409, 'exactli': 1083, 'adjust': 48, 'parent': 2220, 'damn': 769, 'consequ': 662, 'consid': 663, 'violent': 3380, 'nonconformist': 2101, 'contemporari': 673, 'furnitur': 1278, 'art': 182, 'expens': 1102, 'necessarili': 2068, 'valuabl': 3344, 'flaunt': 1195, 'independ': 1582, 'rebel': 2503, 'uniform': 3294, 'short': 2805, 'wear': 3414, 'bright': 407, 'pink': 2295, 'plaid': 2303, 'vivid': 3385, 'stripe': 3019, 'wore': 3481, 'solid': 2898, 'unreliev': 3313, 'blond': 351, 'strongli': 3020, 'resembl': 2575, 'member': 1940, 'ss': 2959, 'dislik': 900, 'though': 3151, 'quit': 2461, 'rather': 2486, 'perfect': 2260, 'stage': 2961, 'actor': 37, 'fat': 1147, 'talent': 3089, 'compet': 635, 'understat': 3275, 'practic': 2353, 'enthusiasm': 1041, 'discuss': 895, 'brand': 391, 'hear': 1448, 'rever': 2598, 'baccarat': 235, 'steuben': 2990, 'madam': 1867, 'alexand': 82, 'let': 1790, 'known': 1728, 'wine': 3453, 'pot': 2348, 'marin': 1899, 'brandi': 392, 'koussevitzki': 1731, 'cousin': 713, 'intim': 1644, 'wast': 3409, 'puzzl': 2442, 'coupl': 709, 'inhabit': 1605, 'equal': 1051, 'unreal': 3312, 'feel': 1156, 'besid': 321, 'blanch': 341, 'charm': 533, 'sweet': 3076, 'compani': 632, 'drown': 958, 'warm': 3406, 'melt': 1938, 'marshmallow': 1905, 'wit': 3462, 'smile': 2880, 'minimum': 1972, 'rueful': 2655, 'oh': 2151, 'souffl': 2914, 'collaps': 607, 'real': 2498, 'woman': 3471, 
'react': 2491, 'catastroph': 494, 'honest': 1494, 'yell': 3515, 'slam': 2861, 'least': 1775, 'thrown': 3163, 'valueless': 3345, 'welcom': 3421, 'astring': 198, 'grazi': 1373, 'sort': 2913, 'gwen': 1407, 'cafritz': 452, 'perl': 2266, 'mesta': 1953, 'habitu': 1411, 'committe': 627, 'chairmen': 519, 'elect': 1001, 'boss': 375, 'whatev': 3426, 'launch': 1763, 'insepar': 1616, 'fond': 1209, 'keen': 1708, 'rival': 2619, 'leadership': 1771, 'bitterli': 337, 'unfunnili': 3290, 'sarcast': 2697, 'drive': 955, 'slick': 2868, 'skin': 2856, 'golden': 1346, 'toast': 3186, 'decor': 799, 'appoint': 152, 'complement': 637, 'siames': 2816, 'cat': 492, 'rich': 2609, 'raw': 2489, 'silk': 2830, 'shade': 2775, 'match': 1917, 'housepaint': 1519, 'shapeless': 2781, 'frozen': 1267, 'fish': 1188, 'stick': 2991, 'other': 2177, 'distinguish': 909, 'indistinguish': 1591, 'radio': 2467, 'televis': 3111, 'advertis': 53, 'writer': 3502, 'finit': 1185, 'develop': 853, 'chuckl': 562, 'habit': 1409, 'zound': 3528, 'defer': 807, 'anyth': 141, 'sincer': 2841, 'handclasp': 1419, 'urgent': 3331, 'acknowledg': 31, 'introduct': 1647, 'anybodi': 138, 'gentli': 1317, 'press': 2374, 'gaze': 1312, 'unadulter': 3261, 'total': 3202, 'stranger': 3012, 'credit': 727, 'leap': 1773, 'cigarett': 565, 'plate': 2307, 'glass': 1335, 'mistrust': 1994, 'whole': 3439, 'degre': 811, 'bake': 243, 'sanitari': 2693, 'pioneer': 2296, 'wonder': 3473, 'homestead': 1491, 'odd': 2143, 'establish': 1064, 'suburban': 3041, 'frontier': 1266, 'garden': 1301, 'confer': 654, 'mister': 1993, 'sigh': 2823, 'think': 3145, 'earn': 975, 'yearli': 3513, 'teeth': 3108, 'ingrati': 1604, 'graciou': 1362, 'estim': 1065, 'read': 2493, 'daphn': 772, 'du': 962, 'maurier': 1921, 'kafka': 1704, 'loss': 1845, 'describ': 838, 'opinion': 2166, 'invari': 1652, 'prove': 2418, 'determin': 851, 'awar': 229, 'nearli': 2063, 'lunchtim': 1861, 'task': 3096, 'plan': 2304, 'afternoon': 63, 'mayb': 1925, 'rememb': 2547, 'banana': 251, 'dessert': 846, 'week': 3416, 
'shout': 2808, 'food': 1210, 'suppos': 3062, 'cereal': 512, 'dug': 964, 'mix': 1997, 'abl': 10, 'cake': 454, 'chocol': 555, 'ice': 1536, 'need': 2069, 'rule': 2658, 'garlic': 1303, 'stun': 3032, 'silenc': 2827, 'dearth': 789, 'tast': 3097, 'thrill': 3158, 'mama': 1880, 'stretch': 3015, 'forth': 1234, 'arm': 172, 'paradis': 2217, 'needless': 2070, 'furiou': 1276, 'unparallel': 3308, 'intrus': 1649, 'free': 1255, 'enterpris': 1039, 'snoop': 2888, 'privat': 2385, 'financi': 1179, 'disburs': 889, 'confidenti': 656, 'contract': 678, 'agreement': 69, 'histor': 1481, 'broadcast': 410, 'father': 1149, 'coughlin': 702, 'eddi': 985, 'cantor': 468, 'bow': 385, 'white': 3438, 'despot': 845, 'yore': 3519, 'diari': 863, 'raid': 2469, 'larder': 1749, 'ransack': 2480, 'lingeri': 1808, 'impass': 1554, 'plea': 2310, 'civil': 574, 'creat': 725, 'landslid': 1743, 'correspond': 694, 'sponsor': 2947, 'replac': 2561, 'comedi': 618, 'perman': 2267, 'basi': 270, 'quarter': 2447, 'poor': 2337, 'defenseless': 806, 'expect': 1100, 'dictat': 864, 'bold': 364, 'savag': 2704, 'barbarian': 258, 'hord': 1502, 'russian': 2664, 'descend': 837, 'athen': 200, 'mighti': 1964, 'metronom': 1958, 'sack': 2665, 'despoil': 844, 'bolshevist': 365, 'battl': 276, 'cri': 731, 'soak': 2890, 'unspeak': 3316, 'sieg': 2822, 'part': 2227, 'announc': 130, 'studio': 3028, 'owe': 2194, 'govern': 1360, 'tax': 3103, 'debt': 793, 'excess': 1089, 'eight': 997, 'million': 1968, 'dollar': 921, 'aloof': 93, 'iniquit': 1608, 'pay': 2248, 'salari': 2676, 'legal': 1782, 'inherit': 1606, 'morri': 2020, 'relict': 2537, 'stung': 3033, 'bill': 329, 'astronom': 199, 'proport': 2412, 'wipe': 3456, 'fraction': 1244, 'also': 96, 'publicli': 2429, 'reprimand': 2568, 'radic': 2466, 'figur': 1173, 'fun': 1271, 'leftist': 1780, 'republ': 2569, 'yorker': 3521, 'christian': 560, 'scienc': 2720, 'monitor': 2011, 'avail': 223, 'declar': 798, 'incom': 1572, 'unconstitut': 3268, 'litig': 1820, 'costli': 699, 'seemingli': 2745, 'endless': 1022, 
'fought': 1239, 'tigress': 3174, 'appeal': 148, 'suprem': 3063, 'court': 711, '1937': 2, 'roosevelt': 2638, 'henchmen': 1463, 'dirti': 884, 'work': 3482, 'pack': 2198, 'highest': 1473, 'tribun': 3230, 'land': 1742, 'defeat': 805, 'presidenti': 2373, 'coercion': 602, 'succeed': 3043, 'poison': 2325, 'courtier': 712, 'toadi': 3185, 'sycoph': 3079, 'bench': 314, 'discourag': 893, 'lawyer': 1767, 'repres': 2566, 'portia': 2342, 'present': 2370, 'brief': 404, 'unanim': 3262, 'vote': 3391, 'front': 1265, 'persona': 2271, 'non': 2100, 'grata': 1369, 'staunch': 2982, 'thinker': 3146, 'student': 3026, 'jurisprud': 1702, 'charl': 532, 'evan': 1068, 'hugh': 1525, 'poitrin': 2326, 'limit': 1804, 'actress': 38, 'exceed': 1086, 'logic': 1836, 'albert': 80, 'einstein': 998, 'quot': 2463, 'mind': 1970, 'amaz': 102, 'henri': 1464, 'ford': 1216, 'spoke': 2944, 'utterli': 3337, 'astound': 197, 'heywood': 1471, 'broun': 419, 'wrote': 3505, 'bell': 307, 'caligula': 458, 'conced': 647, 'foresight': 1221, 'patriot': 2241, 'rapidli': 2482, 'ancient': 119, 'babylon': 234, 'nineveh': 2093, 'cost': 698, 'prohibit': 2402, 'crippl': 733, 'addit': 44, 'usuri': 3335, 'interest': 1636, 'unpaid': 3306, 'blow': 354, 'manni': 1891, 'notorieti': 2117, 'enemi': 1025, 'anxiou': 137, 'level': 1793, 'reactionari': 2492, 'robber': 2624, 'baro': 262, 'traitor': 3218, 'point': 2323, 'ere': 1056, 'activ': 36, 'econom': 983, 'council': 704, 'notabl': 2110, 'watchdog': 3411, 'heard': 1449, 'bulwark': 430, 'fight': 1172, 'creep': 729, 'insuper': 1633, 'troop': 3238, 'nra': 2121, 'pwa': 2443, 'wpa': 3495, 'ccc': 503, 'armi': 174, 'mercenari': 1946, 'employ': 1014, 'secretli': 2739, 'transpar': 3221, 'ruse': 2662, 'regular': 2531, 'relief': 2538, 'suffer': 3047, 'drastic': 945, 'turbul': 3248, 'otherwis': 2178, 'geniu': 1314, 'worth': 3492, 'best': 323, 'fortun': 1237, 'livelihood': 1824, 'imperil': 1558, 'sweep': 3075, 'distract': 910, 'hardli': 1430, 'conduc': 650, 'oscar': 2176, 'win': 3451, 'perform': 2263, 
'hardest': 1429, 'help': 1460, 'outsid': 2185, 'pressur': 2375, 'unflag': 3286, 'effort': 993, 'devot': 857, 'achiev': 29, 'stardom': 2971, 'age': 64, 'young': 3522, 'cast': 490, 'letch': 1791, 'neglect': 2071, 'mile': 1965, 'scene': 2715, 'thousand': 3153, 'thespian': 3139, 'thirti': 3149, 'forti': 1236, 'fifti': 1171, 'helpmat': 1462, 'patient': 2240, 'apologet': 146, 'indulg': 1593, 'outburst': 2181, 'tempera': 3113, 'profan': 2396, 'abus': 19, 'blame': 340, 'sulk': 3053, 'occas': 2135, 'storm': 3006, 'financ': 1178, 'privileg': 2386, 'salt': 2684, 'servant': 2765, 'tailor': 3086, 'athlet': 201, 'club': 594, 'buy': 442, 'singl': 2843, 'gift': 1324, 'third': 3147, 'anniversari': 129, 'disloc': 901, 'jaw': 1672, 'stori': 3005, 'moni': 2009, 'dissip': 905, 'drink': 954, 'game': 1297, 'carous': 478, 'confront': 657, 'profession': 2397, 'gambler': 1296, 'booki': 368, 'loan': 1829, 'shark': 2783, 'gangster': 1299, 'thug': 3167, 'finger': 1183, 'class': 579, 'exist': 1096, 'repay': 2557, 'stagger': 2962, 'els': 1007, 'shudder': 2813, 'someon': 2901, 'sinist': 2844, 'milieu': 1966, 'strang': 3011, 'commonest': 629, 'accent': 21, 'sire': 2847, 'unborn': 3265, 'deepest': 803, 'matern': 1918, 'instinct': 1628, 'heart': 1450, 'fairli': 1131, 'bled': 344, 'darl': 776, 'bundl': 432, 'heaven': 1452, 'proudli': 2417, 'ah': 71, 'ungal': 3291, 'untru': 3319, 'retort': 2592, 'whenev': 3429, 'suggest': 3050, 'famili': 1136, 'abundantli': 18, 'clear': 583, 'preciou': 2361, 'wiser': 3460, 'summer': 3054, 'camp': 465, 'run': 2661, 'friend': 1261, 'irrit': 1662, 'hurri': 1533, 'chateau': 536, 'belletch': 308, 'anyhow': 139, 'god': 1343, 'infinit': 1600, 'wisdom': 3458, 'inexplic': 1597, 'laps': 1747, 'memori': 1942, 'yet': 3518, 'ador': 52, 'feeley': 1157, 'brave': 395, 'endur': 1024, 'cold': 606, 'indiffer': 1588, 'slight': 2871, 'recal': 2505, 'humili': 1527, 'trianon': 3229, 'suit': 3052, 'collat': 608, 'postpon': 2347, 'serv': 2764, 'dri': 953, 'overcook': 2188, 'chair': 518, 
'vacant': 3338, 'splash': 2943, 'pool': 2336, 'built': 427, 'hope': 1498, 'aquacuti': 158, 'stay': 2983, 'underwat': 3278, 'epiphani': 1048, 'errol': 1057, 'flynn': 1203, 'stole': 2997, 'unknown': 3299, 'starlet': 2973, 'sampl': 2686, 'insult': 1632, 'arbit': 161, 'filmdom': 1176, 'exclud': 1091, 'smart': 2877, 'astair': 196, 'cooper': 690, 'gabl': 1285, 'colman': 611, 'rathbon': 2485, 'taylor': 3104, 'thalberg': 3127, 'devout': 858, 'knit': 1726, 'barrymor': 267, 'crosbi': 738, 'antisoci': 136, 'conduct': 651, 'decreas': 800, 'mouth': 2028, 'hostess': 1510, 'screenland': 2729, '1935': 1, '1939': 4, 'commerci': 624, 'joint': 1686, 'ventur': 3359, 'saint': 2673, 'deepli': 804, 'religi': 2540, 'base': 268, 'cabrini': 448, 'releas': 2533, 'date': 778, 'coincid': 605, 'beatif': 283, 'novemb': 2120, '1938': 3, 'fiasco': 1168, 'finish': 1184, 'actual': 39, 'mine': 1971, 'troubl': 3240, 'especi': 1061, 'dash': 777, 'ruffian': 2656, 'inspir': 1623, 'exampl': 1084, 'appreci': 153, 'behalf': 299, 'memor': 1941, 'specif': 2928, 'eventu': 1072, 'speech': 2935, 'number': 2123, 'consider': 664, 'punctual': 2433, 'shoot': 2803, 'spent': 2939, 'illeg': 1546, 'gambl': 1295, 'den': 826, 'residenti': 2578, 'hundr': 1529, 'bar': 257, 'steam': 2986, 'deign': 812, 'appear': 149, 'uncoop': 3269, 'rude': 2654, 'tasteless': 3098, 'irrever': 1661, 'unfunni': 3289, 'titl': 3182, 'religion': 2541, 'gener': 1313, 'war': 3404, 'immin': 1552, 'forgotten': 1226, 'thank': 3128, 'strain': 3010, 'impos': 1563, 'notic': 2114, 'unfavor': 3285, 'georg': 1318, 'santayana': 2695, 'understand': 3273, 'perpetr': 2269, 'eloqu': 1006, 'argument': 168, 'protest': 2416, 'faith': 1132, 'unleash': 3300, 'consol': 666, 'rare': 2484, 'anger': 122, 'spite': 2942, 'farm': 1142, 'undistinguish': 3280, 'form': 1228, 'must': 2045, 'disciplin': 890, 'diet': 869, 'deserv': 841, 'turn': 3249, 'martin': 1906, 'luther': 1864, 'disast': 887, 'cloth': 592, 'simpli': 2838, 'laugh': 1760, 'screen': 2728, 'servic': 2766, 
'english': 1032, 'master': 1914, 'sir': 2846, 'gauntley': 1309, 'pratt': 2357, 'quicki': 2457, 'mad': 1866, 'marquess': 1900, 'american': 108, 'haunt': 1442, 'castl': 491, 'moor': 2014, 'secret': 2737, 'passag': 2236, 'slide': 2870, 'panel': 2210, 'unbeknownst': 3264, 'still': 2994, 'occupi': 2139, 'eccentr': 981, 'maniac': 1887, 'potboil': 2350, 'shoestr': 2801, 'blank': 342, 'scream': 2726, 'deal': 784, 'oddli': 2144, 'twenti': 3251, 'debacl': 792, 'unrepent': 3314, 'badli': 241, 'behav': 300, 'realli': 2500, 'feverish': 1167, 'preserv': 2371, 'momma': 2005, 'earthi': 977, 'advic': 54, 'absorb': 14, 'boundless': 384, 'energi': 1026, 'booz': 371, 'babe': 233, 'repli': 2563, 'hurt': 1534, 'hell': 1458, 'saloon': 2682, 'mess': 1951, 'workshop': 3484, 'meadow': 1928, 'pond': 2335, 'neighborhood': 2073, 'fault': 1150, 'produc': 2395, 'quota': 2464, 'detail': 849, 'result': 2588, 'continu': 677, 'item': 1665, 'harshli': 1435, 'undertak': 3277, 'fall': 1134, 'pattern': 2243, 'wander': 3402, 'wood': 3475, 'accompani': 25, 'dog': 919, 'earli': 973, 'romp': 2635, 'yelp': 3517, 'bush': 439, 'fruitless': 1269, 'hunt': 1531, 'expedit': 1101, 'jump': 1698, 'triumphantli': 3236, 'muddi': 2036, 'paw': 2246, 'ego': 994, 'constantli': 668, 'assur': 195, 'guy': 1406, 'regardless': 2526, 'slob': 2874, 'increasingli': 1574, 'monday': 2006, 'certainli': 515, 'confus': 658, 'nervou': 2077, 'breakdown': 398, 'inabl': 1567, 'simplest': 2837, 'cours': 710, 'wake': 3397, 'middl': 1960, 'fret': 1259, 'formerli': 1231, 'build': 426, 'busi': 440, 'half': 1414, 'dozen': 941, 'board': 359, 'plow': 2317, 'bulg': 428, 'grow': 1391, 'older': 2155, 'chang': 525, 'speed': 2936, 'bore': 372, 'univers': 3298, 'skip': 2857, 'end': 1019, 'chapter': 529, 'answer': 132, 'metaphys': 1956, 'slowli': 2875, 'longer': 1839, 'letter': 1792, 'shave': 2786, 'barn': 261, 'perfectionist': 2261, 'polish': 2331, 'reluct': 2542, 'born': 373, 'compuls': 645, 'complic': 640, 'bliss': 349, 'motionless': 2025, 
'indefinit': 1580, 'lizard': 1826, 'sun': 3057, 'indol': 1592, 'blink': 348, 'misfortun': 1983, 'whichev': 3433, 'former': 1230, 'group': 1390, 'struggl': 3023, 'unconsci': 3267, 'none': 2102, 'posit': 2344, 'antagonist': 133, 'retir': 2591, 'roam': 2621, 'lie': 1795, 'cloud': 593, 'behind': 302, 'undon': 3281, 'nobodi': 2096, 'fix': 1192, 'futil': 1282, 'lumin': 1859, 'dial': 860, 'rooster': 2639, 'crow': 740, 'valley': 3342, 'pull': 2431, 'sheet': 2788, 'treacher': 3224, 'encroach': 1018, 'dawn': 780, 'equip': 1052, 'bradley': 389, 'hardwar': 1431, 'store': 3004, 'full': 1270, 'carpent': 480, 'tool': 3197, 'rotari': 2648, 'saw': 2707, 'piec': 2291, 'machineri': 1865, 'essenti': 1062, 'babbit': 232, 'dowel': 935, 'huge': 1524, 'squar': 2957, 'pegboard': 2254, 'hang': 1422, 'lumber': 1858, 'workbench': 3483, 'sandpap': 2691, 'glue': 1340, 'assort': 193, 'nail': 2053, 'plumb': 2318, 'gadget': 1286, 'idea': 1539, 'add': 42, 'collect': 609, 'wagon': 3394, 'load': 1828, 'jinni': 1681, 'recent': 2508, 'purchas': 2435, 'glad': 1332, 'unload': 3303, 'stuff': 3029, 'cellar': 508, 'barrag': 264, 'acid': 30, 'fuss': 1281, 'vulner': 3393, 'stone': 3000, 'wall': 3400, 'nightmar': 2092, 'caus': 501, 'pacifi': 2197, 'sleep': 2864, 'pill': 2293, 'send': 2750, 'box': 387, 'focus': 1204, 'hole': 1487, 'electr': 1002, 'stuck': 3025, 'hopeless': 1499, 'heavi': 1453, 'top': 3198, 'sturdi': 3035, 'leg': 1781, 'wobbl': 3469, 'newborn': 2085, 'calf': 456, 'ultim': 3260, 'lean': 1772, 'scrap': 2724, 'paper': 2215, 'properli': 2410, 'handl': 1421, 'lay': 1768, 'floor': 1199, 'disorderli': 903, 'crombi': 737, 'poke': 2327, 'gingerli': 1327, 'foot': 1211, 'simpler': 2836, 'upstair': 3329, 'crumb': 742, 'laid': 1738, 'bad': 240, 'toss': 3201, 'worktabl': 3485, 'lightli': 1800, 'job': 1682, 'porch': 2340, 'steadi': 2984, 'son': 2907, 'hammer': 1416, 'blatz': 343, 'smithtown': 2882, 'hire': 1480, 'drunk': 959, 'probabl': 2387, 'lodg': 1834, 'sober': 2892, 'doubt': 931, 'touch': 3203, 
'greatest': 1376, 'boon': 369, 'highfield': 1474, 'fast': 1146, 'unless': 3301, 'talk': 3090, 'lone': 1837, 'type': 3256, 'funni': 1273, 'wooden': 3476, 'vision': 3382, 'simpl': 2835, 'disclos': 891, 'ignor': 1544, 'agre': 68, 'pretti': 2377, 'morn': 2019, 'truck': 3241, 'stain': 2963, 'wax': 3412, 'accord': 27, 'outlet': 2184, 'fluoresc': 1202, 'lamp': 1741, 'guess': 1396, 'effici': 991, 'shipshap': 2796, 'batter': 275, 'carrier': 482, 'extra': 1119, 'voltag': 3389, 'fuse': 1280, 'circuit': 568, 'nothin': 2113, 'depress': 832, 'news': 2086, 'reliev': 2539, 'stair': 2964, 'utensil': 3336, 'unoccupi': 3305, 'oughta': 2180, 'contrapt': 681, 'express': 1112, 'sayin': 2709, 'shelv': 2790, 'plenti': 2315, 'academ': 20, 'alreadi': 95, 'measur': 1932, 'shelf': 2789, 'ambigu': 104, 'word': 3480, 'sentenc': 2755, 'ben': 313, 'franklin': 1252, 'spoken': 2945, 'fogg': 1206, 'greek': 1378, 'richard': 2610, 'brinsley': 408, 'sheridan': 2791, 'interpret': 1640, 'understood': 3276, 'witti': 3467, 'journalist': 1693, 'centuri': 511, 'ago': 65, 'today': 3187, 'profound': 2398, 'except': 1088, 'rhetorician': 2605, 'amphibolog': 113, 'parisolog': 2223, 'olog': 2156, 'common': 628, 'parlanc': 2226, 'misinterpret': 1984, 'misunderstand': 1995, 'misdirector': 1982, 'kindr': 1719, 'misde': 1981, 'speci': 2926, 'baffl': 242, 'interweav': 1643, 'repetit': 2560, 'jane': 1670, 'christma': 561, 'card': 474, 'sent': 2754, 'shall': 2778, 'exercis': 1094, 'compound': 642, 'expand': 1099, 'jest': 1677, 'sour': 2917, 'dilemma': 877, 'upset': 3326, 'forget': 1223, 'reconsid': 2511, 'misunderstood': 1996, 'politician': 2333, 'jone': 1689, 'print': 2384, 'denial': 828, 'retract': 2593, 'contradict': 679, 'statement': 2977, 'misquot': 1987, 'regard': 2525, 'frame': 1247, 'verbal': 3362, 'fog': 1205, 'stump': 3031, 'experienc': 1104, 'byron': 443, 'classic': 580, 'explan': 1108, 'similarli': 2834, 'lincoln': 1805, 'cryptic': 743, 'explanatori': 1109, 'biter': 334, 'bit': 332, 'obscur': 2130, 'senat': 
2749, 'interview': 1642, 'constitu': 669, 'misplac': 1986, 'modifi': 2002, 'honor': 1496, 'observ': 2131, 'breach': 396, 'amus': 115, 'appli': 151, 'wrong': 3504, 'vers': 3365, 'familiar': 1137, 'grammarian': 1363, 'quatrain': 2449, 'beat': 281, 'drunken': 960, 'spree': 2952, 'gag': 1287, 'vaudevil': 3353, 'standbi': 2969, 'variat': 3349, 'smith': 2881, 'stock': 2996, 'ran': 2477, 'sick': 2818, 'routin': 2651, 'standard': 2968, 'devic': 854, 'slack': 2860, 'gagwrit': 1291, 'blunder': 356, 'acquir': 32, 'habitat': 1410, 'gaglin': 1290, 'cartoon': 487, 'theater': 3129, 'doorman': 927, 'excus': 1093, 'handbag': 1418, 'journal': 1691, 'suppli': 3061, 'slip': 2873, 'advis': 55, 'contest': 676, 'snapshot': 2885, 'larger': 1751, 'af': 56, 'inch': 1569, 'classifi': 581, 'chockful': 554, 'misrel': 1988, 'construct': 671, 'reader': 2495, 'digest': 874, 'often': 2150, 'california': 457, 'rent': 2556, 'counti': 707, 'breviti': 403, 'rubbish': 2653, 'shape': 2780, 'bathtub': 274, 'local': 1832, 'headlines': 1446, 'contribut': 683, 'wide': 3442, 'varieti': 3350, 'comic': 621, 'editori': 988, 'saturday': 2703, 'friendship': 1263, 'schoolday': 2719, 'column': 614, 'renew': 2555, 'subscript': 3039, 'enjoy': 1033, 'cater': 497, 'affair': 57, 'facil': 1126, 'accommod': 24, '200': 6, 'favorit': 1152, 'anatom': 117, 'refer': 2519, 'anatomi': 118, 'kiss': 1721, 'passion': 2237, 'reappear': 2501, 'jocular': 1684, 'journales': 1692, 'shot': 2806, 'suburb': 3040, 'morgan': 2017, 'escort': 1060, 'vera': 3360, 'green': 1379, 'church': 563, 'accid': 23, 'star': 2970, 'injur': 1609, 'spectacularli': 2932, 'scenic': 2716, 'doubl': 930, 'weber': 3415, 'kick': 1715, 'ensu': 1037, 'rumpu': 2660, 'stomach': 2999, 'literatur': 1819, 'conscienti': 660, 'secretari': 2738, 'entri': 1044, 'lumbar': 1857, 'clariti': 578, 'grammat': 1364, 'misconstruct': 1980, 'dangl': 771, 'permit': 2268, 'maid': 1873, 'instruct': 1630, 'tail': 3085, 'epicur': 1046, 'lemon': 1785, 'demur': 825, 'silli': 2831, 
'reluctantli': 2543, 'specimen': 2930, 'depart': 830, 'salesladi': 2677, 'inquir': 1613, 'larg': 1750, 'failur': 1129, 'transfer': 3219, 'custom': 760, 'allig': 87, 'size': 2854, 'compris': 644, 'extens': 1116, 'restrict': 2587, 'explicit': 1110, 'literari': 1818, 'tea': 3105, 'gush': 1405, 'lack': 1733, 'distinct': 908, 'nod': 2097, 'understandingli': 3274, 'definit': 810, 'modern': 2001, 'gotten': 1357, 'aw': 227, 'cute': 763, 'indefin': 1579, 'clergyman': 585, 'sermon': 2763, 'dearli': 788, 'belov': 311, 'preach': 2358, 'repent': 2559, 'sin': 2839, 'regret': 2530, 'extent': 1117, 'shakespear': 2777, 'dictum': 866, 'maxim': 1923, 'allus': 89, 'ship': 2795, 'beauti': 285, 'smash': 2878, 'champagn': 522, 'stern': 2989, 'slid': 2869, 'grace': 1361, 'sea': 2732, 'unlik': 3302, 'sergeant': 2760, 'motor': 2026, 'fort': 1233, 'mason': 1912, 'gym': 1408, 'tonight': 3195, 'haul': 1440, 'danc': 770, 'seat': 2735, 'approach': 156, 'seek': 2743, 'minor': 1974, 'die': 867, 'joe': 1685, 'blue': 355, 'aris': 169, 'verb': 3361, 'contain': 672, 'respons': 2582, 'moonlit': 2013, 'promptli': 2407, 'request': 2571, 'notori': 2116, 'hard': 1427, 'equivoc': 1054, 'recommend': 2510, 'highli': 1475, 'bearer': 280, 'satisfact': 2701, 'berth': 319, 'sure': 3064, 'promin': 2405, 'frequent': 1258, 'occurr': 2141, 'pronoun': 2408, 'twofold': 3255, 'whiskey': 3435, 'speaker': 2925, 'misnom': 1985, 'excit': 1090, 'pain': 2204, 'grave': 1370, 'moreland': 2016, 'critic': 736, 'ohio': 2152, 'alon': 91, 'cottag': 700, 'upson': 3328, 'down': 936, 'peacock': 2251, 'vener': 3355, 'angri': 124, 'dwindl': 972, 'diminish': 879, 'statur': 2981, 'humor': 1528, 'unscientif': 3315, 'believ': 306, 'momentum': 2004, 'weightless': 3419, 'earth': 976, 'gagarin': 1288, 'cult': 748, 'bee': 292, 'vacuum': 3340, 'fli': 1198, 'hive': 1484, 'futur': 1283, 'slightli': 2872, 'stiff': 2992, 'trembl': 3225, 'funer': 1272, 'admit': 51, 'crazi': 723, 'tension': 3121, 'terror': 3125, 'allevi': 86, 'menac': 1944, 'horror': 
1505, 'comedian': 619, 'zeitgeist': 3526, 'loon': 1842, 'march': 1895, 'hare': 1433, 'manic': 1888, 'paraphras': 2219, 'lewi': 1794, 'carrol': 483, 'bear': 278, 'gulp': 1404, 'bare': 260, 'walru': 3401, 'seal': 2733, 'doom': 925, 'sake': 2674, 'reread': 2573, 'emerson': 1011, 'trait': 3217, 'anecdot': 120, 'germani': 1320, 'railway': 2470, 'berlin': 317, 'chant': 526, 'foreign': 1220, 'swallow': 3071, 'ralph': 2476, 'waldo': 3398, 'music': 2044, 'dialogu': 862, 'nonism': 2104, 'demonstr': 824, 'pari': 2221, 'herald': 1465, 'noisier': 2099, 'eichmann': 996, 'hitler': 1483, 'extermin': 1118, 'jew': 1679, 'nonsens': 2105, 'evil': 1080, 'insan': 1614, 'grimli': 1385, 'era': 1055, 'angst': 125, 'downgrad': 938, 'popular': 2339, 'retrogress': 2595, 'tennesse': 3119, 'william': 3449, 'dili': 878, 'powel': 2351, 'program': 2400, 'playwright': 2309, 'dark': 774, 'symbol': 3080, 'disturb': 913, 'psych': 2425, 'derang': 834, 'gland': 1334, 'jumpi': 1699, 'central': 510, 'system': 3083, 'plead': 2311, 'behold': 303, 'rainbow': 2472, 'sky': 2859, 'raptur': 2483, 'daffodil': 768, 'sentiment': 2756, 'pure': 2436, 'galahad': 1294, 'knightli': 1725, 'meredith': 1948, 'tom': 3191, 'longfellow': 1840, 'threaten': 3155, 'throat': 3159, 'worn': 3488, 'breast': 400, 'sleev': 2865, 'england': 1031, 'past': 2238, 'impress': 1565, 'verit': 3364, 'theatr': 3130, 'sex': 2772, 'drama': 943, 'clergymen': 586, 'parishion': 2222, 'psychiatrist': 2426, 'harder': 1428, 'arena': 166, 'jame': 1669, 'noel': 2098, 'coward': 716, 'disappear': 886, 'forev': 1222, 'somnol': 2906, 'antic': 134, 'heyday': 1470, 'tragicom': 3214, 'worthi': 3493, 'explor': 1111, 'novel': 2118, 'satir': 2700, 'stood': 3001, 'brothel': 417, 'dustbin': 971, 'tawdri': 3102, 'creatur': 726, 'crawl': 722, 'woodwork': 3477, 'rock': 2626, 'furious': 1277, 'current': 757, 'omin': 2158, 'sound': 2916, 'act': 34, 'frighten': 1264, 'brood': 415, 'clink': 589, 'toujour': 3204, 'gai': 1292, 'borrow': 374, 'marqui': 1901, 'mehitabel': 
1935, 'gay': 1311, 'undergon': 3271, 'countri': 708, 'homosexu': 1492, 'forgot': 1225, 'institut': 1629, 'ambul': 106, 'questionnair': 2454, 'whether': 3432, 'window': 3452, 'anim': 127, 'saner': 2692, 'french': 1256, 'alp': 94, 'sheep': 2787, 'darken': 775, 'unison': 3296, 'mockeri': 1999, 'salubri': 2685, 'hors': 1506, 'partli': 2231, 'rhinocero': 2607, 'rhino': 2606, 'suddenli': 3046, 'begin': 297, 'panic': 2212, 'languag': 1744, 'ahead': 72, 'ironi': 1660, 'nevertheless': 2083, 'spotlight': 2950, 'khrushchev': 1714, 'cultist': 749, 'nixon': 2094, 'ed': 984, 'wynn': 3506, 'caesar': 450, 'goat': 1342, 'cabbag': 446, 'vari': 3348, 'guard': 1395, 'mice': 1959, 'falcon': 1133, 'protect': 2415, 'dove': 934, 'sharp': 2784, 'fallen': 1135, 'nasti': 2056, 'restiv': 2585, 'inner': 1610, 'sphere': 2940, 'sorrow': 2912, 'outer': 2182, 'space': 2920, 'fiction': 1169, 'planet': 2305, 'solar': 2896, 'gigant': 1325, 'fruit': 1268, 'bat': 272, 'nobl': 2095, 'largest': 1752, 'arnold': 175, 'toynbe': 3208, 'inveter': 1656, 'tendenc': 3118, 'uniqu': 3295, 'ideolog': 1541, 'virtual': 3381, 'disbeliev': 888, 'dealt': 786, 'merri': 1950, 'quickli': 2458, 'strike': 3017, 'frenchmen': 1257, 'dead': 783, 'gott': 1356, 'strafe': 3007, 'angleterr': 123, 'carthago': 485, 'delenda': 815, 'est': 1063, 'deu': 852, 'carthag': 484, 'ideologist': 1542, 'jeopardi': 1676, 'forc': 1215, 'paranoiac': 2218, 'corrupt': 695, 'attract': 213, 'written': 3503, 'readili': 2497, 'obsess': 2132, 'nook': 2106, 'cranni': 721, 'ivori': 1666, 'tower': 3206, 'curious': 755, 'complex': 639, 'poetri': 2322, 'cardin': 475, 'burnsid': 437, 'comfort': 620, 'straight': 3008, 'arlen': 171, 'boost': 370, 'policeman': 2329, 'pop': 2338, 'tone': 3193, 'kid': 1716, 'park': 2224, 'ordin': 2170, 'statu': 2979, 'asham': 185, 'unusu': 3320, 'occur': 2140, 'taught': 3099, 'tumbl': 3247, 'exact': 1082, 'step': 2988, 'principl': 2383, 'pursu': 2438, 'grownup': 1393, 'lawn': 1766, 'reflex': 2521, 'normal': 2107, 'predict': 2364, 
'cordial': 692, 'loyal': 1853, 'unpredict': 3310, 'echo': 982, 'british': 409, 'twirlingli': 3253, 'magnific': 1872, 'rapid': 2481, 'somersault': 2902, 'pleasantli': 2314, 'tingl': 3178, 'insinu': 1619, 'handi': 1420, 'oatmeal': 2127, 'snap': 2884, 'serious': 2762, 'particularli': 2230, 'loudest': 1847, 'format': 1229, 'root': 2640, 'oak': 2126, 'divid': 915, 'bottom': 378, 'sag': 2671, 'curios': 753, 'happili': 1426, 'threw': 3157, 'backward': 238, 'wiggl': 3445, 'held': 1457, 'wink': 3454, 'upsid': 3327, 'gaspingli': 1304, 'belli': 309, 'enorm': 1034, 'lip': 1812, 'eyelid': 1124, 'clap': 577, 'grand': 1365, 'circl': 567, 'shook': 2802, 'shake': 2776, 'hip': 1479, 'shoulder': 2807, 'happi': 1425, 'laughter': 1762, 'funnier': 1274, 'bump': 431, 'forehead': 1219, 'martini': 1907, 'icebox': 1537, 'funniest': 1275, 'sing': 2842, 'shimmi': 2794, 'sister': 2848, 'kate': 1707, 'clumsi': 596, 'catch': 496, 'allow': 88, 'sorri': 2911, 'mink': 1973, 'low': 1850, 'constitut': 670, 'perfectli': 2262, 'friendli': 1262, 'chorus': 558, 'heartili': 1451, 'rid': 2611, 'sweater': 3073, 'tap': 3095, 'uncl': 3266, 'respond': 2581, 'bangish': 254, 'qualiti': 2445, 'upper': 3325, 'lift': 1798, 'bite': 333, 'radish': 2468, 'sting': 2995, 'curl': 756, 'neat': 2064, 'heap': 1447, 'center': 509, 'cheek': 539, 'bitten': 335, 'suspens': 3069, 'unbear': 3263, 'snatch': 2886, 'inspect': 1621, 'ladylik': 1736, 'inaud': 1568, 'joy': 1694, 'brown': 420, 'curiou': 754, 'pocket': 2320, 'notion': 2115, 'omen': 2157, 'pamper': 2208, 'blush': 358, 'cowardic': 717, 'imposs': 1564, 'indispens': 1590, 'pass': 2235, 'hello': 1459, 'persuad': 2272, 'junctur': 1700, 'loathsom': 1831, 'youngest': 3523, 'trick': 3231, 'laughingli': 1761, 'index': 1584, 'bother': 376, 'loath': 1830, 'push': 2440, 'distribut': 911, 'coconut': 600, 'glanc': 1333, 'stringi': 3018, 'shiver': 2798, 'crept': 730, 'side': 2819, 'gorboduc': 1351, 'revolt': 2601, 'chiefli': 547, 'pranc': 2354, 'horselik': 1507, 'balki': 247, 
'demeanor': 823, 'administr': 49, 'bulletin': 429, 'convuls': 687, 'wheez': 3428, 'flower': 1200, 'eh': 995, 'maneuv': 1883, 'swig': 3078, 'sherri': 2792, 'dreg': 951, 'divert': 914, 'command': 622, 'deliv': 820, 'forthright': 1235, 'coy': 718, 'unfortun': 3288, 'nag': 2052, 'nettl': 2078, 'revel': 2597, 'truth': 3245, 'sacrific': 2667, 'superior': 3059, 'victori': 3369, 'illegitim': 1547, 'grind': 1386, 'rakishli': 2475, 'intrigu': 1646, 'rollickingli': 2632, 'profund': 2399, 'pueri': 2430, 'aquam': 159, 'de': 782, 'silva': 2832, 'agricola': 70, 'portant': 2341, 'vignett': 3373, 'unforgett': 3287, 'epoch': 1050, 'marcellu': 1894, 'beset': 320, 'eve': 1070, 'birthday': 331, 'festiv': 1166, 'messeng': 1952, 'saracen': 2696, 'invad': 1651, 'silesia': 2829, 'provinc': 2421, 'affianc': 59, 'cancel': 466, 'celebr': 506, 'buckl': 423, 'scimitar': 2721, 'stumbl': 3030, 'blindli': 347, 'hit': 1482, 'kill': 1717, 'oxcart': 2195, 'albani': 79, 'opera': 2165, 'spumoni': 2956, 'immort': 1553, 'il': 1545, 'sevigli': 2771, 'del': 814, 'spegititgninino': 2937, 'contralto': 680, 'hatti': 1439, 'sforzt': 2774, 'extraordinari': 1120, 'render': 2554, 'myth': 2050, 'rescu': 2574, 'prometheu': 2404, 'cavalri': 502, 'warehous': 3405, 'leather': 1776, 'conduit': 653, 'amid': 109, 'cheer': 541, 'hubba': 1522, 'yalagaloo': 3508, 'pip': 2297, 'overtur': 2192, 'curtain': 758, 'rise': 2618, 'ranavan': 2478, 'northern': 2109, 'armadillo': 173, 'occup': 2138, 'crisi': 734, 'aton': 203, 'sung': 3058, 'grunnfeu': 1394, 'arapaci': 160, 'serbantian': 2759, 'invoc': 1658, 'phineoppu': 2278, 'whereupon': 3430, 'unexpectedli': 3284, 'gorshek': 1353, 'priest': 2381, 'lust': 1863, 'prudenc': 2424, 'respect': 2580, 'vanish': 3347, 'director': 883, 'shuz': 2815, 'skill': 2855, 'sidestep': 2820, 'gooshey': 1350, 'across': 33, 'unpleas': 3309, 'aros': 176, 'soloist': 2899, 'complain': 636, 'smell': 2879, 'train': 3216, 'dharma': 859, 'dictionari': 865, 'eurasian': 1067, 'beaten': 282, 'therein': 3137, 
'visrhanik': 3384, 'banter': 256, 'stem': 2987, 'bouanahsha': 379, 'saliv': 2680, 'likewis': 1802, 'fascin': 1143, 'synonym': 3082, 'pratakku': 2356, 'sweathruna': 3074, 'concern': 648, 'imperi': 1557, 'safe': 2670, 'ocean': 2142, 'spout': 2951, 'pockmanst': 2321, 'delhi': 816, 'regiment': 2527, 'nativ': 2058, 'reportedli': 2565, 'outfit': 2183, 'breez': 402, 'chancellor': 524, 'neitzbohr': 2075, 'melodrama': 1937, 'west': 3424, 'german': 1319, 'woo': 3474, 'plaster': 2306, 'apollo': 145, 'belveder': 312, 'doubtless': 932, 'plot': 2316, 'freudian': 1260, 'jungian': 1701, 'meinckian': 1936, 'victorian': 3370, 'piano': 2283, 'wilhelmina': 3447, 'thereupon': 3138, 'flashback': 1194, 'epilept': 1047, 'gover': 1359, 'dori': 928, 'equival': 1053, 'remaind': 2545, 'proce': 2390, 'lash': 1754, 'slat': 2863, 'venetian': 3356, 'blind': 346, 'reichstag': 2532, 'manner': 1890, 'expung': 1114, 'guilt': 1401, 'pang': 2211, 'assault': 191, 'dunkirk': 969, 'therefor': 3136, 'shatter': 2785, 'tortuou': 3200, 'camera': 463, 'pan': 2209, 'redeem': 2516, 'mute': 2046, 'sculptur': 2731, 'terrifi': 3124, 'catharsi': 498, 'repress': 2567, 'bini': 330, 'salfininista': 2679, 'capit': 471, 'lp': 1854, 'titan': 3181, 'unsung': 3317, 'compos': 641, 'lucki': 1855, 'august': 217, '1916': 0, 'bach': 236, 'dormant': 929, 'erudit': 1058, 'ingeni': 1603, 'purgatori': 2437, 'guidanc': 1400, 'marc': 1893, 'schlek': 2717, 'neurenschatz': 2080, 'skolkau': 2858, 'orchestra': 2168, 'baslot': 271, 'rattzhenfuut': 2487, 'concerto': 649, 'controversi': 684, 'tschilwyk': 3246, 'cadenza': 449, 'conductor': 652, 'portion': 2343, 'flautist': 1196, 'haumd': 1441, 'indianapoli': 1586, 'movement': 2031, 'inton': 1645, 'flaw': 1197, 'tonal': 3192, 'stride': 3016, 'score': 2723, 'lowest': 1852, 'regist': 2529, 'audibl': 215, '407': 8, 'feat': 1154, 'absurd': 16, 'silent': 2828, 'throughout': 3161, 'prefer': 2365, 'refrain': 2522, 'mar': 1892, 'lofti': 1835, 'weight': 3418, 'trite': 3234, 'inexor': 1596, 'renaiss': 
2553, 'nebul': 2067, 'indel': 1581, 'bittersweet': 338, 'veteran': 3367, 'theatrego': 3131, 'latest': 1758, 'significantli': 2826, 'theatric': 3132, 'vine': 3377, 'adapt': 41, 'basho': 269, 'roger': 2629, 'entwhistl': 1045, 'maryland': 1909, 'chemistri': 542, 'instructor': 1631, 'structur': 3022, 'amateurish': 101, 'eleven': 1004, 'length': 1786, 'ceaseless': 504, 'yearn': 3514, 'youth': 3524, 'outspoken': 2186, 'offbeat': 2146, 'cafeteria': 451, 'cup': 751, 'coffe': 603, 'relentlessli': 2535, 'rebuf': 2504, 'waitress': 3396, 'intrud': 1648, 'tutor': 3250, 'conceal': 646, 'destroy': 847, 'pie': 2290, 'muffin': 2038, 'flash': 1193, 'intuit': 1650, 'rilk': 2617, 'nonexist': 2103, 'unpretenti': 3311, 'slice': 2867, 'brook': 416, 'atkinson': 202, 'precoci': 2363, 'felin': 1160, 'enfant': 1028, 'francoisett': 1251, 'lagoon': 1737, 'shock': 2799, 'jade': 1667, 'sexi': 2773, 'ballet': 248, 'scenario': 2714, 'lascivi': 1753, 'interlud': 1638, 'nymphomaniac': 2125, 'oper': 2164, 'hopelessli': 1500, 'pithi': 2298, 'parabl': 2216, 'hollow': 1488, 'mlle': 1998, 'sumptuou': 3056, 'motif': 2023, 'horn': 1503, 'existentialist': 1097, 'sartr': 2698, 'petit': 2275, 'yvett': 3525, 'chadro': 516, 'engagingli': 1030, 'bambi': 250, 'compar': 633, 'max': 1922, 'fink': 1186, 'wri': 3498, 'underst': 3272, 'immens': 1551, 'readabl': 2494, 'quizzic': 2462, 'salamand': 2675, 'exquisit': 1115, 'alicia': 84, 'jealou': 1673, 'retali': 2590, 'scrape': 2725, 'alga': 83, 'despair': 843, 'marvel': 1908, 'burbank': 433, 'fourteen': 1242, 'tragic': 3213, 'parkinson': 2225, 'diseas': 896, 'tradit': 3211, 'comeback': 617, 'felix': 1161, 'fing': 1182, 'anna': 128, 'pulova': 2432, 'chisel': 552, 'impecc': 1556, 'mistaken': 1992, 'street': 3013, 'cedric': 505, 'hardwick': 1432, 'rediscoveri': 2517, 'verdi': 3363, 'earliest': 974, 'raucou': 2488, 'nabisco': 2051, 'leitmotiv': 1784, 'reminisc': 2549, 'mudugno': 2037, 'version': 3366, 'volar': 3388, 'maria': 1898, 'calla': 460, 'chip': 551, 'screw': 2730, 
'driver': 956, 'round': 2649, 'opalesc': 2162, 'fascinatingli': 1144, 'emot': 1013, 'rosali': 2641, 'champ': 521, 'elyse': 1009, 'liter': 1817, 'litter': 1821, 'prostrat': 2414, 'franc': 1249, 'beatnik': 284, 'jeun': 1678, 'becaus': 287, 'commot': 630, 'squat': 2958, 'bald': 244, 'ugli': 3259, 'bogartian': 362, 'sadist': 2669, 'amor': 111, 'philosoph': 2276, 'salesman': 2678, 'fredrico': 1254, 'rossilini': 2645, 'endlessli': 1023, 'provoc': 2422, 'spong': 2946, 'altern': 97, 'grandeur': 1366, 'gaull': 1308, 'decad': 795, 'disgust': 897, 'norman': 2108, 'mailer': 1874, 'deni': 827, 'crackl': 720, 'soar': 2891, 'margo': 1896, 'felic': 1159, 'brighetti': 406, 'lusciou': 1862, 'curvac': 759, 'beguil': 298, 'italian': 1664, 'airplan': 76, 'mechan': 1933, 'sophist': 2910, 'abstract': 15, 'expression': 1113, 'stamp': 2966, 'vindic': 3375, 'septemb': 2758, 'augustu': 218, 'quasimodo': 2448, 'guggenheim': 1398, 'valid': 3341, 'culmin': 746, 'monet': 2007, 'kandinski': 1706, 'cubist': 745, 'picasso': 2284, 'blossom': 353, 'pollock': 2334, 'koon': 1730, 'defin': 809, 'franz': 1253, 'kline': 1723, 'inexpress': 1598, 'unpaint': 3307, 'tick': 3171, 'suspend': 3068, 'balloon': 249, 'unwaiv': 3321, 'adher': 46, 'revolutionari': 2602, 'philosophi': 2277, 'enabl': 1016, 'invas': 1653, 'cuba': 744, 'startl': 2975, 'delic': 817, 'counterbal': 706, 'mass': 1913, 'uniti': 3297, 'entitl': 1043, 'authent': 219, 'masterpiec': 1916, 'model': 2000, 'canva': 469, 'typic': 3257, 'chide': 546, 'sensit': 2753, 'misconcept': 1979, 'wherev': 3431, 'jocos': 1683, 'industri': 1594, 'thiev': 3142, 'reliabl': 2536, 'technic': 3107, 'argot': 167, 'branch': 390, 'burglar': 436, 'rob': 2623, 'tenant': 3117, 'absent': 13, 'contrast': 682, 'prowler': 2423, 'obvious': 2134, 'audac': 214, 'possess': 2345, 'taunt': 3100, 'inferior': 1599, 'draw': 946, 'etymolog': 1066, 'specialist': 2927, 'layman': 1769, 'disabus': 885, 'propos': 2413, 'vent': 3358, 'moral': 2015, 'indign': 1589, 'rifl': 2614, 'resid': 2577, 
'whimper': 3434, 'objet': 2129, 'shame': 2779, 'rectitud': 2514, 'alert': 81, 'aspect': 189, 'gravest': 1371, 'implic': 1560, 'beyond': 326, 'sanction': 2688, 'inflict': 1601, 'unwittingli': 3322, 'malign': 1878, 'vindict': 3376, 'downright': 939, 'inhuman': 1607, 'warn': 3408, 'candidli': 467, 'rice': 2608, 'china': 549, 'depred': 831, 'recapitul': 2506, 'briefli': 405, '22nd': 7, 'hostaria': 1509, 'dell': 821, 'orso': 2175, 'jimmi': 1680, 'buck': 422, 'pennsylvania': 2257, 'premis': 2367, 'hasten': 1436, 'attic': 210, 'temperatur': 3114, 'easili': 979, 'hotter': 1514, 'gold': 1345, 'coast': 598, 'mask': 1911, 'wool': 3478, 'coverlet': 715, 'khaki': 1711, 'pant': 2213, 'innumer': 1612, 'barrel': 265, 'hobbi': 1485, 'tenni': 3120, 'racket': 2465, 'elud': 1008, 'judg': 1696, 'phonograph': 2280, 'scatter': 2713, 'victrola': 3371, 'tango': 3094, 'paso': 2234, 'dobl': 917, 'enerv': 1027, 'glom': 1338, 'featur': 1155, 'curio': 752, 'cabinet': 447, 'south': 2918, 'bureau': 435, 'seventeen': 2769, 'metal': 1955, 'orient': 2173, 'african': 62, 'snuffbox': 2889, 'magnifi': 1871, 'rummag': 2659, 'stack': 2960, 'drawer': 947, 'nearbi': 2062, 'unearth': 3282, 'antiqu': 135, 'chess': 544, 'sandalwood': 2690, 'kodak': 1729, 'afghan': 61, 'customarili': 761, 'slept': 2866, 'lam': 1740, 'neighbor': 2072, 'tiresom': 3180, 'constabl': 667, 'trooper': 3239, 'forego': 1218, 'aid': 74, 'clue': 595, 'withhold': 3464, 'tenac': 3116, 'inspector': 1622, 'javert': 1671, 'gird': 1328, 'pitiless': 2299, 'properti': 2411, 'limb': 1803, 'wither': 3463, 'patch': 2239, 'manifest': 1889, 'multipli': 2040, 'effloresc': 992, 'riddl': 2612, 'frambesia': 1246, 'rot': 2647, 'clonic': 590, 'spasm': 2923, 'burden': 434, 'prelud': 2366, 'occident': 2137, 'accur': 28, 'forecast': 1217, 'intensifi': 1635, 'anguish': 126, 'delimit': 819, 'divin': 916, 'affront': 60, 'mete': 1957, 'analog': 116, 'instanc': 1624, 'tight': 3173, 'buddha': 424, 'heist': 1456, 'obsidian': 2133, 'statuett': 2980, 'brass': 393, 
'teakwood': 3106, 'widespread': 3443, 'western': 3425, 'belief': 305, 'lord': 1844, 'compassion': 634, 'jehovah': 1674, 'allah': 85, 'ordinarili': 2171, 'wheel': 3427, 'specifi': 2929, 'impiou': 1559, 'folk': 1207, 'steal': 2985, 'maltreat': 1879, 'peculiar': 2253, 'retribut': 2594, 'overtak': 2191, 'joker': 1688, 'hood': 1497, 'priceless': 2379, 'khmer': 1713, 'muse': 2043, 'guimet': 1403, 'salpetrier': 2683, 'unmistak': 3304, 'symptom': 3081, 'leprosi': 1788, 'amount': 112, 'chaulmoogra': 537, 'torment': 3199, 'expir': 1106, 'indescrib': 1583, 'fantod': 1140, 'implor': 1561, 'bless': 345, 'forgiv': 1224, 'desecr': 839, 'intern': 1639, 'similar': 2833, 'recount': 2513, 'experi': 1103, 'catalogu': 493, 'sudanes': 3045, 'carv': 488, 'juju': 1697, 'liver': 1825, 'dealer': 785, 'khartoum': 1712, 'chines': 550, 'femal': 1165, 'proven': 2419, 'honan': 1493, 'fell': 1162, 'previous': 2378, 'quantiti': 2446, 'applejack': 150, 'indian': 1585, 'deiti': 813, 'ganessa': 1298, 'siva': 2852, 'krishna': 1732, 'altogeth': 99, 'hail': 1412, 'travancor': 3222, 'subcontin': 3037, 'kali': 1705, 'goddess': 1344, 'death': 790, 'worship': 3491, 'thugge': 3168, 'nuf': 2122, 'sed': 2741, 'bali': 245, 'bent': 316, 'interlac': 1637, 'amulet': 114, 'housebreak': 1517, 'mem': 1939, 'rajah': 2474, '1949': 5, 'inscrib': 1615, 'balines': 246, 'dialect': 861, 'comprehend': 643, 'tjokorda': 3183, 'agoeng': 66, 'translat': 3220, 'whosoev': 3441, 'violat': 3379, 'rooftre': 2636, 'legend': 1783, 'rain': 2471, 'pebbl': 2252, 'cocu': 601, 'fishmong': 1189, 'trolley': 3237, 'furthermor': 1279, 'vandal': 3346, 'naught': 2060, 'loot': 1843, 'cycl': 764, 'unto': 3318, 'remorseless': 2550, 'preced': 2360, 'pap': 2214, 'gage': 1289, 'rod': 2627, 'gorg': 1352, 'pizza': 2301, 'pinbal': 2294, 'tavern': 3101, 'ubermenschen': 3258, 'listen': 1815, 'befel': 294, 'wisenheim': 3459, 'tangl': 3093, 'joss': 1690, 'staircas': 2965, 'nich': 2090, 'coffin': 604, 'tibetan': 3170, 'bombay': 366, 'occasion': 2136, 'merit': 
1949, 'garland': 1302, 'penni': 2256, 'replenish': 2562, 'suspicion': 3070, 'process': 2393, 'elimin': 1005, 'grocer': 1387, 'thoroughli': 3150, 'carton': 486, 'overhead': 2189, 'probe': 2388, 'savori': 2706, 'yap': 3509, 'encount': 1017, 'winter': 3455, 'sprinkl': 2954, 'ici': 1538, 'pavement': 2245, 'scarifi': 2712, 'prodigi': 2394, 'clad': 575, 'garbag': 1300, 'collector': 610, 'hurl': 1532, 'pail': 2203, 'areaway': 165, 'exasper': 1085, 'thereaft': 3135, 'bronx': 414, 'zoo': 3527, 'grimac': 1383, 'lion': 1810, 'cigar': 564, 'butt': 441, 'restaur': 2584, 'enrico': 1036, 'paglieri': 2201, 'peter': 2274, 'backyard': 239, 'drunkenli': 961, 'don': 923, 'mio': 1977, 'arbor': 162, 'slash': 2862, 'timber': 3176, 'odiou': 2145, 'toad': 3184, 'chivi': 553, 'due': 963, 'inevit': 1595, 'overreach': 2190, 'stub': 3024, 'contentedli': 675, 'tallow': 3091, 'crisscross': 735, 'wrinkl': 3499, 'wire': 3457, 'nemesi': 2076, 'footstep': 1212, 'quaver': 2450, 'ape': 144, 'specter': 2933, 'abe': 9, 'spector': 2934, 'rigid': 2616, 'grimesbi': 1384, 'roylott': 2652, 'speckl': 2931, 'band': 252, 'groceri': 1388, 'suffici': 3048, 'neurot': 2081, 'incubi': 1576, 'satisfi': 2702, 'disintegr': 899, 'masterli': 1915, 'reduc': 2518, 'beggari': 296, 'hubri': 1523, 'defici': 808, 'sadism': 2668, 'straightaway': 3009, 'evolv': 1081, 'depth': 833, 'batho': 273, 'besmirch': 322, 'network': 2079, 'rode': 2628, 'elev': 1003, 'thirteen': 3148, 'extremi': 1122, 'pityingli': 2300, 'horoscop': 1504, 'autumn': 222, 'revers': 2599, 'rosi': 2644, 'inventori': 1654, 'sidl': 2821, 'bodhisattva': 360, 'hor': 1501, 'combat': 615, 'sprue': 2955, 'yaw': 3510, 'boil': 363, 'granvil': 1368, 'wilt': 3450, 'fluke': 1201, 'bilharziasi': 328, 'hex': 1469, 'lioniz': 1811, 'vocat': 3386, 'exot': 1098, 'headach': 1444, 'perelman': 2259, 'revuls': 2603, 'desert': 840, 'shut': 2814, 'drop': 957, 'exhal': 1095, 'aisl': 77, 'aviari': 225, 'burst': 438, 'song': 2908, 'doll': 920, 'mistak': 1991, 'bang': 253, 'cheekbon': 540, 
'betray': 324, 'cheroke': 543, 'midwestern': 1962, 'lineag': 1807, 'novelist': 2119, 'carp': 479, 'lower': 1851, 'trifl': 3232, 'voluptu': 3390, 'gaug': 1307, 'swift': 3077, 'greedi': 1377, 'insid': 1618, 'boucl': 380, 'stupefi': 3034}\n" + ] + } + ], + "source": [ + "print(vc)\n", + "print(cv.vocabulary_)" + ] + } + ], + "metadata": { + "colab": { + "name": "Untitled9.ipynb", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +}