From 7996613ba8481248f9c97617b1b142d8484f1d55 Mon Sep 17 00:00:00 2001 From: Kaushik Manikonda <78956364+kaushikmanikonda@users.noreply.github.com> Date: Sat, 13 Mar 2021 02:22:49 -0600 Subject: [PATCH 1/2] Add files via upload Iteration_02 of the English Thesaurus application. Improved word recommendations. --- English Thesaurus_Iteration-02.ipynb | 178 +++++++++++++++++++++++++++ 1 file changed, 178 insertions(+) create mode 100644 English Thesaurus_Iteration-02.ipynb diff --git a/English Thesaurus_Iteration-02.ipynb b/English Thesaurus_Iteration-02.ipynb new file mode 100644 index 0000000..37de2a4 --- /dev/null +++ b/English Thesaurus_Iteration-02.ipynb @@ -0,0 +1,178 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Please enter a word:Aople\n", + "Did you mean to type any of the below words?\n", + "\n", + "['apple' 'apple tree' 'apple juice' 'apple pie' 'ample' 'applesauce'\n", + " 'apple core' 'apple sauce' 'apple' 'ample' 'apple' 'ample']\n", + "Please enter a word:Treee\n", + "Did you mean to type any of the below words?\n", + "\n", + "['Tregereg' 'Tregereg Written' 'Tregereg Written Latin Script'\n", + " 'Tregereg-W' 'Tregereg-C' 'Tregereg-SE' 'Tregereg' 'Tregereg Written'\n", + " 'Tregereg Written Latin Script' 'Tregereg-W' 'Tregereg-C' 'Tregereg-SE'\n", + " 'Tregereg' 'Tregereg Written' 'Tregereg Written Latin Script'\n", + " 'Tregereg-W' 'Tregereg-C' 'Tregereg-SE' 'tree' 'tree bumblebee'\n", + " 'tree of woe' 'tree' 'tree bumblebee']\n", + "Please enter a word:Clumsy\n", + "['Difficult to handle, because of shape.', 'Lacking coordination in movement or action.']\n", + "Please enter a word:dragoon\n", + "Did you mean to type any of the below words?\n", + "\n", + "['dragonfly' 'dragon' 'dragon fruit' 'dragoman' 'dragon food' 'dragonfly'\n", + " 'dragon' 'dragon fruit' 'dragoman' 'dragon food' 'dragonfly' 'dragon'\n", + " 'dragon fruit' 'dragoman' 'dragon food' 'dragonfly' 'dragon'\n", + " 'dragon fruit' 'dragoman' 'dragon food' 'dragonfly' 'dragon'\n", + " 'dragon fruit' 'dragoman' 'dragon food' 'dragon' 'dragoman' 'dragon']\n", + "Please enter a word:Orfhanage\n", + "Did you mean to type any of the below words?\n", + "\n", + "['orphanage' 'orphanage' 'orphanage' 'orphanage' 'orphanage' 'orphanage']\n", + "Please enter a word:mammoth\n", + "['A large, hairy, extinct elephant-like mammal.']\n", + "Please enter a word:Krokodile\n", + "Did you mean to type any of the below words?\n", + "\n", + "['crocodile' 'crocodile' 'crocodile' 'crocodile' 'crocodile']\n", + "Please enter a word:\\end\n", + "Terminated!\n" + ] + } + ], + "source": [ + "import numpy as np\n", + "\n", + "#The next sequence of code imports the raw json file with a dictionary of words and definitions.\n", + "import json\n", + "data = json.load(open(\"C:\\\\Users\\\\rajak\\\\Documents\\\\DATA Science Resources\\\\Certifications\\\\Udemy\\\\Course Files\\\\The Python Mega Course_03-2021\\\\English Thesaurus/data.json\"))\n", + "type(data)\n", + "\n", + "#The next sequence creates a function that claculates the similarity score for two given strings.\n", + "#The words must be entered as strings, with \" \", otherwise the function will return an error.\n", + "#However, that will not be a problem for this particular application, since we are only using this function\n", + "#inside the last for loop in the next code sequence.\n", + "\n", + "#This function is just for practice. Python has a built-in difflib with a SequenceMatcher function that does\n", + "#a better job of claculating similarity ratio between strings.\n", + "\n", + "def similarity_score(w1,w2):\n", + " \"\"\" Takes two strings as arguments (include \" \" when writing input strings), and returns a similarity score\n", + " based on how many letters and positions match for those two input strings.\"\"\"\n", + " lis1 = []\n", + " lis2 = []\n", + " for l in w1:\n", + " lis1.append(l)\n", + " for m in w2:\n", + " lis2.append(m)\n", + " i = 0\n", + " if len(w1) < len(w2):\n", + " for n in range(len(w1)):\n", + " if lis1[n].lower() == lis2[n].lower():\n", + " i += 1\n", + " return i\n", + " else:\n", + " for n in range(len(w2)):\n", + " if lis1[n].lower() == lis2[n].lower():\n", + " i += 1\n", + " return i\n", + " \n", + "\n", + "#This next sequence takes a word as an argument and returns its definition as the output (among other things).\n", + "#Enter \\end as the input to terminate the program and start again. Otherwise you can not exit the program without\n", + "#resettig the kernel or causing an error.\n", + "\n", + "while True:\n", + " word = input(\"Please enter a word:\")\n", + " dummy = word\n", + " if word == \"\\end\":\n", + " print(\"Terminated!\")\n", + " break\n", + " elif dummy.lower() in data.keys():\n", + " dummy1 = dummy.lower()\n", + " output = data[dummy1]\n", + " print(output)\n", + " elif dummy.isnumeric():\n", + " print(\"Please enter a word, not a number\")\n", + " continue\n", + " elif (dummy.isalpha() == False):\n", + " print(\"Please enter a word containing only alphabets\")\n", + " continue\n", + " else:\n", + " dummy1 = []\n", + " dummy2 = []\n", + " for i in range(1,(len(word)+1)):\n", + " for j in list(data.keys()):\n", + " if j.startswith(word[:i]): #and (len(j) == len(word)):\n", + " dummy1.append(j)\n", + " for k in list(data.keys()):\n", + " if k.endswith(word[-i::1]): #and (len(k) == len(word)):\n", + " dummy2.append(k)\n", + " dummy3 = dummy1 + dummy2\n", + " lis3 = []\n", + " for p in dummy3:\n", + " lis3.append(similarity_score(p,dummy))\n", + " lis3 = np.array(lis3)\n", + " dummy4 = np.array(dummy3)\n", + " print(\"Did you mean to type any of the below words?\\n\")\n", + " print(dummy4[lis3 >= (len(dummy)/2+1)])" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "4" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "similarity_score(\"keraaaatin\",\"Keratin\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.5" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} From 03c717bcab0c5afa7b49a262d87052cc11cdf678 Mon Sep 17 00:00:00 2001 From: Kaushik Manikonda <78956364+kaushikmanikonda@users.noreply.github.com> Date: Sat, 13 Mar 2021 02:42:19 -0600 Subject: [PATCH 2/2] Add files via upload Iteration_03 of the English Thesaurus application. Used the built-in difflib library instead of an original function for comparing strings. --- English Thesaurus_Iteration-03.ipynb | 161 +++++++++++++++++++++++++++ 1 file changed, 161 insertions(+) create mode 100644 English Thesaurus_Iteration-03.ipynb diff --git a/English Thesaurus_Iteration-03.ipynb b/English Thesaurus_Iteration-03.ipynb new file mode 100644 index 0000000..c33b328 --- /dev/null +++ b/English Thesaurus_Iteration-03.ipynb @@ -0,0 +1,161 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Please enter a word:Valkiray\n", + "Did you mean to type any of the below words?\n", + "\n", + "['airway', 'airway', 'Val-Máira']\n", + "Please enter a word:valkiree\n", + "Did you mean to type any of the below words?\n", + "\n", + "['valise', 'valise', 'valise']\n", + "Please enter a word:valkirie\n", + "Did you mean to type any of the below words?\n", + "\n", + "['valise', 'valise', 'valise']\n", + "Please enter a word:valkyrie\n", + "Did you mean to type any of the below words?\n", + "\n", + "['valise', 'valise', 'valise']\n", + "Please enter a word:mythopological\n", + "Did you mean to type any of the below words?\n", + "\n", + "['mythological', 'mythological', 'mythological']\n", + "Please enter a word:\\end\n", + "Terminated!\n" + ] + } + ], + "source": [ + "import numpy as np\n", + "from difflib import SequenceMatcher as sm\n", + "from difflib import get_close_matches\n", + "\n", + "#The next sequence of code imports the raw json file with a dictionary of words and definitions.\n", + "import json\n", + "data = json.load(open(\"C:\\\\Users\\\\rajak\\\\Documents\\\\DATA Science Resources\\\\Certifications\\\\Udemy\\\\Course Files\\\\The Python Mega Course_03-2021\\\\English Thesaurus/data.json\"))\n", + "\n", + "\n", + "#This next sequence takes a word as an argument and returns its definition as the output (among other things).\n", + "#Enter \\end as the input to terminate the program and start again. Otherwise you can not exit the program without\n", + "#resettig the kernel or causing an error.\n", + "\n", + "while True:\n", + " word = input(\"Please enter a word:\")\n", + " dummy = word\n", + " if word == \"\\end\":\n", + " print(\"Terminated!\")\n", + " break\n", + " elif dummy.lower() in data.keys():\n", + " dummy1 = dummy.lower()\n", + " output = data[dummy1]\n", + " print(output)\n", + " elif dummy.isnumeric():\n", + " print(\"Please enter a word, not a number\")\n", + " continue\n", + " elif (dummy.isalpha() == False):\n", + " print(\"Please enter a word containing only alphabets\")\n", + " continue\n", + " else:\n", + " dummy1 = []\n", + " dummy2 = []\n", + " for i in range(1,(len(word)+1)):\n", + " for j in list(data.keys()):\n", + " if j.startswith(word[:i]): #and (len(j) == len(word)):\n", + " dummy1.append(j)\n", + " for k in list(data.keys()):\n", + " if k.endswith(word[-i::1]): #and (len(k) == len(word)):\n", + " dummy2.append(k)\n", + " dummy3 = dummy1 + dummy2\n", + " lis3 = get_close_matches(dummy,dummy3)\n", + " print(\"Did you mean to type any of the below words?\\n\")\n", + " print(lis3)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "#The next sequence creates a function that claculates the similarity score for two given strings.\n", + "#The words must be entered as strings, with \" \", otherwise the function will return an error.\n", + "#However, that will not be a problem for this particular application, since we are only using this function\n", + "#inside the last for loop in the next code sequence.\n", + "\n", + "def similarity_score(w1,w2):\n", + " \"\"\"Takes two strings as arguments (include \" \" when writing input strings), and returns a similarity score\n", + " based on how many letters and positions match for those two input strings.\"\"\"\n", + " lis1 = []\n", + " lis2 = []\n", + " for l in w1:\n", + " lis1.append(l)\n", + " for m in w2:\n", + " lis2.append(m)\n", + " i = 0\n", + " if len(w1) < len(w2):\n", + " for n in range(len(w1)):\n", + " if lis1[n].lower() == lis2[n].lower():\n", + " i += 1\n", + " return i\n", + " else:\n", + " for n in range(len(w2)):\n", + " if lis1[n].lower() == lis2[n].lower():\n", + " i += 1\n", + " return i\n", + "\n", + "#This function is just for practice. Python has a built-in difflib with a SequenceMatcher function that does\n", + "#a better job of claculating similarity ratio between strings.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "4" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "similarity_score(\"keraaaatin\",\"Keratin\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.5" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +}