diff --git a/.DS_Store b/.DS_Store deleted file mode 100644 index 8c5cf09..0000000 Binary files a/.DS_Store and /dev/null differ diff --git a/Colab_Codes/Advanced_Search_on_Linear_Data_Structures.ipynb b/Colab_Codes/Advanced_Search_on_Linear_Data_Structures.ipynb deleted file mode 100644 index c49a6ef..0000000 --- a/Colab_Codes/Advanced_Search_on_Linear_Data_Structures.ipynb +++ /dev/null @@ -1,951 +0,0 @@ -{ - "nbformat": 4, - "nbformat_minor": 0, - "metadata": { - "colab": { - "name": "Advanced Search on Linear Data Structures.ipynb", - "provenance": [], - "collapsed_sections": [], - "toc_visible": true - }, - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - } - }, - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "id": "p1AUD30cnI0b", - "colab_type": "text" - }, - "source": [ - "# Slow-fast Pointers" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "uxNIaguAv2QT", - "colab_type": "text" - }, - "source": [ - "## Array" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "Kz8-B6bTed1u", - "colab_type": "text" - }, - "source": [ - "### Remove duplicates" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "P6YqRE57ebXz", - "colab_type": "code", - "colab": {} - }, - "source": [ - "a = [0,0,1,1,1,2,2,3,3,4]" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "2a-xbM6GesgQ", - "colab_type": "code", - "colab": {} - }, - "source": [ - "def removeDuplicates(nums) -> int:\n", - " i, j = 0, 0\n", - " while j < len(nums):\n", - " print('[0, i]:', nums[0:i+1], '[i+1, j]:', nums[i+1:j+1])\n", - " if nums[i] != nums[j]:\n", - " # Copy j to i+1\n", - " i += 1\n", - " nums[i] = nums[j]\n", - " print('copy value {} at index {} to index {}'.format(nums[j],j, i))\n", - " j += 1\n", - " return i + 1" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "9WPUvQCge_IW", - "colab_type": "code", - "outputId": 
"ea817fac-5cdc-4514-90ea-f709f273cb15", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 293 - } - }, - "source": [ - "# Test\n", - "removeDuplicates(a)\n", - "a" - ], - "execution_count": 0, - "outputs": [ - { - "output_type": "stream", - "text": [ - "[0, i]: [0] [i+1, j]: []\n", - "[0, i]: [0] [i+1, j]: [0]\n", - "[0, i]: [0] [i+1, j]: [0, 1]\n", - "copy value 1 at index 2 to index 1\n", - "[0, i]: [0, 1] [i+1, j]: [1, 1]\n", - "[0, i]: [0, 1] [i+1, j]: [1, 1, 1]\n", - "[0, i]: [0, 1] [i+1, j]: [1, 1, 1, 2]\n", - "copy value 2 at index 5 to index 2\n", - "[0, i]: [0, 1, 2] [i+1, j]: [1, 1, 2, 2]\n", - "[0, i]: [0, 1, 2] [i+1, j]: [1, 1, 2, 2, 3]\n", - "copy value 3 at index 7 to index 3\n", - "[0, i]: [0, 1, 2, 3] [i+1, j]: [1, 2, 2, 3, 3]\n", - "[0, i]: [0, 1, 2, 3] [i+1, j]: [1, 2, 2, 3, 3, 4]\n", - "copy value 4 at index 9 to index 4\n" - ], - "name": "stdout" - }, - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "[0, 1, 2, 3, 4, 2, 2, 3, 3, 4]" - ] - }, - "metadata": { - "tags": [] - }, - "execution_count": 32 - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "WpoPXLowYthf", - "colab_type": "text" - }, - "source": [ - "### Minimum Size Subarray Sum" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "y_Kq77AwYuCx", - "colab_type": "code", - "colab": {} - }, - "source": [ - "def minSubArrayLen(s: int, nums) -> int:\n", - " i, j = 0, 0\n", - " acc = 0\n", - " ans = float('inf')\n", - " while j < len(nums):\n", - " acc += nums[j]\n", - " # Shrink the window\n", - " while acc >= s:\n", - " ans = min(ans, j - i + 1)\n", - " acc -= nums[i]\n", - " i += 1\n", - " j += 1\n", - " \n", - " return ans if ans < float('inf') else 0" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "9gwIePmuY9ZR", - "colab_type": "code", - "outputId": "76fdcc30-1225-4724-d4b4-cb0c3ed4d1e7", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 35 - } - }, - 
"source": [ - "s = 7\n", - "nums = [1,4,1,2,4,3]\n", - "minSubArrayLen(s, nums)" - ], - "execution_count": 0, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "2" - ] - }, - "metadata": { - "tags": [] - }, - "execution_count": 34 - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "7Tvt4n4qHZxP", - "colab_type": "text" - }, - "source": [ - "### [Minimum Window Substring](https://leetcode.com/problems/minimum-window-substring/)" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "JfSUte9NHbF5", - "colab_type": "code", - "colab": {} - }, - "source": [ - "S = \"ADOBECODEBANC\"\n", - "T = \"ABC\"" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "v2aQ3SfaHrdo", - "colab_type": "code", - "colab": {} - }, - "source": [ - "from collections import Counter\n", - "def minWindow(s, t):\n", - " dict_t = Counter(t)\n", - " count = len(dict_t)\n", - " i, j = 0, 0\n", - " ans = []\n", - " minLen = float('inf')\n", - " while j < len(s):\n", - " c = s[j]\n", - " if c in dict_t:\n", - " dict_t[c] -= 1\n", - " if dict_t[c] == 0:\n", - " count -= 1\n", - " # Shrink the window\n", - " while count == 0 and i < j:\n", - " curLen = j - i + 1\n", - " if curLen < minLen:\n", - " minLen = j - i + 1\n", - " ans = [s[i:j+1]]\n", - " elif curLen == minLen: \n", - " ans.append(s[i:j+1])\n", - "\n", - " c = s[i]\n", - " if c in dict_t:\n", - " dict_t[c] += 1\n", - " if dict_t[c] == 1:\n", - " count += 1\n", - " i += 1\n", - "\n", - " j += 1\n", - " return ans" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "SrF6tS5PVT6J", - "colab_type": "code", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 35 - }, - "outputId": "1b43cf78-a9fe-4a9a-f410-9f44e45d532a" - }, - "source": [ - "S = 'AOBECDBANC'\n", - "minWindow(S, T)" - ], - "execution_count": 6, - "outputs": [ - { - "output_type": "execute_result", - "data": { - 
"text/plain": [ - "['CDBA', 'BANC']" - ] - }, - "metadata": { - "tags": [] - }, - "execution_count": 6 - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "PGsA-6rbv0F_", - "colab_type": "text" - }, - "source": [ - "## Linked List" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "x1TiIhmgv1WR", - "colab_type": "text" - }, - "source": [ - "### Middle of the Linked List" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "QBa-8mmewBC6", - "colab_type": "code", - "colab": {} - }, - "source": [ - "def middleNode(head):\n", - " slow = fast = head\n", - " while fast and fast.next: \n", - " fast = fast.next.next\n", - " slow = slow.next \n", - " return slow" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "vAGAFdIIwHLf", - "colab_type": "code", - "colab": {} - }, - "source": [ - "from collections import defaultdict\n", - "class Node:\n", - " def __init__(self, val, next = None):\n", - " self.val = val\n", - " self.next = next\n", - "\n", - "def getLinkedList(val_lst):\n", - " head = cur = None\n", - " # Use a dictionary to track nodes\n", - " Nodes = defaultdict(Node)\n", - " if val_lst:\n", - " head = cur = Node(val_lst[0]) \n", - " Nodes[val_lst[0]] = head \n", - " if len(val_lst) >= 2:\n", - " for i in range(1, len(val_lst)):\n", - " if val_lst[i] not in Nodes:\n", - " cur.next = Node(val_lst[i])\n", - " Nodes[val_lst[i]] = cur.next\n", - " else:\n", - " cur.next = Nodes[val_lst[i]]\n", - " cur = cur.next\n", - " return head\n" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "22Ajd39uwWDE", - "colab_type": "code", - "outputId": "bf59fd8d-b303-411c-f257-d546652fc2e2", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 35 - } - }, - "source": [ - "head1 = getLinkedList([1, 2, 3, 4, 5])\n", - "head2 = getLinkedList([1, 2, 3, 4, 5, 6])\n", - "print(middleNode(head1).val, middleNode(head2).val)" - ], - 
"execution_count": 0, - "outputs": [ - { - "output_type": "stream", - "text": [ - "3 4\n" - ], - "name": "stdout" - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "2W37Ktyny09T", - "colab_type": "text" - }, - "source": [ - "### Floyd's Cycle Detection" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "RVMKzZD_y4HK", - "colab_type": "code", - "colab": {} - }, - "source": [ - "head_cycle = getLinkedList([1, 2, 3, 4, 5, 6, 3])" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "UEeG5uoxy-ET", - "colab_type": "code", - "colab": {} - }, - "source": [ - "# Looping over a cyclic linked list makes the program stuck \n", - "def iterate(head):\n", - " cur = head_cycle\n", - " while cur:\n", - " print(cur.val)\n", - " cur = cur.next" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "N9GnJe54APNW", - "colab_type": "text" - }, - "source": [ - "### [Check the existence of cycles](https://leetcode.com/problems/linked-list-cycle/)" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "EqAIbFI6_5zf", - "colab_type": "code", - "colab": {} - }, - "source": [ - "def hasCycle(head):\n", - " slow = fast = head\n", - " while fast and fast.next:\n", - " slow = slow.next\n", - " fast = fast.next.next\n", - " if slow == fast:\n", - " return True\n", - " return False" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "AGGujN9M_-ZU", - "colab_type": "code", - "outputId": "8231f73c-1d58-4c76-d587-5ee1928e67a3", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 35 - } - }, - "source": [ - "hasCycle(head_cycle), hasCycle(head1)" - ], - "execution_count": 0, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "(True, False)" - ] - }, - "metadata": { - "tags": [] - }, - "execution_count": 41 - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - 
"id": "BJxOVKPVAUTK", - "colab_type": "text" - }, - "source": [ - "### [Check where the cycle starts](https://leetcode.com/problems/linked-list-cycle-ii/)" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "lMkK9SxGFtlU", - "colab_type": "code", - "colab": {} - }, - "source": [ - "def detectCycle(head):\n", - " slow = fast = head\n", - "\n", - " def getStartNode(slow, fast, head):\n", - " # Reset slow pointer \n", - " slow = head\n", - " while fast and slow != fast:\n", - " slow = slow.next\n", - " fast = fast.next\n", - " return slow\n", - "\n", - " while fast and fast.next:\n", - " slow = slow.next\n", - " fast = fast.next.next\n", - " # A cycle is detected\n", - " if slow == fast: \n", - " return getStartNode(slow, fast, head)\n", - " \n", - " return None\n" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "ZlC_JS3zGoAs", - "colab_type": "code", - "outputId": "a4f45ccf-bf95-4408-83f5-a7e98ee75c4e", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 35 - } - }, - "source": [ - "detectCycle(head_cycle).val, detectCycle(head1)" - ], - "execution_count": 0, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "(3, None)" - ] - }, - "metadata": { - "tags": [] - }, - "execution_count": 43 - } - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "2Yu3lZROJRd0", - "colab_type": "code", - "colab": {} - }, - "source": [ - "def resetLastNode(slow, fast, head):\n", - " slow = head\n", - " while fast and slow.next != fast.next:\n", - " slow = slow.next\n", - " fast = fast.next\n", - " fast.next = None" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "klgtZzC8J1p5", - "colab_type": "code", - "colab": {} - }, - "source": [ - "def removeCycle(head):\n", - " slow = fast = head\n", - "\n", - " while fast and fast.next:\n", - " slow = slow.next\n", - " fast = fast.next.next\n", - " # A cycle is detected\n", - " if 
slow == fast: \n", - " resetLastNode(slow, fast, head)\n", - " return \n", - " return " - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "Hkfwf1ZHJ_ee", - "colab_type": "code", - "outputId": "c92543cb-be01-46ed-c8b5-c3cbb9f789d8", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 127 - } - }, - "source": [ - "removeCycle(head_cycle)\n", - "iterate(head_cycle)" - ], - "execution_count": 0, - "outputs": [ - { - "output_type": "stream", - "text": [ - "1\n", - "2\n", - "3\n", - "4\n", - "5\n", - "6\n" - ], - "name": "stdout" - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "XRirMOVAnF_P", - "colab_type": "text" - }, - "source": [ - "# Opposite-directional Pointers" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "sHZyeFkjoSLT", - "colab_type": "code", - "colab": {} - }, - "source": [ - "# Reverse a list or string in place\n", - "def reverse(a):\n", - " i, j = 0, len(a) - 1\n", - " while i < j:\n", - " # Swap items\n", - " a[i], a[j] = a[j], a[i]\n", - " i += 1\n", - " j -= 1" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "8Mcrgto4oFhP", - "colab_type": "code", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 35 - }, - "outputId": "4732a45f-bd29-4ccb-d6cc-2bd8be5ba731" - }, - "source": [ - "# Reverse a list\n", - "a = [1, 2, 3, 4, 5]\n", - "b = 'abcd'\n", - "b = list(b)\n", - "reverse(a), reverse(b)\n", - "a, b" - ], - "execution_count": 5, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "([5, 4, 3, 2, 1], ['d', 'c', 'b', 'a'])" - ] - }, - "metadata": { - "tags": [] - }, - "execution_count": 5 - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "peRFBx9ToIMH", - "colab_type": "text" - }, - "source": [ - "## Two Sum" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "a-VwPo3nnaGU", - "colab_type": "code", - "colab": {} - }, - "source": [ - 
"def twoSum(a, target):\n", - " n = len(a)\n", - " i, j = 0, n-1\n", - " while i < j:\n", - " temp = a[i] + a[j]\n", - " if temp == target:\n", - " return [i, j]\n", - " elif temp < target:\n", - " i += 1\n", - " else:\n", - " j -= 1\n", - " return []" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "qE4rNc_yn0ru", - "colab_type": "code", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 35 - }, - "outputId": "0e3f7c1d-2f47-487c-f1bf-644255f4214e" - }, - "source": [ - "a = [2, 5, 7, 11, 15]\n", - "target = 9\n", - "twoSum(a, target)" - ], - "execution_count": 2, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "[0, 2]" - ] - }, - "metadata": { - "tags": [] - }, - "execution_count": 2 - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "X0Epx9cJWTPC", - "colab_type": "text" - }, - "source": [ - "# Three Pointers" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "M2XkxgxUWWMd", - "colab_type": "code", - "colab": {} - }, - "source": [ - "a = [1, 0, 1, 0, 1]\n", - "S = 2" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "-wD9AJE9Wa81", - "colab_type": "code", - "colab": {} - }, - "source": [ - "# Try two pointers\n", - "def numSubarraysWithSum(a, S):\n", - " i, j = 0, 0\n", - " win_sum = 0\n", - " ans = 0\n", - " while j < len(a):\n", - " win_sum += a[j]\n", - " while i S:\n", - " win_sum -= a[i]\n", - " i += 1\n", - " if win_sum == S:\n", - " ans += 1\n", - " print('({}, {})'.format(i, j))\n", - " j += 1\n", - " return ans" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "NLKtEwQUXaQD", - "colab_type": "code", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 90 - }, - "outputId": "783f296b-4ad1-422e-c927-5e475e573874" - }, - "source": [ - "numSubarraysWithSum(a, S)" - ], - "execution_count": 7, - "outputs": [ - { - 
"output_type": "stream", - "text": [ - "(0, 2)\n", - "(0, 3)\n", - "(1, 4)\n" - ], - "name": "stdout" - }, - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "3" - ] - }, - "metadata": { - "tags": [] - }, - "execution_count": 7 - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "DdmOnfQNYP1O", - "colab_type": "text" - }, - "source": [ - "We can clearly see that it missed the case $(2, 4)$. Why? Because we are restricting the subarray sum in range $[i, j]$ to be smaller than or equal to $S$, with the occruence of $0$s that might appear in the front or in the rear of the subarray:\n", - "* In the process of expanding the subarray, pointer $j$ is moved one at a time. Thus, even though $0$s appear in the rear of the subarray, the counting is correct.\n", - "* However, in the process of shrinking the subarray while the restriction is violated($sum > S$), we stop right away once $sum \\leq S$. And in the code, we end up only counting it as one occurrence. With $0$s at the beginning of the subarray, such as the subarray $[0, 1, 0, 1]$ with index $1$ and $4$, there count should be two instead of one. 
" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "XTTf6yb8hy5f", - "colab_type": "code", - "colab": {} - }, - "source": [ - "def numSubarraysWithSum(a, S):\n", - " i, i_h, j = 0, 0, 0\n", - " win_sum = 0\n", - " ans = 0\n", - " while j < len(a):\n", - " win_sum += a[j]\n", - " while i < j and win_sum > S:\n", - " win_sum -= a[i]\n", - " i += 1\n", - " # Move i_h to count all zeros in the front\n", - " i_h = i\n", - " while i_h < j and win_sum == S and a[i_h] == 0:\n", - " print('({}, {})'.format(i_h, j))\n", - " ans += 1\n", - " i_h += 1\n", - "\n", - " if win_sum == S:\n", - " ans += 1\n", - " print('({}, {})'.format(i_h, j))\n", - " j += 1\n", - " return ans" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "yPmDad-RjCpw", - "colab_type": "code", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 109 - }, - "outputId": "9fbe3bf8-8cc9-4dd8-8edc-2b254edce79d" - }, - "source": [ - "numSubarraysWithSum(a, S)" - ], - "execution_count": 9, - "outputs": [ - { - "output_type": "stream", - "text": [ - "(0, 2)\n", - "(0, 3)\n", - "(1, 4)\n", - "(2, 4)\n" - ], - "name": "stdout" - }, - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "4" - ] - }, - "metadata": { - "tags": [] - }, - "execution_count": 9 - } - ] - } - ] -} \ No newline at end of file diff --git a/Colab_Codes/Colab Notebooks/chapter_14_sorting.ipynb b/Colab_Codes/Colab Notebooks/chapter_14_sorting.ipynb deleted file mode 100644 index 67e1cbc..0000000 --- a/Colab_Codes/Colab Notebooks/chapter_14_sorting.ipynb +++ /dev/null @@ -1 +0,0 @@ -{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"chapter_14_sorting.ipynb","version":"0.3.2","provenance":[],"collapsed_sections":[]},"kernelspec":{"name":"python3","display_name":"Python 3"}},"cells":[{"metadata":{"id":"XOkspIkgKots","colab_type":"text"},"cell_type":"markdown","source":["## Counting Sorting in 
O(n+k)"]},{"metadata":{"id":"ptAUeFu8Kvrs","colab_type":"code","colab":{}},"cell_type":"code","source":["def countSort(a):\n"," minK, maxK = min(a), max(a)\n"," k = maxK - minK + 1\n"," count = [0] * (maxK - minK + 1)\n"," n = len(a)\n"," order = [0] * n\n"," # get occurrence\n"," for key in a:\n"," count[key - minK] += 1\n"," \n"," # get prefix sum\n"," for i in range(1, k):\n"," count[i] += count[i-1]\n"," \n"," # put it back in the input\n"," for i in range(n-1, -1, -1):\n"," key = a[i] - minK\n"," count[key] -= 1 # to get the index as position\n"," order[count[key]] = a[i] # put the key back to the sorted position\n"," return order"],"execution_count":0,"outputs":[]},{"metadata":{"id":"IUEe34hHMqBx","colab_type":"code","outputId":"b5074a60-b9b8-4313-ca63-1ba44addd461","executionInfo":{"status":"ok","timestamp":1550259045549,"user_tz":480,"elapsed":271,"user":{"displayName":"Li Yin","photoUrl":"https://lh5.googleusercontent.com/-KgiTKdqPRUg/AAAAAAAAAAI/AAAAAAAAAIE/aHQ6xO5vQpY/s64/photo.jpg","userId":"13365523799853678553"}},"colab":{"base_uri":"https://localhost:8080/","height":34}},"cell_type":"code","source":["a = [9, 10, 2, 8, 9, 3, 7]\n","print(countSort(a))"],"execution_count":0,"outputs":[{"output_type":"stream","text":["[2, 3, 7, 8, 9, 9, 10]\n"],"name":"stdout"}]},{"metadata":{"id":"lHFCwzZNGrMu","colab_type":"text"},"cell_type":"markdown","source":["## Bubble Sort in O(n^2)"]},{"metadata":{"id":"uqzSlyDXGzhO","colab_type":"code","colab":{}},"cell_type":"code","source":["def bubbleSort(a):\n"," if not a or len(a) == 1:\n"," return a\n"," n = len(a)\n"," for i in range(n - 1): #n-1 passes, \n"," for j in range(n - i -1): #each pass will have valid window [0, n-i], and j is the starting index of each pair\n"," if a[j] > a[j + 1]:\n"," a[j], a[j + 1] = a[j + 1], a[j] #swap\n"," return a"],"execution_count":0,"outputs":[]},{"metadata":{"id":"B-WK_VNi_unM","colab_type":"code","colab":{}},"cell_type":"code","source":["def bubbleSortOptimized(a):\n"," if not a 
or len(a) == 1:\n"," return a\n"," n = len(a)\n"," for i in range(n - 1): #n-1 passes, \n"," bSwap = False\n"," for j in range(n - i -1): #each pass will have valid window [0, n-i], and j is the starting index of each pair\n"," if a[j] > a[j + 1]:\n"," a[j], a[j + 1] = a[j + 1], a[j] #swap\n"," bSwap = True\n"," if not bSwap:\n"," break\n"," return a"],"execution_count":0,"outputs":[]},{"metadata":{"id":"npb5NRJdI4sV","colab_type":"code","outputId":"57c48506-47f9-4d27-cad6-b93c3729e4c5","executionInfo":{"status":"ok","timestamp":1550350492596,"user_tz":480,"elapsed":323,"user":{"displayName":"Li Yin","photoUrl":"https://lh5.googleusercontent.com/-KgiTKdqPRUg/AAAAAAAAAAI/AAAAAAAAAIE/aHQ6xO5vQpY/s64/photo.jpg","userId":"13365523799853678553"}},"colab":{"base_uri":"https://localhost:8080/","height":35}},"cell_type":"code","source":["a = [9, 10, 2, 8, 9, 3, 7]\n","print(bubbleSortOptimized(a))"],"execution_count":0,"outputs":[{"output_type":"stream","text":["[2, 3, 7, 8, 9, 9, 10]\n"],"name":"stdout"}]},{"metadata":{"id":"RZwzr_7xBjae","colab_type":"text"},"cell_type":"markdown","source":["## Selection Sort in O(n^2)"]},{"metadata":{"id":"e0U-HGHcBpL4","colab_type":"code","colab":{}},"cell_type":"code","source":["def selectSort(a):\n"," n = len(a)\n"," for i in range(n - 1): #n-1 passes, \n"," ti = n - 1 -i # the position to fill in the largest item of valid window [0, n-i]\n"," li = 0\n"," for j in range(n - i):\n"," if a[j] > a[li]:\n"," li = j\n"," # swap li and ti\n"," print('swap', a[li], a[ti])\n"," a[ti], a[li] = a[li], a[ti]\n"," print(a)\n"," return a\n"," \n"," \n"," #"],"execution_count":0,"outputs":[]},{"metadata":{"id":"r3v62LNjEZVY","colab_type":"code","outputId":"7d4a8ad0-466f-4ff4-dd2f-e214c7979e3e","executionInfo":{"status":"ok","timestamp":1550350715349,"user_tz":480,"elapsed":388,"user":{"displayName":"Li 
Yin","photoUrl":"https://lh5.googleusercontent.com/-KgiTKdqPRUg/AAAAAAAAAAI/AAAAAAAAAIE/aHQ6xO5vQpY/s64/photo.jpg","userId":"13365523799853678553"}},"colab":{"base_uri":"https://localhost:8080/","height":256}},"cell_type":"code","source":["a = [9, 10, 2, 8, 9, 3, 9]\n","print(selectSort(a))"],"execution_count":0,"outputs":[{"output_type":"stream","text":["swap 10 9\n","[9, 9, 2, 8, 9, 3, 10]\n","swap 9 3\n","[3, 9, 2, 8, 9, 9, 10]\n","swap 9 9\n","[3, 9, 2, 8, 9, 9, 10]\n","swap 9 8\n","[3, 8, 2, 9, 9, 9, 10]\n","swap 8 2\n","[3, 2, 8, 9, 9, 9, 10]\n","swap 3 2\n","[2, 3, 8, 9, 9, 9, 10]\n","[2, 3, 8, 9, 9, 9, 10]\n"],"name":"stdout"}]},{"metadata":{"id":"IQcElo1mM_Sf","colab_type":"text"},"cell_type":"markdown","source":["## Insertion Sort in O(n^2)"]},{"metadata":{"id":"yJL27nHcNEJl","colab_type":"code","colab":{}},"cell_type":"code","source":["def insertionSort(a):\n"," if not a or len(a) == 1:\n"," return a\n"," n = len(a)\n"," sl = [a[0]] # sorted list\n"," for i in range(1, n): # items to be inserted into the sorted\n"," j = 0 \n"," while j < len(sl):\n"," if a[i] > sl[j]:\n"," j += 1\n"," else:\n"," sl.insert(j, a[i])\n"," break\n"," if j == len(sl): # not inserted yet\n"," sl.insert(j, a[i])\n"," return sl\n"," \n"," "],"execution_count":0,"outputs":[]},{"metadata":{"id":"Zb5jMt-iScSz","colab_type":"code","outputId":"766ddcce-178d-4832-d154-e853a03debaa","executionInfo":{"status":"ok","timestamp":1550259083716,"user_tz":480,"elapsed":283,"user":{"displayName":"Li Yin","photoUrl":"https://lh5.googleusercontent.com/-KgiTKdqPRUg/AAAAAAAAAAI/AAAAAAAAAIE/aHQ6xO5vQpY/s64/photo.jpg","userId":"13365523799853678553"}},"colab":{"base_uri":"https://localhost:8080/","height":221}},"cell_type":"code","source":["a = [9, 10, 2, 8, 9, 3, 7]\n","print(insertionSort(a))"],"execution_count":0,"outputs":[{"output_type":"stream","text":["[9, 10]\n","[2, 9, 10]\n","[2, 9, 10]\n","[2, 8, 9, 10]\n","[2, 8, 9, 10]\n","[2, 8, 9, 9, 10]\n","[2, 8, 9, 9, 10]\n","[2, 3, 8, 9, 9, 
10]\n","[2, 3, 8, 9, 9, 10]\n","[2, 3, 7, 8, 9, 9, 10]\n","[2, 3, 7, 8, 9, 9, 10]\n","[2, 3, 7, 8, 9, 9, 10]\n"],"name":"stdout"}]},{"metadata":{"id":"EI1bDD3106fV","colab_type":"code","colab":{}},"cell_type":"code","source":["def shift(a, start, end):\n"," for i in range(end, start, -1): # [i, j)\n"," a[i] = a[i-1]\n"," \n","def insertionSortForward(a):\n"," if not a or len(a) == 1:\n"," return a\n"," n = len(a)\n"," sl = [a[0]] # sorted list\n"," for i in range(1, n): # items to be inserted into the sorted\n"," for j in range(i):\n"," if a[i] < a[j]:\n"," # shift all other elements [j, i-1]\n"," tmp = a[i]\n"," shift(a, j, i)\n"," a[j] = tmp \n"," return a\n","\n","def insertionSortInPlace(a):\n"," if not a or len(a) == 1:\n"," return a\n"," n = len(a)\n"," for i in range(1, n): # items to be inserted into the sorted\n"," t = a[i]\n"," j = i - 1\n"," while j >= 0 and t < a[j]: # keep comparing if target is still smaller\n"," a[j+1] = a[j] # shift current item backward\n"," j -= 1\n"," a[j+1] = t # a[j] <= t , insert t at the location j+1 \n"," return a"],"execution_count":0,"outputs":[]},{"metadata":{"id":"NgeLGKn51mxY","colab_type":"code","outputId":"6051e043-4c10-48c3-d477-10bb3b13a05b","executionInfo":{"status":"ok","timestamp":1550348226402,"user_tz":480,"elapsed":325,"user":{"displayName":"Li Yin","photoUrl":"https://lh5.googleusercontent.com/-KgiTKdqPRUg/AAAAAAAAAAI/AAAAAAAAAIE/aHQ6xO5vQpY/s64/photo.jpg","userId":"13365523799853678553"}},"colab":{"base_uri":"https://localhost:8080/","height":35}},"cell_type":"code","source":["a = [9, 10, 2, 8, 9, 3, 7]\n","print(insertionSortInPlace(a))"],"execution_count":0,"outputs":[{"output_type":"stream","text":["[2, 3, 7, 8, 9, 9, 10]\n"],"name":"stdout"}]},{"metadata":{"id":"GN1yseSKImNy","colab_type":"text"},"cell_type":"markdown","source":["## Merge Sort O(nlgn)"]},{"metadata":{"id":"rILA4lBhdPxD","colab_type":"code","colab":{}},"cell_type":"code","source":["def merge(l, r): \n"," '''combine the left and right 
sorted list'''\n"," ans = []\n"," i = j = 0 # two pointers each points at l and r\n"," n, m = len(l), len(r)\n"," \n"," # first while loop to merge\n"," while i < n and j < m: \n"," if l[i] <= r[j]:\n"," ans.append(l[i])\n"," i += 1\n"," else:\n"," ans.append(r[j])\n"," j += 1\n"," \n"," # now one list of l and r might have items left\n"," ans += l[i:]\n"," ans += r[j:]\n"," return ans\n"," "],"execution_count":0,"outputs":[]},{"metadata":{"id":"DK003Ic1Isb3","colab_type":"code","colab":{}},"cell_type":"code","source":["def mergeSort(a, s, e):\n"," # base case , can not be divided further\n"," if s == e:\n"," return [a[s]]\n"," # divide into two halves from the middle point\n"," m = (s + e) // 2\n"," \n"," # conquer\n"," l = mergeSort(a, s , m)\n"," r = mergeSort(a, m+1, e)\n"," \n"," # combine\n"," return merge(l, r)"],"execution_count":0,"outputs":[]},{"metadata":{"id":"ys6tUAd8i7ao","colab_type":"code","outputId":"afc20a0d-012e-413f-baf1-f098bb8e23ce","executionInfo":{"status":"ok","timestamp":1550374945810,"user_tz":480,"elapsed":336,"user":{"displayName":"Li Yin","photoUrl":"https://lh5.googleusercontent.com/-KgiTKdqPRUg/AAAAAAAAAAI/AAAAAAAAAIE/aHQ6xO5vQpY/s64/photo.jpg","userId":"13365523799853678553"}},"colab":{"base_uri":"https://localhost:8080/","height":35}},"cell_type":"code","source":["a = [9, 10, 2, 8, 9, 3, 7, 9]\n","mergeSort(a, 0, len(a)-1)"],"execution_count":0,"outputs":[{"output_type":"execute_result","data":{"text/plain":["[2, 3, 7, 8, 9, 9, 9, 10]"]},"metadata":{"tags":[]},"execution_count":34}]},{"metadata":{"id":"IL6hPk6IjGSf","colab_type":"text"},"cell_type":"markdown","source":["### prove merge sort is stable by sorting tuple and printing id"]},{"metadata":{"id":"UwgdFdaRipEN","colab_type":"code","colab":{}},"cell_type":"code","source":["def mergeTuple(l, r): \n"," '''combine the left and right sorted list'''\n"," ans = []\n"," i = j = 0 # two pointers each points at l and r\n"," n, m = len(l), len(r)\n"," \n"," # first while loop to 
merge\n"," while i < n and j < m: \n"," if l[i][0] <= r[j][0]: # chaning it to l[i][0] < r[j][0] will not be stable anymore. \n"," ans.append(l[i])\n"," i += 1\n"," else:\n"," ans.append(r[j])\n"," j += 1\n"," \n"," # now one list of l and r might have items left\n"," ans += l[i:]\n"," ans += r[j:]\n"," return ans\n","\n","def mergeSortTuple(a, s, e):\n"," # base case , can not be divided further\n"," if s == e:\n"," return [a[s]]\n"," # divide into two halves from the middle point\n"," m = (s + e) // 2\n"," \n"," # conquer\n"," l = mergeSort(a, s , m)\n"," r = mergeSort(a, m+1, e)\n"," \n"," # combine\n"," return mergeTuple(l, r)"],"execution_count":0,"outputs":[]},{"metadata":{"id":"mm07Ac0-dO3A","colab_type":"code","outputId":"dd9eca57-98b7-4e04-acc2-7a31e7efd61d","executionInfo":{"status":"ok","timestamp":1550378993589,"user_tz":480,"elapsed":914,"user":{"displayName":"Li Yin","photoUrl":"https://lh5.googleusercontent.com/-KgiTKdqPRUg/AAAAAAAAAAI/AAAAAAAAAIE/aHQ6xO5vQpY/s64/photo.jpg","userId":"13365523799853678553"}},"colab":{"base_uri":"https://localhost:8080/","height":312}},"cell_type":"code","source":["a = [(9, 1), (10, 1), (2, 1), (8, 1), (9, 2), (3, 1), (7, 1), (9, 3)] # the second item represents the index of duplcates\n","ids = [id(x) if x[0] == 9 else None for x in a]\n","sorted_a = mergeSortTuple(a, 0, len(a)-1)\n","ids2 = [id(x) if x[0] == 9 else None for x in sorted_a]\n","print(sorted_a)\n","ids, ids2"],"execution_count":0,"outputs":[{"output_type":"stream","text":["[(2, 1), (3, 1), (7, 1), (8, 1), (9, 2), (9, 3), (9, 1), (10, 1)]\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/plain":["([140381548618120,\n"," None,\n"," None,\n"," None,\n"," 140381548653128,\n"," None,\n"," None,\n"," 140381548653320],\n"," [None,\n"," None,\n"," None,\n"," None,\n"," 140381548653128,\n"," 140381548653320,\n"," 140381548618120,\n"," 
None])"]},"metadata":{"tags":[]},"execution_count":47}]},{"metadata":{"id":"h0QYWQaDxt9D","colab_type":"text"},"cell_type":"markdown","source":["## QuickSort in O(nlogn)"]},{"metadata":{"id":"j4mW9xNrO6hm","colab_type":"code","colab":{}},"cell_type":"code","source":["def partition(a, s, e):\n"," '''Lumutos partition'''\n"," p = a[e]\n"," i = s - 1\n"," for j in range(s, e): #a[s, e-1]\n"," \n"," if a[j] <= p:\n"," i += 1\n"," a[i], a[j] = a[j], a[i] # swap a[i] and a[j]\n"," # print out the range of each region\n","# print('p<->i', [a[x] for x in range(s, i+1)])\n","# print('i+1<->j', [a[x] for x in range(i+1, j+1)])\n"," # place p at position i+1 through swapping with a[i+1]\n"," a[i+1], a[e] = a[e], a[i+1]\n"," return i+1"],"execution_count":0,"outputs":[]},{"metadata":{"id":"YPUErvQ1pT1v","colab_type":"text"},"cell_type":"markdown","source":["### experiment the correctness of lumutos partition"]},{"metadata":{"id":"oTmEOjk2QQZV","colab_type":"code","outputId":"cc8a3971-7ef6-45b4-da03-d2c46460ffcf","executionInfo":{"status":"ok","timestamp":1550560957592,"user_tz":480,"elapsed":474,"user":{"displayName":"Li Yin","photoUrl":"https://lh5.googleusercontent.com/-KgiTKdqPRUg/AAAAAAAAAAI/AAAAAAAAAIE/aHQ6xO5vQpY/s64/photo.jpg","userId":"13365523799853678553"}},"colab":{"base_uri":"https://localhost:8080/","height":54}},"cell_type":"code","source":["lst = [9, 10, 2, 8, 9, 3, 7]\n","print(partition(lst, 0, len(lst)-1))\n","print(lst)"],"execution_count":37,"outputs":[{"output_type":"stream","text":["2\n","[2, 3, 7, 8, 9, 10, 9]\n"],"name":"stdout"}]},{"metadata":{"id":"qi_gd9TQpbn_","colab_type":"text"},"cell_type":"markdown","source":["### main algorithm of quick sort"]},{"metadata":{"id":"v2_nP14pObAn","colab_type":"code","colab":{}},"cell_type":"code","source":["def quickSort(a, s, e, partition=partition):\n"," # base case , can not be divided further\n"," if s >= e:\n"," return \n"," p = partition(a, s, e)\n"," \n"," # conquer smaller problem\n"," quickSort(a, s , 
p-1, partition)\n"," quickSort(a, p+1, e, partition)\n"," return"],"execution_count":0,"outputs":[]},{"metadata":{"id":"vPtdpJnApa4n","colab_type":"text"},"cell_type":"markdown","source":[""]},{"metadata":{"id":"8N4QPVcqouFf","colab_type":"text"},"cell_type":"markdown","source":["### experiment to see the stability of quick sort"]},{"metadata":{"id":"oOHBweKDgiMw","colab_type":"code","colab":{"base_uri":"https://localhost:8080/","height":35},"outputId":"1634d771-17cd-4bec-de8d-2da7e3632066","executionInfo":{"status":"ok","timestamp":1550561308432,"user_tz":480,"elapsed":588,"user":{"displayName":"Li Yin","photoUrl":"https://lh5.googleusercontent.com/-KgiTKdqPRUg/AAAAAAAAAAI/AAAAAAAAAIE/aHQ6xO5vQpY/s64/photo.jpg","userId":"13365523799853678553"}}},"cell_type":"code","source":["a = [(5, 1), (7, 1),(3, 1), (2, 1), (5, 2), (6,1), (7, 2), (8, 1), (9, 1), (5, 3), (5, 4)] # the second item represents the index of duplcates\n","def partition_tuple(a, s, e):\n"," '''Lumutos partition'''\n"," p = a[e][0]\n"," i = s - 1\n"," for j in range(s, e): #a[s, e-1]\n"," \n"," if a[j][0] <= p:\n"," i += 1\n"," a[i], a[j] = a[j], a[i] # swap a[i] and a[j]\n"," # print out the range of each region\n","# print('p<->i', [a[x] for x in range(s, i+1)])\n","# print('i+1<->j', [a[x] for x in range(i+1, j+1)])\n"," # place p at position i+1 through swapping with a[i+1]\n"," a[i+1], a[e] = a[e], a[i+1]\n"," return i+1\n","quickSort(a, 0, len(a) - 1, partition_tuple)\n","print(a)"],"execution_count":47,"outputs":[{"output_type":"stream","text":["[(2, 1), (3, 1), (5, 1), (5, 2), (5, 3), (5, 4), (6, 1), (7, 1), (7, 2), (8, 1), (9, 1)]\n"],"name":"stdout"}]},{"metadata":{"id":"QqcdUXaepGSa","colab_type":"text"},"cell_type":"markdown","source":["### experiment to see the performance of worst 
time"]},{"metadata":{"id":"5n7nsw13pLWr","colab_type":"code","colab":{"base_uri":"https://localhost:8080/","height":54},"outputId":"736cf200-711d-4f42-b2b1-f5c369f70ce8","executionInfo":{"status":"ok","timestamp":1550562301025,"user_tz":480,"elapsed":637,"user":{"displayName":"Li Yin","photoUrl":"https://lh5.googleusercontent.com/-KgiTKdqPRUg/AAAAAAAAAAI/AAAAAAAAAIE/aHQ6xO5vQpY/s64/photo.jpg","userId":"13365523799853678553"}}},"cell_type":"code","source":["import random, time\n","lst1 = [random.randint(1, 25) for i in range(400)]\n","lst2 = [i for i in range(400)[::-1]]\n","t1 = time.time()\n","quickSort(lst1, 0, len(lst1)-1, partition)\n","print('time for random values:', time.time()-t1)\n","\n","t1 = time.time()\n","quickSort(lst2, 0, len(lst2)-1, partition)\n","print('time for sorted values:', time.time()-t1)"],"execution_count":56,"outputs":[{"output_type":"stream","text":["time for random values: 0.0017516613006591797\n","time for sorted values: 0.0171658992767334\n"],"name":"stdout"}]},{"metadata":{"id":"0y5x07wwo4Um","colab_type":"text"},"cell_type":"markdown","source":["### Hoare Partition"]},{"metadata":{"id":"3vNUigFmo7ei","colab_type":"code","colab":{}},"cell_type":"code","source":["# def partition_hoare(a, s, e):\n","# '''Hoare Parition'''\n","# p = a[e]\n","# i = s\n","# j = e-1\n","# while True:\n","# while a[i] <= p and i < j:\n","# i += 1\n","# while a[j] > p and i < j:\n","# j -= 1\n","# if i < j:\n","# a[i], a[j] = a[j], a[i]\n","# else:\n","# return j\n","# return j"],"execution_count":0,"outputs":[]},{"metadata":{"id":"GKbXRk4Czwjt","colab_type":"code","colab":{"base_uri":"https://localhost:8080/","height":54},"outputId":"be048173-9125-4716-89f0-17ad7fb0b345","executionInfo":{"status":"ok","timestamp":1550564268216,"user_tz":480,"elapsed":325,"user":{"displayName":"Li 
Yin","photoUrl":"https://lh5.googleusercontent.com/-KgiTKdqPRUg/AAAAAAAAAAI/AAAAAAAAAIE/aHQ6xO5vQpY/s64/photo.jpg","userId":"13365523799853678553"}}},"cell_type":"code","source":["# lst = [9, 10, 2, 8, 9, 3, 7]\n","# print(partition_hoare(lst, 0, len(lst)-1))\n","# print(lst)"],"execution_count":72,"outputs":[{"output_type":"stream","text":["2\n","[3, 2, 10, 8, 9, 9, 7]\n"],"name":"stdout"}]},{"metadata":{"id":"4EDqug7Yg2yl","colab_type":"text"},"cell_type":"markdown","source":["## HeapSort in O(nlogn)"]},{"metadata":{"id":"0PE9BxQBg7lu","colab_type":"code","colab":{}},"cell_type":"code","source":["from heapq import heapify, heappop\n","def heapsort(a):\n"," heapify(a)\n"," return [heappop(a) for i in range(len(a))]"],"execution_count":0,"outputs":[]},{"metadata":{"id":"PcUurYvkg_Px","colab_type":"code","outputId":"a96f214c-6fdb-4ed5-9c94-7e8772e65fb5","executionInfo":{"status":"ok","timestamp":1550525399708,"user_tz":480,"elapsed":286,"user":{"displayName":"Li Yin","photoUrl":"https://lh5.googleusercontent.com/-KgiTKdqPRUg/AAAAAAAAAAI/AAAAAAAAAIE/aHQ6xO5vQpY/s64/photo.jpg","userId":"13365523799853678553"}},"colab":{"base_uri":"https://localhost:8080/","height":35}},"cell_type":"code","source":["lst = [21, 1, 45, 78, 3, 5]\n","heapsort(lst)"],"execution_count":0,"outputs":[{"output_type":"execute_result","data":{"text/plain":["[1, 3, 5, 21, 45, 78]"]},"metadata":{"tags":[]},"execution_count":2}]},{"metadata":{"id":"K1EUj-De16tk","colab_type":"text"},"cell_type":"markdown","source":["## Bucket Sort"]},{"metadata":{"id":"GOF5OoRGioTg","colab_type":"code","colab":{}},"cell_type":"code","source":[""],"execution_count":0,"outputs":[]}]} \ No newline at end of file diff --git a/Colab_Codes/Colab Notebooks/chapter_14_sorting_2.ipynb b/Colab_Codes/Colab Notebooks/chapter_14_sorting_2.ipynb deleted file mode 100644 index 58578e6..0000000 --- a/Colab_Codes/Colab Notebooks/chapter_14_sorting_2.ipynb +++ /dev/null @@ -1 +0,0 @@ 
-{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"chapter_14_sorting.ipynb","version":"0.3.2","provenance":[]},"kernelspec":{"name":"python3","display_name":"Python 3"}},"cells":[{"metadata":{"id":"B0qx9rvaser4","colab_type":"code","colab":{"base_uri":"https://localhost:8080/","height":34},"outputId":"7c9daf13-4598-4e08-bc63-f6ab19c419da","executionInfo":{"status":"ok","timestamp":1549672814254,"user_tz":480,"elapsed":610,"user":{"displayName":"Li Yin","photoUrl":"https://lh5.googleusercontent.com/-KgiTKdqPRUg/AAAAAAAAAAI/AAAAAAAAAIE/aHQ6xO5vQpY/s64/photo.jpg","userId":"13365523799853678553"}}},"cell_type":"code","source":["tup = (3, 6, 8, 2, 78, 1, 23, 45, 9)\n","sorted(tup)"],"execution_count":1,"outputs":[{"output_type":"execute_result","data":{"text/plain":["[1, 2, 3, 6, 8, 9, 23, 45, 78]"]},"metadata":{"tags":[]},"execution_count":1}]},{"metadata":{"id":"eA8dMPkksqly","colab_type":"code","colab":{"base_uri":"https://localhost:8080/","height":71},"outputId":"613cc186-cd57-4439-9f7f-6b122cd3d85b","executionInfo":{"status":"ok","timestamp":1549673318708,"user_tz":480,"elapsed":276,"user":{"displayName":"Li Yin","photoUrl":"https://lh5.googleusercontent.com/-KgiTKdqPRUg/AAAAAAAAAAI/AAAAAAAAAIE/aHQ6xO5vQpY/s64/photo.jpg","userId":"13365523799853678553"}}},"cell_type":"code","source":["from collections import defaultdict\n","import random\n","dic = defaultdict(lambda: defaultdict(list)) # a dictionary of a dictionary of list dic[a][b] = [3, 1, 2, 4]\n","for i in range(10):\n"," a = random.randint(1, 101)\n"," b = random.randint(1, 101)\n"," dic[a][b] = [random.randint(1, 101) for _ in range(10)]\n","print(dic) \n","sorted_dic = sorted(dic)\n","print(sorted_dic)"],"execution_count":8,"outputs":[{"output_type":"stream","text":["defaultdict( at 0x7faf20e3c730>, {72: defaultdict(, {59: [63, 15, 62, 83, 30, 98, 16, 44, 58, 93]}), 82: defaultdict(, {70: [89, 49, 47, 63, 90, 1, 7, 9, 78, 10]}), 53: defaultdict(, {62: [10, 99, 35, 78, 74, 44, 82, 32, 32, 52]}), 
78: defaultdict(, {78: [20, 22, 100, 29, 16, 65, 56, 8, 100, 100]}), 13: defaultdict(, {44: [4, 81, 17, 92, 44, 49, 72, 24, 13, 64]}), 84: defaultdict(, {47: [76, 94, 36, 56, 60, 87, 72, 47, 75, 33]}), 49: defaultdict(, {97: [7, 47, 13, 80, 85, 59, 2, 48, 68, 65]}), 87: defaultdict(, {61: [31, 72, 71, 63, 19, 84, 78, 80, 97, 85]}), 17: defaultdict(, {92: [29, 53, 20, 14, 16, 84, 57, 40, 4, 19]}), 54: defaultdict(, {32: [2, 31, 19, 31, 68, 10, 85, 34, 25, 62]})})\n","[13, 17, 49, 53, 54, 72, 78, 82, 84, 87]\n"],"name":"stdout"}]},{"metadata":{"id":"VbmDX6yAvYax","colab_type":"code","colab":{"base_uri":"https://localhost:8080/","height":34},"outputId":"441aa86e-17c6-4cbd-b923-67c1ea41e384","executionInfo":{"status":"ok","timestamp":1549674562513,"user_tz":480,"elapsed":273,"user":{"displayName":"Li Yin","photoUrl":"https://lh5.googleusercontent.com/-KgiTKdqPRUg/AAAAAAAAAAI/AAAAAAAAAIE/aHQ6xO5vQpY/s64/photo.jpg","userId":"13365523799853678553"}}},"cell_type":"code","source":["'''sort_list_of_tuple()'''\n","\n","lst = [(1, 8, 2), (3, 2, 9), (1, 7, 10), (1, 7, 1), (11, 1, 5), (6, 3, 10), (32, 18, 9)]\n","sorted_lst = sorted(lst, key = lambda x: x[0]) # sort in the order of the first element, and descresing order of the second element, and incresing of the third element\n","print(sorted_lst)"],"execution_count":16,"outputs":[{"output_type":"stream","text":["[(1, 8, 2), (1, 7, 10), (1, 7, 1), (3, 2, 9), (6, 3, 10), (11, 1, 5), (32, 18, 9)]\n"],"name":"stdout"}]},{"metadata":{"id":"tXDvRwd_047E","colab_type":"code","colab":{"base_uri":"https://localhost:8080/","height":34},"outputId":"231e3bd1-06df-4e6c-e4e8-ccfc7803ee62","executionInfo":{"status":"ok","timestamp":1549674993085,"user_tz":480,"elapsed":306,"user":{"displayName":"Li Yin","photoUrl":"https://lh5.googleusercontent.com/-KgiTKdqPRUg/AAAAAAAAAAI/AAAAAAAAAIE/aHQ6xO5vQpY/s64/photo.jpg","userId":"13365523799853678553"}}},"cell_type":"code","source":["lst = [(1, 8, 2), (3, 2, 9), (1, 7, 10), (1, 7, 1), (11, 1, 5), 
(6, 3, 10), (32, 18, 9)]\n","sorted_lst = sorted(lst, key = lambda x: (x[0], -x[1], x[2])) # sort in the order of the first element, and descresing order of the second element, and incresing of the third element\n","print(sorted_lst)"],"execution_count":17,"outputs":[{"output_type":"stream","text":["[(1, 8, 2), (1, 7, 1), (1, 7, 10), (3, 2, 9), (6, 3, 10), (11, 1, 5), (32, 18, 9)]\n"],"name":"stdout"}]}]} \ No newline at end of file diff --git a/Colab_Codes/Colab Notebooks/chapter_15_bit_manipulation.ipynb b/Colab_Codes/Colab Notebooks/chapter_15_bit_manipulation.ipynb deleted file mode 100644 index 7bbae13..0000000 --- a/Colab_Codes/Colab Notebooks/chapter_15_bit_manipulation.ipynb +++ /dev/null @@ -1 +0,0 @@ -{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"chapter_15_bit_manipulation.ipynb","version":"0.3.2","provenance":[]},"kernelspec":{"name":"python3","display_name":"Python 3"}},"cells":[{"metadata":{"id":"6HDn2LSnUIhV","colab_type":"code","colab":{}},"cell_type":"code","source":["\n","'''built-in functions'''\n","print(bin(1), bin(-1))\n","print(bin(126), bin(-126))\n","a = bin(88)\n","a1 = bin(-88)\n","print(a)\n","print(a1)\n","b = int('01011000', 2)\n","c = int('88', 10)\n","print(b, c)\n","\n","d = chr(88)\n","print(d)\n","\n","e = ord('a')\n","print(e)\n","\n","empty_bytes = bytes(4)\n","print(type(empty_bytes))\n","print(empty_bytes)\n","\n","# Cast bytes to bytearray\n","mutable_bytes = bytearray(b'\\x00\\x0F')\n","\n","# Bytearray allows modification\n","mutable_bytes[0] = 255\n","mutable_bytes.append(255)\n","print(mutable_bytes)\n","\n","# Cast bytearray back to bytes\n","immutable_bytes = bytes(mutable_bytes)\n","print(immutable_bytes)\n","\n","# Some bytes to play with\n","byte1 = int('11110000', 2) # 240\n","byte2 = int('00001111', 2) # 15\n","byte3 = int('01010101', 2) # 85\n","\n","# Ones Complement (Flip the bits)\n","print(~byte1)\n","\n","# AND\n","print(byte1 & byte2)\n","\n","# OR\n","print(byte1 | byte2)\n","\n","# 
XOR\n","print(byte1 ^ byte3)\n","\n","# Shifting right will lose the right-most bit\n","print(byte2 >> 3)\n","\n","# Shifting left will add a 0 bit on the right side\n","print(byte2 << 1)\n","\n","# See if a single bit is set\n","bit_mask = int('00000001', 2) # Bit 1\n","print(bit_mask & byte1) # Is bit set in byte1?\n","print(bit_mask & byte2) # Is bit set in byte2?\n","\n","'''Combined bit manipulation'''\n","def get_bit(val, i):\n"," mask = 1 << i\n"," if val & mask:\n"," return 1\n"," return 0\n","\n","# i i-1 i-2 ... 2 1 0, keep these positions\n","def clear_bits_left_right(val, i):\n"," print('val', bin(val))\n"," mask = (1 << i) -1\n"," print('mask', bin(mask))\n"," return bin(val & (mask))\n","# i i-1 i-2 ... 2 1 0, erase these positions\n","def clear_bits_right_left(val, i):\n"," print('val', bin(val))\n"," mask = (1 << i) -1\n"," print('mask', bin(~mask))\n"," return bin(val & (~mask))\n","\n","\n","print(get_bit(5,1))\n","\n","print(clear_bits_left_right(int('11111111',2), 5))\n","print(clear_bits_right_left(int('11111111',2), 5))\n","\n","'''applications'''\n","def twos_complement(val, bits):\n"," # first flip implemented with xor of val with all 1's\n"," flip_val = val ^ (1 << bits - 1)\n"," #flip_val = ~val we only give 3 bits\n"," return bin(flip_val + 1)\n","\n","\n","def twos_complement2(val, bits):\n"," zeroFound = False\n"," ans = 0\n"," mask = 1\n"," for i in range(bits):\n"," b = (val & (mask)) # get ith bit\n"," print(b)\n"," b = not b # flipped\n"," if not zeroFound:\n"," if not b: # found zero, flip to one, else flip to zero: no operation needed\n"," print('found')\n"," ans = ans | (mask) # set ith bit\n"," zeroFound = True\n"," else:\n"," if b:\n"," ans = ans | (mask)\n"," mask = mask << 1 # change mask to the next bit\n"," return bin(ans)\n","\n","def twos_complement_result(x):\n"," ans1 = -x\n"," ans2 = ~x + 1\n"," print(ans1, ans2)\n"," print(bin(ans1), bin(ans2))\n"," return ans1\n","\n","def strip_last_set_bit(val):\n"," 
print(bin(val))\n"," return bin(val & (val - 1))\n","\n","def get_lowest_set_bit(val):\n"," return val ^ (val & (val -1))\n"," return (val & (-val))\n","\n","print(twos_complement(5, 8))\n","print(twos_complement2(5, 8))\n","print(strip_last_set_bit(5))\n","print(get_lowest_set_bit(8))\n","\n","twos_complement_result(8)"],"execution_count":0,"outputs":[]}]} \ No newline at end of file diff --git a/Colab_Codes/Colab Notebooks/chapter_6_linear_data_structure.depre.ipynb b/Colab_Codes/Colab Notebooks/chapter_6_linear_data_structure.depre.ipynb deleted file mode 100644 index 93c4864..0000000 --- a/Colab_Codes/Colab Notebooks/chapter_6_linear_data_structure.depre.ipynb +++ /dev/null @@ -1 +0,0 @@ -{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"chapter_6_linear_data_structure.ipynb","version":"0.3.2","provenance":[]},"kernelspec":{"name":"python3","display_name":"Python 3"}},"cells":[{"metadata":{"id":"3i76tIL3-e9f","colab_type":"code","colab":{}},"cell_type":"code","source":["class Student:\n"," def __init__(self, name, grade, age):\n"," self.name = name\n"," self.grade = grade\n"," self.age = age\n"," def __repr__(self):\n"," return repr((self.name, self.grade, self.age))\n"," \n"," def __cmp__(self, other):\n"," return cmp((self.name, self.grade, self.age), (other.name, other.grade, other.age))\n","# def __hash__(self):\n","# return hash((self.name, self.grade, self.age))\n"," \n","# def __lt__(self, other):\n","# return cmp((self.name, self.grade, self.age), (other.name, other.grade, other.age)) < 0\n","# def __gt__(self, other):\n","# return cmp((self.name, self.grade, self.age), (other.name, other.grade, other.age)) > 0\n","# def __eq__(self, other):\n","# return cmp((self.name, self.grade, self.age), (other.name, other.grade, other.age)) == 0\n","# def __le__(self, other):\n","# return cmp((self.name, self.grade, self.age), (other.name, other.grade, other.age)) <= 0\n","# def __ge__(self, other):\n","# return cmp((self.name, self.grade, self.age), 
(other.name, other.grade, other.age)) >= 0\n","# def __ne__(self, other):\n","# return cmp((self.name, self.grade, self.age), (other.name, other.grade, other.age)) != 0"],"execution_count":0,"outputs":[]},{"metadata":{"id":"D7cql5y2_DYq","colab_type":"code","colab":{"base_uri":"https://localhost:8080/","height":72},"outputId":"0af43264-6e4a-4d7a-e5b8-70eb7b71daa9","executionInfo":{"status":"ok","timestamp":1549751870866,"user_tz":480,"elapsed":300,"user":{"displayName":"Li Yin","photoUrl":"https://lh5.googleusercontent.com/-KgiTKdqPRUg/AAAAAAAAAAI/AAAAAAAAAIE/aHQ6xO5vQpY/s64/photo.jpg","userId":"13365523799853678553"}}},"cell_type":"code","source":["a =Student('John', 'A', 14)\n","print(type(('John', 'A', 14)), type(a))\n","print(hash(a))\n","print(a)"],"execution_count":58,"outputs":[{"output_type":"stream","text":[" \n","-9223363270192343580\n","('John', 'A', 14)\n"],"name":"stdout"}]},{"metadata":{"id":"9WTYRI2yNA4G","colab_type":"code","colab":{"base_uri":"https://localhost:8080/","height":338},"outputId":"613e4070-7e98-4da7-e9ad-bffec3f9121c","executionInfo":{"status":"error","timestamp":1549749345765,"user_tz":480,"elapsed":334,"user":{"displayName":"Li Yin","photoUrl":"https://lh5.googleusercontent.com/-KgiTKdqPRUg/AAAAAAAAAAI/AAAAAAAAAIE/aHQ6xO5vQpY/s64/photo.jpg","userId":"13365523799853678553"}}},"cell_type":"code","source":["b = Student('Alex', 'A', 14)\n","print(a == b)\n","print(sorted([a,b]))"],"execution_count":56,"outputs":[{"output_type":"error","ename":"NameError","evalue":"ignored","traceback":["\u001b[0;31m---------------------------------------------------------------------------\u001b[0m","\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)","\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0mb\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mStudent\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'Alex'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'A'\u001b[0m\u001b[0;34m,\u001b[0m 
\u001b[0;36m14\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0ma\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0mb\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 3\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msorted\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0ma\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mb\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n","\u001b[0;32m\u001b[0m in \u001b[0;36m__eq__\u001b[0;34m(self, other)\u001b[0m\n\u001b[1;32m 17\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mcmp\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mgrade\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mage\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mother\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mother\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mgrade\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mother\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mage\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m>\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 18\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m__eq__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mother\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 19\u001b[0;31m \u001b[0;32mreturn\u001b[0m 
\u001b[0mcmp\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mgrade\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mage\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mother\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mother\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mgrade\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mother\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mage\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 20\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m__le__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mother\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 21\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mcmp\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mgrade\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mage\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mother\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mother\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mgrade\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mother\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mage\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m<=\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n","\u001b[0;31mNameError\u001b[0m: name 'cmp' is not 
defined"]}]},{"metadata":{"id":"ylfCmcuAaTEA","colab_type":"code","colab":{"base_uri":"https://localhost:8080/","height":227},"outputId":"29974383-bd6e-427d-94d9-0141947e36c8","executionInfo":{"status":"error","timestamp":1549752233299,"user_tz":480,"elapsed":333,"user":{"displayName":"Li Yin","photoUrl":"https://lh5.googleusercontent.com/-KgiTKdqPRUg/AAAAAAAAAAI/AAAAAAAAAIE/aHQ6xO5vQpY/s64/photo.jpg","userId":"13365523799853678553"}}},"cell_type":"code","source":["key1 = hash(('John', 'A', 14))\n","key2 = hash(('John', 'A', 14))\n","print(key1, key2)\n","d = {('John', 'A', 14): 'A+'}\n","print(d[key1], d['John', 'A', 14])"],"execution_count":77,"outputs":[{"output_type":"stream","text":["1266846023488010213 1266846023488010213\n"],"name":"stdout"},{"output_type":"error","ename":"KeyError","evalue":"ignored","traceback":["\u001b[0;31m---------------------------------------------------------------------------\u001b[0m","\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)","\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey1\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkey2\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0md\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m{\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'John'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'A'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m14\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0;34m'A+'\u001b[0m\u001b[0;34m}\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 5\u001b[0;31m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0md\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mkey1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0md\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'John'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'A'\u001b[0m\u001b[0;34m,\u001b[0m 
\u001b[0;36m14\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m","\u001b[0;31mKeyError\u001b[0m: 1266846023488010213"]}]}]} \ No newline at end of file diff --git a/Colab_Codes/Colab Notebooks/chapter_6_linear_data_structure.ipynb b/Colab_Codes/Colab Notebooks/chapter_6_linear_data_structure.ipynb deleted file mode 100644 index ac48285..0000000 --- a/Colab_Codes/Colab Notebooks/chapter_6_linear_data_structure.ipynb +++ /dev/null @@ -1 +0,0 @@ -{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"Untitled","version":"0.3.2","views":{},"default_view":{},"provenance":[]},"kernelspec":{"name":"python3","display_name":"Python 3"}},"cells":[{"cell_type":"code","execution_count":0,"metadata":{},"outputs":[],"source":[""]}]} \ No newline at end of file diff --git a/Colab_Codes/Colab Notebooks/chapter_8_heap_priority_queue.ipynb b/Colab_Codes/Colab Notebooks/chapter_8_heap_priority_queue.ipynb deleted file mode 100644 index 5e05726..0000000 --- a/Colab_Codes/Colab Notebooks/chapter_8_heap_priority_queue.ipynb +++ /dev/null @@ -1 +0,0 @@ -{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"chapter_8_heap_priority_queue.ipynb","version":"0.3.2","provenance":[],"collapsed_sections":[]},"kernelspec":{"name":"python3","display_name":"Python 3"}},"cells":[{"metadata":{"id":"XVLSsH816Oz9","colab_type":"text"},"cell_type":"markdown","source":["## Basic implementation"]},{"metadata":{"id":"RbAXuUtD6Swz","colab_type":"code","colab":{}},"cell_type":"code","source":["class Heap:\n"," def __init__(self):\n"," self.heap = [None]\n"," self.size = 0\n"," def __str__(self):\n"," out = ''\n"," for i in range(1, len(self.heap)):\n"," out += str(self.heap[i]) + ' '\n"," return out\n"," def _float(self, index): # enforce min-heap, leaf-to-root\n"," while index // 2: # while parent exist\n"," p_index = index // 2\n"," print('p', p_index, index)\n"," if self.heap[index] < self.heap[p_index]: # a violation\n"," # 
swap\n"," self.heap[index], self.heap[p_index] = self.heap[p_index], self.heap[index]\n"," else:\n"," break\n"," index = p_index # move up the node\n"," def insert(self, val):\n"," self.heap.append(val)\n"," self.size += 1\n"," self._float(index = self.size)\n"," \n"," def _sink(self, index): # enforce min-heap, root-to-leaf\n"," while 2 * index <= self.size:\n"," li = 2 * index\n"," ri = li + 1\n"," mi = index\n"," if self.heap[li] < self.heap[mi]:\n"," mi = li\n"," if ri <= self.size and self.heap[ri] < self.heap[mi]:\n"," mi = ri\n"," if mi != index:\n"," # swap index with mi\n"," self.heap[index], self.heap[mi] = self.heap[mi], self.heap[index]\n"," else:\n"," break\n"," index = mi\n"," def pop(self):\n"," val = self.heap[1]\n"," self.heap[1] = self. heap.pop()\n"," self.size -= 1\n"," self._sink(index = 1)\n"," return val\n"," \n"," def _float_till_root(self, index): # enforce min-heap, leaf-to-root\n"," while index // 2: # while parent exist\n"," p_index = index // 2\n"," print('p', p_index, index)\n"," if self.heap[index] < self.heap[p_index]: # a violation\n"," # swap\n"," self.heap[index], self.heap[p_index] = self.heap[p_index], self.heap[index]\n"," index = p_index # move up the node\n"," \n"," def heapify(self, lst):\n"," self.heap = [None] + lst\n"," self.size = len(lst)\n"," for i in range(self.size, self.size // 2, -1):\n"," print(self.heap[i])\n"," self._float_till_root(i)\n"," print('after', self.heap)\n"," \n"," def _sink_till_leaf(self, index): # enforce min-heap, root-to-leaf\n"," while 2 * index < self.size:\n"," li = 2 * index\n"," ri = li + 1\n"," mi = li if self.heap[li] < self.heap[ri] else ri\n"," if self.heap[index] > self.heap[mi]:\n"," # swap index with mi\n"," self.heap[index], self.heap[mi] = self.heap[mi], self.heap[index]\n"," index = mi\n"," \n"," def heapify_sink(self, lst):\n"," self.heap = [None] + lst\n"," self.size = len(lst)\n"," for i in range(self.size//2, 0, -1):\n"," self._sink(i)\n"," \n"," def heapsort(self, a):\n"," 
self.heapify_sink(a)\n"," n = len(a)\n"," for i in range(n, 1, -1): # position to put the root node\n"," self.heap[i], self.heap[1] = self.heap[1], self.heap[i] #swap root with i\n"," self.size -= 1\n"," self._sink(1) # sink dow the new root\n"," print(self.heap)\n"," \n"," "],"execution_count":0,"outputs":[]},{"metadata":{"id":"5-FHH-6o6xPB","colab_type":"code","outputId":"ee4f66eb-affb-4277-c1f7-4895e073e45a","executionInfo":{"status":"ok","timestamp":1550521262381,"user_tz":480,"elapsed":336,"user":{"displayName":"Li Yin","photoUrl":"https://lh5.googleusercontent.com/-KgiTKdqPRUg/AAAAAAAAAAI/AAAAAAAAAIE/aHQ6xO5vQpY/s64/photo.jpg","userId":"13365523799853678553"}},"colab":{"base_uri":"https://localhost:8080/","height":311}},"cell_type":"code","source":["h = Heap()\n","lst = [21, 1, 45, 78, 3, 5]\n","for v in lst:\n"," h.insert(v)\n"," print(h)\n","print('heapify with insertion: ', h)\n","h.pop()\n","print('after pop(): ', h)\n","h.pop()\n","print(h)"],"execution_count":0,"outputs":[{"output_type":"stream","text":["21 \n","p 1 2\n","1 21 \n","p 1 3\n","1 21 45 \n","p 2 4\n","1 21 45 78 \n","p 2 5\n","p 1 2\n","1 3 45 78 21 \n","p 3 6\n","p 1 3\n","1 3 5 78 21 45 \n","heapify with insertion: 1 3 5 78 21 45 \n","after pop(): 3 21 5 78 45 \n","5 21 45 78 \n"],"name":"stdout"}]},{"metadata":{"id":"WzVkLBxHMMKI","colab_type":"code","outputId":"4f89ad5a-fb4c-4cb8-c3d6-229a408fcccd","executionInfo":{"status":"ok","timestamp":1550534069698,"user_tz":480,"elapsed":924,"user":{"displayName":"Li Yin","photoUrl":"https://lh5.googleusercontent.com/-KgiTKdqPRUg/AAAAAAAAAAI/AAAAAAAAAIE/aHQ6xO5vQpY/s64/photo.jpg","userId":"13365523799853678553"}},"colab":{"base_uri":"https://localhost:8080/","height":139}},"cell_type":"code","source":["h = Heap()\n","lst = [21, 1, 45, 78, 3, 5]\n","h.heapify_sink(lst)\n","print('heapify with heapify:', h)"],"execution_count":14,"outputs":[{"output_type":"stream","text":["45\n","after [None, 21, 1, 5, 78, 3, 45]\n","1\n","after [None, 21, 1, 5, 
78, 3, 45]\n","21\n","after [None, 1, 3, 5, 78, 21, 45]\n","heapify with heapify: 1 3 5 78 21 45 \n"],"name":"stdout"}]},{"metadata":{"id":"TgA1qpeyarvs","colab_type":"code","outputId":"27a2fd8a-a52b-4867-d3fa-5efab881f033","executionInfo":{"status":"ok","timestamp":1550524082582,"user_tz":480,"elapsed":337,"user":{"displayName":"Li Yin","photoUrl":"https://lh5.googleusercontent.com/-KgiTKdqPRUg/AAAAAAAAAAI/AAAAAAAAAIE/aHQ6xO5vQpY/s64/photo.jpg","userId":"13365523799853678553"}},"colab":{"base_uri":"https://localhost:8080/","height":237}},"cell_type":"code","source":["h = Heap()\n","h.heapsort(lst)\n","print(h)"],"execution_count":0,"outputs":[{"output_type":"stream","text":["45\n","after [None, 21, 1, 45, 78, 3, 5]\n","1\n","after [None, 21, 1, 45, 78, 3, 5]\n","21\n","after [None, 1, 3, 45, 78, 21, 5]\n","[None, 3, 5, 45, 78, 21, 1]\n","[None, 5, 21, 45, 78, 3, 1]\n","[None, 21, 78, 45, 5, 3, 1]\n","[None, 45, 78, 21, 5, 3, 1]\n","[None, 78, 45, 21, 5, 3, 1]\n","78 45 21 5 3 1 \n"],"name":"stdout"}]},{"metadata":{"id":"tTDxuoKlcyzD","colab_type":"text"},"cell_type":"markdown","source":["### through heapq"]},{"metadata":{"id":"x0oarRTYc1fH","colab_type":"code","colab":{}},"cell_type":"code","source":["from heapq import heapify, heappop\n","def heapsort(a):\n"," heapify(a)\n"," return [heappop(a) for i in range(len(a))]\n"],"execution_count":0,"outputs":[]},{"metadata":{"id":"A4yuYo2Hc8Ny","colab_type":"code","outputId":"702ef55b-c9a0-4dd6-91cc-c0d81e6a9b1d","executionInfo":{"status":"ok","timestamp":1550525084329,"user_tz":480,"elapsed":381,"user":{"displayName":"Li Yin","photoUrl":"https://lh5.googleusercontent.com/-KgiTKdqPRUg/AAAAAAAAAAI/AAAAAAAAAIE/aHQ6xO5vQpY/s64/photo.jpg","userId":"13365523799853678553"}},"colab":{"base_uri":"https://localhost:8080/","height":35}},"cell_type":"code","source":["lst = [21, 1, 45, 78, 3, 5]\n","heapsort(lst)"],"execution_count":0,"outputs":[{"output_type":"execute_result","data":{"text/plain":["[1, 3, 5, 21, 45, 
78]"]},"metadata":{"tags":[]},"execution_count":74}]}]} \ No newline at end of file diff --git a/Colab_Codes/Colab Notebooks/graph_data_structure.ipynb b/Colab_Codes/Colab Notebooks/graph_data_structure.ipynb deleted file mode 100644 index ddc2a1c..0000000 --- a/Colab_Codes/Colab Notebooks/graph_data_structure.ipynb +++ /dev/null @@ -1 +0,0 @@ -{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"graph_data_structure.ipynb","version":"0.3.2","provenance":[],"collapsed_sections":[],"toc_visible":true},"kernelspec":{"name":"python3","display_name":"Python 3"}},"cells":[{"metadata":{"id":"QqvQdfYqnyIq","colab_type":"text"},"cell_type":"markdown","source":["### Python 2-d array"]},{"metadata":{"id":"GaOxthzYn1vA","colab_type":"code","colab":{}},"cell_type":"code","source":["ta = [[11, 3, 9, 1], [25, 6,10], [10, 8, 12, 5]]"],"execution_count":0,"outputs":[]},{"metadata":{"id":"I7HYxgqMoJUP","colab_type":"code","outputId":"b109f9c0-cf54-47fb-ffdf-a176b0f04b89","executionInfo":{"status":"ok","timestamp":1552498711913,"user_tz":420,"elapsed":4896,"user":{"displayName":"Li Yin","photoUrl":"https://lh5.googleusercontent.com/-KgiTKdqPRUg/AAAAAAAAAAI/AAAAAAAAAIE/aHQ6xO5vQpY/s64/photo.jpg","userId":"13365523799853678553"}},"colab":{"base_uri":"https://localhost:8080/","height":54}},"cell_type":"code","source":["print(ta[0])\n","print(ta[2][1])"],"execution_count":2,"outputs":[{"output_type":"stream","text":["[11, 3, 9, 1]\n","8\n"],"name":"stdout"}]},{"metadata":{"id":"r8TlWsjzqZ4R","colab_type":"text"},"cell_type":"markdown","source":["#### Empty 2-d array"]},{"metadata":{"id":"MAM2UOQ7qiAN","colab_type":"code","outputId":"e63256a8-7834-4f8c-a07d-2c84eeaf3d5d","executionInfo":{"status":"ok","timestamp":1552498711918,"user_tz":420,"elapsed":4869,"user":{"displayName":"Li 
Yin","photoUrl":"https://lh5.googleusercontent.com/-KgiTKdqPRUg/AAAAAAAAAAI/AAAAAAAAAIE/aHQ6xO5vQpY/s64/photo.jpg","userId":"13365523799853678553"}},"colab":{"base_uri":"https://localhost:8080/","height":35}},"cell_type":"code","source":["empty_2d = [[]]\n","print(empty_2d)"],"execution_count":3,"outputs":[{"output_type":"stream","text":["[[]]\n"],"name":"stdout"}]},{"metadata":{"id":"3BoPkppMqo3V","colab_type":"text"},"cell_type":"markdown","source":["#### fix the outer dimension"]},{"metadata":{"id":"LBMKQA59qv8E","colab_type":"code","outputId":"99575bde-cfaa-4f8d-edf7-10d2f71d374e","executionInfo":{"status":"ok","timestamp":1552498711924,"user_tz":420,"elapsed":4852,"user":{"displayName":"Li Yin","photoUrl":"https://lh5.googleusercontent.com/-KgiTKdqPRUg/AAAAAAAAAAI/AAAAAAAAAIE/aHQ6xO5vQpY/s64/photo.jpg","userId":"13365523799853678553"}},"colab":{"base_uri":"https://localhost:8080/","height":35}},"cell_type":"code","source":["fix_out_d = [[] for _ in range(5)]\n","print(fix_out_d)"],"execution_count":4,"outputs":[{"output_type":"stream","text":["[[], [], [], [], []]\n"],"name":"stdout"}]},{"metadata":{"id":"_GtKfbeRtnIo","colab_type":"text"},"cell_type":"markdown","source":["#### matrices"]},{"metadata":{"id":"PCR24OxUto-Z","colab_type":"code","outputId":"495a315c-e171-4068-f183-310a64a25ec6","executionInfo":{"status":"ok","timestamp":1552498711929,"user_tz":420,"elapsed":4839,"user":{"displayName":"Li Yin","photoUrl":"https://lh5.googleusercontent.com/-KgiTKdqPRUg/AAAAAAAAAAI/AAAAAAAAAIE/aHQ6xO5vQpY/s64/photo.jpg","userId":"13365523799853678553"}},"colab":{"base_uri":"https://localhost:8080/","height":35}},"cell_type":"code","source":["rows, cols = 3, 4\n","m1 = [[0 for _ in range(cols)] for _ in range(rows)] # rows * cols\n","m2 = [[0]*cols for _ in range(rows)] # rows * cols\n","print(m1, m2)"],"execution_count":5,"outputs":[{"output_type":"stream","text":["[[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]] [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 
0]]\n"],"name":"stdout"}]},{"metadata":{"id":"2bzn_EbNuWrt","colab_type":"code","outputId":"f190b0f1-d8e8-4967-ef48-b4bc5b3f9716","executionInfo":{"status":"ok","timestamp":1552498711932,"user_tz":420,"elapsed":4825,"user":{"displayName":"Li Yin","photoUrl":"https://lh5.googleusercontent.com/-KgiTKdqPRUg/AAAAAAAAAAI/AAAAAAAAAIE/aHQ6xO5vQpY/s64/photo.jpg","userId":"13365523799853678553"}},"colab":{"base_uri":"https://localhost:8080/","height":35}},"cell_type":"code","source":["m1[1][2] = 1\n","m2[1][2] = 1\n","print(m1, m2)"],"execution_count":6,"outputs":[{"output_type":"stream","text":["[[0, 0, 0, 0], [0, 0, 1, 0], [0, 0, 0, 0]] [[0, 0, 0, 0], [0, 0, 1, 0], [0, 0, 0, 0]]\n"],"name":"stdout"}]},{"metadata":{"id":"bILW8JA1vRG0","colab_type":"code","outputId":"3308eee8-202a-405c-fa4b-9aef3f70f9f3","executionInfo":{"status":"ok","timestamp":1552498711935,"user_tz":420,"elapsed":4813,"user":{"displayName":"Li Yin","photoUrl":"https://lh5.googleusercontent.com/-KgiTKdqPRUg/AAAAAAAAAAI/AAAAAAAAAIE/aHQ6xO5vQpY/s64/photo.jpg","userId":"13365523799853678553"}},"colab":{"base_uri":"https://localhost:8080/","height":35}},"cell_type":"code","source":["m4 = [[0]*cols]*rows\n","m4[1][2] = 1\n","print(m4)"],"execution_count":7,"outputs":[{"output_type":"stream","text":["[[0, 0, 1, 0], [0, 0, 1, 0], [0, 0, 1, 0]]\n"],"name":"stdout"}]},{"metadata":{"id":"l6jRaJGuwKiK","colab_type":"text"},"cell_type":"markdown","source":["#### access rows"]},{"metadata":{"id":"o1MWJZtOwMUr","colab_type":"code","outputId":"5637c2c6-776d-4c31-a697-532e65b5a8cb","executionInfo":{"status":"ok","timestamp":1552498711937,"user_tz":420,"elapsed":4798,"user":{"displayName":"Li Yin","photoUrl":"https://lh5.googleusercontent.com/-KgiTKdqPRUg/AAAAAAAAAAI/AAAAAAAAAIE/aHQ6xO5vQpY/s64/photo.jpg","userId":"13365523799853678553"}},"colab":{"base_uri":"https://localhost:8080/","height":72}},"cell_type":"code","source":["for row in m1:\n"," 
print(row)"],"execution_count":8,"outputs":[{"output_type":"stream","text":["[0, 0, 0, 0]\n","[0, 0, 1, 0]\n","[0, 0, 0, 0]\n"],"name":"stdout"}]},{"metadata":{"id":"nuwXftOcw4G0","colab_type":"text"},"cell_type":"markdown","source":["#### access cols"]},{"metadata":{"id":"2JZVSYv_w5yn","colab_type":"code","outputId":"e3b76e39-3260-458f-8906-c3acc9aee004","executionInfo":{"status":"ok","timestamp":1552498711939,"user_tz":420,"elapsed":4785,"user":{"displayName":"Li Yin","photoUrl":"https://lh5.googleusercontent.com/-KgiTKdqPRUg/AAAAAAAAAAI/AAAAAAAAAIE/aHQ6xO5vQpY/s64/photo.jpg","userId":"13365523799853678553"}},"colab":{"base_uri":"https://localhost:8080/","height":90}},"cell_type":"code","source":["for i in range(cols):\n"," col = [row[i] for row in m1]\n"," print(col)"],"execution_count":9,"outputs":[{"output_type":"stream","text":["[0, 0, 0]\n","[0, 0, 0]\n","[0, 1, 0]\n","[0, 0, 0]\n"],"name":"stdout"}]},{"metadata":{"id":"OUtscU8nxZMY","colab_type":"code","outputId":"8438deaa-00f7-4c85-f39d-af4fb9b91ca8","executionInfo":{"status":"ok","timestamp":1552498711946,"user_tz":420,"elapsed":4778,"user":{"displayName":"Li Yin","photoUrl":"https://lh5.googleusercontent.com/-KgiTKdqPRUg/AAAAAAAAAAI/AAAAAAAAAIE/aHQ6xO5vQpY/s64/photo.jpg","userId":"13365523799853678553"}},"colab":{"base_uri":"https://localhost:8080/","height":35}},"cell_type":"code","source":["transposedM1 = list(zip(*m1))\n","print(transposedM1)"],"execution_count":10,"outputs":[{"output_type":"stream","text":["[(0, 0, 0), (0, 0, 0), (0, 1, 0), (0, 0, 0)]\n"],"name":"stdout"}]},{"metadata":{"id":"fmVFH68bbFi6","colab_type":"text"},"cell_type":"markdown","source":["### adjacency matrix"]},{"metadata":{"id":"60craYL7bIC_","colab_type":"code","outputId":"b96ceee6-7b8b-4bee-8473-babc442774e2","executionInfo":{"status":"ok","timestamp":1552498711949,"user_tz":420,"elapsed":4762,"user":{"displayName":"Li 
Yin","photoUrl":"https://lh5.googleusercontent.com/-KgiTKdqPRUg/AAAAAAAAAAI/AAAAAAAAAIE/aHQ6xO5vQpY/s64/photo.jpg","userId":"13365523799853678553"}},"colab":{"base_uri":"https://localhost:8080/","height":35}},"cell_type":"code","source":["am = [[0]*7 for _ in range(7)]\n","\n","# set 8 edges\n","am[0][1] = am[1][0] = 1\n","am[0][2] = am[2][0] = 1\n","am[1][2] = am[2][1] = 1\n","am[1][3] = am[3][1] = 1\n","am[2][4] = am[4][2] = 1\n","am[3][4] = am[4][3] = 1\n","am[4][5] = am[5][4] = 1\n","am[5][6] = am[6][5] = 1\n","\n","print(am)"],"execution_count":11,"outputs":[{"output_type":"stream","text":["[[0, 1, 1, 0, 0, 0, 0], [1, 0, 1, 1, 0, 0, 0], [1, 1, 0, 0, 1, 0, 0], [0, 1, 0, 0, 1, 0, 0], [0, 0, 1, 1, 0, 1, 0], [0, 0, 0, 0, 1, 0, 1], [0, 0, 0, 0, 0, 1, 0]]\n"],"name":"stdout"}]},{"metadata":{"id":"Bbh2mPsUdhzm","colab_type":"text"},"cell_type":"markdown","source":["### adjacency list"]},{"metadata":{"id":"nfeVyXxadkJM","colab_type":"code","outputId":"da19e829-0800-4bec-f563-5a48459cd27e","executionInfo":{"status":"ok","timestamp":1552498711954,"user_tz":420,"elapsed":4749,"user":{"displayName":"Li Yin","photoUrl":"https://lh5.googleusercontent.com/-KgiTKdqPRUg/AAAAAAAAAAI/AAAAAAAAAIE/aHQ6xO5vQpY/s64/photo.jpg","userId":"13365523799853678553"}},"colab":{"base_uri":"https://localhost:8080/","height":35}},"cell_type":"code","source":["al = [[] for _ in range(7)]\n","\n","# set 8 edges\n","al[0] = [1, 2]\n","al[1] = [2, 3]\n","al[2] = [0, 4]\n","al[3] = [1, 4]\n","al[4] = [2, 3, 5]\n","al[5] = [4, 6]\n","al[6] = [5]\n","\n","print(al)"],"execution_count":12,"outputs":[{"output_type":"stream","text":["[[1, 2], [2, 3], [0, 4], [1, 4], [2, 3, 5], [4, 6], [5]]\n"],"name":"stdout"}]},{"metadata":{"id":"pc6flSew8FXi","colab_type":"text"},"cell_type":"markdown","source":["### edge 
list"]},{"metadata":{"id":"LKJ3ch4s8G3x","colab_type":"code","outputId":"8b38471b-d9eb-4b8a-c37a-43bb61474da6","executionInfo":{"status":"ok","timestamp":1552498711957,"user_tz":420,"elapsed":4735,"user":{"displayName":"Li Yin","photoUrl":"https://lh5.googleusercontent.com/-KgiTKdqPRUg/AAAAAAAAAAI/AAAAAAAAAIE/aHQ6xO5vQpY/s64/photo.jpg","userId":"13365523799853678553"}},"colab":{"base_uri":"https://localhost:8080/","height":35}},"cell_type":"code","source":["el = []\n","el.extend([[0, 1], [1, 0]])\n","el.extend([[0, 2], [2, 0]])\n","el.extend([[1, 2], [2, 1]])\n","el.extend([[1, 3], [3, 1]])\n","el.extend([[3, 4], [4, 3]])\n","el.extend([[2, 4], [4, 2]])\n","el.extend([[4, 5], [5, 4]])\n","el.extend([[5, 6], [6, 5]])\n","\n","print(el)"],"execution_count":13,"outputs":[{"output_type":"stream","text":["[[0, 1], [1, 0], [0, 2], [2, 0], [1, 2], [2, 1], [1, 3], [3, 1], [3, 4], [4, 3], [2, 4], [4, 2], [4, 5], [5, 4], [5, 6], [6, 5]]\n"],"name":"stdout"}]},{"metadata":{"id":"YFtL16Ko2OaU","colab_type":"text"},"cell_type":"markdown","source":["### Use dictionary data structure"]},{"metadata":{"id":"lLL_GR0L2R7U","colab_type":"code","outputId":"d9f7163c-3927-49b5-86c3-73c814ae1880","executionInfo":{"status":"ok","timestamp":1552498711958,"user_tz":420,"elapsed":4722,"user":{"displayName":"Li Yin","photoUrl":"https://lh5.googleusercontent.com/-KgiTKdqPRUg/AAAAAAAAAAI/AAAAAAAAAIE/aHQ6xO5vQpY/s64/photo.jpg","userId":"13365523799853678553"}},"colab":{"base_uri":"https://localhost:8080/","height":35}},"cell_type":"code","source":["from collections import defaultdict\n","\n","d = defaultdict(set)\n","for v1, v2 in el:\n"," d[chr(v1 + ord('a'))].add(chr(v2 + ord('a')))\n","\n","print(d)"],"execution_count":14,"outputs":[{"output_type":"stream","text":["defaultdict(, {'a': {'c', 'b'}, 'b': {'a', 'd', 'c'}, 'c': {'a', 'e', 'b'}, 'd': {'e', 'b'}, 'e': {'d', 'f', 'c'}, 'f': {'e', 'g'}, 'g': 
{'f'}})\n"],"name":"stdout"}]},{"metadata":{"id":"R8_0w0qR3ilj","colab_type":"code","outputId":"1e1bf46f-94ee-4110-d63c-95ce6767e292","executionInfo":{"status":"ok","timestamp":1552498711963,"user_tz":420,"elapsed":4712,"user":{"displayName":"Li Yin","photoUrl":"https://lh5.googleusercontent.com/-KgiTKdqPRUg/AAAAAAAAAAI/AAAAAAAAAIE/aHQ6xO5vQpY/s64/photo.jpg","userId":"13365523799853678553"}},"colab":{"base_uri":"https://localhost:8080/","height":35}},"cell_type":"code","source":["dw = defaultdict(dict)\n","for v1, v2 in el:\n"," dw[v1][v2] = v1 + v2\n","print(dw)"],"execution_count":15,"outputs":[{"output_type":"stream","text":["defaultdict(, {0: {1: 1, 2: 2}, 1: {0: 1, 2: 3, 3: 4}, 2: {0: 2, 1: 3, 4: 6}, 3: {1: 4, 4: 7}, 4: {3: 7, 2: 6, 5: 9}, 5: {4: 9, 6: 11}, 6: {5: 11}})\n"],"name":"stdout"}]},{"metadata":{"id":"b_55IC6F8iP_","colab_type":"text"},"cell_type":"markdown","source":["# Breath-first Search"]},{"metadata":{"id":"b_Bo_1as8kX1","colab_type":"code","colab":{}},"cell_type":"code","source":["class STATE:\n"," white = 0\n"," gray = 1\n"," black = 2\n"," \n","def bfs(g, s):\n"," '''node by node bfs using queue''' \n"," v = len(g)\n"," state = [False] * v\n"," \n"," # allocate space for the predecessor list and colors \n"," pi = [None] * v\n"," state[s] = True # make the state of the visiting node\n"," dist = [0] * v\n"," \n"," q, orders = [s], [s]\n"," while q:\n"," u = q.pop(0)\n"," \n"," print(u, ' out, ', end = ' ')\n"," for v in g[u]:\n"," if not state[v]:\n"," state[v] = True\n"," pi[v] = u # set the predecessor\n"," dist[v] = dist[u] + 1\n"," q.append(v)\n"," orders.append(v)\n"," print(v, ' in', end = ' ')\n"," return orders, pi, dist\n"," "],"execution_count":0,"outputs":[]},{"metadata":{"id":"meQvv4lE_X27","colab_type":"code","outputId":"0a4875f0-c2ad-4d1a-c869-6a9026221ba9","executionInfo":{"status":"ok","timestamp":1552498711969,"user_tz":420,"elapsed":4697,"user":{"displayName":"Li 
Yin","photoUrl":"https://lh5.googleusercontent.com/-KgiTKdqPRUg/AAAAAAAAAAI/AAAAAAAAAIE/aHQ6xO5vQpY/s64/photo.jpg","userId":"13365523799853678553"}},"colab":{"base_uri":"https://localhost:8080/","height":35}},"cell_type":"code","source":["orders, pi, dist = bfs(al, 0)"],"execution_count":17,"outputs":[{"output_type":"stream","text":["0 out, 1 in 2 in 1 out, 3 in 2 out, 4 in 3 out, 4 out, 5 in 5 out, 6 in 6 out, "],"name":"stdout"}]},{"metadata":{"id":"TEZR7eayuZuI","colab_type":"code","outputId":"801c3c94-75f5-4b89-d578-69d262d29c68","executionInfo":{"status":"ok","timestamp":1552498711971,"user_tz":420,"elapsed":4685,"user":{"displayName":"Li Yin","photoUrl":"https://lh5.googleusercontent.com/-KgiTKdqPRUg/AAAAAAAAAAI/AAAAAAAAAIE/aHQ6xO5vQpY/s64/photo.jpg","userId":"13365523799853678553"}},"colab":{"base_uri":"https://localhost:8080/","height":35}},"cell_type":"code","source":["print(pi)"],"execution_count":18,"outputs":[{"output_type":"stream","text":["[None, 0, 0, 1, 2, 4, 5]\n"],"name":"stdout"}]},{"metadata":{"id":"CoGfQZk1BmqD","colab_type":"code","colab":{}},"cell_type":"code","source":["def get_path(s, t, pi):\n"," '''iterative'''\n"," p = t\n"," path = []\n"," while p != s:\n"," path.append(p)\n"," p = pi[p]\n"," path.append(s)\n"," return path[::-1]\n"," "],"execution_count":0,"outputs":[]},{"metadata":{"id":"XqYMGN4nCBJA","colab_type":"code","outputId":"8a7e1a63-7409-4c37-f80c-a3ba5a7d58b2","executionInfo":{"status":"ok","timestamp":1552498711982,"user_tz":420,"elapsed":4676,"user":{"displayName":"Li Yin","photoUrl":"https://lh5.googleusercontent.com/-KgiTKdqPRUg/AAAAAAAAAAI/AAAAAAAAAIE/aHQ6xO5vQpY/s64/photo.jpg","userId":"13365523799853678553"}},"colab":{"base_uri":"https://localhost:8080/","height":35}},"cell_type":"code","source":["get_path(0, 5, pi)"],"execution_count":20,"outputs":[{"output_type":"execute_result","data":{"text/plain":["[0, 2, 4, 
5]"]},"metadata":{"tags":[]},"execution_count":20}]},{"metadata":{"id":"wmejAVv8s-ZV","colab_type":"code","colab":{}},"cell_type":"code","source":["def get_path(s, t, pi, path):\n"," '''recursive'''\n"," if s == t:\n"," path.append(t)\n"," return\n"," elif pi[t] is None:\n"," print('no path from ', s, ' to ', v)\n"," else:\n"," get_path(s, pi[t], pi, path)\n"," path.append(t)\n"," return"],"execution_count":0,"outputs":[]},{"metadata":{"id":"4aUNsrKPtkG6","colab_type":"code","outputId":"940a2f48-7d81-4dcd-e24a-0128271c5263","executionInfo":{"status":"ok","timestamp":1552498711988,"user_tz":420,"elapsed":4663,"user":{"displayName":"Li Yin","photoUrl":"https://lh5.googleusercontent.com/-KgiTKdqPRUg/AAAAAAAAAAI/AAAAAAAAAIE/aHQ6xO5vQpY/s64/photo.jpg","userId":"13365523799853678553"}},"colab":{"base_uri":"https://localhost:8080/","height":35}},"cell_type":"code","source":["path = []\n","get_path(0, 5, pi, path)\n","print(path)"],"execution_count":22,"outputs":[{"output_type":"stream","text":["[0, 2, 4, 5]\n"],"name":"stdout"}]},{"metadata":{"id":"aR60hTwOQOo0","colab_type":"code","colab":{}},"cell_type":"code","source":["def bfs(g, s):\n"," '''simplified bfs'''\n"," v = len(g)\n"," colors = [STATE.white] * v\n"," \n"," q, orders = [s], [s]\n"," complete_orders = []\n"," colors[s] = STATE.gray # make the state of the visiting node\n"," while q:\n"," u = q.pop(0)\n"," \n"," for v in g[u]:\n"," if colors[v] == STATE.white:\n"," colors[v] = STATE.gray\n"," q.append(v)\n"," orders.append(v)\n"," # complete \n"," colors[u] = STATE.black\n"," complete_orders.append(u)\n"," return orders, complete_orders"],"execution_count":0,"outputs":[]},{"metadata":{"id":"CN9MaYWwQ5vb","colab_type":"code","outputId":"4ddf5a46-d062-40ef-8ebe-804bb6467cb2","executionInfo":{"status":"ok","timestamp":1552498711998,"user_tz":420,"elapsed":4658,"user":{"displayName":"Li 
Yin","photoUrl":"https://lh5.googleusercontent.com/-KgiTKdqPRUg/AAAAAAAAAAI/AAAAAAAAAIE/aHQ6xO5vQpY/s64/photo.jpg","userId":"13365523799853678553"}},"colab":{"base_uri":"https://localhost:8080/","height":35}},"cell_type":"code","source":["print(bfs(al, 0))"],"execution_count":24,"outputs":[{"output_type":"stream","text":["([0, 1, 2, 3, 4, 5, 6], [0, 1, 2, 3, 4, 5, 6])\n"],"name":"stdout"}]},{"metadata":{"id":"I-LT_HgDrrtI","colab_type":"text"},"cell_type":"markdown","source":["#### level by level bfs"]},{"metadata":{"id":"sQyW-n5qrt6k","colab_type":"code","colab":{}},"cell_type":"code","source":["def bfs_level(g, s):\n"," '''level by level bfs'''\n"," v = len(g)\n"," state = [False] * v\n"," \n"," orders = []\n"," lst = [s]\n"," state[s] = True\n"," d = 0 # track distance\n"," while lst:\n"," print('distance ', d, ': ', lst)\n"," tmp_lst = []\n"," for u in lst:\n"," orders.append(u)\n"," for v in g[u]:\n"," if not state[v]:\n"," state[v] = True\n"," tmp_lst.append(v) \n"," lst = tmp_lst\n"," d += 1\n"," return orders\n"," "],"execution_count":0,"outputs":[]},{"metadata":{"id":"KwUfwlRN2qbe","colab_type":"code","outputId":"79c23a32-db70-483b-db81-3ee958fea037","executionInfo":{"status":"ok","timestamp":1552498712006,"user_tz":420,"elapsed":4646,"user":{"displayName":"Li Yin","photoUrl":"https://lh5.googleusercontent.com/-KgiTKdqPRUg/AAAAAAAAAAI/AAAAAAAAAIE/aHQ6xO5vQpY/s64/photo.jpg","userId":"13365523799853678553"}},"colab":{"base_uri":"https://localhost:8080/","height":127}},"cell_type":"code","source":["print(bfs_level(al, 0))"],"execution_count":26,"outputs":[{"output_type":"stream","text":["distance 0 : [0]\n","distance 1 : [1, 2]\n","distance 2 : [3, 4]\n","distance 3 : [5]\n","distance 4 : [6]\n","[0, 1, 2, 3, 4, 5, 6]\n"],"name":"stdout"}]},{"metadata":{"id":"KDFDgG-BOBml","colab_type":"text"},"cell_type":"markdown","source":["# Depth-first Search"]},{"metadata":{"id":"TAPb-16WOHMI","colab_type":"code","colab":{}},"cell_type":"code","source":["def dfs(g, s, 
colors, orders, complete_orders):\n"," colors[s] = STATE.gray\n"," orders.append(s)\n"," for v in g[s]:\n"," if colors[v] == STATE.white:\n"," dfs(g, v, colors, orders, complete_orders)\n"," # complete\n"," colors[s] = STATE.black # this is not necessary in the code, just to help track the state\n"," complete_orders.append(s)\n"," return"],"execution_count":0,"outputs":[]},{"metadata":{"id":"IdRqJxJ34qkw","colab_type":"code","outputId":"45757452-267b-41e1-dfa0-c45cc2c95f01","executionInfo":{"status":"ok","timestamp":1552498712011,"user_tz":420,"elapsed":4634,"user":{"displayName":"Li Yin","photoUrl":"https://lh5.googleusercontent.com/-KgiTKdqPRUg/AAAAAAAAAAI/AAAAAAAAAIE/aHQ6xO5vQpY/s64/photo.jpg","userId":"13365523799853678553"}},"colab":{"base_uri":"https://localhost:8080/","height":35}},"cell_type":"code","source":["# initialization\n","'''start from 0'''\n","v = len(al)\n","orders, complete_orders = [], []\n","colors = [STATE.white] * v\n","dfs(al,0, colors, orders, complete_orders)\n","\n","print(orders, complete_orders)"],"execution_count":28,"outputs":[{"output_type":"stream","text":["[0, 1, 2, 4, 3, 5, 6] [3, 6, 5, 4, 2, 1, 0]\n"],"name":"stdout"}]},{"metadata":{"id":"FNElkf4jmOre","colab_type":"code","outputId":"8dda9d1d-d4da-429a-ac9d-92b56ec3a8f1","executionInfo":{"status":"ok","timestamp":1552498712013,"user_tz":420,"elapsed":4624,"user":{"displayName":"Li Yin","photoUrl":"https://lh5.googleusercontent.com/-KgiTKdqPRUg/AAAAAAAAAAI/AAAAAAAAAIE/aHQ6xO5vQpY/s64/photo.jpg","userId":"13365523799853678553"}},"colab":{"base_uri":"https://localhost:8080/","height":35}},"cell_type":"code","source":["# initialization\n","'''start from 1'''\n","v = len(al)\n","orders, complete_orders = [], []\n","colors = [STATE.white] * v\n","dfs(al,1, colors, orders, complete_orders)\n","\n","print(orders, complete_orders)"],"execution_count":29,"outputs":[{"output_type":"stream","text":["[1, 2, 0, 4, 3, 5, 6] [0, 3, 6, 5, 4, 2, 
1]\n"],"name":"stdout"}]},{"metadata":{"id":"H_5VwJiLqca6","colab_type":"code","colab":{}},"cell_type":"code","source":["def dftIter(g, s):\n"," '''not preserving the same discovery ordering'''\n"," n = len(g)\n"," orders = []\n"," colors = [STATE.white] * n\n"," stack = [s]\n","\n"," orders.append(s) # track gray order\n"," colors[s] = STATE.gray\n"," \n"," while stack:\n"," u = stack.pop()\n"," \n"," for v in g[u]:\n"," if colors[v] == STATE.white:\n"," colors[v] = STATE.gray\n"," stack.append(v)\n"," orders.append(v) # track gray order\n"," \n"," return orders"],"execution_count":0,"outputs":[]},{"metadata":{"id":"q3hq9ARKqqnw","colab_type":"code","outputId":"6644b5fb-752c-48da-cee3-d617a576f501","executionInfo":{"status":"ok","timestamp":1552498712018,"user_tz":420,"elapsed":4608,"user":{"displayName":"Li Yin","photoUrl":"https://lh5.googleusercontent.com/-KgiTKdqPRUg/AAAAAAAAAAI/AAAAAAAAAIE/aHQ6xO5vQpY/s64/photo.jpg","userId":"13365523799853678553"}},"colab":{"base_uri":"https://localhost:8080/","height":35}},"cell_type":"code","source":["# initialization\n","'''start from 0'''\n","print(dftIter(al,0))"],"execution_count":31,"outputs":[{"output_type":"stream","text":["[0, 1, 2, 4, 3, 5, 6]\n"],"name":"stdout"}]},{"metadata":{"id":"RIVOD221rHii","colab_type":"code","outputId":"cca27f41-bbc6-4f99-8584-c82ef53df5cc","executionInfo":{"status":"ok","timestamp":1552498712021,"user_tz":420,"elapsed":4596,"user":{"displayName":"Li Yin","photoUrl":"https://lh5.googleusercontent.com/-KgiTKdqPRUg/AAAAAAAAAAI/AAAAAAAAAIE/aHQ6xO5vQpY/s64/photo.jpg","userId":"13365523799853678553"}},"colab":{"base_uri":"https://localhost:8080/","height":35}},"cell_type":"code","source":["print(dftIter(al, 1))"],"execution_count":32,"outputs":[{"output_type":"stream","text":["[1, 2, 3, 4, 5, 6, 0]\n"],"name":"stdout"}]},{"metadata":{"id":"I4eVWYzGj01U","colab_type":"code","colab":{}},"cell_type":"code","source":["def dftIter(g, s):\n"," '''preserving only discovery ordering'''\n"," n = 
len(g)\n"," orders = []\n"," colors = [STATE.white] * n\n"," stack = [s]\n","\n"," #orders.append(s) # track gray order\n"," #colors[s] = STATE.gray\n"," \n"," while stack:\n"," u = stack.pop()\n"," if colors[u] == STATE.white:\n"," orders.append(u) # track gray order\n"," colors[u] = STATE.gray\n"," for v in g[u][::-1]:\n"," if colors[v] == STATE.white:\n"," \n"," stack.append(v)\n"," #orders.append(v) # track gray order\n"," \n"," return orders"],"execution_count":0,"outputs":[]},{"metadata":{"id":"5JWNwi9rlAER","colab_type":"code","outputId":"b1131559-e0a3-49e6-837b-440d34c01ba3","executionInfo":{"status":"ok","timestamp":1552498712036,"user_tz":420,"elapsed":4596,"user":{"displayName":"Li Yin","photoUrl":"https://lh5.googleusercontent.com/-KgiTKdqPRUg/AAAAAAAAAAI/AAAAAAAAAIE/aHQ6xO5vQpY/s64/photo.jpg","userId":"13365523799853678553"}},"colab":{"base_uri":"https://localhost:8080/","height":35}},"cell_type":"code","source":["print(dftIter(al, 0))"],"execution_count":34,"outputs":[{"output_type":"stream","text":["[0, 1, 2, 4, 3, 5, 6]\n"],"name":"stdout"}]},{"metadata":{"id":"mBqP-iy9ma3d","colab_type":"code","outputId":"2dab6dff-4fdc-4a44-ec2b-1dee60d0bb06","executionInfo":{"status":"ok","timestamp":1552498712043,"user_tz":420,"elapsed":4588,"user":{"displayName":"Li Yin","photoUrl":"https://lh5.googleusercontent.com/-KgiTKdqPRUg/AAAAAAAAAAI/AAAAAAAAAIE/aHQ6xO5vQpY/s64/photo.jpg","userId":"13365523799853678553"}},"colab":{"base_uri":"https://localhost:8080/","height":35}},"cell_type":"code","source":["print(dftIter(al, 1))"],"execution_count":35,"outputs":[{"output_type":"stream","text":["[1, 2, 0, 4, 3, 5, 6]\n"],"name":"stdout"}]},{"metadata":{"id":"rrFl2gwokZON","colab_type":"code","colab":{}},"cell_type":"code","source":["def dfsIter(g, s):\n"," '''iterative dfs'''\n"," v = len(g)\n"," orders, complete_orders = [], []\n"," colors = [STATE.white] * v\n"," stack = [s]\n","\n"," orders.append(s) # track gray order\n"," colors[s] = STATE.gray\n"," \n"," while 
stack:\n"," u = stack[-1]\n"," bAdj = False\n"," for v in g[u]:\n"," if colors[v] == STATE.white:\n"," colors[v] = STATE.gray\n"," stack.append(v)\n"," orders.append(v) # track gray order\n"," bAdj = True\n"," break\n"," \n"," if not bAdj: # if no adjacent is found, pop out\n"," # complete\n"," colors[u] = STATE.black # this is not necessary in the code, just to help track the state\n"," complete_orders.append(u)\n"," stack.pop()\n"," \n"," return orders, complete_orders\n"," \n","\n"," \n"," "],"execution_count":0,"outputs":[]},{"metadata":{"id":"vGaO1vCbly-a","colab_type":"code","outputId":"c32c9589-3d7c-46d7-dd2c-546cfcc04393","executionInfo":{"status":"ok","timestamp":1552498712054,"user_tz":420,"elapsed":4587,"user":{"displayName":"Li Yin","photoUrl":"https://lh5.googleusercontent.com/-KgiTKdqPRUg/AAAAAAAAAAI/AAAAAAAAAIE/aHQ6xO5vQpY/s64/photo.jpg","userId":"13365523799853678553"}},"colab":{"base_uri":"https://localhost:8080/","height":35}},"cell_type":"code","source":["print(dfsIter(al, 0))"],"execution_count":37,"outputs":[{"output_type":"stream","text":["([0, 1, 2, 4, 3, 5, 6], [3, 6, 5, 4, 2, 1, 0])\n"],"name":"stdout"}]},{"metadata":{"id":"wo9AyzN4tRSw","colab_type":"text"},"cell_type":"markdown","source":["### To do\n","Implement the simple iterative version that track the discover and finishing time. 
So that we can use the iterative version in the topological sort and strongly connected component."]},{"metadata":{"id":"RlYFKGBrfxLo","colab_type":"text"},"cell_type":"markdown","source":["### add finish time"]},{"metadata":{"id":"12yqIv0pf38y","colab_type":"code","colab":{}},"cell_type":"code","source":["# def static_var(varname, value):\n","# def decorate(func):\n","# setattr(func, varname, value)\n","# return func\n","# return decorate\n","# @static_var(\"t\", -1)\n","def dfs(g, s, colors, dt, ft):\n"," dfs.t += 1 # static variable\n"," colors[s] = STATE.gray\n"," dt[s] = dfs.t\n"," for v in g[s]:\n"," if colors[v] == STATE.white:\n"," dfs(g, v, colors, dt, ft)\n"," # complete\n"," dfs.t += 1\n"," ft[s] = dfs.t\n"," return"],"execution_count":0,"outputs":[]},{"metadata":{"id":"WKM_eo7ngKsG","colab_type":"code","outputId":"b2955e44-33bf-4a8c-8e7c-d35198fbfdb0","executionInfo":{"status":"ok","timestamp":1552498712061,"user_tz":420,"elapsed":4578,"user":{"displayName":"Li Yin","photoUrl":"https://lh5.googleusercontent.com/-KgiTKdqPRUg/AAAAAAAAAAI/AAAAAAAAAIE/aHQ6xO5vQpY/s64/photo.jpg","userId":"13365523799853678553"}},"colab":{"base_uri":"https://localhost:8080/","height":54}},"cell_type":"code","source":["# initialization\n","v = len(al)\n","dt, ft = [-1] * v, [-1] * v\n","colors = [STATE.white] * v\n","dfs.t = -1\n","dfs(al,0, colors, dt, ft)\n","\n","merge_orders = [-1] * 2 * v\n","\n","for i, t in enumerate(dt):\n"," merge_orders[t] = i\n"," \n","for i, t in enumerate(ft):\n"," merge_orders[t] = i\n","\n","print(merge_orders)\n","nodes = set()\n","for i in merge_orders:\n"," if i not in nodes:\n"," print('(', i, end = ', ')\n"," nodes.add(i)\n"," else:\n"," print(i, ') ', end = ' ')\n"],"execution_count":39,"outputs":[{"output_type":"stream","text":["[0, 1, 2, 4, 3, 3, 5, 6, 6, 5, 4, 2, 1, 0]\n","( 0, ( 1, ( 2, ( 4, ( 3, 3 ) ( 5, ( 6, 6 ) 5 ) 4 ) 2 ) 1 ) 0 ) "],"name":"stdout"}]}]} \ No newline at end of file diff --git a/Colab_Codes/Colab 
Notebooks/graph_search.ipynb b/Colab_Codes/Colab Notebooks/graph_search.ipynb deleted file mode 100644 index 990acca..0000000 --- a/Colab_Codes/Colab Notebooks/graph_search.ipynb +++ /dev/null @@ -1 +0,0 @@ -{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"graph_search.ipynb","version":"0.3.2","provenance":[],"collapsed_sections":[],"toc_visible":true},"kernelspec":{"name":"python3","display_name":"Python 3"}},"cells":[{"metadata":{"id":"ZhL7pFlp8gTD","colab_type":"text"},"cell_type":"markdown","source":["## Backtracking\n","\n","1. Permutation\n","2. Combination\n","3. All Paths\n","\n","\n","\n"]},{"metadata":{"id":"a76_VshDTaE4","colab_type":"text"},"cell_type":"markdown","source":["### Permutation"]},{"metadata":{"id":"XhTLVA0sZw61","colab_type":"code","colab":{}},"cell_type":"code","source":["def A_n_k(a, n, k, depth, used, curr, ans):\n"," '''\n"," Implement permutation of k items out of n items\n"," depth: start from 0, and represent the depth of the search\n"," used: track what items are in the partial solution from the set of n\n"," curr: the current partial solution\n"," ans: collect all the valide solutions\n"," '''\n"," if depth == k: #end condition\n"," ans.append(curr[::]) # use deepcopy because curr is tracking all partial solution, it eventually become []\n"," return\n"," \n"," for i in range(n):\n"," if not used[i]:\n"," # generate the next solution from curr\n"," curr.append(a[i])\n"," used[i] = True\n"," print(curr)\n"," # move to the next solution\n"," A_n_k(a, n, k, depth+1, used, curr, ans)\n"," \n"," #backtrack to previous partial state\n"," curr.pop()\n"," print('backtrack: ', curr)\n"," used[i] = False\n"," return"],"execution_count":0,"outputs":[]},{"metadata":{"id":"RVZsfQb9_Xga","colab_type":"code","outputId":"bb9e8b92-0d8e-4cb7-8dd9-06f2c3384151","executionInfo":{"status":"ok","timestamp":1553585906154,"user_tz":420,"elapsed":440,"user":{"displayName":"Li 
Yin","photoUrl":"https://lh5.googleusercontent.com/-KgiTKdqPRUg/AAAAAAAAAAI/AAAAAAAAAIE/aHQ6xO5vQpY/s64/photo.jpg","userId":"13365523799853678553"}},"colab":{"base_uri":"https://localhost:8080/","height":555}},"cell_type":"code","source":["a = [1, 2, 3]\n","n = len(a)\n","ans = [[None]]\n","used = [False] * len(a)\n","ans = []\n","A_n_k(a, n, n, 0, used, [], ans)\n","print(ans)\n"],"execution_count":4,"outputs":[{"output_type":"stream","text":["[1]\n","[1, 2]\n","[1, 2, 3]\n","backtrack: [1, 2]\n","backtrack: [1]\n","[1, 3]\n","[1, 3, 2]\n","backtrack: [1, 3]\n","backtrack: [1]\n","backtrack: []\n","[2]\n","[2, 1]\n","[2, 1, 3]\n","backtrack: [2, 1]\n","backtrack: [2]\n","[2, 3]\n","[2, 3, 1]\n","backtrack: [2, 3]\n","backtrack: [2]\n","backtrack: []\n","[3]\n","[3, 1]\n","[3, 1, 2]\n","backtrack: [3, 1]\n","backtrack: [3]\n","[3, 2]\n","[3, 2, 1]\n","backtrack: [3, 2]\n","backtrack: [3]\n","backtrack: []\n","[[1, 2, 3], [1, 3, 2], [2, 1, 3], [2, 3, 1], [3, 1, 2], [3, 2, 1]]\n"],"name":"stdout"}]},{"metadata":{"id":"HikETMOQcM7H","colab_type":"text"},"cell_type":"markdown","source":["### Combination"]},{"metadata":{"id":"fcIaL7i7-9na","colab_type":"code","colab":{}},"cell_type":"code","source":["def C_n_k(a, n, k, start, depth, curr, ans):\n"," '''\n"," Implement combination of k items out of n items\n"," start: the start of candinate\n"," depth: start from 0, and represent the depth of the search\n"," curr: the current partial solution\n"," ans: collect all the valide solutions\n"," '''\n"," if depth == k: #end condition\n"," ans.append(curr[::]) \n"," return\n"," \n"," for i in range(start, n): \n"," # generate the next solution from curr\n"," curr.append(a[i])\n"," # move to the next solution\n"," C_n_k(a, n, k, i+1, depth+1, curr, ans)\n","\n"," #backtrack to previous partial state\n"," curr.pop()\n"," 
return"],"execution_count":0,"outputs":[]},{"metadata":{"id":"FQlAWNcp8-5y","colab_type":"code","outputId":"0dcbc55c-b228-4d99-f37f-5d44111f3183","executionInfo":{"status":"ok","timestamp":1553038549590,"user_tz":420,"elapsed":793,"user":{"displayName":"Li Yin","photoUrl":"https://lh5.googleusercontent.com/-KgiTKdqPRUg/AAAAAAAAAAI/AAAAAAAAAIE/aHQ6xO5vQpY/s64/photo.jpg","userId":"13365523799853678553"}},"colab":{"base_uri":"https://localhost:8080/","height":35}},"cell_type":"code","source":["a = [1, 2, 3]\n","n = len(a)\n","ans = [[None]]\n","ans = []\n","C_n_k(a, n, 2, 0, 0, [], ans)\n","print(ans)\n"],"execution_count":0,"outputs":[{"output_type":"stream","text":["[[1, 2], [1, 3], [2, 3]]\n"],"name":"stdout"}]},{"metadata":{"id":"ze7L-ttOgCUd","colab_type":"text"},"cell_type":"markdown","source":["### All paths"]},{"metadata":{"id":"jYkhLKk9QZqi","colab_type":"code","colab":{}},"cell_type":"code","source":["def all_paths(g, s, path, ans):\n"," '''generate all pahts with backtrack'''\n"," ans.append(path[::])\n"," for v in g[s]:\n"," path.append(v)\n"," print(path)\n"," all_paths(g, v, path, ans)\n"," path.pop()\n"," print(path, 'backtrack')"],"execution_count":0,"outputs":[]},{"metadata":{"id":"84vSh1JIQyLH","colab_type":"code","outputId":"809fc79d-57cb-4f5a-f444-7f8b41476e79","executionInfo":{"status":"ok","timestamp":1553038549595,"user_tz":420,"elapsed":757,"user":{"displayName":"Li Yin","photoUrl":"https://lh5.googleusercontent.com/-KgiTKdqPRUg/AAAAAAAAAAI/AAAAAAAAAIE/aHQ6xO5vQpY/s64/photo.jpg","userId":"13365523799853678553"}},"colab":{"base_uri":"https://localhost:8080/","height":35}},"cell_type":"code","source":["al = [[1], [2], [4], [], [3, 5], [6], []]\n","print(al)"],"execution_count":0,"outputs":[{"output_type":"stream","text":["[[1], [2], [4], [], [3, 5], [6], 
[]]\n"],"name":"stdout"}]},{"metadata":{"id":"HhBVClwEVeUJ","colab_type":"code","outputId":"1e88bace-af15-4632-b2c4-0cb9cedbe323","executionInfo":{"status":"ok","timestamp":1553038549597,"user_tz":420,"elapsed":740,"user":{"displayName":"Li Yin","photoUrl":"https://lh5.googleusercontent.com/-KgiTKdqPRUg/AAAAAAAAAAI/AAAAAAAAAIE/aHQ6xO5vQpY/s64/photo.jpg","userId":"13365523799853678553"}},"colab":{"base_uri":"https://localhost:8080/","height":256}},"cell_type":"code","source":["ans = []\n","path = [0]\n","all_paths(al, 0, path, ans)\n","print(ans)"],"execution_count":0,"outputs":[{"output_type":"stream","text":["[0, 1]\n","[0, 1, 2]\n","[0, 1, 2, 4]\n","[0, 1, 2, 4, 3]\n","[0, 1, 2, 4] backtrack\n","[0, 1, 2, 4, 5]\n","[0, 1, 2, 4, 5, 6]\n","[0, 1, 2, 4, 5] backtrack\n","[0, 1, 2, 4] backtrack\n","[0, 1, 2] backtrack\n","[0, 1] backtrack\n","[0] backtrack\n","[[0], [0, 1], [0, 1, 2], [0, 1, 2, 4], [0, 1, 2, 4, 3], [0, 1, 2, 4, 5], [0, 1, 2, 4, 5, 6]]\n"],"name":"stdout"}]},{"metadata":{"id":"7kLQciZ1Zt2i","colab_type":"text"},"cell_type":"markdown","source":["## Constraint Satisfaction Problems with Backtracking and Pruning"]},{"metadata":{"id":"oHsib-ORB9Fh","colab_type":"text"},"cell_type":"markdown","source":["First, we build up the board"]},{"metadata":{"id":"wU0IzGC8_EiP","colab_type":"code","colab":{}},"cell_type":"code","source":["board = [[5, 3, None, None, 7, None, None, None, None],\n"," [6, None, None, 1, 9, 5, None, None, None],\n"," [None, 9, 8, None, None, None, None, 6, None],\n"," [8, None, None, None, 6, None, None, None, 3], \n"," [4, None, None, 8, None, 3, None, None, 1], \n"," [7, None, None, None, 2, None, None, None, 6], \n"," [None, 6, None, None, None, None, 2, 8, None], \n"," [None, None, None, 4, 1, 9, None, None, 5],\n"," [None, None, None, None, 8, None, None, 7, 9]]"],"execution_count":0,"outputs":[]},{"metadata":{"id":"Psq2cedrMTGJ","colab_type":"text"},"cell_type":"markdown","source":["Define how to change the 
state"]},{"metadata":{"id":"t2oeCmmvCotc","colab_type":"code","colab":{}},"cell_type":"code","source":["def setState(i, j, v, row_state, col_state, grid_state):\n"," row_state[i] |= 1 << v\n"," col_state[j] |= 1 << v\n"," grid_index = (i//3)*3 + (j//3)\n"," grid_state[grid_index] |= 1 << v\n"," \n","def resetState(i, j, v, row_state, col_state, grid_state):\n"," row_state[i] &= ~(1 << v)\n"," col_state[j] &= ~(1 << v)\n"," grid_index = (i//3)*3 + (j//3)\n"," grid_state[grid_index] &= ~(1 << v)\n"," \n","def checkState(i, j, v, row_state, col_state, grid_state):\n"," row_bit = (1 << v) & row_state[i] != 0\n"," col_bit = (1 << v) & col_state[j] != 0\n"," grid_index = (i//3)*3 + (j//3)\n"," grid_bit = (1 << v) & grid_state[grid_index] != 0\n"," return not row_bit and not col_bit and not grid_bit"],"execution_count":0,"outputs":[]},{"metadata":{"id":"SgghNi99MWXw","colab_type":"text"},"cell_type":"markdown","source":["Get the empty spots and its values"]},{"metadata":{"id":"v0IZMZHU5FR-","colab_type":"code","colab":{}},"cell_type":"code","source":[" def getEmptySpots(board, rows, cols, row_state, col_state, grid_state): \n"," ''' get empty spots and find its corresponding values in O(n*n)'''\n"," empty_spots = {}\n"," # initialize the state, and get empty spots\n"," for i in range(rows):\n"," for j in range(cols):\n"," if board[i][j]:\n"," # set that bit to 1\n"," setState(i, j, board[i][j]-1, row_state, col_state, grid_state) \n"," else:\n"," empty_spots[(i,j)] = []\n"," \n"," # get possible values for each spot\n"," for i, j in empty_spots.keys():\n"," for v in range(9):\n"," if checkState(i, j, v, row_state, col_state, grid_state):\n"," empty_spots[(i, j)].append(v+1)\n"," \n"," return empty_spots"],"execution_count":0,"outputs":[]},{"metadata":{"id":"39DSr_mfCBrQ","colab_type":"text"},"cell_type":"markdown","source":["Second, we intialize the state and find empty spots. 
"]},{"metadata":{"id":"G76l_z6DAk4n","colab_type":"code","outputId":"1664fb47-2e89-46f7-c15c-47bff27bc873","executionInfo":{"status":"ok","timestamp":1553038549608,"user_tz":420,"elapsed":682,"user":{"displayName":"Li Yin","photoUrl":"https://lh5.googleusercontent.com/-KgiTKdqPRUg/AAAAAAAAAAI/AAAAAAAAAIE/aHQ6xO5vQpY/s64/photo.jpg","userId":"13365523799853678553"}},"colab":{"base_uri":"https://localhost:8080/","height":74}},"cell_type":"code","source":["# initialize state\n","row_state = [0]*9\n","col_state = [0]*9\n","grid_state = [0]*9\n","\n","empty_spots = getEmptySpots(board, 9, 9, row_state, col_state, grid_state)\n","print(row_state, col_state, grid_state) \n","sorted_empty_spots = sorted(empty_spots.items(), key=lambda x: len(x[1]))\n","print(sorted_empty_spots)"],"execution_count":0,"outputs":[{"output_type":"stream","text":["[84, 305, 416, 164, 141, 98, 162, 281, 448] [248, 292, 128, 137, 483, 276, 2, 224, 309] [436, 337, 32, 200, 166, 37, 32, 393, 466]\n","[((4, 4), [5]), ((6, 5), [7]), ((6, 8), [4]), ((7, 7), [3]), ((0, 3), [2, 6]), ((2, 0), [1, 2]), ((2, 3), [2, 3]), ((2, 4), [3, 4]), ((2, 5), [2, 4]), ((4, 1), [2, 5]), ((5, 1), [1, 5]), ((5, 3), [5, 9]), ((5, 5), [1, 4]), ((6, 4), [3, 5]), ((7, 0), [2, 3]), ((7, 6), [3, 6]), ((8, 5), [2, 6]), ((0, 2), [1, 2, 4]), ((0, 8), [2, 4, 8]), ((1, 1), [2, 4, 7]), ((1, 2), [2, 4, 7]), ((1, 7), [2, 3, 4]), ((2, 8), [2, 4, 7]), ((3, 1), [1, 2, 5]), ((3, 3), [5, 7, 9]), ((3, 5), [1, 4, 7]), ((4, 6), [5, 7, 9]), ((4, 7), [2, 5, 9]), ((5, 7), [4, 5, 9]), ((6, 0), [1, 3, 9]), ((6, 3), [3, 5, 7]), ((7, 1), [2, 7, 8]), ((7, 2), [2, 3, 7]), ((8, 0), [1, 2, 3]), ((0, 5), [2, 4, 6, 8]), ((0, 6), [1, 4, 8, 9]), ((0, 7), [1, 2, 4, 9]), ((1, 6), [3, 4, 7, 8]), ((1, 8), [2, 4, 7, 8]), ((3, 2), [1, 2, 5, 9]), ((3, 6), [4, 5, 7, 9]), ((3, 7), [2, 4, 5, 9]), ((4, 2), [2, 5, 6, 9]), ((5, 2), [1, 3, 5, 9]), ((5, 6), [4, 5, 8, 9]), ((8, 1), [1, 2, 4, 5]), ((8, 3), [2, 3, 5, 6]), ((8, 6), [1, 3, 4, 6]), ((2, 6), [1, 3, 4, 5, 7]), 
((8, 2), [1, 2, 3, 4, 5]), ((6, 2), [1, 3, 4, 5, 7, 9])]\n"],"name":"stdout"}]},{"metadata":{"id":"jUPMX4-jF7N_","colab_type":"text"},"cell_type":"markdown","source":["Traverse the empty_spots, and fill in. "]},{"metadata":{"id":"ved6mk_0F6F-","colab_type":"code","colab":{}},"cell_type":"code","source":["def dfs_backtrack(empty_spots, index):\n"," if index == len(empty_spots):\n"," return True\n"," (i, j), vl = empty_spots[index]\n"," \n"," for v in vl: #try each value\n"," # check the state\n"," if checkState(i, j, v-1, row_state, col_state, grid_state):\n"," # set the state\n"," setState(i, j, v-1, row_state, col_state, grid_state)\n"," # mark the board\n"," board[i][j] = v\n"," if dfs_backtrack(empty_spots, index+1):\n"," return True\n"," else:\n"," #backtack to previouse state\n"," resetState(i, j, v-1, row_state, col_state, grid_state)\n"," #unmark the board\n"," board[i][j] = None\n"," return False\n"," \n"," "],"execution_count":0,"outputs":[]},{"metadata":{"id":"QUr5dZQxIpdn","colab_type":"code","outputId":"0d08b439-e745-478d-98e8-84498f033412","executionInfo":{"status":"ok","timestamp":1553038549612,"user_tz":420,"elapsed":656,"user":{"displayName":"Li Yin","photoUrl":"https://lh5.googleusercontent.com/-KgiTKdqPRUg/AAAAAAAAAAI/AAAAAAAAAIE/aHQ6xO5vQpY/s64/photo.jpg","userId":"13365523799853678553"}},"colab":{"base_uri":"https://localhost:8080/","height":74}},"cell_type":"code","source":["ans = dfs_backtrack(sorted_empty_spots, 0)\n","print(ans)\n","print(board)"],"execution_count":0,"outputs":[{"output_type":"stream","text":["True\n","[[5, 3, 4, 6, 7, 8, 9, 1, 2], [6, 7, 2, 1, 9, 5, 3, 4, 8], [1, 9, 8, 3, 4, 2, 5, 6, 7], [8, 5, 9, 7, 6, 1, 4, 2, 3], [4, 2, 6, 8, 5, 3, 7, 9, 1], [7, 1, 3, 9, 2, 4, 8, 5, 6], [9, 6, 1, 5, 3, 7, 2, 8, 4], [2, 8, 7, 4, 1, 9, 6, 3, 5], [3, 4, 5, 2, 8, 6, 1, 7, 9]]\n"],"name":"stdout"}]},{"metadata":{"id":"4vOhZxglCBKD","colab_type":"text"},"cell_type":"markdown","source":["#### Sudoku 
Solver"]},{"metadata":{"id":"j7d_45x3MiY9","colab_type":"code","colab":{}},"cell_type":"code","source":["from copy import deepcopy\n","import time\n","class SudokoSolver():\n"," def __init__(self, board):\n"," self.original_board = deepcopy(board)\n"," self.board = deepcopy(board)\n"," self.n = len(board)\n"," assert (self.n == len(board[0]))\n"," # initialize state\n"," self.row_state = [0]*self.n\n"," self.col_state = [0]*self.n\n"," self.grid_state = [0]*self.n\n"," \n"," def _setState(self, i, j, v):\n"," self.row_state[i] |= 1 << v\n"," self.col_state[j] |= 1 << v\n"," grid_index = (i//3)*3 + (j//3)\n"," self.grid_state[grid_index] |= 1 << v\n"," \n"," def _resetState(self, i, j, v):\n"," self.row_state[i] &= ~(1 << v)\n"," self.col_state[j] &= ~(1 << v)\n"," grid_index = (i//3)*3 + (j//3)\n"," self.grid_state[grid_index] &= ~(1 << v)\n"," \n"," def _checkState(self, i, j, v):\n"," row_bit = (1 << v) & self.row_state[i] != 0\n"," col_bit = (1 << v) & self.col_state[j] != 0\n"," grid_index = (i//3)*3 + (j//3)\n"," grid_bit = (1 << v) & self.grid_state[grid_index] != 0\n"," return not row_bit and not col_bit and not grid_bit\n"," \n"," def reset(self):\n"," # initialize state\n"," self.row_state = [0]*self.n\n"," self.col_state = [0]*self.n\n"," self.grid_state = [0]*self.n\n"," self.board = deepcopy(self.original_board)\n"," \n"," def _getEmptySpots(self): \n"," ''' get empty spots and find its corresponding values in O(n*n)'''\n"," empty_spots = {}\n"," # initialize the state, and get empty spots\n"," for i in range(self.n):\n"," for j in range(self.n):\n"," if self.board[i][j]:\n"," # set that bit to 1\n"," self._setState(i, j, self.board[i][j]-1) \n"," else:\n"," empty_spots[(i,j)] = []\n"," \n"," # get possible values for each spot\n"," for i, j in empty_spots.keys():\n"," for v in range(self.n):\n"," if self._checkState(i, j, v):\n"," empty_spots[(i, j)].append(v+1)\n"," \n"," return empty_spots\n"," \n"," def helper(self, empty_spots, index):\n"," if 
index == len(empty_spots):\n"," return True\n"," (i, j), vl = empty_spots[index]\n"," \n"," for v in vl: #try each value\n"," # check the state\n"," if self._checkState(i, j, v-1):\n"," # set the state\n"," self._setState(i, j, v-1)\n"," # mark the board\n"," self.board[i][j] = v\n"," if self.helper(empty_spots, index+1):\n"," return True\n"," else:\n"," #backtack to previouse state\n"," self._resetState(i, j, v-1)\n"," #unmark the board\n"," self.board[i][j] = None\n"," return False\n"," \n"," def backtrackSolver(self):\n"," self.reset()\n"," empty_spots = self._getEmptySpots()\n"," empty_spots = [(k, v) for k, v in empty_spots.items() ]\n"," t0 = time.time()\n"," ans = self.helper(empty_spots, 0)\n"," print('total time: ', time.time() - t0)\n"," return ans\n"," \n"," def backtrackSolverSorted(self):\n"," self.reset()\n"," empty_spots = self._getEmptySpots()\n"," empty_spots = sorted(empty_spots.items(), key=lambda x: len(x[1]))\n"," t0 = time.time()\n"," ans = self.helper(empty_spots, 0)\n"," print('sorted total time: ', time.time() - t0)\n"," return ans\n","\n"," \n"," \n"," \n"," "],"execution_count":0,"outputs":[]},{"metadata":{"id":"zIti82qiQ7Je","colab_type":"code","outputId":"c8220407-50a1-426d-a2df-cc6d005b497d","executionInfo":{"status":"ok","timestamp":1553038549788,"user_tz":420,"elapsed":786,"user":{"displayName":"Li Yin","photoUrl":"https://lh5.googleusercontent.com/-KgiTKdqPRUg/AAAAAAAAAAI/AAAAAAAAAIE/aHQ6xO5vQpY/s64/photo.jpg","userId":"13365523799853678553"}},"colab":{"base_uri":"https://localhost:8080/","height":72}},"cell_type":"code","source":["board = [[5, 3, None, None, 7, None, None, None, None],\n"," [6, None, None, 1, 9, 5, None, None, None],\n"," [None, 9, 8, None, None, None, None, 6, None],\n"," [8, None, None, None, 6, None, None, None, 3], \n"," [4, None, None, 8, None, 3, None, None, 1], \n"," [7, None, None, None, 2, None, None, None, 6], \n"," [None, 6, None, None, None, None, 2, 8, None], \n"," [None, None, None, 4, 1, 9, None, 
None, 5],\n"," [None, None, None, None, 8, None, None, 7, 9]]\n","solver = SudokoSolver(board)\n","solver.backtrackSolver()\n","solver.backtrackSolverSorted()"],"execution_count":0,"outputs":[{"output_type":"stream","text":["total time: 0.027954578399658203\n","sorted total time: 0.0004558563232421875\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/plain":["True"]},"metadata":{"tags":[]},"execution_count":239}]}]} \ No newline at end of file diff --git a/Colab_Codes/Colab Notebooks/graph_search_application.ipynb b/Colab_Codes/Colab Notebooks/graph_search_application.ipynb deleted file mode 100644 index 27edfcc..0000000 --- a/Colab_Codes/Colab Notebooks/graph_search_application.ipynb +++ /dev/null @@ -1 +0,0 @@ -{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"graph_search_application.ipynb","version":"0.3.2","provenance":[],"toc_visible":true},"kernelspec":{"name":"python3","display_name":"Python 3"}},"cells":[{"metadata":{"id":"auwghE4hWHSK","colab_type":"text"},"cell_type":"markdown","source":["### Cycle Check"]},{"metadata":{"id":"6nYEnOCo3FOC","colab_type":"code","colab":{}},"cell_type":"code","source":["# initialization\n","class STATE:\n"," white = 0\n"," gray = 1\n"," black = 2"],"execution_count":0,"outputs":[]},{"metadata":{"id":"dLTp6RWw2mYX","colab_type":"text"},"cell_type":"markdown","source":["### For directed graph"]},{"metadata":{"id":"xPzr88zz2pw_","colab_type":"code","outputId":"3bd3c586-7da2-420e-c60c-d24569711308","executionInfo":{"status":"ok","timestamp":1552244194011,"user_tz":420,"elapsed":539,"user":{"displayName":"Li Yin","photoUrl":"https://lh5.googleusercontent.com/-KgiTKdqPRUg/AAAAAAAAAAI/AAAAAAAAAIE/aHQ6xO5vQpY/s64/photo.jpg","userId":"13365523799853678553"}},"colab":{"base_uri":"https://localhost:8080/","height":35}},"cell_type":"code","source":["al = [[] for _ in range(7)]\n","\n","# set 8 edges\n","al[0] = [1]\n","al[1] = [2]\n","al[2] = [0, 4]\n","al[4] = [3]\n","al[5] = 
[6]\n","\n","print(al)"],"execution_count":0,"outputs":[{"output_type":"stream","text":["[[1], [2], [0, 4], [], [3], [6], []]\n"],"name":"stdout"}]},{"metadata":{"id":"iq07bZAp29mi","colab_type":"code","colab":{}},"cell_type":"code","source":["def hasCycle(g, s, state):\n"," '''convert dfs to check cycle'''\n"," state[s] = STATE.gray # first be visited\n"," for v in g[s]:\n"," if state[v] == STATE.white:\n"," if hasCycle(g, v, state):\n"," return True\n"," elif state[v] == STATE.gray: # aback edge\n"," return True\n"," else:\n"," pass\n"," state[s] = STATE.black # mark it as complete\n","\n"," return False\n"," "],"execution_count":0,"outputs":[]},{"metadata":{"id":"zbtKB8Rg3bWg","colab_type":"code","colab":{}},"cell_type":"code","source":["def cycleDetect(g):\n"," '''cycle detect in directed graph'''\n"," n = len(g)\n"," state = [STATE.white] * n\n"," for i in range(n):\n"," if state[i] == STATE.white:\n"," if hasCycle(g, i, state):\n"," print('cycle starts at vertex ', i)\n"," return True\n"," return False"],"execution_count":0,"outputs":[]},{"metadata":{"id":"q-0e10OJ4M5i","colab_type":"code","outputId":"54d8c30c-f873-4ed4-84c4-ff1e3e2fdc7a","executionInfo":{"status":"ok","timestamp":1552244246348,"user_tz":420,"elapsed":338,"user":{"displayName":"Li Yin","photoUrl":"https://lh5.googleusercontent.com/-KgiTKdqPRUg/AAAAAAAAAAI/AAAAAAAAAIE/aHQ6xO5vQpY/s64/photo.jpg","userId":"13365523799853678553"}},"colab":{"base_uri":"https://localhost:8080/","height":54}},"cell_type":"code","source":["cycleDetect(al)"],"execution_count":0,"outputs":[{"output_type":"stream","text":["cycle starts at vertex 5\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/plain":["True"]},"metadata":{"tags":[]},"execution_count":6}]},{"metadata":{"id":"GTx9ANEY562r","colab_type":"text"},"cell_type":"markdown","source":["#### For undirected Graph"]},{"metadata":{"id":"wE01rAq55-oF","colab_type":"code","colab":{}},"cell_type":"code","source":["def hasCycle(g, s, p, state):\n"," 
'''convert dfs to check cycle'''\n"," state[s] = STATE.gray # first be visited\n"," for v in g[s]:\n"," if state[v] == STATE.white:\n"," if hasCycle(g, v, s, state):\n"," return True\n"," elif state[v] == STATE.gray and v != p: # aback edge\n"," return True\n"," else:\n"," pass\n"," state[s] = STATE.black # mark it as complete\n","\n"," return False"],"execution_count":0,"outputs":[]},{"metadata":{"id":"sA6dMqrE7MOC","colab_type":"code","colab":{}},"cell_type":"code","source":["def cycleDetect(g):\n"," '''cycle detect in directed graph'''\n"," n = len(g)\n"," state = [STATE.white] * n\n"," for i in range(n):\n"," if state[i] == STATE.white:\n"," if hasCycle(g, i, -1, state):\n"," print('cycle starts at vertex ', i)\n"," return True\n"," return False"],"execution_count":0,"outputs":[]},{"metadata":{"id":"aD-FizTB6WbB","colab_type":"code","outputId":"6bc3e3f4-bcfa-443d-e037-49ca81fca9d7","executionInfo":{"status":"ok","timestamp":1552244252680,"user_tz":420,"elapsed":592,"user":{"displayName":"Li Yin","photoUrl":"https://lh5.googleusercontent.com/-KgiTKdqPRUg/AAAAAAAAAAI/AAAAAAAAAIE/aHQ6xO5vQpY/s64/photo.jpg","userId":"13365523799853678553"}},"colab":{"base_uri":"https://localhost:8080/","height":35}},"cell_type":"code","source":["al = [[] for _ in range(7)]\n","\n","# set 8 edges\n","al[0] = [1, 2]\n","al[1] = [0, 2]\n","al[2] = [0, 4]\n","al[4] = [2, 3]\n","al[5] = [6]\n","al[6] = [5]\n","\n","print(al)"],"execution_count":0,"outputs":[{"output_type":"stream","text":["[[1, 2], [0, 2], [0, 4], [], [2, 3], [6], [5]]\n"],"name":"stdout"}]},{"metadata":{"id":"30NtiTxy7TIq","colab_type":"code","outputId":"d1c19bb1-7043-469c-a8f3-a40a800abaa3","executionInfo":{"status":"ok","timestamp":1552244255134,"user_tz":420,"elapsed":563,"user":{"displayName":"Li 
Yin","photoUrl":"https://lh5.googleusercontent.com/-KgiTKdqPRUg/AAAAAAAAAAI/AAAAAAAAAIE/aHQ6xO5vQpY/s64/photo.jpg","userId":"13365523799853678553"}},"colab":{"base_uri":"https://localhost:8080/","height":54}},"cell_type":"code","source":["cycleDetect(al)"],"execution_count":0,"outputs":[{"output_type":"stream","text":["cycle starts at vertex 0\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/plain":["True"]},"metadata":{"tags":[]},"execution_count":10}]},{"metadata":{"id":"WkpyVhW8WMN1","colab_type":"text"},"cell_type":"markdown","source":["### Topolgical Sort"]},{"metadata":{"id":"G1MfbU4eWPrA","colab_type":"code","outputId":"f615d164-63d1-4533-95db-4413433655d4","executionInfo":{"status":"ok","timestamp":1552244256915,"user_tz":420,"elapsed":510,"user":{"displayName":"Li Yin","photoUrl":"https://lh5.googleusercontent.com/-KgiTKdqPRUg/AAAAAAAAAAI/AAAAAAAAAIE/aHQ6xO5vQpY/s64/photo.jpg","userId":"13365523799853678553"}},"colab":{"base_uri":"https://localhost:8080/","height":35}},"cell_type":"code","source":["al = [[] for _ in range(7)]\n","\n","# set 8 edges\n","al[0] = [1]\n","al[1] = [2]\n","al[2] = [4]\n","al[3] = []\n","al[4] = [3, 5]\n","al[5] = [6]\n","al[6] = []\n","\n","print(al)"],"execution_count":0,"outputs":[{"output_type":"stream","text":["[[1], [2], [4], [], [3, 5], [6], []]\n"],"name":"stdout"}]},{"metadata":{"id":"Pf7Jaj8CXWSM","colab_type":"code","colab":{}},"cell_type":"code","source":["def dfs(g, s, colors, orders, complete_orders):\n"," colors[s] = STATE.gray\n"," orders.append(s)\n"," for v in g[s]:\n"," if colors[v] == STATE.white:\n"," dfs(g, v, colors, orders, complete_orders)\n"," # complete\n"," colors[s] = STATE.black # this is not necessary in the code, just to help track the state\n"," complete_orders.append(s)\n"," 
return"],"execution_count":0,"outputs":[]},{"metadata":{"id":"FvKHA5plXaoW","colab_type":"code","colab":{}},"cell_type":"code","source":[""],"execution_count":0,"outputs":[]},{"metadata":{"id":"hbTw8Pf3YzjJ","colab_type":"code","colab":{}},"cell_type":"code","source":["def topo_sort(g):\n"," n = len(g)\n"," orders, complete_orders = [], []\n"," colors = [STATE.white] * n\n"," for i in range(n): # run dfs on all the node\n"," if colors[i] == STATE.white:\n"," dfs(g,i, colors, orders, complete_orders)\n","\n"," #print(orders, complete_orders[::-1])\n"," return orders, complete_orders[::-1]"],"execution_count":0,"outputs":[]},{"metadata":{"id":"WeoxmZkcZUKA","colab_type":"code","outputId":"961ef42a-ee79-4d5c-d8a5-bcc71ecf495c","executionInfo":{"status":"ok","timestamp":1552244278240,"user_tz":420,"elapsed":360,"user":{"displayName":"Li Yin","photoUrl":"https://lh5.googleusercontent.com/-KgiTKdqPRUg/AAAAAAAAAAI/AAAAAAAAAIE/aHQ6xO5vQpY/s64/photo.jpg","userId":"13365523799853678553"}},"colab":{"base_uri":"https://localhost:8080/","height":35}},"cell_type":"code","source":["orders, complete_orders = topo_sort(al)\n","print(orders, complete_orders)"],"execution_count":0,"outputs":[{"output_type":"stream","text":["[0, 1, 2, 4, 3, 5, 6] [0, 1, 2, 4, 5, 6, 3]\n"],"name":"stdout"}]},{"metadata":{"id":"nHK_jknJXrhq","colab_type":"text"},"cell_type":"markdown","source":["Now, change the edge (2->4) to (4->2) and run the code again."]},{"metadata":{"id":"TX9opC9CXqbZ","colab_type":"code","outputId":"caa8397f-c251-4f8e-c469-7399412eec16","executionInfo":{"status":"ok","timestamp":1552244281902,"user_tz":420,"elapsed":392,"user":{"displayName":"Li 
Yin","photoUrl":"https://lh5.googleusercontent.com/-KgiTKdqPRUg/AAAAAAAAAAI/AAAAAAAAAIE/aHQ6xO5vQpY/s64/photo.jpg","userId":"13365523799853678553"}},"colab":{"base_uri":"https://localhost:8080/","height":35}},"cell_type":"code","source":["al[2].remove(4)\n","al[4].append(2)\n","print(al)"],"execution_count":0,"outputs":[{"output_type":"stream","text":["[[1], [2], [], [], [3, 5, 2], [6], []]\n"],"name":"stdout"}]},{"metadata":{"id":"AXgDs24MYJHA","colab_type":"code","outputId":"881cb099-d2d1-4f6e-a8d7-e0c08c5d9108","executionInfo":{"status":"ok","timestamp":1552244283447,"user_tz":420,"elapsed":405,"user":{"displayName":"Li Yin","photoUrl":"https://lh5.googleusercontent.com/-KgiTKdqPRUg/AAAAAAAAAAI/AAAAAAAAAIE/aHQ6xO5vQpY/s64/photo.jpg","userId":"13365523799853678553"}},"colab":{"base_uri":"https://localhost:8080/","height":35}},"cell_type":"code","source":["orders, complete_orders = topo_sort(al)\n","print(orders, complete_orders)"],"execution_count":0,"outputs":[{"output_type":"stream","text":["[0, 1, 2, 3, 4, 5, 6] [4, 5, 6, 3, 0, 1, 2]\n"],"name":"stdout"}]},{"metadata":{"id":"GTwh1v4l7G2B","colab_type":"text"},"cell_type":"markdown","source":["### Connected Components\n","\n","In the example, we only experiment with BFS. 
"]},{"metadata":{"id":"kIPfcNX9ARU7","colab_type":"code","colab":{}},"cell_type":"code","source":["def bfs(g, s, state):\n"," state[s] = True\n"," \n"," q, orders = [s], [s]\n"," while q:\n"," u = q.pop(0)\n"," \n"," for v in g[u]:\n"," if not state[v]:\n"," state[v] = True\n"," q.append(v)\n"," orders.append(v)\n"," return orders"],"execution_count":0,"outputs":[]},{"metadata":{"id":"eH7-zUmhAoDp","colab_type":"code","colab":{}},"cell_type":"code","source":["def connectedComponent(g):\n"," n = len(g)\n"," ccs = []\n"," state = [False] * n\n"," for i in range(n):\n"," if not state[i]:\n"," ccs.append(bfs(g, i, state))\n"," return ccs \n"," "],"execution_count":0,"outputs":[]},{"metadata":{"id":"6VWvKNSGBLeA","colab_type":"code","outputId":"8df73569-85f6-4b11-a59a-66ea47411c6d","executionInfo":{"status":"ok","timestamp":1552244289362,"user_tz":420,"elapsed":371,"user":{"displayName":"Li Yin","photoUrl":"https://lh5.googleusercontent.com/-KgiTKdqPRUg/AAAAAAAAAAI/AAAAAAAAAIE/aHQ6xO5vQpY/s64/photo.jpg","userId":"13365523799853678553"}},"colab":{"base_uri":"https://localhost:8080/","height":54}},"cell_type":"code","source":["al = [[] for _ in range(7)]\n","\n","# set 8 edges\n","al[0] = [1, 2]\n","al[1] = [0, 2]\n","al[2] = [0, 4]\n","al[4] = [2, 3]\n","al[5] = [6]\n","al[6] = [5]\n","\n","print(al)\n","print(connectedComponent(al))"],"execution_count":0,"outputs":[{"output_type":"stream","text":["[[1, 2], [0, 2], [0, 4], [], [2, 3], [6], [5]]\n","[[0, 1, 2, 4, 3], [5, 6]]\n"],"name":"stdout"}]},{"metadata":{"id":"arzllBW_GsIX","colab_type":"text"},"cell_type":"markdown","source":["####Strongly connected components"]},{"metadata":{"id":"WkIxpFk7GvkW","colab_type":"code","outputId":"589d3781-35f5-45e4-cbe1-65446ddb50ba","executionInfo":{"status":"ok","timestamp":1552244293701,"user_tz":420,"elapsed":360,"user":{"displayName":"Li 
Yin","photoUrl":"https://lh5.googleusercontent.com/-KgiTKdqPRUg/AAAAAAAAAAI/AAAAAAAAAIE/aHQ6xO5vQpY/s64/photo.jpg","userId":"13365523799853678553"}},"colab":{"base_uri":"https://localhost:8080/","height":54}},"cell_type":"code","source":["'''in the second undirected graph'''\n","al2 = [[] for _ in range(7)]\n","\n","# set 8 edges\n","al2[0] = [1]\n","al2[1] = [2]\n","al2[2] = [0, 4]\n","al2[4] = [3]\n","al2[5] = [6]\n","\n","print(al)\n","\n","'''in the first undirected graph'''\n","al1 = al2[::]\n","\n","al1[3].append(1)\n","\n","print(al1)"],"execution_count":0,"outputs":[{"output_type":"stream","text":["[[1, 2], [0, 2], [0, 4], [], [2, 3], [6], [5]]\n","[[1], [2], [0, 4], [1], [3], [6], []]\n"],"name":"stdout"}]},{"metadata":{"id":"0c7CQ8aUIw1c","colab_type":"code","colab":{}},"cell_type":"code","source":["def topo_sort(g):\n"," v = len(al)\n"," orders, complete_orders = [], []\n"," colors = [STATE.white] * v\n"," for i in range(v): # run dfs on all the node\n"," if colors[i] == STATE.white:\n"," dfs(al,i, colors, orders, complete_orders)\n"," return complete_orders[::-1]"],"execution_count":0,"outputs":[]},{"metadata":{"id":"H3ra08cvIz5B","colab_type":"code","outputId":"41e31576-c092-4f7a-919b-1cb22d1eda04","executionInfo":{"status":"ok","timestamp":1552244300038,"user_tz":420,"elapsed":326,"user":{"displayName":"Li Yin","photoUrl":"https://lh5.googleusercontent.com/-KgiTKdqPRUg/AAAAAAAAAAI/AAAAAAAAAIE/aHQ6xO5vQpY/s64/photo.jpg","userId":"13365523799853678553"}},"colab":{"base_uri":"https://localhost:8080/","height":54}},"cell_type":"code","source":["print(topo_sort(al2))\n","print(topo_sort(al1))"],"execution_count":0,"outputs":[{"output_type":"stream","text":["[5, 6, 0, 1, 2, 4, 3]\n","[5, 6, 0, 1, 2, 4, 3]\n"],"name":"stdout"}]},{"metadata":{"id":"YVz-BG3YFJFX","colab_type":"text"},"cell_type":"markdown","source":["### Minimum Spanning Tree"]},{"metadata":{"id":"tI9aTeoy7Z2p","colab_type":"text"},"cell_type":"markdown","source":["#### Prim's 
Algorithm"]},{"metadata":{"id":"ZUW60VFQGwiD","colab_type":"code","colab":{}},"cell_type":"code","source":["a= {1:[(2, 2), (3, 12), (4, 10)], 2:[(1, 2), (3, 8), (5, 9)], 3:[(1, 12), (2, 8), (4, 6), (5, 3)], 4:[(1, 10),(3, 6), (5, 7)], 5:[(2, 9), (3, 3), (4, 7)]}\n","\n","class edge():\n"," def __init__(self, pid, id, w ):\n"," self.pid = pid\n"," self.id = id\n"," self.w = w\n"," def __lt__(self, other):\n"," return self.w < other.w\n"," \n"," def __eq__(self, other):\n"," return self.w == other.w\n"," \n"," def __str__(self):\n"," return str(self.pid) + '->' + str(self.id) + ':' + str(self.w)\n"," \n"],"execution_count":0,"outputs":[]},{"metadata":{"id":"AdLTUkgxAI1I","colab_type":"code","colab":{}},"cell_type":"code","source":["import queue\n","def prim(g, n):\n"," # step 1:\n"," start = 1\n"," V = {start} #spanning tree set\n"," E = queue.PriorityQueue() # the set of all edges, \n"," ans = []\n"," \n"," while len(V) < n:\n"," # add edges of start, and the other endpoint is in nv\n"," idlst = g[start]\n"," for id, w in idlst:\n"," if id not in V:\n"," E.put(edge(start, id, w))\n"," \n"," while E:\n"," # pick the smallest edge\n"," minEdge = E.get()\n","\n"," if minEdge.id not in V:\n"," # set the new id as start\n"," start = minEdge.id\n"," # add this id to the set of tree nodes\n"," V.add(minEdge.id)\n"," ans.append(minEdge)\n"," break\n"," return ans\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," "],"execution_count":0,"outputs":[]},{"metadata":{"id":"40vRdighTR8f","colab_type":"code","outputId":"d3c8197f-536b-4868-8f21-6dab330cdf69","executionInfo":{"status":"ok","timestamp":1553243555679,"user_tz":420,"elapsed":802,"user":{"displayName":"Li Yin","photoUrl":"https://lh5.googleusercontent.com/-KgiTKdqPRUg/AAAAAAAAAAI/AAAAAAAAAIE/aHQ6xO5vQpY/s64/photo.jpg","userId":"13365523799853678553"}},"colab":{"base_uri":"https://localhost:8080/","height":90}},"cell_type":"code","source":["ans = prim(a, 5)\n","for e in ans:\n"," 
print(e)"],"execution_count":0,"outputs":[{"output_type":"stream","text":["1->2:2\n","2->3:8\n","3->5:3\n","3->4:6\n"],"name":"stdout"}]},{"metadata":{"id":"0r0DIbiU7d_v","colab_type":"code","colab":{}},"cell_type":"code","source":["class node:\n"," def __init__(self, p, w):\n"," self.p = p\n"," self.w = w\n"," def __lt__(self, other):\n"," return self.w < other.w\n"," def __eq__(self, other):\n"," return self.w == other.w\n"," def __str__(self):\n"," return str(self.p) + '->' +str(self.id)+':'+str(self.w)\n","\n"],"execution_count":0,"outputs":[]},{"metadata":{"id":"iE1wCIcbtj7t","colab_type":"code","colab":{}},"cell_type":"code","source":["def extractMin(q):\n"," minNode = None\n"," minW = float('inf')\n"," minIndex = -1\n"," for idx, node in enumerate(q):\n"," if node.w < minW:\n"," minNode = node\n"," minW = node.w\n"," minIdx = idx\n"," #q.remove(minNode)\n"," return minNode, minIdx\n","\n","def primMst(g, n):\n"," q = [None]*n\n"," S = {}\n"," ans = []\n"," for i in range(n):\n"," q[i] = node(None, float('inf'))\n"," q[0] = node(None, 0)\n"," S = {1}\n"," # main process\n"," while len(S) < n:\n"," minNode, minIdx = extractMin(q)\n"," S.add(minIdx+1)\n"," if minNode.p is not None:\n"," ans.append((minNode.p+1, minIdx+1))\n"," q[minIdx] = node(None, float('inf'))\n"," for v, w in g[minIdx+1]:\n"," if v not in S and w < q[v-1].w:\n"," q[v-1].p = minIdx\n"," q[v-1].w = w\n"," return ans\n"," \n"," \n"," \n"," "],"execution_count":0,"outputs":[]},{"metadata":{"id":"JzWJqXcYRuyh","colab_type":"code","colab":{"base_uri":"https://localhost:8080/","height":35},"outputId":"c71f7b89-d6f5-40d5-cc45-f8391f4511b9","executionInfo":{"status":"ok","timestamp":1553382526262,"user_tz":420,"elapsed":401,"user":{"displayName":"Li Yin","photoUrl":"https://lh5.googleusercontent.com/-KgiTKdqPRUg/AAAAAAAAAAI/AAAAAAAAAIE/aHQ6xO5vQpY/s64/photo.jpg","userId":"13365523799853678553"}}},"cell_type":"code","source":["print(primMst(a, 
5))"],"execution_count":42,"outputs":[{"output_type":"stream","text":["[(1, 2), (2, 3), (3, 5), (3, 4)]\n"],"name":"stdout"}]},{"metadata":{"id":"vwa3SlexCQAn","colab_type":"text"},"cell_type":"markdown","source":["#### Kruskal's Algorithm"]},{"metadata":{"id":"QC19-o-XCUF1","colab_type":"code","colab":{}},"cell_type":"code","source":["def kruskalMst(g, n):"],"execution_count":0,"outputs":[]}]} \ No newline at end of file diff --git a/Colab_Codes/Colab Notebooks/linear_search_two_pointer.ipynb b/Colab_Codes/Colab Notebooks/linear_search_two_pointer.ipynb deleted file mode 100644 index cfbfe12..0000000 --- a/Colab_Codes/Colab Notebooks/linear_search_two_pointer.ipynb +++ /dev/null @@ -1 +0,0 @@ -{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"linear_search_two_pointer.ipynb","version":"0.3.2","provenance":[]},"kernelspec":{"name":"python3","display_name":"Python 3"}},"cells":[{"metadata":{"id":"Td9spABdUsmw","colab_type":"text"},"cell_type":"markdown","source":["## Fixed sliding window"]},{"metadata":{"id":"ZtQn4pmaUw9p","colab_type":"code","colab":{}},"cell_type":"code","source":["def fixedSlideWindow(A, k):\n"," n = len(A)\n"," if k >= n:\n"," return sum(A)\n"," # compute the first window\n"," acc = sum(A[:k])\n"," ans = acc\n"," # slide the window\n"," for i in range(n-k): # i is the start point of the window\n"," j = i + k # j is the end point of the window\n"," acc = acc - A[i] + A[j]\n"," ans = max(ans, acc)\n"," return ans"],"execution_count":0,"outputs":[]},{"metadata":{"id":"qadccsiTUyCl","colab_type":"code","colab":{"base_uri":"https://localhost:8080/","height":35},"outputId":"deb89a38-04eb-4da8-f75e-49efaccf633c","executionInfo":{"status":"ok","timestamp":1550371239838,"user_tz":480,"elapsed":421,"user":{"displayName":"Li Yin","photoUrl":"https://lh5.googleusercontent.com/-KgiTKdqPRUg/AAAAAAAAAAI/AAAAAAAAAIE/aHQ6xO5vQpY/s64/photo.jpg","userId":"13365523799853678553"}}},"cell_type":"code","source":["A 
=[8,5,10,7,9,4,15,12,90,13]\n","fixedSlideWindow(A,3)"],"execution_count":2,"outputs":[{"output_type":"execute_result","data":{"text/plain":["117"]},"metadata":{"tags":[]},"execution_count":2}]},{"metadata":{"id":"BGv4PowyVBul","colab_type":"text"},"cell_type":"markdown","source":["## Plot the prefix sum"]},{"metadata":{"id":"HY1vpO-fVKJb","colab_type":"code","colab":{}},"cell_type":"code","source":["nums = [2,3,1,2,4,3]\n","s = 7\n","\n","prefixSum = [0]\n","for n in nums:\n"," prefixSum.append(prefixSum[-1]+n)"],"execution_count":0,"outputs":[]},{"metadata":{"id":"sce50K_-VEGF","colab_type":"code","colab":{"base_uri":"https://localhost:8080/","height":388},"outputId":"01d62b6a-ef95-434b-8561-deddac3717dd","executionInfo":{"status":"ok","timestamp":1550371305221,"user_tz":480,"elapsed":975,"user":{"displayName":"Li Yin","photoUrl":"https://lh5.googleusercontent.com/-KgiTKdqPRUg/AAAAAAAAAAI/AAAAAAAAAIE/aHQ6xO5vQpY/s64/photo.jpg","userId":"13365523799853678553"}}},"cell_type":"code","source":["import matplotlib.pyplot as plt\n","import numpy as np\n","from mpl_toolkits.mplot3d import Axes3D\n","#nums = [3, 4, 5, 6, 7, 0, 1, 2]\n","x = np.arange(0, len(nums)-1, 1)\n","# fig = plt.figure(1)\n","f=plt.figure(figsize=(10,6))\n","\n","#f.gca().set_aspect('equal', adjustable='box')\n","\n","# f.suptitle('The process of monotone decreasing stack')\n","# f.xlabel('index')\n","# f.ylabel('value')\n","ax1 = f.add_subplot(121)\n","ax1.plot(np.arange(0, len(nums), 1), nums, marker='o', markersize=4, label = 'array')\n","ax1.plot(np.arange(-1, len(prefixSum)-1, 1), prefixSum, marker='o', markersize=4, label = 'prefix sum')\n","ax1.set_xlabel('index')\n","ax1.set_ylabel('value')\n","ax1.legend()\n","\n","x, y = np.meshgrid(np.arange(0, len(nums), 1), np.arange(0, len(nums), 1))\n","\n","Z = [[-1 for c in range(len(nums))] for r in range(len(nums))]\n","for r in range(len(nums)):\n"," for c in range(r, len(nums)):\n"," Z[r][c] = prefixSum[c+1]-prefixSum[r]\n","ax2 = 
f.add_subplot(122,projection='3d')\n","ax2.scatter(x,y, Z, marker='*',s=50, alpha=0.6)\n","\n","# for i in range(1, 3):\n","# print(deStacks[i-1])\n","# axarrs[0, i].plot(np.arange(0, len(deStacks[i-1]), 1), deStacks[i-1], markersize = 10, color = 'r', marker='*', linestyle=':') # x, A, 'o')\n","# axarrs[0, i].plot(x, A, 'o',markersize=4)\n","# axarrs[0, i].set_title('step: '+ str(i))\n","# for i in range(3):\n","# axarrs[1, i].plot(np.arange(0, len(deStacks[2+i]), 1), deStacks[2+i], markersize = 10, color = 'r', marker='*', linestyle=':')\n","# axarrs[1, i].plot(x, A, 'o',markersize=4)\n","# axarrs[1, i].set_title('step: '+str(i+3))\n","plt.show()"],"execution_count":5,"outputs":[{"output_type":"display_data","data":{"image/png":"iVBORw0KGgoAAAANSUhEUgAAAlwAAAFzCAYAAADrDtfOAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4yLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvOIA7rQAAIABJREFUeJzsnXmcHHW19p9aupbeZp/sGyEkYIDI\nGiBhSQibqCAKGOCConIFvLyoVyBXCIjLZfFFkKsCF9A3AQURVFQWWRVExBCRYBYg+zpLZum1umt5\n/6ip3qaX6u6q6ZqZ8/18+DDdU/Wr0z2TrmfOOb/nMIZhGCAIgiAIgiBcg210AARBEARBEGMdElwE\nQRAEQRAuQ4KLIAiCIAjCZUhwEQRBEARBuAwJLoIgCIIgCJchwUUQBEEQBOEyfKMDKEd3d6Sq41ta\n/Ojri7sUTe14NS7Au7F5NS7Au7F5NS6gutg6OkIuRzNyVPsZRhDE6Kbc59eYynDxPNfoEIri1bgA\n78bm1bgA78bm1bgAb8dGEAQxEowpwUUQBEEQBOFFSHARBEEQBEG4DAkugiAIgiAIlyHBRRAEQRAE\n4TIkuAiCIAiCIFyGBBdBEARBEITLkOAiCIIgCIJwGRJcBEEQBEEQLkOCiyAIgiAIwmU8PdrHKT7c\nNYCNO/oxd1ozZk9pqnu9WCyKW275JhKJBJLJJK699j/xrW/diIULT0BLSwt27twBnvdhcLAfK1as\nxDe/+XUMDEQyxw4MDOCFF57FjTfeCgC47bZv44QTFmPRopPqjo0gCIIgCO8xqgXX4y99gLc2dGUe\ncxwDTTPyjkmrOgbjqczjsF+Ajy+d2Dt6XifOX3Jg2ev29vbi7LPPwYknnow1a97CI4/8DKqqYuHC\n47Fw4fH4znduRjgcxnXX/Re2b9+Gz3zmMzj88GMzx37rW/+Nu+/+PhRFgc/nw7vvvoOvfvW6Gt8F\ngvA2Wwa24fWeXZjsm4JZTTMaHQ5BEERDGNWCyw6qpg97XE5w2aG1tQ0/+9n/4uc/X4V0Og1JkgAA\nhxzykcwx1tetrW34xS9+hp/85P7MsRzH4YQTFuGvf30dbW3tOOywBfD5fHXFRBBe5IO+LfjB2p/A\ngAGWYfHVI75MoosgiHHJqBZc5y85MC8b1dERQnd3JO+YD3cN4L8feRuaboBjGVx7/uF1lxUff/xR\ntLd34sYbb8W
GDf/Cvff+AADA81nRZH39+OOPYsKECfjGN27KO/aMMz6G1at/hkmTJmPZsjPqiocg\nvMrvtz4PA2bWWTd0vN+/mQQXQRDjklEtuOwwe0oTrr/oCEd7uAYG+jF79hwAwKuvvgxVVcseu2DB\nocOOnTNnLnp6utHf34crrriq7pgIwmvsiu7B+32bM49ZhsWc5gMaGBFBEETjGBe7FGdPacJZC2c4\nIrYAMzv12GOP4Nprr8JHPjIfvb29MAyj5LEPP/xw3rG///1vAQBHH30s5s49GAzDOBIXQXgFTdew\nev0vYcDApw48G8sPO4fKiQRBjGsYo5RS8ACF5cFKFCspeoFicRmGgf/zf67Cf/7nDZg6dVqDIhtd\n75lX8GpsXorrhe2v4qkPfo9jJh6BSw+5sKrYOjpCLkc3cnjl50EQxMhQ7vNrXGS4vMaePbtx+eWX\n4Oijj2mo2CIIN+iK9+B3m59D0BfAeXM+3uhwCIIgPMGY7+HyIpMmTcZDD61udBgE4Ti6oePRDU8g\nrau45OALEPQFGh0SQRCEJ6AMF0EQjvGX3X/D+/2bcVj7R3BE52GNDocgymIYRsn+W4JwGspwEQTh\nCH3Jfjz1wR8g8xIumHsObQYhPA/L6lBVFQzDAmDAMBz93hKuQRkugiDqxjAMPLbpKSS1JM6d/TE0\ni87sCCYIt+A4ZMSVYejQdRW6noKmpaDrKmW+CMchwUUQRN283fUO3u1Zj4OaZ+P4ycc0OhyCKIsp\ntgAgm83KZraMIcGVhixzQ0JML7YMQVQFCa4R5JFHfoZLLjkf77yzFrff/p1Gh0MQjhBNxfD4pt/A\nx/rw2XnnUUmG8DRZsVUahmHAsgxkWcyILyvzReKLqBXq4RpB3nzzDdx0062YM2cuDj/8o40OhyAc\n4Yn3n0Y0HcO5B34Mnf72RodDEEXhOBaBgIB4PGnreKuiWJj5YhgGmsYM9X2xYFnKWxD2GBeCa8vA\nNrzfvxlzmg+o2+n6D394Gm+++RfEYjF0d3fh/POX42Mf+wQuvPBcLFx4AlpaWvCxj30C3/verVDV\nNFiWxe23/zdefPFP2LRpA2677Tu46aZv4ZZbbsRdd92Lr3zlCvz4xw9B0zRceeXl+NGPHkQoZBqn\nqaqKb33rRvT29iCVSuHyy6/AjBkz8c1vXocHH1wFALj88kvw7W/fhoceuh8tLS3YuHED+vv7cNFF\nl+L3v38aAwP9uPfe+xEMBut+HwmikHU96/HWvrcxIzQNp0xd1OhwCKIoDGNmtjiuPnFE4ouoh1Et\nuJ784HdY2/Vu5jHHMtD0/EZHVVcxmMq6PYeFEHi29Mv+aOeh+NSBZ5e97pYtm/HQQ48gGo3isss+\nizPPPBuqqmLhwuOxcOHx+N73voULL7wIRx99LN544zX86Ec/wjXXXIff/e43+OpXvwGfTzBjCTfh\nggsuwurVP4WiKLjkks9nxBYAfPjhBxgY6Mf//M8DiEQieOON18vGxXE87r77x7jllm/i3Xf/ibvv\n/hFuvfVGvP3233HiiSeXPZcgqiWhJvGLjU+BZVhcdPCnwbFco0MiiGEwDMCylcuI1a9bSnwxAMzd\njlReJ3IZ1YLLDmldHfa4nOCyw4IFR4DneTQ3NyMUCmFgoB8AcMghHwEArFv3T2zfvg0/+9mD0HUd\nEyZ0lFzrzDPPxte+9hWwLIuvfOXavO/NmDET8XgMt956I0488RSceupp2Ldvb8m1Dj7YvH5bWztm\nzJgJAGhpaUMsFq3n5RJEUX774TPoU/px5sxTMSU4qdHhEMQwLKHltu7JFV/m7kYdhoFM5othWBJf\nxOgWXJ868Oy8bFSxeW1bBrbh/779Y+iGDpZhcdXhn6+7rKjnZNHMOr/5D4nnfZn/33rrbWhvby8Z\nl4WmaUgmkzAM0w+G57M/EkmScN99P8W77/4TzzzzNF5//c/4/Oe/lHe+qmYFJ
cdxRb+m7c2E03zQ\nvwV/2vUGJgYm4PSZSxodDkEMo1BsmQKomhUM5O5irAXD0EHii7AY8wXnWU0z8NUjvoxPzj4TXz3i\ny3WLLQB4771/QtM09Pf3Ix6Poakp33PokEPm489/fgUAsGbNW3j66adLrvXzn6/G0qXLsHjxyfjF\nL/LH/WzcuAF//OOzOPzwBfj612/A1q1b4PcH0Ne3H4ZhoLe3B7t376z79RBENaS0NB5Z/0swYHDx\nvE/DV2fGmCCcxm5mi2VZ8PzIlMINQwfLGgiFBOh6Grqu0R/D4wxXPyk3bdqEK6+8Epdddhkuvvji\nzPN//vOf8YUvfAEbN2508/IZZjXNcERoWUycOBk33ng9du3agS996cphjZKXX/4lfPe7t+CFF54D\nwzC4887bi66zd+8e/OlPL+HHP34IhmHgi1+8FKeeejomTjTLM5MmTcZ99/0PfvObJ8GyLJYvvwTh\ncBhHHXUMvvCFf8OBB87BnDlzHXtdBGGHZ7a+gK5ED06ZtsjRf1cE4QTlxVb2SZ7nEAhImYpFMqlA\nUVJIp9ViJzoCwzDgeY4yX+MUxnBJYsfjcVxxxRWYOXMm5s6dmxFciqLgC1/4ArZs2YLXXnut7Bql\nynClKFe6c4o//OFpbN78Ia6++v/YPmck4qoVr8bm1bgA78Y2EnHtiOzC7X//IVrEJvzXsV+DyAmO\nx9bREap80CjBi78nYxmWNf8r/j0GwaAfg4Mx+Hwc/H4JkUgCiUQCPM9BkkRIkgCGYZBMpqAoKTQ3\nh9DVtd+x+HieR1NTAL29A3nPG4Yx1GRP4mu0U+7zy7WSoiAIeOCBB9DZ2Zn3/E9+8hMsX74cgmDv\ng5ogCG+g6RpWr/8ldEPHZ+edZ1tsEcRIwHGlxVYuPh8Pv19CNJqAqmoAAFXVEI3G0dPTj76+QRiG\ngXA4AIZhEAoF4PO5WzbPHzGUphFDYxTXfot4ns9rAAeALVu2YMOGDbjmmmtwxx13VFyjpcVfdX3d\n7b+OL710eU3nefmvdq/G5tW4AO/G5mZcv17/HHZGd+PkWcfhxLlHVn2+V98zYvRjxz0eMI/x+0VE\nIomSjvGW+IrFEujoaIGu6wiHg2BZFoqiIJlMIZVKO/wKcmMstJvQYA7WZmm49ihnRLtdv/e97+Gb\n3/ym7eP7+uJVrT+eSz214tXYvBoX4N3Y3IxrX6wLj6/7HcJCCB+berqr5X4SZkQ12BVbPp8PDMNg\ncDCWt9O8ErFYArFYAhzHQpJEhEJ+sCwHRUkhmVQaJr7IaHX0MWKCa9++fdi8eTO+/vWvAwC6urpw\n8cUXY/Xq1RXOJAiikeiGjkc2PAFVV3HBQefA7/M3OiSCAGDf0FSSBAiCD4ZhVCW2ctE0PU98iaKI\nYNCswpg9XwoUZSTFF7ncjzZGTHBNmDABL7zwQubxkiVLSGwRxCjgtV1/xYcDW7Gg41As6Dy00eEQ\nBACz5SQWS1QUULIswOfjEY0mEArJjlxb03TE4wnE4wmwLAtJEhAI+NHUZGW+zKb7QpyqBhaKr87O\nVvT09MNsyybx5VVcE1zr1q3Dbbfdhl27doHneTz33HP44Q9/iObmZrcuSRCEw+xP9uHXH/4BMi/j\n/IPOaXQ4BAHAKiNWVi+yLILnOUQicVRvYmrP+FTXdcTjScTjyYz48vslNDUFoSjpjN1EZlWHe+Cz\nuxsp8+V1XBNc8+fPx6pVq0p+/6WXXnLr0gRBOIBhGPj5xiehaClcfPD5aBKpt4poPNmeLUsQFVcw\nfr8ElmWGxFZt2aVqz8kXXwxEUcyIr1Qq7ZrHF8NYTvq5I4Y0ABo0jby+vAJZRBMEUZS39q3Fv3o3\nYl7LHCycWP2uRIJwmsIG+VLaIRCQwDAMotFEzrPVjeqpNxOl6wYSiSQSiSQYhoEkCZBlCT4fj+bm\nUKbsOBK2D2S06g1IcBEEMYxIKoon3v8tB
E7A8nnn0Qcz0VAYZniDfCmdEgzKMAyjQGxl12kEhmEg\nkVCgqhpCoQCSyRQkSUA4HEAqpWbsJkh8jW1IcBEEMYxfbvoNYuk4Pj3nE2iTWxsdDjGOKSa2ShEM\nytB1A/F4ctj3vOQfmkwqSCYVMAwDURQgSQJCoQDSaXXoeyS+xiIkuAiCyOOf3e9hTdc7mBWejpOm\nHt/ocIhxjN0h1AwDBIN+qKqGREKxvX6jdYVhGDniCxBFAaIo5ogv026i0k5MJwQSiS/3IcFFEESG\nhJrALzY+BZ7hcNHBnwHL0A4nojHYE1vM0PgdGem0ikRiuBVD2bM9JCYMA0gmTUsJADmZL1NIWpmv\nUg75zsaiQxR5CIJvqDRL4ssJSHARBJHhqQ/+gIHUIM6edRomBSY0OhxinGJHbBmGAZZlEAj4kUql\nM0KlWhiG8eS8QkXJenlZ4svK4hWKLzfiZ1lm6L3RYRjakNiiEUP1QIKLIAgAwKa+D/D67jcxOTAR\ny2ac3OhwiHGK3TIiYM5FNMtu7ji8e0VU5IovQfBBkkS0t1viqzahWZnsa6cRQ85AgosgCKS0FB7Z\n8CswYHDxwZ8Bz9JHAzHysKz5X+Xj2JyROvbFllcEVD2kUmmkUmkMDlriSwAAtLU1Z/rBNM2psuPw\nzBmNGKod+lQlCAK/2/I8ehK9WDr9RMwIT2t0OMQ4xO4Qao5jEQzK0DQdqqq5GpMXS425WOJLliVE\nIjGIooDW1mboup4pO2pabe+RZaZa/hgSX9VAgosgxjnbBnfgpe1/RrvchrNnndbocIhxSDgsIZFQ\nKgocnucQCEiIx5MQBKHqXYZeF1C1YAkjS3xFIjH4fDwkSURraxi6bgzNd1RcFagkvipDgosgxjGq\nrmL1+l/CgIGL5p0HgRMaHRIxzuA4gOfZis3rltiKxZJQVQ2CUJ1z/Nhl+HijdFpFOq3miC8BLS3h\noZ2Qii3xZQqo2gQqia/ikOAiiHHMH7e9it2xvThh8rE4qOXARodDjDPsGpr6fDz8fhHRaMLB/qTx\nQVZ8xcHzpvhqbg4DQGawdqkZj04kBAvnO0oSD5Zlh/zSxpfdBAkughin7Intw7NbX0CTEMa5B57V\n6HCIcUZuz5Zptln8OEHgIcvuiC1BMAWIJTzGOqqqIhpVEY3GwfMcJElEU1MQDMMMeYAprg3YtuA4\nK5s5/oxWSXARxDhEN3Q8sv4JqIaGC+eeC5mXGx0SMY6w2yAviuYuvEgk4bjhpyj6IIoC4vEE/H4J\nTU3BoV6n1DgRXxqi0Xie+AqHg2BZU3wxDDMiJqvA+HG5J8FFEOOQV3f+BVsGt+HIzsNxWMdHGh0O\nMY6wL7YEiKIPkUi86GibclmxcjAMIAjZtRVFQSyWAMsyEEURgYCcEV/W8WOw1z6PXPHFcRwkSYDf\nLw/NsTQFWCrljNdZpV69sSy+SHARxDijN7Efv/3wGQR4Pz5z0CcbHQ4xjigltgzDyLuhSpIAQeAR\nicQd3VloGKaQK7a2rhtIJJJIJJJgWQaSJEKSRHR0tEJRUkgkFMdEh5M4LQg1TUMslsj8PAzDQCjk\nB8dxmbLjSL0PuS73Y0F8keAiiHGEYRh4dMOvkNLT+Oy88xASgo0OiRgHmJkSexkpWRbB8xwikUQF\nsVX9LkUzu8VXXFvXDcTjSQSDfvT2DkAUfQgG/Rmz1ZEUHY2CYQBNMxCPJxCLJcBxLERRzHsfFEWp\n2uXfLFXaV4hZ4VcovphRN2KIBBdBjCP+uncNNvS9j0Pa5uLoCR9tdDjEOKAaseX3i+A4DtFo3PEy\nniyLAIBotJKQy0fXdcTjScTjSbAsm5lp6B3xVbt9Q+V1sz1cmqYjHk8gHk9k3odAwI+mJq6G3rf6\n7CZM8WXkjRgaDeKLBBdBjBMGlAh+9f7TEDkBn537Kc9/OBGjH7tzEQ0DmRE1kUjc1trV9HBZWTPD\nMOoSc
t4XXyNDsfchu/EgPfQ+pIq+106VQAu9vjjOgCRJiMdNuwkven2R4CKIccIvN/0aCTWBCw46\nB61SS6PDIcY41Qyh5nkWum7YFlvVYGXNIpE4mpoCjq07XHTkltsURxvNG4FdYZT/PjAQxaz4SqXS\nQ5Yb6ZysovMZOYZhMsIvGo151miVBBdBjAP+0fUu1na/i9lNM7FoysJGh0OMcaoRW8GgPOSA7rwV\ng98vgWUZV4RcLqboMMttVq9TKOQHy3JQFHfFl3u7KKsXRubGAwWJhAKGYSBJAmRZQjhsia+Ua/Ga\n6xrDMl9eEl8kuAhijBNPx/HYpl+DZ3lcNO/TYBlv/LVHjD0YxspW6RXFFsMAwaAfmqa5MuMwEJDA\nMAyi0UTmOasM6abNQ26vE8eZma9QKACWZZFOp8dNKd8w8sWXKAqQJAGiKIDnOTCMKbKd+tkXs5sY\nLr5YAI0bX0aCiyDGOE9+8HsMpiL45AFnYkKgs9HhEGMYn49FICBVzCgxDINgUIaqakgkFPj9Ug2e\nWkZJ8VJMbNVCvQJN03TEYtldfn6/DEEQ0NHRmplp6Laze604KUwNw8i83ubmEFRVgygKCIUCSKfV\nTAnWzeHiXhC6JLgIYgyzYf/7eGPPW5ganIyl009sdDjEGMauoSnDMAiFZKRSakEZsbobYql7cyAg\nAzDqFltOo2k6kkkFPh+PgYHIMGf32sWXW7sU3YJBOp1GNBoHw1gGt2KO+DLtJqqxjgAqG6paxzQS\nElwEMUZRtBQe3fAEWIbFRQd/GhzLNTokYoxiia1KOwdZlkEw6IeipAr8m5wRDGY/mIFYLFnmqMYL\nlPzMl+ns7oz4chJ33qfczJnVu2cJb6vsGAr5oapaJvNlZ8TQaJgIQIKLIMYoT29+Fr3JPpw24xRM\nD01tdDjEGCU/s1XajJRlWYRCMhKJ4Q3ktY7pySUYlDOGpaUpdkcuJyyMCt+vH8vZvVbx5W4Tuhvr\nlv5Bm0I8X3wFg/bEl7mutxUXCS6CGINsGdiGV3a8jk5/O86ceWqjwyHGKHYNTTmORTAoD43HcT5z\nY09seZ9c8WUNlG5qCoJhmIzgaHzmq37s9Grlii9B8EGSRLS3W+LLLDtqWq74YijDRRDEyJLWVaze\n8AQMGLho3mcgcL5Gh0SMQYr1bBXLVHEch2DQNKR0UiwYhlmiDIXMnY6m4WXlczzQO22L3IHSueIL\nyIovVXVTfLlXUqyWVCqNVCqNwUFLfAkIBJqh63qmGd+yhfAyJLgIYozx/NaXsDe2DydOOQ4HNs9q\ndDjEGMRugzzPcwgEJMRiSaiqVvK4wuHV9jAgCD6kUioSicpiazRTTHw1N4cAYFT6e9UjjCzxBcQg\nCD6IooDW1mYAgKap4DgOmlbqd42a5gmCcIhd0T14btvLaBGb8YnZZzY6HGIMUk5s5Qonn4+H3y8i\nGk2WuQHWhmWqaRlt1rdWeWHhtYxYofjy+yXwPIf29pZMtqecuG00Tr6flviKRGIIhwPgeR6treEh\nG4pUXe/F5s0f4Prrv4YLLliO8867AN/5zs3YuHE9wuEmAMDy5f+G449flHfOPfd8H5s2rQfDMFix\nYgUOO+ywvO+7Krg2bdqEK6+8Epdddhkuvvhi7NmzBzfccANUVQXP87jjjjvQ0dHhZggEMW7QdR2P\nrH8CmqHhwrnnQualRodEjDHsZrayYitR0GdTP5athKbpjq9diMcrVFBVs5RqWk1EhzJfYQCoW3y5\nZ6FQX4arFLpuQFFSiMUS8Pl4SJKAlpYwDAMYGBjE5s1bMG3aTFtrJRIJ3HXXHTjyyGPynr/iiqtx\nwgmLi56zdu0a7Ny5A4899hg+/PBDrFixAo899ljeMa5ZTsfjcdx666047rjjMs/94Ac/wPnnn4/V\nq1dj2bJlePjhh926PEGMO/7w/kvYFtmBoyccgfntBzc6HGIMwTD2xRZ
gzi+MRNwSW36kUipSKdVz\n2adGYGXorMxXT08f+vsjAICWljDa25szMx6rxQ1h5NbPLDdTmU6riETi6O4234uurn244YZv4LOf\nPQ8/+tHd2LZta9m1fD4f7rzzbrS3t9u+/po1b2Hx4pMBALNnz8bAwACi0WjeMa4JLkEQ8MADD6Cz\nM+tsvXLlSpx++ukAgJaWFvT397t1eYIYV7y975945J2nIHMSPj3n440OhxhDMIz93YiiaG7QiETi\ntryTLMxm9vIXsBrkFSWF+uYujn2VpqoqolFTcAwMRMEw+eKL4xrryTeSsx9VVUVLSztWrfo5vv3t\n2+DzCXjttVfLrsTzPERxeIXgV796HP/xH/+OlStvGKZfent70dzcnHnc2tqK7u7u/HWreDVVwfM8\neD5/eb/fD8Dc+vroo4/iqquucuvyBDFu2DKwDQ++txoAkNQUdCd6EBQCDY6KGAtUM4RakgQIgim4\nqnUJL+ffZcZRzDC1/DlFr2IYea/FmvGXTCqe3+FWK+m0msn4mKU2Ea2tYei6kdntWKzHzk0fLncy\nZ5XXPfDAg3DQQbVl/08//Sw0NTVhzpy5WLXqp3joofvw1a9eV/L4YrGMeNO8pmn4xje+gYULF+aV\nG4vR0lJ9GrSjI1RPeK7h1bgA78bm1bgAb8X22+3rMl8bMLA7vQvHdMxvYETF8dJ7RlRGknik0/bK\ndrIsguc5RCJxNDU5K/bLGabWQ24vWCjkRzpt7nZUlNyZftWLOi+TFV8xG+LL+0aiubjtNH/UUdl+\nrkWLTsT3v//fed9vb29Hb29v5nFXV9ewHvURF1w33HADZsyYgauvvrrisX195QegFtLREUJ3d6TW\n0FzDq3EB3o3Nq3EB3ovtg+5tma9ZhsVk3xRPxQdU956RMGs8LGsOgB4cjFa8ifn9IjiOQzQah2HU\nNvC5lD9WVmw5a5iaO88xEonBMPQhZ3MR4XAAqVQayeTYtpqoJL7cwq0Ml9sC8b/+6z9x5ZXXYMqU\nqVi7dg1mzZqd9/1jjlmIBx+8D1/84mV477330NnZiWAwmHfMiAqu3/72t/D5fPiP//iPkbwsQYxZ\ndkZ248OBrZgcmIiTD1iIycIUzGqa0eiwiFFMtl+r8lgbv18CyzKIRHL/OHZmHI7lTl/KMLVWE9Ns\n43060wuWO9PPspyQZQkcxyEUCiCRSGZcz+vBjZ1/TqxZTHzxPIemplBmt6PbO0LrxckM14YN63Hv\nvXdh79494HkeL7/8Ij796QuwcuUKSJIEWZaxYsVKAMDKlTdgxYqVOPTQwzF37sG48MILwTAMVq5c\nOTxGw6XC9bp163Dbbbdh165d4HkeEyZMQG9vL0RRzKi+2bNn4+abby65RrV/pXst82Dh1bgA78bm\n1bgAb8X24LrVeLvrn7jy8M/j5HlHeyauQsZrhsurP49S5O5EbGoKDDW/F79FBAISGIZBNJrIe77S\necUQBLPn1xrNU0lsAab1hCDwFQZV5xMMyuA4FoqSFVvpdLpkg39bWzMURYEgCOB5LuPtVGtpUxB8\nCAb92L9/oKbzS60ZCMjo6xt0bE0AaG9vQTQag89njtXJdXWvR3xNnNiOvXt7HIzUpKUljFgsUfJn\nYxgGWFYAy7q2VxBA+c8v1zJc8+fPx6pVq9xaniDGPXtj+7C2611MD03BIa1zGx0OMcoptH0o96d4\nMCjDMIxhYqvSeeWwrp0VW0mk084ZeDIMA57n8jJbdkgmU4hGE2BZNjNMmeM4KIqCRMLZcUVeI51W\nkUymEInEMiN1WluboesaEoli8wwr06jNCQzDuOgtZg9ymieIUcpz216GAQNnzFza8A8SYnRT3GOr\neGmw8qDo6kuK1j3Y7iig/OtUxioj6rpes0DSdR3xeBLxeBIcxw71ewXBskwm8zWWxFdhia5wpI4p\nPpszw6STSaWiFYjZv+VWvG71hjk
HCS6CGIV0x3vx933/wOTARBzafkijwyFGMaUMTQt7pBiGQTAo\nQ1W1suN06umtsi+27F/HEluKkhpmVVQrmqYjFksgFkuA4zjIcuFgaW+P16mX7DBpS3yJaG+3xJe5\n27G0+HJHFJHgIgjCFZ7f9jI64IgjAAAgAElEQVR0Q8cZM5eAZdztSSDGLnYNTXN39dVnOlocjjPL\nfdFowlGhkiu2FCU9ZDPkbDZY04YPljZHyphzHgt9rrwuCrLYy1JmxRdyxJe/qPhy27rB65DgIohR\nxv5kH97cuwYT/B34aOdhlU8giCJUGtVjDaK2TEer632yL2p8Ph6iKELTNFfF1kiQO1i6mNWC00O8\nATcNSqtft5L4MkuSlOEiCGKU8Mdtr0IzNJw+g7JbRG3YnYtoiZZk0r5oKXRzL4c15DqRSGZc6p2g\nuDN9rdSWEcu1WrB6ngIBGYBpp1G+7FZtfN4TGoXiS5ZFBIN+MAwz9PqVGiYSlGY0ZM9IcBHEKGJA\nGcRf9vwNbVIrjpqwoNHhEKOQaodQO206aiEIPGRZRDSaqGnTR2mzVCfFln3xWA5LfCSTKQSDfvh8\nPIJBf2YXoDdHCzkn5KzXz3EcWlvDw16/otQvvszfofJrNHpzEQkughhFvLD9Vai6itNmnAyObewA\nWmL0YVdscRyb8Z1yU2xFIgnoug6O42q4GQ7fpeis2HInY2IYBgYGogCQcbcvPVqocbiRMWIYc85m\n/usXMq+/XvHlgbetLCS4CGKUEElF8dquv6JZbMKxk45qdDjEKIJh7DfIW9YMqqrVVPKyer9KYZaX\nhAJz1PrvlHbEVrGsWCOzHmasKTAMio4WMsVXw8JzHev1A8XEl9lwb1d8Ug8XQRCO8dKOPyOlp/HJ\n6SfDx9I/XcIeHGeWWqoRW7GYsz1VFqLogygWiq36cTqzNdIUjhYSRXO0UDgcHHLFV0qOFnKvad6d\nMUSlRNFw8SUiFArYFl+NLhfagT61CWIUEE/H8aedf0FICOL4ycdUPoEgYJYQW1oC6O+PVjzWamCP\nRhPQNB2CwKOWhvFSvVWiKEAUfYhGi4utWu+Xo11sFWIYRsbLy5zrKMLvl9DUFISipIZ66tx/nY3M\nFtUivrye3QJIcBHEqODlna8jqSk4c9apEDjnMw/E2KOakXG5DezWqJZaDUyLIUkCBMGHSCRe9MZY\ny73Sii8Y9A/1mtkRIfbd6b2A6eWVRCKRzBstZPbXKUM2Hd7cpViMWsp+xcquheILsCO4Gv9zJ8FF\nEB4noSbxyo7XEPD5sWjywkaHQ4wCrH4tO4JJFE3LguJlvlpuUvmixhRbfEmxVewcO7AsA5ZlEY+P\nTMan0RQbLRQKBcBxLHRdh8/Hj6nRQoXkl12Hiy/A+31cJLgIwuP8edcbiKsJfPyA0yHxYqPDITxO\nNWIrN/NUKLaq8dPKPy97bVkWwfMcIpGEozdCljXHABmG4brY8mJvUO5oIb9fhiwLjo4Wcku4OLVu\nYc+bJIkQBB86Oloy9hv17PbcvPkDXH/913DBBctx3nkXYN++vfjud78FTVPBcTxuuulbaGtrzxz/\n9tt/x003XY+ZMw+AIPA46KCDcOONNw5blwQXQXgYRUvhxe1/gsxLOGnq8Y0Oh/A4LDu8lGgJoMJ7\nj73MU+1YYisajVcsGVZTvmTZrBmrKApVxeRkmdQrGIYOVdUwMBDNjBZqbg4DMJBMmj1fbjjcewVT\ndKegaTJ6e/szux2zuz1TVe32TCQSuOuuO3Dkkdle2Qce+DE+8YlzsXTpMvzqV4/jsccewZVXXpN3\n3oIFR+Db374dHR2hkmuT4CIID/P67jcRTcdw5sylkHm50eEQHqe4mLDKddk7jp3Mk2GY4qZ6DPh8\nPjAMEInEazi/NJbYSiRSSKdVSFJ1gqtavFyeysUKs9JoIXO8UGWrD7dEqZnhcmtdY9iGA6vs+PnP\
nX4qOjk6cdNJSnHDCYvj9gZJr+Xw+3Hnn3Vi9+meZ5772teshCObvWnNzCzZt2lBTnDQXhCA8SlpL\n44Vtr0DkBJw8bVGjwyFGKYVZHb9fAsexNjJbtd0ZeZ4HyzJViq3KPVy5YsvJMqIo+moUlt7GGivU\n3d2HwcEoOI5FW1szWlub4PdLYCvsqnBPa7qx8PCNA5b46u8fxO23/18cf/wiPP/8M7j00uVl/eV4\nnocoSnnPybIMjuOgaRqeeuqXWLbsjGHnbd26Bddddy0++9nP4vXXXy++dvUvjCCIkeCNPX/HQCqC\nZdNPRtBX+i8ygrAoXjLLZrgCAQkMwyAaTdS4Vnms9asVRJWulRVbzo4ZCgTkzE7HWsw2RwvWXMfB\nwdiwodKJhFJktJA7Ox8bNWg7GAzijDPOwllnfbKiMW8pNE3DrbfehCOOOApHHZVvzTNt2nR87nNf\nxJIly5BM9uPf/u3f8Pzzz2eyYhYkuAjCg2i6hue3vQwfy2PJ9MWNDocYxWTtE2QYhmFLbA2diWp2\nDlpiS1HSVVlSVIJlWYRCchGxVZ/FQyAgATDQ3x+FpmlDZqPmrrdUKo1EQqk39BGiOnGUO1S6lLu7\ne4Og3RJy9pvxa90E8d3v3oJp06bj85//0rDvdXR0YunS0wAA06dPR3t7O/bt24dp06blHUeCiyA8\nyJt730af0o+Tp56AsFC6CZMg7OD3S9A001agGuzemwIBGYAp5kTRB6e6VUqLreriKzhrSGwBsVj2\n/cj6PTGQJAF+v5Tntj8WrSfKeVwxDOO48HJPyLnL888/A5/Ph8svv6Lk93t6erB8+SXo7u5Gb28v\nJkyYMOw4ElwE4THM7NZL4BkOp04/qdHhEKMYc4YiC1VVqxZbdm+MVuYsV7zUQmHmoZLYqs0s1YAg\n8NB1A7FY8UyfaTaqIJFQ0NIShqpqCAb94Dgu05DtJb8rJ0RMoc2CLEvw+Xh0dLRWHC1UZbRodIar\nEhs2rMe9996FvXv3gOd5vPzyi+jv74MgCLj6ajO7NXPmAfj616/HypU3YMWKlVi06ETcfPM38dpr\nrwLQcfPNNw8rJwIkuAjCc6zpegfdiV4smnwsWqTmRodDjFIYhsmIodpG3lQu2QWDMnTdyBNztVov\n5N4wK4mtWvH5eDAMg1jMfkN/KpVCJBLLmI3m+l2NRcuFrM2CiP37BzLZPidGCzWqh6sa5s07GPfe\ne7+tY2+55XuZr2+//S4AIFsIghgt6IaO57a+BJZhcdqMUxodDjFKYRizyTyVSoPj2Jr6VioJp2Ji\na+hM1NNb5ZbY8vurb+g3b+Lma8k1G+V5DrKcb7mQSChld7+NRnKzfSxrGowWjhaq5v10y0TWXJdG\n+xAEUQX/6F6HvfEuLJx4FNrk1kaHQ4xCCoc5W/1KTmHt6tM0DfG4s43lltiKx50t25k2CNXvniyF\nqmqIROKIREy/K1kW0d7eXGbX3+jHEteFo4VYlq2q1OrO++LtkT4WJLgIwiMYhoFnt74IBgxOm0nZ\nLaJ6OM7KDmUzD/WU+AozEpbYsoRF8fNqvR6qFlt2MiZ+vwiWNa0wqjdKrXwTz7VcyDae+5FKqUO9\nT4rLjeLO90VV6onKzfZxHAdJMkcLMQyTEZzFRgsxDOBGEnC0NOOT4CIIj7Cudz12RffgqAkLMMHf\n0ehwiFGIz8cXESz1lfgsrJ6wcmKrVsyyJxCPp6rKbFXKavj94pDJq10rjPrI3/UnQpJEhMOBTOO5\nO/MJGys2NE3LK7WWHy3EAHBecdkrKTYeElwE4QEMw8AzW18EAJw+Y0mDoyFGK4lEChyX/1w9GSfr\nPLMnTB7K2pTfrVatsSTHsZnmfifLiLIsguM4x8cL2cHc9ZcdMSNJIgIBGTzPwTAM+Hy8p3Y6OkXu\naCGe54f1ubEsA9Wll00ZLoIgbLGh731sG9yBBR3zMTk4sdHhE
GOIWp21rcxYbgN+JbFVLZbYiscV\n+P2iY9ma7KzIfLFV+3zI2jEbz5NIJJKQJAHBoB/hcBAsm93pWKz81kicsFlQVRWRiDleyJrrKEli\nZs5mMplybJNBpXhr/zfgLCS4CMIDPLNlKLs1k7JbhHdgGCAUyjbgO0mu2EqnVRiG6Mi6ltiKRotl\ntpwpr9aKrhvQNB19fYPDym9W75Od4dKjDavPjWUZaJoOnufzRgspigJdr13gudUb5jQkuAiiwbzf\ntxkfDmzB/LZ5mB6a2uhwiDGGmdWp/jzTaZwZavyuxkqhcjahUGzlXBXV9OJYZU8ruSFJQkZsOZEp\nq7Uca4fc8puVAWptbYamaZlyZD0ixJswUFUNyWS87Gih6rNr7vSGOQ0JLoJoMM8O9W6dMXNpgyMh\nxiYGqh21Y1pLyDAMuJ7Zyo+zWrKDuSVJgM/HIxpNVCm2Gl9qsjJAkYg5XFqWRdsDtd0olZklOseX\nHRLH2YWtTQYAhs2yTCZTtnd4Ouk07yYkuAiigWwZ2IYNfe9jXssczGqa0ehwiDFItVmaXONRv99Z\nD6/SYqu+bJIkCRAEHpFIYlTceMthDZcGhosQs/w2vI/Ondc8su9j7ixLy14jd4dnudFC9nr/Gi+s\nHZzpPpxNmzbh1FNPxerVqwEAe/bswSWXXILly5fjmmuuQSrlbAMmQYw2nt36EgDgDOrdIhyg+E3H\nft+S5eMVj9fu8l5KOJUTW/Ugij4XxVZje74UJYX+/gi6u/ugKCn4/RI6O1vR1BTMG6w9WrCTiTIM\nc0djf/8gurv7kErZed3lS9FWebzRuCa44vE4br31Vhx33HGZ5+655x4sX74cjz76KGbMmIEnnnjC\nrcsThOfZEdmFdb3rMbtpJg5sPqDR4RBjFLuZI+cE0XCR4pbYAgBB8NkWW272ZLmJNWKnr28QPT19\nSKdVBIN+dHS0QhB8YGtp0itDo729LEq97s7OVoTDWfHllXgr4ZrgEgQBDzzwADo7OzPPvfnmm1i6\n1OxTOeWUU/DGG2+4dXmC8DxWduvMmad64q8vYvySFUTJugVRoaixu3a1W/dF0Tc0iHr0lxGrwRqx\ns3//APbv74dhGJAkEe3tLQgG/eAKjdhqwh0j0Xp6rXJfd09PP1RVQyhkik6e58FxrhbsHMG1Hi6e\n58Hz+csnEgkIgjlaoa2tDd3d3WXXaGkxh2RWQ7lJ3Y3Eq3EB3o3Nq3EB9ce2Y2A3/tH9Lma3zsDi\nuUc4JrjG8ntG1EYlIcPzHAIBCbFYcpgfVOEuwGrJF1vOeU2Jog+iKEDXjXEltgrRNB3ptIpUKg1F\nSTk2UNvrf//puo54PIF4PAGOY9HS0gS/X0YgIJcdLWSXzZs/wPXXfw0XXLAc5513Afbt24tbb70J\nuq6jra0dN974rYyWsbjnnu/jvffWQRB4rFixAocddtiwdRvWNG/nH0lfX3UOwR0dIXR3R2oNyTW8\nGhfg3di8GhfgTGw/f+9pAMCyqaegpyfqRFhj5j0jYTZylBNbJtldgPYxz3FbbEWjcQSDcpXxVd+T\n5XXxYXlQOT1Q251dis7vJtQ0HbquIxKJZbJ9prcZMqIzO1qoMolEAnfddQeOPPKYzHMPPngfPvWp\n87Fkyam4777/we9//1uce+6nM99fu3YNdu7cgfvuexiDg11YsWIFHnvssWFrj2gOzu/3I5lMAgD2\n7duXV24kiPFCV7wba/a9gynBSZjfdnCjwyHGOKUyXJXFlkm1gsPKigWDMmKx6sRWpWsJQlZsmdmt\n+gVRICBDlsWi79HoSJ4NF5zWMO2urv2IxRIQBB86OlrQ3ByGJIk23rPRMZvQwhJylrdZT08f+vsj\nYBhg1aqHcfnll+L//b+HsHPnjopr+Xw+3Hnn3Whvb888t3btGixadCIA4IQTFuPvf38z75w1a97C\n4sUnAwBmz56NgYEBRKPD/
5AeUcF1/PHH47nnngMAPP/881i8ePFIXp4gPMFz216GAQNnzFxKvVtE\nQ/D5TLEVjSbKiq1aBIfVS1NJyA2/VvnskyDwkOWs2HKCYFAGy5o2BKYgCUEUhconjiIUJYWBgQi6\nu/cjmVQgSSI6OlrR1FT6tbrVhO7muoWYo4XiuPDCi3HttV9Hb28Prrrqi1i//r2ya/E8D1HMt0PJ\nbYdqaWlFb29v3vd7e3vR3Nycedza2lq0Zcq1kuK6detw2223YdeuXeB5Hs899xzuvPNOXH/99Xjs\nsccwefJknHPOOW5dniA8SW9iP/62921M9HdiQcf8RodDjAMKs0A+Hw+/X0Q0mrAxRqa6kqJVRgTg\n6HxAU2yJiETqE1u570UwKEPXDUQicWiaOjRkWoDfL6GpKZgZtjxWKDVQ23qtiYTzO0iH41bmrHSp\nkmVZzJ9/KA4//Ch89avX1X0le7thix/jmuCaP38+Vq1aNez5hx9+2K1LEoTneX77K9ANHafPXAKW\n8f6uGmL0MbzMls0cVSe2qrNR4DgOwaBZorRElxNkxVbCscxWICDDMMxdbxaWBUEioYBlWciyOWxZ\nEHzgOK7qXqBieCWhnTtQ23qtuQO1DcOdQEdb5sxClv1QlCREUUJ3d1deuREA2tvb87JeXV1d6Ojo\nGLYOfeITxAjRrwzgr7vfQrvchiM7D290OMQ4wRJNgmCKrUjEntiqhlyxVWtmq5i4s5q/TbFVLObq\nm+DNne8GYrFkyWN0XUcslkAyqSAWS4JhgNbWMNramuH3S3VlvtwQBvWsab3W3t5+9PUNwjAAv1+E\nLJsZMGftFrxnN2GHo446Bq+8Ytr4vPrqSzj22OPzvn/MMQvxyivmiLb33nsPnZ2dCAaDw9ah0T4E\nMUK8sO1VqIaG02ecAo51wiuHIOxgipJaSnJ2vLHsNt9Xi5WNKy22qjcyFUXTKLOc2CrEFCRJRCJx\nCIJvyPPKnHNojdpppDWFk1kzq+ncMAxwHAeWZR0dqD0aDEo3bFiPe++9C3v37gHP83j55RexcuW3\n8Z3v3Izf/OZJTJw4CWeeeTYAYOXKG7BixUoceujhmDv3YPz7v38egsBj5cqVRdcmwUUQI8BgKoLX\ndr+JFrEZx0w8otHhEOMIQfCBYYDBQeeazS1KiS1LqFUnRIqXPmvxkSqG3y+CZZm6RKE153Bw0Jzf\nmJ33l0IioWRmII4FdF1HNBqvaaB2aRqV4bKvSufNOxj33nv/sOd/8IMfDXvullu+l/n6y1/+CoDy\ntjYkuAhiBHhp+5+R1tM4bcbJ4Fn6Z0eMDJZnFcMwNYmtchkkpzNb1rV8Pq6qPjM7yLKY6cMqNKy0\nE1MxkskUkslUpgHdcnm3MkHuN6BbOC9iCgVMLQO1i6/rnr/XaIA++QnCZaLpGP606y9oEkI4btLR\njQ6HGCdIkjA0ZzCOpqZAjasU75GqJLZqdahnWRaSJDgqtiRJAM9ziEbj4DjO8cb13AZ0jmMhSSKa\nmoIAGCQSSSSTiuM9c41EUVJQlNSwXZ12snxuCaPRMm2ABBdBuMwrO16HoqVw9qzT4OOKTbonCGcx\nxRaPSCRe182oWIbHrZ4tlmXg8/FDNg32BEqlHq7898GhQMugaWYDeiyWAM/zQ6N2sj1QTpVHvUD+\nrk57WT43hJHbDfNOQoKLIFwkoSbwys7XEPQFcMKUhY0OhxgHyLIIn49DJJId6lxbT9Vw7Iut6vy7\neJ6Dz8dDUdJVZoNK71IURV8mw9eI+7FpvKnm9UCJojg0fkZAMmmvDFcJN8p01ZagrcHS8fjwLJ8l\nvpwU5/mxer8R34IEF0G4yKs730BCTeKTB5wJkRtbDtaEN9F1Y2inWfa52odQZwVNNZmtanYPWuum\nUmnHMhVW71q9GT6nsHqgRDGFQMAPWZYQDtsrwzWO2t63/CwfB1kW0dISzgwaZ1nW4Uzf6Bl
DRIKL\nIFwiqSp4acef4OdlLJ56XKPDIcYJyWQKrEPWSZZwcquMmLuu6Y9VP4LAQ5KcFFvO9h3puo7+/sFM\nGS4U8oNl2Yzbu1uZoEaQO1BbEHxoaQnXNVC7GHb+kPBKUz0JLoJwidd2/xWxdBxnzVoGmZcqn0AQ\nLpH106r+5mY1slcvtsrf5DiOzRNxtQquwrFFTowAGgnyy3BmJqi5OQzDMDLiy14myI1dis6X6VRV\ng67r6O7ugyj6MmIzlTJtJhRFqema1MNFEOOclJbGi9v/BIkTccrUExodDkHUhOlQ7xtyp69uEHW5\npII1czFXxBkGqnZwz73P5hul2r8Bl89+lH8dTqFppuFoNBrPiMZsJihZo+dVPbgj4iwUJQ1FSYNh\nAFEUc/zM0kPiq5r+NiopEsS45i97/obBVASnzTgFfp+/0eEQ4xw7jvGF8DwHQfAhlUrXPUMwl2Ji\nayhK1Fq+43nOllGqWSK1f41GJE7SaRXptIrBwZgtz6vR1DReGGf5gdopJJOV+9tG0+snwUUQDqPq\nKv647RX4WB+WTFvc6HAIomqs3ipFqa2Zu1TTvCW24nGnesEMsCwHWeZtend54c5sP4ZSnldWydEt\nc1W3dj6Wy9IVG6gdCgUyA7VL9bdRSZEgxjFv7lmDfmUAS6YtRkgYPsCUIEaaajJcltiKRs1dZmxN\nHfjDs1VZsaUgnS5tlloNphCxK7ZGL/meV6YYCYeDYFkms/PPWRpbprMGals7HSXJ7G8DjEyzvfXz\ntvc7Q03zBDHm0HQNz217GTzLY+n0ExsdDjEOqeeP/VyxpWm6Y87sLJsrtpzJynAcm/HuGks7+ypR\nKEaam0MIhfzw+6VMv5cXDVZrzURZA7Wt/jZJKjSTNSjDRRDjkb/v+wd6k/tx4pTj0Cw2NTocggBg\nL3tkzjCUCrJFtfVV5V6PZVmEQnbElv1rWdmydFr1pLgYKVRVg6rqSCTiQ4aqItrbzQHTVr9XLWLE\nq31RVn+bZSYrSWbDvWEYkGWxAZsLqoMEF0E4hG7oeG7bS2AZFstmnNzocAgih/JiprjYMqktw2Ve\nzxJbTvYb5WbLnPDu8opHU30YGXPVwcHsgOnszr9kzf14TuF0r5X1etPpNCRJhCgKNQ3ULsbvfvdr\nPPvsHzKPN25cjz/+8c+ZxyeddCwOPfTwzOO77/4xOK7y7yIJLoJwiLVd72JfvBvHTzoarVJLo8Mh\niAzlMlzlxFY990eGYTJiK5WqLLbsZOFYlskTcKbgqm7XYe41OI5FU5MMwDSMTSSSw4Sh1wVZsWxU\nfrO9iEDAj3C49IzD4WuOnkZ0gIGmaRgcjA3bXNDd3YN//vNdHHLIYRAE+3Ln7LPPwdlnnwMAWLt2\nDV566YW87weDQdx77/1VR0qCiyAcQDd0PLv1RTBgsGzGKY0OhyCKMFw4WN5VpZvOa7dqkCTBttiy\nA8syCAb9SCRSmTUr+X2VwypLRiIJKIoy5AVlNqKbDepJR+JuJLk7/4rNOEwkkiO22cCtMmXuuoUD\ntXt6enHffT/Gzp27sGTJqTjvvAswffqMqtb/6U//FzfddKsjsZLgIggHeLdnPXbH9uLoCUeg09/e\n6HAIIo9iwqSy2Kpt5yDLMhAEH9JptcoZgaXFHcOYYktRUnXOHbRKnUymLKmqZh9YPJ5APG7N/5PQ\n2toMwEA6rXk+46OktYqyOH/GoWmumtt8nkjUP2anPO7sfCz1s9F1A21tHfjhD3+M3bv34qWXXsC/\n/rWuKsG1fv176OycgLa2/M/0VCqFm2/+L+zbtwcnnbQEF154sa31SHARRJ0YhoFnt74ABgzOmEnZ\nLcL72BFbtWBlodJptep1S4k7qzRpuZM7gZkpK15aM+f/xRCJxBAOB+Hz8ejoaHGkN8gtXl6zC0fP\nbUMoINg6XlVVRCLZ5nNZFhEMWmN2kq4IzEY24k+dOg2
XXfaFqs97+ulf48wzzx72/FVXXYPTTjsL\nDMPgqqu+iAULjsC8eYdUXM9p8w6CGHf8a/8mbI/swoLOQzExMKHR4RDEMHJ9uKoRW9X4d+VmoZwS\ncQwDBIMyUinVEaHDMGZ2y8yUVS51apoGRUmhu7sPipJCICCjs7MVoVAAPp838hW6bqB3UMHGnYM1\nnZ9KpTEwEEV3934kkwpkWYLPxyMQ8EMQfA5GOrIZLidYu3ZNXnO8xTnnfBp+vx+yLOOoo47Ghx9+\nYGs9b/zGEMQoxcpuAcAZM5Y0OBqCKJ9FcCuzZWah/EPN2mlIkr1MSyWCQT9UVUMy6YTYAgIBCYZh\nVJ0py+0Nyu+FwjAjzpFi3eY+bNjRj2BARkJRsWnHAHZ2x8AAOO3oKZDF6m7vuWN2WluboGkaQiE/\nWJbNbCiox+/MvR4uxsbszOob/Xp6uiHLfvh8+aJz+/ateOihB7By5behaRreffcdnHzyUltrkuAi\niDp4v/9DbB7YhkPbD8HU0ORGh0MQRTEMAxzHQhCqc2W308NVrORX6yDq3GxaKCQPDW9Wyp5j9zpm\nqVOrardaMXJ7oSwjzra2kR80Pf+AFgg+Bu9ui4JjWaiqBoFncfJHJ9kSW0pag+grbmXAMKaQjERi\n4DgOsmw6vRuGken3qt7/zE33+tLrMgxT007Tnp4etLS0Zh6vWvVTfPSjR2D+/MPQ2TkBX/zipWAY\nBosWnYhDDplva00SXARRB89sfQkAcMZMym4R3oXjOPA8h0gkXmUmpvwuRUtsDS/51bK7MXvTDAZl\naJpeVmxVcx1zPTNTVq/gyiXXiDN30LSipJFIJOts8K/MnKlN2LAzjrhhwDCASW1++G2ILcMw8Oyb\nu/DJRdNLHJEVR5qW7/QuyyLa26sXmG5muNwQuPPmHYzvf/+ezONLLrks8/WVV/5HTWuS4CKIGtk8\nsBWb+j7Awa0HYWa41AcXQTQWn4+HIPigqtU3sgOlM1y5YsuJkp9FMChD1w3E45XElj0CARmGYa5X\nS6bD7imF3lfBoB8cx2XsF9wYP7SrJw7d0HHWwmnYtKMf2/dF8dE5bRXP27s/gZ6BBPYPJtEalmxf\nzxKYg4OmwJQksQqzUXf8zLzqil8MElwEUSPPbH0RAHDGTHv1e4IYaayerUQiWVMDdKkbWW4zezGx\nVYudBGC6yKuqinjcGQ8sv18CwwDRaG3rma+/2tJorveVWY5raQlD1w3H3PYtZIHDJWfMQzKRRFtY\nwN79ibJxvbhmN6JJFam0Dh/H4oU1e+DjGbSERJy8YFLmWDsiJl9gZs1GSxvIwqVSa2MHbVcD7VIk\niBrYNrgD/+rdiDnNBwEkRtwAACAASURBVODA5lmNDocghmGJrUgkYaOpuBTDS3am2PJDVctltqov\nKUqSCIYBYjH74qicsPP7RbAsg2i0tAhxG6sc193dh8HBKDiOhSgKaGkJQ5LEutdva5Ig+LiMOJrY\nKpc8lmEYnHLEZLQ3SUgoKhiGQTKlYXKbHycePrHwaNgVMdaGgr6+QfT09EPTNITDQXR0tCAY9Dsy\nfqkclOEiiDHOc5neLcpuEd4jX2zpYBiupnJaoaDJii0NiUTp8lG1GS6/XwLLog5hmI8si+A4s2et\nIDJUJwSdu5On02bmzmo8l2UJ4XAAyWQKyaRSR7+XfXHEsQwOmhrG+zsGoDOm5ce86c1gHRpfpOu5\n5qocJCmb3dN13fEMHzC6xhBRhosgqmR7/y680/MeZoWnY27LgY0OhyDyYFkmT2yZOHNDyootZ/qr\nAFMcsSxTVWYry3ABJUkCfD4O0Wih2DLxwmjEZDI1lBHqg6qqCIUCmYyQnSHI9bBxxwAmdwTwiROm\no7NZxvrtA8OOcSJrpKrZ7F4kEgPLsggE5Ex2z6kZlXbW8co8zBHNcMViMVx33XUYGBhAOp3GVVdd\nhcWLF49kCARRN0/
+6xkAZnbLK/+QCcJC1w3098fyhEWtPVW5hELuiC1r96S5fb++9UTRB0HwIRKJ\nFxUMXkuEmJsDkojHk0MjhUS0toahabpr43bmTG3ChBYJDMPgzGOnYF+f+zMjU6n0UGO9ObtRlkWE\nw+ZuzmQyWfcEgdGS4RpRwfXUU09h1qxZ+NrXvoZ9+/bh0ksvxbPPPjuSIRBEXeyLdeGNHW9jWnAy\nPtI2r9HhEIRNah9CDWRtFeyKLTsO9ZIkgOdLZ6LsXScrJAWBhyQJQ2KrmhuwN/5oMkcKxRGJxAvG\n7Tg7Uii3z4thmBJ9X/U3or+/cwBzpjYNe75wN2cg4Ec4zGVMV6stO46mkuKICq6WlhZs3LgRADA4\nOIiWlpaRvDxB1M0T7z8NAwY+2nkYZbeIUUO9GS5zuLNzmS1RFCAIPCKRRE7WqXZRaPlD1bdBwDtY\nGaHhOwCVkjMgnaTekmIkkcab/+rBgVPCmc/JwjXzd3PmuvczGSsNOzYmo+ljuKLg2rVrF2677Tb0\n9fVh1apVePzxx3HMMcdg5syZVV/sYx/7GJ588kksW7YMg4ODuO+++2qJmSAawp92voF/7Tf/YPjd\nludxUMtszGqyP3meIBpHbWImGDSzH+Ua5KtFFH0QRd+wTFStopBhmMy4ourdz+2s7/iStskdKcSy\nLGQ5K0pM01HFM7v0dN3A1r1RGIaBHV0xqJqOf3zQi7BfgODj8JFwEKWyZrnu/TzPD5VWm4fMas3M\nV2khPYYyXDfeeCMuuugiPPzwwwCAWbNm4cYbb8SqVauqvthvfvMbTJ48GQ8++CA2bNiAFStW4Mkn\nnyx5fEtL9VtKOzpCVcc1Eng1LsC7sXkprk09m/GrD36beawbOnand+GYDnsjHUYKL71nhXg5trFO\nLWLGMiA1y4PV3dRLlRQFwQdRFBCNVlv2Kw7LskM9YM7OhrTw0n08fwegKUra2pozWTBrB6QzVF9S\nZBhgMJ7Cux/uh2GYOyL/+WEffDyLhYd0lv0dUjUdPGfu4VNVFZGI6d6fX1pVkUwmh1mRVP7d9E4K\nrKLgSqfTWLp0KX76058CAI4++uiaL/b2229j0aJFAIB58+ahq6sLmqaV3JXR11ddbb+jI4Tu7kjN\n8bmFV+MCvBubl+LaNrgD96x9AJqugwEDAwZYhsVk3xTPxAh46z0rpJrYSJg1nmDQcmdPIhwOOLKm\nIPCQZbPHyomyn1mGEqCqOjTNvov7WGgFyBUlHR0tEAQfAgHZsSb0WrJmDMNgwYFtaAuJeGHNbvAc\nA5ZlcM7iGUPjhkqLuJfe3oMlR0zKiC6LbGkVEEVxyEojCEVJIZFQMmXXMZPhAsx+K+uX9P3334ei\n1FbLnzFjBt555x2cfvrp2LVrFwKBgOtbYAmiHnZEduPef/wvFE3B5z7yWbRKLdid3oXJvilUTiTG\nJNYonKxNg1WOtH9TK8yo2e2xsiuGWJZFMChDUVIuG2vWt9lgJDAMYHAwCl03hjWhOzlSaDCeQtgv\nVDyuZ1ABz7PoaJKwuzeOgWgKfpEvKeJUTce+vgQ2747goGnDm+wB8zyrtMiyzNBIIT9YlgXDMOB5\nzvW+NieoKLiuuuoqnH/++eju7sbHP/5x9PX14Y477qjpYhdccAFWrFiBiy++GKqq4uabb65pHYIY\nCXZH9+LefzyAhJrEJQefjyMnLAAAHNMx37OZJIIAau+HCgQkAEaeJ1btDffmST4fZ7vHyk6mgmXN\nGY6JhNnXU63gKrxGOOwHw/gzvVJu9IGNBIVN6LIsobk5nDFarea1FQpfwzDw3N924byTZlY0SR2I\nprDsqCmY2Cpj044B7OqOYVKbH4Wi/e1NPdiz3xTgumbgnQ/3Y/PuCBgGWHLEZPj44jahuVYaHMeh\nvb0ZTU0hAGa/WzKp1F1efvvtv+Omm67HzJkHAABmzz4Q1177jcz333rrTdx///+AZ
Tkcd9wJuOyy\nL9hat6LgWrhwIX79619j06ZNEAQBs2bNgijWNpIgEAjg7rvvrulcghhJ9sW6cM8/7kc0HcNF8z6N\nYycd2eiQCKIurL6qUqLGFFsMYjEnRuGY1+B5Dn6/hGjUmR4rhmEQDPqRTKaQSql1Z7dMuwsdsVgc\nsiyhvb0Z6bQ61JDu3EYBNymWOdI0HdFoHNFofCi7mPvaFCiKUrZkWPg7snd/AoOxNLbvi2HmxGDZ\neE7+aHYmY27GqjDOBXPaoG7owYbtA2BZBvGEioDI46QFE0uKrUKsUnJPT18mi9rW1jzkF2f+DGut\nNi5YcAS+/e3bi37v7rvvxPe//0N0dHTi6qu/hJNOWoJZsw6ouGZFwVVKIF1zzTUVFyeI0UhXvAd3\nr70fkVQUFxx0Do6ffEyjQyIIVwkETCPMYnMH7XhqDT/HvMEGAhKiUXvb+yvBMEAoJCOVStfdowSY\nsVml03RaRTodxeCgOdPR6hVKp1VH+oMMw8C7m/tw9CGFMwvdJ/+1CZAkach0NNsHVYqX1+7Bru4Y\nAEDgWbz27l689k+gs0XGsqMmV/V7UXgsyzA4cm47Ptg9iFTK/P04aFoTAnL1Q9YB63WqGByMQRQF\niKKASy65CBMnTsKyZWfiuOMW1ZwsymXXrp0IhcKYMMH8WR533AlYs+ZvtgRXRRnJcVzmP13X8eab\nbyISoXIKMTbpTfThnrX3YyA1iPPmfBwnTj2+0SERhCOUKg36/aXFVq1wnNlbE4slq2poL1e+DAb9\nSKfzB2abx1df7/T7xUx8hSST2UHMhmFAEHxob29GICCDZWubhtfdn8S7m/sa3tydTKbQ32+OFEqn\ni48Uys1EnfLRSThibjustjtNM3DYAS1Viy2Lwte/dW8EHMPg5CMmYXK7H1v2VqctSmVsFSWFwcEo\n7rrrhzjuuBPw1FNP4IorPlfV2lu3bsF1112LL3/5crz11l8zz+/f34vm5qyHaEtLC3p7e22tWTHD\ndfXVV+c91jQNX/nKV+zGTBCjhr5kP+5eex/6lH58cvaZWDKNxk4RY4nhze/m0OjyYqvaHi6OY4ey\nR6ihYbt4g37W6b6wzFebgCk+2DofXdeHSlJmz5Asi3lluWSy/OYxVdPxr639AIA9vXGkVR1/XbcP\nzU0ytHSqqAt77VT3PhSOFJIkc6SQrutD73F2vY4mCZqug2dZ6DrQ2iTVJLaKnSL5OJy7eAYEH4cZ\nnQFs3Rutes1yGjYQCOCss87G2WefW1Vv3rRp0/G5z30RS5Ysw+7du/CVr1yBxx77NXy+4dm3ajR0\n1XJdVVVs37692tMIwtMMKIO4Z+396E3ux1mzluG0Gac0OiSCcJRC4WRHbA2dCbs79azdg/G44phh\nqOUH5oTTvSiaN8xq+9SsUlVX134kEqb46uxsRTgchM9XPG/BcyxEH4t1W/rQ1ZcExzJYv70fG7b2\nozlYebcfACQVOzvv6nuj84dMmz1fDMOguTkMURSwcXs/DpgUxoVLD8BB08L4YOdgjVcano2a0hGA\n4LMyawxmTarWEsb+7tlqspMdHZ1YuvQ0MAyDKVOmoq2tDd3dXQCA9vYO7N+fzWh1d3ehvb3d1roV\nM1wnnXRSRs0ahoHBwUGce+65tgMnCK8TSUVxz9r70ZXowWkzTsFZM09tdEgE4QJZ4eT3izbFlokd\n8WTtHozHnRs9Y/VZxeP1D1i2Zi0aRn3DjpPJFJLJVFHn98KdgHOnN0MWebywZjd8HAueY/GZpQci\nFo1VvI6S1vDC27tx9nHTKx7rVKUylUpDVVUIQguSSQV+v4RTF86GwAOJhIITDp2ASKK2/jk37M/M\nkqLz6z7//DPo6enB8uWXoLe3B/v370dHRycAYNKkyYjFYtizZzc6Ojrxl7+8hptuutXWuhUF16OP\nPpr52twhEkQ4HK7xZRCEt4imY7hn7f3YG+/Ck
mmL8YkDzhgTxogEUYiV4ZJl0VZJLfe8Sphiy583\n56+2Hp9smcjqs3Kit8zn4zI+YKGQDCeGM+c6v1s75IqVHPf0xiELPCa2ytjRE0N3XwJ+G33hH+wc\nxP5BBWlVL7trz/mPK/O9yfpeseByhGWwijmHhes63cP2/9l77yg57jrd+6lc1V0dJgfNjGaUZWXL\nSclBzkiOgDH2ggGvwbsXw4H1srz37u5l32UPBxYuLCzBvIS7LNiAjTGOwjbOlm3ZshUtK46kiZrY\nuasrvn9UV6fpPD09PVJ9zuFgaaa7ft3qM/XMNzyP+VmpvOLavPlSfO1r/4jXXnsZiqLg/vu/iuee\n2wGnU8Rll12B++//Kr72tf8FANi69Wp0dRXnyZhTcD3yyCN5H/iRj3ykhOPb2NQeESWK/9zzMwyG\nh3FZx0bcumi7LbZszgqy3YMMw2ypkSRZtNiKPxL52laZVg3J65V/IyxWFBYzX5ZqTVGex1bhnwmp\nG3I8z8a3HJ2QJBm6MYptGzvhdjA4MxnD8EQYC1qE7M+j6tixqx+qpkOSdRgG8Phrp2EQBnpaRaxf\n2lTG+adHtkghK+cwab1Q+N96Zn60FiOcy1mqcOJb3/puzq+vXXs+HnjglyU/b07BtXv37rwPtAWX\nzVwmqkr44d6foy84gE3tF+Eji2+0xZbNWQ1FEQBIBAKF21mp5BM1BGFWtmIxuSJWDYABjmNB0xRC\noWJEYX4xaA3wm9uS5Yit0kVjasuR5zlsv3QxCMJsOXYLHFiWhd+ffRuPoUlce+E8vLLvDAKhMEiS\nQFRWsW5xA87r9pZx/tLJm3mYEinEcSwEgYPL5YQsK3F/r3zeZZVv/9VKcHex5BRc3/jGN3I+6Fe/\n+tWMHMbGphpIagw/3vsLnAycxsWt63H70ltBEuWte9vYzAUEgQNBkIhGy52FmipqTLGV2xcrtT1Y\nynUYhkYwGJn2jZQkicQA/3Tibcr9PUzXdUQiUUQiycqQGZtken3lMh9lGQodjQ70j4Sh6QZoisDS\nLk/N/UJoimw5Hp5tvja3W0y42qtq+hzfTLT/5lKOIlDEDNehQ4fwk5/8BJOTkwAAWZYxPDyMT37y\nkzN+OBubSiNrMh7Y939x3H8S65vX4K+Wf9QWWzZnNTzPxrPmFJTTXjGNT9P/jiDM7UFZVvM4speW\nwciyTGKQf7o3UavNGY3K0xrgr9S93KoMKYoCQRDA81yi5RiNSlPO2DscwqION5Z1efDK3jM4PhDA\n0q5cFa7pz6NN5/myRwolo3ZmMi6pxjRoQQreaf7lX/4F11xzDfx+Pz7zmc+gu7sb3/pWdrt7G5ta\nRtEU/HT/r3DEdxxrm1birvNut8WWzVkNz7NgGDouYip3gxJFB1RVq1j8jTl0zkLT9JLEVrbXZDnS\nx2JKVhf1fO/DicEgokVZMZSPrusJ81FV1eB2i2hstMxHzZ9H65c2YPOqFjR6eNy0uQsNHj7n802n\nrTYwGp7yfk/n+axIobGxSfj9IVAUicZGL+rq3PEKXaWrUXOrwlXwbsPzPLZt2waXy4XLL78c//Zv\n/4af//zn1TibjU3FUHUVPzvw3zg0cQSrGpfj0yvuAEVOL4fNxqaW4XkWLEtPu2KU6eaeNCHN74tV\nrMAzh9q5xDmn2zozHem1AvNE2ekdCuBo2T5TpWF6i0UxPu6DzxcAQRBoaPCivt6D+W3exPtAkQQa\n8wiu6fDukXEMjZeyQFE8qd5llq1HU1M9PB4RLFtefE8mhWwhzM997ZTBCrYUY7EYjhw5Ao7jsGvX\nLixatAgDAwPVOJuNTUXQdA2/OPggDox/gOX1S3D3yk+AJgt+9G1s5iwcR4NlaQSDSbFVvphJ3rQq\naUIKmGLLzFusTLh1sWIwlZHJKIbHzSH2MxMSwlE1YcOwsMMLQZh+/l4hVFVDMBieMoxeTN5hqUQk\nFaqmQ9UNT
IZkHO0PQoznFzp4GjRNo9KVKEv8jo1Nguc5uFwOkCSZsM8od8bO/DjPnQpXwbvOli1b\ncOrUKXzhC1/AV77yFYyPj+Oee+6pxtlsbKaNbuj4r/d/i72jB7CkbhE+u+ouMLbYsjnLicVUaJpW\noeBlK4haKNGENP8M1/Q3CFOvkWqSWkhspZ/LKVA4PhjEuE8CSRLwhxXsOjSKnjYXVi2q/s8Kaxid\nJIm4OHGa24rRcv2v0hkcj+D1/Wegxv29TgwGcKTPj/ZGRzwjMXdLcTIYQ52rfAGaLVKors4NXTfi\nFhMx6Hopn9m51VIs+Gl6++238bvf/Q7XXXcdvvvd7+K8886rxrlsbKaNbuj49aGHsXtkLxZ6enDv\n6k+BpSpTyraxqWWyuakbhlFm+LIBmqahaXrWsOd8Z8hVUEtGAEnT2iC0rjEdk1SXg8O2DZ14audp\nTAbNSsziDg82rmwGRZKYrZZUpjgRBL4s/6tMFs1zo61ewNNv9kOSNZAksLrbi4vPa877uIlgDK/v\nO4MbNhV2vs8k2zahFSkUCkXAsgwEgYMoOuKLGFJR84FnjS2ExS9/+UtMTExgx44d+MY3vgG/34/t\n27fjs5/9bDXOZ2NTFrqh46EPHsVbw7vR4+7C3675NDiquPwyGxubJCzLgCBKzx/MRXoEULrYKrft\nWYpzfia6rsHQDfjCMurcHCKSglCZ8TUzRb6WY+r7Vez7xzIUJFmFg6cRkTQUUzQ71h+ALywXdL4v\nB1k2FxwIAuA4Lm4cK+bc4rQozhZiDs1wAUB9fT3uuOMOrFy5Eo888ggeeOABW3DZ1CyGYeDhI3/C\nzqFd6HLNw9+uuRs8PTNDpzY2c4VythStzMVyqlDZbv6VsmuwsMKoS6lspb4PZvuKwIgvhkXz3Lh4\neRNiioaX9wxD0w0AUy0xZptU/ytB4MDzBpqa6hCNxvD0zl5sXNEEmsoviI4NBDC/1YVNq1owMhnB\nO4eTYcypIiamaHhyZx9CUcX8tzQM/PYvx6HpwLrF9VizqKGoMxdbiTIMpEUKCQIHt1sEQRCQpBj6\nhnwIRmR0tYhFXpeoKf+ygoJrz5492LFjB1544QV0dnbihhtuwFe+8pVqnM3GpmQMw8Cjx57EKwNv\nYJ7Yhs+vvQcOJnuMho3NuUVpG1tWvE40GgPLTr86bBmlxmJyRYbArTBqYDqGmgYMQ0dHsxMdzU4Y\nhgGKInH9JZ3QdWPa7SpNN0AS5eVKFsKap3O5nJicDIDjOEyENIwENCxsF/K2HJd2erB8vunr1dbg\nxLZLHFm/j2Mo3HLpfOzcP4LjAwGQJAGGIXHVmla0NWR/THZKt4RIjxQyW6pDPhXhqIql3TwkKXb2\nGZ9+/etfx4033ogHH3wQjY2N1TiTjU1ZGIaBx0/swAt9r6LV2YL71t4DJ1PKDwUbm7OXUipclllq\nKBQBRVEVqfJYRqn5IoCKPWNqGLXH4yzrPLquwzDSe2mpFRGCMMBxzLRu6HuPT6DJw6Gz2arIVF54\nvf3+MI6dGoWmA5P+EN55X0XfiAiCoHDZ2lZoqjpF4JIkkfPPme8/SRDgOQoGAJIioKo6mryldQym\nM2s1EZBwcjgMkjTbmgZBwCtyoGgS85poNHlmfou0UhQUXIVCrG1saoWnTz6PZ0+9iGZHI76w9rNw\nscWVnW1szg2Kq3BZ/l2mpcQ0rpYinlwuoWJGqZlh1OUIIsMwoOv524U8z4HnOfj9wfiygVHiBh0w\nPB5BOKqkCC6g0jYG65Y242T/OPpGQ6BJEr5gDKqqY8uaVhAw0iwYotEYNK1Qe3hq1WhoLIKLVzSh\np0XEi3uGcGwgiGVdnhJOWb7pqVfkIMl+HB0IgIx/fnfu68f8VhdWL2wAyzJobq5PvL7MSKFy+NGP\n/gN79+6Bpmn4xCc+hcsu25r42kc+cgOam1sSCyj/+39/HU1N+RcOLOz9eJu
zgmdPvoine59DI1+P\nL677HDyca7aPZGNTUxRTPeI4BizLxLMMK+PfZXpjGUV6Y810GLVZ2RIEHgQh5WxtMgwNp1OA3x+E\nrhvxChABgjBgGEZiCzTbe3P6TAj7jk+ApkmM+2PwhRTs2NUPVdWx9YJOtDVVdlOapkhsWd2CM5NR\nRKKm2Fi1sB5NXj5jy5FDfb0n4VMmSbGixepVF7RD4Ey5cN1FHZDk0mb6plPhIkkCG1e2QNMMnBwO\nAQDaGx24fG0rSAIIhyOQZQU8z1UkUujdd9/BiRPH8cADv4Tf78OnP31nmuACgG9/+/twOErvntiC\ny2bO88LpV/CnE8+gjvPiC+s+By9Xym9eNjZnJ6UOyXMcA45j08TWdOF5Nh7iXJydRH4riemHUeu6\nAb8/DI6jwfOmDUEsJkOS5ETlh6YpuFxOBAKhKaIuteWox4fqzXt68v3qahEhyRreOTwGwIy7CUYU\nbFrZjHp3ZZd3LCEzNB6BFNNw/tJGnDoTwqnhIBZ3uBPfZ245RhAMWhYMPFwuB2IxBdFouvDMJo4s\nsWW9B6l/LvKk5by8NMYCMXhEFgxNYjwQS2R8GgbiliXmvJcZE8XhkUd+i507d+Lqqz+Eyy/fCoej\nuNbzmjXrsHz5CgCAKLogSRI0TQNFTT+ZxBZcNnOal/t34g/HnoSX8+CL6z6HBqFuto9kY1OT5KtU\nWQPo2cRWuRmMFEXBMIySvLtyUYntRl03oOs6CMJI2BCQJAGOM4OkDcNALKZAEDiEQpGCos6qepEk\nEpUUc9DewJJOD3qHghgaj4IA0N7gQHtjebNm+TFbdYqq48bNXfA4Wazo8eLQKV/ORyQtGIiE6KSo\nZMtxJjCFUfkiPiZraPRw2LSqBQSAtz8Ygz8so65uaqtSUVQoiopt225CU1MLduzYge9//zv4yU9+\nie7unoLXoigKgmAuWj355J+wYcPGKWLr29/+BoaGBrF69Vrce+/ni64A24LLZs7y+sBb+P2Rx+Bi\nRXxh7T1ochS3omxjcy6SSzhZFYFgMJJjRqn0PDpB4EAQgCRNfxux2DDqbPfzMV8UXhcHkiASYisV\ny+E8GpXAMAxcLgcIggDHsTAMo2hxZ83zWC3HmKzhzISE9UsbEJFUnBwOzeg23fzW5AgFSRBY0V34\nF0/DSL52U2RwqK93wzAQf69qZwOQYylsWd2a+PNFy5sS/53riBzHYcuWy3D55dcgEomA50urLr76\n6kt48sk/4bvf/WHa39999+dwySUb4XK58T//5/146aW/4IorrirqOW3BZTMneXPoHTx0+FGIjBNf\nWPtZtDiLG1q0sTl3mSqcGIaGw2Fu++UbCC+lwmVtOMqyWkZlbOoZpxNGvffYONobnFjS5Z4itlIx\no4tMq4FoVALHsXA4BJAkiVgsBkmSi5oHslqOMVXFDZu60ODhoOsG2hsdULXaEC/Z0LSk67vL5QDH\ncWhqqoMsmy3HfJulxTBT4q3Y5y113uqtt97Ar371C3znOz+AKKYvX11//fbEf19yySacOHG8aMFV\nWbtYG5sq8M6ZPfj1oYch0DzuW3sP2sXWwg+ysTnHyaxwmdt+XGLbL9/jisUcuqfLitjJdsbiwqjT\nRZqq6YjJmlllmozi1JkAYrKKmKzFzUyn4nKJUFUNkYgUN9+U4fcHEQgEQRAEvF4XPB4RHFecH5lX\nZNFUZwo2iiIxv9UFjp3+DBBg5hnOJKqqQZYVjI5OIhZT4HQ60NRUD5fLAZquzGuoFDPhbxYKhfCj\nH/0HvvWt78Ht9kz52pe//HkoiilA9+x5Fz09C4t+brvCZTOn2DOyH//1/m/B0xzuW3sPOlzts30k\nG5s5B01TcDpNa4XC237FtRRZtrJD98WHUafTPxrGC7sHoGm6GUkUlfHfzx6FKDDYtqETLke6aBJF\nBwzDQCg0NRoodRjben1OpwBZViBJxQ3
vW1Uvaw4tEokmKjOlboCO+yW8+f4otm3ojD/3TGQJmnNR\n2VqOZtC0npj3KvbfeSbbk5V+3r/85Vn4fD780z99NfF369dfiAULFuGyy67AJZdswuc+9ylwHIfF\ni5fiiiuuLPq5bcFlM2fYP/Y+fnHwQTAkjf+x5m50uTtm+0g2NnMG6+ZOUVRJ1grFDM2bc2Bshewk\nphdG3d3qwseuXIinXj+FYNSMwOlocOKqC+ZNMfx0OARQFAW/P1jwzOnD5ixE0RyCt1qOhW78LpcT\nimI+B0WRKVuOxQuGYwMB+EIyVE0vGN9TLtlEXGrLMT1oWkE0Giur3VuZsxYScqV//m666VbcdNOt\nOb9+220fx223fbzk5wVswWUzR3h//DB+tv+/QREk/nbN3ejxzJ/tI9nYzElE0RRb5VorZMIwVFFz\nYMVgGObGJICiw6izDc3zDAlN18HSJBRNB02TU8QWz3NgWSYhtgDgzYMjWNFTB7czd+vQrPyYFR6a\npsDzZuVHUdScsUVWFS01ADzT2wtIbjmmij4ppuKpN/sRiloD/AYeev44dANYv7QRVzR4i3qfKkWm\n8HQ6hXjQtDn/pvjyTAAAIABJREFUlu1zNXMzXDNR4Zs5bMFlU/McnjiGn+7/LxAEgXtXfxqLvIVX\ne21sznUyK1NUvCISiVRObGW6vk//+UiQJIlAIFz2c+i6juGJMFiGwvZNXQiEZLy6bxi6YYCMvyGW\nF5XfH0gTAmcmo2BoEhcsa8r19GmoqpZoRXIcm6j8mN5eMWiaDoeDT6uiZZIesDzV24vnaNxy6Xy8\ntu8MegeDZp4hTeKKdW1omzGricKkCk+KIiEIfLzlaLUiky3HmcuPLt/BfjawBZdNTXPM14uf7Psl\nDMPA51Z/CkvrF832kWxs5hwkSUIUTW8hRamM2Crk+m4YRsIuoRhYlgZN05BlpcRqiDVjZvps6bqB\nBjePmzZ3gSAIODgat1zanfhuhqEhio6Ei/yRPj9ODgdBEQQmAzJiso5AWIGi6di0qgWikN8ZXo+f\nNRaTEYvJIEkSPM/C7TZdzwmCgM8XKOqVpHt7JVuOBAAHRwGEmWeoGwYaS8wzLJZyPLM0Tc/bcgRm\nphI11ypcVd9SfPzxx3HjjTfi1ltvxUsvvVTty9vMIXr9p/CjvT+Hamj461WfwHkNS2f7SDY2cw6S\nJOByCfGKQ3nVhmzVMlEUKtaatLzAJEkuuxpiGpsaIAgDDE2mteVoigQZn18zXeTDCZG4qMMNr8ih\nbzQCggQiMQVnJqM4r9s7RWyFogr6RkJpf/fBKR+O9ScFleWsHwqFQRAEVFWD1+uGKDrAMMXXOEiS\nSGw5UhSJwfEoLjmvCbds6YJHZHF8oDgRV21kWYHfH4pvOcpwOgW4XE4wDF3xLcda8gorhqpWuCYn\nJ/HDH/4Qf/jDHxCJRPCDH/wAl19+eTWPYDMH6PWfwhOnD+LF3p1QdBV3r7gTqxrPm+1j2djMOVId\n2mVZhSAkq0GlkXxcKRE7xYinVHsK09G7NMVl5RpmMzZNhSRJuN1i3EU+aWhKEgQuWt6E4fEIxvwS\nCAJY0unJCJw2OXzaj0BYTvta/0gYFEVgSWfSQiAp7EJQVS1hpup0CiAIApIkIxaLFTXzZrUcr7u4\nAzxLAzDwoYvNPMNarvCkthydTgEcx6a1HCWpuNefn/yflZmwjZgOVRVcb7zxBjZs2ABRFCGKIv71\nX/+1mpe3mQP0+k/h/+z+MXSYv31u67kGa5tXzfKpbGzmHgRBxPPykoPc+VzZCz8fYIZRFxexU8w1\nMtuS5pxZ6YKLZWnouopcY2QEQcDjEafkBlqEogrG/BLWLm7AqE9C30gY65c2AgCCEQXD4+ac1ukz\nQURlDW8ePINDp3xwO1mEJRUEgB1v9UFRDVyyohlLelrS4oEMw4AkmYHRFEWB51l4vW6oqgZJiuUM\n0U7
FwVvVNlOAOSkyETkznY3Q4YkIvCIbF3MzVzVSFAUTE5XdcpxuZFC1qarg6u/vhyRJuPfeexEI\nBHDfffdhw4YNOb+/rq50o7WmJlfhb5oFavVcQO2c7cTEaTx45JGE2AIAj8tRM+dLpRbPBNTuuYDa\nPtvZiNPJIRZTMlzCy6twmULNrGzl2sTL8ijkE0+VCqMOBsMQBC4uYFRI0tTzud1iYpA9G6GIghs2\ndqGpToBhGHi/15cQMRxDonc4iL6RMKxFx4O9kyBJAv6wnBjEnwjGsGFFCxZ3N0OSsgs7wLRYSPX2\nyhWinQ+CMNuNHo+ISCQKiiITlb5SxdeRvgC8IovVC+uLfkw5WLooueVoxu84HHxcDOfecsxFrVWw\nClH1oXmfz4f//M//xODgID75yU/ixRdfzPmmTU4WtxZs0dTkwuho9k2Q2aRWzwXUxtkGQ8N4qvdZ\n7Bk9kPb3JEGinZk36+fLpBbes2zU6rmA0s5mC7PKEApJyJxZL3eGCzDgdPKQ5UwBl+cRea6VK4y6\nlPOZFgp63G4hgnA4fVNQkkyBJYoOaJrpIp+L1oZk9AtBEFixIJlFyDIUrrmwAy+9N4iTw+b8VnO9\ngG0bOvHC7kGcHjE3KtvqHViztA2KohQdAp0M0SbBcWwiRNtsOeb39nK7nQlBbS0nFOvtFZEUSIr5\ni+2ZiSjCUQUdzebGI82wFXPFt8h2jzcd/a2qHwmeN0Wz1YqUJKng65hL1S2gyoKroaEB69atA03T\n6OrqgtPpxMTEBBoa7NDhc5EzkVE83fscdp/ZCwMGetxd2L7gWrAkgyF1EO3MPNtvy8amBiBJErJs\nVo+mSzKMuthK2VQssZVZqbM2BSmKBMeZ/limIJv+ucOSCp6lwDEkQhEZum5gYCyCNQvroesGTgyF\nEs70pb8ePSVEm45nOZreXpIUQ++gD+0NzoSXmCg6oOsGIpH0axXr7RWMqnh+9yAkSQVDk4hIKv7w\n8kk0uDl8ZKtrRubC8omjVEd/c4GCR2NjHWRZjWc5Tv33m2sD80CVBdfmzZvx1a9+Fffccw/8fj8i\nkQjq6gqnmtucXYxFJ/BM7/N4a3g3DBjoFNuxfcG1WNGwLPFD4eKmVTVbrbGxmQtkuxeVM+sjimab\nrfQ5G7OleHzAj4XzkgPlZhi1mqNSlr8N+fq+IWxY2YojfRPwihyaclgjmBuIBlRVQzQqgec5OJ1C\nSW27VBRVB0Dgli3zwdAUXt4zhIGREK6+YB7aGx1wOHgsHo9idNyfMmtVHoqiQlHU+KA9A4dDwJGB\nQZAUg45GARzHgKYp+Hy5fz6mentZ4ivV26ulTsDHrujB02/2wx8yN0MXtLiwdV0bnAJT8vtTCIJA\nzvm6TMzXH0IwOLXlKEmxREW0lhcGclFVwdXS0oJrr70Wt912GwDgH//xH0vyabGZ20xKPuw49QJ2\nDu6Cbuhoc7Zg+4JrsaZxxZzrxdvYnAtYeYZmLmHpw+wxWcXLe4bQ0+4GGZ8BM8Oos4u3fC1FWdVw\nsHcSy7s9ODUcwhgv5RRcpos8C78/CMMw4m07AhzHldS2s2BoMpFfCABXnJ/McOU41tzAc8RgGNMT\nW6kEIzGM+aMwDGDgjB8UAbQ2uqDFAFkpvopmia9Mby+KJCCrGgSOgqzq0LXyB++LOAWA0oxxU1uO\nJElCEDh4PKa3WTQagyzLKDyLWFv3larPcN1+++24/fbbq31Zm1kkIAfx7MkX8ergm1B1Fc1CI7b1\nXI3zW9aAJGzBbWNTLUqpcDkcfCLP0DJNLfYaA2NhkCSFgbEINF3HniNjmN/ugS+iwuso7baz+/Ao\nYoqG02dCGPNH8dBzxxCMqKBpU0QYBrCk04t6Nwcg1UU+mCamdN1AIBhJtO143qyemHNphbcus8Ew\nNJxOYcq1SmHv8XGsWTh1rGbCL+PFPYOIKToYisCJQR9ODAXQUsfjl
ssWwuN2lhSiDaS3HM9MRODk\nGVx5fhsiMRUv7RmGGg/8rnTlaLrPqetTW451dR4AZrxQJVrd1cB2mreZMUJKGM+fehkv978OWVfQ\nwNfh+p6rcVHLOlBkZYcybWxsKofDwYEkk+HRpQ7bD41HsO/YBCiaAk2S2Hd8AgdP+rBucQO8Cwpv\nw40HJJweDmHdkkYsbHfjhXcHEIzEoBsGTg6HwDIUWut5HDrlx8qeetS5zOzDpIt8aErUkKLqeO7t\nfnxoQ1dG246F0+kAQaAkfyyKIqeYqJaKbhh478g4uppF1Lm4tK91tYr42NYFePz104jGNDAMjQVt\nIi5e3oiYFIMiy+A4Fi6X06wmFhmiDZhVr+Z6B27YNB+6boDnaNy4aT6IMrdYC1O557RajpEIBa/X\nDUHgU7IcY2UJ51S+//3v4ODBAyAIAl/84t9h+fIVia+9/fZb+OlPfwiSpLBhwyZ86lN/XdJz24LL\npuJElChe6HsVL/a9CkmLwct5cGv3VmxouxA0aX/kbGxmi2KEkyCwoCgqIzw6/2xVKgRB4MJlzZjX\nJOLZXf2gKBIESWL7hg7UufLH0VjnO3Lah4lADOuWNMLr4vChDZ349Z+PorVegKzoaK3nwbE0Fs1z\n44Jlpl+WZTYaDIazziAd6/djxBeDqumg47mSqf5YVhC115s/iBowK0Vut2uKiWqxHD7tR0zREIwo\n0HQDbx0aRXuDAzAMrFxQnxiOZ2gKsZiGeo+AQCgGWVYTFUrTQLS0EO201xB/HqvqxcRnvRiGgizL\n0/L26h8NoaXOAYY23+eZqZoRMAwdk5OBKS3HUCiCsbFxuFylBXu/995u9Pf34YEHfomTJ3vxjW/8\nv3jggV8mvv4f//FtfOc7P0BTUzM+//nP4rLLtqKnZ0HRz2/f/WwqhqTG8FL/63j+9MuIqlG4GBHb\nF1yLze0Xg6EqN9tgY2NTLvmFE8+zYBg6Q2yZlHrv9YViIEkCrfUOnBzyIxhW8goufyiGd4+OQ3Sw\nODkchCRreGXPIFTNgKZrIEkCizs8ODEUBEVRaHDzGJk0K3CpLvKpFQ5F1fHs2/3QdQNhSYWu63ji\n9dMgCaC90YkLlydDqqcGUfNwOh2JypFVMSMIxCsqub22CjHqi2Bw3LRjoEjCdLn3SbhoeWNCbAHA\nyaEAFnbV4cr1HTg9NImdB0ayCqFcIdpWxa6YChxBEHA6HTAMs4o0HW+vD04H4A8pWNFjLcXNTMi0\nJeIyW44jI8O4665PYunS5bjuuu247LLLwXGFsyd3734bW7ZcDgDo7u5BMBhAOByC0yliYKAfLpcb\nLS2tAIANGzZh9+5dtuCyqS6ypuCVgZ147tRLCClhOGkHbl74IVzasREcxc728WxsbOLkq3BxHAOW\nZRAMRqZUI8qpToz7Yti2uQeNIoNDpxzoGw2hqzW3x5pH5DCvyYl3D49DipkVqmP9fizp9MDJczh/\nSQM0HaBpAhRB4vwlDXjj4AgUzUCTN7uLPEOTuHxdG156bwgRSY2blSpY0ePFBXEn+Wxk2kt4vS5o\nmgZJMtt4iqIV7bWViWEYODEYwvWXdOKZN/ugGwZUVcc1F3agvdGR9r0rF7VgHUMjEAihySvghk1d\nBYVPthBtXdfjjvZyzn9Lc/ifgc8XTNtyLNbbazIYQ0RSYRjAiC8KWdHhFc2f/yzPI0/qUlnksoVQ\nFBV1dY34/e8fxc6dO/HMM09ibGwEd9zxyYLPOT4+jqVLlyX+7PXWYXx8HE6niImJcXi9SVeFuro6\nDAwMlHRmW3DZlI2iq9g5uAt/PvkX+OUgeIrHtp6rcUXnFgj0zCTZ29jYFEcpthAsS4Pn2bjYynZn\nLL6lCJizVNsvXQjDAAKBMJZ1FWf/s3x+HcIxHe8dGoZhAE1eHhtWNqd9T4M7+eeNK1vg8bgSw+PZ\ncPIMFnd4cGYiCk0zwLIEzl/SW
FTFRtN0RCJRRCKmK7zT6QBJEtA0M6KnFPuEE4MBSLKGUESBoup4\nYfcARnxRtDUIIAkKA6PhNMFlbT/6/Un7B7KEKpMVoh2JJJcEnE4h/l7Jaa1QmqZyDv8X6+2lajp2\nHhhBKF61G/VF8eddEbQ3OrBofiP0yjpNFGxTchyHK6+8BldffX3Z18g3D1fOLyG24LIpGU3X8Obw\nO3im9y+YjPnAUiyunb8VV3ZdCifjKPwENjY2NYO59cUhGIzmrGKUMjRvhVFHIhIEobRfvAwD6D8T\nRHO9ALeDwZE+HxRVT8wCZeJ2O+Mu8vltEo4PBNDZLGLZfC9e3zeME4MBLO7w4Gi/H6LAoK2h8M8t\niqLiM0OhsuwlGJrErkOjiMZUUBSBvpEwHDyNNQsb0N7owPu9vuT3xrcffb7ytx9TyVwSEMXkkoCi\nyHC5xPjsW+7WYzZvL6vlCABNXgG3XDofT73Rh0BYAQFgQacLW1a3gucYhMPTG2bPciJUuk3Z2NiI\n8fHxxJ/HxsbQ2NgY/1oTJiaSXxsdHUl8rVhswWVTNLqh4+3h9/D0yecxFh0HQ9LY2rkF18y/Ai5W\nnO3j2djYFMAwkBb3Y4mjUCg6ZatvKoUVV2YYdalzX6qmYXGnF4vaTEEwr9GByWAMzXVTbSlE0QGA\nQCgULvi8K3rq0NVi/oz68OU9GPGZAu3UcAgCSxUUXBzHgufNapM5rC5ltZfIrByl0tks4uYtPH7/\nYi903YBXZHHNhR3oajXPtXmNORuUuv1Y+N+kNDKXBDiOg8fjLtlnLVfLkSQJRCQVooNBVNYgK5U9\nf+YZKj2If9FFl+DnP38AN9/8YRw+/AEaGxvhcJiRR21t7QiHwxgaGkRTUzN27nwN//zP/1rS89uC\ny6YguqHjvZH9eLr3OQxHRkARFC6dtxHXdl8BL+cp/AQ2NjY1ggHAVFwURcHp5BEKSQWHqs32Uf5n\nzgyjLmfDjQBw0Yo26Lo5L9Xdln3my+HgQVFUWrstH5bYAoCRyShODodwejiM4YkoaIrAGwdGoOk6\nlnV50OhNF3epXluZFcDclaPsFg0TgRhkWUNni4gzk1GM+KMJwQWYIsLtFhEOR8vafrQ4eGIyLQ8y\nG6qqQRCIuAeZUlaINpDechwaCaGt0Yktq1sQjip4ee8wNN2YkRges6VYWePTVavWYOnS5bj33s+A\nIAh8+cv/gKeffgJOp4jLLrsC99//VXzta/8LALB169Xo6iotes4WXDY5MQwD+8fex5O9z2IgNASS\nILGx7SJc130lGgQ7ksnGZq5htQYpioQoWpWo6Q/XJMOoU32QSpv7MsWMjslJP1iWyYjjSW7a8bw5\n25Qv2iYfbY0OnDoTwuHTPoAAVA040u/D6gX1aPCkt0CtalOhdls2e4lsFg1D41FccX47Fs5zY9wv\n4cCJybTnMQOp5TJilJKEogp2HRrFsm4vKDL3+y8IPEiSTIhWKwTbHLQv3Y2fIAh0NIvobHGZFTwX\nie0bujAT24nxK+Z97tQqXCn8zd/cl/bnxYuXJP577drz02wiSsUWXDZTMAwDhyaO4MkTz+JUsA8E\nCFzUej6u774KzY7SetY2Nja1hBEXR8lKVFGPMpBmV5AKQRApYdRq2mNKu98lh7JlWYnH8aRv2qmq\nmhbZUw4kQWDjyhaEoyr6R0MAgIXtHqxbkv6zLbXaVIqZZj57iQuXNyfahA0eHpeta0s8LhlILZX8\nmnTDwAcnfdAN03RWN3S8ceAMvCIHliGxpDO9E2EJWr8/kP48GYP2VpahFaJd6H2wBI5V9bKSAGia\ngq7r0/L2mnotO0vRZo5zdPI4njjxZxz3nwQArGtejW09V6PN2TK7B7OxsakABBiGRiQilejInbta\nJYpCnjDq4jBvxlMrSKkCwKp4GYZZnTGrXuVV53TDwNBEBF0tImRFx9DEVN8xj0eEJMkIR6SEUWq
p\npNpLmKaqLqiqNqWKJQiltUgzIQkCNE1g16ExqJoOiiJxtN8PnqWxeXX6z26zuulAIBDKa/WQ2S51\nOASQJDnFl6wQ5vsYS1y7XG+vTGaiTTnT2ILLBgBwwn8KT574Mw5PHgMArGo8D9t6rkGnq73AI21s\nbOYCBEFAEFhomp5WiSqGXNWqQmHU+W6oB05MoN7DobVOyCq2UqEoEg4Hj0AgBE3TEluCum4gFovF\n217Fv55ASMb5ixuwckE9DMPA7iNjiMkaONaMHHO7RaiqhklfCK/sG8Y1F3YU/+RZ0LSkMWdmu1TT\ndPA8B58vUPiJ8rCk0wuPyOLJ1/tAUQQIELhpcxccfNJ0On1GrNjqZrJdSlEUeJ7NKRwzcTrNebhI\nRCrL2+tswxZc5zC9/lN44vRBHB7pRW/gFABgef0SbF9wDbrdXbN8Ohsbm+liCSWCAFwusxJFkuVU\na6ZWuJxOHoZhIBLJbQCarwLRNxLCRCCKZg+ft/VoxeiEw0kX+albgkJJQc5eFwdvPLuQIAhcsDTp\nOO90mhuLoVAEH5z2Y8wvQTeMkjyw8pFslxLxdqMATdPBcQxiMWVaVZuhsShomkR7gwOnR0IYmZTQ\n3ZYUXC6XE7GYUvaMmKZpOYSjglgs/b3nOBYsy2SdtSvW2ysfdoXLZs5w3HcS3333R4mRww6xDR9d\ncjMWeXtm9Vw2NjaVRxQd8RaRBp6ffvqDw8GBIJLh1sVy+kwQR/v8YGgSwxMRMBQBTR+EoupYu7gR\nDe70AGezIuNCNCplbVmmtr143gpyLm3YOxVB4KAbwO///D5IgoA/LCMa0/DUztMgQKCzxYk1ixry\nPsfeY+NYvbC+COFAgGVZBINhGIaRIRxz20vkIxRVcP0lHWj2Cjg9HMQZn5TY9DSrTUZBz7JiSRWO\nHMelhWgripbTSDWVfN5ehcQXQRAFK2SVmherFOU1pm3mPM+eejFtv2N981pbbNnYnIWktv0K3cSk\nmAoty2xOakuR581w62LEVmYrsrNZhNvJ4mi/32wHKhpODYfQ1uCYIrYAs7WXz0U+eR0zyHlyMpDI\n06urc0MUHaBpquA5AWuQnEdMimLTqhbIimaalJIEJoIyWhsdWL2wvuA59h2fwKgv/+C7mcfoTMQR\nKYqKYDCMyckAVFWDKDpQV+eGIHAliYbNq1vRHLe16Gp14cJlZuXOzMhkEAwW9iwrFcuXbHIygFAo\nAoqi4PGI0DQdFFXcew+Y4ogkSVAUmfj/wtgVLpsaZyw6jg8mjiT+TBIkFtcVH8BpY2MzNxBFIb75\nVlzu33tHx9Do5bG4w5vxFbOlaOYt0ggGi62SWK1I88ZIEATWL23CqC+KwTFzQ3BBuwfndU+1mXG5\ninORz2SqN5ZpXClJsZxVL5qmIYoO+P2huCkph4UdbowfGoWuGRAFCuuXNOQUP0f7/IjEVEQkFbKi\n453DY5gXj+lZ0VM3Zeje5RKhKNoUIZnPXsJ0hS99MYFhaDgclmt9yQ8vCVVVQZI8olFzoUEQ+JJD\ntIHiWo52S9Gm5jEMAw9+8AeohoZt3VfD43ainZmHHk9pBm42Nja1j6qmbw8WsmoYGo8gHFWmCC7L\nFoLj8uUtFkbXDSiqhqHxMBZ3eBCNaRgYnVp1cTodIAhiWhWZbOLF4XBPsTigKBJut+W1lZxB6h0M\noatZRE+bCzsPnMHgWATzmpxZr+Vxsdh3fAKBiAyKIjAyGcW4X8KFy5umiC1rkDwcnroZmYplL0EQ\nAMuycDh4kKSjpC1Bkkz6iFXatT4bDoc1JG+K5NQAcI/HBU0rHKKdSraWo/kyDNsWwqb2eWPoHRye\nPIaVDctxfc9VaG52Y3S0vFVkGxub2kaSlAyBNXX4/dRQEK/tHwJJkojGVPjDMh56/hgMGLjuok7U\nu3nQNAWKIhEIhEsSW5bAMwyrQqEjEInh0tWtWDDPDQDYf2I
iLS9REHgwTPkWCdlIihdTNDqdAgiC\ngCTJ4Hk2q9fWip46LGg355/a4xFDuWj2Crhpy3w89PwJaLoOVTNwxdo2zM9wyrdae6W8NnMuKpe9\nRCynHYfZthTLsAApD5ZlwHFTh+RTA8AZhgHPszlDtPNhiS+SND9LNE1DkmIV9faaaWzBdQ7hi/nx\n6LEnwFMcbl96y5z5kNrY2FSGbBWu+W0u8CyFF94bNKWYAXAMiasvnAeXg4tn7pl2EqWv8ZsCz6yu\n6AAM1Ls41LuS81qrFiTnoqzMwplqf2VWvdxuEQRBgGUZ6LqeJkwssQUAAkdD4PLfLsd8EmKKhp42\nEUPjUQxNRNMEV3prr7wXl91eInscjyg6oapqwfm3SmB5e/n9obyvTVEUKIqSNUQ7FosV/flyuRww\nDB2qqlXU22umsQXXOYJhGPj94ccQVSXcvvRW1PGZMxo2NjZnP9kNTFsaHHBwFCIxBTAAr8jC5eAS\nYdTRqASOK2+70bwB6wVFBssycDqnJ0j2n5hIE3D5EAQzcDocjkypepVy87cY8UVx1QXt6G51IRCR\nsefoeOJryUDqUMVae1Pd+EXouo5YLAaSJEGSBPz+yg/JZ5Lq7VWsEW22EG2v1w1V1eItx9yzaizL\nJOwmsnt7AaXGSlULW3CdI7w3uh97xw5isXcBNrVfNNvHsbGxqQKZFa1cM1zBqIzJkIzrLupEMKLg\n7Q9GAQKJCCDTnLO865v/n1+80DSV4n5eniCJKZqZIdjlTbQnc2GJK2tGTJLMCpFp7Gne/EsdVF+9\nMGkX4XawuHSNGdtTjtloKWTG8TgcAmiaQiymgKbpaYVgF4PL5YQsl+/tpaoaVDWCcNiqcOYO0c5X\nSUsftK/N+S5bcJ0DhJUIfn/4MTAkjTuWfRgkYbuB2NjYJCFA4ONXLgTHmreE9kYnBEFIhFFbN7NS\nURQVbrd188y+pWYOrYsIBsMlCxLDMHDopA+abmDEJ4EA8Pr+YTS4edA0geXzp24/8jyXc47KNPZM\n3vzTB9VLr3oB5hzVdAOpi0XXDVAUCb8/lBCxABKD9pXe6nM4TBUeDlfG28t6n6ZW7cy/d7kKV9LM\nihcBogbvc7bgOgd49OiTCCoh3LzwQ2h2NBV+gI2NzTmFKKTHv3S0eRGLKWkRQKWOxui6jnA4gmjU\nvHl6PK4pLSPTRb70gOjUs/IcjTcOnoGsaCBJAr1DQQyOR7FxRfOU72dZBoLAw+8PFBQfUwfVi2t5\npSKKzkQFaqaxKmmhUASqqibmt6baSxQOoS4Gc0iezeokP10yq3aWo72u6wUroOY/K4latBm1BddZ\nzqHxI3hz+B10uuZha+eW2T6OjY1NjSOKAmR5qp1EKei6mZNHEOk3T2vQ22oZMQyT8McqlwXtLjS4\nWTzy8klQ8Y3IGzZ1wiWkz5xZFR/La6tYUgfVOY6FIFiD6vntGcxAarKi25b5cLvFrGJwqr1EeSHU\nqaQGYM+0F5aiqKAoc0tWkuREO9hqOaaf3wBAoRbFFlCrp7KpCJIaw4OH/wCSIPFXyz4Kiize9dfG\nxubsJN8ml+VKL0mZAqj4IWRTbOkgiKk3YllWEAiE4PMFwbIMKIoEw5hD0NOhfzQCkiTQ0eQEQRAY\nGE33uCLJZNuy2MHubMRiMvz+EAIBc2Db63XB7RannN8UliwCgVDZ1yoFUXRA13VEo7kraZa9hN8f\nLHj+fJi5nDM3k5YJTVPx4PIwJCkGny+IQCCcdn6fbxKxWBSGQaIWh+Ut7ArXWcwTJ3ZgQprEdfO3\nosPVPtt3/2CnAAAgAElEQVTHsbGxqQFSvbFSyRdGXYxhqoOj4HKwOcVW+rUEqKqGYDAQb/NxcUfy\n8qouwYiMD13cgZZ6BwbHwugbSW7nEQQBj6f8tmU2stkzWI7qqqrGK2nlb1uWAs+b1h2ltPZynT/b\noHomouiEopQ/JF8KBEH
A5XIiFIqkfSYyQ7QfeeRhPProH3DFFVdi+/absWzZeTN+tnKwK1xnKSf8\nJ/Fy/060OJpxXfeVs30cGxubGsYKow6Hy5s1Otrnw/snJ4sWW6kbgrKswO8Pwe8vv+pyyYoWtNSb\nA+LtjU5cfF5yfmumh9ZTq3Zm9cec26Lpma9nMAwNQeCnVUlLPb9hGHC7RXg8LvA8O0VkCwIPkiQq\nNiRfCJfLGZ8lzD0zJ8sK7r77s/jFL36NpqYWfOc730Qkkt/Ff7awK1xnIYqm4NeHHgEA3LnsI2Co\n6ZXrbWxs5ibZCizJlqL5RUEww6iDwdJuUpNBKeG+3j8SBk0RaPKahqaNHh4ux9SfO6aLPJ1jQzBz\nVopPmZUqb0Mwmcc480PrhqGDYRiEw1Houp4Y9M63oTkd0r29pl9Jyzao7nAIifBwc3OQg88XqMDp\nCyMIPAgCBbM0rSH5lpY23HXX3bjrrrurcr5ysAXXWciOUy/gTGQEl3VswkJv92wfx8bGpkbhOAYM\nQ5cstgCApgjsOTqOUZ8EMy7QwF92D6K1XsBV66eOMJTiIp99Q1BFNFq8L5bTKVTN/BOwAqmVhLN7\nuimpC7quT3tBwMJstc3cHFVqADjPs3C5nCBJsiqu9QASgq+wuDNgNupqd24rFbuleJbRHxzEs6de\nRB3nxY0Lrpvt49jY2NQYVoWLZel4GHW0qC3EzDkul4PDjZu70d4ogCBMEdDdKmLbhk7wGTE4DEPD\n6RRK3mqzql4TE37EYgocDh51dZ5EaysXltdWIFAdsZUMpE6vxlhVo8lJf9ytn0F9vQdOpwCKKn+J\nyeWqzhyV5QhvGEA0KoEgCNTVueFyOcEwM1OvIUkiEbhd6LNiGJY/nC24ciJJEq666io8+uijs3H5\nsxZN1/CbDx6Gbui4Y9mHwdNc4QfZ2Nicc9A0BUHgEApFShBA6ZuKum6AgIFwVIEo0OBZClFJnbIB\nSdNUovU1nbZacsMuBJIk4fVmv/FbXlvVsCwAkuIuGMw/R2XOSoXh8wWg6wbcbic8Hhc4buqsVD5y\nibuZwhySVxGJSAiFIpic9EOWFTgcAurqPHFz2MpJCZdLRDQqFXTIr2W/rVzMSkvxxz/+MTwez2xc\n+qzmhb5XcTo4gItb1+O8hqWzfRwbG5sahePY+OZXaYIkfbvRwGRIgtvJ4sr17dA0Hc+9M4iYrIFj\nzerNdFzkczHVDd5sHaZuCFYyszAf1tC6uZFY3GN03UA0KiEaTTf1tGal8r1PHMcmcgSrgVlJJBEM\nJq9n2Uukt3ynmtqWg2VuGo3mb13ORbEFzILgOn78OI4dO4bLL7+82pc+qxmJjOKp3mfhYkTcunj7\nbB/HpsocH/Dj5f3D6KgXsHCe/cuMTXYoigRNU/HV/9IESaqg0HUdhqGjTuRw7UUdAACaIrF9Y1fi\ne6brIl8M1o3fdFPn4XDwUFUtp89YJalEIHX6rBQHl8sZb+OZryu1QkfTFJxOoWp2E5YY9Ptzz1Hl\nt5cobVGA45gixeTcmttKpeqC65vf/Cb+6Z/+CY899li1L33Wohs6HvzgD1B0FZ8872aIjHO2j2RT\nRY70TeKbv3nPbPgQwD/cuQ5LOqZmyNmc25AkCVE0/a/KEwhmS1HXNRhG/scThOV8Xp0MQU3TQdNU\nvM1mJKwnLF+vSguU9Bid6VfuDGNq1cvh4BNVL13X4XJZxq0zX7kjydI3IGVZybkoIMty3gogRZFw\nOrOHUmcy1+a2Uqmq4Hrsscewdu1adHZ2FvX9dXUO0HRpg4VNTa5yjjbjzOS5nj/+Ko76TuDCeWtw\nzXkbS/7t7lx8z6ZLrZxN03T89r/ehvUjyjCAH/3xAD53y2psXjMv72BxtamV9+xchCAIiKIZRm3+\nTC39c2EYpjCwInvyYW7sqXmdzyuJ2y0mxAkASJI8YxmC1vUkSZ5W+ywXmRuCougERZGQZ
aUqzu6A\n+foiEams6+XKQczVMk11ri+UAjBXW4kWVRVcL730Evr6+vDSSy9heHgYLMuitbUVGzduzPr9k5Ol\nrSo3NbkwOlqd3nYpzOS5JiUffvXeoxBoHjd3b8fYWGkGeOfiezZdauVsum7g50+9j5NDybMQAEIR\nBf/+69148M8f4ObNC3D+ksaqtFjyUcp7ZguzymFuFhJwuUw/KFk2c+nK+TjougFB4ADoeW/EVlus\nmuaY5k0+/XrJDEECHMfC6TSNUS1rhnKrXqa3V/4YnUpgVr1ioGkamqbBMAzU1bkhy+Z24ky1aV0u\nZyL4erpkE48AEjmOhmFAFM3rFaqEznWxBVRZcH3ve99L/PcPfvADzJs3L6fYsimMYRj47eE/QtIk\n3LnsI/By9uzOuYJuGPi/Oz7AGwfPYGG7G7dcugCjQRkd9QJcThZPvNaLnQeH8cM/7sf8Vhdu2bIA\nqxbUz7rwsqkuBJE9jLrUCpeuGwgEwuB5JnHTNIVLLK1VlPS+qk6GoDUwn+96lrWBJMUSVS+Hwx2v\nuMgFt+HSr2cOkVcrkNoaWreulykeU4VLZa7Hzcjrs8SjVWG1Ko9mMgGBycli/bbmrtgCbOPTOc3u\nkb04MH4IS+oWYUPbhbN9HJsqYRgGfvPsEby2bwjzW1340m1r4OCZtCrS3dvPw4c2zMefXuvFrkMj\n+N7De7Fwnhu3blmA5d31s/wKbKoFRVHx7bFk9cD04Sr+OVLDqC3hkjpnFIuZrSKWpeMu8tURWzzP\ngWWZksRBZtVLFIuvenEcC45jq7YhaA2hp5p/ZhOPlWqZWhuXM+0kb/0b0DQFt1uEruuoq/MkxGO2\n+UIzlHpuiy1gFgXXfffdN1uXPisIyWE8fORPYEgGdy77sF25OEcwDAMP/eUoXnxvAJ3NIv7uY2vh\n4LNHN7U1OHHvTSuxfUMIj73Wi3ePjOLff7sHy7q8uOXSBVjc4a3y6W2qjapqiEbLn/tJFVupWK0i\nkiTAcRw8HjGexViKr1f5sCwDh4NP5P+VSrpwmTqkntkyrfaGIEVRCXuLXNdLikfEq17WooAcNyst\n/pzWkHwwGK5ITFAhLKf8UCgCWVZAURR4ns1qL2G+jLk5JJ+JXeGaozxy9HGElDBuXbQdjULDbB/H\npgoYhoFHXjqO59/px7xGJ/7u9rUQhcI5mR3NIj5/6yr0DgXw2Ku92H9iHN/49btYuaAet2xZgJ42\ndxVOb1MrGIZRlFGlrhswjPxh1LpuxFtyHMLhCFjW9MWybvoz4YVF01RFvbZUVUUolB5jk7RmiIEg\nkl5i1dgQNDcgnQiHi9uANAzE329zUYDjSq96ud1ORCLSjM2FZWKGUidFlemtlm4v8bOf/RSBQBDX\nXbcdCxcuqcq5ZhpbcM1BDowdwttn3sN8dyeu6Nw828exqRJ/eq0Xz7x1Gi31Dtx/+1q4HWxJj+9p\nc+NLt63BsX4//vjqCRw4MYEDJyawbnEjbt6yAJ3N4gyd3GauYVY5dAD5qx2pLvJW69IKOa6UGWYq\nZiWmcnYMqaTOGaW2TAFT0FRLjLjdzri/WOnvmapqUNVMU1gybwC4KDoS/07VwHpPcwWKW/YS1133\nITz++OP4u7/7IlpaWvHtb38fLtfcXqixBdccI6pKeOjwo6AICn+17KMgibnf17YpzJM7T+Lx10+i\nycvjKx9fB49YfmzTog4P/v7j63Do1CT++MoJvHd0DO8dHcNFy5tx0+YetDXYPm5nM1aWYi7Mm7JR\nsCVFkmRWLyprYzASiYLjWAgCD6fTkWjhlduSs7yvolFpRuwYUrFapm63GG/ZMWBZGtFoYU+p6eB0\nOqDrRk4xUgpJN3irXeeeIoB5ngNNU1WbS2MYGhxXTCg10NTUirvvvhd33XUP3n//AARBqMIJZxZb\ncM0x/nT8Gfhifnyo+yq0i62zfRybKrDjrdN49JUTa
HBz+PuPr0OdqzIZmcvn12HZX52PA70TePSV\nE9h1aARvfzCCDStaceOmbjTXOSpyHZvZJTN0OvPPWR5RhLEpAY/H9E7KJ37Sb/rTG/B2u83A5mpV\nYqxNQGsJINVTyloUKOQbVQpmJiOd19m9HFLbdakCWFGUqsYEpZqpFjY3Bay5LZqmsXr12mocccax\nBdcc4pivF68OvIE2Zwuu6d4628exqQLPv9OH3794DHUuDn9/x/lo9FT2tzyCILBqQQNW9tRjz9Ex\n/PHVE9h5YBhvvX8Gm1a14YaN3Wjw8BW9pk3tYkX25IMgAI+nNBd5K/8wEgE4jivZE0sUzcpPtby9\nTPFDpW1AZi4KWBt21muYDgxDpywBTPf0uUmNQvJ4XHEfLEdF2765cLmcRZmpng1+W7mwBdccQdYU\n/ObQwyBA4M5lHwVD2v90Zzsv7RnAg88fhcfJ4u8/vg7N3pkrqRMEgXVLmrBmcSPe+WAEj73ai1f2\nDmLngSFctnYetm2YD+802pg2tUOulmIxYguwXOS1sow/zQHvXJ5Y2YObHQ4eFEVVzfuKZZl4IHUg\nq/jJFT5dTn4gkLkhOPND+YBZvYtEoohGY+C4ZAbiTC07mK1SvYjqpAGAwtkotgBbcM0Znjn5PEai\nY9jauQU9nq7CD7CZ07y2bwi/2nEYLgeD+z++Dq311WnvkQSBi5a3YP3SJrx58Az+9Fov/rK7H6/u\nHcTW8ztw3SVdJQ/r29Q+pv1DYX8uUXTEXeRLSwHJRqonlrUdqOtGWsWo2t5XqXYMxdgjpFa9eJ5L\nyw8spuplZU5Wc0PQrBbqiEZN8ROLKYjFlCnLDrFYrKzB/Uw4ji26VWr6bc19+4dc2IJrDnA62I/n\nT7+MBr4e2xdcO9vHsZlh3jw4jF8+fQhOnsb9t6/DvMbqD7FTJIlNq9pw8XkteH3/EB5//SR27DqN\nF/cM4OoLOnDtRV1w5vD/sqltrLgfi1xeW5k4HAIoiqy4sWnmdqAgmBUjRVHBMHTZXlulYtkxlLMB\naQ26RyJSwtagmKqXKDoTM23VIN+QfOqyQ/I1OMqu3AFWKLXlX5b/e6vlt/Xuu+/gn//5q+juXgAA\nWLhwEb70pa/M6DUtbMFV42i6ht8cegS6oeOOZR8GR9nVhbOZdz4Ywc+ePASeM8XWbFs10BSJy9bO\nw8aVrXh5zyCeeuMUntx5Cn/ZPYDrLurEVRd0QuDsHyNzi+Sdr1ixJQilu7qXg1UxYhgabrdY1Rmj\nSgVSW7YGZsWIhcfjgqZp8apX8rnNmCACwWB4ukcvCpqmEnNihch8DaVW7oBk9c4Mpc4v1qo9t7V2\n7fn4+te/VZVrpWL/pKxxnj/9MvpDg9jYdiGW1S+e7ePYzCDvHR3FA48fBMOQ+PLH1mB+a+14zjA0\nhasu6MSWNe148d0BPP3mKfzx1V48904/rr+kC1vP7wDHULN9TJsisLYUixVb1oxPtVzWSZKAKJoz\nTbKsxGeqrGpLbj+p6WAGUpc3l5YLs2KUWfVyxONrtKq2SknSdHYvdU4s+2uwKndy3i1NUXTGMzzP\n/lDqYrEFVw0zHB7B0yefh5t14ZZF22b7ODYzyP4T4/jxYwdAUQS+9NE1WNhem0HkHEPhuou7cNna\ndjz/Th927OrDwy8ex5939WHbhvm4fG07GNoWXrWNAYAo6CIPmNtzTqcDfn+wapEvbrcrzWvLqrZQ\nlDVjVJnsQAur0uT3z1ylKbVi5HDwEATTbJRh6Bmv3AHmosN036/kayi8pcnzZgh2MFhIUFqh1NWd\n2zp5shf/8A9fQiAQwGc+cw8uvPCSqlzXFlw1im7o+M0Hj0DVVXxs6S1wMLYn0tnK+ycn8IM/7AdB\nEPjih1djSWftZxwKHI0bNvVg6/oO/HlXH557pw8PPX8UO946jRs2dWPzqjbQ1Nn/G+tcINN3S1V1\nGIYBr1dENJq7R
URRSRf5akTaAGalKZfXlqbpaX5SyexAM/S4nOpbtYfyDcMAwzCJNuJMbwcCyQ3B\nSlXvcm1pWpumAIpuXRqGNbNVPcHV2dmFT3/6HmzdejUGBwdw332fw+9+9xgYZuZnUm3BVaO8OvAm\nTvhPYl3TKqxtWjnbx7GZIQ6fnsT3H9kHwMB9t67G8u762T5SSTh5BrdeugBXX9CBZ946jRd29+NX\nOw7j6TdO4abNPbhkRQuoInL7bKqDFdnj8wXSBtRjMRnRaPKGn8tFfiZJbkAW9tpK9ZOyDFXzWUtk\ng6bpqgZSA9acWCytejeTUUilbAiWQ6Y3mcvlBEmS8USB2prbsmhqasaVV14DAJg3rwMNDQ0YHR1B\ne/u8Gb+2LbhqkPHoJP50/Gk4aAEfXXLzjF1nz7ExjO8ZRHeTEwvn1WYL62zm2IAf33tkHzTdwP+4\ndRVWLpi7IeQuB4vbrliEay7sxFNvnMLLewbw86cO4ak3TuGS81rgcfPoaHDYn7NZJDOyJ3mznHrD\ndzoFRKP5XeQrSbleW6nWEhyXGjxtGapmf5wpKJ1VC6QGUu0Y0itNU6OQUufV5LKrXjRNwekUZtxM\nFUhWvRiGTnym6uo8OUXwbM5tPfvsMxgbG8Mdd3wC4+NjmJiYQFNTc1WubQuuGsMwDPz28KOIaTI+\nsfw2eLjKD06P+KL4zbOHsf/EBABzoPL/ufN8+2ZYRXqHAvju7/dAUXT8zc0rsXZR42wfqSJ4RQ53\nXr0E11/chSd2nsSrewfx2Gu9AACKJPBV+3M2i2SP7Mm84YuiE4BpjkoQxIxXfyrR1rNEliSlBk9n\nt2WwMhnD4WjVvK8EobjMwmQU0vSqXgRhDclHqmamaoVSh0KRxBksfzXL8NYUwTpmY27LYvPmS/G1\nr/0jXnvtZSiKgvvv/2pV2omALbhqjrfPvIf3Jw5jef0SXNy6vqLPPRGQ8MTOk3ht3xA0PX01/MCJ\ncftGWCVOnwni//xuDyRZw+duXIH1S5tm+0gVp97N467rlkHgaOx46zQAQNMNHO7z2Z+zWaBYF3mO\nYyHLpkgpt1VXCuZQfmXbelMjeNItDaw5senG8RSLKQD5ktp6mfNqVv5hsVuaVutSUapToWRZZkoo\ndaa/GsPQuP32j2LFipXYtu1GrFmzvqDR7kzgcDjxrW99t/oXxrmwhzmHCMohPHLkcbAUi48vvTVr\n/EY5+EIx/Oa5I/jqA2/g5T2DaPIKuHlzDygy+fy7PhhBNFad3/bOZfpHQ/j2b/cgIqm4e9tyXLS8\nZbaPNKOsX9KU+JxRJIGlc2Ah4GzDMPSiqhzWDJU1txUKRTA5GYCmaXC5nPB4XOC4yvkAJofyZ6at\nZ7W5Jif9iEYlcByLhgYvSJJMuKzPNBRlxfYU51yfjVhMht8fRCAQBEEQ8HrdcLmcYJjs9RKnU6jo\nkHwhSJKEKDoQDOYOpVYUFZGIhB//+P/D4sVL8e1vfxNf+tL/qMr5agnCqNa0YBmMjpZWYm5qcpX8\nmGpQ7Ll+ceA32D2yFx9dfBMu79w07esGI3JikFlWdTR6+LRB5uMDfvSNR7DngzPYd2ICizo8+PJt\na8Czs1/4rNV/S6D8sw2Nh/HNB99DICzjU9cvw6Vr2mviXDPN8QE/+iei6KgXiqpuNTXVjv/YdKmN\nfw8j7X/Zfo9zOIT4cHXu8zIMk2iNTXerjiQJeDxuhMORqs2JCQIXr+Cp4Hm24gPqmZjiyIVIRKp4\nNY3j2Lj1AhEPETerXlY1LFcO5Ezg9boSn4d8pM5tGYaBQMAPj+fs+wUs38+v2b+z2gAA9o4exO6R\nvVjgmY9LOzZM67kikoId8VX9mKyhzsXh9o3d2Lw6fVV/4TwPLlnbgUtXtuKnTxzErkMj+P4j+/DF\nj66xTSwrzJnJCP79IVNs3Xn1koqLrVrG+pzVhvg4F0ldu9fjVQgj7sFFYGRkGIs
XL0YgkD+yR1EU\nKIq5VScI1nyRimi0NH8ny4FckqSqiS3TtNMUImYMT3qrzpr/qmT9weVyQpZnpnWZnPWiEt5kqqqB\npqmiYnQqhSg6oGnFhlIn57YIgjgrxVYhbMFVA0SUKH53+I+gCQp3LvsISKK8Tm80pibMKKMxFW4n\ni1svXVDQjJIkCfz19vOgaQZ2HxnFfz66H1/48CrbwLJCjPmi+PeH3oMvJOP2rYtw5fqO2T6SzTmL\n9bNFh2EAL774HH74wx/g4YcfBVmkfYeuZ/fDsjy9CokWl0uEomhVbOtlD6ROFS2CYM6rVcpQ1ekU\nAKAoi4vpoGkawuEIIhECdXVuGIaRmN8q15usWDiOBU0XG0pNYDYH5WsFW3DVAI8dfwp+OYDtPdei\n1Vn6TE9M0RJxK6GoAlH4/9u78/io62v/469ZkslMdkgChD3IZtgVFBCogBsuIKggVqqgIorFulz9\nabmlUEG2CwJyoYBQq2IuqEhVKtVCRQRUDCCrQCCyJWQjJJklmeX3xzcTJiEkk2Qy30nmPB+PPgoB\nMicByeHzOd/3CeHBWzvUaN2KXqdl8shkln9ykH0nsnn7k4NMHd1dwivrKPeylXnrU8m9bGPMkCRu\n79dG7ZKEALTs3fsDCxcuYMmSt9FodLhclV83VqViHpbJpAzZWyy2Ste+RESYABdFRWbffBjV0Go1\n1eaJORzuaAkwGJRIBvB8qq5mTYvBEEpoaIjfwlQBoqLCsVptmM3Wctlkvkzk96TT6Wq4lFqaLZCG\nS3XHck+w8/z3tIxowW1th9To15bYnfxn3zk+35VOflExRoOeUYPac1stFwrrdVqmjOrG0o8OcOBk\nDis+PcTTI5Ol6aqlS4U25q9PJTvfyshb2nN3/3ZqlyREmUOHfuYvf5lLUlKn0rdcfd3oLc88rLCw\nyte+GI1hXkUj+FJNri7d0QVWqw29XgmFNZnCqmwgK3JnX/kzTDU83IjLBWazMiR/5ffC3UDWPZHf\nkxKrEU5hoTngllLbbFYefXQsjz32BCNG3OuX16wJabhUVOwo5oOjG9Gg4ZEuD6DXevfbYXc4+fbn\nC/xj52nyCmwYQnXcM6Atd/RrQ3hY3fJEQvRapo7uzlsbD/DTL1ms/uwwT957vaSF19DlomLmr08l\nM8/C3f3bct/AdmqXJEQ5EyZMrPAW93/jrrLG61pD9teiRAEoa188lx2XlNj93mxFRoZjt9fu6tJu\nt1NQYPdoIMNxOl3lGsiKPBdE+ytMtarTtPINpGciv7005qN2p14REaayvYpVc89t+e9rx7p1a4iK\nCtzYGWm4VPRZ2layrbkMbzOEtlGtq/35TqeLXYcy2LzzFFmXrITotdzZrw133tyGKJPvHtcODdHx\n+zE9+J//28f3Ry6i02qZdHdXtFo5EvZGoaWEBR+mciHHzO19WzN6cJLPIj6EqH/uIXvPZPqan3q5\nvygbDCFERCgJ8MreQN9fcVVkMhl9spDas4GsuAqpYqCq+zTNX2Gq5a/1qj61qpjIr1zt1vza1Gh0\nL6Wu/vPqcvm32UpPP83p06fo37/uT/jXF2m4VJJ++Qz/PrODeGNT7m5/W5U/1+ly8ePRi2zacYqM\nXDN6nYZhfVpx94C2xEQY6qU+Q6iO5x/sycKUfew6lEGIXsOEO7uglcahSmZrCQs/3MfZrCKG9mnJ\n2KHXSbMlGqiKTzcq/1+Txkun0xIergysl5QocQzKQLmm9Iu9zedP1CnJ9b6foSq/CimU6OgrKfAG\nQ2itT9NqoybXep48E/krzt1VF26rXLOGlQs3vfbrgL+XUi9btog//OG/2LLlM7+9Zk1Jw6UCu9PO\ne0c24MLF+C4PEKqr/HTK5XKRejybTTvSOJtVhFajYXDPRO4d0I6m0WH1XqfRoOeFh3oy/8N9fLP/\nAjqdlt/e1kkaiGuw2OwsTNlPemYBg3u2YLx
8rkSj4T6p0Hh93egeWPdcoaPkNRWXm5Gy2UqwWq0+\nuYZzJ9cr+wPrZ4ZKWYVkxWxWrk3Dw42lYapWtFpNrQNOayIyMhybzZtrvWu7+tQrHKj81EtZFaTs\nnqzu41NjT+KWLZ+RnNzdLwuo60IaLhX8K30754syuCXxJjrFdrjqx10uFz+n5bJpRxqnMwrQaGBA\nt+bcN7AdCbEmv9ZqCgvhxbG9mPdBKtt+OkeITiunNpWwFttZtGE/py5cZkC35nIaKBqpqjO9PClX\nbMWVzjxdPSMVidOpnBDVtonQarUeTYF/ZqgUGvLzCzAYQomJqb8nA91MJiVywmz2TeRE5Xsow8qd\nermfgqz+Y/L/3BbArl07OX/+HN999y1ZWRcJCQkhPj6Bvn1v8msd1ZGGy88uFGWy5fTXxBiiGXXd\niKt+/Eh6Hp98k8aJc/kA9O2SwMhb2pMYF+7vUstEGEN46WGl6dr6wxn0Oi1jhshckputxMGSjQc4\ncTaffl0TmDiiqzRbIgiUz/QCpclxOOzs3v0dd955Z7XrZSofsvd+Z6CbEqTp34XUnvledrsDu/3q\nbDJf52EpOwvrL3LCfW3quXhao9GUPjBQ/aogf89tuc2cOafs22vWrKRFi8SAa7ZAGi6/crqcvH9k\nAw6Xg3Gd78eoN5b92Imz+XyyI40j6XkA9O4Yx8hb2tOmWWCsOYkyhfLyuF68+UEqX+xOR6/TMGpQ\nktplqa7E7mDZxz9z9NdL3NApnifuuV4eLhBBxvO60cmiRQvJzc3lllsG1+i9uIfsdTptWXq6t6dF\nUVHKFZu/FlJ7zlBVnHuqmE3mqwXg7gYvP//aOwt9xb142uFwEh5uxOFwEBsbXWVEhhpzWw2NNFx+\n9J+z33Hq8q/ckNCT7nHXA3A64zKffHOKn9NyAOiW1IT7ByXRvkWUmqVWKjrCwH893Js339/L5p2n\nCdFrgzpbyu5w8vYnBzl0Kpde18UxWTLLRFDTsH79Bxw8eIjly1eWpovX/OlGh0NJslfW71QMIr16\nyHg4/T0AACAASURBVD4iwlS2rsdflAavuMrrz/LZZMpp0ZXru5o1hu4Gr6jI7FUemC+4l1K7T/Cq\nishQY27rWiZNmqx2CdckDZefZFty2XxyC+EhJh7sNJKzFwv5ZEcaqcezAejcOob7ByfRqXVg75eK\njTTw8sO9mfv+T3z0nzT0Oi13BGF6ut3hZMWnhzhwModu7ZswZVQ3abZE0IuMjGT+/MWYTO6T+bpk\nelHpbJFnJIN7mbY/872uNHjVX7HBldMii8Xz46g8WuJaruxl9M/uSVCaSrPZWnYqVzEiIyzMwKZN\nH3Ps2DFGjBhJx45d/FZbQ+X3hmvevHns3bsXu93O5MmTuf322/1dgt+5XC7WH/2IYmcJd7W6h/e3\nnOaHIxdxAR1aRnH/oCS6to1tMDNRcdFGpen6IJWUf59Ar9MG1X5Ah8PJ6s8O89MvWXRpE8PU0d0J\n0UuzJcS9946q8BbfZHpdiWRQTlmioyNxOp1oNFqvdvn5SliYwev9gZWp+HG4HxZQTosqb6ZMJuWJ\n9Prey+gpIsJUFnlRGffH0b9/fy5ezOaVV14gLi6B6dNn0rp18P0D3Ft+bbh2797N8ePHSUlJIS8v\nj/vvv7/RN1w7Thzm2293crb4OFGOVqR8ZMblstCmWQSjByfRPalpg2m0PCXEmnhpXC/mfpDK+//6\nBb1Ow5Begf1Iri8cP3uJlL//SNq5y3RsFc20B3oS6uW+SiGCV90zvYCyk6Xi4hKioiJwOJxER0eV\nnYTV52xTSIgekymsNHKibu/L/XG4oyXcDwsosRm2sqcslSH5UL+e4LmXUnuTtxUd3ZQJEyYyfvxj\n7N37AxERgTFzHKj82nD17duXHj16ABAVFYXFYsHhcKDTNc4vWDtOHGZ9+t/QaFy4XJB1Mp7EphGM\nGpREn05
xDbLR8tSiaTgvlzZd7/7zGHqdloHdW6hdVr05mp7H/PWpuP+uHXlLewyhjfPPrhD1p+aZ\nXuV+dekKncJCc+mQffllzRaLtU7D6ZW/Zv1FTrgfFtBqtRiNBmJilEDV4uJiTCajX4bk3TzT66vj\nObel12u56ab+9V1eg+fXhkun02EyKQOQGzduZPDgwVU2W7GxJvT6mn1Bi48PnA77wO6jpf+CA40G\nkpJ0LHh4GLoAe4qtLp+z+PhIZj9j4rXlO1n7xRGaxJoY3Ns314uB8ntZYnewdXc6f/viMJ5/7WUV\nFAdMjW6BVo+nQK5NqMH7TC9PFRdSOxwOiorMmM2eAZ7K3JQvnlrUaJTXNJvrd22P06k8LFBUZClN\n5DfhcrkIDQ3BanXWe9MVyEupGwtVhua/+uorNm7cyDvvvFPlz8vLM9fo/cbHR5KV5b+j16pY7BYu\n2I+Xfd/l1DAoqRu5OYUqVnU1X3zOIkK0vDC2J/PXp7Lw/Z8oKrRxY5cE1euqK7vDyXcHM/jHzlPk\nXLah12nQaJS/bHRaDa2aGFWv0VMgfM6upSa1SWMWjCrP9Kp46lXVQurKAjzdew8tFlutT6YiI8PL\n4in8RWmylKcZjUbP07vaL52uTmRkTZdSB9bBQUPg94Zrx44drFixgtWrVxMZ2Tj/YrXarSzf/w55\nxTm0N3UgghYkx13HoOuuV7u0etOueRQvPNSLBSn7WLn5EHqdll4d49Quq1acThd7Dmfy6benuHjJ\ngl6n5fa+rRlxc1uyLlk4m2uhVRMjHVoG7lZ6IRqmK9eN5Yfs4auvtjJs2DA0mupvPSruPXRf01mt\nNUuyN5nC0Gg0FBXVbQl2TVx5TWVI/tpLp323h9JoDEOj0Xr1cSpxH5K3VRt+bbgKCgqYN28e69at\nIyYmsOMPaqvYUcyKA+tIy0/nxma9+N3142iWEB2wJw++1KFlNH94sCf/83/7WL7pZ54b04PuSU3V\nLstrTpeLvcey2LQjjQs5ZnRaDbf2ack9/dsRG6ksCY8KD+XmXq2C4vdTCPWUv27ctOljNmz4kIED\nb8Fo9H69mefeQ4MhFKMxjPBwY9lwelXXdMoSbP8OrCtD8oarBtarWr9zrSBSb4WEKHstvV9K7Z+r\nRKvVyhtvzCAvLxebzcZjjz3BwIGD6v1165NfG64vvviCvLw8nn/++bK3zZ07l8TERH+WUW9KHCWs\nPPA3jl9Ko3d8dyZ0HYtWE1x33J1axzBtTA8WbzzAso9/5vkHetC1XRO1y6qSy+Vi/4kcPtmRxpmL\nhWg1Ggb1aMG9A9sRF22s/h0IIepNamoqq1f/leXLVxEWFlHu1Ksm3AnwOp2u7JruWgnwev2V4XH/\nDaxfCRqt6jXLr9+pPIjUW8oDCIG5lHrnzm/o0qUrjzzyOzIyLvD8889Kw1UTY8eOZezYsf58Sb8p\ncdr568F3OZp3nO5x1/N48nh02uB8gq1ruyY8N7o7Sz46wFsfHeCFh3oFZKCry+Xi0OlcPvnmFKcu\nXEYD9E9uxn23tKeZn5eECyEqp9FomDdvEa1bty19S90yvRyOyhPg3UP2nk9BehNK6gsaDURGKrsg\nvX3CsvI9lDWbWYuMjMBiqclSav9dIw4bdiUyKjMzk4SEus0FBwJJmvcBh9PBOwff53DOMa5v2plJ\n3X4btM2WW7ekpjwzqjtvf/Izizbs56WxvQJq5unYr8qS8F/OKkvCb+wcz8hb2tMyPkLlyoQQnnr1\n6lPhLb7J9KqYAG80Kg2Ly0W1a3t8LSJCGcyv7VOVntESyh7K6mfWlI/VWe2CcXDPbakzKP/00xO5\neDGTefMW+/21fU0arjpyOB2sPfQBB7IP0SW2I092m0CIVj6tAL06xjH5vmRWfHqI//m//bz8cC/a\nNVd3R+TJc8qS8MOnlSXhva6LY9SgwFkSLoSoibplerm5r+kiI03odHrCw
kLR63VYLPUbBQHKwLpW\nq6WgoO6zYsrMmnsPZUhpE2nCZrOVBqoq14ahoSGEhoZ4NZ925SpRnSH5FSve4fjxY8yaNZ1169Y3\n6PxK6QzqwOly8u6RFFKzfua6mPZM7vE7QnUhapcVUG7sksATTier/nGYhR/u4+WHe6vS3KRnFPDJ\njjQOnFSWhCe3i2XU4CQ6JAbOqZsQorZql+nlyWg0oNPpyobHDYZQTCYjWq2mLKLB1/Nc7gH4+lhP\nZLMpuxd1OveplxItUVxcUjqfVn2gqpp5W0ePHiE2NpZmzZrTsWNnHA4Hly7lERsb2DPBVZGGq5ac\nLifvH93Ij5n7aB/Vlik9HidUF6p2WQHp5uub43C4eOfzIyz4cB+vjO/tt6u7c1mFbPr2FHuPZQHQ\nqVU09w9OonObWL+8vhDC37zL9PKkND5h5Rof95C9Xn8lyf5aQ/a1qrI0vf7y5cJqB9brwuFQAlWV\nUy8lWkIJVNVXE6jqnttS58Gv/ft/IiMjg2nTXiQ3Nwez2Ux0dODNAteENFy14HK5SPllE7sv/Eib\nyFY822siYfowtcsKaAO7t8DucPK3fx5jfmnT1aJpeL29XmaumU+/PcWew5m4gKREZUn49e0azpJw\nIepqyZKFHDp0EI1Gw7RpL9K1a7LaJfnRtTO9POl0VTc+dvvVQ/a1fSrQk5Je7/2QfF25XKDX67HZ\nlEiM6ppIl0vdJPlRo8YwZ84snnnmCWw2Gy+88ApabcN+6l8arhpyuVxsPL6Zb8/tplVEIlN7PYFR\nL9EB3hjSqyV2h4v3//UL89en8uojfUjw8dOA2ZcsbP7uNN/9nIHT5aJNQgSjBifRs0PDXBIuRG2l\npu7l7NkzrFy5ltOnTzFnzkxWrlyrdlkquPZ1Y0FBIadOnaBv35uqbXw8h+w9nwqsuHDaG0pivh2r\nte6rh7zlnktzz21V9qRmbu4lbLZiwsKMqB1uajCEMWPGG6q9fn2QhqsGXC4Xm05+wfazO2kR3ozn\nej1JeIjEB9TEsBtaYXc4Sfn3CeavT+WVR/r4JOsqr8DGZ9+d5pv953E4XSTGhTPqlvb06RyPVhot\nEYT27v2BQYN+A0C7du0pKLhMUVEh4eHB/CSu+4TERUlJCf/933+kW7du9OjRu0bvpbKF0+71P9UN\n2RuNBrRarVcLon1Fr9dhMhmvGpKv+KTm9u3bmD9/PsOH386oUQ+QlNTBbzUGg4Z9Pudnn53ayle/\n/odmpnh+3/spIkLr70qsMbujXxvGDEki57KN+etTyb1c/WPJ15JfVMz6r47zyopdbEs9R9PoMJ68\n93pmTuzHjV0SpNkSQSsnJ6fcRo+YmFhycnJUrCiQaFi+fCmgYcKESbhcmtJ5r5rNUrkXTufm5pcN\no8fGRhEWZqj0RN09K1ZQ4L+duhqNEm5aWGiu8hSupMTO0KHDWLfuPaKiovnDH57l3//+ym91BgM5\n4fLSllNf88/TXxNnbMrvez9FVKjECNTF3f3bUWJ3snnn6bKZrpgIg9e/vtBSwj/3/MpXe89QXOKk\naVQY9w1sx4DuzdE18Ht+IeqDvxLTGwqj0cSMGbPR690PO9U+0wuuHrI3maLKrd65MiRffaq7L0VG\nhmOzebOUWpnbSkhozhNPPM3jjz/Z4GemAo00XF74V/p2Pjv1JU3CYpnW+yliDBIl4Asjb2lPicPJ\nlt2/suDDffzX+N5Emap+0tNstbP1h1/Z+sMZrMUOYiJCGXtrOwb1TESvk78chHCLi4srd6KVnZ1N\nXFzDXChfH558ckqFt/gm06v8kL2BqKgInE4nWq0Gs9mK3V6/uV6elKXUYDZbqv25SrN5ZW5Lpwvu\n8O76IF+hqrHtzLdsOvkFMYZopvV+iiZhEifgKxqNhgeGdOC2G1tzPruIBev3UWip/F9h1mI7n+86\nzSsrvmPzztOE6LWMG9aRNyf359Y+r
aTZEqKCfv1uZvv2rwE4duwocXFxmEwyBlE9d6q6DtDW+rrR\nvXonLy+/tIHTYDKFYTKFodXW/6iDO+OroKDIi1pBzQiIYCEnXFXYcW43G49vJio0kmm9nyLO2FTt\nkhodjUbDuGHXYXc62fbTudJw1F5lP15c4mB76jk+351OgbmE8DA9Y4YkMeyGVoSFyh9fIa6le/ee\ndO7claefnohGo+GFF15Ru6QG6MqQvdI0VZ/pVVFYmAGtVkNeXv5VIaTeDNnXquoaLKVWGkmluRT1\nS75iXcOu8z/w4bGPiQgJ5/e9nyLBFK92SY2WRqPhkds64XA4+Wb/BWa/t5fBvVuRl29hz+FMLhUW\nYzToGHlLe267sTWmMPljK4Q3pkx5Tu0SGgmNx/+8X5wdEqLHZAorezrQHUJaVGQhLCyU8HDlKXcl\n08uGr8bslKXU3qwlcpXmbfnv4aLly99i//59OBwOHn30MYYMGeq311abfOWqxPcZP/H+0Y2E6038\nvvdTtAhvpnZJjZ5Wo2HCHV3IvWzj4KlcPvzXLwDodRru7t+WO/q1IcIoa5OECARpaSd49dUXGTt2\nPGPGjFW7HD/yfoVQ+VOmq58OVPK7itHrlcXZJlMYNlsJVqsVh8P7TK+KwsONOJ1OLBZbtT9XWUrt\nv7ytn376kbS0k6xcuZb8/Es8/vgj0nAFs58uHuDdwymE6cOY2vsJWka0ULukoKHVaujUJoaDp3LL\n3nbnTW0ZPThJxaqEEJ4sFguLFs3nhhv6qV2KyipeN14Zsi8pKaGkxIrBEFrtKZPdbqegwI5Wq8Fg\nMBAVFYnT6cBisXn1ZKGn2i2l9t9VYs+evcu2HURERJY2l46gGdCXS1sP+7MOsfbQBxh0oUztNYk2\nka3ULinodG0Ti650oFSn1dCzg8zNCRFIQkJCWLDgLXnisYx7yF75n9MJCxfOY+3atV6dMrk5nVeG\n7C0WG0ajgdjYaK+H7HU6LRERJi5fLgrYpdQ6nQ6jUQm6/uyzT+nff0DQNFsgJ1xlDmYfYc3B99Br\n9TzTcxLtotqoXVJQ6tAymlcf6cPZXAutmhjp0FIiOIQIJHq9Hr1evnRcTbma+/jjjRw+fIQVK9bg\nclGrTC93kn3FIXuLxXbNWInIyAiKiiw4HNXtZnQvpVYvFHrHju189tmnLFr0tmo1qEH+qwGO5h5n\n1cG/o9VomNLjMTrEtFO7pKDWoWU0N/dqRVaW/1ZfCCGEL2zb9hWzZy/AaAynsuvGmnAP2ZvNFgwG\nAxER7iF7KzZbcdmQvXs3ozfLtP09t1XRnj27ePfdd1i4cCkREcG1ZiroG67jeSdZcWAduFxM7vE4\nnWKvU7skIYQQDdSyZX/1+J73Q/ZVcbmUJxmtVltZvpbJZMRmK8bpdKHTaQN2bstTYWEhy5e/xeLF\ny4mKCr7bi6BuuNLyT7P8wFqcLidPdZ9A16ad1C5JCCFEo1T3TC9Qdh6WlChD9iaTEZPJgN3uIDQ0\npMohe7WbLYCvv97KpUuXmD791bK3/fGPM2nevLlqNflT0DZc6ZfP8Pa+d7A77Uzq9lu6xXVVuyQh\nhAh4R48eYdmyRWRkXECv17Nt29fMnj0/KE8saqd2mV4VuVzKAwzuJHmjMYzwcFPZSVj5wXn157YA\nRo4czciRo1WtQU1B2XCdKTjH0n2rsTlsPJ48nl7x3dQuSQghGoQuXbpWuDYTtVO360ZlKXVx2amW\nMmSvK326MYriYjuXL19Go3E3WurNbQlF0DVc5wovsHTfKqx2KxOuH8sNzXqqXZIQQgS9YE4gryrT\nqzImUxhw9VJqh+PK4myDIZQ335zNiRMnGTlyNLfddldZJINQR1DlcGUUXWRp6iqKSsyM7zKGfs37\nqF2SEEIEPc8E8oULl/DWWwvVLkkl1S/ODgnRYzBUvZTa5XJhtdp45ZXXmTz5GXbu3MEDD9xDRsaF\ne
q5fVCVoTrgumrNZkrqSgpJCxnYaxYDEYE9JFkKIwBDsCeSVc5+HOEubLieZmZksXDiXRYuWVBtu\nCi60Wh39+g2gX78BXLp0icjIyPotWVQpKE64ciy5LEn9K/nFBYzpeC+DWw1QuyQhhBClgj2BvGrK\nk4XFxQ7++Mf/R9++N3mxlJrSpdRXvsTHxMTI51Rljf6EK896ibdSV5Jnu8TIDncxtPUgtUsSQghR\niWBNIPfGunVrSEhoxgMPjKe6pxuVwy8Zkg80jbrhumTL563UleRY87i7/W3c3vZWtUsSQghRiWBO\nIPfGHXeMICGhGRqNZyN1daZXIORtico12t+Ry8UFLEldRZYlhzvaDuWudsPVLkkIIUQl3Ank8+Yt\nljyva2jbtl0lTxlePWR/5W3+kZZ2goceGslHH6X47TUbqkZ5wlVYXMTS1FVkmi8ytPUg7k26o/Rf\nBUIIIQJNsCeQ150614cWi4VFi+Zzww3yEJo3Gl3DZS4xs2zfKs4XZTCk1QBGX3ePNFtCCBHAgj2B\nvKEKCQlhwYK3eO+9v6ldSoPg94Zr9uzZ7N+/H41Gw2uvvUaPHj189r7NJRaW7V/DmcLzDEzsxwMd\n75NmSwghhKgHer0evb7RndvUG79+pr7//nvS09NJSUnh5MmTvPbaa6Sk+Obe91juCT7YvZFscy43\nNb+BcZ1Ho9U02hE1IYQQVbBarbzxxgzy8nKx2Ww89tgTDBwoT6kL9fi14dq1axfDhyvD6x06dCA/\nP5/CwsI6P5FyKj+dJfuU3V4aYGDiTdJsCSFEENu58xu6dOnKI4/8joyMCzz//LPScAlV+bXhys7O\nJjk5uez7TZo0ISsr65oNV2ysCb2++qC2ndnnyr7tAjLs57k5vnud6/Wl+PjATfgN1NoCtS4I3NoC\ntS4I7NpE4zNs2O1l387MzCQhIUHFaoRQeWi+utUEeXlmr95PYkhLtBotTpcTrUZLYkhLsrIKfFGi\nT8THRwZUPZ4CtbZArQsCt7ZArQtqVps0ZsKXnn56IhcvZjJv3mK1S2l0jh49wrJli8jIuIBer2fb\ntq+ZPXu+RHtcg18broSEBLKzs8u+f/HiReLj4+v8fttHt+WFPlM4X3KOxJCWtI9uW+f3KYQQouFb\nseIdjh8/xqxZ01m3br08SOVDXbp0Zdmyv6pdRoPh10GngQMH8uWXXwJw6NAhEhISfJYo3D66LaO6\n3iHNlhBCCI4ePUJmZgYAHTt2xuFwcOlSnspViWDm1xOuPn36kJyczLhx49BoNPzpT3/y58sLIYQI\nEvv3/0RGRgbTpr1Ibm4OZrOZ6OgYtcsSQczvM1wvvfSSv19SCCFEkBk1agxz5szimWeewGaz8cIL\nr6DVytPrQj2SWCaEEKLRMRjCmDHjDbXLEKKMtPtCCCFEDdlsVh56aCRffPEPtUsRDYQ0XEIIIUQN\nrVu3RuIPRI1IwyWEEELUQHr6aU6fPkX//gPVLkU0INJwCSGEEDWwbNkinnvuD2qXUS+WLFnI5MmP\n8/TTEzly5JDa5TQq0nAJIYQQXtqy5TOSk7uTmNhS7VJ8LjV1L2fPnmHlyrW8+up0Fi9eoHZJjYo8\npSiEEEJ4adeunZw/f47vvvuWrKyLhISEEB+fQN++N6ldWp3t3fsDgwb9BoB27dpTUHCZoqJCwsN9\nE1Ae7KThEkIIIbw0c+acsm+vWbOSFi0SG0WzBZCTk0Pnzl3Kvh8TE0tOTo40XD4iV4pCCCGEuIrL\n5VK7hEZF45LPqBBCCBH0li5dSnx8POPGjQNg2LBhfPrppz7beRzs5IRLCCGEEAwcOJAvv/wSgEOH\nDpGQkCDNlg/JDJcQQgihsj179jBt2jQ6duwIQKdOnZg+fbpfa+jTpw/JycmMGzcOjUbDn/70J7++\nfmMnV4pCCCGEyvbs2cP777/PkiVL1C5F1BO5UhRCCCGEqGfScAk
hhBAB4MSJEzz99NM8/PDD7Ny5\nU+1yhI/JlaIQQgihsszMTPbu3ctdd93FmTNnmDBhAlu3biU0NFTt0oSPNLoTru+//57+/fuzbds2\ntUsBYPbs2YwdO5Zx48Zx4MABtcsp55dffmH48OG89957apdSzrx58xg7dixjxoxh69atapcDgMVi\nYdq0afz2t7/lwQcfDJg/X56sVivDhw/n448/VrsUQJlJufnmm3n00Ud59NFHmTVrltolCRGwmjVr\nxogRI9BoNLRp04a4uDgyMzPVLkv4UKN6SvHXX39l7dq19OnTR+1SAKX5S09PJyUlhZMnT/Laa6+R\nkpKidlkAmM1mZs2aRf/+/dUupZzdu3dz/PhxUlJSyMvL4/777+f2229Xuyy2bdtGt27dePLJJzl3\n7hwTJ07k1ltvVbuscv73f/+X6Ohotcsop1+/fjIELIQXNm/eTFZWFpMmTSIrK4ucnByaNWumdlnC\nhxpVwxUfH8+yZct4/fXX1S4FgF27djF8+HAAOnToQH5+PoWFhQGRaxIaGsqqVatYtWqV2qWU07dv\nX3r06AFAVFQUFosFh8OBTqdTta4RI0aUffvChQsB9xfhyZMnOXHiBL/5zW/ULkUIUQtDhw7lpZde\n4uuvv6akpIQZM2bIdWIj06gaLqPRqHYJ5WRnZ5OcnFz2/SZNmpCVlRUQDZder0evD7zffp1Oh8lk\nAmDjxo0MHjxY9WbL07hx48jIyGDFihVql1LO3LlzmT59Ops2bVK7lHLcQ8D5+flMnTqVgQMHql2S\nEAEpIiIi4P5eEb4VeF9xvbRhwwY2bNhQ7m3PPfccgwYNUqmi6snzCd776quv2LhxI++8847apZTz\n4YcfcuTIEV5++WU2b96MRqNRuyQ2bdpEr169aN26tdqllNOuXTumTp0qQ8BCCEEDbrgefPBBHnzw\nQbXLqFJCQgLZ2dll37948SLx8fEqVtQw7NixgxUrVrB69WoiIyPVLgeAgwcP0rRpU1q0aEHXrl1x\nOBzk5ubStGlTtUtj+/btnDlzhu3bt5ORkUFoaCjNmzdnwIABqtblHgIGyg0BB1pjKIQQ/tBgG66G\nYODAgSxdupRx48bJXiovFRQUMG/ePNatW0dMTIza5ZT58ccfOXfuHK+//jrZ2dmYzWZiY2PVLguA\nxYsXl3176dKltGzZUvVmC2QIWAghPDWqHK7t27ezZs0a0tLSaNKkCfHx8apfSS1YsIAff/yxbC9V\nly5dVK3H7eDBg8ydO5dz586h1+tp1qwZS5cuVb3JSUlJYenSpbRv377sbXPnziUxMVHFqpTIhddf\nf50LFy5gtVqZOnUqQ4cOVbWmyrgbrtGjR6tdCoWFhbz00ktcvnyZkpISpk6dypAhQ9QuSwghVNGo\nGi4hhBBCiEDU6IJPhRBCCCECjTRcQgghhBD1TBouIYQQQoh6Jg2XEEIIIUQ9k4ZLCCGEEKKeScMl\n/OLIkSPMmjXLq59rt9vp3LlzPVckhBBC+I/EQoiAY7fbSU5O5tixY2qXIoQQQviEJM0Lv9izZw+L\nFy9Gr9fTv39/UlNTOX36NM899xz33XcfaWlpvPzyyxiNRm666aayX1dcXMzMmTNJT0+nqKiIe+65\nh4kTJ/KXv/yFpk2bMmXKFPbs2cPChQtZv359QC26FkIIIdzkSlH4ndlsZtWqVbzxxhusXr0agLff\nfpsxY8bw3nvvlbtOfPfdd0lISODvf/87GzZs4PPPP+fo0aO8+OKLfPHFF5w8eZI5c+bw5ptvSrMl\nhBAiYMkJl/C7fv36AZCYmEh+fj4Av/zyC0899RQAN998c9nP3bNnDxkZGfzwww+AcuL166+/0qVL\nF2bMmMH48eOZMmUKSUlJfv4ohBBCCO9JwyX8Tq+/8sfOPULocrnQapUDV4fDUfbjoaGhPPvss9x5\n551XvZ/s7GyioqI4f/58PVc
shBBC1I1cKYqA0KFDB/bt2wfArl27yt5+ww03sGXLFgCcTidz5szh\n0qVL5ObmsmTJElJSUvj555/5/vvvValbCCGE8IY0XCIgPPvss3zwwQdMmjSJtLS0slOwRx55BJPJ\nxNixY3nooYeIjIwkJiaGP//5zzz55JM0adKEmTNnMn36dAoLC1X+KIQQQojKSSyEEEIIIUQ9kxMu\nIYQQQoh6Jg2XEEIIIUQ9k4ZLCCGEEKKeScMlhBBCCFHPpOESQgghhKhn0nAJIYQQQtQzabiEtXn1\ncQAAABVJREFUEEIIIeqZNFxCCCGEEPXs/wPfi3Z9JjdMYQAAAABJRU5ErkJggg==\n","text/plain":["
"]},"metadata":{"tags":[]}}]}]} \ No newline at end of file diff --git a/Colab_Codes/Colab Notebooks/suffix_array.ipynb b/Colab_Codes/Colab Notebooks/suffix_array.ipynb deleted file mode 100644 index db31069..0000000 --- a/Colab_Codes/Colab Notebooks/suffix_array.ipynb +++ /dev/null @@ -1 +0,0 @@ -{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"suffix_array.ipynb","version":"0.3.2","provenance":[],"toc_visible":true},"kernelspec":{"name":"python3","display_name":"Python 3"}},"cells":[{"metadata":{"id":"3s7lvSxaj9Fi","colab_type":"text"},"cell_type":"markdown","source":["## sort cyclic shifts of string s + '$' to construct the suffix array"]},{"metadata":{"id":"JjYArMUrkDIu","colab_type":"code","colab":{}},"cell_type":"code","source":["from collections import OrderedDict\n","\n","def getCharOrder(s):\n"," n = len(s)\n"," numChars = 256\n"," count = [0]*numChars # totally 256 chars, if you want, can print it out to see these chars\n"," \n"," order = [0]*(n)\n"," \n"," #count the occurrence of each char\n"," for c in s:\n"," count[ord(c)] += 1\n"," \n"," # prefix sum of each char\n"," for i in range(1, numChars):\n"," count[i] += count[i-1]\n"," \n"," # assign from count down to be stable\n"," for i in range(n-1,-1,-1):\n"," count[ord(s[i])] -=1\n"," order[count[ord(s[i])]] = i # put the index into the order instead the suffix string\n"," \n"," return order\n"," "],"execution_count":0,"outputs":[]},{"metadata":{"id":"CXthy2Rqt6nm","colab_type":"code","colab":{}},"cell_type":"code","source":["def getCharClass(order, cls):\n"," n = len(order)\n"," cls = [0]*n\n"," # if it all differs, then cls[i] = order[i]\n"," cls[order[0]] = 0 #the 6th will be 0\n"," for i in range(1, n):\n"," # use order[i] as index, so the last index\n"," if s[order[i]] != s[order[i-1]]:\n"," print('diff',s[order[i]],s[order[i-1]])\n"," cls[order[i]] = cls[order[i-1]] + 1\n"," else:\n"," cls[order[i]] = cls[order[i-1]]\n"," return cls\n"," 
"],"execution_count":0,"outputs":[]},{"metadata":{"id":"yfCeF3b77m4M","colab_type":"code","colab":{}},"cell_type":"code","source":["'''It is a counting sort using the first part as class'''\n","def sortDoubled(s, L, order, cls):\n"," n = len(s)\n"," count = [0] * n\n"," new_order = [0] * n\n"," # their key is the class\n"," for i in range(n):\n"," count[cls[i]] += 1\n"," \n"," # prefix sum\n"," for i in range(1, n):\n"," count[i] += count[i-1]\n"," \n"," # assign from count down to be stable\n"," # sort the first half\n"," for i in range(n-1, -1, -1):\n"," start = (order[i] - L + n) % n #get the start index of the first half, \n"," count[cls[start]] -= 1\n"," new_order[count[cls[start]]] = start\n"," \n"," return new_order"],"execution_count":0,"outputs":[]},{"metadata":{"id":"2I1nIvImB3O8","colab_type":"code","colab":{}},"cell_type":"code","source":["def updateClass(order, cls, L):\n"," n = len(order)\n"," new_cls = [0]*n\n"," # if it all differs, then cls[i] = order[i]\n"," new_cls[order[0]] = 0 #the 6th will be 0\n"," for i in range(1, n):\n"," cur_order, prev_order = order[i], order[i-1]\n"," # use order[i] as index, so the last index\n"," if cls[cur_order] != cls[prev_order] or cls[(cur_order+L) % n] != cls[(prev_order+L) % n]:\n"," new_cls[cur_order] = new_cls[prev_order] + 1\n"," else:\n"," new_cls[cur_order] = new_cls[prev_order]\n"," return new_cls"],"execution_count":0,"outputs":[]},{"metadata":{"id":"2Wv3muUHkNl_","colab_type":"code","colab":{}},"cell_type":"code","source":["\n","def cyclic_shifts_sort(s):\n"," s = s + '$'\n"," n = len(s)\n"," order = getCharOrder(s)\n"," cls = getCharClass(s, order)\n"," print(order, cls)\n"," L = 1\n"," while L < n:\n"," order = sortDoubled(s, 1, order, cls)\n"," cls = updateClass(order, cls, L)\n"," print(order, cls)\n"," L *= 2\n"," \n"," return 
order"],"execution_count":0,"outputs":[]},{"metadata":{"id":"OpqYr3Z6rsM_","colab_type":"code","outputId":"eb382cee-6f8d-459c-911f-10c955b45dad","executionInfo":{"status":"ok","timestamp":1549232253713,"user_tz":480,"elapsed":336,"user":{"displayName":"Li Yin","photoUrl":"https://lh5.googleusercontent.com/-KgiTKdqPRUg/AAAAAAAAAAI/AAAAAAAAAIE/aHQ6xO5vQpY/s64/photo.jpg","userId":"13365523799853678553"}},"colab":{"base_uri":"https://localhost:8080/","height":146}},"cell_type":"code","source":["s = 'ababaa'\n","cyclic_shifts_sort(s)"],"execution_count":32,"outputs":[{"output_type":"stream","text":["diff a $\n","diff b a\n","[6, 0, 2, 4, 5, 1, 3] [1, 2, 1, 2, 1, 1, 0]\n","[6, 5, 4, 0, 2, 1, 3] [3, 4, 3, 4, 2, 1, 0]\n","[6, 5, 4, 0, 2, 3, 1] [3, 6, 4, 5, 2, 1, 0]\n","[6, 5, 4, 0, 2, 3, 1] [3, 6, 4, 5, 2, 1, 0]\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/plain":["[6, 5, 4, 0, 2, 3, 1]"]},"metadata":{"tags":[]},"execution_count":32}]},{"metadata":{"id":"5nNbGVRTjvvL","colab_type":"text"},"cell_type":"markdown","source":[""]}]} \ No newline at end of file diff --git a/Colab_Codes/Colab Notebooks/tree_data_structure.ipynb b/Colab_Codes/Colab Notebooks/tree_data_structure.ipynb deleted file mode 100644 index 7575aef..0000000 --- a/Colab_Codes/Colab Notebooks/tree_data_structure.ipynb +++ /dev/null @@ -1 +0,0 @@ -{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"tree_data_structure.ipynb","version":"0.3.2","provenance":[],"toc_visible":true},"kernelspec":{"name":"python3","display_name":"Python 3"}},"cells":[{"metadata":{"id":"3A2mprldYvXe","colab_type":"text"},"cell_type":"markdown","source":["### N-aray Tree"]},{"metadata":{"id":"h5LNx9p_ZxtH","colab_type":"text"},"cell_type":"markdown","source":["#### Define Tree Node"]},{"metadata":{"id":"9R_XYWC2Yz-G","colab_type":"code","colab":{}},"cell_type":"code","source":["class NaryNode:\n"," '''Define a n-ary node'''\n"," def __init__(self, n, val):\n"," self.children = [None] * n\n"," self.val = 
val\n"," "],"execution_count":0,"outputs":[]},{"metadata":{"id":"Xx5WEdJDZ4-q","colab_type":"text"},"cell_type":"markdown","source":["#### Define a Tree and implement operations"]},{"metadata":{"id":"qtAos_DOZwkw","colab_type":"code","colab":{}},"cell_type":"code","source":[""],"execution_count":0,"outputs":[]},{"metadata":{"id":"wLPjQrfnZbQY","colab_type":"text"},"cell_type":"markdown","source":["### Binary Tree"]},{"metadata":{"id":"oUZYimKbZ1ZL","colab_type":"text"},"cell_type":"markdown","source":["#### Define Tree Node"]},{"metadata":{"id":"ZKknhNURZdA0","colab_type":"code","colab":{}},"cell_type":"code","source":["class BinaryNode:\n"," '''Define a classical binary tree node'''\n"," def __init__(self, val):\n"," self.left = None\n"," self.right = None\n"," self.val = val"],"execution_count":0,"outputs":[]},{"metadata":{"id":"yPukXXkXZ_zn","colab_type":"text"},"cell_type":"markdown","source":["#### Define a Tree and implement operations\n"," 1\n"," / \\ \n"," 2 3\n"," / \\ \\\n","4 5 6 "]},{"metadata":{"id":"CyXwZ9sf8dyG","colab_type":"code","colab":{}},"cell_type":"code","source":["root = BinaryNode(1)\n","left = BinaryNode(2)\n","right = BinaryNode(3)\n","root.left = left\n","root.right = right\n","left.left = BinaryNode(4)\n","left.right = BinaryNode(5)\n","right.right = BinaryNode(6)"],"execution_count":0,"outputs":[]},{"metadata":{"id":"gOHqKKfY_Ug0","colab_type":"code","colab":{}},"cell_type":"code","source":["def constructTree(a, idx):\n"," '''construct a binary tree recursively from input array a'''\n"," if idx >= len(a):\n"," return None\n"," node = BinaryNode(a[idx])\n"," node.left = constructTree(a, 2*idx + 1)\n"," node.right = constructTree(a, 2*idx + 2)\n"," return node"],"execution_count":0,"outputs":[]},{"metadata":{"id":"STvuN-5p_5Di","colab_type":"code","colab":{}},"cell_type":"code","source":["nums = [1, 2, 3, 4, 5, None, 6]\n","root = constructTree(nums, 
0)"],"execution_count":0,"outputs":[]},{"metadata":{"id":"hrJC3v5ZAvD-","colab_type":"text"},"cell_type":"markdown","source":["#### To show the nodes at each level, we use LevelOrder function to print out the tree:"]},{"metadata":{"id":"uPxVdjRrA0UB","colab_type":"code","colab":{}},"cell_type":"code","source":["def LevelOrder(root):\n"," q = [root]\n"," while q:\n"," new_q = []\n"," for n in q:\n"," if n is not None:\n"," print(n.val, end=',')\n"," if n.left:\n"," new_q.append(n.left)\n"," if n.right:\n"," new_q.append(n.right)\n"," q = new_q\n"," print('\\n')\n"],"execution_count":0,"outputs":[]},{"metadata":{"id":"Pl2HGcHkA4rT","colab_type":"code","outputId":"2282a8ed-e1c1-4dae-b83c-0c2a8c19b470","executionInfo":{"status":"ok","timestamp":1550969967703,"user_tz":480,"elapsed":336,"user":{"displayName":"Li Yin","photoUrl":"https://lh5.googleusercontent.com/-KgiTKdqPRUg/AAAAAAAAAAI/AAAAAAAAAIE/aHQ6xO5vQpY/s64/photo.jpg","userId":"13365523799853678553"}},"colab":{"base_uri":"https://localhost:8080/","height":127}},"cell_type":"code","source":["LevelOrder(root)"],"execution_count":0,"outputs":[{"output_type":"stream","text":["1,\n","\n","2,3,\n","\n","4,5,None,6,\n","\n"],"name":"stdout"}]},{"metadata":{"id":"2edkOHbCCV7n","colab_type":"text"},"cell_type":"markdown","source":[""]}]} \ No newline at end of file diff --git a/Colab_Codes/Colab Notebooks/tree_search.ipynb b/Colab_Codes/Colab Notebooks/tree_search.ipynb deleted file mode 100644 index f3fada9..0000000 --- a/Colab_Codes/Colab Notebooks/tree_search.ipynb +++ /dev/null @@ -1 +0,0 @@ -{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"tree_search.ipynb","version":"0.3.2","provenance":[]},"kernelspec":{"name":"python3","display_name":"Python 3"}},"cells":[{"metadata":{"id":"-N44hhD6CqRT","colab_type":"code","colab":{}},"cell_type":"code","source":[""],"execution_count":0,"outputs":[]},{"metadata":{"id":"8xckq2iGCtG8","colab_type":"text"},"cell_type":"markdown","source":["## DFS tree 
search"]},{"metadata":{"id":"dw0xDCnDCyLo","colab_type":"text"},"cell_type":"markdown","source":["### A general purpose search"]},{"metadata":{"id":"c9xcMvZxC0zK","colab_type":"code","colab":{}},"cell_type":"code","source":["def dfs(t, s):\n"," '''implement the dfs recursive of tree'''\n"," print(s)\n"," for neighbor in t[s]:\n"," dfs(t, neighbor)\n"," return"],"execution_count":0,"outputs":[]},{"metadata":{"id":"pOnCVL5eDIh-","colab_type":"code","colab":{"base_uri":"https://localhost:8080/","height":121},"outputId":"7c437d60-4881-4eea-b4ba-c9675519a7e2","executionInfo":{"status":"ok","timestamp":1551054538760,"user_tz":480,"elapsed":403,"user":{"displayName":"Li Yin","photoUrl":"https://lh5.googleusercontent.com/-KgiTKdqPRUg/AAAAAAAAAAI/AAAAAAAAAIE/aHQ6xO5vQpY/s64/photo.jpg","userId":"13365523799853678553"}}},"cell_type":"code","source":["t = {1: [2, 3], 2: [4, 5],\n"," 3: [], 4: [6], 5: [], 6: []}\n","dfs(t, 1)"],"execution_count":4,"outputs":[{"output_type":"stream","text":["1\n","2\n","4\n","6\n","5\n","3\n"],"name":"stdout"}]}]} \ No newline at end of file diff --git a/Colab_Codes/chapter_combinatorial_search.ipynb b/Colab_Codes/chapter_combinatorial_search.ipynb deleted file mode 100644 index 1cf41a1..0000000 --- a/Colab_Codes/chapter_combinatorial_search.ipynb +++ /dev/null @@ -1,2731 +0,0 @@ -{ - "nbformat": 4, - "nbformat_minor": 0, - "metadata": { - "colab": { - "name": "chapter_combinatorial_search.ipynb", - "provenance": [], - "collapsed_sections": [], - "toc_visible": true - }, - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - } - }, - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "id": "ZhL7pFlp8gTD", - "colab_type": "text" - }, - "source": [ - "## Backtracking\n", - "\n", - "1. Permutation\n", - "2. Combination\n", - "3. 
All Paths\n", - "\n", - "\n", - "\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "I7Xx9UKaXO7e", - "colab_type": "text" - }, - "source": [ - "### Template" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "JZvh1XD8XQYN", - "colab_type": "code", - "colab": {} - }, - "source": [ - "def backtrack():\n", - " # initialization\n", - " A #a working data structure, either a list of candidates or a graph or a matrix representing a board\n", - " state_tracker = []*n\n", - " assist_state_tracker\n", - " # main backtracking\n", - " def dfs(d, n):\n", - " '''d: depth representing level in the tree'''\n", - " if d == n:\n", - " return\n", - " candidates = generate_candidates(state_tracker, assist_state_tracker)\n", - " for c in candidates: \n", - " set_state(state_tracker, assist_state_tracker, c)\n", - " dfs(d+1, n)\n", - " reset_state(state_tracler, assist_state_tracker, c)\n", - " \n", - " dfs(0, n)\n", - " " - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "xGT9qW47yghc", - "colab_type": "text" - }, - "source": [ - "### Permutations\n", - "How many ways to permutate an $n$-set? 
For example {1, 2, 3}: ???\n", - "* Enumerate by position This helps us to enumerate permuation with **backtracking**\n", - "```\n", - "{}\n", - "i=0: {1}, {2}, {3}\n", - "i=1: {1, 2}, {1, 3}, {2, 1}, {2, 3}, {3, 1}, {3,2}\n", - "i=2: {1, 2, 3}, {1, 3, 2}, {2, 1, 3}, {2, 3, 1}, {3, 1, 2}, {3, 2, 1}.\n", - "```\n", - "The recurrence relation is:\n", - "\\begin{align}\n", - "d(i) = (n-i)*d(i-1)\n", - "\\end{align}\n", - "Such that\n", - "```\n", - "d(0) = 3 * 1 = 3\n", - "d(1) = 2 * 3 = 6\n", - "d(2) = 1 * 6 = 6\n", - "```\n", - "* Additionally, can enumerate by iterating elements, and then enumerate all possible positions that it can go.\n", - "```\n", - ": {}\n", - "1: {1}\n", - "2: {1, 2}, {2, 1} : can find i position to insert 2\n", - "3: {3, 1, 2}, {1, 3, 2}, {1, 2, 3}| {3, 2, 1}, {2, 3, 1}, {2, 1, 3}\n", - "```\n", - "\\begin{align}\n", - "d(i) = i*d(i-1)\n", - "\\end{align}\n", - "Such that:\n", - "```\n", - "d(0) = 1 * 1 = 1\n", - "d(1) = 2 * 1 = 2\n", - "d(2) = 3 * 2 = 6\n", - "```\n" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "UJWteGFF6dSI", - "colab_type": "code", - "outputId": "023cb8ca-329e-46e7-c39e-34745b79c3c0", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 367 - } - }, - "source": [ - "from graphviz import Digraph\n", - "dot = Digraph(comment='The Round Table', format='png')\n", - "dot.node_attr['shape']='box'\n", - "#print(get_methods(Digraph))\n", - "#print(Digraph.__dict__)\n", - "nodes = ['[]', '[1]', '[2]', '[3]', '[1, 2]', '[1, 3]', '[2, 1]', '[2, 3]', '[3, 1]', '[3, 2]', '[1, 2, 3]', '[1, 3, 2]', '[2, 1, 3]', '[2, 3, 1]', '[3, 1, 2]', '[3, 2, 1]']\n", - "for i, node in enumerate(nodes):\n", - " dot.node(str(i), label=node)\n", - "edges = [('0', '1'), ('0', '2'), ('0', '3'), ('1', '4'), ('1', '5'), ('2', '6'), ('2', '7'), ('3', '8'), ('3', '9'), ('4', '10'), ('5', '11'), ('6', '12'), ('7', '13'), ('8', '14'), ('9', '15')]\n", - "for n1, n2 in edges:\n", - " dot.edge(n1, n2)\n", - "# dot.edge('0', '1', 
_attributes={'label': '4'})\n", - "# dot.edge('S', 'B', _attributes={'label': '5'})\n", - "# dot.edge('A', 'G', _attributes={'label': '7'})\n", - "# dot.edge('B', 'G', _attributes={'label': '3'})\n", - "dot.render('test-output/permutation', view=True) \n", - "\n", - "dot" - ], - "execution_count": 145, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "" - ], - "image/svg+xml": "\n\n\n\n\n\n%3\n\n\n\n0\n\n[]\n\n\n\n1\n\n[1]\n\n\n\n0->1\n\n\n\n\n\n2\n\n[2]\n\n\n\n0->2\n\n\n\n\n\n3\n\n[3]\n\n\n\n0->3\n\n\n\n\n\n4\n\n[1, 2]\n\n\n\n1->4\n\n\n\n\n\n5\n\n[1, 3]\n\n\n\n1->5\n\n\n\n\n\n6\n\n[2, 1]\n\n\n\n2->6\n\n\n\n\n\n7\n\n[2, 3]\n\n\n\n2->7\n\n\n\n\n\n8\n\n[3, 1]\n\n\n\n3->8\n\n\n\n\n\n9\n\n[3, 2]\n\n\n\n3->9\n\n\n\n\n\n10\n\n[1, 2, 3]\n\n\n\n4->10\n\n\n\n\n\n11\n\n[1, 3, 2]\n\n\n\n5->11\n\n\n\n\n\n12\n\n[2, 1, 3]\n\n\n\n6->12\n\n\n\n\n\n13\n\n[2, 3, 1]\n\n\n\n7->13\n\n\n\n\n\n14\n\n[3, 1, 2]\n\n\n\n8->14\n\n\n\n\n\n15\n\n[3, 2, 1]\n\n\n\n9->15\n\n\n\n\n\n" - }, - "metadata": { - "tags": [] - }, - "execution_count": 145 - } - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "AYWVQ1YckUzX", - "colab_type": "code", - "colab": {} - }, - "source": [ - "def p_n_m(a, n, m, d, used, curr, ans):\n", - " print(curr, end='->')\n", - " if d == m: #end condition\n", - " ans.append(curr[::]) \n", - " return\n", - " \n", - " for i in range(n):\n", - " if not used[i]:\n", - " # generate the next solution from curr\n", - " curr.append(a[i])\n", - " used[i] = True\n", - " \n", - " # move to the next solution\n", - " p_n_m(a, n, m, d + 1, used, curr, ans)\n", - " #backtrack to previous partial state\n", - " curr.pop()\n", - " print('backtrack:', curr)\n", - " used[i] = False\n", - " return" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "zoBmGC8IkXsp", - "colab_type": "code", - "outputId": "d58d7762-32fc-4534-a9a6-7007fffb9384", - "colab": { - "base_uri": "https://localhost:8080/", - 
"height": 311 - } - }, - "source": [ - "a = [1, 2, 3]\n", - "n = len(a)\n", - "ans = [[None]]\n", - "used = [False] * len(a)\n", - "ans = []\n", - "p_n_m(a, n, n, 0, used, [], ans)\n", - "print(ans)" - ], - "execution_count": 147, - "outputs": [ - { - "output_type": "stream", - "text": [ - "[]->[1]->[1, 2]->[1, 2, 3]->backtrack: [1, 2]\n", - "backtrack: [1]\n", - "[1, 3]->[1, 3, 2]->backtrack: [1, 3]\n", - "backtrack: [1]\n", - "backtrack: []\n", - "[2]->[2, 1]->[2, 1, 3]->backtrack: [2, 1]\n", - "backtrack: [2]\n", - "[2, 3]->[2, 3, 1]->backtrack: [2, 3]\n", - "backtrack: [2]\n", - "backtrack: []\n", - "[3]->[3, 1]->[3, 1, 2]->backtrack: [3, 1]\n", - "backtrack: [3]\n", - "[3, 2]->[3, 2, 1]->backtrack: [3, 2]\n", - "backtrack: [3]\n", - "backtrack: []\n", - "[[1, 2, 3], [1, 3, 2], [2, 1, 3], [2, 3, 1], [3, 1, 2], [3, 2, 1]]\n" - ], - "name": "stdout" - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "iJlzXjqjwfsn", - "colab_type": "text" - }, - "source": [ - "###Swapping Method\n", - "\n", - "Extention: Johnson-Trotter algorithm, \n", - "* https://en.wikipedia.org/wiki/Steinhaus%E2%80%93Johnson%E2%80%93Trotter_algorithm\n", - "* " - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "dxYiveMB6lnz", - "colab_type": "code", - "outputId": "605a8015-6ffe-4568-9a37-12933b355e7e", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 427 - } - }, - "source": [ - "from graphviz import Digraph\n", - "dot = Digraph(comment='The Round Table', format='png')\n", - "dot.node_attr['shape']='box'\n", - "#print(get_methods(Digraph))\n", - "#print(Digraph.__dict__)\n", - "nodes = ['[1, 2, 3]', '[1, 2, 3]', '[2, 1, 3]', '[3, 2, 1]', '[1, 2, 3]', '[1, 3, 2]', '[2, 1, 3]', '[2, 3, 1]', '[3, 2, 1]', '[3, 1, 2]', \n", - " '[1, 2, 3]', '[1, 3, 2]', '[2, 1, 3]', '[2, 3, 1]', '[3, 2, 1]', '[3, 1, 2]']\n", - "for i, node in enumerate(nodes):\n", - " dot.node(str(i), label=node)\n", - "edges = [('0', '1', '(0, 0)'), ('0', '2', '(0, 1)'), ('0', '3', '(0, 
2)'), ('1', '4', '(1, 1)'), ('1', '5', '(1, 2)'), ('2', '6', '(1, 1)'), ('2', '7', '(1, 2)'), \n", - " ('3', '8', '(1, 1)'), ('3', '9', '(1, 2)'), ('4', '10', '(2, 2)'), ('5', '11', '(2, 2)'), ('6', '12', '(2, 2)'), ('7', '13', '(2, 2)'), \n", - " ('8', '14', '(2, 2)'), ('9', '15', '(2, 2)')]\n", - "for n1, n2, l in edges:\n", - " dot.edge(n1, n2, _attributes={'label': l})\n", - "# dot.edge('0', '1', _attributes={'label': '4'})\n", - "# dot.edge('S', 'B', _attributes={'label': '5'})\n", - "# dot.edge('A', 'G', _attributes={'label': '7'})\n", - "# dot.edge('B', 'G', _attributes={'label': '3'})\n", - "dot.render('test-output/permutation_swap', view=True) \n", - "\n", - "dot" - ], - "execution_count": 148, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "" - ], - "image/svg+xml": "\n\n\n\n\n\n%3\n\n\n\n0\n\n[1, 2, 3]\n\n\n\n1\n\n[1, 2, 3]\n\n\n\n0->1\n\n\n(0, 0)\n\n\n\n2\n\n[2, 1, 3]\n\n\n\n0->2\n\n\n(0, 1)\n\n\n\n3\n\n[3, 2, 1]\n\n\n\n0->3\n\n\n(0, 2)\n\n\n\n4\n\n[1, 2, 3]\n\n\n\n1->4\n\n\n(1, 1)\n\n\n\n5\n\n[1, 3, 2]\n\n\n\n1->5\n\n\n(1, 2)\n\n\n\n6\n\n[2, 1, 3]\n\n\n\n2->6\n\n\n(1, 1)\n\n\n\n7\n\n[2, 3, 1]\n\n\n\n2->7\n\n\n(1, 2)\n\n\n\n8\n\n[3, 2, 1]\n\n\n\n3->8\n\n\n(1, 1)\n\n\n\n9\n\n[3, 1, 2]\n\n\n\n3->9\n\n\n(1, 2)\n\n\n\n10\n\n[1, 2, 3]\n\n\n\n4->10\n\n\n(2, 2)\n\n\n\n11\n\n[1, 3, 2]\n\n\n\n5->11\n\n\n(2, 2)\n\n\n\n12\n\n[2, 1, 3]\n\n\n\n6->12\n\n\n(2, 2)\n\n\n\n13\n\n[2, 3, 1]\n\n\n\n7->13\n\n\n(2, 2)\n\n\n\n14\n\n[3, 2, 1]\n\n\n\n8->14\n\n\n(2, 2)\n\n\n\n15\n\n[3, 1, 2]\n\n\n\n9->15\n\n\n(2, 2)\n\n\n\n" - }, - "metadata": { - "tags": [] - }, - "execution_count": 148 - } - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "QUrTCuJLhBwK", - "colab_type": "code", - "colab": {} - }, - "source": [ - "# permutation by swapping\n", - "ans = []\n", - "def permutate(a, d):\n", - " global ans\n", - " \n", - " if d == len(a):\n", - " ans.append(a[::])\n", - " for i in range(d, len(a)):\n", - " a[i], a[d] = a[d], a[i]\n", - 
" print(a, '(', d, i, ')')\n", - " permutate(a, d+1)\n", - " a[i], a[d] = a[d], a[i]\n", - " return" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "iFPDsOYwDLau", - "colab_type": "code", - "outputId": "5f62032f-0b5f-4999-c311-fdd86a264d97", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 311 - } - }, - "source": [ - "a = [1, 2, 3]\n", - "permutate(a, 0)\n", - "ans" - ], - "execution_count": 150, - "outputs": [ - { - "output_type": "stream", - "text": [ - "[1, 2, 3] ( 0 0 )\n", - "[1, 2, 3] ( 1 1 )\n", - "[1, 2, 3] ( 2 2 )\n", - "[1, 3, 2] ( 1 2 )\n", - "[1, 3, 2] ( 2 2 )\n", - "[2, 1, 3] ( 0 1 )\n", - "[2, 1, 3] ( 1 1 )\n", - "[2, 1, 3] ( 2 2 )\n", - "[2, 3, 1] ( 1 2 )\n", - "[2, 3, 1] ( 2 2 )\n", - "[3, 2, 1] ( 0 2 )\n", - "[3, 2, 1] ( 1 1 )\n", - "[3, 2, 1] ( 2 2 )\n", - "[3, 1, 2] ( 1 2 )\n", - "[3, 1, 2] ( 2 2 )\n" - ], - "name": "stdout" - }, - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "[[1, 2, 3], [1, 3, 2], [2, 1, 3], [2, 3, 1], [3, 2, 1], [3, 1, 2]]" - ] - }, - "metadata": { - "tags": [] - }, - "execution_count": 150 - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "guKxb4JXI8rg", - "colab_type": "text" - }, - "source": [ - "When there are duplicates, for example, \n", - "https://www.geeksforgeeks.org/distinct-permutations-string-set-2/" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "ujwVtrk3Dqs2", - "colab_type": "code", - "colab": {} - }, - "source": [ - "# permutation by swapping and there might be having duplicates\n", - "ans = []\n", - "def checkSwap(a, d, cur):\n", - " for i in range(d, cur):\n", - " if a[i] == a[cur]:\n", - " return False\n", - " return True\n", - "def permutate(a, d):\n", - " global ans\n", - " if d == len(a):\n", - " ans.append(a[::])\n", - " for i in range(d, len(a)):\n", - " if not checkSwap(a, d, i):\n", - " continue\n", - " a[i], a[d] = a[d], a[i]\n", - " permutate(a, d+1)\n", - " a[i], a[d] = 
a[d], a[i]\n", - " return" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "8qLIzNL2EDXC", - "colab_type": "code", - "outputId": "e2e38d1b-a0ef-4ee2-ee93-c5a71c8f80cb", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 237 - } - }, - "source": [ - "a = [1,2, 2, 3]\n", - "permutate(a, 0)\n", - "ans" - ], - "execution_count": 152, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "[[1, 2, 2, 3],\n", - " [1, 2, 3, 2],\n", - " [1, 3, 2, 2],\n", - " [2, 1, 2, 3],\n", - " [2, 1, 3, 2],\n", - " [2, 2, 1, 3],\n", - " [2, 2, 3, 1],\n", - " [2, 3, 2, 1],\n", - " [2, 3, 1, 2],\n", - " [3, 2, 2, 1],\n", - " [3, 2, 1, 2],\n", - " [3, 1, 2, 2]]" - ] - }, - "metadata": { - "tags": [] - }, - "execution_count": 152 - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "xhrZMOANBdF-", - "colab_type": "text" - }, - "source": [ - "#### Questions to ponder\n", - "1. What if there are duplicates in the input list? How to count and how to enumerate all possible permutations? [1, 2, 3, 2]\n", - "\n", - "We have a way to count:\n", - "\\begin{align}\n", - "\\frac{P(n, n)}{c_1!c_2!...}\n", - "\\end{align}\n", - "\n", - "Resources\n", - "* https://www.cs.sfu.ca/~ggbaker/zju/math/perm-comb-more.html\n", - "\n", - "[47. Permutations II](https://leetcode.com/problems/permutations-ii/)\n", - "\n", - "* first sort it as [1, 2, 2, 3]. We can draw the process\n", - "```\n", - "{}\n", - "i=0: {1}, {2}, *{2}*, {3} = 3\n", - "i=1: {1, 2}, *{1, 2}*, {1, 3}, {2, 1}, {2, 2}, {2, 3}, {3, 1}, {3,2}, *{3, 2}*\n", - "i=2: {1, 2, 2}, {1, 2, 3}, \n", - "```\n", - "Exactlyt the same process except that we skip the duplicates. \n", - "We can not find a recurrence relation to this. This depends on how many duplicates we have. 
\n", - "\n" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "8i2y2Ri-Bc_w", - "colab_type": "code", - "outputId": "7a572b00-7f89-4e2d-ee51-21af5bc1578b", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 483 - } - }, - "source": [ - "from graphviz import Digraph\n", - "dot = Digraph(comment='The Round Table', format='png')\n", - "dot.node_attr['shape']='box'\n", - "#print(get_methods(Digraph))\n", - "#print(Digraph.__dict__)\n", - "nodes = [('0', '[]'), ('1', '[1]'), ('2', '[2]'), ('3', '[2]'), ('4', '[3]'), \n", - " ('5', '[1, 2]'), ('6', '[1, 2]'), ('7', '[1, 3]'), \n", - " ('8', '[2, 1]'), ('9', '[2, 2]'), ('10', '[2, 3]'), \n", - " ('11', '[3, 1]'), ('12', '[3, 2]'), ('12_1', '[3, 2]'), \n", - " ('13', '[1, 2, 2]'), ('14', '[1, 2, 3]'), \n", - " ('15', '[1, 3, 2]'), \n", - " ('16', '[2, 1, 2]'), ('17', '[2, 1, 3]'),\n", - " ('18', '[2, 2, 1]'), ('19', '[2, 2, 3]'),\n", - " ('20', '[2, 3, 1]'), ('21', '[2, 3, 2]'),\n", - " ('22', '[3, 1, 2]'), ('22_1', '[3, 1, 2]') ,\n", - " ('23', '[3, 2, 1]'), ('24', '[3, 2, 2]'),\n", - " ('25', '[1, 2, 2, 3]'), \n", - " ('26', '[1, 2, 3, 2]'), \n", - " ('27', '[1, 3, 2, 2]'), \n", - " ('28', '[2, 1, 2, 3]'), \n", - " ('29', '[2, 1, 3, 2]'),\n", - " ('30', '[2, 2, 1, 3]'), \n", - " ('31', '[2, 2, 3, 1]'), \n", - " ('32', '[2, 3, 1, 2]'), \n", - " ('33', '[2, 3, 2, 1]'), \n", - " ('34', '[3, 1, 2, 2]'), \n", - " ('35', '[3, 2, 1, 2]'), \n", - " ('36', '[3, 2, 2, 1]')]\n", - "for i, node in nodes:\n", - " dot.node(i, label=node)\n", - "edges = [('0', '1'), ('0', '2'), ('0', '3'), ('0', '4'),\n", - " ('1', '5'), ('1', '6'), ('1', '7'),\n", - " ('2', '8'), ('2', '9'), ('2', '10'),\n", - " ('4', '11'), ('4', '12'),('4', '12_1'),\n", - " ('5', '13'), ('5', '14'),\n", - " ('7', '15'),\n", - " ('8', '16'), ('8', '17'),\n", - " ('9', '18'), ('9', '19'),\n", - " ('10', '20'), ('10', '21'),\n", - " ('11', '22'), ('11', '22_1'),\n", - " ('12', '23'),('12', '24'),\n", - " ('13', '25'),('14', '26'), ('15', 
'27'),('16', '28'), ('17', '29'),('18', '30'), \n", - " ('19', '31'),('20', '32'), ('21', '33'),('22', '34'), ('23', '35'),('24', '36')]\n", - "for n1, n2 in edges:\n", - " dot.edge(n1, n2)\n", - "# dot.edge('0', '1', _attributes={'label': '4'})\n", - "# dot.edge('S', 'B', _attributes={'label': '5'})\n", - "# dot.edge('A', 'G', _attributes={'label': '7'})\n", - "# dot.edge('B', 'G', _attributes={'label': '3'})\n", - "dot.render('test-output/permutation_repeat', view=True) \n", - "\n", - "dot" - ], - "execution_count": 153, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "" - ], - "image/svg+xml": "\n\n\n\n\n\n%3\n\n\n\n0\n\n[]\n\n\n\n1\n\n[1]\n\n\n\n0->1\n\n\n\n\n\n2\n\n[2]\n\n\n\n0->2\n\n\n\n\n\n3\n\n[2]\n\n\n\n0->3\n\n\n\n\n\n4\n\n[3]\n\n\n\n0->4\n\n\n\n\n\n5\n\n[1, 2]\n\n\n\n1->5\n\n\n\n\n\n6\n\n[1, 2]\n\n\n\n1->6\n\n\n\n\n\n7\n\n[1, 3]\n\n\n\n1->7\n\n\n\n\n\n8\n\n[2, 1]\n\n\n\n2->8\n\n\n\n\n\n9\n\n[2, 2]\n\n\n\n2->9\n\n\n\n\n\n10\n\n[2, 3]\n\n\n\n2->10\n\n\n\n\n\n11\n\n[3, 1]\n\n\n\n4->11\n\n\n\n\n\n12\n\n[3, 2]\n\n\n\n4->12\n\n\n\n\n\n12_1\n\n[3, 2]\n\n\n\n4->12_1\n\n\n\n\n\n13\n\n[1, 2, 2]\n\n\n\n5->13\n\n\n\n\n\n14\n\n[1, 2, 3]\n\n\n\n5->14\n\n\n\n\n\n15\n\n[1, 3, 2]\n\n\n\n7->15\n\n\n\n\n\n16\n\n[2, 1, 2]\n\n\n\n8->16\n\n\n\n\n\n17\n\n[2, 1, 3]\n\n\n\n8->17\n\n\n\n\n\n18\n\n[2, 2, 1]\n\n\n\n9->18\n\n\n\n\n\n19\n\n[2, 2, 3]\n\n\n\n9->19\n\n\n\n\n\n20\n\n[2, 3, 1]\n\n\n\n10->20\n\n\n\n\n\n21\n\n[2, 3, 2]\n\n\n\n10->21\n\n\n\n\n\n22\n\n[3, 1, 2]\n\n\n\n11->22\n\n\n\n\n\n22_1\n\n[3, 1, 2]\n\n\n\n11->22_1\n\n\n\n\n\n23\n\n[3, 2, 1]\n\n\n\n12->23\n\n\n\n\n\n24\n\n[3, 2, 2]\n\n\n\n12->24\n\n\n\n\n\n25\n\n[1, 2, 2, 3]\n\n\n\n13->25\n\n\n\n\n\n26\n\n[1, 2, 3, 2]\n\n\n\n14->26\n\n\n\n\n\n27\n\n[1, 3, 2, 2]\n\n\n\n15->27\n\n\n\n\n\n28\n\n[2, 1, 2, 3]\n\n\n\n16->28\n\n\n\n\n\n29\n\n[2, 1, 3, 2]\n\n\n\n17->29\n\n\n\n\n\n30\n\n[2, 2, 1, 3]\n\n\n\n18->30\n\n\n\n\n\n31\n\n[2, 2, 3, 1]\n\n\n\n19->31\n\n\n\n\n\n32\n\n[2, 3, 1, 
def permuteDup(nums, k):
    """Enumerate the distinct length-k permutations of a multiset.

    Args:
        nums: list of orderable, hashable items; duplicates are allowed.
            The list is NOT modified (the previous version sorted it in place).
        k: number of items in each permutation.

    Returns:
        A list of lists, one per distinct permutation, in ascending
        lexicographic order. Returns [[]] for k == 0 and [] when k exceeds
        len(nums).
    """
    ans = []
    # How many copies of each value are still available on the current path.
    remaining = Counter(nums)
    # Branch on distinct values only; this is what prevents duplicate
    # permutations from being generated in the first place.
    choices = sorted(remaining)

    def permutate(curr):
        if len(curr) == k:
            ans.append(curr[:])
            return
        for value in choices:
            if remaining[value] == 0:
                continue
            remaining[value] -= 1
            curr.append(value)
            permutate(curr)
            # Backtrack: give the value back before trying the next one.
            curr.pop()
            remaining[value] += 1

    permutate([])
    return ans
"outputId": "07b3cbd7-85be-4b83-eab3-947102fbd109", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 367 - } - }, - "source": [ - "from graphviz import Digraph\n", - "dot = Digraph(comment='The Round Table', format='png')\n", - "dot.node_attr['shape']='box'\n", - "#print(get_methods(Digraph))\n", - "#print(Digraph.__dict__)\n", - "nodes = ['{}', '{1}', '{2}', '{3}', '{1, 2}', '{1, 3}', '{2, 3}', '{1, 2, 3}']\n", - "for i, node in enumerate(nodes):\n", - " dot.node(str(i), label=node)\n", - "edges = [('0', '1'), ('0', '2'), ('0', '3'), ('1', '4'), ('1', '5'), ('2', '6'), ('4', '7')]\n", - "for n1, n2 in edges:\n", - " dot.edge(n1, n2)\n", - "dot.render('test-output/combination', view=True) \n", - "dot" - ], - "execution_count": 156, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "" - ], - "image/svg+xml": "\n\n\n\n\n\n%3\n\n\n\n0\n\n{}\n\n\n\n1\n\n{1}\n\n\n\n0->1\n\n\n\n\n\n2\n\n{2}\n\n\n\n0->2\n\n\n\n\n\n3\n\n{3}\n\n\n\n0->3\n\n\n\n\n\n4\n\n{1, 2}\n\n\n\n1->4\n\n\n\n\n\n5\n\n{1, 3}\n\n\n\n1->5\n\n\n\n\n\n6\n\n{2, 3}\n\n\n\n2->6\n\n\n\n\n\n7\n\n{1, 2, 3}\n\n\n\n4->7\n\n\n\n\n\n" - }, - "metadata": { - "tags": [] - }, - "execution_count": 156 - } - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "0EUTWhjmelaO", - "colab_type": "code", - "colab": {} - }, - "source": [ - "def C_n_k(a, n, k, start, d, curr, ans):\n", - " '''\n", - " Implement combination of k items out of n items\n", - " start: the start of candinate\n", - " depth: start from 0, and represent the depth of the search\n", - " curr: the current partial solution\n", - " ans: collect all the valide solutions\n", - " '''\n", - " if d == k: #end condition\n", - " ans.append(curr[::]) \n", - " return\n", - " \n", - " for i in range(start, n): \n", - " # generate the next solution from curr\n", - " curr.append(a[i])\n", - " # move to the next solution\n", - " C_n_k(a, n, k, i+1, d+1, curr, ans)\n", - "\n", - " #backtrack to previous partial 
def powerset(a, n, d, curr, ans):
    """Collect every subset of the first n items of `a` into `ans`.

    Each recursion level d decides whether a[d] is part of the subset:
    the "take it" branch is explored first, then the "leave it" branch.

    a: the input items
    n: how many items of `a` take part
    d: index of the item being decided (start with 0)
    curr: the subset built so far (restored before returning)
    ans: output list; receives a copy of each complete subset
    """
    if d != n:
        # Branch 1: a[d] is in the subset.
        curr.append(a[d])
        powerset(a, n, d + 1, curr, ans)
        # Branch 2: a[d] is not in the subset.
        curr.pop()
        powerset(a, n, d + 1, curr, ans)
    else:
        # Every item has been decided: record a snapshot.
        ans.append(list(curr))
[1, 2, 3, 2]\n", - "\n", - "\n", - "Take the product of all the (frequencies + 1).\n", - "\n", - "For example, in {A,B,B}, the answer is (1+1) [the number of As] * (2+1) [the number of Bs] = 6.\n", - "\n", - "In a second example, {A,A,B,B}, count(A) = 2 and count(B) = 2. Thus the answer is (2+1) * (2+1) = 9.\n", - "\n", - "The reason this works is that you can define any subset as a vector of counts - for {A,B,B}, the subsets can be described as {A=0,B=0}, {A=0,B=1}, {A=0,B=2}, {A=1,B=0}, {A=1,B=1}, {A=1,B=2}.\n", - "\n", - "For each number in counts[] there are (frequency of that object + 1) possible values (0..frequency).\n", - "\n", - "Therefore, the total number of possibilities is the product of all (frequencies+1).\n", - "\n", - "The \"all unique\" case can also be explained this way - there is one occurrence of each object, so the answer is (1+1)^|S| = 2^|S|.\n", - "\n", - "However, how to count the case of $c(n, k)$? Assume we have $m$ unique items, and the frequency of each is marked as $x_i$, with $\\sum_{i=0}^{m-1}x_i = n$. 
def C_n_k(a, n, k, start, depth, curr, ans):
    '''
    Enumerate the distinct combinations of AT MOST k items out of a[0:n].

    Every partial solution is recorded on entry, so `ans` receives the
    combinations of every size from 0 up to k, not only the size-k ones.
    Equal items must be adjacent in `a` (e.g. sort it first) for the
    duplicate check to work.

    a: the candidate items
    n: number of candidates taken from `a`
    k: maximum combination size
    start: index of the first candidate allowed at this level
    depth: number of items already chosen (start from 0)
    curr: the current partial solution (restored before returning)
    ans: collects a copy of every valid solution
    '''
    ans.append(list(curr))
    if depth == k:  # size limit reached, do not extend further
        return

    for i in range(start, n):
        # A value equal to its left neighbour was already tried at this level.
        is_repeat = i > start and a[i] == a[i - 1]
        if not is_repeat:
            curr.append(a[i])
            C_n_k(a, n, k, i + 1, depth + 1, curr, ans)
            curr.pop()  # backtrack
def all_paths(g, s, path, ans):
    '''Generate all paths that start with `path` by backtracking.

    g: adjacency list, g[u] holds the successors of vertex u
    s: the vertex at the end of `path`
    path: the path walked so far (restored before returning)
    ans: collects a copy of `path` at every visited vertex

    The path is printed after each extension and after each backtrack step.
    '''
    ans.append(path.copy())
    for successor in g[s]:
        path.append(successor)
        print(path)
        all_paths(g, successor, path, ans)
        path.pop()
        print(path, 'backtrack')
[1, 2]\n", - "rank3 = [3, 4, 5]\n", - "for rank in [rank1, rank2, rank3]:\n", - " with dot.subgraph() as s:\n", - " s.attr(rank='same')\n", - " for node in rank:\n", - " s.node(str(node))\n", - "dot.render('test-output/all_path_demo', view=True) \n", - "dot" - ], - "execution_count": 165, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "" - ], - "image/svg+xml": "\n\n\n\n\n\n%3\n\n\n\n0\n\n0\n\n\n\n1\n\n1\n\n\n\n0->1\n\n\n\n\n\n2\n\n2\n\n\n\n0->2\n\n\n\n\n\n1->2\n\n\n\n\n\n3\n\n3\n\n\n\n1->3\n\n\n\n\n\n4\n\n4\n\n\n\n1->4\n\n\n\n\n\n5\n\n5\n\n\n\n2->5\n\n\n\n\n\n" - }, - "metadata": { - "tags": [] - }, - "execution_count": 165 - } - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "HhBVClwEVeUJ", - "colab_type": "code", - "outputId": "6f50820c-c436-4201-f066-6aff0d613198", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 293 - } - }, - "source": [ - "ans = []\n", - "path = [0]\n", - "all_paths(al, 0, path, ans)\n", - "for path in ans:\n", - " path = [str(i) for i in path]\n", - " print('->'.join(path), end = ', ')" - ], - "execution_count": 166, - "outputs": [ - { - "output_type": "stream", - "text": [ - "[0, 1]\n", - "[0, 1, 2]\n", - "[0, 1, 2, 5]\n", - "[0, 1, 2] backtrack\n", - "[0, 1] backtrack\n", - "[0, 1, 3]\n", - "[0, 1] backtrack\n", - "[0, 1, 4]\n", - "[0, 1] backtrack\n", - "[0] backtrack\n", - "[0, 2]\n", - "[0, 2, 5]\n", - "[0, 2] backtrack\n", - "[0] backtrack\n", - "0, 0->1, 0->1->2, 0->1->2->5, 0->1->3, 0->1->4, 0->2, 0->2->5, " - ], - "name": "stdout" - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "EK22ibhQ3SYy", - "colab_type": "text" - }, - "source": [ - "#### Subsequences\n", - "\n", - "* Sequence is unique\n", - "* String has repetition: 940. Distinct Subsequences II\n", - "\n", - "The enumeration with backtacking is quite similar to the combination, other than in the case with repetition. 
# Counting
def distinctSubseqII(S):
    """Count the distinct non-empty subsequences of S, modulo 10**9 + 7.

    dp[i] is the number of distinct subsequences (the empty one included)
    of S[:i]. Appending character x doubles the count; if x occurred before
    at index p, the subsequences that already existed before that earlier
    occurrence (dp[p]) would be produced twice, so they are subtracted.

    The modulus is applied at every step so the intermediate integers stay
    bounded instead of growing exponentially with len(S).
    """
    MOD = 10**9 + 7
    dp = [1]
    last = {}  # character -> index of its most recent occurrence
    for i, x in enumerate(S):
        total = dp[-1] * 2
        if x in last:
            total -= dp[last[x]]
        dp.append(total % MOD)
        last[x] = i

    # Drop the empty subsequence.
    return (dp[-1] - 1) % MOD


# Enumerating
def check_repetition(start, i, a):
    """Return True if the value a[i] already occurs in a[start:i]."""
    return any(a[i] == a[j] for j in range(start, i))


def subseqs(a, n, start, curr, ans):
    """Enumerate the distinct subsequences of a[0:n] as joined strings.

    a: sequence of strings (need not be sorted)
    n: number of items of `a` to use
    start: index of the first item that may extend `curr`
    curr: items chosen so far (restored before returning)
    ans: collects ''.join(curr) for every distinct subsequence, the empty
        one included

    At each level a value is used as the next item at most once: its first
    occurrence at or after `start` already reaches everything a later
    occurrence could. The `seen` set performs the same test as
    check_repetition(start, i, a) without rescanning a[start:i] each time.
    """
    ans.append(''.join(curr))
    if start == n:
        return

    seen = set()
    for i in range(start, n):
        if a[i] in seen:
            continue
        seen.add(a[i])
        curr.append(a[i])
        subseqs(a, n, i + 1, curr, ans)
        curr.pop()
Graph(comment='The Round Table', format='png')\n", - "dot.node_attr['shape']='ellipse'\n", - "dot.node('0', label='<\\'\\''+', s={}>'.format(0))\n", - "count = 0\n", - "\n", - "def subseqs(a, n, start, curr, ans, node_label):\n", - " global count\n", - " ans.append(''.join(curr[::])) \n", - " if start == n: \n", - " return\n", - " \n", - " for i in range(start, n): \n", - " node = curr + [a[i]]\n", - " count += 1\n", - " dot.node(str(count), label='<\\''+''.join(node)+'\\''+', s={}>'.format(i+1))\n", - " dot.edge(node_label, str(count), _attributes={ 'label':'i={}'.format(i)})\n", - " if check_repetition(start, i, a):\n", - " dot.node(str(count), label='\\''+''.join(node)+'\\'', _attributes={'color': 'red'})\n", - " continue \n", - " curr.append(a[i])\n", - " subseqs(a, n, i+1, curr, ans, str(count))\n", - " curr.pop()\n", - " #count -= 1\n", - " return" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "NVkaCP2C3py4", - "colab_type": "code", - "outputId": "c5b3e660-64cb-47fa-f242-1a2b89bc075b", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 562 - } - }, - "source": [ - "S = '1232'\n", - "ans = []\n", - "subseqs(list(S), len(S), 0, [], ans, '0')\n", - "print(len(ans), ans)\n", - "dot.render('test-output/subsequence', view=True) \n", - "dot" - ], - "execution_count": 171, - "outputs": [ - { - "output_type": "stream", - "text": [ - "14 ['', '1', '12', '123', '1232', '122', '13', '132', '2', '23', '232', '22', '3', '32']\n" - ], - "name": "stdout" - }, - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "" - ], - "image/svg+xml": "\n\n\n\n\n\n%3\n\n\n\n0\n\n''\n, s=0\n\n\n\n1\n\n'1'\n, s=1\n\n\n\n0--1\n\ni=0\n\n\n\n9\n\n'2'\n, s=2\n\n\n\n0--9\n\ni=1\n\n\n\n13\n\n'3'\n, s=3\n\n\n\n0--13\n\ni=2\n\n\n\n15\n\n'2'\n\n\n\n0--15\n\ni=3\n\n\n\n2\n\n'12'\n, s=2\n\n\n\n1--2\n\ni=1\n\n\n\n6\n\n'13'\n, s=3\n\n\n\n1--6\n\ni=2\n\n\n\n8\n\n'12'\n\n\n\n1--8\n\ni=3\n\n\n\n3\n\n'123'\n, 
s=3\n\n\n\n2--3\n\ni=2\n\n\n\n5\n\n'122'\n, s=4\n\n\n\n2--5\n\ni=3\n\n\n\n4\n\n'1232'\n, s=4\n\n\n\n3--4\n\ni=3\n\n\n\n7\n\n'132'\n, s=4\n\n\n\n6--7\n\ni=3\n\n\n\n10\n\n'23'\n, s=3\n\n\n\n9--10\n\ni=2\n\n\n\n12\n\n'22'\n, s=4\n\n\n\n9--12\n\ni=3\n\n\n\n11\n\n'232'\n, s=4\n\n\n\n10--11\n\ni=3\n\n\n\n14\n\n'32'\n, s=4\n\n\n\n13--14\n\ni=3\n\n\n\n" - }, - "metadata": { - "tags": [] - }, - "execution_count": 171 - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "p2UCSXkM3TQK", - "colab_type": "text" - }, - "source": [ - "#### Partition" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "7kLQciZ1Zt2i", - "colab_type": "text" - }, - "source": [ - "## Constraint Satisfaction Problems with Backtracking and Pruning" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "4vOhZxglCBKD", - "colab_type": "text" - }, - "source": [ - "### Sudoku Solver\n", - "[Search space](https://www.researchgate.net/publication/264572573_Sudoku_Puzzle_Complexity)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "oHsib-ORB9Fh", - "colab_type": "text" - }, - "source": [ - "First, we build up the board" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "wU0IzGC8_EiP", - "colab_type": "code", - "colab": {} - }, - "source": [ - "board = [[5, 3, None, None, 7, None, None, None, None],\n", - " [6, None, None, 1, 9, 5, None, None, None],\n", - " [None, 9, 8, None, None, None, None, 6, None],\n", - " [8, None, None, None, 6, None, None, None, 3], \n", - " [4, None, None, 8, None, 3, None, None, 1], \n", - " [7, None, None, None, 2, None, None, None, 6], \n", - " [None, 6, None, None, None, None, 2, 8, None], \n", - " [None, None, None, 4, 1, 9, None, None, 5],\n", - " [None, None, None, None, 8, None, None, 7, 9]]" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "Psq2cedrMTGJ", - "colab_type": "text" - }, - "source": [ - "Define how to change the state" - ] - }, - { - 
def setState(i, j, v, row_state, col_state, grid_state):
    """Mark bit v as used in row i, column j and the 3x3 box containing (i, j)."""
    mask = 1 << v
    box = 3 * (i // 3) + j // 3
    row_state[i] |= mask
    col_state[j] |= mask
    grid_state[box] |= mask


def resetState(i, j, v, row_state, col_state, grid_state):
    """Clear bit v in row i, column j and the 3x3 box containing (i, j)."""
    keep = ~(1 << v)
    box = 3 * (i // 3) + j // 3
    row_state[i] &= keep
    col_state[j] &= keep
    grid_state[box] &= keep


def checkState(i, j, v, row_state, col_state, grid_state):
    """Return True when bit v is free in the row, the column and the box of (i, j)."""
    mask = 1 << v
    box = 3 * (i // 3) + j // 3
    taken = row_state[i] | col_state[j] | grid_state[box]
    return (taken & mask) == 0


def getEmptySpots(board, rows, cols, row_state, col_state, grid_state):
    '''Record the filled cells in the bitmask state and list the empty ones.

    board: grid whose empty cells hold a falsy value (None or 0)
    rows, cols: dimensions of the board to scan
    row_state, col_state, grid_state: bitmask lists, updated in place;
        bit (value - 1) is set for every filled cell

    Returns a dict mapping each empty (i, j) to the ascending list of
    values 1..9 that do not clash with the filled cells.
    '''
    empty_spots = {}
    # Pass 1: register the givens and remember where the holes are.
    for i in range(rows):
        for j in range(cols):
            value = board[i][j]
            if not value:
                empty_spots[(i, j)] = []
                continue
            setState(i, j, value - 1, row_state, col_state, grid_state)

    # Pass 2: with the full state known, compute each hole's candidates.
    for (i, j), candidates in empty_spots.items():
        candidates.extend(
            v + 1 for v in range(9)
            if checkState(i, j, v, row_state, col_state, grid_state))

    return empty_spots
"markdown", - "metadata": { - "id": "39DSr_mfCBrQ", - "colab_type": "text" - }, - "source": [ - "Second, we intialize the state and find empty spots. " - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "G76l_z6DAk4n", - "colab_type": "code", - "outputId": "3b1d1039-2376-4a98-cebb-5c6c7bf3ba34", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 74 - } - }, - "source": [ - "# initialize state\n", - "row_state = [0]*9\n", - "col_state = [0]*9\n", - "grid_state = [0]*9\n", - "\n", - "empty_spots = getEmptySpots(board, 9, 9, row_state, col_state, grid_state)\n", - "print(row_state, col_state, grid_state) \n", - "sorted_empty_spots = sorted(empty_spots.items(), key=lambda x: len(x[1]))\n", - "print(sorted_empty_spots)" - ], - "execution_count": 175, - "outputs": [ - { - "output_type": "stream", - "text": [ - "[84, 305, 416, 164, 141, 98, 162, 281, 448] [248, 292, 128, 137, 483, 276, 2, 224, 309] [436, 337, 32, 200, 166, 37, 32, 393, 466]\n", - "[((4, 4), [5]), ((6, 5), [7]), ((6, 8), [4]), ((7, 7), [3]), ((0, 3), [2, 6]), ((2, 0), [1, 2]), ((2, 3), [2, 3]), ((2, 4), [3, 4]), ((2, 5), [2, 4]), ((4, 1), [2, 5]), ((5, 1), [1, 5]), ((5, 3), [5, 9]), ((5, 5), [1, 4]), ((6, 4), [3, 5]), ((7, 0), [2, 3]), ((7, 6), [3, 6]), ((8, 5), [2, 6]), ((0, 2), [1, 2, 4]), ((0, 8), [2, 4, 8]), ((1, 1), [2, 4, 7]), ((1, 2), [2, 4, 7]), ((1, 7), [2, 3, 4]), ((2, 8), [2, 4, 7]), ((3, 1), [1, 2, 5]), ((3, 3), [5, 7, 9]), ((3, 5), [1, 4, 7]), ((4, 6), [5, 7, 9]), ((4, 7), [2, 5, 9]), ((5, 7), [4, 5, 9]), ((6, 0), [1, 3, 9]), ((6, 3), [3, 5, 7]), ((7, 1), [2, 7, 8]), ((7, 2), [2, 3, 7]), ((8, 0), [1, 2, 3]), ((0, 5), [2, 4, 6, 8]), ((0, 6), [1, 4, 8, 9]), ((0, 7), [1, 2, 4, 9]), ((1, 6), [3, 4, 7, 8]), ((1, 8), [2, 4, 7, 8]), ((3, 2), [1, 2, 5, 9]), ((3, 6), [4, 5, 7, 9]), ((3, 7), [2, 4, 5, 9]), ((4, 2), [2, 5, 6, 9]), ((5, 2), [1, 3, 5, 9]), ((5, 6), [4, 5, 8, 9]), ((8, 1), [1, 2, 4, 5]), ((8, 3), [2, 3, 5, 6]), ((8, 6), [1, 3, 4, 6]), ((2, 6), [1, 3, 4, 5, 7]), ((8, 
def dfs_backtrack(empty_spots, index):
    """Fill empty_spots[index:] by depth-first search with backtracking.

    empty_spots: sequence of ((i, j), candidate_values) pairs
    index: position in `empty_spots` of the cell to fill next

    Reads and updates the module-level `board`, `row_state`, `col_state`
    and `grid_state`. Returns True once every spot is filled; returns False
    when no candidate fits, leaving this spot cleared again.
    """
    if index == len(empty_spots):
        return True
    (row, col), candidates = empty_spots[index]

    for value in candidates:
        bit = value - 1
        # Skip values that clash with the current state.
        if not checkState(row, col, bit, row_state, col_state, grid_state):
            continue
        # Tentatively place the value and recurse on the next spot.
        setState(row, col, bit, row_state, col_state, grid_state)
        board[row][col] = value
        if dfs_backtrack(empty_spots, index + 1):
            return True
        # Dead end: restore the previous state and unmark the board.
        resetState(row, col, bit, row_state, col_state, grid_state)
        board[row][col] = None
    return False
class SudokoSolver():
    """Backtracking Sudoku solver that tracks used values with bitmasks.

    Empty cells of the board hold a falsy value (None or 0). Bit (v - 1) of
    row_state[i] / col_state[j] / grid_state[g] is set when value v is
    already present in that row / column / box.

    The box size is derived from the board side (3 for 9x9, 2 for 4x4)
    instead of being hard-coded to 3, so any board whose side is a perfect
    square is supported.
    """

    def __init__(self, board):
        """Copy `board`; raise ValueError if its side is not a perfect square."""
        self.original_board = deepcopy(board)
        self.board = deepcopy(board)
        self.n = len(board)
        assert (self.n == len(board[0]))
        # Side length of one box.
        self._box = round(self.n ** 0.5)
        if self._box * self._box != self.n:
            raise ValueError(
                'board side must be a perfect square, got {}'.format(self.n))
        # initialize state
        self.row_state = [0]*self.n
        self.col_state = [0]*self.n
        self.grid_state = [0]*self.n

    def _gridIndex(self, i, j):
        """Return the row-major index of the box that contains cell (i, j)."""
        return (i // self._box) * self._box + (j // self._box)

    def _setState(self, i, j, v):
        """Mark bit v as used in row i, column j and the box of (i, j)."""
        self.row_state[i] |= 1 << v
        self.col_state[j] |= 1 << v
        self.grid_state[self._gridIndex(i, j)] |= 1 << v

    def _resetState(self, i, j, v):
        """Clear bit v in row i, column j and the box of (i, j)."""
        self.row_state[i] &= ~(1 << v)
        self.col_state[j] &= ~(1 << v)
        self.grid_state[self._gridIndex(i, j)] &= ~(1 << v)

    def _checkState(self, i, j, v):
        """Return True when bit v is free in the row, column and box of (i, j)."""
        mask = 1 << v
        taken = (self.row_state[i] | self.col_state[j]
                 | self.grid_state[self._gridIndex(i, j)])
        return (taken & mask) == 0

    def reset(self):
        """Restore the original board and clear all bitmask state."""
        self.row_state = [0]*self.n
        self.col_state = [0]*self.n
        self.grid_state = [0]*self.n
        self.board = deepcopy(self.original_board)

    def _getEmptySpots(self):
        ''' get empty spots and find its corresponding values in O(n*n)'''
        empty_spots = {}
        # initialize the state, and get empty spots
        for i in range(self.n):
            for j in range(self.n):
                if self.board[i][j]:
                    # set that bit to 1
                    self._setState(i, j, self.board[i][j]-1)
                else:
                    empty_spots[(i, j)] = []

        # get possible values for each spot
        for i, j in empty_spots.keys():
            for v in range(self.n):
                if self._checkState(i, j, v):
                    empty_spots[(i, j)].append(v+1)

        return empty_spots

    def helper(self, empty_spots, index):
        """Fill empty_spots[index:] depth-first; return True on success.

        empty_spots: list of ((i, j), candidate_values) pairs
        index: position of the spot to fill next
        """
        if index == len(empty_spots):
            return True
        (i, j), vl = empty_spots[index]

        for v in vl:  # try each value
            # Candidates were computed up front, so re-check against the
            # values placed since then.
            if self._checkState(i, j, v-1):
                self._setState(i, j, v-1)
                self.board[i][j] = v
                if self.helper(empty_spots, index+1):
                    return True
                # backtrack to the previous state and unmark the board
                self._resetState(i, j, v-1)
                self.board[i][j] = None
        return False

    def _solve(self, sort_spots, label):
        """Reset, solve and print the elapsed search time prefixed by `label`."""
        self.reset()
        empty_spots = self._getEmptySpots()
        if sort_spots:
            # Minimal-domain-first: spots with the fewest candidates go first.
            empty_spots = sorted(empty_spots.items(), key=lambda x: len(x[1]))
        else:
            empty_spots = list(empty_spots.items())
        # perf_counter is monotonic and high-resolution; time.time() is
        # wall-clock and can jump.
        t0 = time.perf_counter()
        ans = self.helper(empty_spots, 0)
        print(label, time.perf_counter() - t0)
        return ans

    def backtrackSolver(self):
        """Solve visiting the empty spots in board order; return True if solved."""
        return self._solve(False, 'total time: ')

    def backtrackSolverSorted(self):
        """Solve visiting the most constrained spots first; return True if solved."""
        return self._solve(True, 'sorted total time: ')
class Sudoku():
    """9x9 Sudoku solver that keeps the used values of every row, column
    and 3x3 block in sets. Empty cells of the board hold 0.
    """

    def __init__(self, board):
        """Copy `board` and build the initial solver state."""
        self.org_board = deepcopy(board)
        self.board = deepcopy(board)
        # Build the state right away so solve()/naive_solve() work on a
        # fresh instance; calling init() again afterwards is harmless.
        self.init()

    def init(self):
        """(Re)start from the original puzzle.

        Restores the working board and rebuilds the row/column/block sets
        and the list of unfilled cells, so a second solve does not start
        from a board still filled in by the previous one.
        """
        self.board = deepcopy(self.org_board)
        self.A = set(range(1, 10))
        self.row_state = [set() for _ in range(9)]
        self.col_state = [set() for _ in range(9)]
        self.block_state = [[set() for _ in range(3)] for _ in range(3)]
        self.unfilled = []

        for i in range(9):
            for j in range(9):
                c = self.org_board[i][j]
                if c == 0:
                    self.unfilled.append((i, j))
                else:
                    self.row_state[i].add(c)
                    self.col_state[j].add(c)
                    self.block_state[i//3][j//3].add(c)

    def set_state(self, i, j, c):
        """Place value c at (i, j) and record it in the three constraint sets."""
        self.board[i][j] = c
        self.row_state[i].add(c)
        self.col_state[j].add(c)
        self.block_state[i//3][j//3].add(c)

    def reset_state(self, i, j, c):
        """Undo set_state(i, j, c): empty the cell and release the value."""
        self.board[i][j] = 0
        self.row_state[i].remove(c)
        self.col_state[j].remove(c)
        self.block_state[i//3][j//3].remove(c)

    def _options(self, i, j):
        """Return the set of values still allowed at (i, j)."""
        return self.A - (self.row_state[i] | self.col_state[j] | self.block_state[i//3][j//3])

    def _ret_len(self, args):
        """Sort key for a cell (i, j): how many values it can still take."""
        i, j = args
        return len(self._options(i, j))

    def solve(self):
        '''implement solver restricted spot selection and look ahead'''
        if len(self.unfilled) == 0:
            return True
        # Most constrained cell first.
        i, j = min(self.unfilled, key=self._ret_len)
        option = self._options(i, j)
        # Look ahead: if even the tightest cell has no candidate, fail early.
        if len(option) == 0:
            return False
        self.unfilled.remove((i, j))
        for c in option:
            self.set_state(i, j, c)
            if self.solve():
                return True
            self.reset_state(i, j, c)
        # no candidate is valid, backtrack
        self.unfilled.append((i, j))
        return False

    def naive_solve(self):
        '''implement naive solver without restricted spot selection or look ahead'''
        if len(self.unfilled) == 0:
            return True
        i, j = self.unfilled.pop()
        for c in self._options(i, j):
            self.set_state(i, j, c)
            if self.naive_solve():
                return True
            self.reset_state(i, j, c)
        # no candidate is valid, backtrack
        self.unfilled.append((i, j))
        return False
# %% Sudoku test boards (0 marks an empty cell)
import time

board_easy = [[5, 3, 0, 0, 7, 0, 0, 0, 0],
              [6, 0, 0, 1, 9, 5, 0, 0, 0],
              [0, 9, 8, 0, 0, 0, 0, 6, 0],
              [8, 0, 0, 0, 6, 0, 0, 0, 3],
              [4, 0, 0, 8, 0, 3, 0, 0, 1],
              [7, 0, 0, 0, 2, 0, 0, 0, 6],
              [0, 6, 0, 0, 0, 0, 2, 8, 0],
              [0, 0, 0, 4, 1, 9, 0, 0, 5],
              [0, 0, 0, 0, 8, 0, 0, 7, 9]]

board_hard = [[3, 8, 0, 0, 0, 4, 0, 0, 0],
              [0, 0, 5, 0, 0, 0, 0, 1, 0],
              [0, 0, 1, 5, 0, 0, 0, 7, 0],
              [2, 0, 7, 0, 0, 5, 0, 0, 4],
              [0, 0, 0, 6, 7, 9, 0, 0, 0],
              [8, 0, 0, 1, 0, 0, 7, 0, 6],
              [0, 5, 0, 0, 0, 8, 2, 0, 0],
              [0, 4, 0, 0, 0, 0, 5, 0, 0],
              [0, 0, 0, 2, 0, 0, 0, 4, 1]]

# Disabled in the original notebook; kept for reference.
# board_evil = [[0, 0, 0, 7, 0, 0, 0, 0, 4],
#               [8, 0, 0, 0, 0, 6, 0, 0, 0],
#               [6, 0, 0, 0, 9, 8, 7, 0, 0],
#               [0, 4, 0, 0, 6, 0, 9, 0, 7],
#               [0, 9, 0, 0, 0, 0, 0, 1, 0],
#               [1, 0, 8, 0, 2, 0, 0, 3, 0],
#               [0, 0, 3, 5, 4, 0, 0, 0, 8],
#               [0, 0, 0, 1, 0, 0, 0, 0, 5],
#               [2, 0, 0, 0, 0, 9, 0, 0, 0]]

# %% Compare the naive and the smart solver on each board
# (label used in the report, name of the Sudoku method to call)
strategies = [('naive', 'naive_solve'), ('smart', 'solve')]

for board in [board_easy, board_hard]:
    for label, method_name in strategies:
        # A fresh solver per strategy: the printed grid and the timing then
        # belong to this strategy alone, not to whatever ran before it.
        solver = Sudoku(board)

        t0 = time.perf_counter()
        solver.init()
        getattr(solver, method_name)()
        elapsed = time.perf_counter() - t0  # measured before printing, so I/O is excluded

        print(solver.board)
        print('total time using {} solver: '.format(label), elapsed, 's')
# %% Draw the 4-city TSP example graph
from graphviz import Graph

dot = Graph(comment='The Round Table', format='png')
dot.node_attr['shape'] = 'ellipse'

# Undirected weighted edges of the example: (city, city, distance).
tsp_edges = [('1', '2', 10),
             ('1', '3', 15),
             ('1', '4', 20),
             ('2', '4', 25),
             ('3', '4', 30),
             ('2', '3', 35)]
for u, v, distance in tsp_edges:
    dot.edge(u, v, label=str(distance))

# Ask graphviz to place each listed group of cities on the same rank.
rank1 = [1, 4]
rank2 = [2, 3]
for rank in [rank1, rank2]:
    with dot.subgraph() as s:
        s.attr(rank='same')
        for node in rank:
            s.node(str(node))

# NOTE(review): view=True asks graphviz to open the rendered file in an
# external viewer; confirm that is wanted when running headless.
dot.render('test-output/tsp_graph', view=True)
dot

# %% Growth of x**(x+1) compared with x!
import math

import matplotlib.pyplot as plt
import numpy as np

# Create the vectors X and Y.
# int64 is requested explicitly: 9**10 does not fit in a 32-bit integer, and
# NumPy integer arithmetic wraps around silently on overflow.
x = np.arange(10, dtype=np.int64)
y = x ** (x + 1)
y2 = [math.factorial(int(i)) for i in x]
print(y)
print(y2)

# Create the plot (the legend states the expression that is actually computed)
plt.plot(x, y, label='y = x**(x+1)')
plt.plot(x, y2, label='y = x!')

# Add a title
plt.title('My first Plot with Python')

# Add X and y Label
plt.xlabel('x axis')
plt.ylabel('y axis')

# Add a grid
plt.grid(alpha=.4, linestyle='-')

# Add a Legend
plt.legend()

# Show the plot
plt.show()
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAYIAAAEWCAYAAABrDZDcAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjEsIGh0\ndHA6Ly9tYXRwbG90bGliLm9yZy+j8jraAAAgAElEQVR4nO3deZhcVbnv8e/b3Uk6c0iHDE2HJJKQ\nQTxMAUQuMo9y4FFR9CIyGi8PHJDHCY4akOu5DkdUzsErxIiAgp4Dgg9yEVEZAiozYUpXhzB3uitT\nk3R1kp7f+0ftSqo7PVR3165dw+/zPEVq77323m+tkHpr77X2WubuiIhI6SqLOgAREYmWEoGISIlT\nIhARKXFKBCIiJU6JQESkxCkRiIiUOCUCiZSZjTWzP5jZNjO728zONbOHI4rlbTM7Mcfn/KOZnT/A\n9tvM7Du5jCk471wzczOryPW5JfeUCCRjwRdlu5lN67X+xeBLY+4wDns2MAOocvdPufud7n7yMOMb\n9EsziHO7mbWY2Xoz+5GZlQ/xPMeaWf1wYuzN3U9z99uD415gZk8O91hpX94twettM7s6w31zngQl\nfygRyFC9BXw2tWBmHwLGjeB4c4C17t45WMEs/jo90N0nACcA/xP4QpaOmy+mBJ/vs8ByMzs16oAk\nvykRyFD9Cvh82vL5wB2pBTM7zMw2pP/KNrNPmNlLvQ9kZt8GlgPnBL9gL+79qzj4hXuZmb0OvG5J\nPzazjWbWbGavmNkBZrYMOBf4WnCsPwz2Qdw9BjwBHNBHbGPM7Cdm1hC8fhKsGw/8EahO++Vd3Wvf\neWa21czKguWfm9nGtO2/MrMvBe8fM7NLzGwxcDNwZHDMrWmH3MvM/p+ZJczsaTPbb7DPFny+fwCv\nAQeY2U/N7IZecd5vZleZ2a+AfYE/BOf+Wlqxc83sXTPbbGbfGKx+gm3Hmlm9mX05+HtqNLMLM4lZ\nIuLueumV0Qt4GzgRqAMWA+VAPclf9Q7MDcqtAU5L2+8+4Mv9HPM64NdpyxcAT6YtO/BnYCowFjgF\neB6YAlgQx6yg7G3Adwb5DA7MD94vAeLAxemfL3h/PfAUMB3YG/g78L+DbccC9YOc513g0OB9HfAm\nsDht28HB+8eAS/r67GmfaQtwOFAB3An8tp9zzg0+X0VQN0cBO0he+RwONABlQdlpwbYZvT97r2P9\nPKj3A4G2tM8wWP10BmVGAacH59or6v+H9er7VZBXBGZ2a/BL49UMys4xs7+a2cvBr6+aXMRY5FJX\nBScBtcD6XttvBz4HYGZTSX553zWC833X3ZvcfSfQAUwEFgHm7rXu3jjE471gZu8DfwBWAr/so8y5\nwPXuvtHdNwHfBs4bwjkeB44xs5nB8j3B8jxgErDHFdIA7nP3Zzx5++xO4KBBym8Gmkh+tqvd/a/u\n/gywjWRSAPgM8Ji7bxjkWN92953u/lIQ84HB+sHqpyPY3uHuDwItwMKMPq3kXKH2CLgNuIm0WxID\n+CFwh7vfbmbHA99laP+gZU+/AlYB8+j77+DXQG1wG+XTwBPD+LJO917qjbs/YmY3AT8F5pjZvcBX\n3L15CMc7xN3XDVKmGngnbfmdYF2mHgfOJHnFtIrkL//zgFaS9dE9hGPF097vACYMUn6a993mkkrQ\nfw7+vHEE5x6sfrb0iiGTuCUiBXlF4O6rSP7i2cXM9jOzh8zseTN7wswWBZuWAI8E7x8FzsphqEXJ\n3d8h2Wh8OnBvH9vXA/8APkHyy+9XIz1lr+P/h7sfSvLvdn/gq32VG6EGkre8UvYN1mV6nseBo0ne\nJnkceJLkrZpjguW+hD0U8K+Bs8zsQJK31H4/gnMPVD9SYAoyEfRjBfAvwRfEV4D/G6x/ieQXEsDH\ngYlmVhVBfMXmYuB4d9/ez/Y7gK8BH6KPZDFc
QWP0EWY2CthO8hd26tf1BuADWTrVb4Bvmtneluwu\nu5zkF2nqPFVmNrm/nd39dWAnyV/ejwdXLBuAT9J/ItgA1JjZ6Cx9ht4x1QPPkkzMvwtutaWfeyh1\nN1D9SIEpikRgZhOAjwB3m9lq4BZgVrD5KyTvzb5I8tfYeqArkkCLiLu/4e7PDVDkPpK/GO9z9x1Z\nPPUkkg2Y75O8HbEF+Pdg2y+AJUGPnd/3s3+mvgM8B7wMvAK8EKzDk72NfgO8GZyrv1tGj5O8RfJe\n2rIFx+rLIyR7+cTNbPMI4+/P7SSTc++rtO+S/GLfamZfyeA4/daPFB5zL8yJaSz58NID7n6AmU0C\n6tx91iD7TABi7q4G4xwwszeAL7r7X6KORZLM7KMkf7nP8UL9xy9ZVxRXBMFl91tm9imAoK/5gcH7\naan+3MA1wK0RhVlSzOyTJO87PzJYWcmN4HbalcBKJQFJV5CJwMx+Q7IxcmHw4MrFJLuzXWzJB5de\nY3ej8LFAnZmtJTmUwb9FEHJJMbPHgJ8Blw2xd4yEJHhgbSvJW6Y/iTgcyTMFe2tIRESyoyCvCERE\nJHsK7oGyadOm+dy5c4e1b0dHB6NGjcpuQAVM9dGT6mM31UVPxVAfzz///GZ337uvbQWXCObOnctz\nzw3Ua7F/DQ0NVFcP5eHQ4qb66En1sZvqoqdiqA8ze6e/bbo1JCJS4pQIRERKnBKBiEiJK7g2gr50\ndHRQX19Pa2vrgOW6urrYtm1bjqLKf9moj8rKSmpqagq+IU2klIWWCMyskuTwu2OC89zj7tf2KnMB\nyXFiUuPZ3+TuK4d6rvr6eiZOnMjcuXMxs37Ltbe3M3p0KON5FaSR1oe7s2XLFurr65k3b14WIxOR\nXArziqCN5OiULcGj7U+a2R/d/ale5f7L3S8fyYlaW1sHTQKSfWZGVVUVmzZtijoUERmB0BJBMJZJ\nS7A4KniF9hizkkA0VO8ihS/UNgJLTmD+PDAf+Km7P91HsU8GIyKuBa5KG7I3/TjLgGUANTU1NDT0\nnP+iq6uL9vb2QePJtFypyFZ9dHV17fF3UoiampoGL1QiVBc95UN93Pp0Ix+aNZ7D9p2U/YPnYmJk\nkhONPwoc0Gt9FTAmeP9F4JHBjnXooYd6b2vWrNljXV/a2toyKpcvfvzjH/vtt98+pH1OOeUUnzx5\nsn/sYx/rsf6cc87xtWvXurt7d3e3u7t/85vf3LWcWnfttdf2KJOJTOs/361fvz7qEPKG6qKnqOtj\nR1unz736Af/xn+uGfQzgOe/nezUnvYbcfauZPQqcCryatn5LWrGVwA9yEU8h6Ozs5NZbb+WFF/qb\nw6RvX/3qV9mxYwe33HJLj/WXXnopP/jBD/j5z3/Oww8/zKpVq2htbWXlypUkEgmWLFnCqlWr6Ojo\n2LXuqquuyuZHEpFhWrshgTssmjkxlOOH9hxBMIXdlOD9WOAkINarTPpEMmcCtWHFE6bly5fzk5/s\nHtn3G9/4BjfemMm84P175JFHOOSQQ6ioqKCzs5PDDjuMxx57DIBrrrmGb3zjG33ud8IJJzBx4p7/\nsxx99NH85S9/obOzk1NOOYVTTjmFm266iS1btnDVVVftWnfjjTfuWvfOO++wYMECNm/eTHd3N0cf\nfTQPP/zwiD6XiAxdXTwBwKKZIdwWItw2glnA7UE7QRnw3+7+gJldT/IS5X7gCjM7E+gkORn9BSM9\n6bf/8BprGpr73Obuw2rcXFI9iWv/+YP9br/ooov4xCc+wZe+9CW6u7v57W9/yzPPPLNHuaOPPppE\nIrHH+h/+8IeceOKJPdb97W9/49BDDwWgoqKC2267jbPPPpv//M//5KGHHuLpp/tqbulfWVkZ8+fP\n56WXXqKpqYnHHnuMyy67jKqqKm688UaWLFnCY489xhVXXLFr3ZVXXsnXv/51Lr30Ug4//HCWLFnC\nySefPKTz
isjI1cabGTuqnH2njgvl+GH2GnoZOLiP9cvT3l9DctawgjZ37lyqqqp48cUX2bBhAwcf\nfDBVVVV7lHviiScyPmZjYyOLFy/etfzBD36Q8847jzPOOIN//OMfw+r/P336dBoaGjjjjDM46aST\n+Na3vsUll1ySaq/hpJNO4rrrruux7pJLLuHuu+/m5ptvZvXq1UM+p4iMXF08wf4zJ1JWFk4vvaJ4\nsjjdQL/cw3yg7JJLLuG2224jHo9z0UUX9VlmKFcEY8eO3eNJ6VdeeYUpU6awceNGAJ5++mm++MUv\nAnD99ddz5plnDhhja2srY8eO3XVV9K1vfQvo2QX0uuuu67Fux44d1NfXA9DS0tLnbScRCY+7E4sn\nOGnxjNDOUXSJICof//jHWb58OR0dHdx11119lhnKFcHixYtZt27druV7772XpqYmVq1axRlnnMEz\nzzzDEUccMaRf6WvXruWAAw7IuDzA17/+dc4991zmzJnDF77wBR544IEh7S8iI7OppY2m7e0smhXe\njzANOpclo0eP5rjjjuPTn/405eXlIz7eaaedxqpVqwDYvHkzV199NStXrmT//ffn8ssv58orr+xz\nv6OPPppPfepT/PWvf6WmpoY//elPAGzYsIGxY8cyc+bMjGN4/PHHefbZZ3clg9GjR/PLX/5yxJ9N\nRDIXa0zeRVgYUo8h0BVB1nR3d/PUU09x9913Z+V4c+bMoaqqitdff50FCxawdu3aXduuuOKKfvfr\n76rjrrvu2nUbKVPHHHMMTz21e0SQe++9d0j7i8jIhd1jCHRFkBVr1qxh/vz5nHDCCSxYsCBrx/3e\n975HY2NjVo41ZcoUzj///KwcS0RypzbezIxJY5g6PrwBM3VFkAVLlizhzTffzPpxFy5cyMKFC7Ny\nrAsvvDArxxGR3Io1JlgY4tUA6IpARCRvdXZ1s25jC4tDbB8AJQIRkbz11ubttHd1h9pQDEoEIiJ5\nK5aDhmJQIhARyVuxeDPlZcZ+08eHeh4lggLz9ttvD/mhMBEpTHXxBPvtPZ4xFSN/NmkgSgQiInmq\nNgc9hkCJICvCGIb6rLPO4o477gDglltu4dxzzx3R8USksDS3drB+687Q5iBIV3zPEfzxaoi/0uem\nCu8GG0bum/khOO17/W4OYxjqFStWcNRRRzFv3jxuuOGGHk/4ikjxW7uroViJoCCEMQz1jBkzuP76\n6znuuOO47777mDp1ajZDFpE8t6vH0Kzwbw0VXyIY4Jd7ZwENQw3JYaerqqp6TAw/d+5cXn311T3K\nikhxicWbmVhZQfXkytDPVXyJICLZHob6mWee4Y9//CMvvvgixxxzDCeffDLz5s3LVrgikudijQkW\nzZw4rFkVh0qNxVmSzWGo29ra+MIXvsCtt95KdXU1N9xwAxdddBHuTkNDA2effXaWohaRfOTu1MUT\noT9IlqIrgizJ5jDUY8aM4aWXXtq1fOaZZ+6afay6upp77rlnxOcQkfy1futOEm2doQ8tkaIrgiwI\naxhqESlNqTkIFoc4K1m60K4IzKwSWAWMCc5zj7tf26vMGOAO4FBgC3COu78dVkxhCWsYahEpTake\nQ/vPKPwrgjbgeHc/EDgIONXMPtyrzMXA++4+H/gx8P3hnszdhx2oDJ/qXST7YvEENXuNZWLlqJyc\nL7RE4EktweKo4NX7W+Ms4Pbg/T3ACTaMJvLKykq2bNmiL6Ucc3e2bNlCZWX43dtESkmssTknD5Kl\nhNpYbGblwPPAfOCn7v50ryL7AO8BuHunmW0DqoDNvY6zDFgGUFNT06NfPUBFRQWbNm0iHo8PGE9X\nV1dWJpYvFtmoj/LyciZMmLDH30khampqijqEvKG66CmX9dHe2c2bm1r4yJzxOft3FWoicPcu4CAz\nmwLcZ2YHuPuQn4Zy9xXACoClS5d6dXX1sOJpaGhguPsWI9XHnlQfu6kues
pVfbzWsI0uh8MWVOfs\nnDnpNeTuW4FHgVN7bVoPzAYwswpgMslGYxGRkhRrzN0YQymhJQIz2zu4EsDMxgInAbFexe4Hzg/e\nnw084rrRLyIlrG5DgtEVZcytCncymnRh3hqaBdwetBOUAf/t7g+Y2fXAc+5+P/AL4Fdmtg5oAj4T\nYjwiInmvtrGZBdMnUFGeu8e8QksE7v4ycHAf65envW8FPhVWDCIihaYunuDoBXvn9Jx6slhEJE80\nbW9nY6Itp+0DoEQgIpI3YvFmABblaGiJFCUCEZE8keoxlKvB5lKUCERE8kRdPEHV+NHsPWFMTs+r\nRCAikidi8WYWzcrNZDTplAhERPJAV7dTtyHBwhm5mYwmnRKBiEgeeLdpB60d3TlvKAYlAhGRvBBr\nDHoM5bihGJQIRETyQiyeoMxgwXQlAhGRkhSLNzO3ajxjR+d+qHwlAhGRPFAXT0TSPgBKBCIikdvR\n3sk7TTsi6TEESgQiIpFbu6EF99wPLZGiRCAiErEoewyBEoGISORi8QTjRpcze69xkZxfiUBEJGKx\neDMLZ06krCy3Q0ukKBGIiETI3YnFE5HdFgIlAhGRSG1MtLF1RweLZkbTYwiUCEREIlUbNBTneg6C\ndKElAjObbWaPmtkaM3vNzK7so8yxZrbNzFYHr+V9HUtEpFjVxZOT0UR5ayi0yeuBTuDL7v6CmU0E\nnjezP7v7ml7lnnD3M0KMQ0Qkb8XiCWZOqmTKuNGRxRDaFYG7N7r7C8H7BFAL7BPW+UREClEswqEl\nUsK8ItjFzOYCBwNP97H5SDN7CWgAvuLur/Wx/zJgGUBNTQ0NDQ3DiqOpqWlY+xUr1UdPqo/dVBc9\nhVUfnV3O6xsSHFJdOezvtWwIPRGY2QTgd8CX3L251+YXgDnu3mJmpwO/Bxb0Poa7rwBWACxdutSr\nq6uHHc9I9i1Gqo+eVB+7qS56CqM+6uIJOrudw+ZXR1rfofYaMrNRJJPAne5+b+/t7t7s7i3B+weB\nUWY2LcyYRETyRSweDC0R8a2hMHsNGfALoNbdf9RPmZlBOczs8CCeLWHFJCKST2LxBBVlxgemTYg0\njjBvDR0FnAe8Ymarg3X/CuwL4O43A2cDl5pZJ7AT+Iy7e4gxiYjkjbp4gvnTJzC6ItpHukJLBO7+\nJDDgwBnufhNwU1gxiIjks1hjM4fNmxp1GHqyWEQkCtt2dtCwrTXSoSVSlAhERCKQD08UpygRiIhE\noC5PegyBEoGISCRq4wkmVVYwc1Jl1KEoEYiIRKEunmDRrEkEPegjpUQgIpJj3d2eTAR50D4ASgQi\nIjm3futOWto686LHECgRiIjkXCzVYygPGopBiUBEJOdiwaxk+89QIhARKUmxDQn2nTqOCWNyMhPA\noJQIRERyLNbYHOkcxb0pEYiI5FBrRxdvbd7OYiUCEZHStG5jC90OC/OkxxAoEYiI5FS+9RgCJQIR\nkZyKNTYzpqKMuVXjow5lFyUCEZEcisUT7D9jIuVl0Q8tkaJEICKSQ7E8GloiRYlARCRHNre0sbml\nLa+6joISgYhIzqQmo1k8K396DEGIicDMZpvZo2a2xsxeM7Mr+yhjZvYfZrbOzF42s0PCikdEJGq1\nwdAS+XZFEObzzZ3Al939BTObCDxvZn929zVpZU4DFgSvI4CfBX+KiBSduniCaRPGMG3CmKhD6SG0\nKwJ3b3T3F4L3CaAW2KdXsbOAOzzpKWCKmc0KKyYRkSjlY0Mx5KiNwMzmAgcDT/fatA/wXtpyPXsm\nCxGRgtfV7azdkJ+JIPSh78xsAvA74Evu3jzMYywDlgHU1NTQ0NAwrFiampqGtV+xUn30pPrYTXXR\nUzbq4533W2nr7GZGZdewv8PCMmgiMLOjgNXuvt3MPgccAtzo7u9ksO8okkngTne/t48i64HZacs1\nwboe3H0FsAJg6dKlXl1dPdip+zWSfY
uR6qMn1cduqoueRlofL25uBODIxftSXT05GyFlTSa3hn4G\n7DCzA4EvA28Adwy2kyVnZP4FUOvuP+qn2P3A54PeQx8Gtrl7Y2ahi4gUjrp4M2UG86dPiDqUPWRy\na6jT3d3MzgJucvdfmNnFGex3FHAe8IqZrQ7W/SuwL4C73ww8CJwOrAN2ABcO9QOIiBSC2niCedPG\nUzmqPOpQ9pBJIkiY2TXA54CPmlkZMGqwndz9SWDAwTTc3YHLMglURKSQ1cUTfKgmv24JpWRya+gc\noA242N3jJO/j/3uoUYmIFJGWtk7ebdrBojyZo7i3Qa8Igi//H6Utv0sGbQQiIpK0dkNqDoL8Gloi\npd9EYGZPuvv/MLME4OmbSN7Vyc9PJCKSZ2KNQSLIw2cIYIBE4O7/I/gzPyMXESkQdfFmJoypYJ8p\nY6MOpU+DthGY2Yl9rDs/nHBERIpPbTzB/jMmUJZHk9Gky6SxeLmZ/czMxpvZDDP7A/DPYQcmIlIM\n3J1YY3Petg9AZongGJIPka0GngTucvezQ41KRKRIxJtbaW7tzNv2AcgsEewFHE4yGbQBc4KnhkVE\nZBC7G4oL+4rgKeAhdz8VOAyoBv4WalQiIkUiFsxKlm+T0aTL5MniE4NnB3D3ncAVZvbRcMMSESkO\nsXgz1ZMrmTx20AEZIpPJA2XvmtleJGcRqww/JBGR4lEXT+R1QzFkNgz1JcCVJIeWWA18GPgHcHy4\noYmIFLb2zm7WbWzhuEXTow5lQJm0EVxJsm3gHXc/juRMY1tDjUpEpAi8ubmFzm7P6x5DkFkiaHX3\nVgAzG+PuMWBhuGGJiBS+QugxBJk1Fteb2RTg98Cfzex9YNDZyURESl1tvJlR5cYH9h4fdSgDyqSx\n+OPB2+vM7FFgMvBQqFGJiBSBuniC/faewKjyTG6+RGdIk9e7++NhBSIiUmxijQmO3K8q6jAGld9p\nSkSkQG3d0U68uTWvHyRLUSIQEQlB6onifO8xBJkNQ/0vwQNlIiKSobogESzO84fJILMrghnAs2b2\n32Z2aqYDzpnZrWa20cxe7Wf7sWa2zcxWB6/lQwlcRCSfxeLNTBk3iukTx0QdyqAGTQTu/k2Sw0v8\nArgAeN3M/o+Z7TfIrrcBpw5S5gl3Pyh4XZ9BvCIiBSEWT7Bo5kQKYbDmjNoI3N2BePDqJDk09T1m\n9oMB9lkFNGUjSBGRQtLd7ckxhvL8QbKUTMYauhL4PLAZWAl81d07zKwMeB342gjOf6SZvQQ0AF9x\n99f6iWEZsAygpqaGhoaGYZ2sqUl5KZ3qoyfVx26qi56GWh/rt7Wxo72LmZVdw/6+yqVMniOYCnzC\n3Xs8Tezu3WZ2xgjO/QIwx91bzOx0kk8uL+iroLuvAFYALF261Kurq4d90pHsW4xUHz2pPnZTXfQ0\nlPp45f04AEcsmk11df73tcmkjeDa3kkgbVvtcE/s7s3u3hK8fxAYZWbThns8EZF8EWtMYAb7z8j/\nrqMQ4XMEZjYz1QPJzA4PYtkSVTwiItlSt6GZfaeOY/yYIQ3eEJnQojSz3wDHAtPMrB64FhgF4O43\nA2cDl5pZJ7AT+EzQKC0iUtBijYmCeJAsJbRE4O6fHWT7TcBNYZ1fRCQKO9u7eHvLdv75wMJpY9EQ\nEyIiWfT6xgTdXhhDS6QoEYiIZNGuMYYKYGiJFCUCEZEsijUmqBxVxr5Tx0UdSsaUCEREsqhuQzML\nZ0ykvCz/h5ZIUSIQEckSd6e2MVEQcxCkUyIQEcmSTS1tNG1vL5gxhlKUCEREsqSugCajSadEICKS\nJbHGZCLQrSERkRIViyfYe+IYqibk/2Q06ZQIRESyJBZvLrjbQqBEICKSFZ1d3by+saUg5ijuTYlA\nRCQL3t6ynfbObhYWyNDT6ZQIRESyYPfQEkoEIiIlKdaYoLzMmD99QtShDJkSgYhIFsTizXxg2njG\nVJ
RHHcqQKRGIiGRBLF54Q0ukKBGIiIxQorWD+vd3FmSPIVAiEBEZsbUbgieKC7DHECgRiIiMWG1j\n4fYYghATgZndamYbzezVfrabmf2Hma0zs5fN7JCwYhERCVNdPMHEMRXsM2Vs1KEMS5hXBLcBpw6w\n/TRgQfBaBvwsxFhEREITizezcOZEzApnMpp0oSUCd18FNA1Q5CzgDk96CphiZrPCikdEJAzuTiye\nKNjbQgAVEZ57H+C9tOX6YF1j74JmtozkVQM1NTU0NDQM64RNTQPlpdKj+uhJ9bGb6qKngeojnmgn\n0drJzMruYX83RS3KRJAxd18BrABYunSpV1dXD/tYI9m3GKk+elJ97Ka66Km/+qit3QDAhxfNprp6\nai5Dypooew2tB2anLdcE60RECkZqjKH9C/RhMog2EdwPfD7oPfRhYJu773FbSEQkn8XiCfaZMpZJ\nlaOiDmXYQrs1ZGa/AY4FpplZPXAtMArA3W8GHgROB9YBO4ALw4pFRCQsdQU6GU260BKBu392kO0O\nXBbW+UVEwtbW2cUbm7Zz0pIZUYcyInqyWERkmN7YuJ2ubmfhzMIcYyhFiUBEZJhi8WYAFhf4rSEl\nAhGRYaqLJxhdXsa8aeOjDmVElAhERIapNp5g/vQJVJQX9ldpYUcvIhKhWGNzQQ8tkaJEICIyDE3b\n29mYaCv4rqOgRCAiMiyphuJFBd5jCJQIRESGpS4YWkJXBCIiJSrWmGDq+NHsPXFM1KGMmBKBiMgw\nxDYkWDijcCejSadEICIyRN3dztoCn4wmnRKBiMgQvdu0g50dXSwugoZiUCIQERmyVI+hhUXQUAxK\nBCIiQxaLJzCD/WcoEYiIlKRYY4K5VeMZO7o86lCyQolARGSIYkUwGU06JQIRkSHY0d7JO007iqZ9\nAJQIRESGZO2GFtyLY2iJFCUCEZEhqNs1xpCuCDJiZqeaWZ2ZrTOzq/vYfoGZbTKz1cHrkjDjEREZ\nqdrGBGNHlbPv1HFRh5I1oU1eb2blwE+Bk4B64Fkzu9/d1/Qq+l/ufnlYcYiIZFNdPMH+MydSVlb4\nQ0ukhHlFcDiwzt3fdPd24LfAWSGeT0QkVO5OLN5c8HMU9xZmItgHeC9tuT5Y19snzexlM7vHzGaH\nGI+IyIhsSrTx/o6OomofgBBvDWXoD8Bv3L3NzL4I3A4c37uQmS0DlgHU1NTQ0NAwrJM1NTWNINTi\no/roSfWxm+qip1R9PPVOsqF42qj2YX8P5aMwE8F6IP0Xfk2wbhd335K2uBL4QV8HcvcVwAqApUuX\nenV19bCDGsm+xUj10ZPqYzfVRU/V1dVsen0nAEd9cB57jR8dcUTZE+atoWeBBWY2z8xGA58B7k8v\nYGaz0hbPBGpDjEdEZETq4lUb3swAAAlkSURBVAlmTBpTVEkAQrwicPdOM7sc+BNQDtzq7q+Z2fXA\nc+5+P3CFmZ0JdAJNwAVhxSMiMlK18URRPUiWEmobgbs/CDzYa93ytPfXANeEGYOISDZ0dHXzxsYW\nPrpgWtShZJ2eLBYRycBbm7fT3tVdNLOSpVMiEBHJQCyeAGDhjOK7NaREICKSgVhjMxVlxn7Tx0cd\nStYpEYiIZKAunmC/vScwpqI4JqNJp0QgIpKBWDxRVHMQpFMiEBEZRKKtk/VbdxZlQzEoEYiIDOrN\nLa1Acc1BkE6JQERkEOs2J4eWKMaHyUCJQERkUG9uaWViZQWzJldGHUoolAhERAaxbvNOFs+chFnx\nTEaTTolARGQA7s4bm3cWbY8hUCIQERlQ/fs72dFRnENLpCgRiIgMoC4YWqJYewyBEoGIyIBi8eSs\nZPvPKN5EEPVUlSIiecndeWvzdv7+xhaqJ41mYuWoqEMKjRKBiAjw/vZ2Vr+3lRff28rq97by0ntb\n2bazA4DTF0+NOLpwKRGISMlp6+yitjHB6nffZ3Xwxf/2lh0AlFny
NtDpH5rJQbOncNDsvRjf1Rxx\nxOFSIhCRoubuvNu0I/lr/93kl/6ahmbau7oBmDFpDAfNnsI5h+3LQbOn8E81kxk/pudXY0NDIorQ\nc0aJQESKyradHbwU/MpPvZq2twNQOaqMf9pnChccNZeDZk/h4H2nMGvy2Igjjp4SgYgUrI6uburi\nieR9/Xe38uJ77/Pmpu27ts+fPoHjF03n4H2ncNDsKSycMZGKcnWW7C3URGBmpwI3AuXASnf/Xq/t\nY4A7gEOBLcA57v52mDGJSGFyd9Zv3Zn8lR/c4nll/TbaOpO3eKZNGM1Bs6fwiYP34aDZe/FPsycz\nqYh7+mRTaInAzMqBnwInAfXAs2Z2v7uvSSt2MfC+u883s88A3wfOCSsmEckNd8cdutzpdqe7m+Sf\nvd53BeW63enqDvbpDso5bGxu3dWL58V3t7K5pQ2A0RVlHFA9iXOPmMNB+07h4NlTqNlrbNGOBRS2\nMK8IDgfWufubAGb2W+AsID0RnAVcF7y/B7jJzMzdPdvBvPzY75j0+LW8rf9PdnNUH2lc9bGLu/MW\nuyvDkytJ/4fpu/7D7vW9yozU3sDJwMfKjcpR5VRWlVM5qowxFeVYN/BO8AqVs3dnJ1T08XU54FfV\nANuGu9+hF8BRVw6w7/CEmQj2Ad5LW64HjuivjLt3mtk2oArYnF7IzJYBywBqampoaGgYcjA7OmDn\n6NmUlen+YEp3d7fqI02+1Ec+5KLu7m7Kg3vpFvzHsLT3SRasT71Pld/93nqs77lvz+Olnyd1vIoy\nY1JlBaPS7us70JrlzzuY9vZ2Ro8e3c/W/v/GfMArlAG29bNfa9c4Wofx/TeYgmgsdvcVwAqApUuX\nenV19ZCPUV39SRo+eCTD2bdYNTQ0qD7SqD52U1309H5DA5PzoD7GhXTcMH/+rAdmpy3XBOv6LGNm\nFcBkko3GIiKSI2EmgmeBBWY2z8xGA58B7u9V5n7g/OD92cAjYbQPiIhI/0K7NRTc878c+BPJ7qO3\nuvtrZnY98Jy73w/8AviVma0DmkgmCxERyaFQ2wjc/UHgwV7rlqe9bwU+FWYMIiIysOi7SIiISKSU\nCERESpwSgYhIiVMiEBEpcVZovTXNbBPDf6h8Gr2eWi5xqo+eVB+7qS56Kob6mOPue/e1oeASwUiY\n2XPuvjTqOPKF6qMn1cduqoueir0+dGtIRKTEKRGIiJS4UksEK6IOIM+oPnpSfeymuuipqOujpNoI\nRERkT6V2RSAiIr0oEYiIlLiSSQRmdqqZ1ZnZOjO7Oup4omRms83sUTNbY2avmVn2574rMGZWbmYv\nmtkDUccSNTObYmb3mFnMzGrN7MioY4qKmV0V/Bt51cx+Y2aVUccUhpJIBGZWDvwUOA1YAnzWzJZE\nG1WkOoEvu/sS4MPAZSVeHwBXArVRB5EnbgQecvdFwIGUaL2Y2T7AFcBSdz+A5HD6RTlUfkkkAuBw\nYJ27v+nu7cBvgbMijiky7t7o7i8E7xMk/6HvE21U0TGzGuBjwMqoY4mamU0GPkpyrhDcvd3dt0Yb\nVaQqgLHBDIrjgOxPGJwHSiUR7AO8l7ZcTwl/8aUzs7nAwcDT0UYSqZ8AXwO6ow4kD8wDNgG/DG6V\nrTSz8VEHFQV3Xw/8EHgXaAS2ufvD0UYVjlJJBNIHM5sA/A74krs3Rx1PFMzsDGCjuz8fdSx5ogI4\nBPiZux8MbAdKsk3NzPYieedgHlANjDezz0UbVThKJRGsB2anLdcE60qWmY0imQTudPd7o44nQkcB\nZ5rZ2yRvGR5vZr+ONqRI1QP17p66QryHZGIoRScCb7n7JnfvAO4FPhJxTKEolUTwLLDAzOaZ2WiS\nDT73RxxTZMzMSN4DrnX3H0UdT5Tc/Rp3r3H3uST/v3jE3YvyV18m3D0OvGdmC4NVJwBrIgwpSu8C\nHzazccG/mRMo0obzUOcszhfu
3mlmlwN/Itnyf6u7vxZxWFE6CjgPeMXMVgfr/jWYY1rkX4A7gx9N\nbwIXRhxPJNz9aTO7B3iBZE+7FynSoSY0xISISIkrlVtDIiLSDyUCEZESp0QgIlLilAhEREqcEoGI\nSIlTIhDJETP7e9QxiPRF3UdFREqcrghEejGzw8zsZTOrNLPxwXj0B/RR7vdm9nywfVmwbo6ZvW5m\n08yszMyeMLOTg20twZ+zzGyVma0Oxrk/OrefUKQnXRGI9MHMvgNUAmNJjr3z3T7KTHX3JjMbS3IY\nk2PcfYuZXQKcAjwDzHf3LwblW9x9gpl9Gah0938L5soYFwwHLhIJJQKRPgTDKzwLtAIfcfeuPspc\nB3w8WJwLnOLuTwXb/gTMBw5KfcmnJYKPArcCvwZ+7+6rex9bJJd0a0ikb1XABGAiySuDHszsWJKj\nUx7p7geSHIemMtg2juQItwTH6MHdV5Gc/GU9cJuZfT6E+EUypkQg0rdbgG8BdwLf72P7ZOB9d99h\nZotITvmZ8v1gv+XAz3vvaGZzgA3u/nOSs6KV6jDPkidKYvRRkaEIfqF3uPtdwT38v5vZ8e7+SFqx\nh4D/ZWa1QB2QuiV0DHAYcJS7d5nZJ83sQnf/Zdq+xwJfNbMOoAXQFYFESm0EIiIlTreGRERKnBKB\niEiJUyIQESlxSgQiIiVOiUBEpMQpEYiIlDglAhGREvf/AVz5xe7rL8tlAAAAAElFTkSuQmCC\n", - "text/plain": [ - "
# %% TSP example graph as adjacency lists: g[u] = [(v, cost), ...]
import sys

g = [[(1, 10), (2, 15), (3, 20)],
     [(0, 10), (2, 35), (3, 25)],
     [(0, 15), (1, 35), (3, 30)],
     [(0, 20), (1, 25), (2, 30)]]


# %% Implemented with graph search
def tsp(g, cur, path, mincost, cost, bused, ans):
    """Depth-first search for the cheapest tour, pruned by the best cost so far.

    Args:
        g: adjacency lists; ``g[u]`` is a list of ``(v, cost)`` pairs.
        cur: vertex the partial tour currently ends at.
        path: vertices visited so far, starting with the start vertex
            (``path[0]``); mutated during the search and restored on return.
        mincost: one-element list holding the best tour cost found so far;
            updated in place.
        cost: cost of the partial tour ``path``.
        bused: per-vertex "already on the path" flags; mutated during the
            search and restored on return.
        ans: one-element list; ``ans[0]`` receives a copy of the best path
            (without the closing return to the start).

    The pruning test ``cost + c < mincost[0]`` is only sound when edge costs
    are non-negative.
    """
    if len(path) == len(g):
        # Every vertex is on the path: close the tour back to the start.
        # The closing edge is looked up by vertex, so the adjacency lists may
        # be in any order and need not list the start vertex first.
        start = path[0]
        if cur == start:
            closing = 0  # single-vertex graph: nothing to travel
        else:
            closing = min((c for v, c in g[cur] if v == start), default=None)
            if closing is None:
                return  # no edge back to the start: not a tour
        total = cost + closing
        if total < mincost[0]:
            mincost[0] = total
            ans[0] = path[:]
        return

    for v, c in g[cur]:
        # Constraint on permutation (each vertex once) and on cost (bound).
        if not bused[v] and cost + c < mincost[0]:
            bused[v] = True
            path.append(v)
            tsp(g, v, path, mincost, cost + c, bused, ans)
            path.pop()
            bused[v] = False
    return


# %% Run the graph-search TSP from vertex 0
cv = 0
path = [0]
mincost = [sys.maxsize]
cost = 0
bused = [False] * len(g)
bused[0] = True
ans = [[]]
tsp(g, cv, path, mincost, cost, bused, ans)
print(mincost[0], ans[0])
# %% TSP example graph as adjacency dicts: g[u][v] = cost
import sys

g = [{1: 10, 2: 15, 3: 20},
     {0: 10, 2: 35, 3: 25},
     {0: 15, 1: 35, 3: 30},
     {0: 20, 1: 25, 2: 30}]


# %% Implement with permutation
def tsp(a, d, used, curr, ans, start, g, mincost, cost):
    """Enumerate permutations of ``a`` as tours from ``start``, keeping the cheapest.

    Args:
        a: the vertices to permute (every vertex except ``start``).
        d: number of vertices of ``a`` already placed on ``curr``.
        used: per-vertex "already placed" flags; mutated during the search
            and restored on return.
        curr: current partial tour, beginning with ``start``; mutated during
            the search and restored on return.
        ans: one-element list; ``ans[0]`` receives the best tour including
            the closing return to ``start``.
        start: vertex the tour starts from and returns to.
        g: graph where ``g[u][v]`` is the cost of edge u-v.
        mincost: one-element list holding the best tour cost found so far;
            updated in place.
        cost: cost of the partial tour ``curr``.

    A missing edge is skipped instead of raising, so the graph does not have
    to be complete. The pruning test against ``mincost[0]`` is only sound
    when edge costs are non-negative.
    """
    def edge_cost(u, v):
        # None means "no such edge"; indexing keeps both dict-of-dicts and
        # matrix-style graphs working.
        try:
            return g[u][v]
        except (KeyError, IndexError):
            return None

    last = curr[-1]
    if d == len(a):
        # Add the cost from last vertex to the start
        closing = edge_cost(last, start)
        if closing is None:
            if last != start:
                return  # cannot return to the start: not a tour
            closing = 0  # single-vertex tour without a self-loop
        total = cost + closing
        if total < mincost[0]:
            mincost[0] = total
            ans[0] = curr[:] + [start]
        return

    for i in a:
        if used[i]:
            continue
        step = edge_cost(last, i)
        if step is None or cost + step >= mincost[0]:
            continue  # no edge, or already no better than the best tour
        curr.append(i)
        used[i] = True
        tsp(a, d + 1, used, curr, ans, start, g, mincost, cost + step)
        curr.pop()
        used[i] = False
    return


# %% Run the permutation TSP from vertex 0
mincost = [sys.maxsize]
bused = [False] * len(g)
bused[0] = True
start = 0
a = [i for i in range(1, len(g))]
ans = [[]]
tsp(a, 0, bused, [0], ans, start, g, mincost, 0)
print(mincost[0], ans[0])
# %% Knapsack instance: capacity, weights, values
c = 10
w = [5, 8, 3]
v = [45, 48, 35]

# %% Branch and bound for the 0/1 knapsack problem
import heapq


class BranchandBound:
    """0/1 knapsack solved by branch and bound, depth-first or best-first.

    Items are sorted by value/weight ratio (highest first). The bound of a
    partial selection is its value plus a greedy fill of the remaining
    capacity in which the first item that does not fit is taken fractionally.
    """

    def __init__(self, c, v, w, verbose=True):
        """
        Args:
            c: knapsack capacity.
            v: item values.
            w: item weights, paired with ``v`` by position (extra entries of
                the longer list are ignored).
            verbose: when True (the default) print the sorted items and a
                trace line for every explored node.
        """
        self.best = 0
        self.c = c
        self.verbose = verbose
        # (ratio, weight, value); a zero-weight item costs no capacity, so it
        # gets an infinite ratio instead of dividing by zero.
        self.items = [(vi / wi if wi else float('inf'), wi, vi) for vi, wi in zip(v, w)]
        self.items.sort(key=lambda x: x[0], reverse=True)
        self.n = len(self.items)
        if self.verbose:
            print(self.items)

    def estimate(self, idx, curval, left_cap):
        """Upper bound on the value reachable from items ``idx..n-1``.

        Starts from ``curval`` and fills ``left_cap`` greedily in ratio
        order; the first item that does not fit contributes only the
        fraction that fits, and the fill stops there.
        """
        est = curval
        # use the v/w to estimate
        for i in range(idx, self.n):
            ratio, wi, vi = self.items[i]
            if wi <= left_cap:  # use all
                est += vi
                left_cap -= wi
            else:  # use part, after which no capacity is left
                est += ratio * left_cap
                break
        return est

    def dfs(self, idx, est, val, left_cap, status):
        """Depth-first branch and bound from item ``idx``.

        Args:
            idx: index of the next item to decide.
            est: upper bound for this node.
            val: value of the items taken so far.
            left_cap: remaining capacity.
            status: list of True/False decisions made so far; mutated during
                the search and restored on return.

        Updates ``self.best`` whenever a full assignment is reached.
        """
        if idx == self.n:
            self.best = max(self.best, val)
            return
        if self.verbose:
            print(status, val, left_cap, est)
        if est <= self.best:
            return  # this subtree cannot beat the incumbent

        _, wi, vi = self.items[idx]
        # Case 1: choose the item
        if wi <= left_cap:  # prune by constraint
            nest = self.estimate(idx + 1, val + vi, left_cap - wi)
            # Bound on the child's own estimate, which is tighter than est.
            if nest > self.best:
                status.append(True)
                self.dfs(idx + 1, nest, val + vi, left_cap - wi, status)
                status.pop()

        # Case 2: not choose the item
        nest = self.estimate(idx + 1, val, left_cap)
        if nest > self.best:
            status.append(False)
            self.dfs(idx + 1, nest, val, left_cap, status)
            status.pop()
        return

    def bfs(self):
        """Best-first branch and bound; the result is left in ``self.best``.

        Nodes are ``(-estimate, val, left_cap, idx)`` tuples in a min-heap,
        so the node with the highest estimate is expanded first. ``idx`` is
        the next item to decide; a node with ``idx == n`` is a complete
        assignment.
        """
        self.best = 0
        q = [(-self.estimate(0, 0, self.c), 0, self.c, 0)]  # estimate, val, left_cap, idx
        while q:
            neg_est, val, left_cap, idx = heapq.heappop(q)
            est = -neg_est
            if self.verbose:
                print(est, val, left_cap, idx, q, self.best)
            if est <= self.best:
                # Nodes come out in decreasing estimate order, so nothing
                # left in the queue can beat the incumbent either.
                break
            if idx == self.n:
                # Every item has been decided (including the last one).
                self.best = max(self.best, val)
                continue

            _, wi, vi = self.items[idx]
            # Case 1: choose the item, only if it actually fits
            if wi <= left_cap:
                nest = self.estimate(idx + 1, val + vi, left_cap - wi)
                if nest > self.best:
                    heapq.heappush(q, (-nest, val + vi, left_cap - wi, idx + 1))

            # Case 2: not choose the item
            nest = self.estimate(idx + 1, val, left_cap)
            if nest > self.best:
                heapq.heappush(q, (-nest, val, left_cap, idx + 1))
        return

    def runDfs(self):
        """Run the depth-first search from scratch and return the best value."""
        self.best = 0  # do not let an earlier run prune this one
        self.dfs(0, self.estimate(0, 0, self.c), 0, self.c, [])
        return self.best

    def runBfs(self):
        """Run the best-first search from scratch and return the best value."""
        self.bfs()
        return self.best


# %% Solve the example instance with both strategies
bnb = BranchandBound(c, v, w)
bnb.runDfs()
bnb.runBfs()
# %% Number of ways to put 8 queens on 64 squares with no constraint: C(64, 8)
# Integer division keeps the count exact (it is a whole number by construction).
n = (64*63*62*61*60*59*58*57) // (8*7*6*5*4*3*2*1)
print(n)

# %% N-Queens by backtracking, two ways of generating candidates
import time


class Solution:
    """N-Queens by backtracking, placing one queen per row from top to bottom."""

    @staticmethod
    def _render(n_queen, n):
        """Turn queen columns (one entry per row) into board rows like ``'.Q..'``."""
        return ['.' * col + 'Q' + '.' * (n - col - 1) for col in n_queen]

    def solveNQueens(self, n):
        """
        Track attacked columns and diagonals in boolean arrays so that
        checking a square takes constant time.

        :type n: int
        :rtype: List[List[str]]
        """
        # queen can move: vertically, horizontally, diagonally
        col_state = [False] * n
        left_diag = [False] * (2 * n - 1)   # indexed by r + c
        right_diag = [False] * (2 * n - 1)  # indexed by r + (n - 1 - c)
        n_queen = []  # column of the queen in each row placed so far
        ans = []

        def is_valid(r, c):
            return not (col_state[c] or left_diag[r + c] or right_diag[r + (n - 1 - c)])

        def set_state(r, c, val):
            col_state[c] = val
            left_diag[r + c] = val
            right_diag[r + (n - 1 - c)] = val

        def backtrack(k):
            if k == n:  # a valid result
                ans.append(self._render(n_queen, n))
                return
            # generate candidates for kth queen
            for col in range(n):
                if is_valid(k, col):
                    set_state(k, col, True)
                    n_queen.append(col)
                    backtrack(k + 1)
                    n_queen.pop()
                    set_state(k, col, False)

        backtrack(0)
        return ans

    def solveNQueens2(self, n):
        """
        Recompute the free columns of each row from the queens already
        placed, without keeping any extra state.

        :type n: int
        :rtype: List[List[str]]
        """
        n_queen = []  # column of the queen in each row placed so far
        ans = []

        def generate_candidate(k):
            # A queen at (r, c) attacks, in row k, its own column and the two
            # columns its diagonals reach after k - r rows.
            blocked = set()
            for r, c in enumerate(n_queen):
                dist = k - r
                blocked.update((c, c - dist, c + dist))
            # Ascending column order keeps the solution order deterministic.
            return [col for col in range(n) if col not in blocked]

        def backtrack(k):
            if k == n:  # a valid result
                ans.append(self._render(n_queen, n))
                return
            # generate candidates for kth queen
            for col in generate_candidate(k):
                n_queen.append(col)
                backtrack(k + 1)
                n_queen.pop()

        backtrack(0)
        return ans


# %% Compare both implementations on a small board
s = Solution()
n = 4
t0 = time.perf_counter()
ans = s.solveNQueens(n)
t1 = time.perf_counter()
ans2 = s.solveNQueens2(n)
t2 = time.perf_counter()

# Printing happens after both measurements so neither interval includes I/O.
print(ans)
print('time: ', t1 - t0)
print('time: ', t2 - t1)
assert ans == ans2, 'both implementations must find the same solutions'
- } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "XWc4fJNy43yT", - "colab_type": "text" - }, - "source": [ - "#### Utilize symmetry" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "algw5Z8N42ya", - "colab_type": "code", - "colab": {} - }, - "source": [ - " def solveNQueensSymmetry(n):\n", - " \"\"\"\n", - " :type n: int\n", - " :rtype: List[List[str]]\n", - " \"\"\"\n", - " n_queen = [] # to track the positions\n", - " \n", - " def generate_candidate(n_queen, s, k, n):\n", - " if k == s: #apply symmetry\n", - " candidates = set([i for i in range(n//2)])\n", - " else:\n", - " candidates = set([i for i in range(n)])\n", - "\n", - " for r, c in enumerate(n_queen):\n", - " if c in candidates:\n", - " candidates.remove(c)\n", - " c1 = c-(k-r)\n", - " if c1 >=0 and c1 in candidates:\n", - " candidates.remove(c1)\n", - " c2 = c+(k-r)\n", - " if c2 < n and c2 in candidates:\n", - " candidates.remove(c2)\n", - " return candidates\n", - "\n", - " def backtrack(n_queen, s, k, ans):\n", - " '''add s to track the start depth'''\n", - " if k == n: # a valid result\n", - " ans += 1\n", - " return ans\n", - " # generate candidates for kth queen\n", - " candidates = generate_candidate(n_queen, s, k, n)\n", - " for c in candidates:\n", - " n_queen.append(c)\n", - " ans = backtrack(n_queen, s, k+1, ans)\n", - " n_queen.pop()\n", - " return ans\n", - " \n", - " # deal with the left half of the first row\n", - " ans = 0\n", - "\n", - " ans += backtrack(n_queen, 0, 0, 0)*2\n", - " \n", - " # deal with the left half of the second row\n", - " if n%2 == 1:\n", - " n_queen = [n//2]\n", - " ans += backtrack(n_queen, 1, 1, 0)*2\n", - " return ans" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "ZIJaJ4xb6WA2", - "colab_type": "code", - "outputId": "1eddfd0c-9ff9-4ba6-8e13-da650fee3efb", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 35 - } - }, - "source": [ - 
"print(solveNQueensSymmetry(7))" - ], - "execution_count": 199, - "outputs": [ - { - "output_type": "stream", - "text": [ - "40\n" - ], - "name": "stdout" - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "r1QQMOyymOmk", - "colab_type": "text" - }, - "source": [ - "## Answers to Exercises" - ] - } - ] -} \ No newline at end of file diff --git a/Colab_Codes/chapter_decrease_and_conquer.ipynb b/Colab_Codes/chapter_decrease_and_conquer.ipynb deleted file mode 100644 index 3fac7d9..0000000 --- a/Colab_Codes/chapter_decrease_and_conquer.ipynb +++ /dev/null @@ -1,1550 +0,0 @@ -{ - "nbformat": 4, - "nbformat_minor": 0, - "metadata": { - "colab": { - "name": "chapter_decrease_and_conquer.ipynb", - "provenance": [], - "collapsed_sections": [], - "toc_visible": true, - "include_colab_link": true - }, - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - } - }, - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "id": "view-in-github", - "colab_type": "text" - }, - "source": [ - "\"Open" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "hIhy6_m3lvTV", - "colab_type": "text" - }, - "source": [ - "## Binary Search" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "HoaV8uZHl1G-", - "colab_type": "code", - "colab": {} - }, - "source": [ - "nums = [1, 3, 4, 6, 7, 8, 10, 13, 14, 18, 19, 21, 24, 37, 40, 45, 71]" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "Q9iK9qbomHqz", - "colab_type": "code", - "colab": {} - }, - "source": [ - "#@title Standard binary search\n", - "def standard_binary_search(lst, target):\n", - " l, r = 0, len(lst) - 1\n", - " while l <= r:\n", - " mid = l + (r - l) // 2\n", - " if lst[mid] == target:\n", - " return mid\n", - " elif lst[mid] < target:\n", - " l = mid + 1\n", - " else:\n", - " r = mid - 1\n", - " return -1 # target is not found " - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - 
"id": "3d2wlsFgp7Ge", - "colab_type": "code", - "outputId": "37174701-48d6-4659-b876-b5c54e761808", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 35 - } - }, - "source": [ - "# When target exists\n", - "standard_binary_search(nums, 7)" - ], - "execution_count": 0, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "4" - ] - }, - "metadata": { - "tags": [] - }, - "execution_count": 4 - } - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "gsYY1xYYq2b8", - "colab_type": "code", - "outputId": "7939b051-a09a-4acb-95fe-1176acf1992b", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 35 - } - }, - "source": [ - "# When target does not exist\n", - "standard_binary_search(nums, 42)" - ], - "execution_count": 0, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "-1" - ] - }, - "metadata": { - "tags": [] - }, - "execution_count": 5 - } - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "soMDN2h2rBAi", - "colab_type": "code", - "colab": {} - }, - "source": [ - "nums = [1, 3, 4, 4, 4, 4, 6, 7, 8]" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "S2xNrvB3rGnH", - "colab_type": "code", - "outputId": "f8e71161-bb9f-4a7c-a835-4d35b478d758", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 35 - } - }, - "source": [ - "# When there exists duplicates of the target\n", - "standard_binary_search(nums, 4)" - ], - "execution_count": 0, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "4" - ] - }, - "metadata": { - "tags": [] - }, - "execution_count": 7 - } - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "E7xyot8ou6dh", - "colab_type": "code", - "colab": {} - }, - "source": [ - "#@title Binary Search with Lower Bound \n", - "def lower_bound_bs(nums, t):\n", - " l, r = 0, len(nums) - 1\n", - " while l <= r:\n", - " mid = l + (r - l) // 2\n", - " if t 
<= nums[mid]: # move as left as possible\n", - " r = mid - 1\n", - " else:\n", - " l = mid + 1\n", - " return l\n", - " " - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "FYrRsAqwwQvl", - "colab_type": "code", - "outputId": "83763de7-f709-4393-b9df-4a21a4a5354e", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 35 - } - }, - "source": [ - "# Binary Search with lower bound\n", - "l1 = lower_bound_bs(nums, 4)\n", - "l2 = lower_bound_bs(nums, 5)\n", - "print(l1, l2)" - ], - "execution_count": 0, - "outputs": [ - { - "output_type": "stream", - "text": [ - "2 6\n" - ], - "name": "stdout" - } - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "nGWDrHR12Rtq", - "colab_type": "code", - "colab": {} - }, - "source": [ - "#@title Binary Search with Upper Bound \n", - "def upper_bound_bs(nums, t):\n", - " l, r = 0, len(nums) - 1\n", - " while l <= r:\n", - " mid = l + (r - l) // 2\n", - " if t >= nums[mid]: # move as right as possible\n", - " l = mid + 1\n", - " else:\n", - " r = mid - 1\n", - " return l\n", - " " - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "VkCtss6P23RH", - "colab_type": "code", - "outputId": "d94adadf-a113-4de8-f1d0-73521ad65e13", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 35 - } - }, - "source": [ - "# Binary Search with upper bound\n", - "l1 = upper_bound_bs(nums, 4)\n", - "l2 = upper_bound_bs(nums, 5)\n", - "print(l1, l2)" - ], - "execution_count": 0, - "outputs": [ - { - "output_type": "stream", - "text": [ - "6 6\n" - ], - "name": "stdout" - } - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "Mn8qGzZezFqe", - "colab_type": "code", - "outputId": "6dea4b52-ba92-43e1-a79a-85be699d343d", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 35 - } - }, - "source": [ - "#@title Use Python Module bisect\n", - "from bisect import bisect_left,bisect_right, bisect\n", - "l1 = 
bisect_left(nums, 4)\n", - "r1 = bisect_right(nums, 5)\n", - "l2 = bisect_right(nums, 4)\n", - "r2 = bisect_right(nums, 5)\n", - "p3 = bisect(nums, 5)\n", - "print(l1, r1, l2, r2)" - ], - "execution_count": 0, - "outputs": [ - { - "output_type": "stream", - "text": [ - "2 6 6 6\n" - ], - "name": "stdout" - } - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "ZYxs9WYC2DUk", - "colab_type": "code", - "outputId": "6cbfb9de-c542-437d-839c-1410ebdfebfd", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 35 - } - }, - "source": [ - "p1 = bisect_left(nums, 4)\n", - "p2 = bisect_right(nums, 4)\n", - "p3 = bisect(nums, 4)\n", - "print(p1, p2, p3)" - ], - "execution_count": 0, - "outputs": [ - { - "output_type": "stream", - "text": [ - "2 6 6\n" - ], - "name": "stdout" - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "0Rfi8qZndFdd", - "colab_type": "text" - }, - "source": [ - "### Applications\n", - "\n", - "\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "A6ewzRSkRM5R", - "colab_type": "text" - }, - "source": [ - "#### Rotated Array" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "V_TzJ-H_kHTZ", - "colab_type": "code", - "colab": {} - }, - "source": [ - "# First bad Version\n", - "def firstBadVersion(self, n):\n", - " l, r = 1, n\n", - " while l <= r:\n", - " mid = l + (r - l) // 2\n", - " if isBadVersion(mid):\n", - " r = mid - 1\n", - " else:\n", - " l = mid + 1 \n", - " return l\n", - " " - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "310ELp0zcyWm", - "colab_type": "code", - "colab": {} - }, - "source": [ - "def RotatedBinarySearch(nums, t): \n", - " l, r = 0, len(nums)-1\n", - " while l <= r:\n", - " mid = l + (r-l)//2\n", - " if nums[mid] == t:\n", - " return mid\n", - " # Left is sorted\n", - " if nums[l] < nums[mid]: \n", - " if nums[l] <= t < nums[mid]:\n", - " r = mid - 1\n", - " else:\n", - " l = mid + 1\n", - " # Right is sorted\n", 
- " elif nums[l] > nums[mid]: \n", - " if nums[mid] < t <= nums[r]:\n", - " l = mid + 1\n", - " else:\n", - " r = mid - 1\n", - " # Left and middle index is the same, move to the right\n", - " else: \n", - " l = mid + 1\n", - " return -1" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "abgMk_jNfSPV", - "colab_type": "code", - "outputId": "8dce0c66-850b-405a-dcf9-c031b5a98d2a", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 35 - } - }, - "source": [ - "nums = [7,0,1,2,3,4,5,6]\n", - "RotatedBinarySearch(nums, 3)" - ], - "execution_count": 0, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "4" - ] - }, - "metadata": { - "tags": [] - }, - "execution_count": 16 - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "DYQbczWuQ-UM", - "colab_type": "text" - }, - "source": [ - "#### Binary Search to Solve Math Problem" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "6nq9BPxuRSI2", - "colab_type": "code", - "colab": {} - }, - "source": [ - "import math\n", - "def arrangeCoins(n: int) -> int:\n", - " return int((math.sqrt(1+8*n)-1) // 2)" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "hF5v8wnARdAR", - "colab_type": "code", - "outputId": "6431df36-a202-4e41-aeec-1f5246592d7b", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 35 - } - }, - "source": [ - "arrangeCoins(8)" - ], - "execution_count": 0, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "3" - ] - }, - "metadata": { - "tags": [] - }, - "execution_count": 18 - } - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "8GAML-APRt4m", - "colab_type": "code", - "colab": {} - }, - "source": [ - "# Use Binary Search\n", - "def arrangeCoins(n):\n", - " def isValid(row):\n", - " return (row * (row + 1)) // 2 <= n\n", - " \n", - " def bisect_right():\n", - " l, r = 1, n\n", - " 
while l <= r:\n", - " mid = l + (r-l) // 2\n", - " # Move as right as possible\n", - " if isValid(mid): \n", - " l = mid + 1\n", - " else:\n", - " r = mid - 1\n", - " return l\n", - " return bisect_right() - 1\n", - " " - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "cf1D07VYUYch", - "colab_type": "code", - "outputId": "98316869-95ad-4cef-8acb-790e05764cf1", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 35 - } - }, - "source": [ - "arrangeCoins(8)" - ], - "execution_count": 0, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "3" - ] - }, - "metadata": { - "tags": [] - }, - "execution_count": 20 - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "aVJePJSD3e16", - "colab_type": "text" - }, - "source": [ - "## Binary Search Tree" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "bi9eVUn93YcR", - "colab_type": "code", - "colab": {} - }, - "source": [ - "#@title Binary Tree Node\n", - "class BiNode:\n", - " def __init__(self, val):\n", - " self.left = None\n", - " self.right = None\n", - " self.val = val" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "23mTR1N2wf4y", - "colab_type": "code", - "colab": {} - }, - "source": [ - "# A helper function to print out the tree in order\n", - "'''\n", - "Yield from recursive function\n", - "'''\n", - "def inorder_print(root):\n", - " if not root:\n", - " return\n", - " yield from inorder_print(root.left)\n", - " yield root.val\n", - " yield from inorder_print(root.right)" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "rh4tS_qnunWL", - "colab_type": "text" - }, - "source": [ - "### Search" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "uPKnMRSAtX5d", - "colab_type": "code", - "colab": {} - }, - "source": [ - "#@title Recursive Search\n", - "def search(root, t):\n", - " 
if not root:\n", - " return None\n", - " if root.val == t:\n", - " return root\n", - " elif t < root.val:\n", - " return search(root.left, t)\n", - " else:\n", - " return search(root.right, t)\n" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "yC6VSC6Gt6Rp", - "colab_type": "code", - "colab": {} - }, - "source": [ - "#@title Iterative Search\n", - "def searchItr(root, t):\n", - " while root:\n", - " if root.val == t:\n", - " return root\n", - " elif t < root.val:\n", - " root = root.left\n", - " else:\n", - " root = root.right\n", - " return None" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "Ps9yX8EgXGp2", - "colab_type": "text" - }, - "source": [ - "### Minimum and Maximum Node" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "93wNIzgDXFdN", - "colab_type": "code", - "colab": {} - }, - "source": [ - "# minimum recursive\n", - "def minimum(root):\n", - " if not root:\n", - " return None\n", - " if not root.left:\n", - " return root\n", - " return minimum(root.left)" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "YKk-aYiZXej7", - "colab_type": "code", - "colab": {} - }, - "source": [ - "# minimum iterative\n", - "def minimumIter(root):\n", - " while root:\n", - " if not root.left:\n", - " return root\n", - " root = root.left\n", - " return None" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "eb-sC2VbYa4k", - "colab_type": "code", - "colab": {} - }, - "source": [ - "# maximum recursive\n", - "def maximum(root):\n", - " if not root:\n", - " return None\n", - " if not root.right:\n", - " return root\n", - " return maximum(root.right)" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "5NLBVCo-YfcI", - "colab_type": "code", - "colab": {} - }, - "source": [ - "# maximum iterative\n", - 
"def maximumIter(root):\n", - " while root:\n", - " if not root.right:\n", - " return root\n", - " root = root.right\n", - " return None" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "vRjlkcWbfuJ4", - "colab_type": "text" - }, - "source": [ - "### Predecessor and Successor" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "JUS3EHJ_Rb8C", - "colab_type": "code", - "colab": {} - }, - "source": [ - "# Successor found with inorder\n", - "def successorInorder(root, node):\n", - " if not node:\n", - " return None\n", - " if node.right is not None:\n", - " return minimum(node.right)\n", - " # Inorder traversal\n", - " succ = None\n", - " while root: \n", - " if node.val > root.val:\n", - " root = root.right\n", - " elif node.val < root.val:\n", - " succ = root\n", - " root = root.left\n", - " else:\n", - " break\n", - " return succ" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "LJSGRwcpfy_x", - "colab_type": "code", - "colab": {} - }, - "source": [ - "def reverse(node):\n", - " if not node or not node.p:\n", - " return None\n", - " # node is a left child\n", - " if node.val < node.p.val:\n", - " return node.p\n", - " return reverse(node.p)\n", - "\n", - "# Successor when the target node is not directly given\n", - "def successor(root, t):\n", - " # Traverse backward and see if a node is a left child\n", - " def reverse(node):\n", - " if not node or not node.p:\n", - " return None\n", - " # node is a left child\n", - " if node.val < node.p.val:\n", - " return node.p\n", - " return reverse(node.p)\n", - " \n", - " # Find the target and set its parent while searching\n", - " def helper(root, t):\n", - " # t is not found\n", - " if not root:\n", - " return None\n", - " if t == root.val: \n", - " if root.right:\n", - " return minimum(root.right)\n", - " else:\n", - " return reverse(root)\n", - " elif t < root.val:\n", - " root.left.p = root\n", - " 
return helper(root.left, t)\n", - " else:\n", - " root.right.p = root\n", - " return helper(root.right, t)\n", - " \n", - " root.p = None\n", - " return helper(root, t)" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "9EYUc_lwoKSi", - "colab_type": "code", - "colab": {} - }, - "source": [ - "# Separate the above code into two steps\n", - "def findNodeAddParent(root, t):\n", - " if not root:\n", - " return None\n", - " if t == root.val: \n", - " return root\n", - " elif t < root.val:\n", - " root.left.p = root\n", - " return findNodeAddParent(root.left, t)\n", - " else:\n", - " root.right.p = root\n", - " return findNodeAddParent(root.right, t)\n", - "\n", - "# Find successor from a given node\n", - "def successor2(root):\n", - " if not root:\n", - " return None\n", - " if root.right:\n", - " return minimum(root.right)\n", - " else:\n", - " return reverse(root) " - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "9mwSazjPUEEN", - "colab_type": "text" - }, - "source": [ - "Predecessor" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "AOinBcdcUGRG", - "colab_type": "code", - "colab": {} - }, - "source": [ - "def reverse_right(node):\n", - " if not node or not node.p:\n", - " return None\n", - " # node is a right child\n", - " if node.val > node.p.val:\n", - " return node.p\n", - " return reverse_right(node.p)\n", - "\n", - "def predecessor(root):\n", - " if not root:\n", - " return None\n", - " if root.left:\n", - " return maximum(root.left)\n", - " else:\n", - " return reverse_right(root) " - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "nRiIVBZsUucH", - "colab_type": "code", - "colab": {} - }, - "source": [ - "# predecessor inorder\n", - "def predecessorInorder(root, node):\n", - " if not node:\n", - " return None\n", - " if node.left is not None:\n", - " return maximum(node.left)\n", - " # 
Inorder traversal\n", - " pred = None\n", - " while root: \n", - " if node.val > root.val:\n", - " pred = root\n", - " root = root.right\n", - " elif node.val < root.val:\n", - " root = root.left\n", - " else:\n", - " break\n", - " return pred" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "t2ATJStuvPvu", - "colab_type": "text" - }, - "source": [ - "### Insert" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "9HdUHb-yvbO3", - "colab_type": "code", - "colab": {} - }, - "source": [ - "# Clean Recursive Insert\n", - "def insert(root, t):\n", - " if not root:\n", - " return BiNode(t)\n", - " if root.val == t:\n", - " return root\n", - " elif t < root.val:\n", - " root.left = insert(root.left, t)\n", - " return root\n", - " else:\n", - " root.right = insert(root.right, t) \n", - " return root" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "Cqb911P_1ocX", - "colab_type": "code", - "colab": {} - }, - "source": [ - "# Recursive Insert 2\n", - "def insert2(root, t):\n", - " if not root:\n", - " return \n", - " if root.val == t:\n", - " return \n", - " elif t < root.val:\n", - " if not root.left:\n", - " root.left = BiNode(t)\n", - " else:\n", - " insert2(root.left, t)\n", - " else:\n", - " if not root.right:\n", - " root.right = BiNode(t)\n", - " else:\n", - " insert2(root.right, t) " - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "QN6MwXa16jUI", - "colab_type": "code", - "colab": {} - }, - "source": [ - "# Iterative insertion\n", - "def insertItr(root, t):\n", - " p = None\n", - " node = root #Keep the root node\n", - " while node:\n", - " # Node exists already\n", - " if node.val == t:\n", - " return root\n", - " if t > node.val:\n", - " p = node\n", - " node = node.right\n", - " else:\n", - " p = node\n", - " node = node.left\n", - " # Assign new node\n", - " if not p:\n", - " root = 
BiNode(t)\n", - " elif t > p.val:\n", - " p.right = BiNode(t)\n", - " else:\n", - " p.left = BiNode(t)\n", - " return root\n", - " " - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "BJUGXsoBYI5O", - "colab_type": "text" - }, - "source": [ - "### Delete" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "nRVCru44YORz", - "colab_type": "code", - "colab": {} - }, - "source": [ - "def deleteMinimum(root):\n", - " if not root:\n", - " return None, None\n", - " if root.left:\n", - " mini, left = deleteMinimum(root.left)\n", - " root.left = left\n", - " return mini, root\n", - " # the minimum node\n", - " if not root.left: \n", - " return root, None \n", - "\n", - "def _delete(root):\n", - " if not root:\n", - " return None\n", - " # No chidren: Delete it\n", - " if not root.left and not root.right:\n", - " return None \n", - " # Two children: Copy the value of successor\n", - " elif all([root.left, root.right]):\n", - " succ, right = deleteMinimum(root.right)\n", - " root.val = succ.val\n", - " root.right = right\n", - " return root\n", - " # One Child: Copy the value\n", - " else:\n", - " if root.left:\n", - " root.val = root.left.val\n", - " root.left = None\n", - " else:\n", - " root.val = root.right.val\n", - " root.right = None\n", - " return root\n", - " \n", - "def delete(root, t):\n", - " if not root:\n", - " return\n", - " if root.val == t:\n", - " root = _delete(root)\n", - " return root \n", - " elif t > root.val:\n", - " root.right = delete(root.right, t)\n", - " return root\n", - " else:\n", - " root.left = delete(root.left, t)\n", - " return root\n", - " \n" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "8pAODLP1ktWy", - "colab_type": "code", - "colab": {} - }, - "source": [ - "keys = [8, 3, 10, 1, 6, 14, 4, 7, 13]" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "xW555Xb6wCb4", - 
"colab_type": "code", - "outputId": "127d98c4-453d-4873-f714-8ddaec9356ff", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 54 - } - }, - "source": [ - "# Construct the examplary tree\n", - "%%time\n", - "root = None\n", - "for k in keys:\n", - " root = insert(root, k)" - ], - "execution_count": 0, - "outputs": [ - { - "output_type": "stream", - "text": [ - "CPU times: user 21 µs, sys: 4 µs, total: 25 µs\n", - "Wall time: 28.8 µs\n" - ], - "name": "stdout" - } - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "TL9-6VMI2Ldn", - "colab_type": "code", - "outputId": "e206db54-d2e4-46fe-a5ef-33c8c40f9346", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 54 - } - }, - "source": [ - "# Construct the examplary tree\n", - "%%time\n", - "root = BiNode(keys[0])\n", - "for k in keys[1:]:\n", - " insert2(root, k)" - ], - "execution_count": 0, - "outputs": [ - { - "output_type": "stream", - "text": [ - "CPU times: user 54 µs, sys: 0 ns, total: 54 µs\n", - "Wall time: 60.6 µs\n" - ], - "name": "stdout" - } - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "NoBJCgvF7WVo", - "colab_type": "code", - "outputId": "2905fd04-9de0-4e1b-d809-562698194c57", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 54 - } - }, - "source": [ - "# Construct the examplary tree\n", - "%%time\n", - "root = BiNode(keys[0])\n", - "for k in keys:\n", - " root = insertItr(root, k)" - ], - "execution_count": 0, - "outputs": [ - { - "output_type": "stream", - "text": [ - "CPU times: user 49 µs, sys: 0 ns, total: 49 µs\n", - "Wall time: 103 µs\n" - ], - "name": "stdout" - } - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "vsV8Y2vkwbDI", - "colab_type": "code", - "outputId": "d7491e7c-606c-4d0e-9cf0-d4424d7cf3fd", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 35 - } - }, - "source": [ - "# insert key 9 \n", - "out_keys = inorder_print(root)\n", - "for k in out_keys:\n", - " print(k, end = ' ')" - ], - 
"execution_count": 0, - "outputs": [ - { - "output_type": "stream", - "text": [ - "1 3 4 6 7 8 10 13 14 " - ], - "name": "stdout" - } - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "nRzeShLMXuIP", - "colab_type": "code", - "outputId": "ab5374c1-af90-4292-9e84-2688ce6d6674", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 35 - } - }, - "source": [ - "## Test Minimum and maximum\n", - "print(minimum(root).val, minimumIter(root).val,maximum(root).val, maximumIter(root).val)" - ], - "execution_count": 0, - "outputs": [ - { - "output_type": "stream", - "text": [ - "1 1 14 14\n" - ], - "name": "stdout" - } - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "oJONUENtl654", - "colab_type": "code", - "outputId": "fc34abec-bc1d-408e-be64-c1a9c51edf35", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 72 - } - }, - "source": [ - "# Test successor and predecessor\n", - "s1 = successor(root, 14)\n", - "s2 = successor(root, 3)\n", - "s3 = successor(root, 4)\n", - "s4 = successor(root, 7)\n", - "if s1:\n", - " print(s1)\n", - "print(s2.val, s3.val, s4.val)\n", - "root.p = None\n", - "node = findNodeAddParent(root, 4)\n", - "suc = successor2(node)\n", - "print(suc.val)\n", - "print(predecessorInorder(root, suc).val, successorInorder(root, suc).val)" - ], - "execution_count": 0, - "outputs": [ - { - "output_type": "stream", - "text": [ - "4 6 8\n", - "6\n", - "4 7\n" - ], - "name": "stdout" - } - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "Ee9SU288b91o", - "colab_type": "code", - "outputId": "7bc7bdd8-e5ec-4c75-a631-a4950bb80b00", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 54 - } - }, - "source": [ - "# Test Delete\n", - "out_keys = inorder_print(root)\n", - "for k in out_keys:\n", - " print(k, end = ' ')\n", - "print(' ,')\n", - "root1 = delete(root, 3)\n", - "out_keys = inorder_print(root1)\n", - "for k in out_keys:\n", - " print(k, end = ' ')\n" - ], - "execution_count": 0, - 
"outputs": [ - { - "output_type": "stream", - "text": [ - "1 3 4 6 7 8 10 13 14 ,\n", - "1 4 6 7 8 10 13 14 " - ], - "name": "stdout" - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "muAC9qyz3sXJ", - "colab_type": "text" - }, - "source": [ - "## Segment Tree" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "weAxbsyJ3rrO", - "colab_type": "code", - "colab": {} - }, - "source": [ - "class TreeNode:\n", - " def __init__(self, val, s, e):\n", - " self.val = val\n", - " self.s = s\n", - " self.e = e\n", - " self.left = None\n", - " self.right = None" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "DWTvjBm06N7N", - "colab_type": "code", - "colab": {} - }, - "source": [ - "def getNodes(root):\n", - " if not root:\n", - " return []\n", - " left = getNodes(root.left)\n", - " right = getNodes(root.right)\n", - " return left + [(root.s, root.e, root.val)] + right" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "3tPVDCeE4MFX", - "colab_type": "code", - "colab": {} - }, - "source": [ - "def merge(left, right, s, e):\n", - " return TreeNode(left.val + right.val, s, e)\n", - "\n", - "def _buildSegmentTree(nums, s, e):\n", - " '''\n", - " s, e: start index and end index\n", - " '''\n", - " if s > e:\n", - " return None\n", - " if s == e:\n", - " return TreeNode(nums[s], s, e)\n", - " \n", - " m = (s + e)//2\n", - " # Divide: return a subtree \n", - " left = _buildSegmentTree(nums, s, m)\n", - " right = _buildSegmentTree(nums, m+1, e)\n", - " \n", - " # Conquer: merge two subtree\n", - " node = TreeNode(left.val + right.val, s, e)\n", - " node.left = left\n", - " node.right = right\n", - " return node" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "_11QSpH8901s", - "colab_type": "code", - "colab": {} - }, - "source": [ - "# Range Query\n", - "def _rangeQuery(root, i, j, s, e): \n", - " if s 
== i and j == e:\n", - " return root.val if root else 0 \n", - " m = (s + e)//2\n", - " if j <= m:\n", - " return _rangeQuery(root.left, i, j, s, m)\n", - " elif i > m:\n", - " return _rangeQuery(root.right, i, j, m+1, e)\n", - " else:\n", - " return _rangeQuery(root.left, i, m, s, m) + _rangeQuery(root.right, m+1, j, m+1, e)" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "aAc6LT911CDs", - "colab_type": "code", - "colab": {} - }, - "source": [ - "# Update\n", - "def _update(root, s, e, i, val):\n", - " if s == e == i:\n", - " root.val = val\n", - " return \n", - " m = (s + e) // 2\n", - " if i <= m:\n", - " _update(root.left, s, m, i, val)\n", - " else:\n", - " _update(root.right, m + 1, e, i, val)\n", - " root.val = root.left.val + root.right.val\n", - " return " - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "YaBuyWTr5WHH", - "colab_type": "code", - "outputId": "450f71db-3588-47cf-d556-46f2436a735c", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 35 - } - }, - "source": [ - "# Test tree construction\n", - "nums = [2, 9, 4, 5, 8, 7]\n", - "root = _buildSegmentTree(nums, 0, len(nums) - 1)\n", - "print(getNodes(root))" - ], - "execution_count": 0, - "outputs": [ - { - "output_type": "stream", - "text": [ - "[(0, 0, 2), (0, 1, 11), (1, 1, 9), (0, 2, 15), (2, 2, 4), (0, 5, 35), (3, 3, 5), (3, 4, 13), (4, 4, 8), (3, 5, 20), (5, 5, 7)]\n" - ], - "name": "stdout" - } - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "mEzfRgUS-UH4", - "colab_type": "code", - "outputId": "a0f13949-6215-49fc-ceae-b6cfdc500788", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 35 - } - }, - "source": [ - "# Test Range Query\n", - "print(_rangeQuery(root, 0, 2, 0, len(nums) - 1))" - ], - "execution_count": 0, - "outputs": [ - { - "output_type": "stream", - "text": [ - "15\n" - ], - "name": "stdout" - } - ] - }, - { - "cell_type": "code", - 
"metadata": { - "id": "sqy_bZWr2AAw", - "colab_type": "code", - "outputId": "10c13b8d-9c44-41d3-e0aa-3f0caff4bb3e", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 35 - } - }, - "source": [ - "# Test updates\n", - "_update(root, 0, len(nums) - 1, 1, 3)\n", - "print(_rangeQuery(root, 0, 2, 0, len(nums) - 1))" - ], - "execution_count": 0, - "outputs": [ - { - "output_type": "stream", - "text": [ - "9\n" - ], - "name": "stdout" - } - ] - } - ] -} \ No newline at end of file diff --git a/Colab_Codes/chapter_python_comparison_sorting.ipynb b/Colab_Codes/chapter_python_comparison_sorting.ipynb deleted file mode 100644 index f23e6bf..0000000 --- a/Colab_Codes/chapter_python_comparison_sorting.ipynb +++ /dev/null @@ -1,1154 +0,0 @@ -{ - "nbformat": 4, - "nbformat_minor": 0, - "metadata": { - "colab": { - "name": "chapter_python_comparison_sorting.ipynb", - "provenance": [], - "collapsed_sections": [], - "toc_visible": true, - "include_colab_link": true - }, - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - } - }, - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "id": "view-in-github", - "colab_type": "text" - }, - "source": [ - "\"Open" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "fdKUnCibJGfe", - "colab_type": "text" - }, - "source": [ - "Python offers a variety of built-in functions, modules, and libraries to help with comparison, sorint, and selections." 
- ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "Js7-4f1M71PF", - "colab_type": "text" - }, - "source": [ - "## Comparison operators" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "dtlOO7PU_-Wz", - "colab_type": "code", - "outputId": "9120041c-ebcd-4082-b61a-48425a1ec45c", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 35 - } - }, - "source": [ - "# Compare numericals\n", - "c1 = 2 < 3\n", - "c2 = 2.5 > 3\n", - "c1, c2" - ], - "execution_count": 0, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "(True, False)" - ] - }, - "metadata": { - "tags": [] - }, - "execution_count": 8 - } - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "3SB8i6NE9eMM", - "colab_type": "code", - "outputId": "70380bfa-49fe-4611-d0f2-2f6e76ec5be9", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 35 - } - }, - "source": [ - "# Compare strings\n", - "c1 = 'ab' < 'bc'\n", - "c2 = 'abc' > 'abd'\n", - "c3 = 'ab' < 'abab'\n", - "c4 = 'abc' != 'abc'\n", - "c1, c2, c3, c4" - ], - "execution_count": 0, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "(True, False, True, False)" - ] - }, - "metadata": { - "tags": [] - }, - "execution_count": 10 - } - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "5ePi3mnw9stn", - "colab_type": "code", - "outputId": "1c05d2d6-89b1-4e9d-f1f1-bf3eb3c838dc", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 35 - } - }, - "source": [ - "# Compare Sequences\n", - "c1 = [1, 2, 3] < [2, 3]\n", - "c2 = (1, 2) > (1, 2, 3)\n", - "c3 = [1, 2] == [1, 2]\n", - "c1, c2, c3" - ], - "execution_count": 0, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "(True, False, True)" - ] - }, - "metadata": { - "tags": [] - }, - "execution_count": 18 - } - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "AYbdgocGCqNt", - "colab_type": "code", - "outputId": 
"71702e44-48ee-4319-e378-91c898bd2309", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 172 - } - }, - "source": [ - "[1, 2, 3] < (2, 3)" - ], - "execution_count": 0, - "outputs": [ - { - "output_type": "error", - "ename": "TypeError", - "evalue": "ignored", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m2\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m3\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m<\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0;36m2\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m3\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", - "\u001b[0;31mTypeError\u001b[0m: '<' not supported between instances of 'list' and 'tuple'" - ] - } - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "XlA1fZxxCr44", - "colab_type": "code", - "outputId": "ffab3de3-5c39-4152-c233-816b3500492c", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 172 - } - }, - "source": [ - "{1: 'a', 2:'b'} < {1: 'a', 2:'b', 3:'c'}" - ], - "execution_count": 0, - "outputs": [ - { - "output_type": "error", - "ename": "TypeError", - "evalue": "ignored", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0;34m{\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0;34m'a'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m2\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m'b'\u001b[0m\u001b[0;34m}\u001b[0m \u001b[0;34m<\u001b[0m 
\u001b[0;34m{\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0;34m'a'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m2\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m'b'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m3\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m'c'\u001b[0m\u001b[0;34m}\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", - "\u001b[0;31mTypeError\u001b[0m: '<' not supported between instances of 'dict' and 'dict'" - ] - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "FHjrUBMvJTqS", - "colab_type": "text" - }, - "source": [ - "## max() and min() built-in functions\n", - "\n", - "max(iterable, *[, key, default])\n", - "max(arg1, arg2, *args[, key])\n", - " If one positional argument is provided, it should be an iterable. The largest item in the iterable is returned. If two or more positional arguments are provided, the largest of the positional arguments is returned.\n", - "\n", - " There are two optional keyword-only arguments. The key argument specifies a one-argument ordering function like that used for list.sort(). The default argument specifies an object to return if the provided iterable is empty. If the iterable is empty and default is not provided, a ValueError is raised.\n", - "\n", - " If multiple items are maximal, the function returns the first one encountered. This is consistent with other sort-stability preserving tools such as sorted(iterable, key=keyfunc, reverse=True)[0] and heapq.nlargest(1, iterable, key=keyfunc).\n", - "\n", - " New in version 3.4: The default keyword-only argument.\n", - "\n", - " Changed in version 3.8: The key can be None.\n", - "\n", - "What is really interesting is that when we pass two iterables such as two lists into the $max$ function, it compares them as they are strings with lexicographical order. This character makes it useful to problem solving sometimes. 
\n" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "Ysb8M9hNL42Z", - "colab_type": "code", - "outputId": "574e135e-f2be-4dd6-d15d-ea5cb914062d", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 35 - } - }, - "source": [ - "# One iterable\n", - "lst1 = [4, 8, 9, 20, 3]\n", - "max([4, 8, 9, 20, 3])" - ], - "execution_count": 0, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "20" - ] - }, - "metadata": { - "tags": [] - }, - "execution_count": 3 - } - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "Zcr-WC5sMFjc", - "colab_type": "code", - "outputId": "72ba2a72-0d14-4411-9f81-a25c06ee3873", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 35 - } - }, - "source": [ - "# Two arguments\n", - "m1 = max(24, 15)\n", - "m2 = max([4, 8, 9, 20, 3], [6, 2, 8])\n", - "m3 = max('abc', 'ba')\n", - "m1, m2, m3" - ], - "execution_count": 0, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "(24, [6, 2, 8], 'ba')" - ] - }, - "metadata": { - "tags": [] - }, - "execution_count": 5 - } - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "ilxRqKv5MygZ", - "colab_type": "code", - "outputId": "6201a811-9e8e-4b24-d51f-f9d64eb9e32c", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 35 - } - }, - "source": [ - "" - ], - "execution_count": 0, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "'ba'" - ] - }, - "metadata": { - "tags": [] - }, - "execution_count": 5 - } - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "Td_fQvXGM7Ss", - "colab_type": "code", - "outputId": "439e70b9-46eb-4fe7-cbb6-c254dd6d6785", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 35 - } - }, - "source": [ - "# For dictionary, it defaultly compares with keys, and it returns the key\n", - "dict1 = {'a': 5, 'b': 8, 'c': 3}\n", - "k1 = max(dict1)\n", - "k2 = max(dict1, key=dict1.get)\n", - "k3 = max(dict1, 
key =lambda x: dict1[x])\n", - "k1, k2, k3" - ], - "execution_count": 0, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "('c', 'b', 'b')" - ] - }, - "metadata": { - "tags": [] - }, - "execution_count": 9 - } - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "b2A71c2JTA3r", - "colab_type": "code", - "outputId": "898c9b40-bfd5-4625-b69f-d3e009e35ca1", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 35 - } - }, - "source": [ - "max([], default=0)" - ], - "execution_count": 0, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "0" - ] - }, - "metadata": { - "tags": [] - }, - "execution_count": 2 - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "0yLoqm8ZyaWM", - "colab_type": "text" - }, - "source": [ - "## Rich Comparison" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "BJDcW18ry4HS", - "colab_type": "code", - "colab": {} - }, - "source": [ - "from functools import total_ordering\n", - "\n", - "@total_ordering\n", - "class Person(object):\n", - " def __init__(self, firstname, lastname):\n", - " self.first = firstname\n", - " self.last = lastname\n", - "\n", - " def __eq__(self, other):\n", - " return ((self.last, self.first) == (other.last, other.first))\n", - " \n", - " def __ne__(self, other):\n", - " return not (self == other)\n", - "\n", - " def __lt__(self, other):\n", - " return ((self.last, self.first) < (other.last, other.first))\n", - "\n", - " def __repr__(self):\n", - " return \"%s %s\" % (self.first, self.last)" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "zeww0AgS3p6R", - "colab_type": "code", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 35 - }, - "outputId": "c8eec9ba-6623-48cd-a1bc-5e44eafb36f5" - }, - "source": [ - "p1 = Person('Li', 'Yin')\n", - "p2 = Person('Bella', 'Smith')\n", - "p1 > p2" - ], - "execution_count": 36, - "outputs": [ - { - 
"output_type": "execute_result", - "data": { - "text/plain": [ - "True" - ] - }, - "metadata": { - "tags": [] - }, - "execution_count": 36 - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "j9da_uDkJYQr", - "colab_type": "text" - }, - "source": [ - "## seq.sort() and sorted()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "ag6v7yjLmR80", - "colab_type": "text" - }, - "source": [ - "### Basics" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "fJdwJaQkgc9l", - "colab_type": "code", - "outputId": "ab889de3-3fe1-4a0c-a54e-5b8a231f26e4", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 35 - } - }, - "source": [ - "# List bulti-in in-place sort\n", - "lst = [4, 5, 8, 1, 2, 7]\n", - "lst.sort()\n", - "lst" - ], - "execution_count": 0, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "[1, 2, 4, 5, 7, 8]" - ] - }, - "metadata": { - "tags": [] - }, - "execution_count": 7 - } - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "LvOw_sH9hHG2", - "colab_type": "code", - "outputId": "b143c999-8934-42c1-eec3-384c4715fd1a", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 35 - } - }, - "source": [ - "# sorted() out-of-place sorting\n", - "lst = [4, 5, 8, 1, 2, 7]\n", - "new_lst = sorted(lst)\n", - "new_lst, lst" - ], - "execution_count": 0, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "([1, 2, 4, 5, 7, 8], [4, 5, 8, 1, 2, 7])" - ] - }, - "metadata": { - "tags": [] - }, - "execution_count": 9 - } - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "B0qx9rvaser4", - "colab_type": "code", - "outputId": "2326e9a9-a761-4d0b-9147-8379baa53e64", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 209 - } - }, - "source": [ - "# cant sort other iterable with .sort()\n", - "tup = (3, 6, 8, 2, 78, 1, 23, 45, 9)\n", - "tup.sort()" - ], - "execution_count": 0, - "outputs": [ - { - "output_type": "error", - 
"ename": "AttributeError", - "evalue": "ignored", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0mtup\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0;36m3\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m6\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m8\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m2\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m78\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m23\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m45\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m9\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0mtup\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msort\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 3\u001b[0m \u001b[0;31m#sorted(tup)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;31mAttributeError\u001b[0m: 'tuple' object has no attribute 'sort'" - ] - } - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "aM1KExWNk33x", - "colab_type": "code", - "outputId": "ef49fd48-4c27-4854-e437-9d9c262dea24", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 35 - } - }, - "source": [ - "# Sort iterable with sorted()\n", - "fruit = ('apple', 'pear', 'berry', 'peach', 'apricot')\n", - "new_fruit = sorted(fruit)\n", - "new_fruit" - ], - "execution_count": 0, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "['apple', 'apricot', 'berry', 'peach', 'pear']" - ] - }, - "metadata": { - "tags": [] - }, - "execution_count": 11 - } - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "ugBtubRqksNR", - 
"colab_type": "code", - "colab": {} - }, - "source": [ - "tup = (3, 6, 8, 2, 78, 1, 23, 45, 9)" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "rVASApvegGrB", - "colab_type": "code", - "outputId": "87b4a560-a789-4c33-a500-d8b236dcea18", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 35 - } - }, - "source": [ - "lst = list(tup)\n", - "lst.sort()\n", - "lst\n" - ], - "execution_count": 0, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "[1, 2, 3, 6, 8, 9, 23, 45, 78]" - ] - }, - "metadata": { - "tags": [] - }, - "execution_count": 5 - } - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "0DJi8M9N535w", - "colab_type": "code", - "colab": {} - }, - "source": [ - "## Customize key\n", - "def cmp(x, y):\n", - " return y - x" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "I_X20Gc-58RE", - "colab_type": "code", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 35 - }, - "outputId": "af0499e1-3661-4092-f54b-ebd1c660a4e1" - }, - "source": [ - "from functools import cmp_to_key\n", - "lst = [4, 5, 8, 1, 2, 7]\n", - "lst.sort(key=cmp_to_key(cmp))\n", - "lst" - ], - "execution_count": 59, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "[8, 7, 5, 4, 2, 1]" - ] - }, - "metadata": { - "tags": [] - }, - "execution_count": 59 - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "rCWeiCAGnNFP", - "colab_type": "text" - }, - "source": [ - "### Arguments" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "Y5YH4QJRnOtM", - "colab_type": "code", - "outputId": "b85028bc-9010-4a44-f05f-b2584a4f7c7b", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 35 - } - }, - "source": [ - "# Reverse\n", - "lst = [4, 5, 8, 1, 2, 7]\n", - "lst.sort(reverse=True)\n", - "lst" - ], - "execution_count": 0, - "outputs": [ - { - "output_type": 
"execute_result", - "data": { - "text/plain": [ - "[8, 7, 5, 4, 2, 1]" - ] - }, - "metadata": { - "tags": [] - }, - "execution_count": 12 - } - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "y_dOIXFqotZA", - "colab_type": "code", - "colab": {} - }, - "source": [ - "class Int(int):\n", - " def __init__(self, val):\n", - " self.val = val\n", - " def __lt__(self, other):\n", - " return other.val < self.val" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "8qdEy_ctpZcI", - "colab_type": "code", - "outputId": "542e0581-00e4-4306-cf72-f43f2c47716f", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 35 - } - }, - "source": [ - "lst = [Int(4), Int(5), Int(8), Int(1), Int(2), Int(7)]\n", - "lst.sort()\n", - "lst" - ], - "execution_count": 38, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "[8, 7, 5, 4, 2, 1]" - ] - }, - "metadata": { - "tags": [] - }, - "execution_count": 38 - } - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "HJ-elndv7iDD", - "colab_type": "code", - "colab": {} - }, - "source": [ - "lst = [(8, 1), (5, 7), (4, 1), (1, 3), (2, 4)]" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "v1jkMSha7CeW", - "colab_type": "code", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 35 - }, - "outputId": "97317155-9f42-4a78-b6a7-cd8994232721" - }, - "source": [ - "## Trhough a function\n", - "def get_key(x):\n", - " return x[1]\n", - "new_lst = sorted(lst, key = get_key)\n", - "new_lst" - ], - "execution_count": 52, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "[(8, 1), (4, 1), (1, 3), (2, 4), (5, 7)]" - ] - }, - "metadata": { - "tags": [] - }, - "execution_count": 52 - } - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "5h4XOS_y7sKM", - "colab_type": "code", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 
35 - }, - "outputId": "429a0632-f00e-422b-de9f-a846a8718c7d" - }, - "source": [ - "# Through lambda function\n", - "new_lst = sorted(lst, key = lambda x: x[1])\n", - "new_lst" - ], - "execution_count": 53, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "[(8, 1), (4, 1), (1, 3), (2, 4), (5, 7)]" - ] - }, - "metadata": { - "tags": [] - }, - "execution_count": 53 - } - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "iVMU4_4a7zjh", - "colab_type": "code", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 35 - }, - "outputId": "3ac52a3b-e17e-438b-de1a-119ff040d7d2" - }, - "source": [ - "new_lst = sorted(lst, key = lambda x: (x[1], x[0]))\n", - "new_lst" - ], - "execution_count": 54, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "[(4, 1), (8, 1), (1, 3), (2, 4), (5, 7)]" - ] - }, - "metadata": { - "tags": [] - }, - "execution_count": 54 - } - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "eS-KOH4Y_3WO", - "colab_type": "code", - "colab": {} - }, - "source": [ - "# A class\n", - "class Student(object):\n", - " def __init__(self, name, grade, age):\n", - " self.name = name\n", - " self.grade = grade\n", - " self.age = age\n", - " \n", - " # To support indexing\n", - " def __getitem__(self, key):\n", - " return (self.name, self.grade, self.age)[key]\n", - "\n", - " def __repr__(self):\n", - " return repr((self.name, self.grade, self.age))" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "cFYLwtZn_-qL", - "colab_type": "code", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 35 - }, - "outputId": "36f4f32b-2ee0-47de-e2bb-91e47d8bca08" - }, - "source": [ - "students = [Student('john', 'A', 15), Student('jane', 'B', 12), Student('dave', 'B', 10)]\n", - "sorted(students, key=lambda x: x.age)" - ], - "execution_count": 75, - "outputs": [ - { - "output_type": "execute_result", - "data": { - 
"text/plain": [ - "[('dave', 'B', 10), ('jane', 'B', 12), ('john', 'A', 15)]" - ] - }, - "metadata": { - "tags": [] - }, - "execution_count": 75 - } - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "lshxkSOJBLiT", - "colab_type": "code", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 35 - }, - "outputId": "b2804f89-de0a-4cc7-c6af-6a7d26676625" - }, - "source": [ - "# Use operator\n", - "from operator import attrgetter\n", - "sorted(students, key=attrgetter('age'))" - ], - "execution_count": 77, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "[('dave', 'B', 10), ('jane', 'B', 12), ('john', 'A', 15)]" - ] - }, - "metadata": { - "tags": [] - }, - "execution_count": 77 - } - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "nA1JzPi-CkIB", - "colab_type": "code", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 35 - }, - "outputId": "d2fc29e7-7217-41c8-ca77-b974f02feb3e" - }, - "source": [ - "from operator import attrgetter\n", - "sorted(students, key=attrgetter('grade', 'age'))" - ], - "execution_count": 78, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "[('john', 'A', 15), ('dave', 'B', 10), ('jane', 'B', 12)]" - ] - }, - "metadata": { - "tags": [] - }, - "execution_count": 78 - } - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "mNhR68JvCZJ_", - "colab_type": "code", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 35 - }, - "outputId": "bb0937af-e97e-4c5e-f3da-4834129b283e" - }, - "source": [ - "# Use itemgetter\n", - "from operator import itemgetter\n", - "sorted(students, key=itemgetter(2))" - ], - "execution_count": 79, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "[('dave', 'B', 10), ('jane', 'B', 12), ('john', 'A', 15)]" - ] - }, - "metadata": { - "tags": [] - }, - "execution_count": 79 - } - ] - }, - { - "cell_type": "code", - "metadata": { - "id": 
"eA8dMPkksqly", - "colab_type": "code", - "outputId": "613cc186-cd57-4439-9f7f-6b122cd3d85b", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 71 - } - }, - "source": [ - "from collections import defaultdict\n", - "import random\n", - "dic = defaultdict(lambda: defaultdict(list)) # a dictionary of a dictionary of list dic[a][b] = [3, 1, 2, 4]\n", - "for i in range(10):\n", - " a = random.randint(1, 101)\n", - " b = random.randint(1, 101)\n", - " dic[a][b] = [random.randint(1, 101) for _ in range(10)]\n", - "print(dic) \n", - "sorted_dic = sorted(dic)\n", - "print(sorted_dic)" - ], - "execution_count": 0, - "outputs": [ - { - "output_type": "stream", - "text": [ - "defaultdict( at 0x7faf20e3c730>, {72: defaultdict(, {59: [63, 15, 62, 83, 30, 98, 16, 44, 58, 93]}), 82: defaultdict(, {70: [89, 49, 47, 63, 90, 1, 7, 9, 78, 10]}), 53: defaultdict(, {62: [10, 99, 35, 78, 74, 44, 82, 32, 32, 52]}), 78: defaultdict(, {78: [20, 22, 100, 29, 16, 65, 56, 8, 100, 100]}), 13: defaultdict(, {44: [4, 81, 17, 92, 44, 49, 72, 24, 13, 64]}), 84: defaultdict(, {47: [76, 94, 36, 56, 60, 87, 72, 47, 75, 33]}), 49: defaultdict(, {97: [7, 47, 13, 80, 85, 59, 2, 48, 68, 65]}), 87: defaultdict(, {61: [31, 72, 71, 63, 19, 84, 78, 80, 97, 85]}), 17: defaultdict(, {92: [29, 53, 20, 14, 16, 84, 57, 40, 4, 19]}), 54: defaultdict(, {32: [2, 31, 19, 31, 68, 10, 85, 34, 25, 62]})})\n", - "[13, 17, 49, 53, 54, 72, 78, 82, 84, 87]\n" - ], - "name": "stdout" - } - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "VbmDX6yAvYax", - "colab_type": "code", - "outputId": "441aa86e-17c6-4cbd-b923-67c1ea41e384", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 34 - } - }, - "source": [ - "'''sort_list_of_tuple()'''\n", - "\n", - "lst = [(1, 8, 2), (3, 2, 9), (1, 7, 10), (1, 7, 1), (11, 1, 5), (6, 3, 10), (32, 18, 9)]\n", - "sorted_lst = sorted(lst, key = lambda x: x[0]) # sort in the order of the first element, and descresing order of the second element, and 
incresing of the third element\n", - "print(sorted_lst)" - ], - "execution_count": 0, - "outputs": [ - { - "output_type": "stream", - "text": [ - "[(1, 8, 2), (1, 7, 10), (1, 7, 1), (3, 2, 9), (6, 3, 10), (11, 1, 5), (32, 18, 9)]\n" - ], - "name": "stdout" - } - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "tXDvRwd_047E", - "colab_type": "code", - "outputId": "231e3bd1-06df-4e6c-e4e8-ccfc7803ee62", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 34 - } - }, - "source": [ - "lst = [(1, 8, 2), (3, 2, 9), (1, 7, 10), (1, 7, 1), (11, 1, 5), (6, 3, 10), (32, 18, 9)]\n", - "sorted_lst = sorted(lst, key = lambda x: (x[0], -x[1], x[2])) # sort in the order of the first element, and descresing order of the second element, and incresing of the third element\n", - "print(sorted_lst)" - ], - "execution_count": 0, - "outputs": [ - { - "output_type": "stream", - "text": [ - "[(1, 8, 2), (1, 7, 1), (1, 7, 10), (3, 2, 9), (6, 3, 10), (11, 1, 5), (32, 18, 9)]\n" - ], - "name": "stdout" - } - ] - } - ] -} \ No newline at end of file diff --git a/Colab_Codes/chapter_python_datastrcutures.ipynb b/Colab_Codes/chapter_python_datastrcutures.ipynb deleted file mode 100644 index 8ea9a3d..0000000 --- a/Colab_Codes/chapter_python_datastrcutures.ipynb +++ /dev/null @@ -1,1653 +0,0 @@ -{ - "nbformat": 4, - "nbformat_minor": 0, - "metadata": { - "colab": { - "name": "chapter_python_datastrcutures.ipynb", - "provenance": [], - "collapsed_sections": [], - "toc_visible": true, - "include_colab_link": true - }, - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - } - }, - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "id": "view-in-github", - "colab_type": "text" - }, - "source": [ - "\"Open" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "f3PH6cZu5ori", - "colab_type": "text" - }, - "source": [ - "### Sequence\n", - "#### List" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "VBKkaaiS5lhk", - "colab_type": 
"code", - "colab": {} - }, - "source": [ - "lst_lst = [[], [1], ['1'], [1, 2], ['1', '2']]" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "qjrTbHg_5xMR", - "colab_type": "code", - "outputId": "835af2d0-bc33-45af-f56a-c94d7b8408ac", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 34 - } - }, - "source": [ - "import sys\n", - "for lst in lst_lst:\n", - " print(sys.getsizeof(lst), end=' ')\n", - "print(sys.getsizeof(lst_lst))" - ], - "execution_count": 0, - "outputs": [ - { - "output_type": "stream", - "text": [ - "64 72 72 80 80 104\n" - ], - "name": "stdout" - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "WpkbbrQISfWR", - "colab_type": "text" - }, - "source": [ - "print('compare the length and the additiobal memory size in bytes')" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "XS34IasNPvcJ", - "colab_type": "code", - "outputId": "00a3e7c4-431b-40cb-fb99-7265004f2077", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 312 - } - }, - "source": [ - "a = []\n", - "for size in range(17):\n", - " a.insert(0, size)\n", - " print('size:', len(a), 'bytes:', (sys.getsizeof(a)-64)//8, 'id:', id(a))\n", - " " - ], - "execution_count": 0, - "outputs": [ - { - "output_type": "stream", - "text": [ - "size: 1 bytes: 4 id: 139702851424712\n", - "size: 2 bytes: 4 id: 139702851424712\n", - "size: 3 bytes: 4 id: 139702851424712\n", - "size: 4 bytes: 4 id: 139702851424712\n", - "size: 5 bytes: 8 id: 139702851424712\n", - "size: 6 bytes: 8 id: 139702851424712\n", - "size: 7 bytes: 8 id: 139702851424712\n", - "size: 8 bytes: 8 id: 139702851424712\n", - "size: 9 bytes: 16 id: 139702851424712\n", - "size: 10 bytes: 16 id: 139702851424712\n", - "size: 11 bytes: 16 id: 139702851424712\n", - "size: 12 bytes: 16 id: 139702851424712\n", - "size: 13 bytes: 16 id: 139702851424712\n", - "size: 14 bytes: 16 id: 139702851424712\n", - "size: 15 bytes: 16 id: 139702851424712\n", 
- "size: 16 bytes: 16 id: 139702851424712\n", - "size: 17 bytes: 25 id: 139702851424712\n" - ], - "name": "stdout" - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "JsLMOxtfKKHY", - "colab_type": "text" - }, - "source": [ - "### Tuple" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "6pW_iKdQccTi", - "colab_type": "code", - "outputId": "0cf35433-af54-4bc4-ff2a-864af0f00f1b", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 34 - } - }, - "source": [ - "record1 = ('Bob', 12345, 89)\n", - "from collections import namedtuple\n", - "Record = namedtuple('Computer_Science', 'name id score')\n", - "record2 = Record('Bob', id=12345, score=89)\n", - "print(record1, record2)" - ], - "execution_count": 0, - "outputs": [ - { - "output_type": "stream", - "text": [ - "('Bob', 12345, 89) Computer_Science(name='Bob', id=12345, score=89)\n" - ], - "name": "stdout" - } - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "jtg_FHoGZXRV", - "colab_type": "code", - "outputId": "3685bfb8-f91e-449e-b943-765d83efc5ce", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 34 - } - }, - "source": [ - "a=()\n", - "b=(1)\n", - "c=(1,)\n", - "print(type(a), type(b), type(c))" - ], - "execution_count": 0, - "outputs": [ - { - "output_type": "stream", - "text": [ - " \n" - ], - "name": "stdout" - } - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "BjCNDU8bKMEF", - "colab_type": "code", - "colab": {} - }, - "source": [ - "lst_tup = [(), (1,), ('1',), (1, 2), ('1', '2')]" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "7Bozw-YBKRs-", - "colab_type": "code", - "outputId": "f5f821d5-c2d3-4cac-f03f-d8f70c1881c1", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 34 - } - }, - "source": [ - "import sys\n", - "for tup in lst_tup:\n", - " print(sys.getsizeof(tup), end=' ')\n", - " " - ], - "execution_count": 0, - "outputs": [ - { - "output_type": "stream", - 
"text": [ - "48 56 56 64 64 " - ], - "name": "stdout" - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "n5Sm5NJVn9ON", - "colab_type": "text" - }, - "source": [ - "### String" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "9C-_UxZSn_hM", - "colab_type": "code", - "outputId": "6a32a157-1ea5-475e-d535-da7662f9e10a", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 34 - } - }, - "source": [ - "lst_string = ['', str(), \"\",'b', 'ab', 'abc', 'abcd']\n", - "import sys\n", - "for string in lst_string:\n", - " print(sys.getsizeof(string), end=' ')" - ], - "execution_count": 0, - "outputs": [ - { - "output_type": "stream", - "text": [ - "53 53 53 58 51 52 53 " - ], - "name": "stdout" - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "z9fadMw_dfJD", - "colab_type": "text" - }, - "source": [ - "### Experiments about tuple and list" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "aiznFcZLQnRB", - "colab_type": "code", - "colab": {} - }, - "source": [ - "## experiment\n", - "\n", - "def iterate(obj):\n", - " for _ in obj:\n", - " pass" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "mtsz7-awQzKb", - "colab_type": "code", - "outputId": "6e01af72-c00e-4362-e660-a4be0a11bfdb", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 69 - } - }, - "source": [ - "a = [1]*100000\n", - "b =(1,)*100000\n", - "print(type(b))\n", - "%timeit iterate(a)\n", - "%timeit iterate(b)" - ], - "execution_count": 0, - "outputs": [ - { - "output_type": "stream", - "text": [ - "\n", - "1000 loops, best of 3: 774 µs per loop\n", - "1000 loops, best of 3: 768 µs per loop\n" - ], - "name": "stdout" - } - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "ptmcUSMS7LX-", - "colab_type": "code", - "outputId": "88f51fdf-6d44-4aae-9a1f-2faf93e89f08", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 106 - } - }, - "source": [ - 
"print(dir(1))#print all attributes\n", - "print(isinstance(1, int)) # check instance type\n", - "print(isinstance(1, float))\n", - "o=object()\n", - "print(dir(o))" - ], - "execution_count": 0, - "outputs": [ - { - "output_type": "stream", - "text": [ - "['__abs__', '__add__', '__and__', '__bool__', '__ceil__', '__class__', '__delattr__', '__dir__', '__divmod__', '__doc__', '__eq__', '__float__', '__floor__', '__floordiv__', '__format__', '__ge__', '__getattribute__', '__getnewargs__', '__gt__', '__hash__', '__index__', '__init__', '__init_subclass__', '__int__', '__invert__', '__le__', '__lshift__', '__lt__', '__mod__', '__mul__', '__ne__', '__neg__', '__new__', '__or__', '__pos__', '__pow__', '__radd__', '__rand__', '__rdivmod__', '__reduce__', '__reduce_ex__', '__repr__', '__rfloordiv__', '__rlshift__', '__rmod__', '__rmul__', '__ror__', '__round__', '__rpow__', '__rrshift__', '__rshift__', '__rsub__', '__rtruediv__', '__rxor__', '__setattr__', '__sizeof__', '__str__', '__sub__', '__subclasshook__', '__truediv__', '__trunc__', '__xor__', 'bit_length', 'conjugate', 'denominator', 'from_bytes', 'imag', 'numerator', 'real', 'to_bytes']\n", - "True\n", - "False\n", - "['__class__', '__delattr__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__le__', '__lt__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__str__', '__subclasshook__']\n" - ], - "name": "stdout" - } - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "8zZI8X9MFHXY", - "colab_type": "code", - "outputId": "9f5b0584-52ae-4ad0-ba6e-85e46d630cd3", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 217 - } - }, - "source": [ - "eg_tuple = ([1,2,3], '3', '4')\n", - "eg_tuple[0].append(4)\n", - "print(eg_tuple)\n", - "eg_tuple[0]=[1]" - ], - "execution_count": 0, - "outputs": [ - { - "output_type": "stream", - "text": [ - "([1, 2, 3, 4], '3', '4')\n" - 
], - "name": "stdout" - }, - { - "output_type": "error", - "ename": "TypeError", - "evalue": "ignored", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0meg_tuple\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m4\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0meg_tuple\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 4\u001b[0;31m \u001b[0meg_tuple\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", - "\u001b[0;31mTypeError\u001b[0m: 'tuple' object does not support item assignment" - ] - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "cRwZzC1nbya-", - "colab_type": "text" - }, - "source": [ - "## Linked List\n", - "### Singly Linked List" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "_bOzVhCsb2r2", - "colab_type": "code", - "colab": {} - }, - "source": [ - "class Node(object):\n", - " def __init__(self, val = None):\n", - " self.val = val\n", - " self.next = None" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "JBELJl1Mg_5t", - "colab_type": "code", - "colab": {} - }, - "source": [ - "head = None" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "7HNDqOpYv1eQ", - "colab_type": "code", - "colab": {} - }, - "source": [ - "head = Node(None)" - ], - 
"execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "aaFT7eqJiBOh", - "colab_type": "code", - "colab": {} - }, - "source": [ - "def append(head, val):\n", - " node = Node(val)\n", - " cur = head\n", - " while cur.next:\n", - " cur = cur.next\n", - " cur.next = node\n", - " return" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "mgqoX1FvlQ6e", - "colab_type": "code", - "colab": {} - }, - "source": [ - "for val in ['A', 'B', 'C', 'D']:\n", - " append(head, val)\n", - "print(head)" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "r6jguLJ2ncuk", - "colab_type": "code", - "colab": {} - }, - "source": [ - "def iter(head):\n", - " cur = head.next\n", - " while cur:\n", - " yield cur\n", - " cur = cur.next" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "fr2smn7KnlLr", - "colab_type": "code", - "colab": {} - }, - "source": [ - "for node in iter(head):\n", - " print(node.val, end = ' ')" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "slfA4hZKp39X", - "colab_type": "code", - "colab": {} - }, - "source": [ - "def search(head, val):\n", - " for node in iter(head):\n", - " if node.val == val:\n", - " return node\n", - " return None" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "O8pCCYD5rvSa", - "colab_type": "code", - "colab": {} - }, - "source": [ - "node = search(head, 'B')\n", - "print(node)" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "mD-zaFfktkuU", - "colab_type": "code", - "colab": {} - }, - "source": [ - "# Delete 'B'\n", - "def deleteByNode(node):\n", - " node.val = node.next.val\n", - " node.next = node.next.next\n", - " return node" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": 
"code", - "metadata": { - "id": "2Xl5HJaNts6e", - "colab_type": "code", - "colab": {} - }, - "source": [ - "deleteByNode(node)\n", - "for n in iter(head):\n", - " print(n.val, end = ' ')" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "DA9EM3JWzApy", - "colab_type": "code", - "colab": {} - }, - "source": [ - "def delete(head, val):\n", - " cur = head.next # start from dummy node\n", - " prev = head\n", - " while cur:\n", - " if cur.val == val:\n", - " # rewire\n", - " prev.next = cur.next\n", - " return\n", - " prev = cur\n", - " cur = cur.next" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "bEyQTqp_zsyT", - "colab_type": "code", - "colab": {} - }, - "source": [ - "delete(head,'A')\n", - "for n in iter(head):\n", - " print(n.val, end = ' ')" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "ahdQYQ4l01Xf", - "colab_type": "code", - "colab": {} - }, - "source": [ - "def clear(head):\n", - " head.next = None" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "eZ1FMYEH05vq", - "colab_type": "code", - "colab": {} - }, - "source": [ - "clear(head)\n", - "for n in iter(head):\n", - " print(n.val, end = ' ')" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "tzyU7Uf-2Fa7", - "colab_type": "text" - }, - "source": [ - "### Doubly Linked List" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "aBPpNAi6hAiS", - "colab_type": "text" - }, - "source": [ - "#### Class" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "ZyASd_JKe2mI", - "colab_type": "code", - "colab": {} - }, - "source": [ - "class SinglyLinkeList:\n", - " def __init__(self):\n", - " self.head = None\n", - " self.size = 0\n", - " \n", - " def len(self):\n", - " return self.size\n", - " \n", - " def append(self, val):\n", - " 
node = SinglyLinkedNode(val)\n", - " if self.head:\n", - " current = self.head\n", - " while current:\n", - " current = current.next\n", - " current.next = node\n", - " else:\n", - " self.head = node\n", - " self.size += 1\n", - "\n", - " def delete(self, val):\n", - " current = self.head\n", - " prev = self.head\n", - " while current:\n", - " if current.val == val:\n", - " # if the node is head\n", - " if current == self.head:\n", - " self.head = current.next\n", - " # rewire\n", - " else:\n", - " prev.next = current.next\n", - " self.size -= 1\n", - " prev = current\n", - " current = current.next\n", - "\n", - " def deleteByNode(self, node):\n", - " node.val = node.next.val\n", - " node.next = node.next.next\n", - " \n", - " def iter(self):\n", - " current = self.head\n", - " while current:\n", - " val = current.val\n", - " current = current.next\n", - " yield val\n", - " \n", - " def search(self, val):\n", - " for value in self.iter():\n", - " if value == val:\n", - " return True\n", - " return False\n", - " \n", - " def clear(self):\n", - " self.head = None\n", - " self.size = 0" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "lRzzlJK2b4Rc", - "colab_type": "code", - "colab": {} - }, - "source": [ - "head = Node('A')\n", - "print(head)" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "torUZUYliBhC", - "colab_type": "text" - }, - "source": [ - "## Heap\n", - "\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "IXOKDdj0iGks", - "colab_type": "text" - }, - "source": [ - "### push" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "qjBdqfnyISQg", - "colab_type": "code", - "colab": {} - }, - "source": [ - "'''\n", - "Enforce min-heap property, leaf-to-root\n", - "'''\n", - "def _float(idx, heap): \n", - " while idx // 2: \n", - " p = idx // 2\n", - " # Violation\n", - " if heap[idx] < heap[p]:\n", - " heap[idx], heap[p] = heap[p], 
heap[idx]\n", - " else:\n", - " break\n", - " idx = p \n", - " return\n", - "\n", - "def push(heap, k):\n", - " heap.append(k)\n", - " _float(idx = len(heap) - 1, heap=heap)\n" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "ICFvjZoAz-uh", - "colab_type": "code", - "colab": {} - }, - "source": [ - "heap = [None, 6, 7, 12, 10, 15, 17] \n", - "push(heap, 5)\n", - "print(heap)" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "09fdIkas1RxR", - "colab_type": "text" - }, - "source": [ - "### Pop" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "S0lEx88ARQgE", - "colab_type": "code", - "colab": {} - }, - "source": [ - "def _sink(idx, heap): \n", - " size = len(heap)\n", - " while 2 * idx < size:\n", - " li = 2 * idx\n", - " ri = li + 1\n", - " mi = idx\n", - " if heap[li] < heap[mi]:\n", - " mi = li\n", - " if ri < size and heap[ri] < heap[mi]:\n", - " mi = ri\n", - " if mi != idx:\n", - " # swap index with mi\n", - " heap[idx], heap[mi] = heap[mi], heap[idx]\n", - " else:\n", - " break\n", - " idx = mi\n", - "\n", - "def pop(heap):\n", - " val = heap[1]\n", - " # Move the last item into the root position\n", - " heap[1] = heap.pop()\n", - " _sink(idx=1, heap=heap)\n", - " return val" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "10cRJ3MCSzaO", - "colab_type": "code", - "colab": {} - }, - "source": [ - "k = pop(heap)\n", - "print(k, heap)" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "58m92FBiqRMp", - "colab_type": "text" - }, - "source": [ - "### Heapify" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "yk3HbmPHqPk8", - "colab_type": "code", - "colab": {} - }, - "source": [ - "def _sink(idx, heap): \n", - " size = len(heap)\n", - " while 2 * idx < size:\n", - " li = 2 * idx\n", - " ri = li + 1\n", - " mi = idx\n", - " if 
heap[li] < heap[mi]:\n", - " mi = li\n", - " if ri < size and heap[ri] < heap[mi]:\n", - " mi = ri\n", - " if mi != idx:\n", - " # swap index with mi\n", - " heap[idx], heap[mi] = heap[mi], heap[idx]\n", - " else:\n", - " break\n", - " idx = mi\n", - "\n", - "def heapify(lst):\n", - " heap = [None] + lst\n", - " n = len(lst)\n", - " for i in range(n//2, 0, -1):\n", - " _sink(i, heap)\n", - " return heap" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "sItWBj0CyXI7", - "colab_type": "code", - "colab": {} - }, - "source": [ - "a = [21, 1, 45, 78, 3, 5]\n", - "heapify(a)" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "vfd0-49zzW-9", - "colab_type": "code", - "colab": {} - }, - "source": [ - "def _float(idx, heap): \n", - " while idx // 2: \n", - " p = idx // 2\n", - " # Violation\n", - " if heap[idx] < heap[p]:\n", - " heap[idx], heap[p] = heap[p], heap[idx]\n", - " # else:\n", - " # break\n", - " idx = p \n", - " return\n", - "\n", - "\n", - "def heapify(lst):\n", - " heap = [None] + lst\n", - " n = len(lst)\n", - " for i in range(n, n//2, -1):\n", - " _float(i, heap)\n", - " return heap" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "JLTpZvJiziSb", - "colab_type": "code", - "colab": {} - }, - "source": [ - "a = [21, 1, 45, 78, 3, 5]\n", - "heapify(a)" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "ZU3Y9FdTOJy1", - "colab_type": "text" - }, - "source": [ - "## Python Heapq" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "6X_33HRdOM70", - "colab_type": "code", - "colab": {} - }, - "source": [ - "from heapq import heappush, heappop, heapify\n", - "h = [21, 1, 45, 78, 3, 5]\n", - "heapify(h)\n", - "h" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "6sKjGWBcVzWj", - "colab_type": 
"code", - "colab": {} - }, - "source": [ - "heappop(h)\n", - "heappush(h, 15)\n", - "h" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "r5EZnDW6XGFN", - "colab_type": "code", - "colab": {} - }, - "source": [ - "from heapq import nlargest, nsmallest\n", - "h = [21, 1, 45, 78, 3, 5]\n", - "nl = nlargest(3, h)\n", - "ns = nsmallest(3, h)" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "9HBRESbMXdU9", - "colab_type": "code", - "colab": {} - }, - "source": [ - "print(nl)\n", - "print(ns)" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "hTp-vCjabfip", - "colab_type": "code", - "colab": {} - }, - "source": [ - "# Efficiency\n", - "import random\n", - "h = [random.randint(1, 1000) for _ in range(10000)]\n", - "%time nl = nlargest(3, h)\n", - "%time ns = nsmallest(3, h)" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "8wdwg2Ora5L9", - "colab_type": "code", - "colab": {} - }, - "source": [ - "%time h.sort()\n", - "%time nl = h[:3]" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "Ad-PPc_5YOOM", - "colab_type": "code", - "colab": {} - }, - "source": [ - "# Merge\n", - "from heapq import merge\n", - "a = [1, 3, 5, 21, 45, 78]\n", - "b = [2, 4, 8, 16]\n", - "ab = merge(a, b)\n", - "ab" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "wrR9-VeiaLVU", - "colab_type": "code", - "colab": {} - }, - "source": [ - "ab_lst = [n for n in ab]\n", - "ab_lst" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "bDrjp_ATY9Vr", - "colab_type": "code", - "colab": {} - }, - "source": [ - "for n in ab:\n", - " print(n, end=' ')" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": 
"n3xnm-3_cuN0", - "colab_type": "text" - }, - "source": [ - "#### Max heap" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "o0v6p3A4cv5S", - "colab_type": "code", - "colab": {} - }, - "source": [ - "from heapq import _heapify_max\n", - "h = [21, 1, 45, 78, 3, 5]\n", - "_heapify_max(h)\n", - "h" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "tYh_pTBHdKXC", - "colab_type": "code", - "colab": {} - }, - "source": [ - "from heapq import _heapify_max\n", - "h = [21, 1, 45, 78, 3, 5]\n", - "h = [-n for n in h]\n", - "heapify(h)\n", - "a = -heappop(h)\n", - "a" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "I0IsJlmz18CZ", - "colab_type": "text" - }, - "source": [ - "### Heap with More Operations\n", - "\n", - "Random access and change of value" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "fHEzP69W2EYx", - "colab_type": "code", - "colab": {} - }, - "source": [ - "import heapq\n", - "heap = [[3, 'a'], [10, 'b'], [5,'c'], [8, 'd']]\n", - "heapify(heap)\n", - "print(heap)\n", - "\n", - "heap[0] = [6, 'a']\n", - "# Increased value\n", - "heapq._siftup(heap, 0) \n", - "print(heap)\n", - "#Decreased Value\n", - "heap[2] = [3, 'a']\n", - "heapq._siftdown(heap, 0, 2)\n", - "print(heap)" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "aQzfFn7M-vO8", - "colab_type": "code", - "colab": {} - }, - "source": [ - "# tasks with same priorities\n", - "h = [[3, 'e'], [3, 'd'], [10, 'c'], [5,'b'], [3, 'a']]\n", - "heapify(h)\n", - "print(h)" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "Q5E_Ne9POTdu", - "colab_type": "text" - }, - "source": [ - "## Priority Queue" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "I2PUG9MpPmjH", - "colab_type": "code", - "colab": {} - }, - "source": [ - "from dataclasses import dataclass, 
field\n", - "from typing import Any\n", - "\n", - "@dataclass(order=True)\n", - "class PrioritizedItem:\n", - " priority: int\n", - " item: Any=field(compare=False)" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "feBiuMDQPrK1", - "colab_type": "code", - "colab": {} - }, - "source": [ - "# it does not seem working\n", - "h = [PrioritizedItem(3, 'c'), PrioritizedItem(3, 'a'), PrioritizedItem(10, 'b'), PrioritizedItem(5,'c'), PrioritizedItem(3, 'b')]\n", - "heapify(h)\n", - "print(h)" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "Krf5b7uxoA6a", - "colab_type": "text" - }, - "source": [ - "### With heapq" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "MEZVoPbkAaM0", - "colab_type": "code", - "colab": {} - }, - "source": [ - "# Add counter as a tie-breaker so that sort stability is kept\n", - "import itertools\n", - "counter = itertools.count()\n", - "h = [[3, 'e'], [3, 'd'], [10, 'c'], [5,'b'], [3, 'a']]\n", - "h = [[p, next(counter), t] for p, t in h]\n", - "print(h)\n", - "heapify(h)\n", - "h" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "8HTtbM3FiKaE", - "colab_type": "code", - "colab": {} - }, - "source": [ - "# Removed mark\n", - "REMOVED = ''\n", - "# Remove task 'd'\n", - "h[1][2] = REMOVED\n", - "# Updata task 'b''s proprity to 14\n", - "h[3][2] = REMOVED\n", - "heappush(h, [14, next(counter), 'b'])\n", - "vh = []\n", - "while h:\n", - " item = heappop(h)\n", - " if item[2] != REMOVED:\n", - " vh.append(item)\n", - "vh\n", - " " - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "EX-43EOHiAuK", - "colab_type": "code", - "colab": {} - }, - "source": [ - "# A heap associated with entry_finder\n", - "counter = itertools.count()\n", - "entry_finder = {}\n", - "h = [[3, 'e'], [3, 'd'], [10, 'c'], [5,'b'], [3, 'a']]\n", - "heap = []\n", 
- "for p, t in h:\n", - " item = [p, next(counter), t]\n", - " heap.append(item)\n", - " entry_finder[t] = item\n", - "heapify(heap)\n", - "print(heap)\n", - "print(entry_finder)" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "yc1LDeRyrzd5", - "colab_type": "code", - "colab": {} - }, - "source": [ - "# With entry_finder\n", - "REMOVED = ''\n", - "def remove_task(task_id):\n", - " if task_id in entry_finder:\n", - " entry_finder[task_id][2] = REMOVED\n", - " entry_finder.pop(task_id) # delete from the dictionary\n", - " return\n", - "\n", - "# Remove task 'd'\n", - "remove_task('d')\n", - "# Updata task 'b''s priority to 14\n", - "remove_task('b')\n", - "new_item = [14, next(counter), 'b']\n", - "heappush(heap, new_item)\n", - "entry_finder['b'] = new_item\n", - "\n", - "print(heap)\n", - "vh = []\n", - "while heap:\n", - " item = heappop(heap)\n", - " if item[2] != REMOVED:\n", - " vh.append(item)\n", - "vh" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "4DtjHkQ0oGzO", - "colab_type": "code", - "colab": {} - }, - "source": [ - "# A heapq based priority queue class\n", - "from heapq import heappush, heappop, heapify\n", - "from typing import List\n", - "import itertools\n", - "class PriorityQueue:\n", - " def __init__(self, items:List[List]=[]):\n", - " self.heap = []\n", - " self.entry_finder = {} \n", - " self.REMOVED = ''\n", - " self.counter = itertools.count() \n", - " # Add items to heap\n", - " for p, t in items:\n", - " item = [p, next(self.counter), t]\n", - " self.entry_finder[t] = item\n", - " self.heap.append(item)\n", - " heapify(self.heap)\n", - " \n", - " def add_task(self, task, priority=0):\n", - " 'Add a new task or update the priority of an existing task'\n", - " if task in self.entry_finder:\n", - " self.remove_task(task)\n", - " count = next(self.counter)\n", - " item = [priority, count, task]\n", - " self.entry_finder[task] = item\n", - 
" heappush(self.heap, item)\n", - " \n", - " def remove_task(self, task):\n", - " 'Mark an existing task as REMOVED. Raise KeyError if not found.'\n", - " entry = self.entry_finder.pop(task)\n", - " entry[-1] = self.REMOVED\n", - "\n", - " def pop_task(self):\n", - " 'Remove and return the lowest priority task. Raise KeyError if empty.'\n", - " while self.heap:\n", - " priority, count, task = heappop(self.heap)\n", - " if task is not self.REMOVED:\n", - " del self.entry_finder[task]\n", - " return task\n", - " raise KeyError('pop from an empty priority queue')" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "0Ad-zEmD-4hW", - "colab_type": "code", - "colab": {} - }, - "source": [ - "pq = PriorityQueue([[3, 'e'], [3, 'd'], [10, 'c'], [5,'b'], [3, 'a']])\n", - "pq.remove_task('d')\n", - "pq.add_task('b', 14)\n", - "vh = []\n", - "print(pq.heap)\n", - "while pq.heap:\n", - " task_id = pq.pop_task()\n", - " vh.append(task_id)\n", - "print(vh)" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "B3mm_e1-oDbx", - "colab_type": "text" - }, - "source": [ - "### With PriorityQueue()" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "y-PvMDzG0k1h", - "colab_type": "code", - "colab": {} - }, - "source": [ - "from queue import PriorityQueue\n", - "data = [[3, 'e'], [3, 'd'], [10, 'c'], [5,'b'], [3, 'a']]\n", - "pq = PriorityQueue()\n", - "for d in data:\n", - " pq.put(d)\n", - " \n", - "process_order = []\n", - "while not pq.empty():\n", - " process_order.append(pq.get())\n", - "process_order" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "ReGWutff4DAt", - "colab_type": "code", - "colab": {} - }, - "source": [ - "import itertools\n", - "from dataclasses import dataclass, field\n", - "\n", - "@dataclass(repr=True, order=True)\n", - "class Job:\n", - " counter = itertools.count()\n", - " priority: float\n", - 
" task: str = field(compare=False)\n", - " count: int = next(Job.counter)\n", - " \n", - "\n", - " # def __init__(self, priority, task):\n", - " # self.priority = priority\n", - " # self.count = next(Job.counter)\n", - " # self.task = task\n", - "\n", - " # def __lt__(self, other): \n", - " # try:\n", - " # return [self.priority, self.count] < [other.priority, other.count]\n", - " # except AttributeError:\n", - " # return NotImplemented\n", - " # def __eq__(self, other): \n", - " # try:\n", - " # return [self.priority, self.count] == [other.priority, other.count]\n", - " # except AttributeError:\n", - " # return NotImplemented\n", - " # def __cmp__(self, other):\n", - " # return cmp([self.priority, self.count], [other.priority, other.count])\n", - " " - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "kdZxnhAa9aRM", - "colab_type": "code", - "colab": {} - }, - "source": [ - "@dataclass\n", - "class C:\n", - " x: int\n", - " y: int = field(repr=False)\n", - " z: int = field(repr=False, default=10)\n", - " t: int = 20\n", - "\n", - "a = C(1, 2)\n", - "print(a)" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "3FWTCf9z5Mat", - "colab_type": "code", - "colab": {} - }, - "source": [ - "data = [[3, 'e'], [3, 'd'], [10, 'c'], [5,'b'], [3, 'a']]\n", - "pq = PriorityQueue()\n", - "for p, t in data:\n", - " pq.put(Job(p, t))\n", - " \n", - "process_order = []\n", - "while not pq.empty():\n", - " process_order.append(pq.get())\n", - "process_order" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "mtOycJegCj8K", - "colab_type": "text" - }, - "source": [ - "### Hands-on examples" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "jLcY0VH4CmTk", - "colab_type": "code", - "colab": {} - }, - "source": [ - "nums = [1,1,1,2,2,3]\n", - "k = 2" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": 
"code", - "metadata": { - "id": "OVFTSwrNCqUn", - "colab_type": "code", - "colab": {} - }, - "source": [ - "from collections import Counter\n", - "topk = [x for x, _ in Counter(nums).most_common(k)]\n", - "print(topk)" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "ZeOezbL9DRmb", - "colab_type": "code", - "colab": {} - }, - "source": [ - "import heapq\n", - "count = Counter(nums)\n", - "# Use the value to compare with\n", - "topk = heapq.nlargest(k, count.keys(), key=lambda x: count[x])\n", - "topk" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "KPrKjREaFPhD", - "colab_type": "code", - "colab": {} - }, - "source": [ - "from queue import PriorityQueue\n", - "count = Counter(nums)\n", - "pq = PriorityQueue()\n", - "for key, c in count.items():\n", - " pq.put((-c, key))\n", - "\n", - "topk = [pq.get()[1] for i in range(k)]\n", - "topk\n" - ], - "execution_count": 0, - "outputs": [] - } - ] -} \ No newline at end of file diff --git a/Colab_Codes/chapter_search_strategies.ipynb b/Colab_Codes/chapter_search_strategies.ipynb deleted file mode 100644 index ac8e0a8..0000000 --- a/Colab_Codes/chapter_search_strategies.ipynb +++ /dev/null @@ -1,3030 +0,0 @@ -{ - "nbformat": 4, - "nbformat_minor": 0, - "metadata": { - "colab": { - "name": "chapter_search_strategies.ipynb", - "provenance": [], - "collapsed_sections": [], - "toc_visible": true - }, - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - } - }, - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "id": "Ev0G5LfkFTzu", - "colab_type": "text" - }, - "source": [ - "## Linear Search" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "23e3DXOGE59o", - "colab_type": "code", - "colab": {} - }, - "source": [ - "#Linear Search\n", - "def linear_search(A, t):\n", - " for i, v in enumerate(A):\n", - " if A[i] == t:\n", - " return i\n", - " return -1" - ], - "execution_count": 0, - 
"outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "hfcQ8BdWSO8U", - "colab_type": "text" - }, - "source": [ - "## Uninformed Search" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "ncgNw3NUJ3lC", - "colab_type": "code", - "colab": {} - }, - "source": [ - "import inspect\n", - "\n", - "\n", - "def get_methods(cls_):\n", - " methods = inspect.getmembers(cls_, inspect.isfunction)\n", - " return dict(methods)" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "OJ9IAM2uIfci", - "colab_type": "code", - "outputId": "8bd64ccf-3b66-4025-df1c-8544e38e52a7", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 313 - } - }, - "source": [ - "from graphviz import Digraph\n", - "dot = Digraph(comment='The Round Table', format='png')\n", - "#print(get_methods(Digraph))\n", - "#print(Digraph.__dict__)\n", - "nodes = ['S', 'A', 'B', 'G']\n", - "for node in nodes:\n", - " dot.node(node)\n", - "dot.edge('S', 'A', _attributes={'label': '4'})\n", - "dot.edge('S', 'B', _attributes={'label': '5'})\n", - "dot.edge('A', 'G', _attributes={'label': '7'})\n", - "dot.edge('B', 'G', _attributes={'label': '3'})\n", - "dot.render('test-output/ucs', view=True) \n", - "\n", - "dot" - ], - "execution_count": 478, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "" - ], - "image/svg+xml": "\n\n\n\n\n\n%3\n\n\n\nS\n\nS\n\n\n\nA\n\nA\n\n\n\nS->A\n\n\n4\n\n\n\nB\n\nB\n\n\n\nS->B\n\n\n5\n\n\n\nG\n\nG\n\n\n\nA->G\n\n\n7\n\n\n\nB->G\n\n\n3\n\n\n\n" - }, - "metadata": { - "tags": [] - }, - "execution_count": 478 - } - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "nRps_otMIJGX", - "colab_type": "code", - "outputId": "04e2165f-a5d3-4da8-de4a-876de4a47089", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 52 - } - }, - "source": [ - "## Graph Search\n", - "from collections import defaultdict\n", - "al = defaultdict(list)\n", - "al['S'] = [('A', 
4), ('B', 5)]\n", - "al['A'] = [('G', 7)]\n", - "al['B'] = [('G', 3)]\n", - "al" - ], - "execution_count": 479, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "defaultdict(list,\n", - " {'A': [('G', 7)], 'B': [('G', 3)], 'S': [('A', 4), ('B', 5)]})" - ] - }, - "metadata": { - "tags": [] - }, - "execution_count": 479 - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "YT0tkaJ4SifH", - "colab_type": "text" - }, - "source": [ - "### Breath-first Search" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "2yOpMY0_U-ov", - "colab_type": "code", - "colab": {} - }, - "source": [ - "def bfs(g, s):\n", - " q = [s]\n", - " while q:\n", - " n = q.pop(0)\n", - " print(n, end = ' ')\n", - " for v, _ in g[n]:\n", - " q.append(v)" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "LqiB-SJ8VJMg", - "colab_type": "code", - "outputId": "98a4fbbf-ebef-41a2-f911-b49bc1c9b45e", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 34 - } - }, - "source": [ - "bfs(al, 'S')" - ], - "execution_count": 481, - "outputs": [ - { - "output_type": "stream", - "text": [ - "S A B G G " - ], - "name": "stdout" - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "PJ2MLkuJSTf_", - "colab_type": "text" - }, - "source": [ - "### Depth-first Search\n", - "The completeness of DFS depends on the search space. If your search space is finite, then Depth-First Search is complete. However, if there are infinitely many alternatives, it might not find a solution. For example, suppose you were coding a path-search problem on city streets, and every time your partial path came to an intersection, you always searched the left-most street first. Then you might just keep going around the same block indefinitely.\n", - "\n", - "Sometimes there are ways to bound the search to get completeness even when the search space is unbounded. 
# Implementation of recursive dfs
def dfs(g, vi):
    """Depth-first tree search (recursive); prints vertices in pre-order.

    ``g[vi]`` yields ``(neighbour, weight)`` pairs; the weight is ignored.
    No visited set is kept, so shared descendants are printed once per path.
    """
    print(vi, end=' ')
    for child, _weight in g[vi]:
        dfs(g, child)


# Implementation of iterative dfs
def dfs_iter(g, s):
    """Depth-first tree search driven by an explicit stack.

    Neighbours are pushed in adjacency order and popped last-in-first-out, so
    siblings are visited in the reverse of the order the recursive ``dfs``
    uses.
    """
    pending = [s]
    while len(pending) > 0:
        current = pending.pop()
        print(current, end=' ')
        pending.extend(nbr for nbr, _weight in g[current])
import heapq
from collections import defaultdict


def ucs(graph, s, t):
    """Uniform-cost search: cheapest total edge cost from ``s`` to ``t``.

    Args:
        graph: adjacency mapping; ``graph[n]`` is an iterable of
               ``(neighbour, cost)`` pairs.
        s: start vertex.
        t: goal vertex.

    Returns:
        The accumulated cost of the first path to ``t`` popped from the
        priority queue, or ``None`` when ``t`` is never reached.

    Note:
        Vertices are expanded at most once. This presumes non-negative edge
        costs (the usual uniform-cost-search precondition), under which the
        first pop of a vertex already carries its cheapest cost.
    """
    frontier = [(0, s)]  # (path cost so far, vertex)
    expanded = set()
    while frontier:
        cost, n = heapq.heappop(frontier)
        # The goal test happens when the goal is removed from the queue, not
        # when it is pushed, so a cheaper route found later still wins.
        if n == t:
            return cost
        if n in expanded:
            continue  # stale, more expensive entry for an already-expanded vertex
        expanded.add(n)
        for v, c in graph[n]:
            if v not in expanded:
                heapq.heappush(frontier, (cost + c, v))
    return None


# Depth limited Search
def dls(graph, cur, t, maxDepth):
    """Depth-limited search used by ``ids``.

    The goal test is applied only to vertices that sit exactly ``maxDepth``
    edges below the vertex of the initial call; each such vertex is printed
    as ``test:  <vertex>``.

    Returns:
        True if a vertex at that depth equals ``t``, otherwise False.
    """
    if maxDepth < 0:
        return False
    if maxDepth == 0:
        print('test: ', cur, end=' ')
        # Nothing below the limit can succeed, so answer here instead of
        # recursing one more level just to return False.
        return cur == t

    # Recur for adjacent vertices
    for n, _ in graph[cur]:
        if dls(graph, n, t, maxDepth - 1):
            return True
    return False


# Iterative-deepening search
def ids(graph, s, t, maxDepth):
    """Iterative deepening: run ``dls`` with limits 0, 1, ..., ``maxDepth - 1``.

    Returns:
        True as soon as one limit finds ``t``; False when every limit below
        ``maxDepth`` fails. Progress is printed for each limit.
    """
    for depth in range(maxDepth):
        print('depth:', depth)
        if dls(graph, s, t, depth):
            return True
        print('\n')
    return False


def bfs_level(graph, q, bStep):
    '''
    Advance a breadth-first frontier by one level.

    q: set for better efficiency in intersection checking
    bStep: when false the frontier is returned unchanged (it is the other
           direction's turn to move)

    Returns the set of all successors of the vertices in q.
    '''
    if not bStep:
        return q
    return {v for n in q for v, _ in graph[n]}


def intersect(qs, qt):
    """Return True when the two frontier sets share at least one vertex."""
    return bool(qs & qt)


def bis(graph, s, t):
    """Bidirectional breadth-first search: is ``t`` reachable from ``s``?

    The forward frontier grows from ``s`` along the edges of ``graph`` and the
    backward frontier grows from ``t`` along the reversed edges; the two sides
    move alternately, forward first, and the search succeeds when the
    frontiers meet.

    Args:
        graph: mapping vertex -> list of ``(neighbour, cost)`` pairs.
        s: start vertex.
        t: goal vertex.

    Returns:
        True if the frontiers intersect, False once either side runs out of
        new vertices.
    """
    # First build a graph with opposite edges
    bgraph = defaultdict(list)
    for key, value in graph.items():
        for n, c in value:
            bgraph[n].append((key, c))
    print(graph, bgraph)
    # Look successors up through a defaultdict view so that sink vertices work
    # with a plain dict and no keys are inserted into the caller's graph.
    fgraph = defaultdict(list, graph)
    # Start bidirectional search
    qs, qt = {s}, {t}
    # Vertices already placed in a frontier, per direction. Dropping them from
    # later frontiers is what lets the loop end on cyclic graphs.
    seen_s, seen_t = {s}, {t}
    forward_turn = True
    while qs and qt:
        print(qs, qt)
        if intersect(qs, qt):
            return True
        if forward_turn:
            qs = bfs_level(fgraph, qs, True) - seen_s
            seen_s |= qs
        else:
            qt = bfs_level(bgraph, qt, True) - seen_t
            seen_t |= qt
        forward_turn = not forward_turn
    return False
Digraph(comment='The Round Table', format='png')\n", - "#print(get_methods(Digraph))\n", - "#print(Digraph.__dict__)\n", - "nodes = [0, 1, 2, 3, 4, 5]\n", - "for node in nodes:\n", - " dot.node(str(node))\n", - "dot.edges(['01','12', '24', '43', '45'])\n", - "dot.render('test-output/free_tree', view=True) \n", - "dot" - ], - "execution_count": 494, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "" - ], - "image/svg+xml": "\n\n\n\n\n\n%3\n\n\n\n0\n\n0\n\n\n\n1\n\n1\n\n\n\n0->1\n\n\n\n\n\n2\n\n2\n\n\n\n1->2\n\n\n\n\n\n4\n\n4\n\n\n\n2->4\n\n\n\n\n\n3\n\n3\n\n\n\n4->3\n\n\n\n\n\n5\n\n5\n\n\n\n4->5\n\n\n\n\n\n" - }, - "metadata": { - "tags": [] - }, - "execution_count": 494 - } - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "-5v2VZ8bImzM", - "colab_type": "code", - "outputId": "eb70eca0-d48b-45c6-9141-b8685859e244", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 369 - } - }, - "source": [ - "dot = Graph(comment='The Round Table', format='png')\n", - "#print(get_methods(Digraph))\n", - "#print(Digraph.__dict__)\n", - "nodes = [0, 1, 2, 3, 4, 5]\n", - "# for node in nodes:\n", - "# dot.node(str(node))\n", - "dot.edges(['01','02', '12','13', '24','34', '45'])\n", - "rank1 = [0]\n", - "rank2 = [1, 2]\n", - "rank3 = [3, 4]\n", - "rank4 = [5]\n", - "for rank in [rank1, rank2, rank3, rank4]:\n", - " with dot.subgraph() as s:\n", - " s.attr(rank='same')\n", - " for node in rank:\n", - " s.node(str(node))\n", - "dot.render('test-output/undirected_cyclic_graph', view=True) \n", - "dot" - ], - "execution_count": 495, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "" - ], - "image/svg+xml": "\n\n\n\n\n\n%3\n\n\n\n0\n\n0\n\n\n\n1\n\n1\n\n\n\n0--1\n\n\n\n\n2\n\n2\n\n\n\n0--2\n\n\n\n\n1--2\n\n\n\n\n3\n\n3\n\n\n\n1--3\n\n\n\n\n4\n\n4\n\n\n\n2--4\n\n\n\n\n3--4\n\n\n\n\n5\n\n5\n\n\n\n4--5\n\n\n\n\n" - }, - "metadata": { - "tags": [] - }, - "execution_count": 495 - } - ] - }, - { 
# Draw the directed cyclic graph; vertices that share a rank list are placed on the same row.
dot = Digraph(comment='The Round Table', format='png')
#print(get_methods(Digraph))
#print(Digraph.__dict__)
nodes = [0, 1, 2, 3, 4, 5]
# for node in nodes:
# dot.node(str(node))
# Each two-character string is one directed edge "<tail><head>".
dot.edges(['01', '12','31', '20', '24','43', '45'])
rank1 = [0]
rank2 = [1, 2]
rank3 = [3, 4]
rank4 = [5]
for rank in [rank1, rank2, rank3, rank4]:
    with dot.subgraph() as s:
        s.attr(rank='same')
        for node in rank:
            s.node(str(node))
dot.render('test-output/directed_cyclic_graph', view=True)
dot

# Prepare Graph Example
# Adjacency list of the free tree: edges 0->1, 1->2, 2->4, 4->3, 4->5 (no cycle).
# Vertex 3 has no outgoing edge, so ft[3] keeps its initial empty list.
ft = [[] for _ in range(6)]
ft[0] = [1]
ft[1] = [2]
ft[2] = [4]
ft[4] = [3, 5]
ft[5] = []

# Directed cyclic graph: same edges as the drawing above (contains the cycles 0->1->2->0 and 1->2->4->3->1).
dcg = [[] for _ in range(6)]
dcg[0] = [1]
dcg[1] = [2]
dcg[2] = [0, 4]
dcg[3] = [1]
dcg[4] = [3, 5]
dcg[5] = []
#Free Tree Search
def dfs(g, vi):
    """Print the vertices of a tree in depth-first pre-order, starting at ``vi``.

    ``g`` is an adjacency list (``g[v]`` is the list of successors of ``v``).
    There is no cycle protection, so this is only suitable for acyclic input.
    """
    def visit(node):
        print(node, end=' ')
        for successor in g[node]:
            visit(successor)

    visit(vi)
#Graph Search, track paths and avoids cycle
def dfs(g, vi, path):
    """Depth-first search that records every simple path it walks.

    Args:
        g: adjacency list; ``g[v]`` lists the successors of ``v``.
        vi: vertex being visited.
        path: list of vertices from the start vertex to ``vi`` (inclusive).

    Side effects:
        Appends ``path`` to the notebook-level list ``paths`` and ``vi`` to the
        notebook-level list ``orders``; both must exist before the call.

    A successor already on ``path`` is skipped, which prevents walking around
    a cycle; a vertex may still be visited again along a different path.
    """
    paths.append(path)
    orders.append(vi)
    off_path_successors = (nv for nv in g[vi] if nv not in path)
    for nxt in off_path_successors:
        dfs(g, nxt, path + [nxt])
# Keep only the maximal paths (those that cannot be extended any further)
def dfs_helper(g, vi, path):
    """Depth-first search that records only the paths ending at a dead end.

    Args:
        g: adjacency list; ``g[v]`` lists the successors of ``v``.
        vi: vertex being visited.
        path: list of vertices from the start vertex to ``vi`` (inclusive).

    Side effects:
        Appends ``vi`` to the notebook-level list ``orders``. When every
        successor of ``vi`` is already on ``path``, appends ``path`` to the
        notebook-level list ``paths``.
    """
    orders.append(vi)
    extensions = [nv for nv in g[vi] if nv not in path]
    if not extensions:
        # Dead end: the path cannot grow without revisiting one of its vertices.
        paths.append(path)
        return
    for nv in extensions:
        dfs_helper(g, nv, path + [nv])
# Track edges
def dfs_helper(g, vi, path):
    """Depth-first search that records the edges of the resulting search tree.

    Because a vertex can be reached along several paths, every visit gets its
    own search-tree node ``(vertex, copy_index)``.

    Args:
        g: adjacency list; ``g[v]`` lists the successors of ``v``.
        vi: vertex being visited.
        path: list of vertices from the start vertex to ``vi`` (inclusive).

    Side effects (notebook-level names that must exist before the call):
        orders:  ``vi`` is appended on every visit.
        tracker: maps a vertex to the highest copy index handed out so far.
        edges:   maps a search-tree node to the list of its child nodes; it is
                 indexed without a prior membership test, so it needs to create
                 missing entries on access.
    """
    orders.append(vi)
    # Copy index of this visit: 0 on the first visit, otherwise one past the
    # highest index used so far.
    node = (vi, 0)
    if vi in tracker:
        node = (vi, tracker[vi] + 1)
        tracker[vi] += 1
    else:
        tracker[vi] = 0
    for nv in g[vi]:
        if nv not in path:
            # add an edge
            # Predict the copy index the child assigns itself in the recursive
            # call below; this must be computed before that call updates
            # tracker[nv].
            node1 = (nv, 0)
            if nv in tracker:
                node1 = (nv, tracker[nv]+1)
            edges[node].append(node1)
            dfs_helper(g, nv, path+[nv])
    return
"base_uri": "https://localhost:8080/", - "height": 561 - } - }, - "source": [ - "# Plot the search tree\n", - "dot = Digraph(comment='The Round Table', format='png')\n", - "#print(get_methods(Digraph))\n", - "#print(Digraph.__dict__)\n", - "nodes = [0, 1, 2, 3, 4, 5]\n", - "for node, count in tracker.items():\n", - " for i in range(count+1):\n", - " name=str(node)+str(i)\n", - " label=str(node)\n", - " #print(name, label)\n", - " dot.node(name=str(node)+str(i), label=str(node))\n", - "for node1, nlist in edges.items():\n", - " for node2 in nlist:\n", - " a = str(node1[0])+str(node1[1])\n", - " b = str(node2[0])+str(node2[1])\n", - " #print(a, b)\n", - " dot.edge(a, b)\n", - "#dot.edges(['01', '12','31', '20', '24','43', '45'])\n", - "dot.render('test-output/search_tree_dfs', view=True) \n", - "dot" - ], - "execution_count": 512, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "" - ], - "image/svg+xml": "\n\n\n\n\n\n%3\n\n\n\n00\n\n0\n\n\n\n10\n\n1\n\n\n\n00->10\n\n\n\n\n\n22\n\n2\n\n\n\n00->22\n\n\n\n\n\n20\n\n2\n\n\n\n10->20\n\n\n\n\n\n31\n\n3\n\n\n\n10->31\n\n\n\n\n\n11\n\n1\n\n\n\n32\n\n3\n\n\n\n11->32\n\n\n\n\n\n12\n\n1\n\n\n\n40\n\n4\n\n\n\n20->40\n\n\n\n\n\n21\n\n2\n\n\n\n22->11\n\n\n\n\n\n43\n\n4\n\n\n\n22->43\n\n\n\n\n\n30\n\n3\n\n\n\n40->30\n\n\n\n\n\n50\n\n5\n\n\n\n40->50\n\n\n\n\n\n41\n\n4\n\n\n\n41->21\n\n\n\n\n\n51\n\n5\n\n\n\n41->51\n\n\n\n\n\n42\n\n4\n\n\n\n52\n\n5\n\n\n\n42->52\n\n\n\n\n\n33\n\n3\n\n\n\n43->33\n\n\n\n\n\n53\n\n5\n\n\n\n43->53\n\n\n\n\n\n31->41\n\n\n\n\n\n32->42\n\n\n\n\n\n33->12\n\n\n\n\n\n" - }, - "metadata": { - "tags": [] - }, - "execution_count": 512 - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "ZD5tQSWFoVOK", - "colab_type": "text" - }, - "source": [ - "We can see as the edges in the directed graph increase, the search tree explode exponentially with the number of edges. 
#Graph Search, track paths and avoids cycle
def recursive(g, vi, path):
    '''
    Depth-first search that returns its results instead of writing to globals.

    g: graph as an adjacency list
    vi: the vertex index
    path: vertices from the start vertex to vi (inclusive); successors that
          are already on it are skipped

    Returns (paths, nodes):
      paths - every simple path found that starts at vi, including [vi] itself
      nodes - the vertices in the order they were visited
    '''
    suffixes = [[]]          # the empty suffix stands for the path that ends at vi
    visit_order = [vi]
    for nv in g[vi]:
        if nv in path:
            continue
        child_paths, child_order = recursive(g, nv, path + [nv])
        suffixes += child_paths
        visit_order += child_order
    return [[vi] + tail for tail in suffixes], visit_order
def backtrace(parent, s, t):
    """Rebuild the path from ``s`` to ``t`` by following ``parent`` links.

    Args:
        parent: mapping child vertex -> the vertex it was discovered from.
        s: start vertex (has no entry in ``parent``).
        t: end vertex.

    Returns:
        List of vertices from ``s`` to ``t`` inclusive.

    Raises:
        KeyError: if the chain of parents starting at ``t`` does not lead to ``s``.
    """
    p = t
    path = []
    while p != s:
        path.append(p)
        p = parent[p]
    path.append(s)
    return path[::-1]


# Backtrace path
# s is used to backtrace the path
def dfgs(g, vi, s, t, visited, parent):
    """Depth-first graph search that visits every vertex at most once.

    Args:
        g: adjacency list; ``g[v]`` lists the successors of ``v``.
        vi: vertex being visited.
        s: start vertex of the whole search (needed to rebuild the path).
        t: target vertex; pass a value that is not a vertex (e.g. ``None``)
           to traverse everything reachable.
        visited: set of vertices already visited; updated in place.
        parent: dict child -> parent of the search tree; updated in place.

    Returns:
        The path from ``s`` to ``t`` as a list once ``t`` is reached,
        otherwise ``None``.
    """
    visited.add(vi)
    if vi == t:
        # Keyword arguments on purpose: this notebook defines backtrace twice
        # with different positional orders, (parent, s, t) and (s, t, parent).
        # Passing by name works with whichever definition is currently bound.
        return backtrace(parent=parent, s=s, t=t)

    for nv in g[vi]:
        if nv not in visited:
            parent[nv] = vi
            fpath = dfgs(g, nv, s, t, visited, parent)
            if fpath:
                return fpath

    return None
"2800b185-daae-46d1-c7d8-7d115fcece60", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 34 - } - }, - "source": [ - "parent = {}\n", - "path = dfgs(ucg, 0, 0, None, set(), parent)\n", - "path, parent" - ], - "execution_count": 519, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "(None, {1: 0, 2: 1, 3: 4, 4: 2, 5: 4})" - ] - }, - "metadata": { - "tags": [] - }, - "execution_count": 519 - } - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "TXjsA5BRLx87", - "colab_type": "code", - "outputId": "b30b4b1f-73fc-4e38-8939-6c8c2b6d61ac", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 465 - } - }, - "source": [ - "# Visualize the search tree\n", - "dot = Digraph(comment='The Round Table', format='png')\n", - "#print(get_methods(Digraph))\n", - "#print(Digraph.__dict__)\n", - "nodes = [0, 1, 2, 3, 4, 5]\n", - "for node in nodes:\n", - " dot.node(name=str(node))\n", - "for s, p in parent.items():\n", - " dot.edge(str(p), str(s))\n", - "#dot.edges(['01', '12','31', '20', '24','43', '45'])\n", - "dot.render('test-output/depth_first_graph_search_tree', view=True) \n", - "dot" - ], - "execution_count": 520, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "" - ], - "image/svg+xml": "\n\n\n\n\n\n%3\n\n\n\n0\n\n0\n\n\n\n1\n\n1\n\n\n\n0->1\n\n\n\n\n\n2\n\n2\n\n\n\n1->2\n\n\n\n\n\n4\n\n4\n\n\n\n2->4\n\n\n\n\n\n3\n\n3\n\n\n\n4->3\n\n\n\n\n\n5\n\n5\n\n\n\n4->5\n\n\n\n\n\n" - }, - "metadata": { - "tags": [] - }, - "execution_count": 520 - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "b_55IC6F8iP_", - "colab_type": "text" - }, - "source": [ - "### Breadth-first Search in Graph" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "8YhKn3V3biau", - "colab_type": "text" - }, - "source": [ - "#### Track paths and avoid cycles in the tree-based search" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "9-iHJjQYWfPy", - 
# Track paths and avoid cycles
def bfs(g, s):
    """Breadth-first tree search that tracks the full path to every node.

    Args:
        g: adjacency list; ``g[v]`` lists the successors of ``v``.
        s: start vertex.

    Returns:
        (paths, orders):
          paths  - the paths that could not be extended without revisiting a
                   vertex already on them, in the order they were dequeued;
          orders - the last vertex of every dequeued path, i.e. the visit
                   order of the search tree (a vertex appears once per path
                   that reaches it).
    """
    # Local import keeps this cell runnable on its own.
    from collections import deque

    # deque.popleft() is O(1); list.pop(0) shifts the whole list on every dequeue.
    q = deque([[s]])
    paths, orders = [], []
    while q:
        path = q.popleft()
        n = path[-1]
        orders.append(n)
        extended = False
        for v in g[n]:
            # Only the current path is checked, so cycles are avoided but the
            # same vertex can still be reached along different paths.
            if v not in path:
                extended = True
                q.append(path + [v])
        if not extended:
            paths.append(path)
    return paths, orders
# Track paths and avoid cycles in a memory efficient way
# Only track shortest paths
def bfgs(g, s, t):
    """Breadth-first graph search returning a path with the fewest edges.

    Args:
        g: adjacency list; ``g[v]`` lists the successors of ``v``.
        s: start vertex.
        t: target vertex; pass a value that is not a vertex (e.g. ``None``)
           to explore everything reachable from ``s``.

    Returns:
        The path from ``s`` to ``t`` as a list, or ``None`` if ``t`` is not
        reached.

    Side effects:
        ``bfgs.parent`` is reset on every call and afterwards holds the
        child -> parent links of the search tree built by this call.
    """
    # Local import keeps this cell runnable on its own.
    from collections import deque

    # deque.popleft() is O(1); list.pop(0) shifts the whole list on every dequeue.
    q = deque([s])
    bfgs.parent = {}
    visited = {s}
    while q:
        n = q.popleft()
        if n == t:
            # Keyword arguments on purpose: this notebook defines backtrace
            # twice with different positional orders; passing by name works
            # with whichever definition is currently bound.
            return backtrace(s=s, t=t, parent=bfgs.parent)
        for v in g[n]:
            if v not in visited:
                # Mark on enqueue so a vertex enters the queue at most once.
                q.append(v)
                visited.add(v)
                bfgs.parent[v] = n
    return None


#Print Shortest Path Iterative
def backtrace(s, t, parent):
    """Rebuild the path from ``s`` to ``t`` by following ``parent`` links.

    Args:
        s: start vertex (has no entry in ``parent``).
        t: end vertex.
        parent: mapping child vertex -> the vertex it was discovered from.

    Returns:
        List of vertices from ``s`` to ``t`` inclusive.

    Raises:
        KeyError: if the chain of parents starting at ``t`` does not lead to ``s``.
    """
    p = t
    path = []
    while p != s:
        path.append(p)
        p = parent[p]
    path.append(s)
    return path[::-1]
" return" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "7129c26IbK0N", - "colab_type": "code", - "outputId": "ccb546e7-be27-4573-dfbe-63cf48b5cbbd", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 34 - } - }, - "source": [ - "# Test ucg\n", - "bfgs(ucg, 0, 5)" - ], - "execution_count": 528, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "[0, 2, 4, 5]" - ] - }, - "metadata": { - "tags": [] - }, - "execution_count": 528 - } - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "Mwt51VlKFI5-", - "colab_type": "code", - "outputId": "482b57a4-29b1-4136-85fe-835ee577c4c2", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 369 - } - }, - "source": [ - "# Visualize the search tree\n", - "bfgs(ucg, 0, None)\n", - "parent = bfgs.parent\n", - "dot = Digraph(comment='The Round Table', format='png')\n", - "#print(get_methods(Digraph))\n", - "#print(Digraph.__dict__)\n", - "nodes = [0, 1, 2, 3, 4, 5]\n", - "for node in nodes:\n", - " dot.node(name=str(node))\n", - "for s, p in parent.items():\n", - " dot.edge(str(p), str(s))\n", - "#dot.edges(['01', '12','31', '20', '24','43', '45'])\n", - "dot.render('test-output/breath_first_graph_search_tree', view=True) \n", - "dot" - ], - "execution_count": 529, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "" - ], - "image/svg+xml": "\n\n\n\n\n\n%3\n\n\n\n0\n\n0\n\n\n\n1\n\n1\n\n\n\n0->1\n\n\n\n\n\n2\n\n2\n\n\n\n0->2\n\n\n\n\n\n3\n\n3\n\n\n\n1->3\n\n\n\n\n\n4\n\n4\n\n\n\n2->4\n\n\n\n\n\n5\n\n5\n\n\n\n4->5\n\n\n\n\n\n" - }, - "metadata": { - "tags": [] - }, - "execution_count": 529 - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "w0UuaLXj5bgZ", - "colab_type": "text" - }, - "source": [ - "#### Multiple Starts" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "yg1tgj8Y5czv", - "colab_type": "code", - "colab": {} - }, - "source": [ - 
#Multiple Starts
def BFSLevel(starts):
    '''Level-by-level BFS that begins from several start nodes at once.

    Args:
        starts: iterable of node objects. Each node must expose `adjacent`
            (an iterable of neighbor nodes) and a truthy/falsy `visited` flag.

    Returns:
        None. The traversal's only effect is setting `visited = 1` on every
        node reachable from the starts, including the starts themselves.
    '''
    q = list(starts)
    # Mark the sources first; otherwise a neighbor pointing back at a source
    # would enqueue it a second time, and a source with no incoming edge
    # would never be marked at all.
    for node in q:
        node.visited = 1
    while q:
        new_q = []  # nodes of the next level
        for node in q:
            for neig in node.adjacent:
                if not neig.visited:
                    neig.visited = 1
                    new_q.append(neig)
        q = new_q
#Recursive implementation with three states
def dfs(g, s, colors, orders, complete_orders):
    '''Recursive depth-first search that records discovery and finish order.

    `colors` is updated in place: `s` becomes gray when it is discovered and
    black once all of its white neighbors have been explored. `orders`
    receives vertices in discovery order, `complete_orders` in finish order.
    '''
    # Discovery of s
    orders.append(s)
    colors[s] = STATE.gray

    for neighbor in g[s]:
        unvisited = colors[neighbor] == STATE.white
        if unvisited:
            dfs(g, neighbor, colors, orders, complete_orders)

    # Every neighbor has been handled: s is finished
    complete_orders.append(s)
    colors[s] = STATE.black
def set_node_style(dot, color, node, name):
    '''Add one filled node to `dot`.

    Args:
        dot: graph or subgraph object exposing a graphviz-style `node` method.
        color: fill color of the node.
        node: vertex value; its `str()` is used as the visible label.
        name: unique node identifier inside the graph.

    The style is attached to this node only. Setting it through
    `dot.attr('node', ...)` would change the default for every node created
    afterwards in the same (sub)graph.
    '''
    dot.node(name=name, label=str(node),
             style='filled', fillcolor=color, fontcolor='red')
def plot_state(g, colors, dot, edges_list, ranks=None):
    '''Render one snapshot of the whole graph with nodes filled by their state.

    Args:
        g: adjacency list; `g[u]` lists the successors of vertex `u`.
        colors: per-vertex state, indexed by vertex.
        dot: parent graph the snapshot is added to as a subgraph.
        edges_list: not used here; accepted so this function has the same
            call shape as the other `plot_fun` implementations.
        ranks: list of vertex lists, one per horizontal layer. Defaults to
            the layout of the six-vertex example graph.

    Each call takes the next value from the module-level `counter`, uses it
    as a suffix to keep node names unique across snapshots, and writes a PNG
    under `test-output/`.
    '''
    if ranks is None:
        ranks = [[0], [2, 1], [4, 3], [5]]
    # Any state other than gray/black is drawn white.
    fill_of = {STATE.gray: 'gray', STATE.black: 'black'}

    name = str(next(counter))
    with dot.subgraph(name=name) as init:
        for i, rank in enumerate(ranks):
            with init.subgraph(name=name + str(i)) as same_rank:
                same_rank.attr(rank='same')  # keep the layer on one row
                for node in rank:
                    fill = fill_of.get(colors[node], 'white')
                    set_node_style(same_rank, fill, node, name=str(node) + name)

        for src in range(len(g)):
            for dst in g[src]:
                init.edge(str(src) + name, str(dst) + name)
        init.render('test-output/depth_first_graph_search_process' + name,
                    view=True, format='png')
as init:\n", - " nodes = set()\n", - " rank1 = [0]\n", - " rank2 = [1]\n", - " rank2_1=[2]\n", - " rank3 = [4]\n", - " rank4 = [3, 5]\n", - " ranks = [rank1, rank2, rank2_1, rank3, rank4]\n", - " for edges in edges_list:\n", - " for s, e in edges:\n", - " nodes.add(s)\n", - " nodes.add(e)\n", - " \n", - " for i, rank in enumerate(ranks):\n", - " with init.subgraph(name=name+str(i)) as s:\n", - " s.attr(rank='same')\n", - " for node in rank:\n", - " if node not in nodes:\n", - " continue\n", - " \n", - " if colors[node] == STATE.gray:\n", - " set_node_style(s, 'gray', node, name=str(node)+name)\n", - " elif colors[node] == STATE.black:\n", - " set_node_style(s, 'black', node, name=str(node)+name)\n", - " else:\n", - " set_node_style(s, 'white', node, name=str(node)+name)\n", - " # \n", - " # tracker = defaultdict(set) # edges\n", - " # for s in range(nodes):\n", - " # for e in g[s]:\n", - " # ##print(s, e, tracker)\n", - " # if e in tracker and s in tracker[e]:\n", - " # continue\n", - " # else:\n", - " # tracker[s].add(e)\n", - " # init.edge(str(s)+name, str(e)+name)\n", - " \n", - " \n", - " colors = ['black', 'red']\n", - " for i, edges in enumerate(edges_list):\n", - " if not edges:\n", - " continue\n", - " for start, end in edges:\n", - " if start is not None:\n", - " init.edge(str(start)+name, str(end)+name,_attributes={'dir':'forward', 'color': colors[i]})\n", - " init.render('test-output/depth_first_graph_search_process'+name, view=True, format='png') \n", - "\n" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "kLCE2dNJJjum", - "colab_type": "code", - "colab": {} - }, - "source": [ - "!rm test-output/depth_first_graph_search*" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "SpzWbATRNjtL", - "colab_type": "code", - "outputId": "05521fd1-9abb-46ae-98c4-50d1b2e9e076", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 34 - } - }, - "source": 
# Visualizing
edges = []       # (parent, child) pairs, appended when a white child is entered
back_edges = []  # (child, parent) pairs, appended when the recursion returns
def dfs(g, s, colors, dot, nodes, pre_node):
    '''Depth-first search that plots the graph at every state change.

    A snapshot is drawn through the module-level `plot_fun` right after `s`
    turns gray and again after it turns black. `nodes` collects every vertex
    entered so far. `pre_node` is the vertex the call came from; it is not
    read inside the function.
    '''
    def snapshot():
        plot_fun(g, colors, dot, [edges, back_edges])

    nodes.add(s)
    colors[s] = STATE.gray
    snapshot()

    for child in g[s]:
        if colors[child] == STATE.white:
            edges.append((s, child))
            dfs(g, child, colors, dot, nodes, s)
            back_edges.append((child, s))

    # complete
    colors[s] = STATE.black
    snapshot()
"\n\n\n\n\n\n%3\n\n\n\n00\n\n0\n\n\n\n20\n\n2\n\n\n\n00->20\n\n\n\n\n\n10\n\n1\n\n\n\n00->10\n\n\n\n\n\n20->00\n\n\n\n\n\n40\n\n4\n\n\n\n20->40\n\n\n\n\n\n10->20\n\n\n\n\n\n10->40\n\n\n\n\n\n30\n\n3\n\n\n\n40->30\n\n\n\n\n\n50\n\n5\n\n\n\n40->50\n\n\n\n\n\n30->10\n\n\n\n\n\n50->30\n\n\n\n\n\n01\n\n0\n\n\n\n21\n\n2\n\n\n\n01->21\n\n\n\n\n\n11\n\n1\n\n\n\n01->11\n\n\n\n\n\n21->01\n\n\n\n\n\n41\n\n4\n\n\n\n21->41\n\n\n\n\n\n11->21\n\n\n\n\n\n11->41\n\n\n\n\n\n31\n\n3\n\n\n\n41->31\n\n\n\n\n\n51\n\n5\n\n\n\n41->51\n\n\n\n\n\n31->11\n\n\n\n\n\n51->31\n\n\n\n\n\n02\n\n0\n\n\n\n22\n\n2\n\n\n\n02->22\n\n\n\n\n\n12\n\n1\n\n\n\n02->12\n\n\n\n\n\n22->02\n\n\n\n\n\n42\n\n4\n\n\n\n22->42\n\n\n\n\n\n12->22\n\n\n\n\n\n12->42\n\n\n\n\n\n32\n\n3\n\n\n\n42->32\n\n\n\n\n\n52\n\n5\n\n\n\n42->52\n\n\n\n\n\n32->12\n\n\n\n\n\n52->32\n\n\n\n\n\n03\n\n0\n\n\n\n23\n\n2\n\n\n\n03->23\n\n\n\n\n\n13\n\n1\n\n\n\n03->13\n\n\n\n\n\n23->03\n\n\n\n\n\n43\n\n4\n\n\n\n23->43\n\n\n\n\n\n13->23\n\n\n\n\n\n13->43\n\n\n\n\n\n33\n\n3\n\n\n\n43->33\n\n\n\n\n\n53\n\n5\n\n\n\n43->53\n\n\n\n\n\n33->13\n\n\n\n\n\n53->33\n\n\n\n\n\n04\n\n0\n\n\n\n24\n\n2\n\n\n\n04->24\n\n\n\n\n\n14\n\n1\n\n\n\n04->14\n\n\n\n\n\n24->04\n\n\n\n\n\n44\n\n4\n\n\n\n24->44\n\n\n\n\n\n14->24\n\n\n\n\n\n14->44\n\n\n\n\n\n34\n\n3\n\n\n\n44->34\n\n\n\n\n\n54\n\n5\n\n\n\n44->54\n\n\n\n\n\n34->14\n\n\n\n\n\n54->34\n\n\n\n\n\n05\n\n0\n\n\n\n25\n\n2\n\n\n\n05->25\n\n\n\n\n\n15\n\n1\n\n\n\n05->15\n\n\n\n\n\n25->05\n\n\n\n\n\n45\n\n4\n\n\n\n25->45\n\n\n\n\n\n15->25\n\n\n\n\n\n15->45\n\n\n\n\n\n35\n\n3\n\n\n\n45->35\n\n\n\n\n\n55\n\n5\n\n\n\n45->55\n\n\n\n\n\n35->15\n\n\n\n\n\n55->35\n\n\n\n\n\n06\n\n0\n\n\n\n26\n\n2\n\n\n\n06->26\n\n\n\n\n\n16\n\n1\n\n\n\n06->16\n\n\n\n\n\n26->06\n\n\n\n\n\n46\n\n4\n\n\n\n26->46\n\n\n\n\n\n16->26\n\n\n\n\n\n16->46\n\n\n\n\n\n36\n\n3\n\n\n\n46->36\n\n\n\n\n\n56\n\n5\n\n\n\n46->56\n\n\n\n\n\n36->16\n\n\n\n\n\n56->36\n\n\n\n\n\n07\n\n0\n\n\n\n27\n\n2\n\n\n\n07->27\n\n\n\n\n\n17\n\n1\n\n\n\n07->17\n\n\n\n\n\n27->07
# Classify every edge of `dcg` relative to the DFS tree recorded in `edges`:
#   tree edge    - the edge itself was followed by the DFS
#   back edge    - the head is an ancestor of the tail in the DFS tree
#   forward edge - the head is a proper descendant of the tail (non-tree edge)
#   cross edge   - none of the above
print(edges, back_edges)
tree_edges = edges
nodes = len(dcg)

# Adjacency-list form of the DFS tree, built from the recorded tree edges so
# this cell does not depend on a separately defined tree variable.
tree = [[] for _ in range(nodes)]
for s, e in tree_edges:
    tree[s].append(e)
tree_edge_set = set(tree_edges)  # O(1) membership tests

new_edges, n_back_edges, forward_edges, cross_edges = [], [], [], []
for s in range(nodes):
    for e in dcg[s]:
        if (s, e) in tree_edge_set:
            new_edges.append((s, e))
        elif bfgs(tree, e, s):  # a tree path e -> s exists: e is an ancestor of s
            n_back_edges.append((s, e))
        elif bfgs(tree, s, e):  # a tree path s -> e exists: e is a descendant of s
            forward_edges.append((s, e))
        else:
            cross_edges.append((s, e))

edges_list = [new_edges, n_back_edges, forward_edges, cross_edges]
dot = Digraph(comment='The Round Table', format='png')
print(edges_list)
ranks = [[0], [1], [2], [4], [3, 5]]
dot.attr(ranksep='0.75', rank='same')

# One colour per edge class, in the order of edges_list. Kept under its own
# name so the per-vertex `colors` state list used by the DFS cells survives.
edge_colors = ['black', 'red', 'yellow', 'blue']
for edge_color, classified in zip(edge_colors, edges_list):
    for start, end in classified:
        if start is not None:
            dot.edge(str(start), str(end), _attributes={'color': edge_color})
for i, rank in enumerate(ranks):
    with dot.subgraph(name=str(i)) as same_rank:
        same_rank.attr(ranksep='0.75', rank='same')
        for node in rank:
            same_rank.node(name=str(node))
dot.render('test-output/depth_first_graph_search_edges', view=True, format='png')
dot
#Discovering and finishing time
def dfs(g, s, colors):
    '''Recursive DFS that stamps each vertex with discovery and finish times.

    The clock and both result arrays live on the function object. The caller
    must initialise `dfs.t`, `dfs.discover` and `dfs.finish` before the first
    call. The clock is advanced once when a vertex is discovered and once
    when it is finished.

    Args:
        g: adjacency list; `g[u]` lists the successors of `u`.
        s: vertex to start from.
        colors: per-vertex state, updated in place (gray on discovery,
            black on finish).
    '''
    dfs.t += 1  # static variable shared by all recursive calls
    colors[s] = STATE.gray
    dfs.discover[s] = dfs.t
    for v in g[s]:
        if colors[v] == STATE.white:
            dfs(g, v, colors)
    # complete: mark the vertex finished, as the other DFS variants do
    dfs.t += 1
    colors[s] = STATE.black
    dfs.finish[s] = dfs.t
def parenthesis(dt, ft, n):
    '''Print the parenthesis structure given by DFS discovery/finish times.

    Args:
        dt: discovery time per vertex (index = vertex).
        ft: finish time per vertex (index = vertex).
        n: number of vertices; timestamps are expected in `[0, 2 * n)`.

    Vertices with a negative timestamp (never visited) are ignored. Without
    that guard a value of -1 would index the last slot of the timeline and
    overwrite a real event.

    Output: first the merged timeline of vertices, then one "( v," for each
    discovery and one "v )," for each finish.
    '''
    merge_orders = [-1] * (2 * n)  # timeline slot -> vertex, -1 = unused
    for times in (dt, ft):
        for v, t in enumerate(times):
            if t >= 0:
                merge_orders[t] = v
    # Drop slots no visited vertex claimed.
    merge_orders = [v for v in merge_orders if v != -1]

    print(merge_orders)
    opened = set()
    for v in merge_orders:
        if v not in opened:
            # first occurrence on the timeline = discovery
            print('(', v, end = ', ')
            opened.add(v)
        else:
            # second occurrence = finish
            print(v, '),', end = ' ')
def dftIter(g, s):
    '''preserving only discovery ordering

    Iterative depth-first traversal from `s`. A vertex may sit on the stack
    several times; it is recorded only the first time it is popped while
    still white. Neighbors are pushed in reverse so that the first neighbor
    is popped first. Returns the list of vertices in discovery order.
    '''
    state = [STATE.white] * len(g)
    discovered = []
    pending = [s]

    while pending:
        node = pending.pop()
        if not state[node] == STATE.white:
            continue  # already discovered through an earlier stack entry
        state[node] = STATE.gray
        discovered.append(node)  # track gray order
        pending.extend(nb for nb in reversed(g[node]) if state[nb] == STATE.white)

    return discovered
def dfsIter(g, s):
    '''iterative dfs

    Args:
        g: adjacency list; `g[u]` lists the neighbors of `u`.
        s: start vertex.

    Returns:
        `(orders, complete_orders)`: vertices in discovery order and in
        finish order.

    Each stack frame keeps an iterator over the vertex's neighbors, so the
    scan resumes where it stopped instead of restarting from the first
    neighbor every time the vertex is back on top of the stack. Neighbors
    that were skipped can never become white again, so the result is the
    same as a full rescan.
    '''
    colors = [STATE.white] * len(g)
    orders, complete_orders = [s], []  # orders tracks gray order
    colors[s] = STATE.gray
    stack = [(s, iter(g[s]))]

    while stack:
        u, remaining = stack[-1]
        for v in remaining:
            if colors[v] == STATE.white:
                colors[v] = STATE.gray
                orders.append(v)  # track gray order
                stack.append((v, iter(g[v])))
                break  # descend into v before looking at u's other neighbors
        else:
            # no white neighbor left: u is complete
            colors[u] = STATE.black  # not needed by the algorithm, only tracks state
            complete_orders.append(u)
            stack.pop()

    return orders, complete_orders
def bfgs_state(g, s):
    '''Breadth-first graph search with three vertex states.

    Args:
        g: adjacency list; `g[u]` lists the neighbors of `u`.
        s: start vertex.

    Returns:
        `(orders, complete_orders)`: vertices in the order they turn gray
        (enqueued) and in the order they turn black (all neighbors scanned).
    '''
    from collections import deque  # local import keeps the cell self-contained

    colors = [STATE.white] * len(g)
    orders, complete_orders = [s], []
    colors[s] = STATE.gray  # mark the state of the visiting node
    q = deque([s])  # deque: O(1) pops from the left, unlike list.pop(0)
    while q:
        u = q.popleft()
        for v in g[u]:
            if colors[v] == STATE.white:
                colors[v] = STATE.gray
                q.append(v)
                orders.append(v)

        # complete
        colors[u] = STATE.black
        complete_orders.append(u)
    return orders, complete_orders
def bfgs_state_plot(g, s):
    '''Breadth-first graph search that draws a snapshot at every state change.

    A snapshot is rendered through `plot_state_bfs` after the start vertex
    turns gray, after each newly discovered vertex turns gray, and after each
    dequeued vertex turns black. Snapshots are numbered consecutively from 0.

    Args:
        g: adjacency list; `g[u]` lists the neighbors of `u`.
        s: start vertex.

    Returns:
        The Digraph that accumulated all snapshots.
    '''
    from collections import deque  # local import keeps the cell self-contained

    colors = [STATE.white] * len(g)
    dot = Digraph(comment='The Round Table', format='png')
    counter = itertools.count()

    def snapshot():
        # One numbered frame of the current colouring.
        plot_state_bfs(g, colors, dot,
                       png_name='breath_first_graph_search_process' + str(next(counter)))

    q = deque([s])  # deque: O(1) pops from the left, unlike list.pop(0)
    colors[s] = STATE.gray  # mark the state of the visiting node
    snapshot()
    while q:
        u = q.popleft()
        for v in g[u]:
            if colors[v] == STATE.white:
                colors[v] = STATE.gray
                q.append(v)
                snapshot()

        # complete
        colors[u] = STATE.black
        snapshot()
    return dot
389 - } - }, - "source": [ - "dot = bfgs_state_plot(dcg, 0)\n", - "dot" - ], - "execution_count": 561, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "" - ], - "image/svg+xml": "\n\n\n\n\n\n%3\n\n\n\n0breath_first_graph_search_process0\n\n0\n\n\n\n2breath_first_graph_search_process0\n\n2\n\n\n\n0breath_first_graph_search_process0->2breath_first_graph_search_process0\n\n\n\n\n\n1breath_first_graph_search_process0\n\n1\n\n\n\n0breath_first_graph_search_process0->1breath_first_graph_search_process0\n\n\n\n\n\n2breath_first_graph_search_process0->0breath_first_graph_search_process0\n\n\n\n\n\n4breath_first_graph_search_process0\n\n4\n\n\n\n2breath_first_graph_search_process0->4breath_first_graph_search_process0\n\n\n\n\n\n1breath_first_graph_search_process0->2breath_first_graph_search_process0\n\n\n\n\n\n1breath_first_graph_search_process0->4breath_first_graph_search_process0\n\n\n\n\n\n3breath_first_graph_search_process0\n\n3\n\n\n\n4breath_first_graph_search_process0->3breath_first_graph_search_process0\n\n\n\n\n\n5breath_first_graph_search_process0\n\n5\n\n\n\n4breath_first_graph_search_process0->5breath_first_graph_search_process0\n\n\n\n\n\n3breath_first_graph_search_process0->1breath_first_graph_search_process0\n\n\n\n\n\n5breath_first_graph_search_process0->3breath_first_graph_search_process0\n\n\n\n\n\n0breath_first_graph_search_process1\n\n0\n\n\n\n2breath_first_graph_search_process1\n\n2\n\n\n\n0breath_first_graph_search_process1->2breath_first_graph_search_process1\n\n\n\n\n\n1breath_first_graph_search_process1\n\n1\n\n\n\n0breath_first_graph_search_process1->1breath_first_graph_search_process1\n\n\n\n\n\n2breath_first_graph_search_process1->0breath_first_graph_search_process1\n\n\n\n\n\n4breath_first_graph_search_process1\n\n4\n\n\n\n2breath_first_graph_search_process1->4breath_first_graph_search_process1\n\n\n\n\n\n1breath_first_graph_search_process1->2breath_first_graph_search_process1\n\n\n\n\n\n1breath_first_graph_s
earch_process1->4breath_first_graph_search_process1\n\n\n\n\n\n3breath_first_graph_search_process1\n\n3\n\n\n\n4breath_first_graph_search_process1->3breath_first_graph_search_process1\n\n\n\n\n\n5breath_first_graph_search_process1\n\n5\n\n\n\n4breath_first_graph_search_process1->5breath_first_graph_search_process1\n\n\n\n\n\n3breath_first_graph_search_process1->1breath_first_graph_search_process1\n\n\n\n\n\n5breath_first_graph_search_process1->3breath_first_graph_search_process1\n\n\n\n\n\n0breath_first_graph_search_process2\n\n0\n\n\n\n2breath_first_graph_search_process2\n\n2\n\n\n\n0breath_first_graph_search_process2->2breath_first_graph_search_process2\n\n\n\n\n\n1breath_first_graph_search_process2\n\n1\n\n\n\n0breath_first_graph_search_process2->1breath_first_graph_search_process2\n\n\n\n\n\n2breath_first_graph_search_process2->0breath_first_graph_search_process2\n\n\n\n\n\n4breath_first_graph_search_process2\n\n4\n\n\n\n2breath_first_graph_search_process2->4breath_first_graph_search_process2\n\n\n\n\n\n1breath_first_graph_search_process2->2breath_first_graph_search_process2\n\n\n\n\n\n1breath_first_graph_search_process2->4breath_first_graph_search_process2\n\n\n\n\n\n3breath_first_graph_search_process2\n\n3\n\n\n\n4breath_first_graph_search_process2->3breath_first_graph_search_process2\n\n\n\n\n\n5breath_first_graph_search_process2\n\n5\n\n\n\n4breath_first_graph_search_process2->5breath_first_graph_search_process2\n\n\n\n\n\n3breath_first_graph_search_process2->1breath_first_graph_search_process2\n\n\n\n\n\n5breath_first_graph_search_process2->3breath_first_graph_search_process2\n\n\n\n\n\n0breath_first_graph_search_process3\n\n0\n\n\n\n2breath_first_graph_search_process3\n\n2\n\n\n\n0breath_first_graph_search_process3->2breath_first_graph_search_process3\n\n\n\n\n\n1breath_first_graph_search_process3\n\n1\n\n\n\n0breath_first_graph_search_process3->1breath_first_graph_search_process3\n\n\n\n\n\n2breath_first_graph_search_process3->0breath_first_graph_search_p
rocess3\n\n\n\n\n\n4breath_first_graph_search_process3\n\n4\n\n\n\n2breath_first_graph_search_process3->4breath_first_graph_search_process3\n\n\n\n\n\n1breath_first_graph_search_process3->2breath_first_graph_search_process3\n\n\n\n\n\n1breath_first_graph_search_process3->4breath_first_graph_search_process3\n\n\n\n\n\n3breath_first_graph_search_process3\n\n3\n\n\n\n4breath_first_graph_search_process3->3breath_first_graph_search_process3\n\n\n\n\n\n5breath_first_graph_search_process3\n\n5\n\n\n\n4breath_first_graph_search_process3->5breath_first_graph_search_process3\n\n\n\n\n\n3breath_first_graph_search_process3->1breath_first_graph_search_process3\n\n\n\n\n\n5breath_first_graph_search_process3->3breath_first_graph_search_process3\n\n\n\n\n\n0breath_first_graph_search_process4\n\n0\n\n\n\n2breath_first_graph_search_process4\n\n2\n\n\n\n0breath_first_graph_search_process4->2breath_first_graph_search_process4\n\n\n\n\n\n1breath_first_graph_search_process4\n\n1\n\n\n\n0breath_first_graph_search_process4->1breath_first_graph_search_process4\n\n\n\n\n\n2breath_first_graph_search_process4->0breath_first_graph_search_process4\n\n\n\n\n\n4breath_first_graph_search_process4\n\n4\n\n\n\n2breath_first_graph_search_process4->4breath_first_graph_search_process4\n\n\n\n\n\n1breath_first_graph_search_process4->2breath_first_graph_search_process4\n\n\n\n\n\n1breath_first_graph_search_process4->4breath_first_graph_search_process4\n\n\n\n\n\n3breath_first_graph_search_process4\n\n3\n\n\n\n4breath_first_graph_search_process4->3breath_first_graph_search_process4\n\n\n\n\n\n5breath_first_graph_search_process4\n\n5\n\n\n\n4breath_first_graph_search_process4->5breath_first_graph_search_process4\n\n\n\n\n\n3breath_first_graph_search_process4->1breath_first_graph_search_process4\n\n\n\n\n\n5breath_first_graph_search_process4->3breath_first_graph_search_process4\n\n\n\n\n\n0breath_first_graph_search_process5\n\n0\n\n\n\n2breath_first_graph_search_process5\n\n2\n\n\n\n0breath_first_graph_searc
h_process5->2breath_first_graph_search_process5\n\n\n\n\n\n1breath_first_graph_search_process5\n\n1\n\n\n\n0breath_first_graph_search_process5->1breath_first_graph_search_process5\n\n\n\n\n\n2breath_first_graph_search_process5->0breath_first_graph_search_process5\n\n\n\n\n\n4breath_first_graph_search_process5\n\n4\n\n\n\n2breath_first_graph_search_process5->4breath_first_graph_search_process5\n\n\n\n\n\n1breath_first_graph_search_process5->2breath_first_graph_search_process5\n\n\n\n\n\n1breath_first_graph_search_process5->4breath_first_graph_search_process5\n\n\n\n\n\n3breath_first_graph_search_process5\n\n3\n\n\n\n4breath_first_graph_search_process5->3breath_first_graph_search_process5\n\n\n\n\n\n5breath_first_graph_search_process5\n\n5\n\n\n\n4breath_first_graph_search_process5->5breath_first_graph_search_process5\n\n\n\n\n\n3breath_first_graph_search_process5->1breath_first_graph_search_process5\n\n\n\n\n\n5breath_first_graph_search_process5->3breath_first_graph_search_process5\n\n\n\n\n\n0breath_first_graph_search_process6\n\n0\n\n\n\n2breath_first_graph_search_process6\n\n2\n\n\n\n0breath_first_graph_search_process6->2breath_first_graph_search_process6\n\n\n\n\n\n1breath_first_graph_search_process6\n\n1\n\n\n\n0breath_first_graph_search_process6->1breath_first_graph_search_process6\n\n\n\n\n\n2breath_first_graph_search_process6->0breath_first_graph_search_process6\n\n\n\n\n\n4breath_first_graph_search_process6\n\n4\n\n\n\n2breath_first_graph_search_process6->4breath_first_graph_search_process6\n\n\n\n\n\n1breath_first_graph_search_process6->2breath_first_graph_search_process6\n\n\n\n\n\n1breath_first_graph_search_process6->4breath_first_graph_search_process6\n\n\n\n\n\n3breath_first_graph_search_process6\n\n3\n\n\n\n4breath_first_graph_search_process6->3breath_first_graph_search_process6\n\n\n\n\n\n5breath_first_graph_search_process6\n\n5\n\n\n\n4breath_first_graph_search_process6->5breath_first_graph_search_process6\n\n\n\n\n\n3breath_first_graph_search_proces
s6->1breath_first_graph_search_process6\n\n\n\n\n\n5breath_first_graph_search_process6->3breath_first_graph_search_process6\n\n\n\n\n\n0breath_first_graph_search_process7\n\n0\n\n\n\n2breath_first_graph_search_process7\n\n2\n\n\n\n0breath_first_graph_search_process7->2breath_first_graph_search_process7\n\n\n\n\n\n1breath_first_graph_search_process7\n\n1\n\n\n\n0breath_first_graph_search_process7->1breath_first_graph_search_process7\n\n\n\n\n\n2breath_first_graph_search_process7->0breath_first_graph_search_process7\n\n\n\n\n\n4breath_first_graph_search_process7\n\n4\n\n\n\n2breath_first_graph_search_process7->4breath_first_graph_search_process7\n\n\n\n\n\n1breath_first_graph_search_process7->2breath_first_graph_search_process7\n\n\n\n\n\n1breath_first_graph_search_process7->4breath_first_graph_search_process7\n\n\n\n\n\n3breath_first_graph_search_process7\n\n3\n\n\n\n4breath_first_graph_search_process7->3breath_first_graph_search_process7\n\n\n\n\n\n5breath_first_graph_search_process7\n\n5\n\n\n\n4breath_first_graph_search_process7->5breath_first_graph_search_process7\n\n\n\n\n\n3breath_first_graph_search_process7->1breath_first_graph_search_process7\n\n\n\n\n\n5breath_first_graph_search_process7->3breath_first_graph_search_process7\n\n\n\n\n\n0breath_first_graph_search_process8\n\n0\n\n\n\n2breath_first_graph_search_process8\n\n2\n\n\n\n0breath_first_graph_search_process8->2breath_first_graph_search_process8\n\n\n\n\n\n1breath_first_graph_search_process8\n\n1\n\n\n\n0breath_first_graph_search_process8->1breath_first_graph_search_process8\n\n\n\n\n\n2breath_first_graph_search_process8->0breath_first_graph_search_process8\n\n\n\n\n\n4breath_first_graph_search_process8\n\n4\n\n\n\n2breath_first_graph_search_process8->4breath_first_graph_search_process8\n\n\n\n\n\n1breath_first_graph_search_process8->2breath_first_graph_search_process8\n\n\n\n\n\n1breath_first_graph_search_process8->4breath_first_graph_search_process8\n\n\n\n\n\n3breath_first_graph_search_process8\n\n3\
n\n\n\n4breath_first_graph_search_process8->3breath_first_graph_search_process8\n\n\n\n\n\n5breath_first_graph_search_process8\n\n5\n\n\n\n4breath_first_graph_search_process8->5breath_first_graph_search_process8\n\n\n\n\n\n3breath_first_graph_search_process8->1breath_first_graph_search_process8\n\n\n\n\n\n5breath_first_graph_search_process8->3breath_first_graph_search_process8\n\n\n\n\n\n0breath_first_graph_search_process9\n\n0\n\n\n\n2breath_first_graph_search_process9\n\n2\n\n\n\n0breath_first_graph_search_process9->2breath_first_graph_search_process9\n\n\n\n\n\n1breath_first_graph_search_process9\n\n1\n\n\n\n0breath_first_graph_search_process9->1breath_first_graph_search_process9\n\n\n\n\n\n2breath_first_graph_search_process9->0breath_first_graph_search_process9\n\n\n\n\n\n4breath_first_graph_search_process9\n\n4\n\n\n\n2breath_first_graph_search_process9->4breath_first_graph_search_process9\n\n\n\n\n\n1breath_first_graph_search_process9->2breath_first_graph_search_process9\n\n\n\n\n\n1breath_first_graph_search_process9->4breath_first_graph_search_process9\n\n\n\n\n\n3breath_first_graph_search_process9\n\n3\n\n\n\n4breath_first_graph_search_process9->3breath_first_graph_search_process9\n\n\n\n\n\n5breath_first_graph_search_process9\n\n5\n\n\n\n4breath_first_graph_search_process9->5breath_first_graph_search_process9\n\n\n\n\n\n3breath_first_graph_search_process9->1breath_first_graph_search_process9\n\n\n\n\n\n5breath_first_graph_search_process9->3breath_first_graph_search_process9\n\n\n\n\n\n0breath_first_graph_search_process10\n\n0\n\n\n\n2breath_first_graph_search_process10\n\n2\n\n\n\n0breath_first_graph_search_process10->2breath_first_graph_search_process10\n\n\n\n\n\n1breath_first_graph_search_process10\n\n1\n\n\n\n0breath_first_graph_search_process10->1breath_first_graph_search_process10\n\n\n\n\n\n2breath_first_graph_search_process10->0breath_first_graph_search_process10\n\n\n\n\n\n4breath_first_graph_search_process10\n\n4\n\n\n\n2breath_first_graph_search_
process10->4breath_first_graph_search_process10\n\n\n\n\n\n1breath_first_graph_search_process10->2breath_first_graph_search_process10\n\n\n\n\n\n1breath_first_graph_search_process10->4breath_first_graph_search_process10\n\n\n\n\n\n3breath_first_graph_search_process10\n\n3\n\n\n\n4breath_first_graph_search_process10->3breath_first_graph_search_process10\n\n\n\n\n\n5breath_first_graph_search_process10\n\n5\n\n\n\n4breath_first_graph_search_process10->5breath_first_graph_search_process10\n\n\n\n\n\n3breath_first_graph_search_process10->1breath_first_graph_search_process10\n\n\n\n\n\n5breath_first_graph_search_process10->3breath_first_graph_search_process10\n\n\n\n\n\n0breath_first_graph_search_process11\n\n0\n\n\n\n2breath_first_graph_search_process11\n\n2\n\n\n\n0breath_first_graph_search_process11->2breath_first_graph_search_process11\n\n\n\n\n\n1breath_first_graph_search_process11\n\n1\n\n\n\n0breath_first_graph_search_process11->1breath_first_graph_search_process11\n\n\n\n\n\n2breath_first_graph_search_process11->0breath_first_graph_search_process11\n\n\n\n\n\n4breath_first_graph_search_process11\n\n4\n\n\n\n2breath_first_graph_search_process11->4breath_first_graph_search_process11\n\n\n\n\n\n1breath_first_graph_search_process11->2breath_first_graph_search_process11\n\n\n\n\n\n1breath_first_graph_search_process11->4breath_first_graph_search_process11\n\n\n\n\n\n3breath_first_graph_search_process11\n\n3\n\n\n\n4breath_first_graph_search_process11->3breath_first_graph_search_process11\n\n\n\n\n\n5breath_first_graph_search_process11\n\n5\n\n\n\n4breath_first_graph_search_process11->5breath_first_graph_search_process11\n\n\n\n\n\n3breath_first_graph_search_process11->1breath_first_graph_search_process11\n\n\n\n\n\n5breath_first_graph_search_process11->3breath_first_graph_search_process11\n\n\n\n\n\n" - }, - "metadata": { - "tags": [] - }, - "execution_count": 561 - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "I0j42hiHHZBq", - "colab_type": "text" 
- }, - "source": [ - "## Tree Search\n", - "\n", - "The sample code is writen in [tree_datastructure_and_traversal](https://colab.research.google.com/drive/1pg49npUd4Rhbg5fggs8ZYakA563YATeA)." - ] - } - ] -} \ No newline at end of file diff --git a/Colab_Codes/chapter_sorting_and_selection_algorithms.ipynb b/Colab_Codes/chapter_sorting_and_selection_algorithms.ipynb deleted file mode 100644 index 4b149f3..0000000 --- a/Colab_Codes/chapter_sorting_and_selection_algorithms.ipynb +++ /dev/null @@ -1,1540 +0,0 @@ -{ - "nbformat": 4, - "nbformat_minor": 0, - "metadata": { - "colab": { - "name": "chapter_sorting_and_selection_algorithms.ipynb", - "provenance": [], - "collapsed_sections": [], - "toc_visible": true, - "include_colab_link": true - }, - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - } - }, - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "id": "view-in-github", - "colab_type": "text" - }, - "source": [ - "\"Open" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "IQcElo1mM_Sf", - "colab_type": "text" - }, - "source": [ - "## Naive Sort in O(n^2)\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "zqAHZFGyelhI", - "colab_type": "text" - }, - "source": [ - "### Insertion Sort in O(n^2)" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "q2pruFdZjpm2", - "colab_type": "code", - "colab": {} - }, - "source": [ - "# Not in-place\n", - "def insertionSort(a):\n", - " if not a or len(a) == 1:\n", - " return a\n", - " n = len(a)\n", - " sl = [a[0]] # sorted list\n", - " for i in range(1, n):\n", - " for j in range(i):\n", - " if sl[j] > a[i]:\n", - " sl.insert(j, a[i])\n", - " break\n", - " if len(sl) != i + 1: # not inserted yet\n", - " sl.insert(i, a[i])\n", - " return sl" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "z09E9HTdlMSE", - "colab_type": "code", - "outputId": "9ce33518-bc5f-4286-8f0e-37ef296c8c2d", - "colab": { - "base_uri": 
"https://localhost:8080/", - "height": 35 - } - }, - "source": [ - "a = [9, 10, 2, 8, 9, 3, 7]\n", - "sa = insertionSort(a)\n", - "sa" - ], - "execution_count": 0, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "[2, 3, 7, 8, 9, 9, 10]" - ] - }, - "metadata": { - "tags": [] - }, - "execution_count": 9 - } - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "yJL27nHcNEJl", - "colab_type": "code", - "colab": {} - }, - "source": [ - "# Backward and in-place\n", - "def insertionSort(a):\n", - " if not a or len(a) == 1:\n", - " return a\n", - " n = len(a)\n", - " for i in range(1, n):\n", - " t = a[i]\n", - " j = i - 1\n", - " while j >= 0 and t < a[j]: \n", - " a[j+1] = a[j] # Move item backward\n", - " j -= 1\n", - " a[j+1] = t \n", - " return" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "Zb5jMt-iScSz", - "colab_type": "code", - "outputId": "bb15e619-06fe-4c65-d989-bf2bf16d6c2e", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 35 - } - }, - "source": [ - "a = [9, 10, 2, 8, 9, 3, 7]\n", - "insertionSort(a)\n", - "print(a)" - ], - "execution_count": 0, - "outputs": [ - { - "output_type": "stream", - "text": [ - "[2, 3, 7, 8, 9, 9, 10]\n" - ], - "name": "stdout" - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "lHFCwzZNGrMu", - "colab_type": "text" - }, - "source": [ - "### Bubble Sort in O(n^2)" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "uqzSlyDXGzhO", - "colab_type": "code", - "colab": {} - }, - "source": [ - "def bubbleSort(a):\n", - " if not a or len(a) == 1:\n", - " return \n", - " n = len(a)\n", - " for i in range(n - 1): #n-1 passes \n", - " for j in range(n - i -1): \n", - " # Swap\n", - " if a[j] > a[j + 1]:\n", - " a[j], a[j + 1] = a[j + 1], a[j] \n", - " return" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "dkpw1G1Xb9Gp", - "colab_type": "code", - 
"outputId": "69676f57-56a6-40db-f162-2c6f50234b8d", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 35 - } - }, - "source": [ - "a = [9, 10, 2, 8, 9, 3]\n", - "bubbleSort(a)\n", - "a" - ], - "execution_count": 0, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "[2, 3, 8, 9, 9, 10]" - ] - }, - "metadata": { - "tags": [] - }, - "execution_count": 21 - } - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "B-WK_VNi_unM", - "colab_type": "code", - "colab": {} - }, - "source": [ - "def bubbleSortOptimized(a):\n", - " if not a or len(a) == 1:\n", - " return\n", - " n = len(a)\n", - " for i in range(n - 1): #n-1 passes, \n", - " bSwap = False\n", - " for j in range(n - i -1): #each pass will have valid window [0, n-i], and j is the starting index of each pair\n", - " if a[j] > a[j + 1]:\n", - " a[j], a[j + 1] = a[j + 1], a[j] #swap\n", - " bSwap = True\n", - " if not bSwap:\n", - " break\n", - " return" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "npb5NRJdI4sV", - "colab_type": "code", - "outputId": "a91cc02d-78be-4623-f9be-417aa09a2772", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 35 - } - }, - "source": [ - "a = [9, 10, 2, 8, 9, 3]\n", - "bubbleSortOptimized(a)\n", - "a" - ], - "execution_count": 0, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "[2, 3, 8, 9, 9, 10]" - ] - }, - "metadata": { - "tags": [] - }, - "execution_count": 24 - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "RZwzr_7xBjae", - "colab_type": "text" - }, - "source": [ - "### Selection Sort in O(n^2)\n", - "In selection sort, each time it selects the current largest item and swap it with the last item in the unrestricted region.\n", - "Given the input size to be `n`, we have index `[0, n-1]`. \n", - "\n", - "* At the first pass, we choose the largest item from `A[0,n-1]` and swap it with `A[n-1]`. 
\n", - "\n", - "* At the second pass, we choose the largest item from `A[0,n-2]` and swap it with `A[n-2]`. " - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "ud6JuKtBhZ5N", - "colab_type": "code", - "colab": {} - }, - "source": [ - "def selectSort(a):\n", - " n = len(a)\n", - " for i in range(n - 1): #n-1 passes \n", - " ti = n - 1 - i\n", - " li = 0 # The index of the largest item\n", - " for j in range(n - i):\n", - " if a[j] >= a[li]:\n", - " li = j\n", - " # swap li and ti\n", - " a[ti], a[li] = a[li], a[ti]\n", - " return " - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "e0U-HGHcBpL4", - "colab_type": "code", - "colab": {} - }, - "source": [ - "def selectSort(a):\n", - " n = len(a)\n", - " for i in range(n - 1): #n-1 passes \n", - " ti = n - 1 - i\n", - " li = 0 # The index of the largest item\n", - " for j in range(n - i):\n", - " if a[j] >= a[li]:\n", - " li = j\n", - " # swap li and ti\n", - " print('swap', a[li], a[ti], li)\n", - " a[ti], a[li] = a[li], a[ti]\n", - " print(a)\n", - " return a" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "r3v62LNjEZVY", - "colab_type": "code", - "outputId": "ab2a8871-62d7-4190-dced-149fba761a64", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 219 - } - }, - "source": [ - "a = [9, 10, 2, 8, 9, 3]\n", - "selectSort(a)\n", - "a" - ], - "execution_count": 0, - "outputs": [ - { - "output_type": "stream", - "text": [ - "swap 10 3 1\n", - "[9, 3, 2, 8, 9, 10]\n", - "swap 9 9 4\n", - "[9, 3, 2, 8, 9, 10]\n", - "swap 9 8 0\n", - "[8, 3, 2, 9, 9, 10]\n", - "swap 8 2 0\n", - "[2, 3, 8, 9, 9, 10]\n", - "swap 3 3 1\n", - "[2, 3, 8, 9, 9, 10]\n" - ], - "name": "stdout" - }, - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "[2, 3, 8, 9, 9, 10]" - ] - }, - "metadata": { - "tags": [] - }, - "execution_count": 31 - } - ] - }, - { - "cell_type": "code", - "metadata": { - "id": 
"g1CFDBHpGaOG", - "colab_type": "code", - "colab": {} - }, - "source": [ - "def insertionSort(a):\n", - " '''implement insertion sort'''\n", - " if not a or len(a) == 1:\n", - " return a\n", - " n = len(a)\n", - " sl = [a[0]] + [None] *(n-1) # sorted list\n", - " for i in range(1, n): # items to be inserted into the sorted\n", - " key = a[i]\n", - " j = i-1 \n", - "\n", - " while j >= 0 and sl[j] > key: # compare key from the last sorted element\n", - " sl[j+1] = sl[j] # shift a[j] backward\n", - " j -= 1\n", - " sl[j+1] = key\n", - " print(sl)\n", - " return sl\n", - " " - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "EI1bDD3106fV", - "colab_type": "code", - "colab": {} - }, - "source": [ - "def shift(a, start, end):\n", - " for i in range(end, start, -1): # [i, j)\n", - " a[i] = a[i-1]\n", - " \n", - "def insertionSortForward(a):\n", - " if not a or len(a) == 1:\n", - " return a\n", - " n = len(a)\n", - " sl = [a[0]] # sorted list\n", - " for i in range(1, n): # items to be inserted into the sorted\n", - " for j in range(i):\n", - " if a[i] < a[j]:\n", - " # shift all other elements [j, i-1]\n", - " tmp = a[i]\n", - " shift(a, j, i)\n", - " a[j] = tmp \n", - " return a\n", - "\n", - "def insertionSortInPlace(a):\n", - " if not a or len(a) == 1:\n", - " return a\n", - " n = len(a)\n", - " for i in range(1, n): # items to be inserted into the sorted\n", - " t = a[i]\n", - " j = i - 1\n", - " while j >= 0 and t < a[j]: # keep comparing if target is still smaller\n", - " a[j+1] = a[j] # shift current item backward\n", - " j -= 1\n", - " a[j+1] = t # a[j] <= t , insert t at the location j+1 \n", - " return a" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "NgeLGKn51mxY", - "colab_type": "code", - "outputId": "6051e043-4c10-48c3-d477-10bb3b13a05b", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 35 - } - }, - "source": [ - "a = [9, 10, 2, 8, 9, 
3, 7]\n", - "print(insertionSortInPlace(a))" - ], - "execution_count": 0, - "outputs": [ - { - "output_type": "stream", - "text": [ - "[2, 3, 7, 8, 9, 9, 10]\n" - ], - "name": "stdout" - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "GN1yseSKImNy", - "colab_type": "text" - }, - "source": [ - "## Merge Sort O(nlgn)" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "rILA4lBhdPxD", - "colab_type": "code", - "colab": {} - }, - "source": [ - "def merge(l, r): \n", - " ans = []\n", - " # Two pointers each points at l and r\n", - " i = j = 0 \n", - " n, m = len(l), len(r)\n", - "\n", - " while i < n and j < m: \n", - " if l[i] <= r[j]:\n", - " ans.append(l[i])\n", - " i += 1\n", - " else:\n", - " ans.append(r[j])\n", - " j += 1\n", - " \n", - " ans += l[i:]\n", - " ans += r[j:]\n", - " return ans\n", - " " - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "DK003Ic1Isb3", - "colab_type": "code", - "colab": {} - }, - "source": [ - "def mergeSort(a, s, e):\n", - " if s == e:\n", - " return [a[s]]\n", - "\n", - " m = (s + e) // 2 \n", - "\n", - " l = mergeSort(a, s , m)\n", - " r = mergeSort(a, m+1, e)\n", - " return merge(l, r)" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "ys6tUAd8i7ao", - "colab_type": "code", - "outputId": "afc20a0d-012e-413f-baf1-f098bb8e23ce", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 35 - } - }, - "source": [ - "a = [9, 10, 2, 8, 9, 3, 7, 9]\n", - "mergeSort(a, 0, len(a)-1)" - ], - "execution_count": 0, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "[2, 3, 7, 8, 9, 9, 9, 10]" - ] - }, - "metadata": { - "tags": [] - }, - "execution_count": 34 - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "IL6hPk6IjGSf", - "colab_type": "text" - }, - "source": [ - "### prove merge sort is stable by sorting tuple and printing id" - ] - }, - { - 
"cell_type": "code", - "metadata": { - "id": "UwgdFdaRipEN", - "colab_type": "code", - "colab": {} - }, - "source": [ - "def mergeTuple(l, r): \n", - " '''combine the left and right sorted list'''\n", - " ans = []\n", - " i = j = 0 # two pointers each points at l and r\n", - " n, m = len(l), len(r)\n", - " \n", - " # first while loop to merge\n", - " while i < n and j < m: \n", - " if l[i][0] <= r[j][0]: # chaning it to l[i][0] < r[j][0] will not be stable anymore. \n", - " ans.append(l[i])\n", - " i += 1\n", - " else:\n", - " ans.append(r[j])\n", - " j += 1\n", - " \n", - " # now one list of l and r might have items left\n", - " ans += l[i:]\n", - " ans += r[j:]\n", - " return ans\n", - "\n", - "def mergeSortTuple(a, s, e):\n", - " # base case , can not be divided further\n", - " if s == e:\n", - " return [a[s]]\n", - " # divide into two halves from the middle point\n", - " m = (s + e) // 2\n", - " \n", - " # conquer\n", - " l = mergeSort(a, s , m)\n", - " r = mergeSort(a, m+1, e)\n", - " \n", - " # combine\n", - " return mergeTuple(l, r)" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "mm07Ac0-dO3A", - "colab_type": "code", - "outputId": "dd9eca57-98b7-4e04-acc2-7a31e7efd61d", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 312 - } - }, - "source": [ - "a = [(9, 1), (10, 1), (2, 1), (8, 1), (9, 2), (3, 1), (7, 1), (9, 3)] # the second item represents the index of duplcates\n", - "ids = [id(x) if x[0] == 9 else None for x in a]\n", - "sorted_a = mergeSortTuple(a, 0, len(a)-1)\n", - "ids2 = [id(x) if x[0] == 9 else None for x in sorted_a]\n", - "print(sorted_a)\n", - "ids, ids2" - ], - "execution_count": 0, - "outputs": [ - { - "output_type": "stream", - "text": [ - "[(2, 1), (3, 1), (7, 1), (8, 1), (9, 2), (9, 3), (9, 1), (10, 1)]\n" - ], - "name": "stdout" - }, - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "([140381548618120,\n", - " None,\n", - " None,\n", - " 
None,\n", - " 140381548653128,\n", - " None,\n", - " None,\n", - " 140381548653320],\n", - " [None,\n", - " None,\n", - " None,\n", - " None,\n", - " 140381548653128,\n", - " 140381548653320,\n", - " 140381548618120,\n", - " None])" - ] - }, - "metadata": { - "tags": [] - }, - "execution_count": 47 - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "h0QYWQaDxt9D", - "colab_type": "text" - }, - "source": [ - "## QuickSort in O(nlogn)" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "j4mW9xNrO6hm", - "colab_type": "code", - "colab": {} - }, - "source": [ - "def partition(a, s, e):\n", - " p = a[e]\n", - " i = s - 1\n", - " # Scan unresticted area\n", - " for j in range(s, e): \n", - " # Swap \n", - " if a[j] <= p:\n", - " i += 1\n", - " a[i], a[j] = a[j], a[i] \n", - " a[i+1], a[e] = a[e], a[i+1]\n", - " return i+1" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "oTmEOjk2QQZV", - "colab_type": "code", - "outputId": "d8fe6f91-cfc0-42e9-c5af-f87ee5d7357b", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 54 - } - }, - "source": [ - "# Experiment the correctness of lumutos partition\n", - "lst = [9, 10, 2, 8, 9, 3, 7]\n", - "print(partition(lst, 0, len(lst)-1))\n", - "print(lst)" - ], - "execution_count": 0, - "outputs": [ - { - "output_type": "stream", - "text": [ - "2\n", - "[2, 3, 7, 8, 9, 10, 9]\n" - ], - "name": "stdout" - } - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "v2_nP14pObAn", - "colab_type": "code", - "colab": {} - }, - "source": [ - "# main algorithm of quick sort\n", - "def quickSort(a, s, e, partition=partition):\n", - " # base case , can not be divided further\n", - " if s >= e:\n", - " return \n", - " p = partition(a, s, e)\n", - " \n", - " # conquer smaller problem\n", - " quickSort(a, s , p-1, partition)\n", - " quickSort(a, p+1, e, partition)\n", - " return" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - 
"metadata": { - "id": "ep4sVlJI7gNs", - "colab_type": "code", - "outputId": "ec1ca64d-ea4d-4c48-b2cc-cd57534b4228", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 35 - } - }, - "source": [ - "quickSort(lst, 0, len(lst) - 1)\n", - "lst" - ], - "execution_count": 0, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "[2, 3, 7, 8, 9, 9, 10]" - ] - }, - "metadata": { - "tags": [] - }, - "execution_count": 4 - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "UjMN8PWW7AVD", - "colab_type": "text" - }, - "source": [ - "### Quick Select" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "AUs3mt3o7DaF", - "colab_type": "code", - "colab": {} - }, - "source": [ - "def quickSelect(a, s, e, k, partition=partition):\n", - " if s >= e:\n", - " return a[s]\n", - "\n", - " p = partition(a, s, e) \n", - " if p == k:\n", - " return a[p]\n", - " if k > p:\n", - " return quickSelect(a, p+1, e, k, partition)\n", - " else:\n", - " return quickSelect(a, s, p-1, k, partition)" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "22m8CitD8747", - "colab_type": "code", - "outputId": "d697de6c-cff1-435a-e2cc-291f1f7305ea", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 35 - } - }, - "source": [ - "lst = [9, 10, 2, 8, 9, 3, 7]\n", - "quickSelect(lst, 0, len(lst) - 1, 2)" - ], - "execution_count": 0, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "7" - ] - }, - "metadata": { - "tags": [] - }, - "execution_count": 7 - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "8N4QPVcqouFf", - "colab_type": "text" - }, - "source": [ - "### experiment to see the stability of quick sort" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "oOHBweKDgiMw", - "colab_type": "code", - "outputId": "948493b2-40d4-4573-dffd-4dbfbba6f870", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 
35 - } - }, - "source": [ - "#a = [(5, 1), (7, 1),(3, 1), (2, 1), (5, 2), (6,1), (7, 2), (8, 1), (9, 1), (5, 3), (5, 4)] # the second item represents the index of duplcates\n", - "a = [(2, 1), (2, 2), (1, 1)]\n", - "def partition_tuple(a, s, e):\n", - " '''Lumutos partition'''\n", - " p = a[e][0]\n", - " i = s - 1\n", - " for j in range(s, e): #a[s, e-1]\n", - " \n", - " if a[j][0] <= p:\n", - " i += 1\n", - " a[i], a[j] = a[j], a[i] # swap a[i] and a[j]\n", - " a[i+1], a[e] = a[e], a[i+1]\n", - " return i+1\n", - "quickSort(a, 0, len(a) - 1, partition_tuple)\n", - "print(a)" - ], - "execution_count": 0, - "outputs": [ - { - "output_type": "stream", - "text": [ - "[(1, 1), (2, 2), (2, 1)]\n" - ], - "name": "stdout" - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "QqcdUXaepGSa", - "colab_type": "text" - }, - "source": [ - "### experiment to see the performance of worst time" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "5n7nsw13pLWr", - "colab_type": "code", - "outputId": "736cf200-711d-4f42-b2b1-f5c369f70ce8", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 54 - } - }, - "source": [ - "import random, time\n", - "lst1 = [random.randint(1, 25) for i in range(400)]\n", - "lst2 = [i for i in range(400)[::-1]]\n", - "t1 = time.time()\n", - "quickSort(lst1, 0, len(lst1)-1, partition)\n", - "print('time for random values:', time.time()-t1)\n", - "\n", - "t1 = time.time()\n", - "quickSort(lst2, 0, len(lst2)-1, partition)\n", - "print('time for sorted values:', time.time()-t1)" - ], - "execution_count": 0, - "outputs": [ - { - "output_type": "stream", - "text": [ - "time for random values: 0.0017516613006591797\n", - "time for sorted values: 0.0171658992767334\n" - ], - "name": "stdout" - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "0y5x07wwo4Um", - "colab_type": "text" - }, - "source": [ - "### Hoare Partition" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "3vNUigFmo7ei", - 
"colab_type": "code", - "colab": {} - }, - "source": [ - "# def partition_hoare(a, s, e):\n", - "# '''Hoare Parition'''\n", - "# p = a[e]\n", - "# i = s\n", - "# j = e-1\n", - "# while True:\n", - "# while a[i] <= p and i < j:\n", - "# i += 1\n", - "# while a[j] > p and i < j:\n", - "# j -= 1\n", - "# if i < j:\n", - "# a[i], a[j] = a[j], a[i]\n", - "# else:\n", - "# return j\n", - "# return j" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "GKbXRk4Czwjt", - "colab_type": "code", - "outputId": "be048173-9125-4716-89f0-17ad7fb0b345", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 54 - } - }, - "source": [ - "# lst = [9, 10, 2, 8, 9, 3, 7]\n", - "# print(partition_hoare(lst, 0, len(lst)-1))\n", - "# print(lst)" - ], - "execution_count": 0, - "outputs": [ - { - "output_type": "stream", - "text": [ - "2\n", - "[3, 2, 10, 8, 9, 9, 7]\n" - ], - "name": "stdout" - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "4EDqug7Yg2yl", - "colab_type": "text" - }, - "source": [ - "## HeapSort in O(nlogn)" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "0PE9BxQBg7lu", - "colab_type": "code", - "colab": {} - }, - "source": [ - "from heapq import heapify, heappop\n", - "def heapsort(a):\n", - " heapify(a)\n", - " return [heappop(a) for i in range(len(a))]" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "PcUurYvkg_Px", - "colab_type": "code", - "outputId": "a96f214c-6fdb-4ed5-9c94-7e8772e65fb5", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 35 - } - }, - "source": [ - "lst = [21, 1, 45, 78, 3, 5]\n", - "heapsort(lst)" - ], - "execution_count": 0, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "[1, 3, 5, 21, 45, 78]" - ] - }, - "metadata": { - "tags": [] - }, - "execution_count": 2 - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "XOkspIkgKots", - 
"colab_type": "text" - }, - "source": [ - "## Linear Sort" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "K1EUj-De16tk", - "colab_type": "text" - }, - "source": [ - "### Bucket Sort" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "KR6BfJia-Nza", - "colab_type": "code", - "outputId": "6f52adca-30ab-4b13-e2db-f1231d6de755", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 35 - } - }, - "source": [ - "# Prepare input data\n", - "import numpy as np\n", - "np.random.seed(1)\n", - "a = np.random.uniform(0, 1, 10)\n", - "a = np.round(a, decimals=2)\n", - "a" - ], - "execution_count": 0, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "array([0.42, 0.72, 0. , 0.3 , 0.15, 0.09, 0.19, 0.35, 0.4 , 0.54])" - ] - }, - "metadata": { - "tags": [] - }, - "execution_count": 9 - } - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "3tI_x9WqBI0m", - "colab_type": "code", - "colab": {} - }, - "source": [ - "from functools import reduce\n", - "def bucketSort(a):\n", - " n = len(a)\n", - " buckets = [[] for _ in range(n)]\n", - " # Divide numbers into buckets\n", - " for v in a:\n", - " buckets[int(v*n)].append(v)\n", - " print(buckets)\n", - " # Apply insertion sort within each bucket\n", - " for i in range(n):\n", - " insertionSort(buckets[i])\n", - " # Combine sorted buckets\n", - " return reduce(lambda a, b: a + b, buckets)" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "DZAfxnRNBVuG", - "colab_type": "code", - "outputId": "24aeffb1-1963-4848-c021-017fcdb800a7", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 54 - } - }, - "source": [ - "bucketSort(a)" - ], - "execution_count": 0, - "outputs": [ - { - "output_type": "stream", - "text": [ - "[[0.0, 0.09], [0.15, 0.19], [], [0.3, 0.35], [0.42, 0.4], [0.54], [], [0.72], [], []]\n" - ], - "name": "stdout" - }, - { - "output_type": "execute_result", - "data": { - 
"text/plain": [ - "[0.0, 0.09, 0.15, 0.19, 0.3, 0.35, 0.4, 0.42, 0.54, 0.72]" - ] - }, - "metadata": { - "tags": [] - }, - "execution_count": 16 - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "W-4vTjaf9cKN", - "colab_type": "text" - }, - "source": [ - "### Counting Sort" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "LrGlbYH59nZT", - "colab_type": "code", - "colab": {} - }, - "source": [ - "a1 = [1, 4, 0, 2, 7, 5, 9]\n", - "a2 = [1, 4, 1, 2, 7, 5, 2]" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "ptAUeFu8Kvrs", - "colab_type": "code", - "colab": {} - }, - "source": [ - "def countSort(a):\n", - " minK, maxK = min(a), max(a)\n", - " k = maxK - minK + 1\n", - " count = [0] * (maxK - minK + 1)\n", - " n = len(a)\n", - " order = [0] * n\n", - " # Get occurrence\n", - " for key in a:\n", - " count[key - minK] += 1\n", - " \n", - " # Get prefix sum\n", - " for i in range(1, k):\n", - " count[i] += count[i-1]\n", - " \n", - " # Put key in position\n", - " for i in range(n-1, -1, -1):\n", - " key = a[i] - minK\n", - " count[key] -= 1 # to get the index as position\n", - " order[count[key]] = a[i] \n", - " return order" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "cvrX-2Co_AgO", - "colab_type": "code", - "outputId": "49b5ea36-34c9-481b-a539-7d7327878cfa", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 35 - } - }, - "source": [ - "countSort(a1)" - ], - "execution_count": 0, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "[0, 1, 2, 4, 5, 7, 9]" - ] - }, - "metadata": { - "tags": [] - }, - "execution_count": 3 - } - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "sk5qIeoo_D6G", - "colab_type": "code", - "outputId": "e0b9bf82-685b-4087-b8a5-a9e7f8aa0388", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 90 - } - }, - "source": [ - "countSort(a2)" - 
], - "execution_count": 0, - "outputs": [ - { - "output_type": "stream", - "text": [ - "[2, 2, 0, 1, 1, 0, 1]\n", - "[2, 4, 4, 5, 6, 6, 7]\n", - "[1, 1, 2, 2, 4, 5, 7]\n" - ], - "name": "stdout" - }, - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "[1, 1, 2, 2, 4, 5, 7]" - ] - }, - "metadata": { - "tags": [] - }, - "execution_count": 6 - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "R85He0D050tZ", - "colab_type": "text" - }, - "source": [ - "### Radix Sort" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "Ma4Sqj-KViqD", - "colab_type": "code", - "outputId": "87e16a22-de0c-407f-a30e-2b7181649892", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 35 - } - }, - "source": [ - "# Get digits\n", - "a = 178\n", - "digits = []\n", - "while a > 0:\n", - " digits.append(a%10)\n", - " a = a // 10\n", - "digits" - ], - "execution_count": 0, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "[8, 7, 1]" - ] - }, - "metadata": { - "tags": [] - }, - "execution_count": 46 - } - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "Jh65UuU4IeMj", - "colab_type": "code", - "colab": {} - }, - "source": [ - "def count_sort(a, exp):\n", - " count = [0] * 10 # [0, 9]\n", - " n = len(a)\n", - " order = [0] * n\n", - " # Get occurrence\n", - " for key in a:\n", - " key = (key // exp) % 10\n", - " count[key] += 1\n", - " \n", - " # Get prefix sum\n", - " for i in range(1, 10):\n", - " count[i] += count[i-1]\n", - " \n", - " # Put key in position\n", - " for i in range(n-1, -1, -1):\n", - " key = (a[i] // exp) % 10\n", - " count[key] -= 1 # to get the index as position\n", - " order[count[key]] = a[i] \n", - " return order" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "Mq1PxuWQ50HD", - "colab_type": "code", - "colab": {} - }, - "source": [ - "a = [170, 45, 75, 90, 802, 24]\n", - "\n", - "# LSD radix sortin\n", - "def 
radixSort(a):\n", - " maxInt = max(a)\n", - " exp = 1\n", - " while maxInt // exp > 0:\n", - " a = count_sort(a, exp)\n", - " exp *= 10\n", - " return a" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "6mNsMh4rNXA8", - "colab_type": "code", - "outputId": "c14cefac-5007-49cc-9b98-1fba67be05f1", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 35 - } - }, - "source": [ - "a = radixSort(a)\n", - "a" - ], - "execution_count": 0, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "[24, 45, 75, 90, 170, 802]" - ] - }, - "metadata": { - "tags": [] - }, - "execution_count": 45 - } - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "Rq82GJm5lHaH", - "colab_type": "code", - "colab": {} - }, - "source": [ - "# MSD radix soring of strings with bucket sort\n", - "def MSD_radix_string_sort(a, i):\n", - " '''\n", - " s : strings\n", - " i: starting radix for sorting\n", - " '''\n", - " # End condition: bucket has only one item\n", - " if len(a) <= 1:\n", - " return a\n", - "\n", - " # Divide\n", - " buckets = [[] for _ in range(26)]\n", - " done_bucket = []\n", - " for s in a:\n", - " if i >= len(s):\n", - " done_bucket.append(s)\n", - " else:\n", - " buckets[ord(s[i]) - ord('a')].append(s)\n", - " # Conquer and chain all buckets\n", - " ans = []\n", - " for b in buckets:\n", - " ans += MSD_radix_string_sort(b, i + 1)\n", - " return done_bucket + ans" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "tnHSSy6WzXPG", - "colab_type": "code", - "outputId": "a11f8ba2-c679-4169-827d-944d61002fe6", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 35 - } - }, - "source": [ - "s = ['apple', 'pear', 'berry', 'peach', 'apricot', 'ap', 'pear']\n", - "MSD_radix_string_sort(s, 0)" - ], - "execution_count": 0, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "['ap', 'apple', 
'apricot', 'berry', 'peach', 'pear', 'pear']" - ] - }, - "metadata": { - "tags": [] - }, - "execution_count": 53 - } - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "Q92n6NwiMInZ", - "colab_type": "code", - "colab": {} - }, - "source": [ - "# Experiment with Python sort" - ], - "execution_count": 0, - "outputs": [] - } - ] -} \ No newline at end of file diff --git a/Colab_Codes/chapter_tree_data_structure_and_traversal.ipynb b/Colab_Codes/chapter_tree_data_structure_and_traversal.ipynb deleted file mode 100644 index 985dec3..0000000 --- a/Colab_Codes/chapter_tree_data_structure_and_traversal.ipynb +++ /dev/null @@ -1,636 +0,0 @@ -{ - "nbformat": 4, - "nbformat_minor": 0, - "metadata": { - "colab": { - "name": "chapter_tree_data_structure_and_traversal.ipynb", - "provenance": [], - "collapsed_sections": [], - "toc_visible": true - }, - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - } - }, - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "id": "3A2mprldYvXe", - "colab_type": "text" - }, - "source": [ - "## Tree Representation\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "SYVbYid7SPos", - "colab_type": "text" - }, - "source": [ - "### N-aray Tree" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "9R_XYWC2Yz-G", - "colab_type": "code", - "colab": {} - }, - "source": [ - "# Define Tree Node\n", - "class NaryNode:\n", - " def __init__(self, val, n):\n", - " self.children = [None] * n\n", - " self.val = val " - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "cQfD7fA7WWPT", - "colab_type": "code", - "colab": {} - }, - "source": [ - "root = NaryNode(1, 2)\n", - "left = NaryNode(2, 2)\n", - "right = NaryNode(3, 2)\n", - "# connect root to its left and right, the order does not matter\n", - "root.children[0] = left\n", - "root.children[1] = right\n", - "left = NaryNode(4, 0)\n", - "right = NaryNode(5, 5)" - ], - "execution_count": 0, - "outputs": 
[] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "wLPjQrfnZbQY", - "colab_type": "text" - }, - "source": [ - "### Binary Tree" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "ZKknhNURZdA0", - "colab_type": "code", - "colab": {} - }, - "source": [ - "# Binary Tree Node\n", - "class BinaryNode:\n", - " def __init__(self, val):\n", - " self.left = None\n", - " self.right = None\n", - " self.val = val" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "yPukXXkXZ_zn", - "colab_type": "text" - }, - "source": [ - "#### Tree Construction\n", - "```\n", - " 1\n", - " / \\ \n", - " 2 3\n", - " / \\ \\\n", - "4 5 6 \n", - "```" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "CyXwZ9sf8dyG", - "colab_type": "code", - "colab": {} - }, - "source": [ - "# Naive Tree Construction\n", - "root = BinaryNode(1)\n", - "left = BinaryNode(2)\n", - "right = BinaryNode(3)\n", - "root.left = left\n", - "root.right = right\n", - "left.left = BinaryNode(4)\n", - "left.right = BinaryNode(5)\n", - "right.right = BinaryNode(6)" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "gOHqKKfY_Ug0", - "colab_type": "code", - "colab": {} - }, - "source": [ - "# Recursive Tree Construction\n", - "def constructTree(a, idx):\n", - " '''\n", - " a: input array of nodes\n", - " idx: index to indicat the location of the current node\n", - " '''\n", - " if idx >= len(a):\n", - " return None\n", - " if a[idx]:\n", - " node = BinaryNode(a[idx])\n", - " node.left = constructTree(a, 2*idx + 1)\n", - " node.right = constructTree(a, 2*idx + 2)\n", - " return node\n", - " return None" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "STvuN-5p_5Di", - "colab_type": "code", - "colab": {} - }, - "source": [ - "nums = [1, 2, 3, 4, 5, None, 6]\n", - "root = constructTree(nums, 0)" - ], - "execution_count": 0, - "outputs": [] - 
}, - { - "cell_type": "code", - "metadata": { - "id": "NsyaTExQIAos", - "colab_type": "code", - "outputId": "1681353d-30ad-4269-d273-cf939958e015", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 35 - } - }, - "source": [ - "nums = [1] * 1_000_000\n", - "print(nums.__sizeof__()/1024/1024)" - ], - "execution_count": 0, - "outputs": [ - { - "output_type": "stream", - "text": [ - "7.629432678222656\n" - ], - "name": "stdout" - } - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "D4Bj_uMsHpAZ", - "colab_type": "code", - "colab": {} - }, - "source": [ - "# Constrcut a large tree\n", - "nums = [1] * 1_000_000\n", - "#root = constructTree(nums, 0)" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "ymohgmrC9-ih", - "colab_type": "text" - }, - "source": [ - "## Tree Traversal" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "W4XlEDMn7ZE5", - "colab_type": "text" - }, - "source": [ - "### Depth-first Tree Traversal" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "U_ZjljRzugyQ", - "colab_type": "text" - }, - "source": [ - "#### Recursive Tree Traversal" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "zERenCBy99sV", - "colab_type": "code", - "colab": {} - }, - "source": [ - "# Preorder Traversal\n", - "def recursive(node):\n", - " if not node:\n", - " return\n", - " print(node.val, end=' ')\n", - " recursive(node.left)\n", - " recursive(node.right)\n" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "M86_7JR7-cfu", - "colab_type": "code", - "outputId": "46132bdc-06ad-406b-8db4-ff08febe0a89", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 35 - } - }, - "source": [ - "recursive(root)" - ], - "execution_count": 0, - "outputs": [ - { - "output_type": "stream", - "text": [ - "1 2 4 5 3 6 " - ], - "name": "stdout" - } - ] - }, - { - "cell_type": "code", - "metadata": { - "id": 
"o1U3R9A8GvPr", - "colab_type": "code", - "colab": {} - }, - "source": [ - "# Inorder Traversal\n", - "def inorder_traversal(node):\n", - " if not node:\n", - " return\n", - " inorder_traversal(node.left)\n", - " print(node.val, end=' ')\n", - " inorder_traversal(node.right)" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "sy6hIovuG0dq", - "colab_type": "code", - "outputId": "ab4403e1-b5ac-4747-a388-30a79f2f2bc6", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 35 - } - }, - "source": [ - "inorder_traversal(root)" - ], - "execution_count": 0, - "outputs": [ - { - "output_type": "stream", - "text": [ - "4 2 5 1 3 6 " - ], - "name": "stdout" - } - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "Si1c44bDKLkG", - "colab_type": "code", - "colab": {} - }, - "source": [ - "# Preorder traversal with returns\n", - "def PreOrder(root):\n", - " if root is None:\n", - " return []\n", - " ans = []\n", - " # Divide and brings back the subresult\n", - " left = PreOrder(root.left)\n", - " right = PreOrder(root.right)\n", - " # Combine\n", - " ans = [root.val] + left + right\n", - " return ans" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "tCVpSw9ALNEM", - "colab_type": "code", - "outputId": "30f7cb20-2abd-4a00-f978-e4609d2d6300", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 35 - } - }, - "source": [ - "print(PreOrder(root))" - ], - "execution_count": 0, - "outputs": [ - { - "output_type": "stream", - "text": [ - "[1, 2, 4, 5, 3, 6]\n" - ], - "name": "stdout" - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "yxCOd-iZuc-s", - "colab_type": "text" - }, - "source": [ - "#### Iterative Tree Traversal" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "N43NhcugSfYN", - "colab_type": "code", - "colab": {} - }, - "source": [ - "def PreOrderIterative(root):\n", - " if root is None:\n", - " return []\n", 
- " res = []\n", - " stack = [root]\n", - " while stack:\n", - " tmp = stack.pop()\n", - " res.append(tmp.val)\n", - " if tmp.right:\n", - " stack.append(tmp.right)\n", - " if tmp.left:\n", - " stack.append(tmp.left)\n", - " return res" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "5ay0zisQShpr", - "colab_type": "code", - "colab": {} - }, - "source": [ - "preorders = PreOrderIterative(root)\n", - "preorders" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "iz2S46vA7ked", - "colab_type": "code", - "colab": {} - }, - "source": [ - "def PostOrderIterative(root):\n", - " if root is None:\n", - " return []\n", - " res = []\n", - " stack = [root]\n", - " while stack:\n", - " tmp = stack.pop()\n", - " res.append(tmp.val)\n", - " if tmp.left:\n", - " stack.append(tmp.left)\n", - " if tmp.right:\n", - " stack.append(tmp.right)\n", - " return res[::-1]" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "_SH4Cr6c7sP5", - "colab_type": "code", - "outputId": "7d956975-f3e2-46e3-d9d2-fe10c41bc8c6", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 35 - } - }, - "source": [ - "postorders = PostOrderIterative(root)\n", - "postorders" - ], - "execution_count": 0, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "[4, 5, 2, 6, 3, 1]" - ] - }, - "metadata": { - "tags": [] - }, - "execution_count": 22 - } - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "cxWckHgZupxE", - "colab_type": "code", - "colab": {} - }, - "source": [ - "# Inorder and Preorder\n", - "def iterative_traversal(root):\n", - " stack = []\n", - " cur = root\n", - " preorders = []\n", - " inorders = []\n", - " while stack or cur:\n", - " while cur:\n", - " preorders.append(cur.val)\n", - " stack.append(cur)\n", - " cur = cur.left\n", - " node = stack.pop()\n", - " inorders.append(node.val)\n", - " 
cur = node.right\n", - " return preorders, inorders" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "TFuu6oUmwwDq", - "colab_type": "code", - "outputId": "c54bfafe-5789-4538-a6bf-5a4f843ab75d", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 35 - } - }, - "source": [ - "preorders, inorders = iterative_traversal(root)\n", - "preorders, inorders" - ], - "execution_count": 0, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "([1, 2, 4, 5, 3, 6], [4, 2, 5, 1, 3, 6])" - ] - }, - "metadata": { - "tags": [] - }, - "execution_count": 20 - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "GyJug1-77Re6", - "colab_type": "text" - }, - "source": [ - "### Breath-first Tree Traversal" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "8IxSoqTthZb9", - "colab_type": "code", - "colab": {} - }, - "source": [ - "# Level Order Traversal: To show the nodes at each level, we use LevelOrder function to print out the tree:\n", - "def LevelOrder(root):\n", - " if not root:\n", - " return\n", - " nodes_same_level = [root]\n", - " while nodes_same_level:\n", - " temp = []\n", - " for n in nodes_same_level:\n", - " print(n.val, end=' ')\n", - " if n.left:\n", - " temp.append(n.left)\n", - " if n.right:\n", - " temp.append(n.right)\n", - " nodes_same_level = temp" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "uPxVdjRrA0UB", - "colab_type": "code", - "colab": {} - }, - "source": [ - "# Use a queue\n", - "def bfs(root):\n", - " if not root:\n", - " return\n", - " q = [root]\n", - " while q:\n", - " node = q.pop(0) # get node at the front of the queue\n", - " print(node.val, end=' ')\n", - " if node.left:\n", - " q.append(node.left)\n", - " if node.right:\n", - " q.append(node.right)\n" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "Pl2HGcHkA4rT", - 
"colab_type": "code", - "outputId": "9df917c4-592e-4233-b48f-502ceef94f27", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 35 - } - }, - "source": [ - "LevelOrder(root)" - ], - "execution_count": 0, - "outputs": [ - { - "output_type": "stream", - "text": [ - "1 2 3 4 5 6 " - ], - "name": "stdout" - } - ] - } - ] -} \ No newline at end of file diff --git a/Easy-Book/IEEEbib.bst b/Easy-Book/IEEEbib.bst deleted file mode 100644 index b009a00..0000000 --- a/Easy-Book/IEEEbib.bst +++ /dev/null @@ -1,1034 +0,0 @@ -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% IEEE.bst %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -% Bibliography Syle file for articles according to IEEE instructions -% balemi@aut.ee.ethz.ch <22-JUN-93> -% modified from unsrt.bib. Contributions by Richard H. Roy - -ENTRY - { address - author - booktitle - chapter - edition - editor - howpublished - institution - journal - key - month - note - number - organization - pages - publisher - school - series - title - type - volume - year - } - {} - { label } - -INTEGERS { output.state before.all mid.sentence after.sentence after.block } - -FUNCTION {init.state.consts} -{ #0 'before.all := - #1 'mid.sentence := - #2 'after.sentence := - #3 'after.block := -} - -STRINGS { s t } - -FUNCTION {output.nonnull} -{ 's := - output.state mid.sentence = - { ", " * write$ } - { output.state after.block = -% next line commented out by rhr and changed to write comma -% { add.period$ write$ - { ", " * write$ - newline$ - "\newblock " write$ - } - { output.state before.all = - 'write$ - { add.period$ " " * write$ } - if$ - } - if$ - mid.sentence 'output.state := - } - if$ - s -} - -FUNCTION {output} -{ duplicate$ empty$ - 'pop$ - 'output.nonnull - if$ -} - -FUNCTION {output.check} -{ 't := - duplicate$ empty$ - { pop$ "empty " t * " in " * cite$ * warning$ } - 'output.nonnull - if$ -} - -FUNCTION {output.bibitem} -{ newline$ - "\bibitem{" write$ - cite$ write$ - "}" write$ - newline$ - "" - before.all 'output.state := -} - 
-FUNCTION {fin.entry} -{ add.period$ - write$ - newline$ -} - -% 5/24/89 rhr -% modified fin.entry function - prints note field after body of entry -%FUNCTION {fin.entry} -%{ add.period$ -% note empty$ -% 'write$ -% { "\par\bgroup\parindent=0em " * annote * "\par\egroup " * write$ -% } -% if$ -% newline$ -%} - -FUNCTION {new.block} -{ output.state before.all = - 'skip$ - { after.block 'output.state := } - if$ -} - -% new block without terminating last block with a comma -FUNCTION {new.ncblock} -{ - write$ - newline$ - "\newblock " - before.all 'output.state := -} - -FUNCTION {new.nccont} -{ - write$ - " " - before.all 'output.state := -} - -FUNCTION {new.sentence} -{ output.state after.block = - 'skip$ - { output.state before.all = - 'skip$ - { after.sentence 'output.state := } - if$ - } - if$ -} - -FUNCTION {not} -{ { #0 } - { #1 } - if$ -} - -FUNCTION {and} -{ 'skip$ - { pop$ #0 } - if$ -} - -FUNCTION {or} -{ { pop$ #1 } - 'skip$ - if$ -} - -FUNCTION {new.block.checka} -{ empty$ - 'skip$ - 'new.block - if$ -} - -FUNCTION {new.block.checkb} -{ empty$ - swap$ empty$ - and - 'skip$ - 'new.block - if$ -} - -FUNCTION {new.sentence.checka} -{ empty$ - 'skip$ - 'new.sentence - if$ -} - -FUNCTION {new.sentence.checkb} -{ empty$ - swap$ empty$ - and - 'skip$ - 'new.sentence - if$ -} - -FUNCTION {field.or.null} -{ duplicate$ empty$ - { pop$ "" } - 'skip$ - if$ -} - -FUNCTION {emphasize} -{ duplicate$ empty$ - { pop$ "" } - { "{\em " swap$ * "}" * } - if$ -} - -FUNCTION {boldface} -{ duplicate$ empty$ - { pop$ "" } - { "{\bf " swap$ * "}" * } - if$ -} - -%FUNCTION {boldface} -%{ 's swap$ := -% s "" = -% { "" } -% { "{\bf " s * "}" * } -% if$ -%} -% -INTEGERS { nameptr namesleft numnames } - -FUNCTION {format.names} -{ 's := - #1 'nameptr := - s num.names$ 'numnames := - numnames 'namesleft := - { namesleft #0 > } - %{ s nameptr "{ff~}{vv~}{ll}{, jj}" format.name$ 't := - { s nameptr "{f. 
}{vv~}{ll}{, jj}" format.name$ 't := -% nameptr #1 > -% { namesleft #1 > -nameptr #1 > - { - nameptr #3 - #1 + = - numnames #3 - > and - { "others" 't := - #1 'namesleft := } - 'skip$ - if$ - namesleft #1 > - - { ", " * t * } - { numnames #2 > - { "," * } - 'skip$ - if$ - t "others" = - { " et~al." * } - { " and " * t * } - if$ - } - if$ - } - 't - if$ - nameptr #1 + 'nameptr := - namesleft #1 - 'namesleft := - } - while$ -} - -FUNCTION {format.authors} -{ author empty$ - { "" } - { author format.names } - if$ -} - -FUNCTION {format.editors} -{ editor empty$ - { "" } - { editor format.names - editor num.names$ #1 > - { ", Eds." * } - { ", Ed." * } - if$ - } - if$ -} - -FUNCTION {format.title} -{ title empty$ - { "" } - { "``" title "t" change.case$ * } - if$ -} - -FUNCTION {n.dashify} -{ 't := - "" - { t empty$ not } - { t #1 #1 substring$ "-" = - { t #1 #2 substring$ "--" = not - { "--" * - t #2 global.max$ substring$ 't := - } - { { t #1 #1 substring$ "-" = } - { "-" * - t #2 global.max$ substring$ 't := - } - while$ - } - if$ - } - { t #1 #1 substring$ * - t #2 global.max$ substring$ 't := - } - if$ - } - while$ -} - -FUNCTION {format.date} -{ year empty$ - { month empty$ - { "" } - { "there's a month but no year in " cite$ * warning$ - month - } - if$ - } - { month empty$ - 'year - { month " " * year * } - if$ - } - if$ -} - -% FUNCTION {format.date} -% { year empty$ -% 'year -% { " " year * } -% if$ -% } - -FUNCTION {format.btitle} -{ title emphasize -} - -FUNCTION {tie.or.space.connect} -{ duplicate$ text.length$ #3 < - { "~" } - { " " } - if$ - swap$ * * -} - -FUNCTION {either.or.check} -{ empty$ - 'pop$ - { "can't use both " swap$ * " fields in " * cite$ * warning$ } - if$ -} - -FUNCTION {format.bvolume} -{ volume empty$ - { "" } - { "vol." 
volume tie.or.space.connect - series empty$ - 'skip$ - { " of " * series emphasize * } - if$ - "volume and number" number either.or.check - } - if$ -} - -FUNCTION {format.number.series} -{ volume empty$ - { number empty$ - { series field.or.null } - { output.state mid.sentence = - { "number" } - { "Number" } - if$ - number tie.or.space.connect - series empty$ - { "there's a number but no series in " cite$ * warning$ } - { " in " * series * } - if$ - } - if$ - } - { "" } - if$ -} - -FUNCTION {format.edition} -{ edition empty$ - { "" } - { output.state mid.sentence = - { edition "l" change.case$ " edition" * } - { edition "t" change.case$ " edition" * } - if$ - } - if$ -} - -INTEGERS { multiresult } - -FUNCTION {multi.page.check} -{ 't := - #0 'multiresult := - { multiresult not - t empty$ not - and - } - { t #1 #1 substring$ - duplicate$ "-" = - swap$ duplicate$ "," = - swap$ "+" = - or or - { #1 'multiresult := } - { t #2 global.max$ substring$ 't := } - if$ - } - while$ - multiresult -} - -FUNCTION {format.pages} -{ pages empty$ - { "" } - { pages multi.page.check - { "pp." pages n.dashify tie.or.space.connect } - { "p." pages tie.or.space.connect } - if$ - } - if$ -} - -FUNCTION {format.vol.num.pages} -{ -volume empty$ - {"" } - {"vol. " volume *} -if$ -number empty$ - 'skip$ - {", no. " number * *} -if$ -pages empty$ - 'skip$ - { duplicate$ empty$ - { pop$ format.pages } - { ", pp. 
" * pages n.dashify * } - if$ - } -if$ -} - -%FUNCTION {format.vol.num.pages} -%%boldface added 3/17/87 rhr -%{ volume field.or.null boldface -% number empty$ -% 'skip$ -% { "(" number * ")" * * -% volume empty$ -% { "there's a number but no volume in " cite$ * warning$ } -% 'skip$ -% if$ -% } -% if$ -% pages empty$ -% 'skip$ -% { duplicate$ empty$ -% { pop$ format.pages } -% { ":" * pages n.dashify * } -% if$ -% } -% if$ -%} - -FUNCTION {format.chapter.pages} -{ chapter empty$ - 'format.pages - { type empty$ - { "chapter" } - { type "l" change.case$ } - if$ - chapter tie.or.space.connect - pages empty$ - 'skip$ - { ", " * format.pages * } - if$ - } - if$ -} - -FUNCTION {format.in.ed.booktitle} -{ booktitle empty$ - { "" } - { editor empty$ - { "in " booktitle emphasize * } - { "in " booktitle emphasize * ", " * format.editors * } - if$ - } - if$ -} - -FUNCTION {empty.misc.check} -{ author empty$ title empty$ howpublished empty$ - month empty$ year empty$ note empty$ - and and and and and - { "all relevant fields are empty in " cite$ * warning$ } - 'skip$ - if$ -} - -FUNCTION {format.thesis.type} -{ type empty$ - 'skip$ - { pop$ - type "t" change.case$ - } - if$ -} - -FUNCTION {format.tr.number} -{ type empty$ - { "Tech. {R}ep." } - 'type - if$ - number empty$ - { "t" change.case$ } - { number tie.or.space.connect } - if$ -} - -FUNCTION {format.article.crossref} -{ key empty$ - { journal empty$ - { "need key or journal for " cite$ * " to crossref " * crossref * - warning$ - "" - } - { "In {\em " journal * "\/}" * } - if$ - } - { "In " key * } - if$ - " \cite{" * crossref * "}" * -} - -FUNCTION {format.crossref.editor} -{ editor #1 "{vv~}{ll}" format.name$ - editor num.names$ duplicate$ - #2 > - { pop$ " et~al." * } - { #2 < - 'skip$ - { editor #2 "{ff }{vv }{ll}{ jj}" format.name$ "others" = - { " et~al." 
* } - { " and " * editor #2 "{vv~}{ll}" format.name$ * } - if$ - } - if$ - } - if$ -} - -FUNCTION {format.book.crossref} -{ volume empty$ - { "empty volume in " cite$ * "'s crossref of " * crossref * warning$ - "In " - } - { "vol." volume tie.or.space.connect - " of " * - } - if$ - editor empty$ - editor field.or.null author field.or.null = - or - { key empty$ - { series empty$ - { "need editor, key, or series for " cite$ * " to crossref " * - crossref * warning$ - "" * - } - { "{\em " * series * "\/}" * } - if$ - } - { key * } - if$ - } - { format.crossref.editor * } - if$ - " \cite{" * crossref * "}" * -} - -FUNCTION {format.incoll.inproc.crossref} -{ editor empty$ - editor field.or.null author field.or.null = - or - { key empty$ - { booktitle empty$ - { "need editor, key, or booktitle for " cite$ * " to crossref " * - crossref * warning$ - "" - } - { "In {\em " booktitle * "\/}" * } - if$ - } - { "In " key * } - if$ - } - { "In " format.crossref.editor * } - if$ - " \cite{" * crossref * "}" * -} - -FUNCTION {article} -{ output.bibitem - format.authors "author" output.check - new.block - format.title ",''" * "title" output.check - new.ncblock - crossref missing$ - { journal emphasize "journal" output.check - format.vol.num.pages output - format.date "year" output.check - } - { format.article.crossref output.nonnull - format.pages output - } - if$ - new.block - note output - fin.entry -} - -FUNCTION {book} -{ output.bibitem - author empty$ - { format.editors "author and editor" output.check } - { format.authors output.nonnull - crossref missing$ - { "author and editor" editor either.or.check } - 'skip$ - if$ - } - if$ - new.block - format.btitle "title" output.check - crossref missing$ - { format.bvolume output - new.block - format.number.series output - new.sentence - publisher "publisher" output.check - address output - } - { new.block - format.book.crossref output.nonnull - } - if$ - format.edition output - format.date "year" output.check - new.block - note 
output - fin.entry -} - -FUNCTION {booklet} -{ output.bibitem - format.authors output - new.block - format.title ",''" * "title" output.check - new.nccont - howpublished address new.block.checkb - howpublished output - address output - format.date output - new.block - note output - fin.entry -} - -FUNCTION {inbook} -{ output.bibitem - author empty$ - { format.editors "author and editor" output.check } - { format.authors output.nonnull - crossref missing$ - { "author and editor" editor either.or.check } - 'skip$ - if$ - } - if$ - new.block - format.btitle "title" output.check - crossref missing$ - { format.bvolume output - format.chapter.pages "chapter and pages" output.check - new.block - format.number.series output - new.sentence - publisher "publisher" output.check - address output - } - { format.chapter.pages "chapter and pages" output.check - new.block - format.book.crossref output.nonnull - } - if$ - format.edition output - format.date "year" output.check - new.block - note output - fin.entry -} - -FUNCTION {incollection} -{ output.bibitem - format.authors "author" output.check - new.block - format.title ",''" * "title" output.check - new.ncblock - crossref missing$ - { format.in.ed.booktitle "booktitle" output.check - format.bvolume output - format.number.series output - format.chapter.pages output - new.sentence - publisher "publisher" output.check - address output - format.edition output - format.date "year" output.check - } - { format.incoll.inproc.crossref output.nonnull - format.chapter.pages output - } - if$ - new.block - note output - fin.entry -} - -FUNCTION {inproceedings} -{ output.bibitem - format.authors "author" output.check - new.block - format.title ",''" * "title" output.check - new.ncblock - crossref missing$ - { format.in.ed.booktitle "booktitle" output.check - address empty$ - { organization publisher new.sentence.checkb - organization output - format.date "year" output.check - } - { address output.nonnull - format.date "year" output.check 
- organization output - } - if$ - format.bvolume output - format.number.series output - format.pages output - publisher output - } - { format.incoll.inproc.crossref output.nonnull - format.pages output - } - if$ - new.block - note output - fin.entry -} - -FUNCTION {conference} { inproceedings } - -FUNCTION {manual} -{ output.bibitem - author empty$ - { organization empty$ - 'skip$ - { organization output.nonnull - address output - } - if$ - } - { format.authors output.nonnull } - if$ - new.block - format.btitle "title" output.check - author empty$ - { organization empty$ - { address new.block.checka - address output - } - 'skip$ - if$ - } - { organization address new.block.checkb - organization output - address output - } - if$ - format.edition output - format.date output - new.block - note output - fin.entry -} - -FUNCTION {mastersthesis} -{ output.bibitem - format.authors "author" output.check - new.block - format.title ",''" * "title" output.check - new.ncblock - "M.S. thesis" format.thesis.type output.nonnull - school "school" output.check - address output - format.date "year" output.check - new.block - note output - fin.entry -} - -FUNCTION {misc} -{ output.bibitem - format.authors output - title howpublished new.block.checkb - format.title ",''" * output - new.nccont - howpublished new.block.checka - howpublished output - format.date output - new.block - note output - fin.entry - empty.misc.check -} - -FUNCTION {phdthesis} -{ output.bibitem - format.authors "author" output.check - new.block - format.btitle "title" output.check - new.block - "Ph.D. 
thesis" format.thesis.type output.nonnull - school "school" output.check - address output - format.date "year" output.check - new.block - note output - fin.entry -} - -FUNCTION {proceedings} -{ output.bibitem - editor empty$ - { organization output } - { format.editors output.nonnull } - if$ - new.block - format.btitle "title" output.check - format.bvolume output - format.number.series output - address empty$ - { editor empty$ - { publisher new.sentence.checka } - { organization publisher new.sentence.checkb - organization output - } - if$ - publisher output - format.date "year" output.check - } - { address output.nonnull - format.date "year" output.check - new.sentence - editor empty$ - 'skip$ - { organization output } - if$ - publisher output - } - if$ - new.block - note output - fin.entry -} - -FUNCTION {techreport} -{ output.bibitem - format.authors "author" output.check - new.block - format.title ",''" * "title" output.check - new.ncblock - format.tr.number output.nonnull - institution "institution" output.check - address output - format.date "year" output.check - new.block - note output - fin.entry -} - -FUNCTION {unpublished} -{ output.bibitem - format.authors "author" output.check - new.block - format.title ",''" * "title" output.check - new.ncblock - note "note" output.check - format.date output - fin.entry -} - -FUNCTION {default.type} { misc } - -MACRO {jan} {"Jan."} - -MACRO {feb} {"Feb."} - -MACRO {mar} {"Mar."} - -MACRO {apr} {"Apr."} - -MACRO {may} {"May"} - -MACRO {jun} {"June"} - -MACRO {jul} {"July"} - -MACRO {aug} {"Aug."} - -MACRO {sep} {"Sept."} - -MACRO {oct} {"Oct."} - -MACRO {nov} {"Nov."} - -MACRO {dec} {"Dec."} - -MACRO {acmcs} {"ACM Computing Surveys"} - -MACRO {acta} {"Acta Informatica"} - -MACRO {cacm} {"Communications of the ACM"} - -MACRO {ibmjrd} {"IBM Journal of Research and Development"} - -MACRO {ibmsj} {"IBM Systems Journal"} - -MACRO {ieeese} {"IEEE Transactions on Software Engineering"} - -MACRO {ieeetc} {"IEEE Transactions on 
Computers"} - -MACRO {ieeetcad} - {"IEEE Transactions on Computer-Aided Design of Integrated Circuits"} - -MACRO {ipl} {"Information Processing Letters"} - -MACRO {jacm} {"Journal of the ACM"} - -MACRO {jcss} {"Journal of Computer and System Sciences"} - -MACRO {scp} {"Science of Computer Programming"} - -MACRO {sicomp} {"SIAM Journal on Computing"} - -MACRO {tocs} {"ACM Transactions on Computer Systems"} - -MACRO {tods} {"ACM Transactions on Database Systems"} - -MACRO {tog} {"ACM Transactions on Graphics"} - -MACRO {toms} {"ACM Transactions on Mathematical Software"} - -MACRO {toois} {"ACM Transactions on Office Information Systems"} - -MACRO {toplas} {"ACM Transactions on Programming Languages and Systems"} - -MACRO {tcs} {"Theoretical Computer Science"} - -READ - -STRINGS { longest.label } - -INTEGERS { number.label longest.label.width } - -FUNCTION {initialize.longest.label} -{ "" 'longest.label := - #1 'number.label := - #0 'longest.label.width := -} - -FUNCTION {longest.label.pass} -{ number.label int.to.str$ 'label := - number.label #1 + 'number.label := - label width$ longest.label.width > - { label 'longest.label := - label width$ 'longest.label.width := - } - 'skip$ - if$ -} - -EXECUTE {initialize.longest.label} - -ITERATE {longest.label.pass} - -FUNCTION {begin.bib} -{ preamble$ empty$ - 'skip$ - { preamble$ write$ newline$ } - if$ - "\begin{thebibliography}{" longest.label * "}" * write$ newline$ -} - -EXECUTE {begin.bib} - -EXECUTE {init.state.consts} - -ITERATE {call.type$} - -FUNCTION {end.bib} -{ newline$ - "\end{thebibliography}" write$ newline$ -} - -EXECUTE {end.bib} - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%% End of IEEE.bst %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% diff --git a/Easy-Book/chapters/chapter_10_non_linear_backtracking.tex b/Easy-Book/chapters/chapter_10_non_linear_backtracking.tex deleted file mode 100644 index 048c6c3..0000000 --- a/Easy-Book/chapters/chapter_10_non_linear_backtracking.tex +++ /dev/null @@ -1,158 +0,0 @@ 
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%%%%%%%%%%%%% DFS and BFS -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\documentclass[../main.tex]{subfiles} -\begin{document} - -% The basic breath-first and depth-first search we learned in Chapter~\ref{chapter_non_linear_graph} is like a linear search on linear data structures. There are more advanced universal graph search techniques: -% \begin{enumerate} - - -% \item Better Efficiency: -% \begin{enumerate} -% \item Bidirectional Search that can increase the efficiency of the BFS shown in Section~\ref{}. -% \item Application of DFS on Problem Searching Space: Backtracking techniques to prune the searching space based on DFS in Section~\ref{sec_backtrack}. -% \end{enumerate} -% \end{enumerate} - -% \subsection{Implementation} - - - -% A mutant for trees to visit it level by level -% \begin{lstlisting}[language = Python] -% def BFS(root): -% q = [root] -% root.visited = 1 -% level = 0 -% while q: -% n=q.pop() -% visit(n) #finish visit -% for node in n.adjacent: -% if not node.visited: -% node.visited = 1 #start to visit -% q.insert(0,node) -% \end{lstlisting} -%%%%%%%%%%%%%%%%%%%%%%Bidirectional%%%%%%%%%%%%%%%%%%%%%%%% - -%\subsection{$A^*$} - - - - - -% \section{Backtracking}% for Constraint Satisfaction Problem} -% \label{sec_backtrack} -% ``A variant of Depth First Search is called Back Tracking search, which uses less memory. In this search only one successor is generated at a time rather than all successors. Each partially expanded node remembers which successor to generate next. In this way only $O(m)$ memory is needed rather than $O(b^m)$. It is the most common algorithm for solving constraint satisfaction problem (CSP)." 
(Searching and Optimization Techniques in Artificial Intelligence: A Comparative Study $\&$ -% Complexity Analysis) - - - -\paragraph{Visualize Backtracking} Let us represent different $s$ as a node in a tree. Initial state $s=[]$ as the root node. The first level represents all possible states for $s=(s_0)$ of length 1, and the second level for $s=(s_0, s_1)$ of length 2. And the edge represents making a choice out of all items in the ordered set $A$. If we reach to end condition (leaf candidates) we succeed and the search stop. If the partial candidate can not satisfy the constraint, we return to the root node, and reset the state ('backtrack'). The process is shown in Fig.~\ref{fig:backtrack_tree}. -\begin{figure}[h!] - \centering - \includegraphics[width=0.98\columnwidth]{fig/back_tree_possibility.png} - \caption{Tree of possibilities for a typical backtracking algorithm} - \label{fig:backtrack_tree} -\end{figure} - -\paragraph{Backtrack Template} We list the template here which we summarized after viewing different backtracking algorithms. It usually is composed of two parts: initialization and main dfs backtracking. After we figure out our state vector $s$ where in our template is \texttt{state\_tracker} with total search tree depth $n$. In the main backtracking state: we first generate candidates according to previous states and then try out each candidate by iteration. We set the state before call recursive function to the next depth and reset the state after we return and move on to try next candidate. 
-\begin{lstlisting}[language=Python] -def backtrack(): - # initialization - A #a working data structure, either a list of candidates or a graph or a matrix representing a board - state_tracker = [None]*n - assist_state_tracker - # main backtracking - def dfs(d, n): - '''d: depth representing level in the tree''' - if d == n: - return - candidates = generate_candidates(state_tracker, assist_state_tracker) - for c in candidates: - set_state(state_tracker, assist_state_tracker, c) - dfs(d+1, n) - reset_state(state_tracker, assist_state_tracker, c) - - dfs(0, n) -\end{lstlisting} - -\paragraph{Complexity Analysis} The time complexity of backtracking can be obtained by analyzing the search tree. The worst case occurs when the complete result lies at the rightmost end of the search tree; we then need to traverse all the paths, visiting each node twice -- once forward and once backward. Assuming the cost to generate and visit a node is $O(1)$, the total time complexity is $O(|V|)$, where $|V|$ is the total number of nodes in the search tree. - - - - - - -In this chapter, the organization is as follows: -\begin{enumerate} - \item Show Property 1: We will first show how backtracking constructs the complete solution incrementally and how it backtracks to its previous state in Sec.~\ref{sec_enumeration}. - \begin{enumerate} - \item \textbf{On Implicit Graph:} start with the combination and permutation problem to show us how the backtracking works in Section~\ref{backtrack_permutation} and ~\ref{sec_combination} with simple and commonly seen combinatorial problems: combination and permutation. - \item \textbf{On Explicit Graph:} Enumerating all paths between the source and target vertex in a graph drawing in Section~\ref{subsec_all_paths}. Similarly, it can be applied to enumerating all spanning trees and graph partitions. - \end{enumerate} - \item Show Property 2: we demonstrate the application of search pruning in backtracking through CSP problems in Section~\ref{sec_sudoku}.
-\end{enumerate} -%%%%%%%%%%%%%%%%%%%%Combination%%%%%%%%%%%%%%%%%%%%%%%%%%% -\section{Enumeration} -\label{sec_enumeration} -\subsection{Permutation} -\label{backtrack_permutation} - - -\paragraph{Implicit Graph} In the graph, each node is either a partial or final solution. If we look at it as a tree, each internal node is a partial solution and all leaves are final solutions. One edge represents generating the next solution based on the current solution. The vertices and edges are not given by an explicitly defined graph or tree; the vertices are generated on the fly and the edges are implicit relations between these nodes. - -\paragraph{Backtracking VS DFS} To implement the state transfer, we can use either BFS or DFS on the implicit vertices. With recursive DFS, we can start from node [], and traverse to [1], then [1,2], then [1,2,3]. Then we backtrack to [1,2], backtrack to [1], and go to [1, 3], to [1, 3, 2]. To clarify the relation between backtracking and DFS, we can say backtracking is a complete search technique which systematically builds the search tree (implicitly, rather than as an explicit graph) and DFS is an ideal way to implement it. - - -\paragraph{Back to Permutation} -We can generalize permutation: permutations refer to the arrangements of $n$ things taken $k$ at a time without repetition, and the math formula is $A_{n}^{k} = n *(n-1)*(n-2)*...*(n-k+1)$. In Fig.~\ref{fig:backtrack_permutation}, we can see that each level $k$ shows all the solutions of $A_{n}^{k}$. The generation of $A_{n}^{k}$ is shown in the following Python Code.%Compared with combination, [a, b] and [b, a] would be considered as different solution. The relation of the number of combination and permutation solution can be described in formula: $C_{n}^{k}=\frac{A_{n}^{k}}{k!}$, where $k!=k*(k-1)...*1$. So $A\ge C$.
- - - - - -% (N-Queens : -% permutations -% with backtracking -% Soduko : -% counting -% with backtracking -% Scheduling: -% subsets -% with backtracking\url{https://www.cs.princeton.edu/~rs/AlgsDS07/24CombinatorialSearch.pdf}) - - - - - - - - % Also, in this process, we can trim the branches, so actually backtracking is DFS with trimming and backtrack to the previous stages. - - -\subsection{Combination} -\label{sec_combination} - - - -Note: To generate the power set, backtracking is NOT the only solution, if you are interested right now, check out Section~\ref{part4_array_combine}. - - - - - -% \subsection{Analysis} -% % \paragraph{Backtracking VS DFS} The implementation of Backtracking is equivalent to a DFS on the implicit or explicit search space and visiting each vertex no more than once. Backtracking is a technique to build up solution spaces incrementally and each exactly only once. And once one path reaches to the end, it backtrack to its previous state and try another candidate. DFS is a natural way to implement backtarck technique. - - - - - -% \paragraph{Applications} Backtracking can be applied where we can incrementally build up our final soultion from partial solution. It is a searching technique that applied on implicit graph which is built on-the-fly. It guarentees that it only visit each search vertex no more than once. The problems that backtracking can be used are these three types: (1) Combinations (Section~\ref{sec_combination}), (2) Permutations (Section~\ref{sec_permutation}), (3) enumerate all paths from a to b in graph. and (3) Optimization problems with constraints such as classical travels salesman, puzzles, and sudoku (Section~\ref{sec_sudoku}). In the first three cases, backtracking visit each implicit or explicit vertex exactly once in the searching space. 
And for problems with restraints, we can do search pruning and we end up amortizely visiting each vertex less than once which is more efficient compared with an exhaustive graph search such as DFS and BFS. - -\section{Solve CSP with Search Prunning} -\label{sec_sudoku} - - -\end{document} \ No newline at end of file diff --git a/Easy-Book/chapters/chapter_12_dynamicprogramming.tex b/Easy-Book/chapters/chapter_12_dynamicprogramming.tex deleted file mode 100644 index 985141b..0000000 --- a/Easy-Book/chapters/chapter_12_dynamicprogramming.tex +++ /dev/null @@ -1,1459 +0,0 @@ -\documentclass[../main.tex]{subfiles} -\begin{document} -\chapter{Dynamic Programming} -\begin{figure}[h] - \centering - \includegraphics[width=0.95\columnwidth]{fig/dynamic_programming_chapter.png} - \caption{Dynamic Programming Chapter Recap} - \label{fig:dynamic_prorgramming_divide_conquer} -\end{figure} - -% \begin{figure}[h!] -% \centering -% \includegraphics[width=0.7\columnwidth]{fig/fibanacci.png} -% \caption{Fibonacci number's Recursion Tree} -% \label{fig:fibonacci number} -% \end{figure} -% Use The subproblem graphs and the recurrence function to denote the dynamic programming. - -Dynamic programming is simplifying a complicated problem by breaking it down into simpler sub-problems in a recursive manner. As introduced in Divide-and-Conquer in Chapter~\ref{chapter_divide_conquer}, dynamic programming is applied on problems wherein its subproblems overlap when you construct them in Divide-and-Conquer manner. We use the recurrence function: $T(n) = T(n-1) + T(n-2) +...+T(1) + f(n)$, to highlight its most special characteristic -- Overlapping Subproblems -- compared with $T(n)=2T(n/2)+f(n)$ for Divide-and-Conquer's nonoverlapping subproblems. 
As we shall see, there are more types of recurrence functions in the dynamic programming field than this exemplary formulation, shown either briefly in this chapter or in Chapter~\ref{chapter_dynamic-programming}, where a comprehensive list of dynamic programming categories/patterns is given. - -\paragraph{Importance and Applications} Dynamic Programming is one of the fundamental methods in computer science and plays a very important role in computer algorithms; even the book \textit{Artificial Intelligence: A Modern Approach} mentions this terminology as many as 47 times. Dynamic programming fits problems that fall into the following categories: -\begin{enumerate} - \item Optimization: Compute the maximum or minimum value; - \item Counting: Count the total number of solutions; - \item Checking if a solution works. -\end{enumerate} -Note that not all problems of the above forms can be solved with dynamic programming; the problem must show two properties -- overlapping subproblems and optimal substructure -- in order for dynamic programming to be applied. These two properties will be defined and explained in this chapter. - -\paragraph{Our Difference and Plan} A lot of textbooks or courses describe dynamic programming as obscure and as demanding creativity and subtle insight from users to identify and construct dynamic programming solutions. However, we are determined to unfold this ``mystery'' by staying grounded and practical. We have two chapters on dynamic programming: the current chapter is oriented toward a clear definition, by distinguishing, relating, and exemplifying the concept against divide-and-conquer and complete search. This chapter serves as the frontline of our content on Dynamic Programming. Further, Chapter~\ref{chapter_dynamic-programming} focuses on categorizing problem patterns and giving examples of each.
- -\begin{itemize} - \item In order to understand how dynamic programming's role in the algorithms evolution map, the very first thing we do in Section~\ref{dynamic_programming_sec_search} is to show how we evolve the complete search to dynamic programming solution by: (1) discussing the relation between complete search, divide-and-conquer, and our dynamic programming; and (2) examining two elementary examples -- Fibonacci Sequence and Longest Increasing Subsequence. - \item Dynamic programming is typically applied on optimization problems. Section~\ref{sec_dynamic_programming_knowledge_base} discuss the principle properties, elements, and experience based guideline. And we show how we can relate these key characteristics to the field of optimization. - \item The naive solutions for dynamic programming applicable problems have either exponential or polynomial time using complete searching method. In Section.~\ref{sec_dynamic_programming_example} we showcase how we can decrease the complexity from the two baselines: from exponential to polynomial and from polynomial to polynomial with lower power. -\end{itemize} - -% Follow the naive complete search, in this Chapter, we will first explain how to evolve the naive complete search solution to the dynamic programming using two examples: fibonacci sequence and longest increasing sequence in Section~\ref{dynamic_programming_sec_search}. Followed by this, we would have another second characterize the key elements of dynamic programming, and we give examples when dynamic programming used to optimize exponential or polynomial. In the second section, we give generalization: steps to solve the dynamic programming. and more related . - - - - - - - %I explain the definition of dynamic programming, the three ways to implement it with Python, what types of dynamic programming we have, and how to solve each categories. 
-% \paragraph{Dynamic Programming VS Divide and Conquer} -% For example, the famous fibonacci number $f(i) = f(i-2) + f(i-1)$ shown in Fig~\ref{fig:fibonacci number}. We divide the problem $i$ into two subproblems $i-2$ and $i-1$. If we want to obtain result for n, we would have n subproblems. This is different compared with our previous divide and conquer examples, which normally divide the problems into half and half. Because here each subproblem for example i, is one or two size larger than its previous subproblem. Thus, the subproblems in dynamic programming \textit{overlapps} in some degree. Second, there is overlapping in dynamic programming's divided subproblems. We can find overlapping in two ways: 1) From the state transfer function; for example, $f(i-1) = f(i-3) + f(i-2)$, which means we computed $f(i-2)$ twice. 2) From the tree structure as shown in Fig~\ref{fig:fibonacci number}, where f(2) the subtree is computed twice. %This is the main difference between dynamic programming and Divide and conquer. - -\section{Introduction to Dynamic Programming} -\label{dynamic_programming_sec_search} -In this section, we answer two questions: -\begin{enumerate} - \item \textbf{How to distinct divide and conquer from dynamic programming?} We have already conceptually know that it differs in the case of the characteristics of subproblems in two cases: overlapping subproblems for dynamic programming where each subproblems share subproblems and non-overlapping subproblems for divide and conquer where the subproblems are disjoint with each other. In this section, we further answer this question in a more visualized way using the concept of \textit{subproblem graph}. - \item \textbf{How to develop the dynamic programming solution from the complete search naive method? } We are not offering a fully and detail-oriented answer in this section. Instead, we first identify the problem using complete search in dynamic programming applicable problems using subproblem graph. 
Then we answer this question using two elementary examples by showing a sorted solutions so that we can demonstrate the relation between complete search and dynamic programming. -\end{enumerate} - -\subsection{Concepts} -% Dynamic programming is an optimization methodology that used to improve efficiency from the problem's naive solution--complete search on subprogram graph (Depth-first-search and Breadth-first-search). - -\begin{figure}[ht!] - \centering - \begin{subfigure}[b]{0.3\textwidth} - \includegraphics[width=\columnwidth]{fig/Subproblem_graph.png} - \caption{Subproblem Graph for Fibonacci Sequence} - \label{fig:subproblem_graph_1_fs} - \end{subfigure} - \begin{subfigure}[b]{0.65\textwidth} - \includegraphics[width=\columnwidth]{fig/subproblem_graph_merge_sort.png} - \caption{Subproblem Graph for Merge Sort} - \label{fig:subproblem_graph_1_ms} - \end{subfigure} -\caption{Subproblem Graph} -\label{fig:subproblem_graph_1} -\end{figure} - -\paragraph{Subproblem Graph} If we treat each subproblem as a vertex, and the relation between subproblems as arced edges, we can get a directed subproblem graph. If the arced edge points from larger subproblems to smaller subproblems, we say it is in \textit{top-down fashion}. In contrast, if the arced edge points from smaller subproblems to larger subproblems, it is in \textit{bottom-up fashion}. In Fig.~\ref{fig:subproblem_graph_1} we draw the subproblem graph for the Fibonacci Sequence, which is defined in Subsection~\ref{subsec_fibonacci_sequence}, with $n=4$. In comparison, we also give the subproblem graph for sorting the array $[29, 10, 14, 37, 13]$. In the following contents, we show how we can use the subproblem graph to answer the two questions we laid out at the beginning of this section. -\paragraph{Terminologies} To make reading other materials accessible, we introduce more related terminologies that are widely used in the field. -\begin{itemize} - \item \textbf{State}: State and subproblem are interchangeable among different books.
Both of them can be used to describe the optimal solution to a problem with a solution space/search space. - \item \textbf{State Transition}: State transition and recurrence function are interchangeable. In the case of the fibonacci sequence, our state transition/recurrence function is given as $f(n)=f(n-1)+f(n-2)$. On the flip side, there are problems that will not specify the state transitions. We will have to figure them out by ourselves. -\end{itemize} - -\paragraph{Distinction between Divide and Conquer and Dynamic Programming} In divide and conquer, problems are divided into disjoint subproblems, so the subproblem graph degrades to a \textit{tree structure}: each subproblem other than the base problems (here it is each individual element in the array) will only have an out-degree equal to 1. In comparison, in the case of the fibonacci sequence shown in Fig.~\ref{fig:subproblem_graph_1}, some problems would have an out-degree larger than one, which makes the network a graph instead of a tree structure. This characteristic is directly induced by the fact that the subproblems overlap -- that is, subproblems share subproblems. For example, $f(4)$ and $f(3)$ share $f(2)$, and $f(3)$ and $f(2)$ share $f(1)$. - -\paragraph{Complete Search and Dynamic Programming} If we program these two problems in a recursive top-down divide-and-conquer manner, they are essentially equivalent to applying Depth-first-search on the subproblem graph/tree. The only difference is that for sorting, there will be no recomputation (such as in merge sort and quick sort), while for the fibonacci sequence, subproblems that have in-degree larger than 1 will be recomputed multiple times, which gives us space for optimization, and this is where dynamic programming comes to the rescue. - -With the subproblem graph, we reconstruct the problem as a graph problem, which means all complete searching methods can be applied, such as Breadth-first search in addition to the recursive depth-first-search.
- -\paragraph{Depth-first-search} Because of the usage of subproblems, the depth-first-search implemented in a divide-and-conquer manner (with the result of subproblems as return) outweighs the usage of Breadth-first-search. BFS doesn't compute the values of ``optimal'' sub-solutions (of sub-instances) and use these to build up a solution, then build the optimum using this information. I don't see anything ``bottom-up'' in the process of BFS. I don't see where the ``intermediate'' states come from. Therefore, we shall see the close bond of dynamic programming with DFS instead of with BFS. - -% We can also do a Breadth-first-search on the subproblem graph as an alternative choice. To draw the conclusion: \textit{complete search} on the subproblem graph is the most naive way to solve the dynamic programming possible problems. - -% \paragraph{Complete Search as Naive Solution} -% In Fig.~\ref{fig:subproblem_graph_1} shows the corresponding subproblem graph for Fibonacci sequence. We can see for some subproblems, e.g. node 2 and node 1 are both searched twice. Therefore, using the naive complete search method can led to redundant computation. - -\subsection{From Complete Search to Dynamic Programming} -\label{subsec_cs_to_dp} -So far, we know dynamic programming is an optimization methodology over the complete search solutions for typical optimization problems. Dynamic Programming's core principle is to solve each subproblem only \textit{once} by \textit{saving} and \textit{reusing} its solution. Therefore, compared with its naive counterpart -- Complete Search: -\begin{enumerate} -\item Dynamic Programming avoids the redundant recomputation met in its complete search counterpart as demonstrated in the last section. - \item Dynamic Programming uses additional memory as a trade-off for better computation/time efficiency; it serves as an example of a \textit{time-space trade-off}.
In most cases as we shall see in this chapter, the space overhead is well-worthy; it can decrease the time complexity dramatically from exponential to polynomial level. -\end{enumerate} - -\paragraph{Two Forms of Dynamic Programming Solution} -There are two ways -- either recursive or iterative in general to add \textit{space mechanism} into naive complete search to construct our dynamic programming solution. But do remember that we cannot eliminate recursive thinking completely. we will always have to define a recursive relation irrespective of the approach we use. -\begin{enumerate} - \item \textbf{Top-down + Memoization (recursive DFS):} we start from larger problem (from top) and recursively search the subproblems space (to bottom) until the leaf node. This method is built on top of Depth-First Graph Search together with Divide and Conquer Methodology which treat each node as a subproblem and return its solution to its caller so that it can be used to build up its solution. Following a top-down fashion as is in divide and conquer, along the process, in the recursive call procedure, a hashmap is relied on to save and search solutions. The memoization works in such way that at the very first time that the subproblem is solved it will be saved in the hashmap, and whenever this problem is met again, it finds the solution and returns it directly instead of computing again. The key elements of this style of dynamic programming is: - \begin{enumerate} - \item Define subproblem; - \item Develop solution using Depth-first Graph search and Divide and conquer (leave alone the recomputation). - \item Adding hashmap to save and search the state of each subproblem. - \end{enumerate}% In order to avoid the recomputation, we use a hashmap to save the solution to the solved subproblems, and whenever the subproblem is solved, we get its result from the hashmap instead of recompute its solution again. This method follows a top-down fashion. 
- \item \textbf{Bottom-up + Tabulation (iterative):} different from the last method, which uses recursive calls, in this method, we approach the subproblems from the smallest subproblems, and construct the solutions to larger subproblems using the tabulated results. The nodes in the subproblem graph are visited in a \textit{reversed topological sort order}. This means that to reconstruct the state of the current subproblem, all its dependencies (predecessors) have already been computed and saved. -\end{enumerate} - -\paragraph{Comparison} -Figure~\ref{fig:dynamic_prorgramming_divide_conquer} records the two different methods, which we call \textit{memoization} and \textit{tabulation} for short. Memoization and tabulation yield the same asymptotic time complexity; however, the tabulation approach often has much better constant factors, since it has less overhead for procedure calls. - -The memoization method works better for beginners who have a decent understanding of divide and conquer. However, once you study further and have enough practice, tabulation should be more intuitive compared with the recursive solution. Usually, the dynamic programming solution to a problem refers to the solution with tabulation. - - - -We enumerate two examples: Fibonacci Sequence (Subsection~\ref{subsec_fibonacci_sequence}) and Longest Increasing Subsequence (Subsection~\ref{subsec_longest_increasing_subsequence}) in the remaining section to showcase \textit{memoization} and \textit{tabulation} in practice. - -\subsection{Fibonacci Sequence} -\label{subsec_fibonacci_sequence} -\paragraph{Problem Definition} -% \begin{lstlisting}[numbers=none] -Given $f(0)=0, f(1)= 1, f(n) = f(n-1) + f(n-2), n \ge 2$. Return the value for any given $n$. -% \end{lstlisting} - -As the most elementary and classical example demonstrating dynamic programming, we carry on this tradition and give multiple solutions for the fibonacci sequence.
Since in Chapter~\ref{chapter_divide_conquer} the recursive and naive solution is already given, we will just briefly explain it here. - -\paragraph{Complete Search} Because the relation between the current state and previous states is directly given, it is straightforward to solve the problem in a top-down fashion using depth-first search. The time complexity can be easily obtained using induction or a recursion tree: $O(2^n)$, where the base $2$ is the branching factor of the tree, and $n$ is the depth. The Python code is given: -\begin{lstlisting}[language = Python] -# DFS on subproblem graph -def fibonacciDFS(n): - # base case - if n <= 1: return n - return fibonacciDFS(n-1)+fibonacciDFS(n-2) # use the result of subtree to build up the result of current tree. -\end{lstlisting} - -\paragraph{Memoization} As we explained, there are subproblems computed more than once in the complete search solution. To avoid the recomputation, we can use a hashtable \texttt{memo} to save the solved subproblems. We need to make \texttt{memo} global and available for all recursive calls; in the memoized complete search, instead of calling the recursion function $f(n) = f(n-1) + f(n-2)$ to get the answer for the current state $n$, it first checks if the problem is already solved and available in \texttt{memo}. - -Because to solve $f(n)$ there will be $n$ subproblems, and each subproblem only depends on two smaller problems, the time complexity will be lowered to $O(n)$ if we use DFS+memoization. -\begin{lstlisting}[language=Python] -# DFS on subproblem graph + Memoization -def fibonacciDFSMemo(n, memo): - if n <= 1: return n - if n not in memo: - memo[n]= fibonacciDFSMemo(n-1, memo)+fibonacciDFSMemo(n-2, memo) - return memo[n] -\end{lstlisting} - -\paragraph{Bottom-up Tabulation} In the top-down recursive solution, there exist two passes: one pass to divide the problems into subproblems, and the other recursive pass to gather solutions from the base cases and construct solutions for larger problems.
However, in the bottom-up tabulation way, for this specific problem, we have four key steps: -\begin{enumerate} - \item We start by \textit{assigning} \texttt{dp} array to save each state's result. It represents the fibonacci number at each index (from 0 to $n$), which is also called a \textit{state}. - \item Then, we \textit{initialize} results of base cases which either were given or can be obtained easily with simple deduction. - \item We iterate through each subproblem/state in reversed topological sort order, which is $[0, 1, 2, 3, 4]$ as in Fig~\ref{fig:subproblem_graph_1}, and use tabulazied solution to build up the answer of current state through the given \textit{recurrence function} $f(n) = f(n-1) + f(n-2)$. - \item We return the last state in \texttt{dp} as the final \textit{answer}. -\end{enumerate} -The tabulation code of dynamic programming is given: -\begin{lstlisting}[language=Python] -# Dynamic Programming: bottom-up tabulation O(n), O(n) -def fibonacciDP(n): - dp = [0]*(n+1) - # init - dp[1] = 1 - for i in range(2,n+1): - dp[i] = dp[i-1] + dp[i-2] - return dp[n] -\end{lstlisting} - -% \textbf{BFS}. Here, I want to mention the BFS, because it can still solve dynamic programming related problems, sometimes in the shortest path problems can be especially helpful. Also, for BFS, it works better if accumulate result from root to the leaves, while in DFS, the result is brought back from leaves to the root. Also because BFS is naturally implemented iteratively without recursion and easier to understand compared with DFS, this is one advantage compared with DFS. The fibonacci number, however, it is difficult and inefficient to be solved using BFS. The tree structure is in Fig~\ref{fig:fibo_bfs}. 
-% \begin{figure}[h] -% \centering -% \includegraphics[width=0.6\columnwidth]{fig/fibonacci_bfs.png} -% \caption{The BFS Tree structure for fibonacci sequence} -% \label{fig:fibo_bfs} -% \end{figure} -% If we carefully design the queue of the BFS, which would only add the last two nodes. Then, the dp is filled in without repetition. And it is almost as efficient as the dynamic programming solution. -% \begin{lstlisting}[language = Python] -% #Optimized BFS -% def FinonacciBFS(n): -% dp = [0]*(n+1) -% dp[1] = 1 -% bfs = [1] -% dirs = [1, 2] -% ans = 0 -% while bfs: -% new_bfs = [] -% for i, v in enumerate(bfs): -% for di in dirs: -% ni = v+di -% if ni<= n: -% dp[ni] += dp[v] -% if i == len(bfs)-1: #the last element -% new_bfs += [ni] -% bfs = new_bfs -% return dp - -% print(FinonacciBFS(9)) -% # output -% # [0, 1, 1, 2, 3, 5, 8, 13, 21, 34] -% \end{lstlisting} -% \begin{lstlisting}[language = Python] -% # BFS -% def FinonacciBFS(n): -% dp = [0]*(n+1) -% dp[1] = 1 -% bfs = set([1]) #starts from 1 as root -% dirs = [1, 2] #each move to 1 or 2. -% ans = 0 -% while bfs: -% new_bfs = set() -% new_dp = [0]*(n+1) -% for i in bfs: -% for di in dirs: -% ni = i+di -% if ni<= n: -% new_dp[ni] += dp[i] -% new_bfs.add(ni) -% bfs = new_bfs -% dp = new_dp -% ans += dp[n] - -% return ans - -% print(FinonacciBFS(8)) -% # output -% # 21 -% \end{lstlisting} - - - -% Thus, to get the dynamic programming solution, we need to figure out a way that we will fill out the results for all the states and each state will only dependent on the previous computed states. -% \subsection{Summary} -% For dynamic -%%%%%%%%%%%%%%Dynamic Programming Knowledge Base%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\section{Dynamic Programming Knowledge Base} -\label{sec_dynamic_programming_knowledge_base} -So far, we have learned most of the knowledge related to Dynamic programming, including basic concepts and two examples. 
In this section, we officially answer three questions -- \textit{when} and \textit{how} to apply dynamic programming, and \textit{which} type of dynamic programming we need: tabulation or memoization. With clear definitions, some more practical guidelines, complexity analysis, and a comprehensive comparison between memoization and tabulation, we are determined to demystify dynamic programming. The subsections are organized as: -\begin{enumerate} - \item Two properties and Practical Guideline (Section~\ref{sec_dp_two_properties}) that an optimization problem must have in order to answer the \textit{when} question. - \item Five key elements, General Steps to Solve Dynamic Programming, and Complexity Analysis (Section~\ref{sec_dp_elements}) in implementing the dynamic programming solution and to answer the \textit{how} question. - \item Tabulation VS Memoization (Section~\ref{subsec_dp_comparison}) to answer the \textit{which} question. -\end{enumerate} - -\subsection{When? Two properties} -\label{sec_dp_two_properties} -In order for dynamic programming to apply, two properties -- overlapping subproblems and optimal substructure -- must be found in the problem we are solving. From our illustrated examples, 1) the step of identifying overlapping shows the overlapping subproblems property; 2) the recurrence function in fact shows the optimal substructure. Formally, these two essential properties are stated as: -\paragraph{Overlapping Subproblems} -When a recursive algorithm revisits the same subproblem repeatedly, we say that the optimization problem has overlapping subproblems. This can be easily visualized in the top-down subproblem graph, where one state is reached by multiple other states. This property demonstrates the recomputation overhead seen in the complete search solutions of our two examples.
- -Overlapping Subproblems property helps us find space for optimization and lead us to its solution -- \textit{the caching mechanism} used in dynamic programming. In the flip side, when subproblems are disjoint such as seen in merge sort and binary search, dynamic programming would not be helping. -\paragraph{Optimal Substructure} -A given problem has optimal substructure property if the optimal solution of the given problem can be obtained by using optimal solutions of its subproblems. Only if optimal substructure property applied we can find the \textit{recurrence relation function} which is a key step in implementation as we have seen from the above two examples. Optimal substructures varies across problem domains in two ways: -\begin{enumerate} - \item \textbf{Subproblem space:} how many subproblems an optimal solution to the original problem uses. For example, in Fibonacci sequence, each integer in range $[0, n]$ is a subproblem, which makes the $n+1$ as the total subproblem space. The state of each subproblem is the optimal solution for that subproblem which is $f(n)$. And in the example of LIS, each subproblem is the array with index in range $[0, i]$, and its state is the length of the longest increasing subsuquence ends/includes at index $i$, this makes the whole subproblem space to be $n$ too. - \item \textbf{State Choice:} how many choices we have in determining which subproblem(s) to use to decide the recurrence function for the current state. In Fibonacci sequence, each state only relies on two preceding states as seen in recurrence function $f(i)=f(i-1)+f(i-2)$, thus making it constant cost. For LIS, each state require knowing all inclusive states (solutions relating to all smaller subproblems), which makes it cost of $O(n)$ that relates to the subproblem space. -\end{enumerate} - -Subproblem space and state choice together not only formulates the recurrent relation with which we very much have the implementation in hand. 
Together they also decide the time and space complexity we will need to tackle our dynamic programming problems. - -\paragraph{Practical Guideline} -Instead of the textbook definition, we also summarize the experience shared by experienced software programmers. Dynamic programming problems are normally asked in its certain way and its naive solution shows certain time complexity. Here, we summarize the situations when to use or not to use dynamic programming as \textbf{Dos} and \textbf{Donots}. -\begin{itemize} - \item \textbf{Dos:} Dynamic programming fits for the optimizing the following problems which are either exponential or polynomial complexity using complete search: -\begin{enumerate} - \item Optimization: Compute the maximum or minimum value; - \item Counting: Count the total number of solutions; - \item Checking if a solution works. -\end{enumerate} - -\item \textbf{Donots:} In the following cases we might not be able to apply Dynamic Programming: -\begin{enumerate} - \item When the naive solution has a low time complexity already such as $O(n^2)$ or $O(n^3)$. - \item When the input dataset is a set while not an array or string or matrix, $90\%$ chance we will not use DP. - \item When the overlapping subproblems apply but it is not optimization problems thus we can not identify the suboptimial substructure property and thus find its recurrence function. For example, same problem context as in \textbf{Dos} but instead we are required to obtain or print all solutions, this is when we need to retreat back to the use of DFS+memo(top-down) instead of DP. %, Draw out the tree structure is necessary and can be extremely helpful; - - %\item Get the total number of solutions. %(Need Correction} -\end{enumerate} -\end{itemize} -%%%%%%%%%%%%%%%%%%%%%%%%Four elements%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\subsection{How? 
Five Elements and Steps} -\label{sec_dp_elements} -In the previous sections, we have provided two forms of dynamic programming solutions: memoization and tabulation, as two different ways of bringing the caching mechanism into practice. In this section, we focus on the iterative tabulation and generalize its five key elements and practical guidelines for important steps. - -\paragraph{Five Key Elements of Tabulation} -As the first guideline of Tabulation, we summarize the five key elements for the implementation of dynamic programming: -\begin{enumerate} - \item \textbf{Subproblem and State:} Define what the subproblem space is and what the optimal state/solution for each subproblem is. In practice, it would normally be \textit{the total/the maximum/minimum} for the subproblem. This requires us to know how to divide a problem into subproblems; there are patterns to follow which will be detailed in Chapter.~\ref{}. - \item \textbf{State Transfer (Recurrence) Function}: derive the function that describes how we can get the current state by using results from previously computed state(s). This requires us to identify the optimal substructure and know how to make state choice. - \item \textbf{Assignment and Initialization:} Once we know the subproblem space, we typically assign a data structure for it and initialize its values. For base or edge cases, we might need to initialize differently than for the other more general cases. - \item \textbf{Iteration:} decide the order of iterating through the subproblem space so that we scan each subproblem/state exactly and only \textit{once}. Using the subproblem graph and visiting the subproblems in reversed topological order is a good way to go. - \item \textbf{Answer:} decide which state, or a combination of all states such as the max/min of all the states, is the final result needed.
-\end{enumerate} - - -%%%%%%%%%%%%%%Dynamic programming Dos and Do nots} -% \paragraph{Practical Guideline} -% \label{sec_dp_do_donots} -% From the above two examples in the last section, we can see that Dynamic programming can be potentialy used to optimize a exponential problem to be polynomial when it comes to problems asking for optimization, total number of solutions or if a resolution is working. For a problem that is already polynomial, dynamic programming can be potentially used to lower the complexity to BCR $O(n)$ just as shown in the second example. However, in this case the dynamic programming can be translated to other simple and straightforward algorithm. To make the readers' life easier, in this book, we generalize the dos and do nots of dynamic programming so that you can have slightly more ideas about when to use dynamic prgramming and when to just use simpler algorithms for certain questions. - - -\paragraph{Five Steps to Solve Dynamic Programming} This is a general guideline for dynamic programming -- memoization or tabulation. Key advice -- being ``flexbile''. Given a real problem, all in all, we are credited with our understanding of the concepts in computer science. Thus, we should not be too bothered or stressed that if you can not come up with a ``perfect'' answer. -\label{sec_dp_generalization} -\begin{enumerate} - \item Read the question: search for the key words of the problem patterns: counting, checking, or maximum/minimum. - \item Come up with the most naive solution ASAP: analyze its time complexity. Is it a typical DFS solution? Try draw a SUBPROBLEM GRAPH to get visualization. Is there space for optimization? - \item Apply Section~\ref{sec_dp_two_properties}: Is there overlapping? Can you define the optimal substructure/recurrence function? - \item If the conclusion is YES, try to define the Five key elements so that we can solve it using the preferable tabulation. If you can figure it out intuitively just like that, great! 
What to do if not? Maybe retreat to use memoization, which is a combination of divide and conquer, DFS, and memoization. - \item What if we were just so nervous that or time is short, we just go ahead and implement the complete search solution instead. With implementation is better than nothing. With the implementation in hand, maybe we can figure it out later. -\end{enumerate} - -\paragraph{Complexity Analysis} -\label{sec_dp_complexity_analysis} -The complexity analysis of the tabulation is seemingly more straightforward compared with its counterpart -- the recursive memoization. For the tabulation, we can simply draw conclusion without any prior knowledge of the dynamic programming by observing the \texttt{for} loops and its recurrence function. However, for both variant, there exists a common analysis method. The core points to analyze complexity involving dynamic programming is: (1) the subproblem space $|S|$, that is the total number of subproblems; and (2) the number of state choice needed to construct each state $|C|$. By multiplying these two points, we can draw the conclusion of its time complexity as $O(|S||C|)$. - -For example, if the subproblem space is $n$ and if each state $i$ relies on (1) only one or two previous states as we have seen in the example of Fibonacci Sequence, it makes the time complexity $O(n)$; and (2) all previous states in range $[0, i-1]$ as seen in the example of Longest Increasing Subsequence, which can be viewed as $O(n)$ to solve each subproblem, this brings up the complexity up to $O(n^2)$. - - -\subsection{Which? Tabulation or Memoization} -\label{subsec_dp_comparison} -% \paragraph{Complete Search Assist to Dynamic Programming} Also, complete search can be used for us to further validate the solution or even help us to find the recurrence relation between subproblems in some cases. 
-As we can see, the way the bottom-up DP table is filled is not as intuitive as the top-down DP as it requires some `reversals' of the signs in Complete Search recurrence that we have developed in previous sections. However, we are aware that some programmers actually feel that the bottom-up -version is more intuitive. The decision on which DP style to use is in your hands. To help you decide -which style you should take when presented with a DP solution, we present the trade-off -comparison between top-down Memoization and bottom-up Tabulation in Table~\ref{tab:dp_decidion_table}. -\begin{table}[!ht] -\begin{small} -\centering -\noindent\captionof{table}{Tabulation VS Memoization} -\label{tab:dp_decidion_table} - \noindent \begin{tabular}{|p{0.1\columnwidth}|p{0.43\columnwidth}|p{0.43\columnwidth}| } - \hline -& Memoization& Tabulation \\ \hline -Pros &\begin{enumerate}[wide, labelwidth=!, labelindent=0pt,noitemsep,topsep=0pt]\item A natural transformation from normal recursive complete search. - \item Compute subproblems only when necessary, sometimes this can be faster. -\end{enumerate} &\begin{enumerate}[wide, labelwidth=!, labelindent=0pt,noitemsep,topsep=0pt] \item Faster if many sub-problems are revisited as there is no overhead of recursive calls.\item Can save memory space with dynamic programming ``on-the-fly'' technique (see Section Extension.~\ref{}). \end{enumerate}\\\hline -% \\ \hline -% &\\ \hline - Cons & \begin{enumerate}[wide, labelwidth=!, labelindent=0pt,noitemsep,topsep=0pt] - \item Slower if many subproblems are revisited due to overhead of recursive calls. - \item If there are $n$ states, it can use up to $O(n)$ table size which might lead to Memory Limit Exceeded (MLE) for some hard problems. - \item Faces stack overflow due to the recursive calls. - \end{enumerate} & - \begin{enumerate}[wide, labelwidth=!, labelindent=0pt,noitemsep,topsep=0pt] - \item For programmers who are inclined toward recursion, this may not be intuitive.
- % \item If there are $n$ states - \end{enumerate}\\ \hline -\end{tabular} -\end{small} -\end{table} - - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%%%%%%%%%%One Dimensional State VS Multiple Dimensional State%%%%%%%%%% -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\section{Hands-on Examples (Main-course Examples)} -\label{sec_dynamic_programming_example} - -In the practical guideline, we mentioned that the problems that can be further optimized with dynamic programming would be seen with complexity patters of their naive solutions: either exponential such as $O(2^n)$ or polynomial such as $O(n^3)$ or $O(n^2)$. - -The purpose of this section is to further enhance our knowledge and put our both theoreotical and practical guideline into test. We examine two examples: Triangle and maximum subarray. We have seen how maximum subarray can be solved with linear search and divide and conquer in Chapter.\ref{}. However, in this section, we expand the old solution into dynamic programming solutions and we see the difference and connection. %Dynamic programming can decrease the complexity that used divide and conquer or searching from exponential level to polynomial, e.g. from $O(2^n)$ or $O(n!)$ to $O(n^m)$, $m$ usually is $2$ or $3$. - -%Let us see two more examples: The first one is to optimize a $O(2^n)$ problem, the second one is to optimize a $O(n^3)$ problem. - -\subsection{Exponential Problem: Triangle} -\paragraph{Triangle (L120)} -Given a triangle, find the minimum path sum from top to bottom. Each step you may move to adjacent numbers on the row below. -\begin{lstlisting}[numbers=none] -Example: -Given the following triangle: - -[ -[2], -[3,4], -[6,5,7], -[4,1,8,3] -] -The minimum path sum from top to bottom is 11 (i.e., 2 + 3 + 5 + 1 = 11). 
-\end{lstlisting} - - -\paragraph{Analysis} -\begin{enumerate} - \item We quickly read the question and we find the key word -- minimum. - \item We come up with the most naive solution, which would be DFS, as already covered in an earlier chapter. With a quick drawing of the DFS traversal graph, we can find that some nodes are repeatedly visited. - \item Apply Two Properties: First, define the subproblem. Each node in the triangle is identified by two indexes $(i, j)$, the row and column index respectively. The subproblem can be straightforward: the minimum sum from the starting point $(0, 0)$ to the current position $(i, j)$. The subproblem graph will be exactly the same as the graph we used in DFS. We identify overlapping easily. - - Now, develop the recurrence function. To build up the solution for the state at $(i, j)$, we need two other states, $(i-1, j)$ and $(i-1, j-1)$, and one value from the current position. The function will be: $f(i, j)=\min(f(i-1, j), f(i-1, j-1)) + t[i][j]$. - \item Five Key Elements: we need to figure out how to assign and initialize the \texttt{dp} space and do the iteration. To get the boundary condition: - \begin{enumerate} - \item by observation: for the first element at $(0, 0)$, neither of the two states $f(i-1, j), f(i-1, j-1)$ exists. The leftmost and rightmost elements of each row have only one of these two states: $f(i-1, j), f(i-1, j-1)$. - \item by simple math induction: $i \in [0, n-1], j \in [0, i]$. When $i=0, j=0$, $f(i, j)=t[i][j]$; when $i \in [1, n-1], j=0$, $f(i-1, j-1)$ is invalid; and when $i \in [1, n-1], j=i$, $f(i-1, j)$ is invalid. - \end{enumerate} - The answer would be the minimum value of \texttt{dp} at the last row.
- The Python code is given: - - \begin{lstlisting}[language = Python] -def min_path_sum(t): - dp = [[0 for c in range(r+1)] for r in range(len(t))] # initialized to 0 for f() - n = len(t) - # initialize the first point (the top of the triangle) - dp[0][0] = t[0][0] - # initialize the left and right edges of the triangle - for i in range(1, n): - dp[i][0] = dp[i-1][0] + t[i][0] - dp[i][i] = dp[i-1][i-1] + t[i][i] - for i in range(1, n): - for j in range(1, i): - dp[i][j] = t[i][j] + min(dp[i-1][j], dp[i-1][j-1]) - return min(dp[-1]) -\end{lstlisting} -\end{enumerate} -% In section of graph search, we have seen the dfs solution of triangle. If -% In the above solution, the state is a recursive tree, and the DFS traverse all the elements in the tree. To reformulate this problem as dynamic programming, if we use $f[x][y]$ marks the minimum path sum start from $(x,y)$, then we have this relation $f[x][y] = A[x][y] + min(f[x+1][y], f[x+1][y+1]$, which gives us a function $T(n) = 2*T(n-1)$. We still have $O(2^n)$ time complexity and still encounter LTE error. -% \begin{lstlisting}[language = Python] -% def minimumTotal(triangle): -% def divideConquer(x, y): -% if x == len(triangle): -% return 0 -% return triangle[x][y]+min(divideConquer(x+1, y), divideConquer(x+1, y+1)) -% return divideConquer(0, 0) -% \end{lstlisting} -% \textbf{Recursive and Memoization} - -% Here, for location $(x ,y)$ we need to compute $(x+1, y+1)$, for location $(x , y+1)$, $f[x][y+1] = A[x][y+1] + min(f[x+1][y+1], f[x+1][y+2]$, we compute $(x+1, y+1)$ again. So the redundancy exists. However, the advantage of this formate with divide and conquer compared with DFS brute force is that we can use memoization to trade for speed and save complexity. Till now the code is successfully AC. - -% The time complexity here is propotional to the number of subproblems, which is the size of triangle, $O(n^2)$. This is usually not obvious of its complexity.
-% \begin{lstlisting}[language = Python] -% from sys import maxsize -% def minimumTotal(triangle): -% memo = [[maxsize for i in range(j+1)] for j in range(len(triangle))] -% def divideConquerMemo(x, y): -% #nonlocal memo -% if x == len(triangle): -% return 0 -% if memo[x][y] == maxsize: -% memo[x][y] = triangle[x][y] + min(divideConquerMemo(x+1, y), divideConquerMemo(x+1, y+1)) -% return memo[x][y] -% return divideConquerMemo(0, 0) -% \end{lstlisting} -% It is normally -% \textbf{Iterative with Space} -% Now, we do not use the recursive function, the same as the above memoization, we use a memo space f to save the result. This implementation is more difficult compared with the recursive + memoization method. But it is still something managable with practice. The advantages include: -% \begin{enumerate} -% \item It saves the heap space from the implementation of the recursive function. -% \item It is easier to get the complexity of the algorithm compared with recursive implementation, simply by looking at its for loops. -% \item It is easier to observe the value propagation order, which make it possible to optimize the space complexity. -% \end{enumerate} - -% For the iterative, we have two ways: Bottom-up and top-down. This is compared with your order to fill in the dynamic table. If we use our previous defined relation function $f[x][y] = A[x][y] + min(f[x+1][y], f[x+1][y+1]$, we need to know the result from the larger index so that we can fill in value at the smaller index. Thus, we need to initialize the result for the largest indices. And we reversely fill in the dynamic table, this is called top-down method, from big index to small. Visually we propagate the information from the end to the front. The final result - -% On the other side, if we fill in the table from small to larger index, we need to rewrite the relation function to $f[x][y] = A[x][y] + min(f[x-1][y], f[x-1][y-1]$, this function feedforward the information from the beginning to the end. 
So we need to initialize the result at (0,0), and the edge of the triangle. following the increasing order, to get value for the larger index, it is bottom-up method. - - - - -% \textbf{Top-down with standard space} $f[x][y] = A[x][y] + min(f[x+1][y], f[x+1][y+1]$. Actually for this problem, the top-down method is slightly simpler: we only need to initialize the last row for the state $f$ because for the last row, we cant find its previous state. We directly return result of $f[0][0]$. -% \begin{lstlisting}[language = Python] -% # top-bottom -% from sys import maxsize -% def minimumTotal(triangle): -% f = [[0 for i in range(j+1)] for j in range(len(triangle))] # initialized to 0 for f() -% n = len(triangle) -% #initial the the last row -% for y in range(len(triangle[-1])): -% f[-1][y] = triangle[-1][y] -% # from small index to large index -% for x in range(n-2, -1, -1): -% for y in range(x, -1, -1): -% f[x][y] = triangle[x][y] + min(f[x+1][y], f[x+1][y+1]) #get result for larger state from smaller state -% return f[0][0] -% \end{lstlisting} -\paragraph{Space Optimization} From the recurrence function, we can see the current state is only related to two states from the last row. We can reuse the original \texttt{triangle} matrix itself to save the state. If we are following the forward induction as the previous solution, we still have the problem of edge cases; for some state that it only has one previous or none previous states needed to decide its current state. 
We can write our code as: -\begin{lstlisting}[language=Python] -def min_path_sum(t): - ''' - Space optimization with forward induction - ''' - t = deepcopy(t) - if not t: - return 0 - n = len(t) - for i in range(0, n): - for j in range(0, i + 1): - if i == 0 and j == 0: - continue - elif j == 0: - t[i][j] = t[i][j] + t[i-1][j] - elif j == i: - t[i][j] = t[i][j] + t[i-1][j-1] - else: - t[i][j] = t[i][j] + min(t[i-1][j], t[i-1][j-1]) - return min(t[-1]) -\end{lstlisting} -\paragraph{Further Optimization} Let us look at the traversal order backward where we start from the last row and traverse upward to the first row. For the last row, its state should be the same as its triangle value. For any remaining rows and each of its element, its state will all rely on two other states locating below of them. There is consistency in this backward induction and the final state at the first row will be only final global answer. In this method, we reverse of recurrence function as $f(i,j)=min(f(i+1, j+1), f(i+1, j))+t[i][j]$. -\begin{lstlisting}[language = Python] -def min_path_sum(t): - ''' - Space optimization with backward induction - ''' - t = deepcopy(t) - if not t: - return 0 - n = len(t) - # Start from the last second row - for i in range(n-2, -1, -1): - for j in range(i, -1, -1): - t[i][j] = t[i][j] + min(t[i+1][j], t[i+1][j+1]) - return t[0][0] -\end{lstlisting} - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% materials needed -%https://blog.csdn.net/github_30242787/article/details/50819414 -%https://blog.csdn.net/xiaqian0917/article/details/53266662 - -\subsection{Polynomial Problem: Maximum Subarray} - -\paragraph{Maximum Subarray (L53)} Find the contiguous subarray within an array (containing at least one number) which has the largest sum. -\begin{lstlisting}[numbers=none] -For example, given the array [-2,1,-3,4,-1,2,1,-5,4], the contiguous subarray [4,-1,2,1] has the largest sum = 6. 
-\end{lstlisting} - -The problem will be analyzed following our two properties and solved following our five-step guideline and five elements. -% \paragraph{Apply Five Steps} Let -\paragraph{Analysis and $O(n)$ Solution} - -\begin{enumerate} - \item First step, we read the problem and we can quickly catch the key word -- maximum. - -\item Second step, the naive solution. From other chapters, we have seen how maximum subarray can be approached as either graph search ($O(2^n)$ \textcolor{red}{to get more details later}), or linear search along the solution space ($O(n^3)$, or $O(n^2)$ if tweaked with the incremental computation of subarray sums). - -\item Third step: Apply two properties. The solution space we concluded for maximum subarray would be totally in $O(n^2)$, and be denoted as $a[i, j]$ where $i, j \in [0, n-1], i \leq j$. This states that the maximum subarray is one of these subarrays fixing their starting index. Here, in order to think in a dynamic programming way, let us define the subproblem. We first define it as $a[i, j]$, and the state would be the sum of this subarray. We can see there is already some hidden recurrence function that $f(i, j)=f(i,j-1)+a[j]$. We would see there is overlap: $a[0, 4]$ actually includes $a[1, 4]$. - -However, there is something missing. The state we defined did not leverage the optimal substructure. Let us define the subproblem in another way that has the optimal condition there. We define $f(i), i \in [0, n-1]$, to represent the subarrays that start from index $i$, and the answer/state will be the maximum sum among these potential subarrays. Therefore, the subproblem space will be only $O(n)$. The solution space of subproblem $f(i)$ is $a[i, j]$ where $j \in [i, n-1]$. Assume we are comparing $f(0)$ and $f(1)$, that is, the relation between the maximum subarray that starts from 0 and the maximum subarray that starts from 1. $f(1)$ is a subproblem of $f(0)$.
If $f(1)$ is computed already, then $f(0)$ either includes $f(1)$, if it is positive, or does not include it, giving two possible state choices. Thus, we get our recurrence function $f(i)=\max(f(i+1)+a[i], a[i])$. The last state is $f(n-1)=a[n-1]$. - -\item Step 4: Given all the conclusions, we can start the five key elements. The above solution requires us to start from the maximum index in a reverse order; this is called \textit{backward induction}, as mentioned in materials explaining dynamic programming from the angle of optimization. We need to always pay attention to the empty array, where the maximum subarray should give zero as the result. This makes our total states $n+1$ instead of $n$. In the backward induction, this empty state will be located at index $n$ in a list of size $n+1$. %What we can do that is more intuitive is a forward deduction. - -\begin{lstlisting}[language = Python] -def maximum_subarray_dp(a): - ''' - Backward induction dp solution - ''' - # assignment and initialization - dp = [0] * (len(a) + 1) - # fill out the dp space in reverse order - # we do not need to fill the base case dp[n] - for i in reversed(range(len(a))): - dp[i] = max(dp[i+1] + a[i], a[i]) - print(dp) - return max(dp) -\end{lstlisting} -\end{enumerate} - - -% Brute Force solution: put it into -% The brute force solution of this problem is to use two for loops, one pointer at the start position of the subarray, the other point at the end position of the subarray. Then we get the maximum sum of these subarries. The time complexity is $O(n^3)$, where we spent $O(n)$ to the sum of each subarray. However, if we can get the sum of each subarray with $O(1)$. Then we can lower the complexity to $O(n^2)$. Here one solution is to get $sum(i+1) = sum(i)+nums[i+1]$.
-% \begin{lstlisting}[language=Python] -% from sys import maxsize -% def maximumSubarray(nums): -% if not nums: -% return 0 -% maxValue = -maxsize -% for i, v in enumerate(nums): -% accSum = 0 -% for j in range(i, len(nums)): -% #accSum = sum(nums[i:j+1]) -% accSum += nums[j] -% maxValue = max(maxValue, accSum) -% return maxValue -% \end{lstlisting} -% Another way that we can get the sum between $i,j$ in $O(1)$ time with formula $sum(i,j)=sum(0,j)-sum(0,i)$ by using $O(n)$ space to save the sum from $0$ to current index. %is to trade space for efficiency. the sum of subarray from index $i$ to $j$ is $sum(i,j)=sum(0,j)-sum(0,i)$. We can pre compute the accumulated sum to each index and save it in an array of the same size, which gives us $O(n^2)$ time complexity and $O(n)$ space complexity. The code is writen as: - -% solution: Divide and Conquer -% To further improve the efficiency, we use divide and conquer, where we divide one array into two halves: the maximum subarray might located on the left size, or the right side, or some in the left side and some in the right size, which crossed the bound. $T(n) = max(T(left),T(right), T(cross))$, max is for merging and the T(cross) is for the case that the potential subarray across the mid point. For the complexity, $T(n)=2T(n/2)+n$, if we use the master method, it would give us $O(nlgn)$. With this solution, we use $O(lgn)$ space for the recursive function stack space. 
-% \begin{lstlisting}[language=Python] -% def maxSubArray(self, nums): -% """ -% :type nums: List[int] -% :rtype: int -% """ -% def getCrossMax(low,mid,high): -% left_sum,right_sum =0,0 -% left_max, right_max = -maxint, -maxint -% left_i,right_j=-1,-1 -% for i in xrange(mid,low-1,-1): #[) -% left_sum+=nums[i] -% if left_sum>left_max: -% left_max= left_sum -% left_i = i -% for j in xrange(mid+1,high+1): -% right_sum+=nums[j] -% if right_sum>right_max: -% right_max= right_sum -% right_j = j -% return (left_i,right_j,left_max+right_max) - -% def maxSubarray(low,high): -% if low==high: -% return (low,high, nums[low]) -% mid = (low+high)//2 -% rslt=[] -% #left_low, left_high, left_sum = maxSubarray(low,mid) #[low,mid] -% rslt.append(maxSubarray(low,mid)) #[low,mid] -% #right_low,right_high,right_sum = maxSubarray(mid+1,high)#[mid+1,high] -% rslt.append(maxSubarray(mid+1,high)) -% #cross_low,cross_high,cross_sum = getCrossMax(low, mid, high) -% rslt.append(getCrossMax(low, mid, high)) -% return max(rslt, key=lambda x: x[2]) -% return maxSubarray(0,len(nums)-1)[2] -% \end{lstlisting} - -% Dynamic Programming: Using dynamic programming: the $f$ memorize the maximum subarray value till $j$, the state till $i$ we can get the result from previous state $i-1$, the value of current state depends on the larger one between $f[i-1]$ plus the current element and the current element, which is represented as $f[i] = max(f[i-1] + nums[i], nums[i])$. This would gave us $O(n)$ time complexity and $O(n+1)$ space complexity. The initialization is $f[0] = 0$, and the answer is $max(f)$. - -\paragraph{Space Optimization} - -If we observe the iterating process, we always only use one previous state. If we use another global variable, say \texttt{maxsum} to track the global maximum subarray value, and use \texttt{state} to replace \texttt{dp} array, we can decrease the space complexity from $O(n)$ to $O(1)$. 
%However, here since we only need to track $f[i]$ and $f[i+1]$, and keep current maximum value, so that we do not need to use any space. -\begin{lstlisting}[language = Python] -def maximum_subarray_dp_sp(a): - ''' - dp solution with space optimization - ''' - # assignment and initialization - state = 0 - maxsum = 0 - # fill out the dp space in reverse order - # we do not need to fill the base cae dp[n] - for i in reversed(range(len(a))): - state = max(state + a[i], a[i]) - maxsum = max(maxsum, state) - return maxsum -\end{lstlisting} - -All of the above steps are for the purpose of deep analysis. When you are more experienced, you can go directly to the five elements of tabulation and develop the solution without connecting it to the naive solution. Also, this is actually Kadane's Algorithm, which will be further detailed in Chapter~\ref{}. - - - -% \subsection{Implementations} -% There are different ways to implement dynamic programming: 1) recursive$+$ memoization; including top-down and down-top. 2) iterative $+$ memoization; including top-down and down-top. Also, in a lot of scenarios, we can optimize the space complexity too. For example, rolling array. -% \subsubsection{Recursive and Memoization} -% This method is the most direct and the easiest way to solve the problems. For example. However, this method because it uses recursive function, which include extra space compared with the following iterative method. -% \subsubsection{Iterative} -% We can avoid recursion by using multiple levels of for loop. The four key elements that to design an iterative dynamic programming are: - -% \begin{enumerate} -% \item State: how to represent the solution, maximum or minimum, Yes or No, count; -% \item Function: the state transfer between smaller state and larger state; -% \item Initialization: The smallest state, base state and its solution, which is the start; -% \item Answer: what is the largest state, which is the end.
-% \end{enumerate} -% where we can get the result of a problem by iteratively "growing" from the smaller problems. To find the transfer function, the best way to do it is to get a example of each scenario, and see for each case, to decide the relation between smaller problems and larger problems. - -% % now give examples to this section -% \section{Derive Dynamic Programming from BFS} -% \begin{lstlisting}[language = Python] -% def uniquePaths(self, m, n): -% """ -% :type m: int -% :type n: int -% :rtype: int -% """ -% if m==0 or n==0: -% return 0 -% d = [(1, 0), (0, 1)] -% count =[[0 for col in range(n)] for row in range(m)] -% count[0][0]=1 -% bfs = [(0,0)] -% while bfs: -% new_bfs = set() -% for x, y in bfs: -% for dx, dy in d: -% posX, posY = x+dx, y+dy -% if 0<=posX 0$. We start by assuming that this holds true for some smaller $m Right -> Down -> Down -2. Down -> Down -> Right -> Right -\end{lstlisting} - -\textbf{Sequence Type} - -\item 213. House Robber II - -Note: This is an extension of House Robber. - -After robbing those houses on that street, the thief has found himself a new place for his thievery so that he will not get too much attention. This time, all houses at this place are arranged in a circle. That means the first house is the neighbor of the last one. Meanwhile, the security system for these houses remain the same as for those in the previous street. - -Given a list of non-negative integers representing the amount of money of each house, determine the maximum amount of money you can rob tonight without alerting the police. - -example - nums = [3,6,4], return 6 - -% 现在呢, 我们如果选3了的话, 4不好搞。 - -% 如果选4了的话呢, 3不好搞。 -% 这就变成了一个循环数组问题, 循环数组问题有三种方法可以解: - -% 取反 -% 分裂 -% 倍增 - -% 这里我们用分裂的方法, 把数组 - -% [3, 6, 4] - -% 分成, 选3的: - -% [3, 6] nums[:-1] - -% 和不选3的: - -% [6, 4] nums[1:] - -% 然后把这两个非循环数组分别用上面的方法求解. 
-% 我猜这可能是双序列动规吧… -\begin{lstlisting}[language = Python] -def rob(self, nums): - """ - :type nums: List[int] - :rtype: int - """ - - if not nums: - return 0 - if len(nums)==1: - return nums[0] - def robber1(nums): - dp=[0]*(2) - dp[0] =0 - dp[1] =nums[0] #if len is 1 - for i in range(2,len(nums)+1): #if leng is 2...., index is i-1 - dp[i%2]=max(dp[(i-2)%2]+nums[i-1], dp[(i-1)%2]) - return dp[len(nums)%2] - - return max(robber1(nums[:-1]),robber1(nums[1:])) -\end{lstlisting} - -\item 337. House Robber III - -\item 256. Paint House - -There are a row of n houses, each house can be painted with one of the three colors: red, blue or green. The cost of painting each house with a certain color is different. You have to paint all the houses such that no two adjacent houses have the same color. - -The cost of painting each house with a certain color is represented by a n x 3 cost matrix. For example, costs[0][0] is the cost of painting house 0 with color red; costs[1][2] is the cost of painting house 1 with color green, and so on... Find the minimum cost to paint all houses. - -Solution: state: 0, 1, 2 colors -minCost[i] = till i the mincost for each color -for color 0: paint 0 [0] = min(minCost[i-1][1], minCost[i-1][2])+costs[i][0] - -paint 1 [1] - -minCost[i] = [0,1,2], i for i in [0,1,2] - -answer = min(minCost[-1]) -\begin{lstlisting}[language = Python] -def minCost(self, costs): - """ - :type costs: List[List[int]] - :rtype: int - """ - if not costs: - return 0 - if len(costs)==1: - return min(costs[0]) - - minCost = [[0 for col in range(3)] for row in range(len(costs)+1)] - minCost[0] = [0,0,0] - minCost[1]=[cost for cost in costs[0]] - colorSet=set([1,2,0]) - for i in range(2,len(costs)+1): - for c in range(3): - #previous color - pres = list(colorSet-set([c])) - print(pres) - minCost[i][c] = min([minCost[i-1][pre_cor] for pre_cor in pres])+costs[i-1][c] - return min(minCost[-1]) -\end{lstlisting} - -\item 265. 
Paint House II - -There are a row of n houses, each house can be painted with one of the k colors. The cost of painting each house with a certain color is different. You have to paint all the houses such that no two adjacent houses have the same color. - -The cost of painting each house with a certain color is represented by a n x k cost matrix. For example, costs[0][0] is the cost of painting house 0 with color 0; costs[1][2] is the cost of painting house 1 with color 2, and so on... Find the minimum cost to paint all houses. - -Note: - All costs are positive integers. - -Follow up: - Could you solve it in O(nk) runtime? - -Solution: this is exactly the same as the last one: -\begin{lstlisting}[language = Python] -if not costs: - return 0 - if len(costs)==1: - return min(costs[0]) - - k = len(costs[0]) - minCost = [[0 for col in range(k)] for row in range(len(costs)+1)] - minCost[0] = [0]*k - minCost[1]=[cost for cost in costs[0]] - colorSet=set([i for i in range(k)]) - for i in range(2,len(costs)+1): - for c in range(k): - #previous color - pres = list(colorSet-set([c])) - minCost[i][c] = min([minCost[i-1][pre_cor] for pre_cor in pres])+costs[i-1][c] - return min(minCost[-1]) -\end{lstlisting} - -\item 276. Paint Fence - -There is a fence with n posts, each post can be painted with one of the k colors. - -You have to paint all the posts such that no more than two adjacent fence posts have the same color. - -Return the total number of ways you can paint the fence. - -Note: - n and k are non-negative integers. 
-for three posts, the same color, the first two need to be different -\begin{lstlisting}[language = Python] -def numWays(self, n, k): - """ - :type n: int - :type k: int - :rtype: int - """ - if n==0 or k==0: - return 0 - if n==1: - return k - - count = [[0 for col in range(k)] for row in range(n+1)] - same = k - diff = k*(k-1) - for i in range(3,n+1): - pre_diff = diff - diff = (same+diff)*(k-1) - same = pre_diff - return (same+diff) -\end{lstlisting} - -\textbf{Double Sequence Type DP} - -\item 115. Distinct Subsequences (hard) - -Given a string S and a string T, count the number of distinct subsequences of S which equals T. - -A subsequence of a string is a new string which is formed from the original string by deleting some (can be none) of the characters without disturbing the relative positions of the remaining characters. (ie, "ACE" is a subsequence of "ABCDE" while "AEC" is not). - -Example 1: -\begin{lstlisting} -Input: S = "rabbbit", T = "rabbit" -Output: 3 -\end{lstlisting} -Explanation: - -As shown below, there are 3 ways you can generate "rabbit" from S. -(The caret symbol \string^ means the chosen letters) - -\begin{lstlisting} -rabbbit -^^^^ ^^ -rabbbit -^^ ^^^^ -rabbbit -^^^ ^^^ -\end{lstlisting} - -\item 97. Interleaving String - -Given s1, s2, s3, find whether s3 is formed by the interleaving of s1 and s2. - -Example 1: -\begin{lstlisting} -Input: s1 = "aabcc", s2 = "dbbca", s3 = "aadbbcbcac" -Output: true -\end{lstlisting} - -Example 2: -\begin{lstlisting} -Input: s1 = "aabcc", s2 = "dbbca", s3 = "aadbbbaccc" -Output: false -\end{lstlisting} - - - - - -\textbf{Splitting Type DP} -\item 132. Palindrome Partitioning II (hard) - -Given a string s, partition s such that every substring of the partition is a palindrome. - -Return the minimum cuts needed for a palindrome partitioning of s. - -Example: -\begin{lstlisting} -Input: "aab" -Output: 1 -\end{lstlisting} - -Explanation: The palindrome partitioning ["aa","b"] could be produced using 1 cut. 
- -Exercise: max difference between two subarrays: An integer indicate the value of maximum difference between two Subarrays. The temp java code is: -\begin{lstlisting}[language = Python] -public int maxDiffSubArrays(int[] nums) { - // write your code here - int size = nums.length; - int[] left_max = new int[size]; - int[] left_min = new int[size]; - int[] right_max = new int[size]; - int[] right_min = new int[size]; - - int localMax = nums[0]; - int localMin = nums[0]; - - left_max[0] = left_min[0] = nums[0]; - //search for left_max - for (int i = 1; i < size; i++) { - localMax = Math.max(nums[i], localMax + nums[i]); - left_max[i] = Math.max(left_max[i - 1], localMax); - } - //search for left_min - for (int i = 1; i < size; i++) { - localMin = Math.min(nums[i], localMin + nums[i]); - left_min[i] = Math.min(left_min[i - 1], localMin); - } - - right_max[size - 1] = right_min[size - 1] = nums[size - 1]; - //search for right_max - localMax = nums[size - 1]; - for (int i = size - 2; i >= 0; i--) { - localMax = Math.max(nums[i], localMax + nums[i]); - right_max[i] = Math.max(right_max[i + 1], localMax); - } - //search for right min - localMin = nums[size - 1]; - for (int i = size - 2; i >= 0; i--) { - localMin = Math.min(nums[i], localMin + nums[i]); - right_min[i] = Math.min(right_min[i + 1], localMin); - } - //search for separete position - int diff = 0; - for (int i = 0; i < size - 1; i++) { - diff = Math.max(Math.abs(left_max[i] - right_min[i + 1]), diff); - diff = Math.max(Math.abs(left_min[i] - right_max[i + 1]), diff); - } - return diff; - } -\end{lstlisting} - -\item 152. Maximum Product Subarray (medium) - -Given an integer array nums, find the contiguous subarray within an array (containing at least one number) which has the largest product. - -Example 1: -\begin{lstlisting} -Input: [2,3,-2,4] -Output: 6 -Explanation: [2,3] has the largest product 6. 
-\end{lstlisting} -Example 2: -\begin{lstlisting} -Input: [-2,0,-1] -Output: 0 -Explanation: The result cannot be 2, because [-2,-1] is not a subarray. -\end{lstlisting} - -Solution: this is similar to the maximum sum subarray, the difference we need to have two local vectors, one to track the minimum vaule: min\_local, the other is max\_local, which denotes the minimum and the maximum subarray value including the ith element. The function is as follows. -\begin{equation} - min\_local[i] = \begin{cases} - min(min\_local[i-1]*nums[i], nums[i]),& nums[i]<0;\\ - min(max\_local[i-1]*nums[i], nums[i])& \text{otherwise} - \end{cases} -\end{equation} -\begin{equation} - max\_local[i] = \begin{cases} - max(max\_local[i-1]*nums[i], nums[i]),& nums[i]>0;\\ - max(min\_local[i-1]*nums[i], nums[i])& \text{otherwise} - \end{cases} -\end{equation} -\begin{lstlisting}[language = Python] -def maxProduct(nums): - if not nums: - return 0 - n = len(nums) - min_local, max_local = [0]*n, [0]*n - max_so_far = nums[0] - min_local[0], max_local[0] = nums[0], nums[0] - for i in range(1, n): - if nums[i]>0: - max_local[i] = max(max_local[i-1]*nums[i], nums[i]) - min_local[i] = min(min_local[i-1]*nums[i], nums[i]) - else: - max_local[i] = max(min_local[i-1]*nums[i], nums[i]) - min_local[i] = min(max_local[i-1]*nums[i], nums[i]) - max_so_far = max(max_so_far, max_local[i]) - return max_so_far -\end{lstlisting} -With space optimization: -\begin{lstlisting}[language = Python] -def maxProduct(self, nums): - if not nums: - return 0 - n = len(nums) - max_so_far = nums[0] - min_local, max_local = nums[0], nums[0] - for i in range(1, n): - if nums[i]>0: - max_local = max(max_local*nums[i], nums[i]) - min_local = min(min_local*nums[i], nums[i]) - else: - pre_max = max_local #save the index - max_local = max(min_local*nums[i], nums[i]) - min_local = min(pre_max*nums[i], nums[i]) - max_so_far = max(max_so_far, max_local) - return max_so_far -\end{lstlisting} -Even simpler way to write it: 
-\begin{lstlisting}[language = Python] -def maxProduct(self, nums): - if not nums: - return 0 - n = len(nums) - max_so_far = nums[0] - min_local, max_local = nums[0], nums[0] - for i in range(1, n): - a = min_local*nums[i] - b = max_local*nums[i] - max_local = max(nums[i], a, b) - min_local = min(nums[i], a, b) - max_so_far = max(max_so_far, max_local) - return max_so_far -\end{lstlisting} - -\item 122. Best Time to Buy and Sell Stock II - -Say you have an array for which the ith element is the price of a given stock on day i. - -Design an algorithm to find the maximum profit. You may complete as many transactions as you like (i.e., buy one and sell one share of the stock multiple times). - -Note: You may not engage in multiple transactions at the same time (i.e., you must sell the stock before you buy again). - -Example 1: -\begin{lstlisting} -Input: [7,1,5,3,6,4] -Output: 7 -Explanation: Buy on day 2 (price = 1) and sell on day 3 (price = 5), profit = 5-1 = 4. - Then buy on day 4 (price = 3) and sell on day 5 (price = 6), profit = 6-3 = 3. -\end{lstlisting} -Example 2: -\begin{lstlisting} -Input: [1,2,3,4,5] -Output: 4 -Explanation: Buy on day 1 (price = 1) and sell on day 5 (price = 5), profit = 5-1 = 4. - Note that you cannot buy on day 1, buy on day 2 and sell them later, as you are - engaging multiple transactions at the same time. You must sell before buying again. -\end{lstlisting} -Example 3: -\begin{lstlisting} -Input: [7,6,4,3,1] -Output: 0 -Explanation: In this case, no transaction is done, i.e. max profit = 0. -\end{lstlisting} -Solution: the difference compared with the first problem is that we can have multiple transaction, so whenever we can make profit we can have an transaction. We can notice that if we have [1,2,3,5], we only need one transaction to buy at 1 and sell at 5, which makes profit 4. This problem can be resolved with decreasing monotonic stack. 
whenever the stack is increasing, we kick out that number, which is the smallest number so far before i and this is the transaction that make the biggest profit = current price - previous element. Or else, we keep push smaller price inside the stack. -\begin{lstlisting}[language = Python] -def maxProfit(self, prices): - """ - :type prices: List[int] - :rtype: int - """ - mono_stack = [] - profit = 0 - for p in prices: - if not mono_stack: - mono_stack.append(p) - else: - if pa and B[i]>b: #not swap - count = min(dfs(A[i], B[i], i+1), count) - if A[i]>b and B[i]>a:#swap - count = min(dfs(B[i], A[i], i+1)+1, count) - return count - - return dfs([], [], 0) -\end{lstlisting} -\textbf{DFS with single State Memo is not working}. Now, to avoid overlapping, [5,4], [3,7] because for the DFS there subproblem is in reversed order compared with normal dynamic programming. Simply using the index to identify the state will not work and end up with wrong answer. - -\textbf{DFS with muliple choiced memo}. For this problem, it has two potential choice, swap or keep. The right way is to distinguish different state with additional variable. Here we use \textit{swapped} to represent if the current level we make the decision of swap or not. -\begin{lstlisting}[language=Python] -def minSwap(self, A, B): - if not A or not B: - return 0 - - def dfs(a, b, i, memo, swapped): #the last element of the state - if i == len(A): - return 0 - if (swapped, i) not in memo: - if i == 0: - # not swap - memo[(swapped, i)] = min(dfs(A[i], B[i], i+1, memo, False), dfs(B[i], A[i], i+1, memo, True)+1) - return memo[(swapped, i)] - count = sys.maxsize - - if A[i]>a and B[i]>b: #not swap - count = min(count, dfs(A[i], B[i], i+1, memo, False)) - if A[i]>b and B[i]>a: #swap - count = min(count, dfs(B[i], A[i], i+1, memo, True) +1) - memo[(swapped, i)] = count - - return memo[(swapped, i)] - - return dfs([], [], 0, {}, False) -\end{lstlisting} -\textbf{Dynamic Programming}. 
Because it has two choice, we define two dp state arrays. One represents the minimum swaps if current i is not swapped, and the other is when the current i is swapped. -\begin{lstlisting}[language=Python] -def minSwap(self, A, B): - if not A or not B: - return 0 - - dp_not =[sys.maxsize]*len(A) - dp_swap = [sys.maxsize]*len(A) - dp_swap[0] = 1 - dp_not[0] = 0 - for i in range(1, len(A)): - if A[i] > A[i-1] and B[i] > B[i-1]: #i-1 not swap and i not swap - dp_not[i] = min(dp_not[i], dp_not[i-1]) - # if i-1 swap, it means A[i]>B[i-1], i need to swap - dp_swap[i] = min(dp_swap[i], dp_swap[i-1]+1) - if A[i] > B[i-1] and B[i] > A[i-1]: # i-1 not swap, i swap - dp_swap[i] = min(dp_swap[i], dp_not[i-1]+1) - # if i-1 swap, it means the first case, current need to not to swap - dp_not[i] = min(dp_not[i], dp_swap[i-1]) - return min(dp_not[-1], dp_swap[-1]) -\end{lstlisting} -Actually, in this problem, the DFS+memo solution is not easy to understand any more. On the other hand, the dynamic programming is easier and more straightforward to understand. - -\item Example $1$. 131. Palindrome Partitioning (medium) - -Given a string s, partition s such that every substring of the partition is a palindrome. - -Return all possible palindrome partitioning of s. -\begin{lstlisting} -For example, given s = "aab", - Return - -[ - ["aa","b"], - ["a","a","b"] -] -\end{lstlisting} - -Solution: here we not only need to count all the solutions, we need to record all the solutions. Before using dynamic prgramming, we can use DFS, and we need a function to see if a splitted substring is palindrome or not. The time complexity for this is $T(n) = T(n-1)+T(n-2)+...+T(1)+O(n)$, which gave out the complexity as $O(3^n)$. This is also called backtracking algorithm. The running time is $152$ ms. 
-\begin{figure}[h] - \centering - \includegraphics[width = 0.5\columnwidth]{fig/palindromPartition.png} - \caption{State Transfer for the panlindrom splitting} - \label{fig:my_label} -\end{figure} -\begin{lstlisting}[language = Python] -def partition(self, s): - """ - :type s: str - :rtype: List[List[str]] - """ - #s="bb" - #the whole purpose is to find pal, which means it is a DFS - def bPal(s): - return s==s[::-1] - def helper(s, path, res): - if not s: - res.append(path) - for i in range(1,len(s)+1): - if bPal(s[:i]): - helper(s[i:],path+[s[:i]],res) - res=[] - helper(s,[],res) - return res -\end{lstlisting} -Now, we use dynamic programming, for the palindrome, if substring $s(i,j)$ is panlindrome, then if $s[i-1] == s[j+1]$, then s(i-1,j+1) is palindrome too. So, for state: $f[i][j]$ denotes if $s[i:j]$ is a palindrome with $1$ or $0$; for function: $f[i-1][j+1] = f[i][j]$, if $s[i]==s[j]$, else ; for initialization: f[i][i] = True and f[i][i+1], for the loop, we start with size $3$, set the start and end index; However, for this problem, this only acts like function $bPal$, checking it in $O(1)$ time. The running time is $146$ ms. -\begin{lstlisting}[language = Python] -def partition(s): - f = [[False for i in range(len(s))] for i in range(len(s))] - - for d in range(len(s)): - f[d][d] = True - for d in range(1,len(s)): - f[d-1][d]=(s[d-1]==s[d]) - for sz in range(3,len(s)+1): #3: 3 - for i in range(len(s)-sz+1): #the start index, i=0, 0 - j = i+sz-1 #0+3-1 = 2, 1,1 - f[i][j] = f[i+1][j-1] if s[i]==s[j] else False - res = [] - def helper(start, path, res): - if start==len(s): - res.append(path) - for i in range(start,len(s)): - if f[start][i]: - helper(i+1, path+[s[start:i+1]], res) - helper(0, [], res) - return res -\end{lstlisting} -This is actually the example that if we want to print out all the solutions, we need to use DFS and backtracking. It is hard to use dynamic programming and save time. 
-\end{enumerate} - - - - -\section{Summary} -\textbf{Steps of Solving Dynamic Programming Problems} - -We read through the problems; most of them use array or string data structures. We search for keywords: ``min/max number'' or ``Yes/No'' in ``subsequence/'' type of problems. After this process, we are sure that we are going to solve this problem with dynamic programming. Then, we use the following steps to solve it: -\begin{enumerate} - \item . - \item New storage (a list) $f$ to store the answer, where $f_i$ denotes the answer for the array that starts from $0$ and ends with $i$. (Typically, one extra space is needed.) This step implicitly tells us the way we do divide and conquer: we first start with dividing the sequence $S$ into $S_{(1,n)}$ and $a_0$. We reason about the relation between these elements. - \item We construct a recurrence function using $f$ between subproblems. - \item We initialize the storage and we figure out where in the storage the final answer is (f[-1], max(f), min(f), f[0]). -\end{enumerate} -Other important points from this chapter: -\begin{enumerate} - \item Dynamic programming is an algorithm theory, and divide and conquer $+$ memoization is a way to implement dynamic programming. - \item Dynamic programming starts from the initial state, and deduces the result of the current state from the previous state until it gets to the final state, where we can collect our final answer. - \item Dynamic programming is faster because it avoids repeated computation. - \item Dynamic programming $\approx$ divide and conquer $+$ memoization. -\end{enumerate} - -The following figure shows a summary of the different types of dynamic programming problems with their four main elements. -\begin{figure}[h] - \centering - \includegraphics[width = 0.98\columnwidth]{fig/summary_dp.png} - \caption{Summary of different types of dynamic programming problems} - \label{fig:dp_summary} -\end{figure} - -% 1.动态规划是一种算法思想,是高于算法的.而分治的记忆化搜索是实现动态规划的一种手段. -% 2.那么什么是动态规划呢?
-% -就感觉上来说,动态规划的是"一层一层来",基于前一个状态推出现在的状态. -% 3.动态规划为什么会快呢? -% -因为减少了很多不必要的重复计算. -% 4.动态规划和分治的区别? -% -动态规划约等于分治+记忆化,因为有了记忆化,所以算过的直接用就行,就不用再算一遍了. -% From the brute force to recursive, to recursive with memorization, to iterative with memo, to how to save the space in memo. -\end{document} \ No newline at end of file diff --git a/Easy-Book/chapters/chapter_12_tree_algorithm.tex b/Easy-Book/chapters/chapter_12_tree_algorithm.tex deleted file mode 100644 index 4aeed34..0000000 --- a/Easy-Book/chapters/chapter_12_tree_algorithm.tex +++ /dev/null @@ -1,2188 +0,0 @@ -\documentclass[../main.tex]{subfiles} -\begin{document} -\paragraph{Review} We have learned the tree data structure and the traversal inside of the tree from Chapter~\ref{chapter_non_linear_tree} and Chapter~\ref{chapter_tree_traversal}. Remember the core and fundamental methods for solving tree related problems are the \textbf{tree traversal} and \textbf{divide-and-conquer}. We are expecting problems related to the properties of trees and we are expected to solve these problem with high efficiency. - - -\section{Core Principle} -When we first go for interviews, we may find tree and graph problems intimidating and challenging to solve within 40 minutes normal interview window. This might be due to our neglect of the concept of Divide and Conquer. However, at this point, we have already studied the concepts of various trees, divide and conquer, and solved quite a few related questions in previous chapters. We will find out studying this chapter can be really easy compared with the Dynamic programming questions, thanks to the consistent principles to solve tree questions. 
The principle is to solve problems within \textbf{tree traversal}, either recursively or iteratively, in either of the following two ways: -\begin{enumerate} - \item \textbf{Top-down Searching}: We write the recursion function of tree traversal with parameters recording information of visited nodes (in the first pass) to be passed to its subtrees through recursive function calls. The result will be returned from a leaf node, an empty node, or a node that satisfies a certain condition. This is just an extension of the standard graph search, either BFS or DFS, with recorded path information. This method usually requires \texttt{None} returned from the recursion function, and always requires a global data structure and a local data structure to track the final answer and the current path information. We summarize this method with the following pseudocode: -\begin{lstlisting}[language=Python] -def treeTraversal(root, tmp_result): - if node is empty or node is a leaf node: - collect the result or return the final result - construct: the previous temp result using the current node - treeTraversal(root.left, constructured_tmp_result) - treeTraversal(root.right, constructured_tmp_result) -\end{lstlisting} - -\item \textbf{Bottom-up Divide and Conquer:} Due to the special structure of a tree, a binary tree is naturally divided into two halves: the left subtree and the right subtree. Therefore, we can apply divide and conquer, assigning two ``agents'' to obtain the results for its subproblems, and once we have received the results, we ``merge'' the results of the subtrees to obtain the result for the current node. This also requires us to define the return value for edge cases: normally these are empty nodes and/or leaves.
We summarize this method with the following pseudocode: -\begin{lstlisting}[language=Python] -def treeTraversalDivideConquer(root): - if node is empty or node is a leaf node: - return base result - # divide - left result = treeTraversalDivideConquer(root.left) - right result = treeTraversalDivideConquer(root.right) - - # conquer - merge the left and right result with the current node - return merged result of current node -\end{lstlisting} -\end{enumerate} -The difficulty of these problems is decided by the merge operation, and by how many different variables we need to return to decide the next merge operation. -\paragraph{Optimization Problems} -When we are facing optimization based problems, \texttt{However, if we don't like using the recursive function, we can use level-order traversal implemented with Queue.} - -\paragraph{Summary of Recurrence Relation} Because divide-and-conquer is inherent to the recursive structure of trees and the root-children relation, a binary tree of size $n$ with a root will be divided into three non-overlapping parts: left-subtree, root, right-subtree. %The problem of $n$ is thus divided into $d(t) = f(d(t_l), d(t_r), t)$, and there has no overlapping between these three parts. - -Let us summarize some recurrence relations for different properties of trees; here we use a binary tree for demonstration purposes. Given a tree $t$, its left and right subtrees are denoted by $t_l$ and $t_r$, respectively. -\begin{enumerate} - \item Count the size of a tree: let $|t|$ be the number of nodes in the tree, and an empty node will have size $0$. The recurrence relation will be: - \begin{equation} - |t| = |t_l|+|t_r|+1 - \end{equation} - \item Obtain the height of the tree: let $h(t)$ be the height of node $t$, and a leaf node will have height $0$, and thus an empty node will have height $-1$.
We have the recurrence relation for the height as: - \begin{equation} - h(t) = 1 + \max{(h(t_l), h(t_r))} - \end{equation} - \item Obtain the maximum path sum from leaf to a node: let $s(t)$ be the sum of the current tree $t$, and an empty node will have a sum $0$. - \begin{equation} - s(t) = t.val + \max{(s(t_l), s(t_r))} - \end{equation} - If the path goes from a node to any node downwards through parent-child connections, then to obtain the maximum path we would only add its left or right path's sum if it is positive; we can use the following recurrence relation: - \begin{equation} - s(t) = t.val + \max(0, \max{(s(t_l), s(t_r))}) - \end{equation} -\end{enumerate} - -N-ary Tree (especially Binary Tree) and Binary Search Tree are the most popular types of questions in interviews. Each of them takes nearly half of all the tree questions. We will rarely come across the Segment Tree or Trie, but if you have extra time, studying these two types too will help you learn more. -\section{N-ary Tree (40\%)} -We classify the binary tree related questions as: -\begin{enumerate} - \item Tree Traversal; - \item Tree Property: Depth, Height, and Diameter - \item Tree Advanced Property: LCA - \item Tree Path -\end{enumerate} -%%%%%%%%%%%%%%%Tree Traversal%%%%%%%%%%%%%%%%%%%%%%% -\subsection{Tree Traversal} -The problems appearing in this section have mostly been solved in the tree traversal section, thus we only list the problems here. -\begin{enumerate} - \item 144. Binary Tree Preorder Traversal - \item 94. Binary Tree Inorder Traversal - \item 145. Binary Tree Postorder Traversal - \item 589. N-ary Tree Preorder Traversal - \item 590. N-ary Tree Postorder Traversal - \item 429. N-ary Tree Level Order Traversal - \item 103. Binary Tree Zigzag Level Order Traversal(medium) - \item 105. Construct Binary Tree from Preorder and Inorder Traversal -\end{enumerate} -\begin{examples}[resume] -\item \textbf{103.
Binary Tree Zigzag Level Order Traversal (medium).} Given a binary tree, return the zigzag level order traversal of its nodes' values. (ie, from left to right, then right to left for the next level and alternate between). -\begin{lstlisting}[numbers=none] -For example: -Given binary tree [3,9,20,null,null,15,7], - - 3 - / \ - 9 20 - / \ - 15 7 - -return its zigzag level order traversal as: - -[ - [3], - [20,9], - [15,7] -] -\end{lstlisting} -\textbf{Solution: BFS level order traversal.} We use an variable to track the level of the current queue, and if its even, then we add the result in the original order, otherwise, use the reversed order: -\begin{lstlisting}[language=Python] -def zigzagLevelOrder(self, root): - """ - :type root: TreeNode - :rtype: List[List[int]] - """ - if root is None: - return [] - q = [root] - i = 0 - ans = [] - while q: - tmp = [] - tmpAns = [] - for node in q: - tmpAns.append(node.val) - if node.left: - tmp.append(node.left) - if node.right: - tmp.append(node.right) - q = tmp - if i % 2 == 0: - ans += [tmpAns] - else: - ans += [tmpAns[::-1]] - i += 1 - return ans -\end{lstlisting} - -\item \textbf{105. Construct Binary Tree from Preorder and Inorder Traversal.} Given preorder and inorder traversal of a tree, construct the binary tree. -Note:You may assume that duplicates do not exist in the tree. -\begin{lstlisting}[numbers=none] -For example, given preorder = [3,9,20,15,7], inorder = [9,3,15,20,7] -Return the following binary tree: - - 3 - / \ - 9 20 - / \ - 15 7 - \end{lstlisting} - -\textbf{Solution: the feature of tree traversal.} The inorder traversal puts the nodes from the left subtree on the left side of root, and the nodes from the right subtree on the right side of the root. While the preorder puts the root at the first place, followed by the left nodes and right nodes. 
Thus we can find the root node from the preorder, and then use the inorder list to find the root node, and cut the list into two parts: left nodes and right nodes. We use divide and conquer, and do such operation recursively till the preorder and inorder list is empty. -\begin{lstlisting} [language = Python] -def buildTree(self, preorder, inorder): - """ - :type preorder: List[int] - :type inorder: List[int] - :rtype: TreeNode - """ - #first to decide the root - def helper(preorder,inorder): - if not preorder or not inorder: - return None - - cur_val = preorder[0] - node = TreeNode(cur_val) - #divide: now cut the lists into two halfs - leftinorder,rightinorder = [],[] - bLeft=True - for e in inorder: - if e==cur_val: - bLeft=False #switch to the right side - continue - if bLeft: - leftinorder.append(e) - else: - rightinorder.append(e) - leftset, rightset = set(leftinorder),set(rightinorder) - leftpreorder, rightpreorder = [],[] - for e in preorder[1:]: - if e in leftset: - leftpreorder.append(e) - else: - rightpreorder.append(e) - - #conquer - node.left=helper(leftpreorder, leftinorder) - node.right= helper(rightpreorder,rightinorder) - return node - return helper(preorder,inorder) -\end{lstlisting} -However, the previous code has problem as 203 / 203 test cases passed. -Status: Memory Limit Exceeded. So instead of passing new array, I use index. 
-\begin{lstlisting} [language = Python] -def buildTree(self, preorder, inorder): - """ - :type preorder: List[int] - :type inorder: List[int] - :rtype: TreeNode - """ - #first to decide the root - def helper(pre_l, pre_r,in_l, in_r): #[pre_l,pre_r) - if pre_l>=pre_r or in_l>=in_r: - return None - - cur_val = preorder[pre_l] - node = TreeNode(cur_val) - #divide: now cut the lists into two halfs - leftinorder = set() - inorder_index = -1 - for i in range(in_l, in_r): - if inorder[i]==cur_val: - inorder_index = i - break - leftinorder.add(inorder[i]) - #when leftset is empty - new_pre_r=pre_l - for i in range(pre_l+1,pre_r): - if preorder[i] in leftinorder: - new_pre_r = i - else: - break - new_pre_r+=1 - - #conquer - node.left=helper(pre_l+1, new_pre_r, in_l, inorder_index) - node.right= helper(new_pre_r,pre_r, inorder_index+1, in_r) - return node - if not preorder or not inorder: - return None - return helper(0,len(preorder),0,len(inorder)) -\end{lstlisting} - - -\end{examples} - -% 94. Binary Tree Inorder Traversal - -% Given a binary tree, return the inorder traversal of its nodes' values. -% \begin{lstlisting} -% Example: - -% Input: [1,null,2,3] -% 1 -% \ -% 2 -% / -% 3 - -% Output: [1,3,2] - -% Follow up: Recursive solution is trivial, could you do it iteratively? 
-% \end{lstlisting} -% \begin{lstlisting}[language=Python] -% # recursive -% def inorderTraversal(self, root): -% """ -% :type root: TreeNode -% :rtype: List[int] -% """ -% # left, root, right -% if root is None: -% return [] -% left = self.inorderTraversal(root.left) -% right = self.inorderTraversal(root.right) -% return left+[root.val]+right -% \end{lstlisting} -% \begin{lstlisting}[language=Python] -% # iterative -% def inorderTraversal(self, root): -% """ -% :type root: TreeNode -% :rtype: List[int] -% """ -% # left, root, right -% if root is None: -% return [] -% ans = [] -% stack =[] -% current = root -% while current: -% stack.append(current) -% current = current.left -% while stack: -% tmp = stack.pop() -% ans.append(tmp.val) -% current = tmp.right -% while current: -% stack.append(current) -% current = current.left -% return ans -% \end{lstlisting} - -%%%%%%%%%%%%%%%%%%%%%%%Depth%%%%%%%%%%%%%%%%%%%%%%%%%% -\subsection{Depth/Height/Diameter} -In this section, focus on the property related problems of binary tree: including depth, height and diameter. We can be asked to validate balanced binary tree, or the maximum/minimum of these values. The solution is tree traversal along with some operations along can be used to solve this type of problems. -\begin{enumerate} - \item 111. Minimum Depth of Binary Tree (Easy) - \item 110. Balanced Binary Tree(Easy) - \item 543. Diameter of Binary Tree (Easy) - - \item 559. Maximum Depth of N-ary Tree (Easy) (Exercise) - \item 104. Maximum Depth of Binary Tree (Exercise) -\end{enumerate} - -\begin{examples}[resume] - -\item \textbf{Minimum Depth of Binary Tree (L111, Easy).} Given a binary tree, find its minimum depth. The minimum depth is the number of nodes along the shortest path from the root node down to the nearest leaf node. \textit{Note: A leaf is a node with no children.} -\begin{lstlisting} -Example: - -Given binary tree [3,9,20,null,null,15,7], - - 3 - / \ - 9 20 - / \ - 15 7 - -return its minimum depth = 2. 
-\end{lstlisting} -\textbf{Solution 1: Level-Order Iterative.} For the minimum path, we can traverse the tree level-by-level; once we encounter the first leaf node, its level is the minimum depth, so we return from there with no need to finish traversing the whole tree. The worst-case time complexity is $O(n)$, with $O(n)$ space. -\begin{lstlisting}[language=Python] -def minDepth(self, root): - if root is None: - return 0 - q = [root] - d = 0 - while q: - d += 1 - for node in q: - if not node.left and not node.right: #a leaf - return d - - q = [neigbor for n in q for neigbor in [n.left, n.right] if neigbor] - return d -\end{lstlisting} -\textbf{Solution 2: DFS + Divide and Conquer.} In this problem, we can still use a DFS based traversal. However, in this solution, we cannot get the minimum depth without visiting the whole tree, so it might take a bit longer. This approach takes $O(h)$ stack space. -\begin{lstlisting}[language=Python] -def minDepth(self, root): - if not root: - return 0 - if not root.left and not root.right: # only leaves will have 1 - return 1 - ans = sys.maxsize - if root.left: - ans = min(ans, self.minDepth(root.left)) - if root.right: - ans = min(ans, self.minDepth(root.right)) - return ans+1 -\end{lstlisting} -\item \textbf{110. Balanced Binary Tree(L110, Easy).} Given a binary tree, determine if it is height-balanced. For this problem, a height-balanced binary tree is defined as: \textit{a binary tree in which the \textbf{height} of the two subtrees of every node never differ by more than 1.} (LeetCode used depth however, it should be the height) -\begin{lstlisting}[numbers=none] -Example 1: - -Given the following tree [3,9,20,null,null,15,7]: - - 3 - / \ - 9 20 - / \ - 15 7 - -Return true. - -Example 2: - -Given the following tree [1,2,2,3,3,null,null,4,4]: - - 1 - / \ - 2 2 - / \ - 3 3 - / \ - 4 4 - -Return false. -\end{lstlisting} -\textbf{Solution 1: Bottom-up DFS+Divide and conquer with height as return}. 
First, recall that the height of a tree is defined as the number of edges on the \textit{longest path} from the node to a leaf, so a leaf has a height of 0. Thus, for the DFS traversal, we need to return 0 for the leaf node, and for an empty node, we use -1 (for leaf node, we have max(-1, -1) + 1 = 0). In this process, we just need to check if the left subtree or the right subtree is already unbalanced, which we use -2 to denote, or if the difference of the heights of the two subtrees is more than 1. -\begin{lstlisting}[language=Python] -def isBalanced(self, root): - """ - :type root: TreeNode - :rtype: bool - """ - def dfsHeight(root): - if not root: - return -1 - lh = dfsHeight(root.left) - rh = dfsHeight(root.right) - if lh == -2 or rh == -2 or abs(lh-rh) > 1: - return -2 - return max(lh, rh)+1 - return dfsHeight(root) != -2 -\end{lstlisting} -\item \textbf{543. Diameter of Binary Tree (Easy).} Given a binary tree, you need to compute the length of the diameter of the tree. The diameter of a binary tree is the length of the \textbf{longest} path between any two nodes in a tree. Note: The length of path between two nodes is represented by the number of edges between them. This path may or may not pass through the root. -\begin{lstlisting}[numbers=none] - Example: -Given a binary tree - - 1 - / \ - 2 3 - / \ - 4 5 - -Return 3, which is the length of the path [4,2,1,3] or [5,2,1,3]. -\end{lstlisting} -\textbf{Solution: Height of the tree with global variable to track the diameter.} For node 2, the height should be 1, and the length of the path from 4 to 5 is 2, which is the sum of the heights of nodes 4 and 5 (both 0) plus two edges. Thus, we use \texttt{rootToAny} to track the height of the subtree. 
Meanwhile, for each node, we use a global variable \texttt{ans} to track the diameter which equals to the sum up of the height of the left and the right subtree and 2 with the following equation: -\begin{align} - diameter = h_l + h_r + 2 -\end{align} -\begin{lstlisting}[language=Python] -def diameterOfBinaryTree(self, root): - """ - :type root: TreeNode - :rtype: int - """ - # this is the longest path from any to any - - def rootToAny(root, ans): - if not root: - return -1 - left = rootToAny(root.left, ans) - right = rootToAny(root.right, ans) - ans[0] = max(ans[0], left+right+2) - return max(left, right) + 1 - ans = [0] - rootToAny(root, ans) - return ans[0] -\end{lstlisting} -\end{examples} -%%%%%%%%%%%%%%%%%%%%Paths%%%%%%%%%%%%%%%%%%%% -\subsection{Paths} -In this section, we mainly solve path related problems. As we mentioned in Chapter~\ref{chapter_tree}, there are three types of path depending on the starting and ending node type of the path. We might be asked to get minimum/maximum/each path sum/ path length for these three cases: 1) \textbf{root}-to-\textbf{leaf}, 2) \textbf{Root}-to-\textbf{Any} node, 3) \textbf{Any}-node to-\textbf{Any} node. - -Also, maximum or minimum questions is more difficult than the exact path sum, because sometimes when there are negative values in the tree, it makes the situation harder. - -% We normally have two ways to solve these problems. One is using DFS traverse and use global variable and current path variable in the parameters of the recursive function to track the path and collect the results. - -% The second way is DFS and Divide and Conquer, we treat each node as a root tree, we return its result, and for a node, after we get result of left and right subtree, we merge the result. -%%%%%%%%%%%%%%%%%%%%Root to Leaf Path%%%%%%%%%%%%%%%%%%%% -\subsubsection{Root to Leaf Path} -\begin{enumerate} - \item 112. Path Sum (Easy) - \item 113. Path Sum II (easy) - \item 129. Sum Root to Leaf Numbers (Medium) - \item 257. 
Binary Tree Paths (Easy, exer) -\end{enumerate} -\begin{examples}[resume] -\item \textbf{112. Path Sum (Easy).} Given a binary tree and a sum, determine if the tree has a root-to-leaf path such that adding up all the values along the path equals the given sum. Note: A leaf is a node with no children. -\begin{lstlisting}[numbers=none] -Example: -Given the below binary tree and sum = 22, - - 5 - / \ - 4 8 - / / \ - 11 13 4 - / \ \ -7 2 1 - -return true, as there exist a root-to-leaf path 5->4->11->2 which sum is 22. -\end{lstlisting} -\textbf{Solution: Tree Traversal, Leaf Node as Base Case}. Here we are asked the root-to-leaf path sum, we just need to traverse the tree and use the remaining sum after minusing the value of current node to visit its subtree. At the leaf node, if the remaining sum is equal to the node's value, we return True, otherwise False is returned. Time complexity is $O(n)$. -% is used case The best way to testify the code is to walk through the code with a simple example. For the following example, 1 has no left tree, so the if is None, it should return False for that empty left branch. For any leaf, that is the only place we can possibly return True if the root.val equals to the left sum to need to get. -% \begin{lstlisting} -% 1 -% \ -% 2 -% For sum = 1: using the following code will return True -% \end{lstlisting} -\begin{lstlisting}[language=Python] -def hasPathSum(self, root, sum): - """ - :type root: TreeNode - :type sum: int - :rtype: bool - """ - if root is None: # this is for empty tree - return False - if root.left is None and root.right is None: # a leaf as base case - return True if sum == root.val else False - - left = self.hasPathSum(root.left, sum-root.val) - if left: - return True - right = self.hasPathSum(root.right, sum-root.val) - if right: - return True - return False -\end{lstlisting} -\item \textbf{129. 
Sum Root to Leaf Numbers (Medium).} Given a binary tree containing digits from 0-9 only, each root-to-leaf path could represent a number. An example is the root-to-leaf path 1->2->3 which represents the number 123. Find the total sum of all root-to-leaf numbers. Note: A leaf is a node with no children. -\begin{lstlisting}[numbers=none] -Example: - -Input: [1,2,3,4,5] - 1 - / \ - 2 3 - / \ - 4 5 -Output: 262 -Explanation: -The root-to-leaf path 1->2->4 represents the number 124. -The root-to-leaf path 1->2->5 represents the number 125. -The root-to-leaf path 1->3 represents the number 13. -Therefore, sum = 124 + 125 + 13 = 262. -\end{lstlisting} -\textbf{Solution 1: Divide and Conquer.} In divide and conquer solution, we treat each child as a root, for node 4 and 5, they return 4 and 5. For node 2, it should get 24+25, in order to construct this value, the recursive function should return the value of its tree and the path length (number of nodes) of current node to all of its leaf nodes. Therefore for node 2, with \texttt{(height, root\_to\_leaf\_sum)}, it combines this with \texttt{$2*10^{height+1}$+ root\_to\_leaf\_sum}, and similarily with the right subtree. -\begin{lstlisting}[language=Python] -def sumNumbers(self, root): - """ - :type root: TreeNode - :rtype: int - """ - if not root: - return 0 - ans, _ = self.sumHelper(root) - return ans -def sumHelper(self, root): - if not root: - return (0, []) - if root.left is None and root.right is None: - return (root.val, [1]) # val and depth - left, ld = self.sumHelper(root.left) - right, rd = self.sumHelper(root.right) - # process: sum over the results till this subtree - ans = left+right - new_d = [] - for d in ld+rd: - new_d.append(d+1) - ans += root.val*10**(d) - return (ans, new_d) -\end{lstlisting} -\textbf{Solution 2: DFS and Parameter Tracker.} We can also construct the value from top-down, we simply record the path in the tree traversal, and at the end, we simply convert the result to the final answer. 
-\begin{lstlisting}[language=Python] - def sumNumbers(self, root): - """ - :type root: TreeNode - :rtype: int - """ - my_sum = [] - - self.dfs(root,"",my_sum) - - res = 0 - - for ele in my_sum: - res += int(ele) # convert a list to an int? - - return res - - - def dfs(self,node,routine,my_sum): - if not node: - return - - routine = routine + str(node.val) - if not node.left and not node.right: - my_sum.append(routine) - - self.dfs(node.left,routine,my_sum) - self.dfs(node.right,routine,my_sum) -\end{lstlisting} -\end{examples} -%%%%%%%%%%%%%%%%%%%%%%%%%%Root to Any%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\subsubsection{Root to Any Node Path} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%Any to Any Path%%%%%%%%%%%%%%%%%%%%%%%%%% -\subsubsection{Any to Any Node Path} -In this subsection, we need a concept called Dual Recursive Function. -\begin{enumerate} - \item 437. Path Sum III (medium) - \item 124. Binary Tree Maximum Path Sum (hard) - \item 543. Diameter of Binary Tree (Easy, put in exercise) -\end{enumerate} -\begin{examples}[resume] -\item \textbf{437. Path Sum III} You are given a binary tree in which each node contains an integer value. Find the number of paths that sum to a given value. The path does not need to start or end at the root or a leaf, but it must go downwards (traveling only from parent nodes to child nodes). The tree has no more than 1,000 nodes and the values are in the range -1,000,000 to 1,000,000. -\begin{lstlisting}[numbers=none] -Example: - -root = [10,5,-3,3,2,null,11,3,-2,null,1], sum = 8 - - 10 - / \ - 5 -3 - / \ \ - 3 2 11 - / \ \ -3 -2 1 - -Return 3. The paths that sum to 8 are: - -1. 5 -> 3 -2. 5 -> 2 -> 1 -3. -3 -> 11 -\end{lstlisting} -\textbf{Solution 1: Dual Recurrence with Divide and Conquer.} In this problem a path may run from any node to any node below it, which is equivalent to counting the root->any paths with the given sum when every node in the binary tree is taken in turn as the root. We first write a function for root to any. The complexity is $O(n)$. 
-\begin{lstlisting}[language = Python] -def rootToAny(self, root, sum): - if root is None: - return 0 - # collect result at any node - sum -= root.val - count = 0 - if sum == 0: - count += 1 - return count + self.rootToAny(root.left, sum) + self.rootToAny(root.right, sum) -\end{lstlisting} -However, to count the any-to-any (downwards) paths, we treat each node as a root node and call rootToAny to get the number of satisfying paths that start from the current node, and we divide the remaining task (paths starting from any other node) between its left and right subtrees. Thus the time complexity is $O(n^2)$: there are $n$ subproblems and each takes $O(n)$ time. -\begin{lstlisting}[language = Python] - '''first recursion: we traverse the tree and use any node as root, and call rootToAny to get its paths''' -def pathSum(self, root, sum): - if not root: - return 0 - - return self.rootToAny(root, sum) + self.pathSum(root.left, sum) + self.pathSum(root.right, sum) -\end{lstlisting} -\textbf{Solution 2: Optimization with Prefix Sum}. The above solution has a large amount of recomputation. The situation is similar to the subarray-sum problem on an array, where we need two pointers, one for the subarray start and another for the end. We can use prefix sum to decrease the time complexity to $O(n)$. The sum from n1 to n2 is P[n2]-P[n1] = sum, thus, we need to check P[n1], which equals to P[n2]-sum at each node. To deal with case: [0,0], sum = 0, we need to add 0:1 into the hashmap. Another difference is: in the tree we are using DFS traversal, so for a given node, when we finish visiting its left subtree and right subtree and return to its parent level, we need to undo the current node's entry in the hashmap. So, this is DFS with backtracking too. 
-\begin{lstlisting}[language=Python] -def anyToAnyPreSum(self, root, sum, curr, ans, preSum): - if root is None: - return - # process - curr += root.val - ans[0] += preSum[curr-sum] - preSum[curr] += 1 - self.anyToAnyPreSum(root.left, sum, curr, ans, preSum) - self.anyToAnyPreSum(root.right, sum, curr, ans, preSum) - preSum[curr] -= 1 #backtrack to current state - return - -def pathSum(self, root, sum): - if not root: - return 0 - ans = [0] - preSum = collections.defaultdict(int) - preSum[0] = 1 - self.anyToAnyPreSum(root, sum, 0, ans, preSum) - return ans[0] -\end{lstlisting} -\item \textbf{124. Binary Tree Maximum Path Sum (hard).} Given a non-empty binary tree, find the maximum path sum. For this problem, a path is defined as any sequence of nodes from some starting node to any node in the tree along the parent-child connections. The path must contain at least one node and \textbf{does not need to go through the root}. -\begin{lstlisting}[numbers=none] -Example 1: -Input: [1,2,3] - - 1 - / \ - 2 3 - -Output: 6 - -Example 2: - -Input: [-10,9,20,null,null,15,7] - - -10 - / \ - 9 20 - / \ - 15 7 - -Output: 42 -\end{lstlisting} - -\textbf{Solution 1: Dual Recurrence}: Before we head over to the optimized solution, first to understand the question. The question can be rephrased as: for each node, find the largest path sum that goes through this node (the path must contain at least one node thus the current node is the one it must include): the left part is the largest left path and the right part is the largest right path. Because only if the left path or the right path is positive then it is beneficial for us to combine its value, the coming step can be: -\begin{align} - \max(ans[0], \max(left, 0)+\max(right,0) + root.val) -\end{align} -At first, we gain the max path sum from the root to any node, which we implement in the function maxRootToAny. 
And at the main function, we call maxRootToAny for left and right subtree, then merge the result, then we traverse to the left branch and right branch to do those things too. This is a straightforward dual recurrence. With time complexity $O(n^2)$. -\begin{lstlisting}[language=Python] -def maxRootToAny(self, root): - if root is None: - return 0 - left = self.maxRootToAny(root.left) - right = self.maxRootToAny(root.right) - # conquer: the current node - return root.val+max(0, max(left, right)) #if the left and right are both negative, we get rid of it -def maxPathSum(self, root): - """ - :type root: TreeNode - :rtype: int - """ - if root is None: - return 0 - def helper(root, ans): - if root is None: - return - left = self.maxRootToAny(root.left) - right = self.maxRootToAny(root.right) - ans[0] = max(ans[0], max(left, 0)+max(right,0)+root.val) - helper(root.left, ans) - helper(root.right, ans) - return - ans = [-sys.maxsize] - helper(root, ans) - return ans[0] -\end{lstlisting} -\textbf{Solution 2: Merge the Dual Recurrence}. If we observe these two recurrence function, we can see we use helper(root), we call maxRootToAny with left and right subtree, which is the same as maxRootToAny(root). Then in helper, we use helper(root.left) to call maxRootToAny(root.left.left) and maxRootToAny(root.left.right), which is exactly the same as maxRootToAny(root.left). Thus, the above solution has one power more of complexity. 
It can be simplified to the following code: -\begin{lstlisting}[language=Python] -def maxRootToAny(self, root, ans): - if root is None: - return 0 - left = self.maxRootToAny(root.left, ans) - right = self.maxRootToAny(root.right, ans) - ans[0] = max(ans[0], max(left, 0) + max(right,0) + root.val) #track the any->root->any maximum - # conquer: the current node - return root.val + max(0, max(left, right)) #track root->any maximum -def maxPathSum(self, root): - """ - :type root: TreeNode - :rtype: int - """ - if root is None: - return 0 - ans = [-sys.maxsize] - self.maxRootToAny(root, ans) - return ans[0] -\end{lstlisting} -The two most important lines of the code are: -\begin{lstlisting}[language=Python] -ans[0] = max(ans[0], max(left, 0) + max(right,0) + root.val) #track the any->root->any maximum -return root.val + max(0, max(left, right)) #track root->any maximum -\end{lstlisting} -\end{examples} -%%%%%%%%%%%%%%%%%%%%Merge%%%%%%%%%%%%%%%%%%%% -\subsection{Reconstruct the Tree} -In this section, we will be asked to rearrange the nodes or the values of the tree either in-place or out-of-place. Unless we are required to do it in-place, we can always use divide and conquer with a returned value and a merge step. -\subsubsection{In-place Reconstruction} -\begin{enumerate} - \item 114. Flatten Binary Tree to Linked List -\end{enumerate} -\begin{examples}[resume] -\item \textbf{114. Flatten Binary Tree to Linked List (medium).} Given a binary tree, flatten it to a linked list in-place. -\begin{lstlisting}[numbers=none] -For example, given the following tree: - - 1 - / \ - 2 5 - / \ \ -3 4 6 - -The flattened tree should look like: - -1 - \ - 2 - \ - 3 - \ - 4 - \ - 5 - \ - 6 -\end{lstlisting} -\textbf{Solution: Preorder Traversal.} First, the ordering of the nodes in the flattened linked list looks like preorder traversal ordering. 
With divide and conquer, for the root node 1, we first flatten the left and right subtrees, and assume this gives us $2 \rightarrow 3 \rightarrow 4$ and $5 \rightarrow 6$ with only parent and right child connections. The result of the current node can be obtained through connecting \texttt{node.right} to \texttt{node.left} by setting the right child of the last node in the flattened left subtree to be \texttt{node.right}. -\begin{lstlisting}[language=Python] -def flatten(self, root): - if not root: - return - # preorder - self.flatten(root.left) # modify root.left - self.flatten(root.right) - - # traverse the left branch to connect with the right branch - if root.left is not None: - node = root.left - while node.right: - node = node.right - node.right = root.right - - else: - root.left = root.right - # connet node, left right - root.right = root.left - root.left = None -\end{lstlisting} -For a balanced tree the time complexity is $O(n\log n)$, because at each step we need $O(n)$ to find the last node in the flattened left subtree, making a $T(n) = 2T(n/2)+O(n)$ recurrence relation for the time complexity. In the worst case of a left-skewed tree, the recurrence becomes $T(n) = T(n-1)+O(n)$, which is $O(n^2)$. - - -\end{examples} -\subsubsection{Out-of-place Reconstruction} -\begin{enumerate} - \item 617. Merge Two Binary Trees - \item 226. Invert Binary Tree (Easy) - \item 654. Maximum Binary Tree(Medium) -\end{enumerate} -\begin{examples}[resume] -\item \textbf{617. Merge Two Binary Trees.} Given two binary trees and imagine that when you put one of them to cover the other, some nodes of the two trees are overlapped while the others are not. - -You need to merge them into a new binary tree. The merge rule is that if two nodes overlap, then sum node values up as the new value of the merged node. Otherwise, the NOT null node will be used as the node of new tree. -\begin{lstlisting}[numbers=none] -Example 1: -Input: - Tree 1 Tree 2 - 1 2 - / \ / \ - 3 2 1 3 - / \ \ - 5 4 7 -Output: -Merged tree: - 3 - / \ - 4 5 - / \ \ - 5 4 7 - -Note: The merging process must start from the root nodes of both trees. -\end{lstlisting} -\textbf{Solution 1: DFS+Divide and Conquer}. 
In this problem, we just need to traverse these two trees ($t_1$ and $t_2$) at the same time instead of just the one tree we have seen before. While traversing nodes, when both $t_1$ and $t_2$ are not empty, we need to create a new node with value \texttt{t1.val+t2.val}. Assume that \texttt{mergeTrees(t1.left, t2.left)} has merged the left subtrees and returns the root node. Similarly we merge the right subtrees. - -We just need to pay attention to the base cases: -\begin{itemize} - \item When both nodes are \texttt{None}, which means we just reached an empty node, we return \texttt{None}. - \item When exactly one node is \texttt{None}, we return the other node. -\end{itemize} -\begin{lstlisting}[language=Python] -def mergeTrees(self, t1, t2): - if t1 is None and t2 is None: # both none - return None - if t1 is None and t2: - return t2 - if t1 and t2 is None: - return t1 - node = TreeNode(t1.val+t2.val) - # divide and conquer, left result and the right result - node.left = self.mergeTrees(t1.left, t2.left) - node.right = self.mergeTrees(t1.right, t2.right) - return node -\end{lstlisting} - -\item \textbf{226. Invert Binary Tree.} Invert a binary tree. -\begin{lstlisting}[numbers=none] -Example: - -Input: - - 4 - / \ - 2 7 - / \ / \ -1 3 6 9 - -Output: - - 4 - / \ - 7 2 - / \ / \ -9 6 3 1 -\end{lstlisting} -\textbf{Solution 1: Divide and Conquer}. -\begin{lstlisting}[language=Python] -def invertTree(self, root): - """ - :type root: TreeNode - :rtype: TreeNode - """ - if root is None: - return None - - # divide: the problem into reversing left subtree and right subtree - left = self.invertTree(root.left) - right = self.invertTree(root.right) - # conquer: current node - root.left = right - root.right = left - return root -\end{lstlisting} - -\item \textbf{654. Maximum Binary Tree.} Given an integer array with no duplicates. A maximum tree building on this array is defined as follows: - \begin{enumerate} - \item The root is the maximum number in the array. 
- \item The left subtree is the maximum tree constructed from left part subarray divided by the maximum number. - \item The right subtree is the maximum tree constructed from right part subarray divided by the maximum number. - \end{enumerate} - -Construct the maximum tree by the given array and output the root node of this tree. -\begin{lstlisting}[numbers=none] -Example 1: - -Input: [3,2,1,6,0,5] -Output: return the tree root node representing the following tree: - - 6 - / \ - 3 5 - \ / - 2 0 - \ - 1 - -Note: - - The size of the given array will be in the range [1,1000]. - -\end{lstlisting} -\textbf{Solution: Divide and Conquer}. The description of the maximum binary tree the root, left subtree, right subtree denotes the root node is the maximum value, and the left child is the max value in the left side of the max value in the array. This fits the divide and conquer. This is so similar as the concept of \textbf{quick sort}. Which divide an array into two halves. The time complexity is $O(nlgn)$. In the worst case, the depth of the recursive tree can grow up to n, which happens in the case of a sorted nums array, giving a complexity of $O(n^2)$. -\begin{lstlisting}[language=Python] - def constructMaximumBinaryTree(self, nums): - """ - :type nums: List[int] - :rtype: TreeNode - """ - if not nums: - return None - (m,i) = max((v,i) for i,v in enumerate(nums)) - root = TreeNode(m) - root.left = self.constructMaximumBinaryTree(nums[:i]) - root.right = self.constructMaximumBinaryTree(nums[i+1:]) - return root -\end{lstlisting} -\textbf{Monotone Queue}. The key idea is: -\begin{enumerate} - \item We scan numbers from left to right, build the tree one node by one step; - \item We use a queue to keep some (not all) tree nodes and ensure a decreasing order; - \item For each number, we keep popping the queue until empty or a bigger number appears; 1) The kicked out smaller number is current node's left child (temporarily, this relationship may change in the future). 
2) The bigger number (if exist, it will be still in stack) is current number's parent, this node is the bigger number's right child. Then we push current number into the stack. -\end{enumerate} -\begin{lstlisting}[language=Python] -def constructMaximumBinaryTree(self, nums): - """ - :type nums: List[int] - :rtype: TreeNode - """ - if not nums: - return None - deQ = collections.deque() - for i, v in enumerate(nums): - node = TreeNode(v) - while deQ and deQ[-1].val < v: - node.left = deQ[-1] - deQ.pop() - if deQ: - deQ[-1].right = node - deQ.append(node) - return deQ[0] -\end{lstlisting} -\end{examples} - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%Find an element%%%%%%%%%%%%%%%%%%% -\subsection{Find element} -\paragraph{Lowest Common Ancestor}. The lowest common ancestor is defined between two nodes p and q as the lowest node in T that has both p and q as descendants (where we allow a node to be a descendant of itself). There will be two cases in LCA problem which will be demonstrated in the following example. -\begin{examples}[resume] -\item \textbf{Lowest Common Ancestor of a Binary Tree (L236).} Given a binary tree, find the lowest common ancestor (LCA) of two given nodes in the tree. Given the following binary tree: root = [3,5,1,6,2,0,8,null,null,7,4] -\begin{lstlisting}[numbers=none] - _______3______ - / \ - ___5__ ___1__ - / \ / \ - 6 _2 0 8 - / \ - 7 4 - -Example 1: -Input: root = [3,5,1,6,2,0,8,null,null,7,4], p = 5, q = 1 -Output: 3 -Explanation: The LCA of of nodes 5 and 1 is 3. - -Example 2: -Input: root = [3,5,1,6,2,0,8,null,null,7,4], p = 5, q = 4 -Output: 5 -Explanation: The LCA of nodes 5 and 4 is 5, since a node can be a descendant of itself - according to the LCA definition. -\end{lstlisting} -\textbf{Solution: Divide and Conquer}. There are two cases for LCA: -\begin{enumerate} - \item two nodes each found in different subtree, like example 1. \item two nodes are in the same subtree like example 2. 
-\end{enumerate} -In the tree traversal, we search for node $p$ and $q$: -\begin{enumerate} - \item if $p$ and $q$ are both found, one from the left and one from the right subtree, the current node is the LCA, solving case 1. - \item if only one of $p$ or $q$ is found, indicating case 2, we simply return this found node as LCA, solving case 2. -\end{enumerate} -During the traversal we compare the current node with $p$ and $q$; if it equals either of them, we return the current node. Therefore in example 1, at node 3, the left subtree returns node 5 and the right subtree returns node 1, thus node 3 is the LCA. In example 2, the traversal stops at node 5 and returns it, while for node 3 the right subtree returns None, so node 5 is the only valid return and becomes the final LCA. The time complexity is $O(n)$. -\begin{lstlisting}[language=Python] -def lowestCommonAncestor(self, root, p, q): - """ - :type root: TreeNode - :type p: TreeNode - :type q: TreeNode - :rtype: TreeNode - """ - if not root: - return None - if root == p or root == q: - return root # found one valid node (case 1: stop at 5, 1, case 2:stop at 5) - left = self.lowestCommonAncestor(root.left, p, q) - right = self.lowestCommonAncestor(root.right, p, q) - if left is not None and right is not None: # p, q in the subtree - return root - if any([left, right]) is not None: - return left if left is not None else right - return None -\end{lstlisting} -\end{examples} -%%%%%%%%%%%%%%%%%%%%%%%Count%%%%%%%%%%%%%%%%%%%%%% -\subsection{Ad Hoc Problems} -There are some other problems that are flexible and have highly customized requirements. We usually need to be more flexible with the solutions too. Sometimes, we need to write multiple functions in order to solve one problem. -\begin{enumerate} - \item 250. Count Univalue Subtrees - \item 863. All Nodes Distance K in Binary Tree -\end{enumerate} -\begin{examples}[resume] -\item \textbf{250. Count Univalue Subtrees (medium). } Given a binary tree, count the number of uni-value subtrees. 
A Uni-value subtree means all nodes of the subtree have the same value. -\begin{lstlisting}[numbers=none] -Example : - -Input: root = [5,1,5,5,5,null,5] - - 5 - / \ - 1 5 - / \ \ - 5 5 5 - -Output: 4 -\end{lstlisting} -\textbf{Solution 1: DFS and Divide and Conquer}. First, every leaf node is a univalue subtree with count 1; this is also the base case, which returns (True, leaf.val, 1). At an internal node such as node 1, we check whether its left and right subtrees are univalue, what their values are, and what their counts are; we also handle the case where a node has only one subtree. If the existing subtrees are univalue and their values equal the value of the current node, we increase the count by one and return (True, node.val, l\_count+r\_count+1). In all other cases, we return (False, None, l\_count+r\_count). -\begin{lstlisting}[language = Python] -def countUnivalSubtrees(self, root): - if not root: - return 0 - - def univalSubtree(root): - if root.left is None and root.right is None: - return (True, root.val, 1) - l_uni, l_val, l_count = True, None, 0 - if root.left: - l_uni, l_val, l_count = univalSubtree(root.left) - r_uni, r_val, r_count = True, None, 0 - if root.right: - r_uni, r_val, r_count = univalSubtree(root.right) - if l_uni and r_uni: - if l_val is None or r_val is None:# a node with only one subtree - if l_val == root.val or r_val == root.val: - return (True, root.val, l_count+r_count+1) - else: - return (False, None, l_count+r_count) - if l_val == r_val == root.val: # a node with both subtrees - return (True, root.val, l_count+r_count+1) - else: - return (False, None, l_count+r_count) - return (False, None, l_count+r_count) - - _, _, count = univalSubtree(root) - return count -\end{lstlisting} -Alternatively, we can use a global variable to record the number of univalue subtrees instead of returning the count from each recursive call.
-\begin{lstlisting}[language=Python] -def countUnivalSubtrees(self, root): - def helper(root): - if not root:return True - if not root.left and not root.right: - self.res += 1 - return True - left_res = helper(root.left) - right_res = helper(root.right) - if root.left and root.right: - if root.val == root.left.val and root.val == root.right.val and left_res and right_res: - self.res += 1 - return True - return False - if root.left and not root.right: - if root.val == root.left.val and left_res: - self.res += 1 - return True - return False - if root.right and not root.left: - if root.val == root.right.val and right_res: - self.res += 1 - return True - return False - self.res = 0 - helper(root) - return self.res -\end{lstlisting} -\item \textbf{863. All Nodes Distance K in Binary Tree (medium).}We are given a binary tree (with root node root), a target node, and an integer value K. (Note that the inputs "root" and "target" are actually TreeNodes.) Return a list of the values of all nodes that have a distance K from the target node. The answer can be returned in any order. -\begin{lstlisting}[numbers=none] -Example 1: - -Input: root = [3,5,1,6,2,0,8,null,null,7,4], target = 5, K = 2 - 3 - / \ - 5 1 - / \ | \ - 6 2 0 8 - / \ - 7 4 -Output: [7,4,1] - -Explanation: -The nodes that are a distance 2 from the target node (with value 5) -have values 7, 4, and 1. -\end{lstlisting} -\begin{figure} - \centering - \includegraphics[width=0.7\columnwidth]{fig/example_863.png} - \caption{Two Cases of K Distance Nodes marked in blue and red arrows. } - \label{fig:distance_k} -\end{figure} -\textbf{Solution 1: DFS traversal with depth to target as return.} There are different cases with path that has target as denoted in Fig~\ref{fig:distance_k}: 1. target is the starting point, we traverse the target downwards to get nodes that is K distance away from target. 2. 
target is the ending point, we need to traverse back to its parents, and first check the distance of the parent node with the target to see if it is K, and second we use another function to find K-distance away nodes on the other branch of the parent node. Because we do not have pointer back to its parents directly, we use recursive tree traversal so that we can return to the parent node with its distance to the target. Therefore, we need two helper functions. The first function \textit{getDistanceK} takes a starting node, and a distance K, to return a list of K distance downwards from starting point. The second function \textit{getDepth} is designed to do the above task, when we find the target in the tree traversal, we return 0, for empty node return -1. -\begin{lstlisting}[language=Python] -def distanceK(self, root, target, K): - if not root: - return [] - def getDistanceK(target, K): - ans = [] - # from target to K distance - q = [target] - d = 0 - while q: - if d == K: - ans += [n.val for n in q] - break - nq = [] - for n in q: - if n.left: - nq.append(n.left) - if n.right: - nq.append(n.right) - q = nq - d += 1 - return ans - - # get depth of target - def getDepth(root, target, K, ans): - if not root: - return -1 - if root == target: - return 0 - # conquer - left = getDepth(root.left, target, K, ans) - right = getDepth(root.right, target, K, ans) - if left == -1 and right == -1: - return -1 - else: - dis = 0 - if left != -1: - dis = left+1 - if root.right: - ans += getDistanceK(root.right, K-dis-1) - else: - dis = right + 1 - if root.left: - ans += getDistanceK(root.left, K-dis-1) - if dis == K: - ans.append(root.val) - return dis - - ans = getDistanceK(target, K) - getDepth(root, target, K, ans) - return ans -\end{lstlisting} -\textbf{Solution 2: DFS to annotate parent node + BFS to K distance nodes.} In solution 1, we have two cases because we can't traverse to its parents node directly. 
If we can add the parent node to each node, and the whole tree would become a acyclic direct graph, thus, we can use BFS to find all the nodes that are K distance away. This still has the same complexity. -\begin{lstlisting}[language=Python] -def distanceK(self, root, target, K): - if not root: - return [] - def dfs(node, par = None): - if node is None: - return - node.par = par - dfs(node.left, node) - dfs(node.right, node) - dfs(root) - seen = set([target]) - q = [target] - d = 0 - while q: - if d == K: - return [node.val for node in q] - nq = [] - for n in q: - for nei in [n.left, n.right, n.par]: - if nei and nei not in seen: - seen.add(nei) - nq.append(nei) - q = nq - d += 1 - return [] -\end{lstlisting} -\end{examples} - - - -% \begin{enumerate} - -% \item Binary Tree Paths -% \begin{inparaenum} -% \item \textbf{Minimum Subtree} -% Given a binary tree, find the subtree with minimum sum. Return the root of the subtree. -% LintCode will print the subtree which root is your return node. - -% Solution: we need to get the value of the whole tree, = helper(left)+helper(right)+current val. It’s guaranteed that there is only one subtree with minimum sum and the given binary tree is not an empty tree. - -% \item \textbf{The maximum path sum in BT} -% \begin{inparaenum} -% \item the maximum path sum(root->leaf) - -% Example: For the following BT: -% \begin{lstlisting} -% 1 -% / \ -% 2 3 -% \end{lstlisting} -% Return 4. (The maximum path is 1->3). However, if we have negative value, this is not going to work. 
-% \begin{lstlisting} [language = Python] -% public int maxPathSum2(TreeNode root) { -% if (root == null) { -% return 0 #th -% } -% int left = maxPathSum2(root.left) -% int right = maxPathSum2(root.right) - -% return root.val + Math.max(left, right) #at least root+one of the subtree -% } -% \end{lstlisting} - -% \item the maximum path sum(root->any) - -% Binary Tree Maximum Path Sum II, http://www.lintcode.com/zh-cn/problem/binary-tree-maximum-path-sum-ii/ - -% The path can be from root to any node, but it needs include at least one nod, which is the root. -% Example, For the following BT: -% \begin{lstlisting} -% 1 -% / \ -% 2 3 -% \end{lstlisting} -% Return 4. (Maximum Path is 1->3) - -% Solution: this one is slightly different, for each node, we can return the sum of current node +left subtree, or current node+ right subtree, or we just return current node, which means the path ends here. -% For the divide and conquer: 1) Recursive end condition: when the node is null. 2) Divide: divide the tree into the result of the left subtree and right subtree. 3)Conquer: merge the result from the divide. -% \begin{lstlisting} [language = Python] -% public int maxPathSum2(TreeNode root) { -% if (root == null) { -% return 0; -% } -% //divide -% int left = maxPathSum2(root.left); -% int right = maxPathSum2(root.right); -% //conquer -% return root.val + Math.max(0, Math.max(left, right)); #if the max is negative, we get rid of them, use 0 instead. -% } -% \end{lstlisting} -% \item the maximum path sum(any->any) - -% 2.5 Binary Tree Maximum Path Sum - - -% \end{inparaenum} -% \item Reverse from Traverse result to build tree - - -% \end{inparaenum} -% \end{enumerate} -% \section{Time complexity of Binary Tree} -% If we spent O(n) to convert $T(n)$ to $2T(n/2)$. 
We have the following deduction: -% \begin{equation} \label{bt_time} -% \begin{split} -% T(n) & = 2T(n/2) + O(n)\\ -% & = 2 * 2T(n/4) + O(n) + O(n)\\ -% & = O(nlogn) -% \end{split} -% \end{equation} -% which is the same as merge sort. If the divide cost is only $O(1)$. -% \begin{equation}\label{bt_time2} -% \begin{split} -% T(n) &= 2T(n/2) + O(1)\\ -% & = 2 * 2T(n/4) + O(1) + O(1)\\ -% &= n + (1 + 2 + 4 +...+ n)\\ -% &\approx n + 2n\\ -% &\approx O(n) -% \end{split} -% \end{equation} - - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%%%%BST Related problems and algorithms -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\section{Binary Search Tree (BST)} -\subsection{BST Rules} -\begin{enumerate} - \item 98. Validate Binary Search Tree (Medium) - \item 99. Recover Binary Search Tree(hard) - \item 426. Convert Binary Search Tree to Sorted Doubly Linked List (medium) -\end{enumerate} -\begin{examples}[resume] -\item \textbf{98. Validate Binary Search Tree (medium)} Given a binary tree, determine if it is a valid binary search tree (BST). Assume a BST is defined as follows: -\begin{itemize} - \item The left subtree of a node contains only nodes with keys less than the node's key. - \item The right subtree of a node contains only nodes with keys greater than the node's key. - \item Both the left and right subtrees must also be binary search trees. - \end{itemize} -\begin{lstlisting} -Example 1: - -Input: - 2 - / \ - 1 3 -Output: true - -Example 2: - - 5 - / \ - 1 4 - / \ - 3 6 -Output: false -Explanation: The input is: [5,1,4,null,null,3,6]. The root node's value - is 5 but its right child's value is 4. -\end{lstlisting} -\textbf{Solution1: Limit the value range for subtrees: top-down}. We start from the root, which should be in range $[-\inf, +\inf]$. And the left subtree should be limited into $[-\inf, root.val]$, and right in $[root.val, +\inf]$. 
The Code is simple and clean: -\begin{lstlisting}[language=Python] -def isValidBST(self, root, minv=float("-inf"), maxv=float("inf")): - """ - :type root: TreeNode - :rtype: bool - """ - if root is None: - return True - - if (minv < root.val < maxv): - return self.isValidBST(root.left, minv, root.val) and self.isValidBST(root.right, root.val, maxv) - return False -\end{lstlisting} -\textbf{Solution 2: Limit the value range for parent node: bottom-up}. We traverse the tree, and we return values from the None node, then we have three cases: -\begin{lstlisting} -1) both subtrees are None # a leaf - return (True, root.val, root.val) -2) both subtrees are not None: # a subtree with two branches - check if l2 < root.val < r1: - merge the range to: - return (True, l1, r2) -3) one subtree is None: # a subtree with one branches: - only check one of l2, r1 and merge accordingly -\end{lstlisting} -\textbf{Solution 2: Using inorder}. If we use inorder, then the tree resulting list we obtained should be strictly increasing. -\begin{lstlisting}[language=Python] -def isValidBST(self, root): - if root is None: - return True - - def inOrder(root): - if not root: - return [] - return inOrder(root.left) + [root.val] + inOrder(root.right) - ans = inOrder(root) - pre = float("-inf") - for v in ans: - if v <= pre: - return False - pre = v - return True -\end{lstlisting} -\item \textbf{99. Recover Binary Search Tree (hard).} Two elements of a binary search tree (BST) are swapped by mistake. Recover the tree without changing its structure. -\begin{lstlisting}[numbers=none] -Example 1: - -Input: [1,3,null,null,2] - - 1 - / - 3 - \ - 2 - -Output: [3,1,null,null,2] - - 3 - / - 1 - \ - 2 - -Example 2: - -Input: [3,1,4,null,null,2] - - 3 - / \ -1 4 - / - 2 - -Output: [2,1,4,null,null,3] - - 2 - / \ -1 4 - / - 3 -\end{lstlisting} -Follow up: A solution using O(n) space is pretty straight forward. Could you devise a constant space solution? 
- -\textbf{Solution 1: Recursive InOrder Traversal and Sorting, O(n) space.} The same as validating a BST, the inorder traversal of a valid BST must have a sorted order. Therefore, we obtain the inorder traversed list, and sort them by the node value, and compared the sorted list and the unsorted list to find the swapped nodes. -\begin{lstlisting}[language=Python] -def recoverTree(self, root): - """ - :type root: TreeNode - :rtype: void Do not return anything, modify root in-place instead. - """ - def inorder(root): - if not root: - return [] - return inorder(root.left) + [root] + inorder(root.right) - - - ans = inorder(root) - sans = sorted(ans, key = lambda x: x.val) - # swap - for x, y in zip(ans,sans): - if x != y: - x.val, y.val = y.val, x.val - break -\end{lstlisting} -\textbf{Solution 2: Iterative Traversal: O(1) space.} The inorder traversal for each example are: -\begin{lstlisting}[numbers=none] -Example 1: [3, 2, 1], need to switch 3, 1 -Example 2: [1, 3, 2, 4], need to switch 3, 2 -\end{lstlisting} -If we observe the inorder list: if we check the previous and current pair, if it is dropping as (3,2), (2,1), then we call this dropping pairs. In example 2, there is only one pair (3,2). This is the two possoble cases when we swap a pair of elements in a sorted list. If we use the inorder iterative traversal, and record the pre, cur dropping pairs, then it is straightforward to do the swapping of the dropping pair or just one pair. -\begin{lstlisting}[language=Python] -def recoverTree(self, root): - cur, pre, stack = root, TreeNode(float("-inf")), [] - drops = [] - # inorder iterative: left root, right - while stack or cur: - while cur: - stack.append(cur) - cur = cur.left - cur = stack.pop() - if cur.val < pre.val: - drops.append((pre, cur)) - pre, cur = cur, cur.right - - drops[0][0].val, drops[-1][1].val = drops[-1][1].val, drops[0][0].val -\end{lstlisting} -\item \textbf{426. 
Convert Binary Search Tree to Sorted Doubly Linked List (medium)} Convert a BST to a sorted circular doubly-linked list in-place. Think of the left and right pointers as synonymous to the previous and next pointers in a doubly-linked list. One example is shown in Fig.~\ref{fig:bst_dll}. -\begin{figure}[h!] - \centering - \includegraphics[width=0.45\columnwidth]{fig/bstdlloriginalbst.png} - \includegraphics[width=0.45\columnwidth]{fig/bstdllreturndll.png} - \caption{Example of BST to DLL} - \label{fig:bst_dll} -\end{figure} - -\textbf{Analysis} As we observe the example, for each node in the doubly linked list (dll), its predecessor and successor is the same as the same node in BST. As we have learned the concept of predecessor and successor in Chapter~\ref{chapter_tree}, we know how to find the predecessor and successor individually for each node. However, in this scene, it would be more useful with the inorder traversal, wherein we can use divide and conquer to obtain the left sorted list and the right sorted list for each node. More than this, we need to make the dll, we have two choices to do this: 1) Use our learned inorder traversal to generate a list, and then generate the dll from the list of BST nodes. 2) Combine the inorder traversal together with the linking process. - -\textbf{Solution 1: Inorder traversal + Doubly linked List Connect.} This process is straightforward, we need to handle the case where the BST only has one node, or for BST that has at least two nodes. 
For the second case, we should handle the head and tail node seperately due to its different linking rule: -\begin{lstlisting}[language=Python] -def treeToDoublyList(self, root): - """ - :type root: Node - :rtype: Node - """ - if not root: - return None - - def treeTraversal(root): - if not root: - return [] - left = treeTraversal(root.left) - - right = treeTraversal(root.right) - return left + [root] + right - - sortList = treeTraversal(root) - if len(sortList) == 1: - sortList[0].left = sortList[0] - sortList[0].right = sortList[0] - return sortList[0] - - for idx, node in enumerate(sortList): - if idx == 0: - node.right = sortList[idx+1] - node.left = sortList[-1] - elif idx == len(sortList) - 1: - node.right = sortList[0] - node.left = sortList[idx-1] - else: - node.right = sortList[idx+1] - node.left = sortList[idx-1] - return sortList[0] -\end{lstlisting} - -\textbf{Solution 2: Inorder traversal together with linking process.} We use divide and conquer method and assuming the left and right function call gives us the head of the dll on each side. With left\_head and right\_head, we just need to link these two separate dlls with current node in the process of inorder traversal. The key here is to find the tail left dll, and link them like: left\_tail+current\_node+right\_head, and link left\_head with right\_tail. With dlls, to find the tail from the head, we just need to use head.left. -\begin{lstlisting}[language=Python] -def treeToDoublyList(self, root): - """ - :type root: Node - :rtype: Node - """ - if not root: return None - - left_head = self.treeToDoublyList(root.left) - right_head = self.treeToDoublyList(root.right) - return self.concat(left_head, root, right_head) - - -""" -Concatenate a doubly linked list (prev_head), a node -(curr_node) and a doubly linked list (next_head) into -a new doubly linked list. 
-""" -def concat(self, left_head, curr_node, right_head): - # for current node, it has only one node, head and tail is the same - new_head, new_tail = curr_node, curr_node - - if left_head: - # find left tail - left_tail = left_head.left - # connect tail with current node - left_tail.right = curr_node - curr_node.left = left_tail - # new_head points to left_head - new_head = left_head - - if right_head: - right_tail = right_head.left - # connect head with current node - curr_node.right = right_head - right_head.left = curr_node - new_tail = right_tail # new_tail points to right_tail - - new_head.left = new_tail - new_tail.right = new_head - return new_head -\end{lstlisting} -\end{examples} - -\subsection{Operations} -In this section, we should problems related to operations we introduced in section~\ref{concept_binary_search_tree}, which include SEARCH, INSERT, GENERATE, DELETE. LeetCode Problems include: -\begin{enumerate} - \item 108. Convert Sorted Array to Binary Search Tree - \item 96. Unique Binary Search Trees -\end{enumerate} - -\begin{examples}[resume] -\item \textbf{108. Convert Sorted Array to Binary Search Tree.} Given an array where elements are sorted in ascending order, convert it to a height balanced BST. For this problem, a height-balanced binary tree is defined as a binary tree in which the depth of the two subtrees of every node never differ by more than 1. -\begin{lstlisting}[numbers=none] -Example: - -Given the sorted array: [-10,-3,0,5,9], - -One possible answer is: [0,-3,9,-10,null,5], which represents the following height balanced BST: -\begin{lstlisting} - 0 - / \ - -3 9 - / / - -10 5 -\end{lstlisting} - -\textbf{Solution: Binary Searching.} use the binary search algorithm, the stop condition is when the l>r. 
-\begin{lstlisting}[language = Python] -def sortedArrayToBST(self, nums): - """ - :type nums: List[int] - :rtype: TreeNode - """ - def generatebalancedBST(l,r): - if l>r: - return None - m = (l+r)//2 - tree = TreeNode(nums[m]) - tree.left = generatebalancedBST(l,m-1) - tree.right = generatebalancedBST(m+1,r) - return tree - return generatebalancedBST(0,len(nums)-1) -\end{lstlisting} - -109. Convert Sorted List to Binary Search Tree, the difference is here we have a linked list, we can convert the linked list into a list nums - -\item \textbf{96. Unique Binary Search Trees} - -Given n, how many structurally unique BST’s (binary search trees) that store values 1…n? -\begin{lstlisting}[numbers=none] -For example, - - Given n = 3, there are a total of 5 unique BST's. - 1 3 3 2 1 - \ / / / \ \ - 3 2 1 1 3 2 - / / \ \ - 2 1 2 3 -\end{lstlisting} - -Solution: When we read the signal, list all of it, we need to use for loop, to pose each element as root, and the left side is left tree, the right side is used for the right tree. Use DPS: We generated all the BST that use ith node as root -\begin{lstlisting}[language = Python] -def numTrees(self, n): - """ - :type n: int - :rtype: int - """ - def constructAllBST(start,end): - if start>end: - return [None] - - #go through the start to end, and use the ith as root - rslt=[] - leftsubs,rightsubs=[],[] - for i in xrange(start,end+1): - - leftsubs=constructAllBST(start,i-1) - rightsubs=constructAllBST(i+1,end) - for leftnode in leftsubs: - for rightnode in rightsubs: - node = TreeNode(i) - node.left=leftnode - node.right=rightnode - rslt.append(node) - return rslt - -rslt= constructAllBST(1,n) - return len(rslt) - \end{lstlisting} - -If we only need length, a slightly better solution showing as follows. 
-\begin{lstlisting}[language = Python] -def numTrees(self, n): - """ - :type n: int - :rtype: int - """ - def constructAllBST(start,end): - if start>end: - return 1 - - #go through the start to end, and use the ith as root - count = 0 - leftsubs,rightsubs=[],[] - for i in xrange(start,end+1): - - leftsubs=constructAllBST(start,i-1) - rightsubs=constructAllBST(i+1,end) - count+=leftsubs*rightsubs - return count - -rslt= constructAllBST(1,n) - return rslt - \end{lstlisting} - -However, it still cant pass the test, try the bottom up iterative solution with memorization: $T(start,end)=T(start,i-1)*T(i+1,end) T(j,i)=T(j,i-1)*T(i+1,i)$. How to explain this? -\begin{lstlisting}[language = Python] -def numTrees1(self, n): - res = [0] * (n+1) - res[0] = 1 - for i in xrange(1, n+1): #when i=2, j=[0,1] res[2] = res[0]*res[2-1-0] + res[1]*res[2-1-1] - for j in xrange(i): #i [1,n], j =[0,i), the case if for one node, - res[i] += res[j] * res[i-1-j] - return res[n] -\end{lstlisting} -Using math: -\begin{lstlisting}[language = Python] -# Catalan Number (2n)!/((n+1)!*n!) -def numTrees(self, n): - return math.factorial(2*n)/(math.factorial(n)*math.factorial(n+1)) -\end{lstlisting} - -\end{examples} - -\subsection{Find certain element of the tree} -successor or predecessor:285. Inorder Successor in BST, 235. Lowest Common Ancestor of a Binary Search Tree -\begin{enumerate} - \item 285. Inorder Successor in BST - \item 235. Lowest Common Ancestor of a Binary Search Tree - \item 230. Kth Smallest Element in a BST - \item 270. Closest Binary Search Tree Value - \item 272. Closest Binary Search Tree Value II - \item 426. 
Convert Binary Search Tree to Sorted Doubly Linked List (find the precessor and successor) -\end{enumerate} - -\paragraph{Lowest Common Ancestor(LCA)} The lowest common ancestor is defined between two nodes v and w as the lowest node in T that has both v and w as descendants (where we allow a node to be a descendant of itself).” e.g., if u=5,w=19, then we first node when we recursively visiting the tree that is within [u,w], then the LCA is 14. Compared with LCA for binary tree, because of the searching property of searching tree, it is even simipler: - \begin{lstlisting} - treverse the tree: - if node.val is in [s, b], return node is LCA - if node.val > b, traverse node.left - if node.val < s, traverse node.right - \end{lstlisting} - - 235. Lowest Common Ancestor of a Binary Search Tree - - Given a binary search tree (BST), find the lowest common ancestor (LCA) of two given nodes in the BST. -\begin{lstlisting} -Given binary search tree: root = [6,2,8,0,4,7,9,null,null,3,5] - - _______6______ - / \ - ___2__ ___8__ - / \ / \ - 0 _4 7 9 - / \ - 3 5 - -Example 1: - -Input: root = [6,2,8,0,4,7,9,null,null,3,5], p = 2, q = 8 -Output: 6 -Explanation: The LCA of nodes 2 and 8 is 6. - -Example 2: - -Input: root = [6,2,8,0,4,7,9,null,null,3,5], p = 2, q = 4 -Output: 2 -Explanation: The LCA of nodes 2 and 4 is 2, since a node can be a descendant of itself - according to the LCA definition. -\end{lstlisting} -\begin{lstlisting}[language=Python] -def lowestCommonAncestor(self, root, p, q): - """ - :type root: TreeNode - :type p: TreeNode - :type q: TreeNode - :rtype: TreeNode - """ - s = min(p.val,q.val) - b = max(p.val,q.val) - def LCA(node): - if not node: - return None - if node.val>b: - return LCA(node.left) - if node.valb: - return LCA(node.left) - if node.valtarget: - if root.val-target= L). You might need to change the root of the tree, so the result should return the new root of the trimmed binary search tree. 
- -Example 2: -\begin{lstlisting} -Input: - 3 - / \ - 0 4 - \ - 2 - / - 1 - - L = 1 - R = 3 - -Output: - 3 - / - 2 - / - 1 - \end{lstlisting} - -Solution: Based on F1, if the value of current node is smaller than L, suppose at 0, then we delete its left child, node.left = None, then we check its right size, go to node.right, we return node = goto(node.right), if it is within range, then we keep checking left, right, and return current node -\begin{lstlisting}[language = Python] -def trimBST(self, root, L, R): - """ - :type root: TreeNode - :type L: int - :type R: int - :rtype: TreeNode - """ - def trimUtil(node): - if not node: - return None - if node.valR: - node.right=None - node=trimUtil(node.left) - return node - else: - node.left=trimUtil(node.left) - node.right=trimUtil(node.right) - return node - return trimUtil(root) -\end{lstlisting} -A mutant of this is to split the BST into two, one is smaller or equal to the given value, the other is bigger. -\subsection{Split the Tree} -Split the tree - -with a certain value ,776. Split BST - -776. Split BST - -Given a Binary Search Tree (BST) with root node root, and a target value V, split the tree into two subtrees where one subtree has nodes that are all smaller or equal to the target value, while the other subtree has all nodes that are greater than the target value. It's not necessarily the case that the tree contains a node with value V. - -Additionally, most of the structure of the original tree should remain. Formally, for any child C with parent P in the original tree, if they are both in the same subtree after the split, then node C should still have the parent P. - -You should output the root TreeNode of both subtrees after splitting, in any order. - -Example 1: -\begin{lstlisting} -Input: root = [4,2,6,1,3,5,7], V = 2 -Output: [[2,1],[4,3,6,null,null,5,7]] -Explanation: -Note that root, output[0], and output[1] are TreeNode objects, not arrays. 
-\end{lstlisting} - -The given tree [4,2,6,1,3,5,7] is represented by the following diagram: -\begin{lstlisting} - 4 - / \ - 2 6 - / \ / \ - 1 3 5 7 -\end{lstlisting} - -Solution: The coding is quite similar as the trimming. -\begin{lstlisting}[language = Python] -class Solution(object): - def splitBST(self, root, V): - """ - :type root: TreeNode - :type V: int - :rtype: List[TreeNode] - """ - def splitUtil(node): - if not node: - return (None,None) - if node.val<=V: - sb1,sb2 = splitUtil(node.right) #the left subtree will satisfy the condition, split the right subtree - node.right=sb1 #Now set the right subtree with sb1 that - return (node, sb2) - else: - sb1, sb2=splitUtil(node.left) #the right subtree satisfy the condition, split the left subtree - node.left=sb2 - return (sb1,node) - return list(splitUtil(root)) -\end{lstlisting} - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%Exercise%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\section{Exercise} -\subsection{Depth} -104. Maximum Depth of Binary Tree (Easy) - -Given a binary tree, find its maximum depth. The maximum depth is the number of nodes along the longest path from the root node down to the farthest leaf node. - -Note: A leaf is a node with no children. -\begin{lstlisting} -Example: - -Given binary tree [3,9,20,null,null,15,7], - - 3 - / \ - 9 20 - / \ - 15 7 - -return its depth = 3. -\end{lstlisting} -\textbf{DFS+Divide and conquer}. -\begin{lstlisting}[language=Python] -def maxDepth(self, root): - if not root: - return 0 - if not root.left and not root.right: - return 1 - depth = -sys.maxsize - if root.left: - depth = max(depth, self.maxDepth(root.left)) - if root.right: - depth = max(depth, self.maxDepth(root.right)) - return depth+1 -\end{lstlisting} -559. Maximum Depth of N-ary Tree (Easy) - -Given a n-ary tree, find its maximum depth. The maximum depth is the number of nodes along the longest path from the root node down to the farthest leaf node. 
-\begin{lstlisting}[language=Python] -# Definition for a Node. -class Node(object): - def __init__(self, val, children): - self.val = val - self.children = children - -def maxDepth(self, root): - if not root: - return 0 - children = root.children - if not any(children): # a leaf - return 1 - depth = -sys.maxsize - for c in children: - if c: - depth = max(depth, self.maxDepth(c)) - return depth+1 -\end{lstlisting} -%%%%%%%%%%%%%%%%%%%%%%%%%%%Paths%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\subsection{Path} -\textbf{113. Path Sum II (medium).} Given a binary tree and a sum, find all root-to-leaf paths where each path's sum equals the given sum. -\textit{Note: A leaf is a node with no children.} -\begin{lstlisting}[numbers=none] -Example: -Given the below binary tree and sum = 22, - - 5 - / \ - 4 8 - / / \ - 11 13 4 - / \ / \ -7 2 5 1 - -Return: - -[ - [5,4,11,2], - [5,8,4,5] -] -\end{lstlisting} -\begin{lstlisting}[language=Python] -def pathSumHelper(self, root, sum, curr, ans): - if root is None: # this is for one brach tree - return - if root.left is None and root.right is None: # a leaf as base case - if sum == root.val: - ans.append(curr+[root.val]) - return - - self.pathSumHelper(root.left, sum-root.val, curr+[root.val], ans) - - self.pathSumHelper(root.right, sum-root.val, curr+[root.val], ans) - -def pathSum(self, root, sum): - """ - :type root: TreeNode - :type sum: int - :rtype: List[List[int]] - """ - ans = [] - self.pathSumhelper(root, sum, [], ans) - return ans -\end{lstlisting} - -257. Binary Tree Paths - -Given a binary tree, return all root-to-leaf paths. - -Note: A leaf is a node with no children. -\begin{lstlisting} -Example: -Input: - - 1 - / \ -2 3 - \ - 5 -Output: ["1->2->5", "1->3"] -Explanation: All root-to-leaf paths are: 1->2->5, 1->3 -\end{lstlisting} -\textbf{Root to Leaf}. 
Becareful that we only collect result at the leaf, and for the right tree and left tree we need to make sure it is not None: -\begin{lstlisting}[language=Python] -def binaryTreePaths(self, root): - """ - :type root: TreeNode - :rtype: List[str] - """ - def dfs(root, curr, ans): - if root.left is None and root.right is None: # a leaf - ans.append(curr+str(root.val)) - return - if root.left: - dfs(root.left, curr+str(root.val)+'->', ans) - if root.right: - dfs(root.right, curr+str(root.val)+'->', ans) - if root is None: - return [] - ans = [] - dfs(root, '', ans) - return ans -\end{lstlisting} -543. Diameter of Binary Tree - - Given a binary tree, you need to compute the length of the diameter of the tree. The diameter of a binary tree is the length of the longest path between any two nodes in a tree. This path may or may not pass through the root. -\begin{lstlisting} -Example: -Given a binary tree - - 1 - / \ - 2 3 - / \ - 4 5 - -Return 3, which is the length of the path [4,2,1,3] or [5,2,1,3]. -\end{lstlisting} -\textbf{Root to Any with Global Variable to track the any to any through root}. 
-\begin{lstlisting}[language=Python] -def diameterOfBinaryTree(self, root): - """ - :type root: TreeNode - :rtype: int - """ - # this is the longest path from any to any - - def rootToAny(root, ans): - if not root: - return 0 - left = rootToAny(root.left, ans) - right = rootToAny(root.right, ans) - ans[0] = max(ans[0], left+right) # track the any to any through root - return max(left, right) + 1 #get the maximum depth of root to any - ans = [0] - rootToAny(root, ans) - return ans[0] -\end{lstlisting} - -\end{document} \ No newline at end of file diff --git a/Easy-Book/chapters/chapter_13_greedy_algo.tex b/Easy-Book/chapters/chapter_13_greedy_algo.tex deleted file mode 100644 index e0bf5da..0000000 --- a/Easy-Book/chapters/chapter_13_greedy_algo.tex +++ /dev/null @@ -1,662 +0,0 @@ -\documentclass[../main.tex]{subfiles} -\begin{document} -Greedy algorithm is a further optimization strategy on top of dynamic programming. It usually constructs and tracks a single optimal solution to problem directly and incrementally; like the dynamic programming, it works with subproblems, and at each step it extends the last partial solution by evaluating all available candidates and then pick the best one at the moment without regard to other discarded solutions. Greedy algorithm picks the best immediate output, but does not consider the big picture, hence it is considered greedy. - -Because of the ``greediness'' of the greedy algorithms, whether the single one solution we derive is optimal or not is what for us to ponder and decide. The consciousness of its optimality is important: if we require an absolutely optimal solution, we have to prove its optimality with systematic induction methods, if we are aware that it wont lead to the optimal solution, but is close enough and a good approximation to the optimal solution that we seek but too expensive to achieve, we can still go for it. 
This chapter is a systematic study of the greedy algorithm, we focus on designing and proving methods that always try to achieve the optimal solution. - -Greedy algorithm is highly related to and relies on \textbf{math optimization}. It is ``easy'' if you can reason a solution and prove it easily with math, which comes ``natural''. Greedy algorithm can be ``hard'' when we need to identify important and less obvious properties, design a greedy approach, and prove its correctness with more systematic induction methods; it requires even more analysis effort than dynamic programming does. Because of the highly flexibility of the greedy algorithms, a lot algorithmic books do not even cover this topic. -It is not frequently seen in real interviews, but we want to cover it because in the field of AI, the searching is approximate, greedy algorithm can be approximate too and efficient. Maybe it will inspire us in other fields. - - -%%%%%%%%%%%%%%%%%%From dynamic programming to greedy algorithm%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\section{Exploring} -\paragraph{Maximum Non-overlapping Intervals (L435)} Given a collection of intervals, find the minimum number of intervals you need to remove to make the rest of the intervals non-overlapping. Note: You may assume the interval’s end point is always bigger than its start point. Intervals like [1,2] and [2,3] have borders “touching” but they don’t overlap each other. -\begin{lstlisting}[numbers=none] -Example 1: - -Input: [ [1,2], [2,3], [3,4], [1,3] ] -Output: 1 -Explanation: [1,3] can be removed and the rest of intervals are non-overlapping. -\end{lstlisting} -\paragraph{Analysis} Naively, this is a combination problem that each interval can be taken or not taken, which has a total of $O(2^n)$ combinations. For each combination, we make sure its a feasible that none of the within items overlaps. The process of enumerating the combination has been well explained in the chapter of search and combinatorics. 
As a routine for optimization problem, we use a sequence $X$ to represent if each item in the original array is chosen or not, $x_i\in \{0, 1\}$. Our objective is to optimize the value: -\begin{align} - o = \max \sum_{i=0}^{n-1} x_i \\ - % \texttt{w.r.t} x_i\in \{0, 1\} \texttt{ and x_i does not overlap with each other} -\end{align} -However, if we sort the items by either start or end time, the checking of an item's compatibility to a combination will be only need to compare it with its last item - -% Think further that in our resulting subsequence, if we sort them in order of their start time or finish time, we could have $s_i < f_i \leq s_{i+1} < f_{i+1}$. This would indicates that we sort our array of intervals in some order and simply the search space from a search tree to linear space. -\subsubsection{Dynamic Programming} -\begin{figure}[H] - \centering - \includegraphics[width=0.49\columnwidth]{fig/greedy_schedule_all_intervals_sorting_finish.png} - \includegraphics[width=0.49\columnwidth]{fig/greedy_schedule_all_intervals_sorting.png} - \caption{All intervals sorted by start and end time.} - \label{fig:greedy_intervals_sort_types} -\end{figure} -\paragraph{A-B: Convert to Longest Increasing Subsequence} A feasible solution would be that $a_0, a_1, ..., a_k$, and $s(a_i)\leq f(a_i)\leq s(a_{i+1}) < f(a_{i+1})$. If we sort the intervals by either start or end time, our sorted intervals as shown in Fig.~\ref{fig:greedy_intervals_sort_types}. We can reduce our problem into finding the length of longest subsequence $LS, i = [0, k-1]$ that does not overlap , which is equivalently defining that $s[i+1]\geq f[i]$ in the resulting subsequence. This is similar enough to the concept of longest increasing subsequence, we can apply the dynamic programming to solve this problem with a time complexity of $O(n^2)$. - -For this problem, there can exist multiple optimal solutions and dynamic programming can tell us from the \texttt{LIS} array that which one has the maximum. 
Let's define a subproblem $d[i]$ as getting the maximum number of non-overlapping intervals for subarray $[a[0], a[1], ..., a[i-1]]$ with the maximum subsequence that includes $a[i-1]$. Then, our recurrence relation is: -\begin{align} - d[i]&=\max(d[j])+1, j \in [0, i-1], f(a[j]) \leq s(a[i]). -\end{align} -That is, we only extend subproblems whose last interval finishes no later than the current interval starts. The code is given: -\begin{lstlisting}[language=Python] -def eraseOverlapIntervals(intervals: List[List[int]]) -> int: - if not intervals: - return 0 - intervals.sort(key=lambda x: x[0]) - n = len(intervals) - LIS = [0]*(n+1) - for i in range(n): - max_before = 0 - for j in range(i, -1, -1): - if intervals[i][0] >= intervals[j][1]: - max_before = max(max_before, LIS[j+1]) - LIS[i+1] = max(LIS[i], max_before+1) - #print(LIS) - return len(intervals)-max(LIS) -\end{lstlisting} - -\paragraph{Simplified Dynamic Programming} -Let's approach the problem directly, define a subproblem $d[i]$ as the maximum number of non-overlapping intervals for subarray $a[0:i]$. -With induction, assume we have solved all subproblems from $d[0]$ up till $d[i-1]$, meaning we have known the answer to all these subproblems. Now we want to find the recurrence relation between subproblem $d[i]$ and its preceding subproblems. We have $a[i]$ at hand, what effect it can have? - -We can either increase its previous maximum value which is $d[i-1]$ by one, or else, the optimal solution remains unchanged. This makes the $d$ array \textbf{non-decreasing} sequence. With this characteristic, we do not need to try out all preceding compatible intervals, but instead just its nearest preceding one--because it is at least the same as all preceding ones. We define this preceding compatible interval of $a[i]$ with index $p[i]$, then our recurrence relation becomes -\begin{equation} - d[i] = max(d[i-1], d[p[i]]+1). -\end{equation} -And the final answer will be \texttt{dp[-1]}. -With the sorting, the part with the dynamic programming only takes $O(n)$, making the total time $O(n\log n)$ mainly caused by sorting.
The Code only differs one line with the above approach: -\begin{lstlisting}[language=Python] -def eraseOverlapIntervals(intervals: List[List[int]]) -> int: - if not intervals: - return 0 - intervals.sort(key=lambda x: x[0]) - n = len(intervals) - dp = [0]*(n+1) - - for i in range(n): - max_before = 0 - for j in range(i, -1, -1): - if intervals[i][0] >= intervals[j][1]: - max_before = max(max_before, dp[j+1]) - break - dp[i+1] = max(dp[i], max_before+1) - #print(LIS) - return n-dp[-1] -\end{lstlisting} -\subsubsection{Greedy Algorithm} -In the previous solution, the process looks like this: If it is sorted by end time, first we have $e, m = 1$, for $a$, it is not compatible with $e$, according to previous recurrence relation, $m=1$, with either $a$ or $e$ in the optimal solution. When we are processing $d$, its preceding compatible interval is $e$, making our maximum value $2$ for this subproblem. For $c$, the length of the optimal solution remains the same, but with additional optimal solution: $e, c$. -However, if we go back, when we are processing $a$, is it necessary to keep $a$ and $e$ as the optimal solution. If we just get the maximum length of the optimal solution, we do not need to track all optimal solutions, but just one that is the most ``optimistic'', which will be $e$ in our case. Because choosing $e$ instead of $a$ leaves more space and thus more likely to fit more intervals for the later subproblems. Similarly, for $c$, it is incompatible with $d$, then it is safe to throw it away, because it has the largest end time--the least optimistic, thus it is unnecessary to replace any previous interval in the optimal solution with it. This algorithm takes this simplification even more aggressive and ``greedy'. Therefore, in the greedy algorithm, if we have multiple optimal solutions, usually it only cares about \textbf{one} that is the most promising and optimistic, and incrementally to build up on it. 
The code is given: -\begin{lstlisting}[language=Python] -def eraseOverlapIntervals(intervals: List[List[int]]) -> int: - if not intervals: - return 0 - min_rmv = 0 - intervals.sort(key = lambda x: x[1]) - last_end = -sys.maxsize - for i in intervals: - if i[0] >= last_end: #non-overlap - last_end = i[1] - else: - min_rmv += 1 - - return min_rmv -\end{lstlisting} -If we sort our problems by start time. We need to tweak the code a bit, that whenever one interval is incompatible with previous, we see if it has earlier end time that the previous one, if it is, then we replace it with this one, because it has later start time, and earlier end time, whatever the optimal that the previous interval is in, replacing it with the current one will not overlap and it will be more promising. -\begin{lstlisting}[language=Python] -for i in intervals: - if i[0] < last_end: #overlap, delete this one, do not update the end - if i[1] < last_end: - last_end = i[1] - min_rmv += 1 - else: - last_end = i[1] -\end{lstlisting} -\subsubsection{Summary and Comparison} -We have seen that both dynamic programming and the greedy algorithms solves the problems \textbf{incrementally}--starting from small problems to larger problems. Dynamic programming plays safe by tracking the previous state, thus it does not matter how you sort these intervals; both by start time and end time work the same. However, in the greedy approach, it cares less about the previous states. - - -For example, if our intervals are [[1, 11], [2, 12], [13, 14], [11, 22]], the dynamic programming will give us LIS=[0, 1, 1, 2, 2], which indicates there are two optimal solutions. While, in the greedy algorithm, we would find one that is [1, 11], [13, 14]. -The resulting is, we might find a solution that is one of its multiple optimal solutions with the same length. In this process, we greatly increased our time efficiency, and simplified the algorithm design and coding. 
- -\paragraph{Questions to Ponder} -\begin{itemize} - \item Are you absolutely sure that is one of the optimal solutions? If it is optimal, how to prove it then? - \item When can I use greedy algorithm over dynamic programming? -\end{itemize} -% The first problem is challenging and it differs to different problems. There - -% To answer the second question first: We can't. The reason why greedy approach in the interval case is we have $s_i \leq f_i, f_i \leq f_{i+1}$. In the LIS, we have $s_i \leq f_i, s_{i+1}\geq f_i$. There is a \textbf{non-decreasing property}. This is very similar to our shortest-path graph algorithms. This is equivalently same as in an array [1, 2, 2, 3, 4, 4], to find the longest increasing subsequence and the greedy approach is to choose 1 first, and then 2. For the second 2, we compare it with previous 2, it is not larger, thus we skip it. With this greedy approach, we eventually get [1, 2, 3, 4] as the longest increasing subsequence. - -% For the third question, I find it is genuinely true that we need to have this non-decreasing property. We will see a lot in our real examples and they all point to this conclusion. -\begin{bclogo}[couleur = blue!30, arrondi=0.1,logo=\bccrayon,ombre=true]{What if there each interval is weighted with a real value $w_i$, and the objective is to maximize a non-overlap set of interval's sum of weights? } {If the weight can be both negative and positive, we have to use the first dynamic programming method. If for every $w_i\geq 0$, then previous suboptimal solution can still have a chance to lead to a global optimal solution if it happens to be compatible with following intervals with large weight, we can apply the second dynamic programming method. 
} -\end{bclogo} - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\section{Introduction to Greedy Algorithm} -\subsubsection{What is Greedy Algorithm?} We say that dynamic programming tracks best solution to all subproblems (in the above example, it is the \texttt{d} array) and incrementally build up solutions to subproblems using their subproblems (in our example, we use all of its subproblems \texttt{for j in range(i)} to build up solution to subproblem \texttt{d[i]}. - -% While, it is hard to give precise definition of greedy algorithms. In \textit{introduction to algorithms}, it defines two properties to greedy algorithms but I often found it is confusing to match the implementation to the definition: - - -% \paragraph{check later} -% While, greedy algorithm does not track or solve all subproblems. It approaches the problem from a different way! If we make a greedy choice, we have only one remaining subprpblem to solve: for example, at first, we greedily choose [1,11], leaving us with one subproblem [2, 12], [13, 14], [11, 22]. Then, we check [2, 12], which is not compatible to its solution [1, 11], we skip, and leaving our subproblem as [13, 14], [11, 22]. Then we check [13, 14], add into our partial optimal solution, and we have a subproblem even smaller to solve later. As the process of greedy algorithm, we are always downsizing our remaining subproblem. In dynamic programming solution, each subproblem will have an answer in \texttt{LIS}. However, for the greedy approach, we do not know and in instead, it directly select an item as part of our optional solution at each step. -Greedy algorithm follows the same trend in the sense of solving overlapping subproblems where optimal substructure property shows. But it only maintain \textbf{one optimal solution} for each of the subproblem--the most promising one. 
For example, for [[1, 11]], the optimal solution is [1, 11], for [1, 11], [2, 12], the optimal solution is still [1, 11], even though [2, 12] is another optimal solution for this subproblem. For [1, 11], [2, 12], [13, 14], [11, 22], greedy approach gives us [1,11],[13, 14] as our optimal solution, while in dynamic programming, we can still find another optimal solution: [1, 11], [11, 22]. -\paragraph{Three Properties} We define three properties for greedy algorithm: -\begin{itemize} - \item Overlapping Subproblems and Optimal substructure property: These two properties defined exactly the same as in dynamic programming. If an optimal solution to the problem contains within it optimal solutions to its subproblem, this is said to be optimal substructure. In our example, [1, 11], [2, 12], [13, 14], the optimal solution [1, 11], [13, 14] contains optimal solution [1, 11] that is to its subproblem [1, 11], [2, 12]. - \item Greedy-choice property: This is the only additional property that greedy algorithm holds compared with dynamic programming. We can assemble a globally optimal solution by making a locally optimal (greedy) choice. - - For example, given an array [2, 1, 3, 7, 5, 6], which has as [1, 3, 5, 6], [2, 3, 5, 6] as the longest increasing subsequence. We define the LIS as the longest increasing subsequence that ends at a[i-1] for array $a[0:i]$. The process of constructing it with dynamic programming shows as follows: - \begin{lstlisting} - subproblems - [2], LIS = [2] - [2, 1], LIS = [1] - [2, 1, 3], LIS= [1, 3], [2, 3] - [2, 1, 3, 7], LIS = [1, 3, 7], [2, 3, 7] - [2, 1, 3, 7, 5], LIS = [1, 3, 5], [2, 3, 5] - [2, 1, 3, 7, 5, 6], LIS = [1, 3, 5, 6], [2, 3, 5, 6] - \end{lstlisting} - We clearly see that to get the best solution, we have to rely on the optimal solution of all preceding subproblems. 
If we insist on applying greedy algorithm, this is how to process looks like: - \begin{lstlisting} - subproblems - [2], LIS = [2] - [2, 1], LIS = [2], only compare [2] and 1 - [2, 1, 3], LIS= [2, 3] - [2, 1, 3, 7], LIS = [2, 3, 7] - [2, 1, 3, 7, 5], LIS = [2, 3, 7] - [2, 1, 3, 7, 5, 6], LIS = [2, 3, 7] - \end{lstlisting} - LIS = [2, 3, 7] is locally optimal but not part of the global optimal solutions which are [1, 3, 5, 6] and [2, 3, 5, 6]. In our non-overlapping interval problem, if one interval is optimal in the local subproblem, it will sure be part of the optimal solution to the final problem (globally). - - % While in greedy algorithm, because of the special ordering in our optimal solution $s_i \leq f_i, f_i \leq f_{i+1}$, and our ordering of our data decides the optimal solution only relies on one optimal solution of the previous subproblem. This will only work if the data is ordered in some way. - - -\end{itemize} -To summarize, greedy algorithms simply works on incrementally build up one optimal solution. For this single optimal solution to be globally optimal, each partial optimal solution has to exactly match some prefix of the optimal solution. Both proving the correctness and design of greedy algorithm thus has to be done by induction and that at each stage, greedy algorithm is making the best choices. - -To correctly design a greedy algorithm, it has to make a locally optimal choice according to some rules or orderings. In the above example, we know the optional solution has the property that $s_i \leq f_i, f_i \leq f_{i+1}$. By sorting the intervals with increasing order of the finish time. The greedy approach choose the interval with the earliest finish time, and it says, this belongs to my optimal solution. And it just need to go through all the candidates in the order of finishing time and see if it is compatible with the last item, and we would build up a feasible and optimal solution. 
It orders its subproblems to make sure each partial optimal solution will be ``prefix'' or part of the global optimal solution. -\subsubsection{Practical Guideline} -It is clear to us like in dynamic programming, greedy algorithms are for solving optimization problems, and it subjects to a set of constraints. For example: -\begin{itemize} - \item Maximize the number of events you can attend, but do not attend any overlapping events. - \item Minimize the number of jumps - \item Minimize the cost of all edges chosen, but do not disconnect the graph. -\end{itemize} - -Do not worry about the definition of greedy algorithm; it is hard and often confusing, because it is natural and highly dependent on the problem context. However, coming up with a rule that makes the right local choice is the hard part. - -I suggest we start with dynamic programming, it is more systemized, easier to prove the correctness, and it guides us to walk through to the greedy algorithm which is more efficient just as the process shown in the example. -\paragraph{Ordering, Monotone Property} These constraints bring sense of ordering in our optimal solution, and this is when greedy algorithm applies. Therefore, we say:``beneath every greedy algorithm, there is almost always a more cumbersome dynamic programming solutions''. But, not every dynamic programming we can find a more efficient greedy algorithm, because to make greedy algorithm work, there needs to have some ordering in the optimal solution that makes the local optimization applicable and globally optimal. We shall see this in our examples! In the activity scheduling, it is that $d[i]$ is non-decreasing as shown in its dynamic programming solution. Because of this property, a locally optimal choice is also part of a globally optimal solution. In Dijkstra's algorithm, it is that $w(s, u) < w(s, v) = w(s, u) + w(u, v)$ in the shortest path. Once this monotone property breaks as in a graph with negative weights, greedy algorithm won't apply and we have to retreat to dynamic programming.
- -\subsubsection{Pros and Cons} -As we see, greedy algorithm has the following pros: -\begin{itemize} - \item Simplicity: Greedy algorithms are often easier to describe and code up than other algorithms. - \item Efficiency: Greedy algorithms can often be implemented more efficiently than other algorithms. -\end{itemize} -However, -\begin{itemize} - \item Hard to get it right: Once you have found the right greedy approach, designing greedy algorithms can be easy. However, finding the right rule can be hard. - \item Hard to verify/prove: Showing a greedy algorithm is correct often requires a nuanced argument. -\end{itemize} -%%%%%%%%%%%%%%%%%%%%%Prove Greedy Algorithm%%%%%%%%%%%%%%%%%%%%%%%%% -\section{*Proof} -The main challenging in greedy algorithms is to prove its correctness, which is important in theoretical study. However, in real coding practice, we can leverage the dynamic programming solution to compare with and scrutinize different kinds of examples to make sure the greedy algorithm and the dynamic programming are having the same results. Still, let us just learn this proof techniques as mastering another powerful tool. - -\subsection{Introduction} -First, we introduce generally two techniques/arguments to prove the correctness of a greedy algorithm in a step-by-step fashion using the mathematical induction, they are: \textbf{Greedy Stays Ahead} and \textbf{Exchange Arguments}. -\subsubsection{Greedy stays ahead} -This simple style of proof works by showing that, according to some measures, the optimal solution built by the greedy algorithm is always at least or better than the optimal solution during each iteration of the algorithm. Once we have established this argument, we can show that the greedy solution must be optimal. Typically there are four steps: - \begin{enumerate} - \item Define the solution: Define our greedy solution as $G$ and we compare it against some optimal solution $O^{*}$. 
- \item Define the measurement: Your goal is to find a series of measurements you can make of your solution and the optimal solution. Define some series of measures $m_1(G), m_2(G), ..., m_n(G)$ such that $m_1(O^{*}), m_2(O^{*}), ..., m_k(O^{*})$ is also defined for some choices of $k$ and $n$. Note that there might be a different number of measures for $G$ and $O^{*}$, since you can't assume at this point that $G$ is optimal. - \item Prove Greedy Stays Ahead: Prove that $m_i(G)\geq m_i(O^{*})$ or that $m_i(G)\leq m_i(O^{*})$, whichever is appropriate, for all reasonable values of $i$. This argument is usually done inductively. - \item Prove Optimality. Using the fact that greedy stays ahead, prove that the greedy algorithm must produce an optimal solution. This argument is often done by contradiction by assuming the greedy solution isn't optimal and using the fact that greedy stays ahead to derive a contradiction. - \end{enumerate} - The main challenge with this style of argument is finding the right measurements to make. -\subsubsection{Exchange Arguments} It proves that the greedy solution is optimal by showing that we can iteratively \textbf{transform} any optimal solution into the greedy solution produced by greedy algorithm without worsening the cost of the optimal solution. This transformation matches the word ``exchange''. Exchange arguments are a more versatile technique compared with greedy stays ahead. -It can be generalized into three steps: -\begin{enumerate} - \item Define the solution: Define our greedy solution as $G=\{g_1, ..., g_k\}$ and we compare it against some optimal solution $O=\{o_1, ..., o_m\}$. - \item Compare solutions: Assume the optimal solution is not the same as the greedy solution; show that if $m(G)\neq m(O)$, then $G$ and $O$ must differ in some way. How it differs depends on the measurement and the problem context. - \begin{enumerate} - \item If it is a combination and a length problem, then $m(G)=k$, and $m(O)=m$, we need to prove $k=m$.
- \item If it is a combination and with objective as a value, we assume $o_1$ and $g_1$ differs and all others are identical. Then, we swap $o_1$ with $g_1$. - \item If it is a permutation with objective function, we assume there are two consecutive items in $O$ that is in a different order than they are in $G$ (i.e. there is inversion) - \end{enumerate} - \item Exchange Arguments: Show how to transform $O$ by exchanging some piece of $O$ for some piece of $G$. Then, we prove that by doing so, we did not increase/decrease the cost of $O$ as we transform $O$ to $G$ with more iterations, proving that greedy is just as good as any optimal solution and hence is optimal. - % \item Iterate: Argue that we have decreased the number of difference between $G$ and $O$ by performing exchange, and with the iteration we can turn $O$ into $G$ without impacting the quality of the solution. Therefore, $G$ must be optimal. -\end{enumerate} - -\subsubsection{Guideline} -We will simply go through the list and, but the point is it is we should use the proof methods as a way to design the greedy algorithm on top of the dynamic programming. - -\subsection{Greedy Stays Ahead} -\paragraph{Maximum Non-overlapping Intervals Proofs} Let's use $G, O$ for our greedy and optimal solution respectively. $G$ consists of $\{G_1, G_2, ..., G_k\}$ in the order they each item is added into the $G$. Similiarly, $O_1, O_2, ..., O_m$ is one of the optimal sets. -\begin{theorem} -$G$ is a compatible set of intervals. -\end{theorem} -$G$ is trivially feasible because in our design we discard any interval that overlaps with our previous greedy choice. Now, all it matters is to prove its optimality. - -We know that there might exist multiple optimal solutions for a problem, just as shown in our example. In this particular problem, we do not intend to prove that $G=O$, because it might not; we prove that $G$ and $O$ has the same length instead $|G|=|O|$. 
We will apply ``greedy stays ahead'' principle along with mathematical induction. We first assume that the intervals in $O$ is ordered by the same rule applied on $G$--$f(O_i)<f(O_{i+1})$. We claim that $f(G_i)\leq f(O_i)$ for every $i\leq k$. For the base case, the greedy algorithm picks the interval with the smallest finish time first, so $f(G_1)\leq f(O_1)$. For the inductive step, assume $f(G_{n-1})\leq f(O_{n-1})$. Because the intervals in $O$ are compatible, $f(O_n)>s(O_n)\geq f(O_{n-1})\geq f(G_{n-1})$, so $O_n$ is still available when the greedy algorithm makes its $n$-th choice. The greedy algorithm selects the available interval with smallest finish time, which guarantee that $f(G_n)\leq f(O_n)$. Now, we have formally proved our sense of ``greedy stays ahead'' or rather ``greedy never fall behind'' using induction. -\begin{theorem}\label{prove_optimality} -$G$ is optimal: $|G|=|O|$. -\end{theorem} -We apply contradiction: if $G$ is not optimal, then we must have $m>k$. Similarly, after step $k$, we have $f(G_{k})\leq f(O_{k}) \leq s(O_{k+1})$, and $O_{k+1}$ must be remaining in the available set for greedy algorithm to choose. But the greedy algorithm only stops when the remaining set is empty--a contradiction. So far, we successfully applied the ``greedy stays ahead'' method to prove the correctness in the example of maximum non-overlapping intervals. - -\subsection{Exchange Arguments} - - -\paragraph{Scheduling to minimize lateness: } Instead of having a fixed start and end time for each interval, we relax the start time. Therefore, we represent the interval with $[t_i, d_i]$, where $t_i$ is the contiguous time interval and $d_i$ is the deadline. There are many objective functions we might want to optimize.
Here, we assume we only have one resource, what is the maximum number of meetings that we can schedule into a single conference room and none of them is late or we allow some meetings to run late, but define lateness $l_i$ as: -\begin{equation} - l[i] = - f[i] - d[i] -\end{equation} -Say, our object is to minimize the total lateness scheduling all meetings in one conference room, find the optimal solution: -\begin{align} - O&=\min \sum_{i=0}^{n-1} l_i\\ - &=\min \sum_{i=0}^{n-1} f[i] - d[i] -\end{align} -\begin{lstlisting}[numbers=none] -For example, we have nums = [[4, 6], [2, 6], [5, 5]] -The optimal solution is [2, 6], [4, 6], [5, 5] with total lateness (2-6)+(2+4-6)+(2+4+5)-5 = -4+0+6 = 2 -Example 2: -nums = [[2,15],[36,45],[9,29],[16,23],[7, 9], [4,9], [5, 9]] -ans = 47 -\end{lstlisting} -\paragraph{Analysis} First, let us assume all intervals have distinct deadline. A naive solution is to try all permutation of $n$ intervals and find the one with the minimum lateness. But what if we start from random order, we compute its lateness and each time we exchange two adjacent items and see if this change will decrease the total lateness or not. -\begin{lstlisting} -______a_i____a_j -\end{lstlisting} -Therefore, -Say our items are $a_i$ and $a_{j}$. There are four cases according to $d_i, d_{j}, t_i, t_{j}$. At first, with lateness $s + t_i -d_i$, and $s+t_i+t_j-d_j$. After the exchange, we have $s+t_j - d_j$ and $s+t_j+t_i - d_i$. $i$ will definitely be more late, $j$ however will be less late. Let us compare the additional lateness of $i$ with the decreased lateness of $j$: - \begin{align} - s+t_j+t_i - d_i - (s+t_i-d_i) \xrightarrow{} s+t_i+t_j-d_j - (s+t_j-d_j) \\ - t_j \xrightarrow{} t_i - \end{align} -Therefore, we have to exchange $i,j$ if $t_j < t_i$. Thus, ordering the list with increasing order of duration time of each meeting will have the best objective. 
Our Python code is: -\begin{lstlisting}[language=Python] -def lateness(intervals): - intervals = sorted(intervals, key=lambda x: (x[0])) - f = 0 - ans = 0 - for i, (t, d) in enumerate(intervals): - f += t - ans = ans + f-d - return ans -\end{lstlisting} -\paragraph{Modification} However, if we modify our definition of lateness as: -\begin{equation} - l[i] = \begin{cases} 0, & \text{if } f[i] \leq d[i], \\ - f[i] - d[i], \text{otherwise}. - \end{cases} -\end{equation} -Which is to say we do not reward for intervals that are not late with negative values. Things get more complex. - -\begin{enumerate} - \item If none of them is late, then exchange or not to change will not make any difference to the total lateness. -\begin{lstlisting} -______a_i____a_j__d_i__d_j -\end{lstlisting} -\item If both is late, then exchange the items if $t_j < t_i$. -\item If $i$ is late, and $j$ is not late, then no matter about their $t$, exchange them will only be even later. -\item If $i$ is not late, and $j$ is late. Exchange them will totally depends -\end{enumerate} -Therefore, if we change the definition of lateness, greedy solution is no longer available for us, not even dynamic programming. But the greedy approach that first sorts the intervals by the duration time will get us a good start, then we can track the smallest lateness with backtracking and search prune by its minimum lateness found so far. 
-\begin{lstlisting}[language=Python] -def lateness(intervals, f, l, globalmin, globalans, ans, used): - if len(ans) == len(intervals): - if l < globalmin[0]: - globalmin[0] = l - globalans[0] = ans[::] - return - for i, (t, d) in enumerate(intervals): - if used[i]: - continue - used[i] = True - f += t - if f-d >= 0: - l+= (f-d) - if l < globalmin[0]: - ans.append(i) - lateness(intervals, f, l, globalmin,globalans, ans, used ) - ans.pop() - if f-d >=0: - l -= (f-d) - f -= t - used[i] = False - return -\end{lstlisting} -We call this function with code: -\begin{lstlisting}[language=Python] -intervals = sorted(intervals, key=lambda x: (x[0])) -globalmin, globalans = [float('inf')], [[]] -ans = [] -used = [False]*len(intervals) -lateness(intervals, 0, 0, globalmin, globalans, ans, used) -print(globalmin, globalans[0]) -for i in globalans[0]: - print(intervals[i], end= ' ') -\end{lstlisting} -We will get the following output: -\begin{lstlisting}[numbers=none] -63 [1, 2, 0, 3, 4, 5, 6] -[4, 9] [5, 9] [2, 15] [7, 9] [9, 29] [16, 23] [36, 45] -\end{lstlisting} -We can see that no particular rule--not sorting by $d$, not by $t$, and not by $d-t$ which is called \textbf{slack time}--we can find that to solve it in greedy polynomial time. However, can we use dynamic programming? -\paragraph{Dynamic Programming} We can first do a simple experiment: we take out $[2, 15]$ from our \texttt{intervals}, the resulting optimal solution keeps the same order as [4, 9] [5, 9] [7, 9] [9, 29] [16, 23] [36, 45], which is a very good indicator that dynamic programming might apply. We can keep taking out and the optimal solution is still simply the same order, this indicates the optimal substructure. - -Let us assume we find the best order $O$ for subarray $intervals[0:i]$, now we have to prove that the best solution for subarray $intervals[0:i+1]$ can be obtained by inserting \texttt{interval[i]} into $O$. 
Assume the position we insert is at $j$, so $O[0:j]$ will not be affected at all, we care about $O[j:i]$. First, we have to prove that no matter where to add insert \texttt{interval[i]}, the ordering of $O$ needs to keep unchanged for it to have optimal solution. If insert position is at the end of $O$, the ordering do not need to change. For the other positions, however, it is really difficult to prove without enough math knowledge and optimization. - -Let us assume the start time is $s$ for $j, j+1$, we know: -\begin{align} - l(s+t_j-d_j)+l(s+t_j+t_{j+1}-d_{j+1}) \leq l(s+t_{j+1}-d_{j+1})+l(s+t_j+t_{j+1}-d_{j}) -\end{align} -Because $l(c)\in[0, c]$, prove that -\begin{align} - l(s+t_j+t_i-d_j)+l(s+t_j+t_i+t_{j+1}-d_{j+1} )\leq l(s+t_{j+1}+t_i-d_{j+1})+l(s+t_j+t_i+t_{j+1}-d_{j}) -\end{align} - -We can not prove it, and we use this method to try out, but it gives us wrong answer, so far, all our attempt to use greedy algorithm failed miserably. - - -When there is a tie at the deadline, if we schedule the one that takes the most time first, we end up with higher lateness. For example, if our solution is [5, 5], [4, 6], [2, 6], the lateness is 5+4-6 + (9+2-6) = 3+5 =8 instead of 6. - -\begin{bclogo}[couleur = blue!30, arrondi=0.1,logo=\bccrayon,ombre=true]{What if each interval, if we are allowing multiple resources, what is the least number of conference rooms we need to schedule all meeting. } -\end{bclogo} - -\begin{bclogo}[couleur = blue!30, arrondi=0.1,logo=\bccrayon,ombre=true]{630. Course Schedule III, find the maximum number of non-overlapping meetings can be scheduled within one resource. } -\end{bclogo} - -\paragraph{Prove the Kruskal's Algorithm} Let the optimal minimum spanning tree be $O = (V, E^{*})$, and the one generated by greedy approach be $G=(V, E)$. $|E^{*}|=|E|$ because both is a tree and the number of edges always equal to $|V|-1$. 
Assume there is one edge $e\in E^{*}, e\not \in E$, this means that there is another edge $f \in E$ that differs from $e$. Other than these two edges, all the other edges are the same. For example, in the graph we say $e=(1,5)$. With the constraint that there is only edge differs, $f$ has to be one edge out of $(2, 3), (3, 5)$; adding $e$ to $T$ forms a cycle, so in $T^{*}$, it can not have edges $(2, 3), (3, 5)$ at the same time, thus one referred as $f$ has to be removed in the $T^{*}$. It is always true that $cost(e)\geq cost(f)$, because otherwise the greedy approach would have chosen $e$ instead of $f$. - -For the optimal approach, if we replace $e$ with $f$, then we have $cost(T) =cost(T^{*}-e+f) \leq cost(T^{*})$. This means, with this swap of $e$ and $f$ between $G$ and $O$, the cost of the greedy approach is still at most the same as the optimal cost, transforming the optimal solution to greedy solution will not worsen the optimal solution. - -\section{Design Greedy Algorithm} -We have seen greedy algorithm design, definition, and different examples of greedy approaches and its proof. One obvious sign that states greedy approach might apply is, ``sorting will not incur the correctness of the optimal solution but rather greatly simplify the design complexity’’. Generally, people design a greedy algorithm by trying out rules with objection and hopefully find one good enough and then prove its correctness. This approach is simple but fuzzy. Thus, we prefer a more systemized design approach: -\begin{enumerate} - \item Search: Analyze the problem with search and combination--no implementation is needed, to know our atomic search complexity. -\item Dynamic Programming: Design dynamic programming approach first by defining state and constructing a recurrence relation repeatedly until we find one that works well. This step brings us closer to the greedy approach: it gives us definition of state, recurrence relation and a polynomial time complexity. 
-\item Greedy: then go further to see if the greedy choice property holds--between a subproblem $p_i$ and its succeeding subproblem $p_{i+1}$, if the optimal solution within $p_i$ is also part of the optimal solution within $p_{i+1}$ or we can simply construct an optimal solution from previous optimal solutions without checking multiple subproblems. If it holds, great, the previous dynamic programming becomes an overkill, and we further improve our approach by simplifying the recurrence relation with ``rules'', which saves us time and/or space. To derive a good ``rule'', we have to study and understand a bunch of ``facts'', thus strengthen our choice with: Does the greedy optimal solution always stay ahead and be the most promising optimal solution at each step? If not, try exchanging some items within the previous optimal solution and see if it improves the situation and keeps us staying ahead. - \end{enumerate} -We will solve the following classical problems with this approach. - -%%%%%%%%%%%%%%%%%%Classical problems%%%%%%%%%%%%%%% -\section{Classical Problems} -List classical problems -\subsection{Scheduling} -We have seen two scheduling problems. Scheduling is a time-based problem that naturally follows a leftmost-to-rightmost order along the timeline, and it is about assigning tasks/meetings to the available resources. We need to pay attention to the following contexts: -\begin{itemize} - \item Do we have to assign all intervals or just select a maximum set from all? This relates to the number of resources that are available. - \item What are the conditions? Are both the start and end times fixed, or are they flexible and only bounded by an earliest possible start time and a latest end time? -\end{itemize} -The core principle is to answer these questions: -\begin{itemize} - \item Start and end time: - \begin{itemize} - \item Are both fixed? Yes, then we are simply given these intervals as a state, no change at all, and the ordering of the start and end time is exactly the same.
If the question is only given one resource and we need to get the maximum non-overlapping intervals, easy piece of cake, we follows the order, and check if it is compatible with one previous intervals. If it asks about the minimum resources needed, that is the depth of the set, that is the property, we have discussed how assigning a meeting room to a preceding free meeting room does not affect the number of free rooms for the next. - \item If it is not fixed, we are given either $t_i$ and $d_i$ for each interval--we can start at any time $s_i$, finish at $s_i+t_i$, but we do have a deadline $d_i$ that better to be met--or we are given $b_i, t_i, d_i$--we can start at any $s_i$, but it would better be $s_i\geq b_i$, and end at $s_i+t_i\leq d_i$. (The second is not sure). The fundamental rule is: \textbf{Earliest Deadline First}. We have proved that if there is an inversion, swapping them will only result better objective value. This usually points out that the optimal solution shall have no inversion and no idle time on the resource. Whenever we met a tie at the deadlines, no matter what order of these intervals with equal deadline, the total lateness is usually the same, which can be proved with inversion. - \end{itemize} -\end{itemize} -\paragraph{Scheduling all Intervals(L253. Meeting Rooms II)} In our previous scheduling problem, there is only a single resource to fit in non-overlapping intervals. Scheduling all intervals on the other hand requires us to schedule all the intervals with as few resources as possible. This problem is also known as \textbf{interval partitioning problem} or \textbf{interval coloring problem} because our goal is to partition all intervals across multiple resources, and it is like each resource to be assigned a color. - -Given an array of meeting time intervals consisting of start and end times $[[s_1,e_1],[s_2,e_2],...] (s_i < e_i)$, find the minimum number of conference rooms required and assign a label for each interval. 
-\begin{lstlisting}[numbers=none] -Example 1: -Input: [[2,15, 'a'],[36,45, 'b'],[9,29, 'c'],[16,23, 'd'],[4,9, 'e']] -Output: 2 -\end{lstlisting} -\begin{figure}[H] - \centering - \includegraphics[width=0.7\columnwidth]{fig/greedy_schedule_all_intervals_1.png} - \caption{All intervals} - \label{fig:greedy_intervals} -\end{figure} -\paragraph{Analysis} The example is plotted in Fig.~\ref{fig:greedy_intervals}. A universal solution is to treat each interval as a vertex, if two intervals overlap, connect them with an edge, thus forming a graph. Now, the problem is reduced to a graph coloring, however, it might be to complicating things. - -\subsubsection{Find Minimum Number of Conference Rooms} -First, let us solve the first problem: What is the minimum number of conference rooms required? By observation and intuition, if at the same time point, there are a number of overlapping intervals, we have to assign each of these intervals a different resources. Now, we define the depth $d$ as the maximum number of overlapping intervals at any single point on the time-line, we claim: -\begin{theorem} -In the interval partitioning problem, the number of resources needed is at least the depth $d$ of the set of intervals. -\end{theorem} -Before we head off to the proof, let's discuss how to find the depth. According to the definition, if we are lucky that the time given is in the form of integer, then, the most straightforward way is to use the \textbf{sweep line} method, with a \texttt{counter} to track the number of intervals at each integer time moment. We use a vertical line to sweep from the leftmost to the right most intervals: this exactly follows a natural order, that when the earliest meeting starts, we have to assign a room to it no matter what (start has +1), and when can reuse a meeting room assigned before only if there is one that is freed (end has -1 as value). 
The sweep line method We need to watch out for the edge case, when two intervals where the finish time of one and the start time of the other overlaps, such as $[4, 9], [9, 29]$, this is not counted as two. Therefore we make sure to exclude the finish time when scanning, in range $[s, e)$. -\begin{figure}[H] - \centering - \includegraphics[width=0.7\columnwidth]{fig/greedy_schedule_all_intervals__sort_count.png} - \caption{All intervals sorted by start and end time.} - \label{fig:greedy_intervals_sort_count} -\end{figure} -However, this process can be simplified. When we are scanning, we will notice that the count will only change when encountering start or finish time point. First, at the start time of $a$, we have to assign one room, then at the start time of e, we have to assign a second room, and at the end of $e$, we free the room, and at the start time of $c$, it reuses the second room right away. Then in the line, at the end of $a$, it releases room 1, so $d$ starts reusing the first room right away. We can simply assign 1 to the start time, -1 to the finish time, and put all of these points into a list, sort them by time first. Because to handle the edge case--a tie in the previous sort where the start and end time is the same, the second degree sorting is used to put -1 in front of 1 to avoid overcounting the rooms. This process is shown in Fig.~\ref{fig:greedy_intervals_sort_count}. -\begin{lstlisting}[language=Python] -def minMeetingRooms(intervals): - if not intervals: - return 0 - points = [] - for s, e in intervals: - points.append((s, +1)) - points.append((e, -1)) - points = sorted(points, key=lambda x: (x[0], x[1])) - ans = 0 - total = 0 - for _, flag in points: - total += flag - ans = max(ans, total) - return ans -\end{lstlisting} -\subsubsection{Label Assignment} -We can modify the previous code to incorporate label assignment. 
We separate the start and end time in two independent lists because only when we meet a start time, we assign a room, and sort both of them. -\begin{lstlisting}[numbers=none] -2(0) 4(4) 9(2) 16(3) 36(1) -9(4) 15(0) 23(3) 29(2) 45(1) -\end{lstlisting} -We put two pointers, $sp, ep$ at the start of the start time list and end time list respectively. We need zero room at first. And for start pointer at $2$, we assign room one to interval 0 because $2<9$, no room is freed to reuse. Then $sp$ moves to 4. $4<9$, no room is freed, assign room 2 to interval 4. $sp$ at 9, $9\geq9$, meaning we can reuse the room belonged to interval 4, thus assign room 2 to interval 2. Now, move both $sp$ and $ep$, we are comparing $16>15$, meaning interval 3 can reuse the room belonged to interval 0, we assign room 1 to interval 3. Next, we compare $36 > 23$, interval 1 takes the room number 1 from interval 3. Since one of the pointer reached to the end of the list, process ends. -\begin{lstlisting}[language=Python] -def minMeetingRooms(intervals): - starts, ends = [], [] - for i, (s, e) in enumerate(intervals): - starts.append((s, i)) - ends.append((e, i)) - starts.sort(key=lambda x: x[0]) - ends.sort(key=lambda x: x[0]) - n = len(intervals) - rooms = [0] * n - sp, ep = 0, 0 - label = 0 - while sp < n: - index = starts[sp][1] - # Assign a new room - if starts[sp][0] < ends[ep][0]: - rooms[index] = label - label += 1 - else: #Reuse a room - room_of_end = rooms[ends[ep][1]] - rooms[index] = room_of_end - ep += 1 - sp += 1 - print(rooms) - return label -\end{lstlisting} - -The above method is natural but indeed greedy! We sort the intervals by start time, the worst case we assign each meeting a different room. However, the number of room can be reduced if we can reuse any previous assigned meeting rooms that is free at the moment. \textbf{The depth is controlled by the time line}. 
For example, for interval $c$, if both $a$ and $e$ is free at that moment, does it matter which meeting room to put of c in? Nope. Because no matter which room it is in, the interval d will overlap with this interval, thus can not use its meeting room, but still there is the one left from either a or e. This is what this problem is essentially different from the maximum non-overlapping problems. The greedy part is we always reassign the room belongs to the earliest available rooms. A non-greedy and naive way is to check all preceding meeting rooms, and find one available. - -\paragraph{You have to property} Did you find that, for the resource assignment, mostly, we have no much choice, because we have to assign it. The only choice is which room. We are greedy that we merge it whenever we can. All the solutions no matter if they put the earliest finished meeting room to reassign or just random or arbitrary one, they are doing it for a single purpose: reduce the possible number of resources whenever they can. - -An easy way to understand this problem is to notice that: for each meeting you HAVE TO assign it a room. The worst case is we assign a room for each single meeting and we do not even need to sort these intervals. Well, how can we optimize it, minimize the number of rooms? We have to reuse a room whenever it is possible. Therefore, we need to sort the meeting by start time. Because the first meeting has no choice but to assign a meeting room to it. For the second meeting, we have two options: either assign a room or reuse one that is available now. -\begin{itemize} - \item If I choose to reuse a room, does it influence my optimal solution later on? No, because if we chose to reuse a room, we decrease the total number of room by one, and later on it wont even affect the available rooms for the next meeting. It's like, here is a candy, take it and it wont affect your chance of having candy at all! Of course I would go for it. 
-\item Does it matter which one to reuse? Nope. Why? Because the smallest number of rooms needed are decided by how many meetings collide at a single time point. No matter which available room you put of this meeting, for the following meetings the number of available rooms are always the same:any rooms that are freed from preceding meetings. Here is the thing. When we are scanning from the leftmost interval to the rightmost by start time, -\end{itemize} -This is why there are so many different approaches: iterating preceding meetings and find any one that is available or put it into a min-heap to use the earliest available rooms or as the second solution, it is still the same as of the min-heap, reassign one that ends earliest. This optimization process is natural and GREEDY! - -\paragraph{Proof} We have been proved it already informally. The greedy we have will end up with compatible/feasible solutions where at each meeting rooms, no two overlapping meetings will be scheduled. -\begin{theorem} -If we use the greedy algorithm above, we can schedule every interval with $d$ number of resources, which is optimal. -\end{theorem} - -We know that using $d$ number of resources, we have to prove that we can schedule these intervals with $d$ resources. - -\paragraph{Organize} - -Second, how to assign a label to each meeting? Actually, it is not necessary to know the number of the minimum conference rooms $d$ needed to assign a label to each, we can get the $d$ by counting the total number of labels. -Now, back to be greedy, it might be tempting at first to follow the non-overlapping scheduling problems. First, we sort the intervals by the finish time, and an intuitive strategy to assign labels is: go through intervals in order, assign each interval a label that differs from any previous overlapping interval's. 
The code is: -\begin{lstlisting}[language=Python] -def colorInterval(intervals): - intervals = sorted(intervals, key=lambda x: x[1]) - labels = [] # label list to sorted intervals - n = len(intervals) - for i, (s, e) in enumerate(intervals): - excluded = [] - for j in range(i): - if s < intervals[j][1]: # overlap - excluded.append(labels[j]) - # assign label - for l in range(n): - if l not in excluded: - labels.append(l) - break - return len(set(labels)) -\end{lstlisting} -\begin{figure}[H] - \centering - \includegraphics[width=0.48\columnwidth]{fig/greedy_schedule_all_intervals_sorting_finish.png} - \includegraphics[width=0.48\columnwidth]{fig/greedy_schedule_all_intervals_wrong_sort.png} - \caption{Left: sort by start time, Right: sort by finish time.} - \label{fig:greedy_intervals_sorting_bad} -\end{figure} -\begin{figure}[H] - \centering - \includegraphics[width=0.48\columnwidth]{fig/greedy_schedule_all_intervals_sorting.png} - \includegraphics[width=0.48\columnwidth]{fig/greedy_schedule_all_intervals_good_sort.png} - \caption{Left: sort by start time, Right: sort by finish time.} - \label{fig:greedy_intervals_sorting_good} -\end{figure} -Unfortunately, it gives us wrong answer; it used three resources instead of 2 as we proved before. The sorting and the answer is plotted in Fig.~\ref{fig:greedy_intervals_sorting_bad}. What went wrong? In the non-overlapping interval scheduling problem, what matters is the maximum number of intervals we can fit in within one resource, sorting by finish time guarantees we fit as many intervals as possible. However, in this problem, we try to use as less as possible of resources, we want each resource to be as tight as possible. In math, .... Therefore, the right way of sorting is to sort by the starting time. If you insist on sorting by finish time, you will get right answer if you traverse the intervals in reversed order. - -This type of assignment takes $O(n^2)$ in time complexity. 
We can easily do better but with the same greedy strategy. -\subsubsection{Optimizations} -We use a list \texttt{rooms} which starts as being empty. Each room, we only keep its end time. After sorting by the start time, we go through each interval and try to put it in a room if it does not overlap, we put this interval in this room and update its end time. If no available room found, we assign a new room instead. With this strategy, we end up with $O(nd)$ in time complexity. When $d$ is small enough, it saves more time. -\begin{lstlisting}[language=Python] -def minMeetingRooms(intervals): - intervals = sorted(intervals, key=lambda x: x[0]) - rooms = [] # a list that tracks the end time - for s, e in intervals: - bFound = False - for i, re in enumerate(rooms): - if s >= re: - rooms[i] = e - bFound = True - break - if not bFound: - rooms.append(e) - return len(rooms) -\end{lstlisting} -\paragraph{Priority Queue} Is there a way to fully get rid of the factor of $d$? In our case, we loop over all rooms and check if it is available, but we do not even care which one is, we just need one! So, instead we replace \texttt{rooms} with a priority queue with uses min-heap, making sure each time we only check the room with the earliest end time; if it does not overlap, put this meeting into this room and update its finish time, or else assign a new room. -\begin{lstlisting}[language=Python] -import heapq -def minMeetingRooms(intervals): - intervals = sorted(intervals, key=lambda x: x[0]) - rooms = [] # a list that tracks the end time of each room - - for s, e in intervals: - bFound = False - # now, just check the room that ends earlier instead of check it all - if rooms and rooms[0] <= s: - heapq.heappop(rooms) - heapq.heappush(rooms, e) - return len(rooms) -\end{lstlisting} -\subsection{Partition} -\paragraph{763. Partition Labels} A string S of lowercase letters is given. 
We want to partition this string into as many parts as possible so that each letter appears in at most one part, and return a list of integers representing the size of these parts. - -\begin{lstlisting}[numbers=none] -Example 1: - -Input: S = "ababcbacadefegdehijhklij" -Output: [9,7,8] -Explanation: -The partition is "ababcbaca", "defegde", "hijhklij". -This is a partition so that each letter appears in at most one part. -A partition like "ababcbacadefegde", "hijhklij" is incorrect, because it splits S into less parts. -\end{lstlisting} - -\begin{lstlisting}[numbers=none] -for example ``abaefegdehi''. -{aba}, {efegde}, {hi} -\end{lstlisting} -\paragraph{Analysis} We know we can use the partition type of dynamic programming to find the maximum number of parts with -\begin{align} - d[n] = p[i:n], d[i] \\ - d[n] = \max(d[i] + 1), i < n, \texttt{and p[i:n] is an independent part}.\\ - d[i] = p[j:i], d[j], i\in[0, n-1], j < i -\end{align} -A greedy alternative records the last location of each character and closes a part as soon as the scan index reaches the furthest last location seen so far: -\begin{lstlisting}[language=Python] -def partitionLabels(S: str) -> List[int]: - n = len(S) - loc = defaultdict(int) - for i, c in enumerate(S): - loc[c] = i # get the last location of each char - last_loc = -1 - prev_loc = -1 - ans = [] - for i, c in enumerate(S): - #prev_loc = min(prev_loc, i) - last_loc = max(last_loc, loc[c]) - if i == last_loc: ##a good one - ans.append(last_loc - prev_loc) - prev_loc = last_loc - - return ans -\end{lstlisting} -With the greedy approach, we further decreased the complexity to $O(n)$. -\subsection{Data Compression, File Merge} -\paragraph{File Merge} -Given array $F$ of size $n$, each item indicates the length of the $i$-th file in the array. Find the best ordering of merging all files. -\begin{lstlisting}[numbers=none] -For example, F = {10,5,100,50,20,15} -\end{lstlisting} -First, this is an exponential problem because we might need to try all permutations of the files. Merging the first two files takes 10+5 cost, and merging this further with 100 takes 10+5+100, and so on. Now, let us write the cost of the original order: -\begin{align} - c &= \min (F_0+F_1) + (F_0+F_1+F_2) + ...
+ (F_0+F_1+F_2+...+F_{n-1}) \\ - &=\min\, (n-1) F_0 + (n-1) F_1 + (n-2) F_2 + ... + F_{n-1} -\end{align} -From the objective function, because all files have positive sizes, to minimize it, we have to make sure $F_0$ and $F_1$ are the two smallest items in the array because they are counted the most times, $F_2$ is the next smallest, and so on. We can easily figure out that sorting the files in increasing order and merging them in this order results in the least cost of merging. This is a very simple and natural greedy approach. -\paragraph{Data Compression} -\subsection{Factional S} -\subsection{Graph Algorithms} -%%%%%%%%%%%%%%%%%%%%%%%%Exercise -\section{Exercises} -\begin{itemize} - \item 630. Course Schedule III (hard) -\end{itemize} - - -%We shall complete the process of developing a greedy solution by converting the recursive algorithm to an iterative one. Although the steps we shall go through in this section are slightly more involved than is typical when developing a greedy algorithm, they illustrate the relationship between greedy algorithms and dynamic programming. -\end{document} \ No newline at end of file diff --git a/Easy-Book/chapters/chapter_13_tree_algorithm.tex b/Easy-Book/chapters/chapter_13_tree_algorithm.tex deleted file mode 100644 index 04377b0..0000000 --- a/Easy-Book/chapters/chapter_13_tree_algorithm.tex +++ /dev/null @@ -1,871 +0,0 @@ -\documentclass[../main.tex]{subfiles} -\begin{document} -The purpose of the -%%%%%%%%%%%%%%%%%%%%%binary search tree%%%%%%%%%%%%%%%%%%%%%% -\section{Binary Search Tree} -\label{sec_binary_search_tree} - In computer science, a \textbf{search tree} is a tree data structure used for locating specific keys from within a set. In order for a tree to function as a search tree, the key for each node must be greater than any keys in subtrees on the left and less than any keys in subtrees on the right.
- -The advantage of search trees is their efficient search time ($O(\log n)$) given the tree is reasonably balanced, which is to say the leaves at either end are of comparable depths, as introduced with the \textbf{balanced binary tree}. - -The search tree data structure supports many dynamic-set operations, including \textbf{Search} for a key, minimum or maximum, predecessor or successor, \textbf{insert} and \textbf{delete}. Thus, a search tree can be used both as a dictionary and as a priority queue. -\begin{figure}[!ht] - \centering - \includegraphics[width = 0.4\columnwidth]{fig/Binary_search_tree.png} - \caption{Example of Binary search tree of depth 3 and 8 nodes.} - \label{fig:bst} -\end{figure} - -% Search trees are often used to implement an associative array. The search tree algorithm uses the key from the key-value pair to find a location, and then the application stores the entire key–value pair at that location. - -% In this section, we will introduce the most commonly used two types of searching trees: binary searching tree (BST) and Trie where the keys are usually numeric numbers and strings respectively. - -% \subsection{Binary Searching Tree} -% \label{concept_binary_search_tree} -A binary search tree (BST) is a search tree in which each node has at most two children. There are three possible ways to properly define a BST, and we use $l$ and $r$ to represent the left and right child of node $x$: 1) $l.key \leq x.key < r.key$, 2) $l.key < x.key \leq r.key$, 3) $l.key < x.key < r.key$. With the first and second definitions, the resulting BST allows duplicates, while the third definition does not. One example of BST without duplicates is shown in Fig~\ref{fig:bst}. - -% an organized searching tree structure in binary tree, as the name suggests. Binary search trees whose internal nodes each store a key (and optionally, an associated value), each node have two distinguished sub-trees (if only one sub-tree the other is None).
- -% BST keep their keys in sorted order, so that lookup and other operations can use the \textit{principle of binary search tree}: - -% \indent Let $x$ be a node in a binary search tree, if $y$ is a node in the left subtree of x, them $y.key \leq x.key$. If $y$ is a node in the right subtree of $x$, then $y.key \geq x.key$. - - - -\subsubsection{Operations} -When looking for a key in a tree (or a place to insert a new key), we traverse the tree from root to leaf, making comparisons to keys stored in the nodes of the tree and deciding, on the basis of the comparison, to continue searching in the left or right subtrees. On average, this means that each comparison allows the operations to skip about half of the tree, so that each SEARCH, INSERT or DELETE takes time proportional to the logarithm of the number of items stored in the tree. This is much better than the linear time required to find items by key in an (unsorted) array, but slower than the corresponding operations on hash tables. - -% \textbf{Definition} A binary search tree is a rooted binary tree, whose internal nodes each store a key (and optionally, an associated value) and each have two distinguished sub-trees, commonly denoted left and right. The tree additionally satisfies the binary search property, which states that the key in each node must be greater than or equal to any key stored in the left sub-tree, and less than or equal to any key stored in the right sub-tree.[1]:287 The leaves (final nodes) of the tree contain no key and have no structure to distinguish them from one another. - -In order to build a BST, we need to INSERT a series of elements in the tree organized by the searching tree property, and in order to INSERT, we need to SEARCH the position to INSERT this element. Thus, we introduce these operations in the order of SEARCH, INSERT and GENERATE. 
-\begin{figure}[h] - \centering - \includegraphics[width=0.6\columnwidth]{fig/bst_insertion.png} - \caption{The lightly shaded nodes indicate the simple path from the root down to the position where the item is inserted. The dashed line indicates the link in the tree that is added to insert the item. } - \label{fig:bst_operation} -\end{figure} -\paragraph{SEARCH} - -There are two different implementations for SEARCH: recursive and iterative. -\begin{lstlisting}[language = Python] -# recursive searching -def search(root,key): - # Base Cases: root is null or key is present at root - if root is None or root.val == key: - return root - - # Key is greater than root's key - if root.val < key: - return search(root.right,key) - - # Key is smaller than root's key - return search(root.left,key) -\end{lstlisting} -Also, we can write it in an iterative way, which saves the call-stack space that the recursion would use: -\begin{lstlisting}[language = Python] -# iterative searching -def iterative_search(root,key): - while root is not None and root.val != key: - if root.val < key: - root = root.right - else: - root = root.left - return root -\end{lstlisting} -\paragraph{INSERT} -Assume we are inserting a node $13$ into the tree shown in Fig~\ref{fig:bst_operation}. A new key is always inserted at a leaf (there are other ways to insert but here we only discuss this one way). We start searching for the key from the root till we hit an empty node. Then we create a new TreeNode and insert it either as the left or the right child according to the searching property. Here we again show both the recursive and iterative solutions.
-\begin{lstlisting}[language = Python] -# Recursive insertion -def insertion(root, key): - if root is None: - root = TreeNode(key) - return root - if root.val < key: - root.right = insertion(root.right, key) - else: - root.left = insertion(root.left, key) - return root -\end{lstlisting} -The above code needs return value and reassign the value for the right and left every time, we can use the following code which might looks more complex with the if condition but works faster and only assign element at the end. -\begin{lstlisting}[language=Python] -# recursive insertion -def insertion(root, val): - if root is None: - root = TreeNode(val) - return - if val > root.val: - if root.right is None: - root.right = TreeNode(val) - else: - insertion(root.right, val) - else: - if root.left is None: - root.left = TreeNode(val) - else: - insertion(root.left, val) -\end{lstlisting} -We can search the node iteratively and save the previous node. The while loop would stop when hit at an empty node. There will be three cases in the case of the previous node. -\begin{lstlisting}[numbers=none] -1. The previous node is None, which means the tree is empty, so we assign a root node with the value -2. The previous node has a value larger than the key, means we need to put key as left child. -3. The previous node has a value smaller than the key, means we need to put key as right child. -\end{lstlisting} -\begin{lstlisting}[language = Python] -# iterative insertion -def iterativeInsertion(root, key): - pre_node = None - node = root - while node is not None: - pre_node = node - if key < node.val: - node = node.left - else: - node = node.right - # we reached to the leaf node which is pre_node - if pre_node is None: - root = TreeNode(key) - elif pre_node.val > key: - pre_node.left = TreeNode(key) - else: - pre_node.right = TreeNode(key) - return root -\end{lstlisting} -\paragraph{BST Generation} -First, let us declare a node as BST which is the root node. 
Given a list, we just need to call INSERT for each element. The time complexity is $O(n\log n)$ when the tree stays reasonably balanced. -\begin{lstlisting}[language=Python] -datas = [8, 3, 10, 1, 6, 14, 4, 7, 13] -BST = None -for key in datas: - BST = iterativeInsertion(BST, key) -print(LevelOrder(BST)) -# output -# [8, 3, 10, 1, 6, 14, 4, 7, 13] -\end{lstlisting} -\paragraph{DELETE} -Before we check the implementation of DELETE, I suggest that readers first read the next subsection--the Features of BST--and then come back here to finish this paragraph. - -When we delete a node, three possibilities arise. -\begin{lstlisting}[numbers=none] -1) Node to be deleted is leaf: Simply remove from the tree. - - 50 50 - / \ delete(20) / \ - 30 70 ---------> 30 70 - / \ / \ \ / \ - 20 40 60 80 40 60 80 - -2) Node to be deleted has only one child: Copy the child to the node and delete the child - - 50 50 - / \ delete(30) / \ - 30 70 ---------> 40 70 - \ / \ / \ - 40 60 80 60 80 - -3) Node to be deleted has two children: Find inorder successor of the node. Copy contents of the inorder successor to the node and delete the inorder successor. Note that inorder predecessor can also be used. - - 50 60 - / \ delete(50) / \ - 40 70 ---------> 40 70 - / \ \ - 60 80 80 - -The important thing to note is, inorder successor is needed only when right child is not empty. In this particular case, inorder successor can be obtained by finding the minimum value in right child of the node. -\end{lstlisting} - - - -\subsubsection{Features of BST} -\label{concept_features_bst} -\paragraph{Minimum and Maximum} The operation is similar to search: to find the minimum, we keep traversing into the left subtree. For the maximum, we just need to replace ``left'' with ``right''. Here the time complexity is the same, $O(\log n)$.
-\begin{lstlisting}[language=Python] -# recursive -def get_minimum(root): - if root is None: - return None - if root.left is None: # a leaf or node has no left subtree - return root - if root.left: - return get_minimum(root.left) - -# iterative -def iterative_get_minimum(root): - while root.left is not None: - root = root.left - return root -\end{lstlisting} - -Also, sometimes we need to search for two additional items related to a given node: its successor and its predecessor. The structure of a binary search tree allows us to determine the successor or the predecessor of a node without ever comparing keys. - -\paragraph{Successor of a Node} The successor of node $x$ is the smallest item in the BST that is strictly greater than $x$. It is also called the in-order successor, which is the next node in the inorder traversal of the binary tree. The inorder successor is None for the last node in the inorder traversal. How we find it depends on whether our TreeNode data structure has a parent pointer. - -Use parent node: the algorithm has two cases on the basis of the right subtree of the input node. -\begin{lstlisting}[numbers=none] -For the right subtree of the node: -1) If it is not None, then the successor is the minimum node in the right subtree. e.g. for node 12, successor(12) = 13 = min(12.right) -2) If it is None, then the successor is one of its ancestors. We traverse up using the parent node until we find a node which is the left child of its parent. Then the parent node here is the successor. e.g. 
successor(2)=5 -\end{lstlisting} - The Python code is provided: -\begin{lstlisting}[language = Python] -def Successor(root, n): -# Step 1 of the above algorithm - if n.right is not None: - return get_minimum(n.right) -# Step 2 of the above algorithm -p = n.parent -while p is not None: - if n == p.left :# if current node is the left child node, then we found the successor, p - return p - n = p - p = p.parent -return p -\end{lstlisting} -However, if it happens that your tree node has no parent defined, which means you can not traverse back its parents. We only have one option. Use the inorder tree traversal, and find the element right after the node. \begin{lstlisting}[numbers=none] -For the right subtree of the node: -1) If it is not None, then the successor is the minimum node in the right subtree. e.g. for node 12, successor(12) = 13 = min(12.right) -2) If it is None, then the successor is one of its ancestors. We traverse down from the root till we find current node, the node in advance of current node is the successor. e.g. successor(2)=5 -\end{lstlisting} -\begin{lstlisting}[language=Python] -def SuccessorInorder(root, n): - # Step 1 of the above algorithm - if n.right is not None: - return get_minimum(n.right) - # Step 2 of the above algorithm - succ = None - while root is not None: - - if n.val > root.val: - root = root.right - elif n.val < root.val: - succ = root - root = root.left - else: # we found the node, no need to traverse - break - return succ -\end{lstlisting} - -\paragraph{Predecessor of A Node} A predecessor of node $x$ on the other side, is the largest item in BST that is strictly smaller than $x$. It is also called in-order predecessor, which denotes the previous node in Inorder traversal of BST. e.g. for node 14, predecessor(14)=12= max(14.left). The same searching rule applies, if node $x$'s left subtree exists, we return the maximum value of the left subtree. 
Otherwise we traverse back its parents, and make sure it is the right subtree, then we return the value of its parent, otherwise the reversal traverse keeps going. -\begin{lstlisting}[language = Python] -def Predecessor(root, n): -# Step 1 of the above algorithm - if n.left is not None: - return get_maximum(n.left) -# Step 2 of the above algorithm -p = n.parent -while p is not None: - if n == p.right :# if current node is the right node, parent is smaller - return p - n = p - p = p.parent -return p -\end{lstlisting} - The worst case to find the successor or the predecessor of a BST is to search the height of the tree: include the one of the subtrees of the current node, and go back to all the parents and greatparents of this code, which makes it the height of the tree. The expected time complexity is $O(lgn)$. And the worst is when the tree line up and has no branch, which makes it $O(n)$. - - -% \item Smallest and the Biggest Value -% \begin{lstlisting}[language = Python] -% #recursive -% def getSmallest(node): -% if not node: -% return None -% if node.left: -% return getSmallest(node.left) -% return node -% #iterative -% def getSmallest(node): -% while node: -% node=node.left -% return node -% \end{lstlisting} -% \end{enumerate} - - - -% \textbf{Insertion and Generation of BST} Insertion and deletion is not as easy as the operations shown before, because they cause the dynamic set represented by the binary tree to change. Therefore, the data structure must be re-structured to reflect the change in order to keep holding the binary-search-tree property. - -% Insertion is more straightforward, it is a key component to build a BST. To build a BST, we start from an empty root, and we go through a sequence of data that we want to store in BST structure, and insert each one in the right place with the binary search tree property. 
To insert a node into the BST, at first we search the tree level-by-level starts from the root node, if the value is smaller that the comparing node's key, we move to its left subtree or else to the right subtree till we reach to leaf node. Look at an example, the follwing tree, - -% the code is as follows: - -% Thus the code for building a BST is as follows: - - -% \textcolor{red}{to do: write the whole operations and the definition of BST as a python class} - -% DELETE: -% \begin{lstlisting}[language = Python] -% def delete(root,key): - -% # Base Cases: root is null or key is present at root -% if root is None or root.val == key: -% return root - -% # Key is greater than root's key -% if root.val < key: -% return search(root.right,key) - -% # Key is smaller than root's key -% return search(root.left,key) -% \end{lstlisting} - -Now we put a table here to summarize the space and time complexity for each operation. -\begin{table}[h] -\begin{small} -\centering -\noindent\captionof{table}{ Time complexity of operations for BST in big O notation } - \noindent \begin{tabular}{|p{0.33\columnwidth}|p{0.33\columnwidth}| p{0.33\columnwidth}|} - \hline - Algorithm & Average & Worst Case \\ \hline -Space & $O(n)$& $O(n)$ \\ -Search & $O(lgn)$ & $O(n)$ \\ \hline - -Insert & $O(lgn)$ & $O(n)$ \\ -Delete & $O(lgn)$ & $O(n)$ \\ \hline -\end{tabular} - \label{tab:msrc_precession} - \end{small} -\end{table} - -% \paragraph{Advanced Features} -% For a BST, the left subtree all have smaller values than the current node, and the right subtree are all bigger than the current node. This concept is useful in trimming BST, see example, $669$. Trim a Binary Search Tree. - - -% \section{Augmented Tree} -% According to \textit{Introduction to Algorithms}, augmenting data stuctures are defined as a textbook data structure augmented by storing additional information in it. 
In this Section, we introduce two types of augmented tree: Trie for pattern matching in static String and Segment Tree for Range Query. - -%https://www.mimuw.edu.pl/~szczurek/TSG2/04_suffix_arrays.pdf - -%%%%%%%%%%%%%%Segment Tree%%%%%%%%%%%%%%% -\section{Segment Tree} -\label{sec_segment_tree} -% In this subsection, we discuss another data structure which can efficiently answer dynamic range queries. As a starting point, we discuss a problem of finding the index of the minimum element -% in an array given a range: [i..j]. This is more commonly known as the Range Minimum Query(RMQ). For example, given an array A of size 7 below, RMQ(1, 3) = 2, as the index 2 contains the minimum element among A[1], A[2], and A[3]. To check your understanding of RMQ, verify that on array A below, RMQ(3, 4) = 4, RMQ(0, 0) = 0, RMQ(0, 1) = 1, and RMQ(0, 6) = 5. - -% There are several ways to solve this RMQ. One of the trivial algorithm is to simply iterate the -% array from index i to j and report the index with the minimum value. But this is O(n) per query. -% When n is large, such algorithm maybe infeasible. - -A Segment Tree is a static full binary tree, similar to a heap, that is used for storing intervals or segments. ``Static'' here means that once the data structure is built, its structure cannot be modified or extended. A segment tree is a data structure that can efficiently answer numerous \textit{dynamic range queries} problems -(in logarithmic time), such as finding the minimum, maximum, sum, greatest common divisor, or least common multiple over a range of an array. ``Dynamic'' means that the values of the elements are constantly being modified (but not the tree structure). A typical problem is to find the minimum, maximum, or sum of all elements in a given range [i:j] of an array. 
- -\paragraph{Definition} Consider an array A of size n and a corresponding Segment Tree T (here a range [0, n-1] in A is represented as A[0:n-1]): -\begin{enumerate} - \item The root of T represents the whole array A[0:n-1]. - \item Each internal node in the Segment Tree T represents the interval of A[i:j] where $0 \leq i < j < n$. - \item Each leaf in T represents a single element A[i], where $0 \leq i < n$. -\end{enumerate} - -\begin{examples} -\item \textbf{307. Range Sum Query - Mutable (medium).} Given an integer array nums, find the sum of the elements between indices $i$ and $j$ ($i \leq j$), inclusive. The update(i, val) function modifies nums by updating the element at index $i$ to val. -\begin{lstlisting} -Example: -Given nums = [1, 3, 5] -sumRange(0, 2) -> 9 -update(1, 2) -sumRange(0, 2) -> 8 -\end{lstlisting} -Note: -\begin{enumerate} - \item - The array is only modifiable by the update function. - \item You may assume the number of calls to update and sumRange function is distributed evenly. -\end{enumerate} -\paragraph{Solution: Brute-Force.} There are several ways to solve the RSQ. The \textbf{brute-force solution} is to simply iterate the array from index i to j, sum up the elements, and return the sum. This costs $O(n)$ per query, so such an algorithm may be infeasible if queries are constantly required. Because update and query calls are distributed evenly, the overall cost per operation is still $O(n)$ time (with $O(n)$ space), which will get a TLE (Time Limit Exceeded) error. - -\paragraph{Solution: Segment Tree.} With Segment Tree, we can store the TreeNode's val as the sum of elements in its corresponding interval. We can define a TreeNode as follows: -\begin{lstlisting}[language=Python] -class TreeNode: - def __init__(self, val, start, end): - self.val = val - self.start = start - self.end = end - self.left = None - self.right = None -\end{lstlisting} -As we will see in the process, storing start and end in every node is actually not necessary: if we save the size of the array, we can determine the start and end index of each node on the fly, which saves space. -\begin{figure}[h] - \centering - \includegraphics[width=0.8\columnwidth]{fig/307_RSQ_SegmentTree.png} - \caption{Illustration of Segment Tree. 
} - \label{fig:segment_tree} -\end{figure} -\paragraph{Build Segment Tree.} Because the leaves of the tree is a single element, we can use divide and conquer to build the tree recursively. For a given node, we first build and return its left and right child(including calculating its sum) in advance in the `divide` step, and in the `conquer' step, we calculate this node's sum using its left and right child's sum, and set its left and right child. Because there are totally $2n-1$ nodes, which makes the time and space complexity $O(n)$. -\begin{lstlisting}[language=Python] -def _buildSegmentTree(self, nums, s, e): #start index and end index - if s > e: - return None - if s == e: - return self.TreeNode(nums[s]) - - m = (s + e)//2 - # divide - left = self._buildSegmentTree(nums, s, m) - right = self._buildSegmentTree(nums, m+1, e) - - # conquer - node = self.TreeNode(left.val + right.val) - node.left = left - node.right = right - return node -\end{lstlisting} -\paragraph{Update Segment Tree.} Updating the value at index i is like searching the tree for leaf node with range [i, i]. We just need to recalculate the value of the node in the path of the searching. This operation takes $O(\log n)$ time complexity. -\begin{lstlisting}[language=Python] -def _updateNode(self, i, val, root, s, e): - if s == e: - root.val = val - return - m = (s + e)//2 - if i <= m: - self._updateNode(i, val, root.left, s, m) - else: - self._updateNode(i, val, root.right, m+1, e) - root.val = root.left.val + root.right.val - return -\end{lstlisting} -\paragraph{Range Sum Query.} Each query range [i, j], will be a combination of ranges of one or multiple ranges. For instance, as in the segment tree shown in Fig~\ref{fig:segment_tree}, for range [2, 4], it will be combination of [2, 3] and [4, 4]. 
The process is similar to the updating, we starts from the root, and get its middle index m: 1) if [i, j] is the same as [s, e] that i == s and j == e, then return the value, 2) if the interval [i, j] is within range [s, m] that j <=m , then we just search it in the left branch. 3) if [i, j] in within range [m+1, e] that i>m, then we search for the right branch. 4) else, we search both branch and the left branch has target [i, m], and the right side has target [m+1, j], the return value should be the sum of both sides. The time complexity is still $O(\log n)$. -\begin{lstlisting}[language=Python] -def _rangeQuery(self, root, i, j, s, e): - if s > e or i > j: - return 0 - if s == i and j == e: - return root.val if root is not None else 0 - - m = (s + e)//2 - - if j <= m: - return self._rangeQuery(root.left, i, j, s, m) - elif i > m: - return self._rangeQuery(root.right, i, j, m+1, e) - else: - return self._rangeQuery(root.left, i, m, s, m) + self._rangeQuery(root.right, m+1, j, m+1, e) -\end{lstlisting} -The complete code is given: -\begin{lstlisting}[language=Python] -class NumArray: - class TreeNode: - def __init__(self, val): - self.val = val - self.left = None - self.right = None - - def __init__(self, nums): - self.n = 0 - self.st = None - if nums: - self.n = len(nums) - self.st = self._buildSegmentTree(nums, 0, self.n-1) - - def update(self, i, val): - self._updateNode(i, val, self.st, 0, self.n -1) - - def sumRange(self, i, j): - return self._rangeQuery(self.st, i, j, 0, self.n-1) -\end{lstlisting} -\end{examples} - - - -Segment tree can be used here to lower the complexity of each query to $O(log n)$. - -%%%%%%%%%%%%%%%%%%%Trie%%%%%%%%%%%%%%%%% -\section{Trie for String} -\label{concept_trie} -\paragraph{Definition} Trie comes from the word re\textbf{Trie}val. In computer science, a trie, also called digital tree, radix tree or prefix tree which like BST is also a kind of search tree for finding substring in a text. 
We can solve string matching in $O(|T|)$ time, where $|T|$ is the size of our text. This purely algorithmic approach has been studied extensively in the algorithms: Knuth-Morris-Pratt, Boyer-Moore, and Rabin-Karp. However, we entertain the possibility that multiple queries will be made to the same text. This motivates the development of data structures that preprocess the text to allow for more efficient queries. One such efficient data structure is the Trie, which can answer each query in $O(P)$, where $P$ is the length of the pattern string. A Trie is an ordered tree structure, which is used mostly for storing strings (like words in a dictionary) in a compact way. -\begin{enumerate} - \item In a Trie, each child branch is labeled with a letter in the alphabet $\Sigma$. Actually, it is not necessary to store the letter as the key, because if we order the child branches of every node alphabetically from left to right, the position in the tree defines the key with which it is associated. - \item The root node in a Trie represents an empty string. -\end{enumerate} -% An ordered tree data structure used to store a dynamic set or associative array where the keys are usually strings. Unlike a binary search tree, no node in the tree stores the key associated with that node; instead, its position in the tree defines the key with which it is associated. - -Now, we define a trie node: it has a bool variable to denote whether it is the end of a word, and a children attribute, which is a list of 26 child TrieNodes. 
-\begin{lstlisting}[language= Python] -class TrieNode: - # Trie node class - def __init__(self): - self.children = [None]*26 - # isEndOfWord is True if node represent the end of the word - self.isEndOfWord = False -\end{lstlisting} -\begin{figure}[h] - \centering - \includegraphics[width=0.6\columnwidth]{fig/trie_compact_trie.jpg} - \caption{Trie VS Compact Trie} - \label{fig:trie_compact_trie} -\end{figure} - -\paragraph{Compact Trie} If we assign only one letter per edge, we are not taking full advantage of the trie’s tree structure. It is more useful to consider compact or compressed tries, tries where we remove the one letter per edge constraint, and contract non-branching paths by concatenating the letters on these paths. -In this way, every node branches out, and every node traversed represents a choice between two different words. The compressed trie that corresponds to our example trie is also shown in Figure -~\ref{fig:trie_compact_trie}. - -\paragraph{Operations: INSERT, SEARCH} -% Now, let us solve an LeetCode problem together which requires us to implement a complete Trie that with the operations INSERT, SEARCH, STARTWITH. All of these operations are actually quickly similar and they all require us to simultaneously iterate each character in the input string (or word) and each level of the Trie on the location of that character. So, it would not be hard to get the worst time complexity when we searched the whole tree or finished iterating the characters in the input. -Both for INSERT and SEARCH, it takes $O(m)$, where m is the length of the word/string we wand to insert or search in the trie. Here, we use an LeetCode problem as an example showing how to implement INSERT and SEARCH. Because constructing a trie is a series of INSERT operations which will take $O(n*m)$, n is the total numbers of words/strings, and m is the average length of each item. 
The space complexity of the non-compact Trie would be $O(N*|\Sigma|)$, where $|\Sigma|$ is the alphabet size, and N is the total number of nodes in the trie structure. The upper bound of N is $n*m$. -\begin{figure}[h] - \centering - \includegraphics[width=0.6\columnwidth]{fig/Trie.png} - \caption{Trie Structure} - \label{fig:trie} -\end{figure} -\begin{examples} -\item \textbf{208. Implement Trie (Prefix Tree) (medium).} Implement a trie with insert, search, and startsWith methods. -\begin{lstlisting} -Example: -Trie trie = new Trie(); -trie.insert("apple"); -trie.search("apple"); // returns true -trie.search("app"); // returns false -trie.startsWith("app"); // returns true -trie.insert("app"); -trie.search("app"); // returns true -\end{lstlisting} -\textit{Note: You may assume that all inputs consist of lowercase letters a-z. All inputs are guaranteed to be non-empty strings.} - -\paragraph{INSERT} With the INSERT operation, we insert a given word into the trie. We traverse the trie from the root node, which is a TrieNode, following each letter in the word; if the corresponding child node is None, we create a new node there, and then continue. At the end, we need to set the last node's is\_word flag to True. Thereafter, a new branch starting from the first newly created node has been constructed. For example, when we first insert ``app'' as shown in Fig~\ref{fig:trie_compact_trie}, we would end up building the branch ``app'', and with ``ape'', we would add the node ``e'' as demonstrated with red arrows. -\begin{lstlisting}[language=Python] -def insert(self, word): - """ - Inserts a word into the trie. 
- :type word: str - :rtype: void - """ - node = self.root #start from the root node - for c in word: - loc = ord(c)-ord('a') - if node.children[loc] is None: # char does not exist, new one - node.children[loc] = self.TrieNode() - # move to the next node - node = node.children[loc] - # set the flag to true - node.is_word = True -\end{lstlisting} - -\paragraph{SEARCH} For SEARCH, like INSERT, we traverse the trie using the letters as pointers to the next branch. There are three cases: 1) for word P, if it doesnt exist, but its prefix does exist, then we return False. 2) If we found a matching for all the letters of P, at the last node, we need to check if it is a leaf node where is\_word is True. STARTWITH is just slightly different from SEARCH, it does not need to check that and return True after all letters matched. -\begin{lstlisting}[language=Python] -def search(self, word): - node = self.root - for c in word: - loc = ord(c)-ord('a') - # case 1: not all letters matched - if node.children[loc] is None: - return False - node = node.children[loc] - # case 2 - return True if node.is_word else False -\end{lstlisting} -\begin{lstlisting}[language=Python] -def startWith(self, word): - node = self.root - for c in word: - loc = ord(c)-ord('a') - # case 1: not all letters matched - if node.children[loc] is None: - return False - node = node.children[loc] - # case 2 - return True -\end{lstlisting} -Now complete the given Trie class with TrieNode and \_\_init\_\_ function. -\begin{lstlisting}[language=Python] -class Trie: - class TrieNode: - def __init__(self): - self.is_word = False - self.children = [None] * 26 #the order of the node represents a char - - def __init__(self): - """ - Initialize your data structure here. - """ - self.root = self.TrieNode() # root has value None -\end{lstlisting} -\end{examples} - -\begin{examples} -\item \textbf{336. 
Palindrome Pairs (hard).} Given a list of unique words, find all pairs of distinct indices (i, j) in the given list, so that the concatenation of the two words, i.e. words[i] + words[j] is a palindrome. -\begin{lstlisting} -Example 1: - -Input: ["abcd","dcba","lls","s","sssll"] -Output: [[0,1],[1,0],[3,2],[2,4]] -Explanation: The palindromes are ["dcbaabcd","abcddcba","slls","llssssll"] - -Example 2: - -Input: ["bat","tab","cat"] -Output: [[0,1],[1,0]] -Explanation: The palindromes are ["battab","tabbat"] -\end{lstlisting} -\textbf{Solution: One Forward Trie and Another Backward Trie.} We start from the naive solution, which means for each element, we check if it is palindrome with all the other strings. And from the example 1, [3,3] can be a pair, but it is not one of the outputs, which means this is a combination problem, the time complexity is ${C_n}{C_{n-1}}$, and multiply it with the average length of all the strings, we make it $m$, which makes the complexity to be $O(mn^2)$. However, we can use Trie Structure, -\begin{lstlisting}[language = Python] -from collections import defaultdict - - -class Trie: - def __init__(self): - self.links = defaultdict(self.__class__) - self.index = None - # holds indices which contain this prefix and whose remainder is a palindrome - self.pali_indices = set() - - def insert(self, word, i): - trie = self - for j, ch in enumerate(word): - trie = trie.links[ch] - if word[j+1:] and is_palindrome(word[j+1:]): - trie.pali_indices.add(i) - trie.index = i - - -def is_palindrome(word): - i, j = 0, len(word) - 1 - while i <= j: - if word[i] != word[j]: - return False - i += 1 - j -= 1 - return True - - -class Solution: - def palindromePairs(self, words): - '''Find pairs of palindromes in O(n*k^2) time and O(n*k) space.''' - root = Trie() - res = [] - for i, word in enumerate(words): - if not word: - continue - root.insert(word[::-1], i) - for i, word in enumerate(words): - if not word: - continue - trie = root - for j, ch in 
enumerate(word): - if ch not in trie.links: - break - trie = trie.links[ch] - if is_palindrome(word[j+1:]) and trie.index is not None and trie.index != i: - # if this word completes to a palindrome and the prefix is a word, complete it - res.append([i, trie.index]) - else: - # this word is a reverse suffix of other words, combine with those that complete to a palindrome - for pali_index in trie.pali_indices: - if i != pali_index: - res.append([i, pali_index]) - if '' in words: - j = words.index('') - for i, word in enumerate(words): - if i != j and is_palindrome(word): - res.append([i, j]) - res.append([j, i]) - return res -\end{lstlisting} -\textbf{Solution2: .}Moreover, there are always more clever ways to solve these problems. Let us look at a clever way: - abcd, the prefix is ''. 'a', 'ab', 'abc', 'abcd', if the prefix is a palindrome, so the reverse[abcd], reverse[dc], to find them in the words, the words stored in the words with index is fastest to find. $O(n)$. Note that when considering suffixes, we explicitly leave out the empty string to avoid counting duplicates. That is, if a palindrome can be created by appending an entire other word to the current word, then we will already consider such a palindrome when considering the empty string as prefix for the other word. 
- \begin{lstlisting}[language = Python] - class Solution(object): - def palindromePairs(self, words): - # 0 means the word is not reversed, 1 means the word is reversed - words, length, result = sorted([(w, 0, i, len(w)) for i, w in enumerate(words)] + - [(w[::-1], 1, i, len(w)) for i, w in enumerate(words)]), len(words) * 2, [] - - #after the sorting,the same string were nearby, one is 0 and one is 1 - for i, (word1, rev1, ind1, len1) in enumerate(words): - for j in xrange(i + 1, length): - word2, rev2, ind2, _ = words[j] - #print word1, word2 - if word2.startswith(word1): # word2 might be longer - if ind1 != ind2 and rev1 ^ rev2: # one is reversed one is not - rest = word2[len1:] - if rest == rest[::-1]: result += ([ind1, ind2],) if rev2 else ([ind2, ind1],) # if rev2 is reversed, the from ind1 to ind2 - else: - break # from the point of view, break is powerful, this way, we only deal with possible reversed, - return result - \end{lstlisting} - \end{examples} - - %https://fizzbuzzed.com/top-interview-questions-5/ -% \paragraph{Searching} -% \paragraph{Insertion} -% \paragraph{Deletion} - -% Let us see the complete code of a Trie Class: -% \begin{lstlisting}[language = Python] - -% class Trie: - -% # Trie data structure class -% def __init__(self): -% self.root = self.getNode() - -% def getNode(self): - -% # Returns new trie node (initialized to NULLs) -% return TrieNode() - -% def _charToIndex(self,ch): - -% # private helper function -% # Converts key current character into index -% # use only 'a' through 'z' and lower case - -% return ord(ch)-ord('a') - - -% def insert(self,key): - -% # If not present, inserts key into trie -% # If the key is prefix of trie node, -% # just marks leaf node -% pCrawl = self.root -% length = len(key) -% for level in range(length): -% index = self._charToIndex(key[level]) - -% # if current character is not present -% if not pCrawl.children[index]: -% pCrawl.children[index] = self.getNode() -% pCrawl = pCrawl.children[index] - -% # 
mark last node as leaf -% pCrawl.isEndOfWord = True - -% def search(self, key): - -% # Search key in the trie -% # Returns true if key presents -% # in trie, else false -% pCrawl = self.root -% length = len(key) -% for level in range(length): -% index = self._charToIndex(key[level]) -% if not pCrawl.children[index]: -% return False -% pCrawl = pCrawl.children[index] - -% return pCrawl != None and pCrawl.isEndOfWord - -% # driver function -% def main(): - -% # Input keys (use only 'a' through 'z' and lower case) -% keys = ["the","a","there","anaswe","any", -% "by","their"] -% output = ["Not present in trie", -% "Present in tire"] - -% # Trie object -% t = Trie() - -% # Construct trie -% for key in keys: -% t.insert(key) - -% # Search for different keys -% print("{} ---- {}".format("the",output[t.search("the")])) -% print("{} ---- {}".format("these",output[t.search("these")])) -% print("{} ---- {}".format("their",output[t.search("their")])) -% print("{} ---- {}".format("thaw",output[t.search("thaw")])) - -% if __name__ == '__main__': -% main() -% \end{lstlisting} -There are several other data structures, like balanced trees and hash tables, which give us the possibility to search for a word in a dataset of strings. Then why do we need trie? Although hash table has $O(1)$ time complexity for looking for a key, it is not efficient in the following operations : -\begin{itemize} - \item Finding all keys with a common prefix. - \item Enumerating a dataset of strings in lexicographical order. -\end{itemize} - -\paragraph{Sorting} -Lexicographic sorting of a set of keys can be accomplished by building a trie from them, and traversing it in pre-order, printing only the leaves' values. This algorithm is a form of radix sort. This is why it is also called radix tree. 
- -% \paragraph{Dynamic Programming for Static Array} - - -%%%%%%%%%%%%%%%%%%%%%%%%bonus -\section{Bonus} -\paragraph{Solve Duplicate Problem in BST} When there are duplicates, things can be more complicated, and the college algorithm book did not really tell us what to do when there are duplicates. If you use the definition "left <= root < right" and you have a tree like: -\begin{lstlisting}[numbers=none] - 3 - / \ - 2 4 -\end{lstlisting} - -then adding a ``3'' duplicate key to this tree will result in: -\begin{lstlisting} [numbers=none] - 3 - / \ - 2 4 - \ - 3 -\end{lstlisting} -Note that the duplicates are not in contiguous levels. - -This is a big issue when allowing duplicates in a BST representation as the one above: duplicates may be separated by any number of levels, so checking for duplicate's existence is not that simple as just checking for immediate children of a node. - -An option to avoid this issue is to not represent duplicates structurally (as separate nodes) but instead use a counter that counts the number of occurrences of the key. The previous example would then have a tree like: -\begin{lstlisting} - 3(1) - / \ - 2(1) 4(1) - \end{lstlisting} - -and after insertion of the duplicate "3" key it will become: -\begin{lstlisting} - 3(2) - / \ - 2(1) 4(1) - \end{lstlisting} - -This simplifies SEARCH, DELETE and INSERT operations, at the expense of some extra bytes and counter operations. In the following content, we assume using definition three so that our BST will have no duplicates. - -%%%%%%%%%%%%%%%%%%%%%%%%%Exercise%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\section{LeetCode Problems} - -\begin{enumerate} - \item 144. Binary Tree Preorder Traversal - \item 94. Binary Tree Inorder Traversal - \item 145. Binary Tree Postorder Traversal - \item 589. N-ary Tree Preorder Traversal - \item 590. N-ary Tree Postorder Traversal - \item 429. N-ary Tree Level Order Traversal - \item 103. Binary Tree Zigzag Level Order Traversal(medium) - \item 105. 
Construct Binary Tree from Preorder and Inorder Traversal -\end{enumerate} - -938. Range Sum of BST (Medium) - -Given the root node of a \textbf{binary search tree}, return the sum of values of all nodes with value between L and R (inclusive). - -The binary search tree is guaranteed to have unique values. -\begin{lstlisting} -Example 1: - -Input: root = [10,5,15,3,7,null,18], L = 7, R = 15 -Output: 32 - -Example 2: - -Input: root = [10,5,15,3,7,13,18,1,null,6], L = 6, R = 10 -Output: 23 -\end{lstlisting} -\textbf{Tree Traversal+Divide and Conquer}. We need at most $O(n)$ time complexity. For each node, there are three cases: 1) L <= val <= R, 2)val < L, 3)val > R. For the first case it needs to obtain results for both its subtrees and merge with its own val. For the others two, because of the property of BST, only the result of one subtree is needed. -\begin{lstlisting}[language=Python] -def rangeSumBST(self, root, L, R): - if not root: - return 0 - if L <= root.val <= R: - return self.rangeSumBST(root.left, L, R) + self.rangeSumBST(root.right, L, R) + root.val - elif root.val < L: #left is not needed - return self.rangeSumBST(root.right, L, R) - else: # right subtree is not needed - return self.rangeSumBST(root.left, L, R) -\end{lstlisting} - - -\end{document} \ No newline at end of file diff --git a/Easy-Book/chapters/chapter_13_tree_traversal.tex b/Easy-Book/chapters/chapter_13_tree_traversal.tex deleted file mode 100644 index edf14c0..0000000 --- a/Easy-Book/chapters/chapter_13_tree_traversal.tex +++ /dev/null @@ -1,31 +0,0 @@ -\documentclass[../main.tex]{subfiles} -\begin{document} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%%%%%%%%%%%%% Tree Traverse -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -As the second Chapter related to trees, it is time for us to learn how to complete search a given tree and learn the different applications of trees as mentioned in 
Chapter~\ref{chapter_}. As with a graph data structure, the first thing to do before thinking about any algorithm is to learn how to traverse the tree. As in a graph, there are two broad ways to iterate over all nodes and edges in the tree: Depth-first-search (DFS) and Breadth-first-search (BFS). Fortunately, the traversal of a tree is easier to implement than that of a general graph, because there are no cycles in the tree and it is not possible to reach a node from multiple directions. - -\paragraph{Tree Traversal} We first talk about the general traversal of trees, starting with free trees, whose traversal is just a simplified version of the graph search implementation without the state records. We mainly focus on the implementation for rooted trees and learn both recursive and iterative implementations. These contents will be shown in: -\begin{enumerate} - \item Depth-first-search based Tree Traversal in Section~\ref{dfs_tree_traversal}. - \item Breadth-first-search based Tree Traversal in Section~\ref{bfs_tree_traversal}. -\end{enumerate} - -\paragraph{Applications} We then focus on the applications of trees, mainly different types of search trees, including the binary search tree (BST), segment tree, trie for strings, and order statistic tree. We will learn how to search in these different trees and how to implement the basic operations that make each of them a data structure, such as insert, delete, and construction. -\begin{enumerate} - \item Binary Search Tree in Section~\ref{}. - \item Segment Tree in Section~\ref{}. - \item Trie for string in Section~\ref{}. 
-\end{enumerate} - -\begin{figure}[h] - \centering - \includegraphics[width=0.6\columnwidth]{fig/example_graph.png} - \caption{Example Graph vs converted tree, where we delete edge $3->5$ and $5->6$.} - \label{example_graph_tree} -\end{figure} - - - - -\end{document} \ No newline at end of file diff --git a/Easy-Book/chapters/chapter_14_sorting.tex b/Easy-Book/chapters/chapter_14_sorting.tex deleted file mode 100644 index 6fa3d73..0000000 --- a/Easy-Book/chapters/chapter_14_sorting.tex +++ /dev/null @@ -1,1347 +0,0 @@ -\documentclass[../main.tex]{subfiles} -\begin{document} -Sorting is the most basic building block for many other algorithms and is often considered as the very first step that eases and reduces the original problems to easier ones. -\section{Introduction} -\paragraph{Sorting} In computer science, a \textit{sorting algorithm} is designed to rearrange items of a given array in a certain order based on each item's \textit{key}. The most frequently used orders are \textit{numerical order} and \textit{lexicographical order}. For example, given an array of size $n$, sort items in increasing order of its numerical values: -\begin{lstlisting}[numbers=none] -Array = [9, 10, 2, 8, 9, 3, 7] -sorted = [2, 3, 7, 8, 9, 9, 10] -\end{lstlisting} -\paragraph{Selection} \textit{Selection algorithm} is used to find the k-th smallest number in a given array; such a number is called the k-th \textit{order statistic}. For example, given the above array, find the 3-th smallest number. -\begin{lstlisting}[numbers=none] -Array = [9, 10, 2, 8, 9, 3, 7], k = 3 -Result: 7 -\end{lstlisting} -Sorting and Selection often go hand in hand; either we first execute sorting and then select the desired order through indexing or we derive a selection algorithm from a corresponding sorting algorithm. Due to such relation, this chapter is mainly about introducing sorting algorithms and occasionally we introduce their corresponding selection algorithms by the side. 
- -\paragraph{Lexicographical Order} For a list of strings, sorting them will make them in lexicographical order. The order is decided by a comparison function, which compares corresponding characters of the two strings from left to right. In the process, the first pair of characters that differ from each other determines the ordering: the string that has smaller alphabet from the pair is smaller than the other string. - -Characters are compared using the Unicode character set. All uppercase letters come before lower case letters. If two letters are the same case, then alphabetic order is used to compare them. For example: -\begin{lstlisting}[numbers=none] -'ab' < 'bc' (differs at i = 0) -'abc' < 'abd' (differs at i = 2) -\end{lstlisting} -Special cases appears when two strings are of different length and the shorter one $s$ is a prefix of the the longer one $t$, then it is considered that $s < t$. For example: -\begin{lstlisting}[numbers=none] -'ab' < 'abab' ('ab' is a prefix of 'abab') -\end{lstlisting} - -\paragraph{How to Learn Sorting Algorithms?} We list a few terminologies that are commonly seen to describe the properties of a certain sorting algorithm: -% \paragraph{How to Learn Sorting Algorithms?} Before we start our journey to learn each individual existing sorting algorithm, it is worthy the time to discuss some key terminologies and techniques that distinguish different kind. Therefore, along the learning process, we know what questions to answer and trying to look for answer. Knowing the behavior and performance of each kind helps us making better decision when trying to design best solutions for real problems. -\begin{itemize} - \item \textbf{In-place Sorting}: In-place sorting algorithm only uses a constant number of extra spaces to assist its implementation. If a sorting algorithm is not in-place, it is called out-of-place sorting instead. 
- \item \textbf{Stable Sorting}: Stable sorting algorithm maintain the relative order of items with equal keys. For example, two different tasks that come with same priority in the priority queue should be scheduled in the relative pending ordering. - \item \textbf{Comparison-based Sorting}: This kind of sorting technique determines the sorted order of an input array by comparing pairs of items and moving them around based on the results of comparison. And it has a lower bound of $\Omega (n\log n)$ comparison. -\end{itemize} - -\paragraph{Sorting Algorithms in Coding Interviews} As the fundamental Sorting and selection algorithms can still be potentially met in interviews where we might be asked to implement and analyze any sorting algorithm you like. Therefore, it is necessary for us to understand the most commonly known sorting algorithms. Also, Python provides us built-in sorting algorithms to use directly and we shall mater the syntax too. - -\paragraph{The Applications of Sorting} The importance of sorting techniques is decided by its multiple fields of application: -\begin{enumerate} - \item Sorting can organize information in a human-friendly way. For example, the lexicographical order are used in dictionary and inside of library systems to help users locate wanted words or books in a quick way. - \item Sorting algorithms often be used as a key subroutine to other algorithms. As we have shown before, binary search, sliding window algorithms, or cyclic shifts of suffix array need the data to be in sorted order to carry on the next step. When ordering will not incur wrong solution to the problems, sorting beforehand should always be atop on our mind for sorting first might ease our problem later. -\end{enumerate} - -\paragraph{Organization} We organize the content mainly based on the worst case time complexity. 
Section~\ref{o_n_2_sorting} - \ref{o_n_log_n_sorting} focuses on comparison-based sorting algorithms, and Section~\ref{O_n_k_sorting}-\ref{o_n_sorting} introduce classical non-comparison-based sorting algorithms. %In addition, Section~\ref{lexcographical_order} completes the picture and show which sorting algorithms can be adapted to do lexcographical order sorting based on its distinct characters that the range of keys limited by $|\sum|$, the number of possible keys in the definition. Further, in Section~\ref{python_built_in_sort}, we introduce the built-in sort function. Know the properties and how to customize the comparison functions. To recap: -\begin{itemize} -\item Naive Sorting (Section~\ref{o_n_2_sorting}): Bubble Sort, Insertion Sort, Selection Sort; -\item Asymptotically Best Sorting (Section~\ref{o_n_log_n_sorting}) Sorting: merge sort, quick sort, and Quick Select; -\item Linear Sorting (Section~\ref{O_n_k_sorting}): Counting Sort, where $k$ is the range of the very first and last key. -\item Python Built-in Sort (Section~\ref{python_built_in_sort}): -\end{itemize} -% We can get $O(1)$ for an sorted data strcture. The simplest case of a selection algorithm is finding the minimum (or maximum) item by iterating through the data structure, and the hardest case is finding the median. Many selection algorithms are derived by generalizing a sorting algorithm. Naturally, sorting can potentially be the first step of algorithms like selection or even other problems to ordering the data. Such as we can apply binary search on the sorted data structure. - -% The best-known selection algorithm is quickselect, which is related to quicksort. Naturally sorting is the most basic and fundamental step in problem solving and many languages have provided us built-in sorting algorithms to use directly. 
- -\section{Python Comparison Operators and Built-in Functions} -\paragraph{Comparison Operators} Python offers six comparison operators, shown in Table~\ref{tab:comparison_operators}, to compare values. Each of them returns either \texttt{True} or \texttt{False} according to the condition. - -\begin{table}[h] -\begin{small} -\centering -\noindent\captionof{table}{Comparison operators in Python} - \noindent \begin{tabular}{|p{0.25\columnwidth}|p{0.65\columnwidth}|} - \hline -> & Greater than - \texttt{True} if left operand is greater than the right\\ \hline -< & Less than - \texttt{True} if left operand is less than the right\\ \hline -== & Equal to - \texttt{True} if both operands are equal\\ \hline -!= & Not equal to - \texttt{True} if operands are not equal\\ \hline ->= & Greater than or equal to - \texttt{True} if left operand is greater than or equal to the right\\ \hline -<= & Less than or equal to - \texttt{True} if left operand is less than or equal to the right\\ \hline -\end{tabular} - \label{tab:comparison_operators} - \end{small} -\end{table} - -For example, compare two numerical values: -\begin{lstlisting}[language=Python] -c1 = 2 < 3 -c2 = 2.5 > 3 -\end{lstlisting} -The printout is: -\begin{lstlisting}[language=Python] -(True, False) -\end{lstlisting} -Also, comparing two strings follows lexicographical order: -\begin{lstlisting}[language=Python] -c1 = 'ab' < 'bc' -c2 = 'abc' > 'abd' -c3 = 'ab' < 'abab' -c4 = 'abc' != 'abc' -\end{lstlisting} -The printout is: -\begin{lstlisting}[language=Python] -(True, False, True, False) -\end{lstlisting} -What's more, Python can compare other types of sequences such as \texttt{list} and \texttt{tuple} using lexicographical order too: -\begin{lstlisting}[language=Python] -c1 = [1, 2, 3] < [2, 3] -c2 = (1, 2) > (1, 2, 3) -c3 = [1, 2] == [1, 2] -\end{lstlisting} -The printout is: -\begin{lstlisting}[language=Python] -(True, False, True) -\end{lstlisting} -However, mostly Python 3 does not support comparison between different types of 
sequence, nor does it supports comparison for \texttt{dictionary}. For \texttt{dictionary} data structures, in default, it uses its key as the key to compare with. For example, comparison between \texttt{list} and \texttt{tuple} will raise \texttt{TypeError}: -\begin{lstlisting}[language=Python] -[1, 2, 3] < (2, 3) -\end{lstlisting} -The error is shown as: -\begin{lstlisting}[language=Python] -----> 1 [1, 2, 3] < (2, 3) -TypeError: '<' not supported between instances of 'list' and 'tuple' -\end{lstlisting} -Comparison between dictionary as follows will raise the same error: -\begin{lstlisting}[language=Python] -{1: 'a', 2:'b'} < {1: 'a', 2:'b', 3:'c'} -\end{lstlisting} -\paragraph{Comparison Functions} -Python built-in functions \texttt{max()} and \texttt{min()} support two forms of syntax: \texttt{max(iterable, *[, key, default])} and \texttt{max(arg1, arg2, *args[, key])}. If one positional argument is provided, it should be an iterable. And then it returns the largest item in the iterable based on its key. It also accepts two or more positional arguments, and these arguments can be numerical or sequential. When there are two or more positional argument, the function returns the largest. 
- -For example, with one iterable and it returns \texttt{20}: -\begin{lstlisting}[language=Python] -max([4, 8, 9, 20, 3]) -\end{lstlisting} -With two positional arguments --either numerical or sequential: -\begin{lstlisting}[language=Python] -m1 = max(24, 15) -m2 = max([4, 8, 9, 20, 3], [6, 2, 8]) -m3 = max('abc', 'ba') -\end{lstlisting} -The printout of these results is: -\begin{lstlisting}[language=Python] -(24, [6, 2, 8], 'ba') -\end{lstlisting} -With \texttt{dictionary}: -\begin{lstlisting}[language=Python] -dict1 = {'a': 5, 'b': 8, 'c': 3} -k1 = max(dict1) -k2 = max(dict1, key=dict1.get) -k3 = max(dict1, key =lambda x: dict1[x]) -\end{lstlisting} -The printout is: -\begin{lstlisting}[language=Python] -('c', 'b', 'b') -\end{lstlisting} -When the sequence is empty, we need to set an default value: -\begin{lstlisting}[language=Python] -max([], default=0) -\end{lstlisting} -\paragraph{Rich Comparison} To compare a self-defined \texttt{class}, in Python 2.X, \texttt{\_\_cmp\_\_(self, other)} special method is used to implement comparison between two objects. \texttt{\_\_cmp\_\_(self, other)} returns negative value if \texttt{self < other}, positive if \texttt{self > other}, and zero if they were equal. 
However, in Python 3, this \texttt{cmp} style of comparisons is dropped, and \texttt{rich comparison} is introduced, which assign a special method to each operator as shown in Table.~\ref{tab:comparison_operator_special_method}: -\begin{table}[h] -\begin{small} -\centering -\noindent\captionof{table}{Operator and its special method} - \noindent \begin{tabular}{|p{0.25\columnwidth}|p{0.35\columnwidth}|} - \hline -== & \texttt{\_\_eq\_\_} \\ \hline -!= &\texttt{\_\_ne\_\_}\\ \hline -< & \texttt{\_\_lt\_\_}\\ \hline -<= &\texttt{\_\_le\_\_}\\ \hline -> & \texttt{\_\_gt\_\_}\\ \hline ->= & \texttt{\_\_ge\_\_}\\ \hline -\end{tabular} - \label{tab:comparison_operator_special_method} - \end{small} -\end{table} -To avoid the hassle of providing all six functions, we can only implement \texttt{\_\_eq\_\_}, \texttt{\_\_ne\_\_}, and only one of the ordering operators, and use the \texttt{functools.total\_ordering()} decorator to fill in the rest. For example, write a class \texttt{Person}: -\begin{lstlisting}[language=Python] -from functools import total_ordering -@total_ordering -class Person(object): - def __init__(self, firstname, lastname): - self.first = firstname - self.last = lastname - - def __eq__(self, other): - return ((self.last, self.first) == (other.last, other.first)) - - def __ne__(self, other): - return not (self == other) - - def __lt__(self, other): - return ((self.last, self.first) < (other.last, other.first)) - - def __repr__(self): - return "%s %s" % (self.first, self.last) -\end{lstlisting} -Then, we would be able to use any of the above comparison operator on our class: -\begin{lstlisting}[language=Python] -p1 = Person('Li', 'Yin') -p2 = Person('Bella', 'Smith') -p1 > p2 -\end{lstlisting} -It outputs \texttt{True} because last name ``Yin'' is larger than ``Smith''. 
-%%%%%%%%%%%%%%%% -% Chapter: Bubble sort % -%%%%%%%%%%%%%%%% -\section{Naive Sorting} -\label{o_n_2_sorting} -As the most naive and intuitive group of comparison-based sorting methods, this group takes $O(n^2)$ time and usually consists of two nested for loops. In this section, we learn three different sorting algorithms ``quickly'' due to their simplicity: insertion sort, bubble sort, and selection sort. - - - -\subsection{Insertion Sort} -Insertion sort is one of the most intuitive sorting algorithms for humans. Given an array of $n$ items to process, we divide it into two regions: \textbf{sorted and unrestricted region}. Each time we take one item ``out'' of the unrestricted region and move it into the sorted region by inserting it at a proper position. - -\begin{figure}[!ht] - \centering - \includegraphics[width=0.9\columnwidth]{fig/insertion_sort.png} - \caption{The whole process for insertion sort: Gray marks the item to be processed, and yellow marks the position after which the gray item is to be inserted into the sorted region.} - \label{fig:insertion_sort} -\end{figure} - - - -\paragraph{In-place Insertion} The logic behind this algorithm is simple: we could do it easily by setting up another sorted array. However, here we want to focus on the in-place insertion. Given an array of size $n$, we use index $0$ and $i$ to point to the start position of the sorted and the unrestricted region, respectively. Initially $i=1$, which indicates that the sorted region naturally starts with one item. We have the sorted region in $[0, i-1]$, and the unrestricted region in $[i, n-1]$. We scan the items in the unrestricted region from left to right, and insert each item $a[i]$ into the sorted sublist. - -The key step is to find a proper position in the region $[0, i-1]$ at which to insert $a[i]$. There are two different ways to iterate over the sorted region: forward and backward. We use pointer $j$ in the sorted region. -\begin{itemize} - \item Forward: $j$ will iterate in range $[0, i-1]$. 
We compare $a[j]$ with $a[i]$, and stop at the first place that $a[j] > a[i]$ (to keep it stable). All items $a[j:i-1]$ will be shifted backward by one position, and $a[i]$ will be placed at index $j$. Together, the comparisons and shifts of one insertion take up to $i$ steps. - - \item Backward: $j$ iterates in range $[i-1, 0]$. We compare $a[j]$ with $a[i]$, and stop at the first place that $a[j] <= a[i]$ (to keep it stable). In this process, we can do the shifting simultaneously: if $a[j] > a[i]$, we shift $a[j]$ to position $j+1$. -\end{itemize} -In the forward version, the shifting process still requires us to walk the range in reverse, -therefore the backward iteration makes better sense. - -For example, given an array $a = [9, 10, 2, 8, 9, 3]$. First, 9 by itself is a sorted array. We demonstrate the backward iteration process. At first, 10 is compared with 9, and it stays where it is. At the second pass, 2 is compared with 10, 9, and then it is put at the first position. The whole process of this example is demonstrated in Fig.~\ref{fig:insertion_sort}. - - -% If the $t$ is larger than $v$, we keep comparing the next $v$; if $t$ is smaller or equal to $v$, we insert $t$ at the place of $v$. In python using the insert() of the list data type will automatically shift items behind. Doing so will keep the sorting to be stable, that is the items with equal keys will remain its original order. However, if the $t$ is larger than all the items, then we need to remember to insert it at the end. -% \begin{lstlisting}[numbers=none] -% If the t > v: -% we move to the next v -% else t <= v: # we -% insert at the position of v -% \end{lstlisting} - - -\paragraph{With Extra Space Implementation} The Python \texttt{list.insert()} function handles the insert and shifting at the same time. We need to pay attention to the case when the item is larger than all items in the sorted list: then we have to insert it at the end. 
-\begin{lstlisting}[language=Python] -def insertionSort(a): - if not a or len(a) == 1: - return a - n = len(a) - sl = [a[0]] # sorted list - for i in range(1, n): - for j in range(i): - if sl[j] > a[i]: - sl.insert(j, a[i]) - break - if len(sl) != i + 1: # not inserted yet - sl.insert(i, a[i]) - return sl -\end{lstlisting} -\paragraph{Backward In-place Implementation} -We use a \texttt{while} loop to handle the backward iteration: whenever the target is smaller than the item in the sorted region, we shift the item backward. When the \texttt{while} loop stops, it is either $j = -1$ or when $t >= a[j]$. -\begin{itemize} - \item When $j=-1$, that means we need to insert the target at the first position which should be $j+1$. -\item When $t >= a[j]$, we need to insert the target one position behind $j$, which is $j+1$. -\end{itemize} -The code is shown as: -\begin{lstlisting}[language=Python] -def insertionSort(a): - if not a or len(a) == 1: - return a - n = len(a) - for i in range(1, n): - t = a[i] - j = i - 1 - while j >= 0 and t < a[j]: - a[j+1] = a[j] # Move item backward - j -= 1 - a[j+1] = t - return -\end{lstlisting} - - -% The detail is for each element that is going to be inserted, for the case of increasing order, we reversely iterate through the sorted list, if the current value is larger, then put the element in current position, otherwise, shift the larger element backward, and compare it with the element before it. 
The Python code is as follows: -% \begin{lstlisting}[language = Python] - -% def shift(a, start, end): -% for i in range(end, start, -1): # [i, j) -% a[i] = a[i-1] - -% def insertionSort(a): -% if not a or len(a) == 1: -% return a -% n = len(a) -% sl = [a[0]] # sorted list -% for i in range(1, n): # items to be inserted into the sorted -% for j in range(i): -% if a[i] <= a[j]: -% # shift all other elements [j, i-1] -% tmp = a[i] -% shift(a, j, i) -% a[j] = tmp - -% return a - -% def insertionSort(alist): -% for index in range(1,len(alist)): - -% currentvalue = alist[index] -% position = index - -% while position>0 and alist[position-1]>currentvalue: -% alist[position] = alist[position-1] #shift the larger element back -% position = position-1 - -% alist[position] = currentvalue -% alist = [54,26,93,17,77,31,44,55,20] -% insertionSort(alist) -% print(alist) -% \end{lstlisting} - -\subsection{Bubble Sort and Selection Sort} -\subsubsection{Bubble Sort} -Bubble sort compares each pair of adjacent items in an array and swaps them if they are out of order. Given an array of size $n$: in a single pass, there are $n-1$ pairs for comparison, and at the end of the pass, one item will be put in place. -\begin{figure}[H] - \centering - \includegraphics{fig/bubble_sort.png} - \caption{One pass for bubble sort} - \label{fig:bubble_sort} -\end{figure} - -\paragraph{Passes} For example, Fig.~\ref{fig:bubble_sort} shows the first pass for sorting array [9, 10, 2, 8, 9, 3]. When comparing a pair $(a_i, a_{i+1})$, if $a_i > a_{i+1}$, we swap these two items. We can clearly see after one pass, the largest item 10 is in place. For the next pass, it only compare pairs within the unrestricted window $[0, 4]$. This is what``bubble'' means in the name: after a pass, the largest item in the unrestricted window bubble up to the end of the window and become in place. - -% \paragraph{Next Pass} Therefore, in the next pass, the last item will no longer needed to be compared. 
For pass $i$, it places the current $i$-th largest items in position in range of $[n-i-1, n)$. the last $i$ items will be sorted. We say, in the first pass, where $i=0$, the valid window is $[0, n)$, and to be generalize the valid window for $i$-th pass is $[0, n-i)$. - -\paragraph{Implementation} With the understanding of the valid window of each pass, we can implement ``bubble'' sort with two nested \texttt{for} loops in Python. The first \texttt{for} loop to enumerate the number of passes, say $i$, which is $n-1$ in total. The second \texttt{for} loop to is to scan pairs in the unrestricted window $[0, n-i-1]$ from left to right. thus index $j$ points to the first item in the pair, making it in range of $[0, n-i-2]$. -\begin{lstlisting}[language = Python] -def bubbleSort(a): - if not a or len(a) == 1: - return - n = len(a) - for i in range(n - 1): #n-1 passes - for j in range(n - i -1): - # Swap - if a[j] > a[j + 1]: - a[j], a[j + 1] = a[j + 1], a[j] - return -\end{lstlisting} -When the pair has equal values, we do not need to swap them. The advantage of doing so is (1) to save unnecessary swaps and (2) keep the original order of items with same keys. This makes bubble sort a \textbf{stable sort}. Also, in the implementation no extra space is assigned either which makes bubble sort \textbf{in-place sort}. - -% The most basic and most intuitive sorting algorithm is the bubble sort. The bubble sort makes multiple passes through a list. In each pass, it compare a pair of adjacent items and switch them if they are out of order. For pass $i$, it places the current $i$th largest value in position $n-i$. In essence, each item “bubbles” up to the location where it belongs. The time complexity of bubble sort is $O(n+(n-1)+(n-2)+...+1)= O(n(n-1)/2) = O(n^2)$. - -% Figure ~\ref{fig:bubble_sort} shows the first pass of a bubble sort. The shaded items are being compared to see if they are out of order. 
If there are n items in the list, then there are n-1 pairs of items that need to be compared on the first pass. It is important to note that once the largest value in the list is part of a pair, it will continually be moved along until the pass is complete. - - -\paragraph{Complexity Analysis and Optimization} In the $i$-th pass, the number of items in the valid window is $n-i$, which takes at most $n-i-1$ comparisons and swaps, and we need a total of $n-1$ passes. The total time will be $T = \sum_{i=0}^{n-2}{(n-i-1)} = (n-1) + (n-2) + \dots + 2 + 1 = n(n-1)/2 = O(n^2)$. The above implementation runs in $O(n^2)$ even if the array is already sorted. We can optimize it by stopping the whole program as soon as a single pass of the inner \texttt{for} loop detects no swap. When the input is nearly sorted, this strategy can get us $O(n)$ time complexity. - -% \to sort, this makes it in-place sort. Also, it is a stable sort. paragraph{Properties} As we see in bubble sort, there is no extra space needed -\subsubsection{Selection Sort} -\begin{figure}[!ht] - \centering - \includegraphics[width=0.98\columnwidth]{fig/selection_sort.png} - \caption{The whole process for Selection sort} - \label{fig:selection_sort} -\end{figure} -In bubble sort, each pass gets the largest element in the valid window in place through a series of swapping operations. Selection sort makes a slight optimization by searching for the largest item in the current unrestricted window and swapping it directly with the last item in the region. This avoids the constant swaps that occur in bubble sort. The whole sorting process for the same array is shown in Fig.~\ref{fig:selection_sort}. - -\paragraph{Implementation} Similar to the implementation of Bubble Sort, we have the concept of the number of passes in the outer \texttt{for} loop, and the concept of the unrestricted window in the inner \texttt{for} loop. We use variables \texttt{ti} and -\texttt{li} for the target position where the largest item should be placed and the index where it currently is, respectively. 
-\begin{lstlisting}[language = Python] -def selectSort(a): - n = len(a) - for i in range(n - 1): #n-1 passes - ti = n - 1 - i - li = 0 # The index of the largest item - for j in range(n - i): - if a[j] >= a[li]: - li = j - # swap li and ti - a[ti], a[li] = a[li], a[ti] - return -\end{lstlisting} -Like bubble sort, selection sort is \textbf{in-place}. In the comparison, we used \texttt{if a[j] >= a[li]:}, which picks the last one among several equal largest keys. However, the long-distance swap can still carry the item at position \texttt{ti} over another item with an equal key: sorting $[5, 1_a, 1_b]$ first swaps $5$ with $1_b$ and ends with $[1_b, 1_a, 5]$. Therefore, unlike bubble sort, this swap-based selection sort is \textbf{not stable}. - -\paragraph{Complexity Analysis} Like bubble sort, selection sort has a worst-case and average-case time complexity of $O(n^2)$, but it performs at most $n-1$ swaps, which makes it cheaper than bubble sort when the input is far from sorted. -%%%%%%%%%%%%%%%% -% Chapter: Merge Sort % -%%%%%%%%%%%%%%%% -\section{Asymptotically Best Sorting} -\label{o_n_log_n_sorting} -We have learned a few comparison-based sorting algorithms and they all have an upper bound of $O(n^2)$ in time complexity because of the number of comparisons that must be executed. Can we do better than $O(n^2)$ and how? - -\paragraph{Comparison-based Lower Bounds for Sorting} Given an input of size $n$, there are $n!$ different possible permutations on the input, indicating that our sorting algorithms must find the one and only one permutation by comparing pairs of items. So, how many comparisons do we need to reach the answer? Let's try the case when $n=3$, and all possible permutations using the indexes will be: $(1,2,3), (1,3,2), (3,1,2), (2,1,3), (2,3,1),(3,2,1)$. First we compare pair $(1,2)$, if $a_1 < a_2$, our candidates set is thus narrowed down to $\{(1,2,3), (1,3,2), (3,1,2)\}$. - -We draw a decision-tree, which is a full binary tree with $n!$ leaves--the $n!$ permutations, and each branch represents one decision made on the comparison result. The cost of any comparison-based algorithm is abstracted as the length of the path from the root of the decision tree to its final sorted permutation. 
The longest path represents the worst-case number of comparisons. - -Using $h$ to denote the height of the binary tree, and $l$ for the number of leaves. First, a binary tree will have at most $2^h$ leaves, we get $l \leq 2^h$. Second, it will have at least $n!$ leaves to represent all possible orderings, we have $l \geq n!$ . Therefore we get the lower bound time complexity for the worst case: -\begin{align} -n! &\leq l \leq 2^h \\ - 2^h &\ge n!\\ - h &\ge \log(n!) \\ - h &= \Omega(n\log n) -\end{align} - -In this section, we will introduce three classical sorting algorithms that has $O(n\log n)$ time complexity: Merge Sort and Quick Sort both utilize the Divide-and-conquer method, and Heap Sort uses the max/min heap data structures. -\subsection{Merge Sort} -% We have already used Merge Sort as the basic example of illustrating the Divide-and-Conquer algorithm design methodology. -As we know there are two main steps: ``divide'' and ``merge'' in merge sort and we have already seen the illustration of the ``divide'' process in Chapter.~\ref{chapter_divide_conquer}. %and each happens at a differnt stage of the recursion function call. - -\paragraph{Divide} In the divide stage, the original problem $a[s...e]$, where $s, e$ is the start and end index of the subarray, respectively. The divide process divides its parent problem into two halves from the middle index $m = (s+e)//2$: $a[s...m]$, and $a[m+1, e]$. This recursive call keeps moving downward till the size of the subproblem becomes one when $s=e$, which is the base case for a list of size 1 is naturally sorted. The process of divide is shown in Fig.~\ref{fig:merge_sort}. 
- -\begin{figure}[!ht] - \centering - \includegraphics[width=0.9\columnwidth]{fig/merge_sort.png} - \caption{Merge Sort: The dividing process is marked with dark arrows and the merging process is marked with gray arrows, with the merged list marked in gray too.} - \label{fig:merge_sort} -\end{figure} - -\paragraph{Merge} Once we have obtained two sorted sublists from the left and right side, the result of the current subproblem is obtained by merging the two sorted lists into one. The merge process is done through the two-pointer method: We allocate a new list and put two pointers at the start of the two sublists, and each time we choose the smaller of the two items indicated by the pointers and append it to the new list. Once a smaller item is chosen, we move its corresponding pointer to the next item in that sublist. We continue this process until either pointer reaches the end. Then, the remainder of the sublist whose pointer has not reached the end yet is copied to the end of the newly generated list. The subprocess is shown in Fig.~\ref{fig:merge_sort} and its implementation is as follows: -\begin{lstlisting}[language=Python] -def merge(l, r): - ans = [] - # Two pointers each points at l and r - i = j = 0 - n, m = len(l), len(r) - - while i < n and j < m: - if l[i] <= r[j]: - ans.append(l[i]) - i += 1 - else: - ans.append(r[j]) - j += 1 - - ans += l[i:] - ans += r[j:] - return ans -\end{lstlisting} - -In the code, we use $l[i] <= r[j]$ instead of $l[i] < r[j]$ because when the left and right sublists contain items of equal keys, we put the ones from the left first in the merged list, so that the sorting can be \textbf{stable}. However, we used $O(n)$ temporary space to save the merged result, making merge sort an \textbf{out-of-place} sorting algorithm. - -\paragraph{Implementation} The whole implementation is straightforward. 
-\begin{lstlisting}[language = Python] -def mergeSort(a, s, e): - if s == e: - return [a[s]] - - m = (s + e) // 2 - - l = mergeSort(a, s , m) - r = mergeSort(a, m+1, e) - return merge(l, r) -\end{lstlisting} -% Check out the sample code to how to prove a sorting is stable or not using tuple. - - -\paragraph{Complexity Analysis} Because for each divide process we need to take $O(n)$ time to merge the two sublists back to a list, the recurrent relation of the complexity function can be deducted as follows: -\begin{equation} \label{bt_time} -\begin{split} -T(n) & = 2T(n/2) + O(n)\\ - & = 2 * 2T(n/4) + O(n) + O(n)\\ - & = O(n\log n) -\end{split} -\end{equation} -Thus, we get $O(n\log n)$ as the upper bound for merge sort, which is asymptotically optimal within the comparison-based sorting. - -\subsection{HeapSort} -\label{sorting_subsec_heapsort} -To sort the given array in increasing order, we can use min-heap. We first \texttt{heapify} the given array. To get a sorted list, we can simply pop out items till the heap is empty. And the popped out items will be in sorted order. - -%Since the maximum item is stored at the root, we can put swap root with the last item in the list, and then we can view the heap as a smaller size of $n-1$. We observe all the subtree of the root remain max-heaps other than the new root item that might violate the max-heap property. We restore the max-heap property through sinking, and put the root item at position $n-2$, and repeat the process for n-1 times. 
- -% \paragraph{Implementation} Continuing use our examplary Heap class in Chapter~\ref{chapter_heap_priority_queue}, we implement heapsort member function: -% \begin{lstlisting}[language=Python] -% def heapsort(self, a): -% self.heapify_sink(a) -% n = len(a) -% for i in range(n, 1, -1): # position to put the root node -% self.heap[i], self.heap[1] = self.heap[1], self.heap[i] #swap root with i -% self.size -= 1 -% self._sink(1) # sink dow the new root -% print(self.heap) -% \end{lstlisting} -\paragraph{Implementation} -We can implement heap sort easily with built-in module \texttt{heapq} through the \texttt{heapify()} and \texttt{heappop()} functions : -\begin{lstlisting}[language=Python] -from heapq import heapify, heappop -def heapsort(a): - heapify(a) - return [heappop(a) for i in range(len(a))] -\end{lstlisting} - -\paragraph{Complexity Analysis} The \texttt{heapify} takes $O(n)$, and the later process takes $O(\log n + \log {(n-1)} +... + 0) = \log (n!)$ which has an upper bound of $O(n\log n)$. %Check out more on \url{https://www.programiz.com/dsa/heap-sort}. -%%%%%%%%%%%%%%%% -% Chapter: Quick Sort % -%%%%%%%%%%%%%%%% -\subsection{Quick Sort and Quick Select} -Like merge sort, quick sort applies divide and conquer method and is mainly implemented with recursion. Unlike merge sort, the conquering step the sorting process- \textit{partition} happens before ``dividing'' the problem into subproblems through recursive calls. - -\paragraph{Partition and Pivot} In the partition, quick sort chooses a \textit{pivot} item from the subarray, either randomly or intentionally. Given a subarray of $A[s, e]$, the pivot can either be located at $s$ or $e$, or a random position in range $[s, e]$. Then it partitions the subarray $A[s, e]$ into three parts according to the value of the pivot: $A[s, p-1], A[p]$, and $A[p+1...e]$, where $p$ is where the pivot is placed at. 
The left and right part of the pivot satisfies the following conditions: -\begin{itemize} - \item -$A[i] \le A[p], i \in [s, p-1]$, -\item and $A[i] > A[p], i \in [p+1, e]$. -\end{itemize} -If we are allowed with linear space, this partition process will be trivial to implement. However, we should strive for better and learn an in-place partition methods--\textit{Lomuto's} Partition, which only uses constant space. - -\paragraph{Conquer} After the partition, one item--the pivot $A[p]$ is placed in the right place. Next, we only need to handle two subproblems: sorting $A[s, p-1]$ and $A[p+1, e]$ by recursively call the quicksort function. -We can write down the main steps of quick sort as: -\begin{lstlisting}[language=Python] -def quickSort(a, s, e): - # Base case - if s >= e: - return - p = partition(a, s, e) - - # Conquer - quickSort(a, s , p-1) - quickSort(a, p+1, e) - return -\end{lstlisting} - -At the next two subsection, we will talk about partition algorithm. And the requirement for this step is to do it \textbf{in-place} just through a series of swapping operations. - - -% it not simply divide it into half and half until it reaches to the base case and then in the returning stage of the recursion to merge from small case. It has three main st, while not using additional storage. As a trade-off, however, it is possible that the list may not be divided in half. When this happens, we will see that performance is diminished. - -% A quick sort first selects a value, which is called the pivot value. Although there are many different ways to choose the pivot value, we will simply use the first item in the list. The role of the pivot value is to assist with splitting the list. The actual position where the pivot value belongs in the final sorted list, commonly called the split point, will be used to divide the list for subsequent calls to the quick sort. - -% Figure ~\ref{fig:choose_pivot} shows that 54 will serve as our first pivot value. 
Since we have looked at this example a few times already, we know that 54 will eventually end up in the position currently holding 31. The partition process will happen next. It will find the split point and at the same time move other items to the appropriate side of the list, either less than or greater than the pivot value. -% \begin{figure}[h] -% \centering -% % \includegraphics[width=0.8\columnwidth]{fig/firstsplit.png} -% % \caption{Choose pivot} -% \label{fig:choose_pivot} -% \includegraphics[width=0.8\columnwidth]{fig/partitionA.png} -% \caption{Hoarse Partition} -% \end{figure} -% Python code: -% \begin{lstlisting}[language= Python] -% def quickSort(alist): -% quickSortHelper(alist,0,len(alist)-1) - -% def quickSortHelper(alist,first,last): -% if first p$, we just increment pointer $j$; - \item Otherwise when $A[j] <= p$, this item should goes to region (1). We accomplish this by swapping this item with the first item in region (2) at $i+1$. And now region (1) increments by one and region (2) shifts one position backward. - \end{itemize} - \item After the for loop, we need to put our pivot at the first place of region (2) by swapping. And now, the whole subarray is successfully parititioned into three regions as we needed, and return where the index of where the pivot is at--$i+1$--as the partition index. -\end{itemize} -We The implementation of as follows: -\begin{lstlisting}[language = Python] -def partition(a, s, e): - p = a[e] - i = s - 1 - # Scan unresticted area - for j in range(s, e): - # Swap - if a[j] <= p: - i += 1 - a[i], a[j] = a[j], a[i] - a[i+1], a[e] = a[e], a[i+1] - return i+1 -\end{lstlisting} - -% \subsubsection{Hoare Partition} -% In hoare partition, $i$ is placed at the same place as in the Lumoto's partition, and j starts at $e$ and $i$ and $j$ move towards each other. The regions are: $A[p...i] \le x, A[i+1...j-1], A[j...e-1]>x$. When we are at position $j-1$, we compare this item with pivot, if it is larger than x, we decrease $j$. 
For i, if it is smaller than or equal to x, we increase $i$. Until there are both violation at $i$ and $j$, we swap these two items. Also, $j$ stops when $j=i$. -% \begin{lstlisting}[language = Python] -% def HoarePartition(arr,low,high): -% pivot=arr[high] -% i=low -% j=high-1 -% while True: -% while arr[i] <= pivot and i < j: -% i += 1 -% while arr[j] >= pivot and i < j: -% j -= 1 -% if i == j: -% if arr[i] <= pivot: -% i += 1 -% arr[i], arr[high] = arr[high], arr[i] -% return i -% else: -% arr[i], arr[j] = arr[j], arr[i] -% \end{lstlisting} -\paragraph{Complexity Analysis} The worst case of the partition appears when the input array is already sorted or is reversed from the sorted array. In this case, it will partition a problem with size $n$ into one subproblem with size $n-1$ and the other subproblem is just empty. The recurrence function is $T(n) = T(n-1) + O(n)$, and it has a time complexity of $O(n^2)$. And the best case appears when a subprocess is divided into half and half as in the merge sort, where the time complexity is $O(n\log n)$. Randomly picking the pivot from $A[s, e]$ and swap it with $A[e]$ can help us achieve a stable performance with average $O(n\log n)$ time complexity. -\subsubsection{Stablity of Quick Sort} Quick sort is \textit{not stable}, because there are cases items can be swapped no matter what: (1) as the first item in the region (2), it can be swapped to the end of region (2). (2) as the pivot, it is swapped with the first item in the region (2) too. Therefore, it is hard to guarantee the stability among equal keys. We can try experiment with $A = [(2, 1), (2, 2), (1, 1)]$, and use the first element in the tuple as key. This will sort it as $A=[(1, 1), (2, 2), (2, 1)]$. - -However, we can still make quick sort stable if we get rid of the swaps by using two extra lists: one for saving the smaller and equivalent items and the other for the larger items. 
-%%%%%%%%%%%%%%%%%%%%%%%%Quick Select$$$$$$$$$$$$$$$$$$ -\subsubsection{Quick Select} -Quick Select is a variant of quick sort, and it is used to find the $k$-th smallest item in a list in linear time on average. Quick sort recurs on both sides of the partition index, while quick select recurs only on the side that contains the $k$-th smallest item. This is similar to binary search: comparing $k$ with the partition index $p$ results in three cases: -\begin{itemize} - \item If $p=k$, we find the $k$-th smallest item, return. - \item If $p > k $, the $k$-th smallest item lies to the left of the pivot, so we recur on the left side. - \item If $p < k $, the $k$-th smallest item lies to the right of the pivot, so we recur on the right side. -\end{itemize} -Assuming each partition splits the subarray roughly in half, quick select has the following recurrence time complexity function: -\begin{align} -T(n) &= T(n/2) + O(n)\\ -T(n) &= O(n) \texttt{ (with master theorem)} -\end{align} -% This reduces the expected complexity from $O(n \log n)$ to $O(n)$, with a worst case of $O(n^2)$. -\paragraph{Implementation} We first set $k$ in range of $[s, e]$. When $s=e$, there is only one item in the list, which means we can no longer divide it. This is our end condition and is also the case when our original list has only one item, in which case we return this item as the 0-th smallest item. -\begin{lstlisting}[language=Python] -def quickSelect(a, s, e, k, partition=partition): - if s >= e: - return a[s] - - p = partition(a, s, e) - if p == k: - return a[p] - if k > p: - return quickSelect(a, p+1, e, k, partition) - else: - return quickSelect(a, s, p-1, k, partition) -\end{lstlisting} - - - - -%%%%%%%%%%%%%%%% -% Section: Counting sort % -%%%%%%%%%%%%%%%% -\section{Linear Sorting} -\label{linear_sorting} -Sorting without basing upon comparisons is possible, creative, and even faster, proved by the three non-comparative sorting algorithms we are about to introduce: Bucket Sort, Counting Sort, and Radix Sort. 
-For these algorithms the theoretic lower bound $\Omega(n\log n)$ of comparison-based sorting no longer applies; they all work in linear time. However, there are limitations to the input data, as these sorting techniques rely on certain assumptions concerning the data to be sorted to be able to work. - -Although the three algorithms we see in this section come in different forms and rely on different assumptions to the input data, we see one thing in common: They all use the divide and conquer algorithm design paradigm. Let's explore their unique tricks and the restrictive applications! - -\subsection{Bucket Sort} -Bucket Sort assumes that the input data satisfies a uniform distribution. The uniform distribution is usually assumed to be in interval $[0, 1)$. However, it can be extended to any uniform distribution with simple modification. Bucket sort applies a one time divide and conquer trick--it divides the input data into $n$ independent segments, $n$ the size of the input, just as what we have seen in merge sort, and then insertion sort is applied on each segment, and finally the sorted segments are combined to get the result. - -Bucket sort manages the dividing process by assigning $n$ empty \textbf{buckets}, and then distributes the input data $a[i]$ to bucket index \texttt{int(a[i]*n)}. For example, if $n=10$, and $a[i]=0.15$, the bucket that the number goes to is the one with index $1$. We use example a = [0.42, 0.72, 0. , 0.3 , 0.15, 0.09, 0.19, 0.35, 0.4 , 0.54], and visualize the process in Fig.~\ref{fig:bucket_sort}. -\begin{figure}[!ht] - \centering - - \includegraphics[width=0.8\columnwidth]{fig/bucket_sort.png} - \caption{Bucket Sort} - \label{fig:bucket_sort} -\end{figure} - - -\paragraph{Implementation} First, we prepare the input data with \texttt{random.uniform} from the \texttt{numpy} library. For simplicity and the reconstruction of the same input, we used a random seed and rounded the float numbers to only two decimals. 
-\begin{lstlisting}[language=Python] -import numpy as np -np.random.seed(1) -a = np.random.uniform(0, 1, 10) -a = np.round(a, decimals=2) -\end{lstlisting} -Now, the code for the bucket sort is straightforward as: -\begin{lstlisting}[language=Python] -from functools import reduce -def bucketSort(a): - n = len(a) - buckets = [[] for _ in range(n)] - # Divide numbers into buckets - for v in a: - buckets[int(v*n)].append(v) - # Apply insertion sort within each bucket - for i in range(n): - insertionSort(buckets[i]) - # Combine sorted buckets - return reduce(lambda a, b: a + b, buckets) -\end{lstlisting} -\paragraph{Complexity Analysis} -\paragraph{Extension} To extend to uniform distribution in any range, we first find the minimum and maximum value, $minV, maxV$, and compute the bucket index $i$ for number $a[i]$ with formula: -\begin{align} -i= n\frac{a[i]-minV}{maxV-minV} -\end{align} -\subsection{Counting Sort} -\label{O_n_k_sorting} -Counting sort is an algorithm that sorts items according to their corresponding keys that are small integers. It works by counting the occurrences of each distinct key value, and using arithmetic--prefix sum--on those counts to determine the position of each key value in the sorted sequence. Counting sort no longer fits into the comparison-based sorting paradigm because it uses the keys as indexing to assist the sorting instead of comparing them directly to decide relative positions. For input that comes with size $n$ and the difference between the maximum and minimum integer keys $k$, counting sort has a time complexity $O(n+k)$. - - - - - - -% The counting sort is a sorting technique based on keys between a specific range. It works by counting the occurrence number of each distinct key and saves it into a count array. Then a prefix sum computation is applied on count array. Eventually, a loop over the given array and put the key to location pointed out by the count array. 
-\subsubsection{Premise: Prefix Sum} Before we introduce counting sort, first let us see what is prefix sum. Prefix sum, a.k.a cumulative sum, inclusive scan, or simply scan of a sequence of numbers $x_i, i \in [0, n-1]$ is second sequence of numbers $y_i, i \in [0, n-1]$, and $y_i$ is the sums of prefixes of the input sequence, with equation: -\begin{align} - y_i = \sum_{j=0}^ix_j -\end{align} -For instance, the prefix sums of on the following array is: -\begin{lstlisting}[numbers=none] -Index: 0 1 2 3 4 5 -x: 1 2 3 4 5 6 -y: 1 3 6 10 15 21 -\end{lstlisting} - -Prefix sums are trivial to compute with the following simple recurrence relation in $O(n)$ complexity. -\begin{align} - y_i = y_{i-1} + x_i, i \geq 1 -\end{align} - -Despite the ease of computation, prefix sum is a useful primitive in certain algorithms such as counting sort and Kadane's Algorithm as you shall see through this book. - -\subsubsection{Counting Sort} -Given an input array $[1, 4, 1, 2, 7, 5, 2]$, let's see how exactly counting sort works by explaining it in three steps. Because our input array comes with duplicates, we distinguish the duplicates by their relative order shown in the parenthesises. Ideally, for this input, we want it to be sorted as: -\begin{lstlisting}[numbers=none] -Index: 0 1 2 3 4 5 6 -Key: 1(1) 4 1(2) 2(1) 7 5 2(2) -Sorted:1(1) 1(2) 2(1) 2(2) 4 5 7 -\end{lstlisting} -\begin{figure}[!ht] - \centering - - \includegraphics[width=0.8\columnwidth]{fig/counting_sort_1.png} - \caption{Counting Sort: The process of counting occurrence and compute the prefix sum.} - \label{fig:counting_sort_1} -\end{figure} -\begin{enumerate} - \item \textbf{Count Occurrences:} We assign a \texttt{count} array $C_i$, and assign a size 8, which has index in range $[0, 7]$ and will be able to contain our keys whose range is $[1, 7]$. which has the same size of the key range $k$. We loop over each key in the input array, and use key as index to count each key's occurrence. 
Doing so will get the following result. And it means in the input array, we have two 1's, two 2's, one 4, one 5, and one 7. The process is shown in Fig.~\ref{fig:counting_sort_1}. - - Counting sort is indeed a subtype of bucket sort, where the number of buckets is $k$, and each bucket stores keys implicitly by using keys as indexes and the occurrence to track the total number of the same keys. -% \begin{lstlisting}[numbers=none] -% Index: 0 1 2 3 4 5 6 7 -% Count: 0 2 2 0 1 1 0 1 -% \end{lstlisting} - -\item \textbf{Prefix Sum on Count Array:} We compute the prefix sum for \texttt{count} array, which is shown as: -\begin{lstlisting}[numbers=none] -Index: 0 1 2 3 4 5 6 7 -Count: 0 2 2 0 1 1 0 1 -Prefix Sum: 0 2 4 4 5 6 6 7 -\end{lstlisting} -Denote the prefix sum array as $ps$. For key $i$, $ps_{i-1}$ tells us the number of items that are strictly less than ($<$) key $i$, while $ps_i$ tells us the number of items that are less than or equal to ($\leq$) key $i$. This information can be used to place key $i$ directly into its correct position. For example, for key 2, summing over its previous keys' occurrences ($ps_{1}$) gives us 2, indicating that we can put key 2 to position $2$. However, key 2 appears two times, and the last position of key 2 is indicated by $ps_2 - 1$, which is 3. Therefore, for any key $i$, its locations in the sorted array are in range $[ps_{i-1}, ps_i)$. -%Observing the count array after this step showing in the below table, key 7 will be the order $(C_6, C_7]$, which is $(6, 7]$, that is only the 7-th position at index 6. For key 2, it is in the range of $(2, 4]$, that is 3-th and 4-th position. -% \begin{lstlisting}[numbers=none] -% Index: 0 1 2 3 4 5 6 7 -% Prefix Sum: 0 2 4 4 5 6 6 7 -% \end{lstlisting} -We could have just scanned the prefix sum array, and used the prefix sum as locations for the key indicated by the index of the prefix sum array. However, this method is limited to situations where the input array consists only of integers. Moreover, it is unable to keep the relative ordering of the items of the same key. 
- -\item \textbf{Sort Keys with Prefix Sum Array: } First, let us loop over the input keys from position $0$ to $n-1$. For $key_i$, we decrease the prefix sum by one, $ps_{key_i} = ps_{key_i}-1$ to get the last position that we can assign this key in the sorted array. The whole process is shown in Fig.~\ref{fig:counting_sort}. We see that items of the same key end up in reverse order. Looping over keys in the input in reverse order corrects this, thus making the counting sort a stable sorting algorithm. -\begin{figure}[!ht] - \centering - \includegraphics[width=1.1\columnwidth]{fig/counting_sort.png} - \caption{Counting sort: Sort keys according to prefix sum.} - \label{fig:counting_sort} -\end{figure} - -\end{enumerate} - - -\paragraph{Implementation} In our implementation, we first find the range of the input data, say it is $[minK, maxK]$, making our range $k = maxK-minK+1$. And we recast the key as $key - minK$ for two purposes: -\begin{itemize} - \item To save space for \texttt{count} array. - \item To be able to handle negative keys. -\end{itemize}The implementation of the main three steps is nearly the same as what we have discussed other than the recast of the key. In the process, we used two auxiliary arrays: \texttt{count} array for counting and accumulating the occurrence of keys with $O(k)$ space and \texttt{order} array for storing the sorted array with $O(n)$ space, giving us the space complexity $O(n+k)$ in our implementation. 
The Python code is shown as: -\begin{lstlisting}[language=Python] -def countSort(a): - minK, maxK = min(a), max(a) - k = maxK - minK + 1 - count = [0] * (maxK - minK + 1) - n = len(a) - order = [0] * n - # Get occurrence - for key in a: - count[key - minK] += 1 - - # Get prefix sum - for i in range(1, k): - count[i] += count[i-1] - - # Put key in position - for i in range(n-1, -1, -1): - key = a[i] - minK - count[key] -= 1 # to get the index as position - order[count[key]] = a[i] - return order -\end{lstlisting} - -\paragraph{Properties} Counting sort is \textbf{out-of-place} for the auxiliary \texttt{count} and \texttt{order} array. Counting sort is \textbf{stable} given that we iterate keys in the input array in reversed order. Counting sort is likely to have $O(n+k)$ for both the space and time complexity. - -\paragraph{Applications} Due to the special character that counting sort sorts by using key as index, and the range of keys decides the time and space complexity, counting sort's applications are limited. We list the most common applications: -\begin{itemize} - \item Because the time complexity depends on the size of $k$, in practice counting sort is usually used when $k=O(n)$, in which case it makes the time complexity $O(n)$. - \item Counting sort is often used as a sub-routine. For example, it is a part of other sorting algorithms such as radix sort, which is a linear sorting algorithm. We will also see some examples in string matching chapter. -\end{itemize} - -\subsection{Radix Sort} -The word ``Radix'' is a mathematical term for the \textit{base} of a number. For example, decimal and hexadecimal number has a radix of 10 and 16, respectively. For strings of alphabets has a radix of 26 given there are 26 letters of alphabet. Radix sort is a non-comparative sorting methods that utilize the concept of radix or base to order a list of integers digit by digit or a list of strings letter by letter. 
The sorting of integers or strings of alphabets is different based on the different concepts of ordering--number ordering and the lexicographical order as we have introduced. We show one example for list of integers and strings and their sorted order or lexicographical order: -%the sorted order for [170, 45, 75, 90, 802, 24] is [24, 45, 75, 90, 170, 802]. For a list of fruits [apple, pear, berry, peach, apricot], its lexicographical order is [apple, apricot, berry, peach, pear]. -\begin{lstlisting}[numbers=none] -Integers: 170, 45, 75, 90, 802, 24 -Sorted: 24, 45, 75, 90, 170, 802 -\end{lstlisting} - -\begin{lstlisting}[numbers=none] -Strings: apple, pear, berry, peach, apricot -Sorted: apple, apricot, berry, peach, pear -\end{lstlisting} -And we see how that the integers are ordered by the length of digits, whereas in the sorted strings, the length of strings does not usually decide the ordering. - -Within Radix sorting, it is usually either the bucket sort or counting sort that is doing the sorting using one radix as key at a time. Based upon the sorting order of the digit, we have two types of radix sorting: \textit{Most Significant Digit (MSD) radix sort} which starts from the left-most radix first and goes all the way the right-most radix, and \textit{Least Significant Digit (LSD) radix sort} vice versa. -% \begin{itemize} -% \item In our fruit alphabetization example, it groups the strings by a single letter with either bucket sort or counting sort under the hood, starting from the very first letter on the left side all the way to the very last on the right if necessary. This form of radix sort is also know as Most Significant Digit (MSD) radix sort, and is usually solved by recursion. -% \item -% \end{itemize} -We should address the details of the two forms of radix sort -- MSD and LSD using our two examples. - -\subsubsection{LSD Radix Sorting Integers} -LSD radix sort is often used to sort list of integers. 
It sorts the entire numbers one digit/radix at a time from the least-significant to the most-significant digit. For a list of positive integers where the maximum of them has $m$ digits, LSD radix sort takes a total of $m$ passes to finish sorting. - -\begin{figure}[!ht] - \centering - \includegraphics[width=0.98\columnwidth]{fig/lsd_radix_integer_sort.png} - \caption{Radix Sort: LSD sorting integers in iteration} - \label{fig:lsd_radix_integer_sort} -\end{figure} - -Here, we demonstrate the process of LSD radix sorting on our exemplary list of integers with counting sort as a subroutine to sort items by using each radix as key. $m=3$ in our example: -\begin{itemize} - \item As shown in Fig.~\ref{fig:lsd_radix_integer_sort}, in the first pass, the least significant digit (1s place) is used as key to sort. After this pass, the numbers are ordered by their units digit. - \item In the second pass, the 10s place digit is used. After this pass, we see that the numbers that have at most two digits, namely $24, 45, 75, 90$ in our example, are in order. - \item At the third and last pass, the 100s place digit is used. For numbers that are short of 100s place digit, 0 is placed. Afterwards, all the numbers are in order. -\end{itemize} -We have to notice that the sorting will not work unless the sorting subroutine we apply is stable. For example, in our last pass, there exist four zeros, indicating that they share the same key value. If the relative ordering of them is not kept, the previous sorting effort will be wasted. -\paragraph{Implementation} -To implement the code with Python, we first need to know how to get each digit out of an integer. With number 178 as an example: -\begin{itemize} - \item The least significant digit 8 is the remainder of $178\%10$. - \item The second least-significant digit 7 is the remainder of $17\%10$. - \item And the most-significant digit 1 is the remainder of $1\%10$. 
-\end{itemize} -As we see for digit 8, we need to have 178, for digit 7, we need to have 17, and for digit 1, we only need 1. $178, 17, 1$ are the prefix till the digit we need. We can obtain these prefixes via a base $exp$. -\begin{lstlisting}[numbers=none] -exp = 1, (178 // exp ) = 178, 178 % 10 = 8 -exp = 10, (178 // exp ) = 17, 17 % 10 = 7 -exp = 100, (178 // exp ) = 1, 1 % 10 = 1 -\end{lstlisting} -We can also get the prefix by looping and each time we divide our number by 10. For example, the following code will output [8, 7, 1]. -\begin{lstlisting}[language = Python] -a = 178 -digits = [] -while a > 0: - digits.append(a%10) - a = a // 10 -\end{lstlisting} -Now, we know the number of loops we need is decided by the maximum positive integer in our input array. On the code basis, we use a \texttt{while} loop to obtain the prefix and making sure that it is larger than 0. At each pass, we call \texttt{count\_sort} subroutine to sort the input list. The code is shown as: -\begin{lstlisting}[language=Python] -def radixSort(a): - maxInt = max(a) - exp = 1 - while maxInt // exp > 0: - a = count_sort(a, exp) - exp *= 10 - return a -\end{lstlisting} -For subroutine \texttt{count\_sort} subroutine, it is highly similar to our previously implemented counting sort but two minor differences: -\begin{itemize} - \item Because we sort by digits, therefore, we have to use a formula: $key = (key // exp) \% 10$ to covert the key to digit. - \item Because for decimal there are in total only 10 digits, we only arrange 10 total space for the \texttt{count} array. 
-\end{itemize} -The code is as: -\begin{lstlisting}[language=Python] -def count_sort(a, exp): - count = [0] * 10 # [0, 9] - n = len(a) - order = [0] * n - # Get occurrence - for key in a: - key = (key // exp) % 10 - count[key] += 1 - - # Get prefix sum - for i in range(1, 10): - count[i] += count[i-1] - - # Put key in position - for i in range(n-1, -1, -1): - key = (a[i] // exp) % 10 - count[key] -= 1 # to get the index as position - order[count[key]] = a[i] - return order -\end{lstlisting} -\paragraph{Properties and Complexity Analysis} -Radix sorting for integers takes $m$ passes with $m$ as the total digits, and each pass takes $O(n+k)$, where $k=10$ since there is only 10 digits for decimals. This gives out a total of $O(mn)$ time complexity, and $m$ is rather of a constant compared with variable $n$, thus radix sorting for integers with counting sort as subroutine has a linear time complexity. Due to the usage of counting sort, which is stable, making the radix sorting a stable sorting algorithm too. - -With the usage of auxiliary \texttt{count} and \texttt{order}, it gives a $O(n)$ space complexity, and makes the LSD integer sorting an out-of- place sorting algorithm. -\subsubsection{MSD Radix Sorting Strings} -In our fruit alphabetization example, it uses MSD radix sorting and groups the strings by a single letter with either bucket sort or counting sort under the hood, starting from the very first letter on the left side all the way to the very last on the right if necessary. MSD radix sorting is usually implemented with recursion. - -\begin{figure}[!ht] - \centering - \includegraphics[width=0.9\columnwidth]{fig/msd_radix_string_sort.png} - \caption{Radix Sort: MSD sorting strings in recursion. The black and grey arrows indicate the forward and backward pass in recursion, respectively. } - \label{fig:msd_radix_string_sort} -\end{figure} - -For better demonstration, we add two more strings: ``ap'' and ``pear''. 
String ``ap'' is for showing what happens when strings are in the same bucket but one is shorter and has no valid letter to compare with. And string ``pear'' is to showcase how the algorithm handles duplicates. -The algorithm indeed applies the recursive divide and conquer design methodology. - -\paragraph{Implementation} We show bucket sort here, as in lexicographical ordering, the first letter of the strings already decides the ordering of groups of strings bucketed by this letter. This MSD sorting method divides the strings into different buckets indexed by letter, and it then combines the returned results together to get the final sorted strings, which is highly similar to merge sort. %the groups of strings by the first letter but in our code source, the one with counting sort is provided too. -\begin{itemize} - \item At first, the recursion handles the first keys of the first letter in the string, as the process where $i=0$ shown in Fig.~\ref{fig:msd_radix_string_sort}. There are three buckets with letter `a', `b', and `p'. - \item At depth 2 when $i=1$, the resulting buckets from depth 1 are further bucketed by the second letter. Bucket `b' contains only one item, which itself is sorted, thus, the recursion ends for this bucket. For the last bucket `a' and `p', they are further bucketed by letter `p' and `e'. - \item At depth 3 when $i=2$, for the last bucket `p', it further results in two more buckets `p' and `r'. However, for string `ap' in the bucket, there is no valid third letter to use as index. And according to the lexicographical order, it puts `ap' in earlier ordering of the resulted buckets. \item In our example, the forward process in the recursion is totally done when $i=4$. It then enters into the backward phase, which merges buckets that are either composed of a single item or the \texttt{done\_bucket}. 
-\end{itemize} - -The code is offered as: -\begin{lstlisting}[language=Python] -def MSD_radix_string_sort(a, i): - # End condition: bucket has only one item - if len(a) <= 1: - return a - - # Divide - buckets = [[] for _ in range(26)] - done_bucket = [] - for s in a: - if i >= len(s): - done_bucket.append(s) - else: - buckets[ord(s[i]) - ord('a')].append(s) - # Conquer and chain all buckets - ans = [] - for b in buckets: - ans += MSD_radix_string_sort(b, i + 1) - return done_bucket + ans -\end{lstlisting} - -\paragraph{Properties and Complexity Analysis} -Because the bucket sort itself is a stable sorting algorithm, making the radix sort for string stable too. - -The complexity analysis for the recursive Radix sorting can be accomplished with recursion tree. The tree has nearly $n$ leaves. The worst case occurs when all strings within the input array are the same, thus the recursion tree degrades to linear structure with length $n$ and within each node $O(n)$ is spent to scan items of corresponding letter, making the worst time complexity $O(n^2)$. - -For the existence of auxiliary \texttt{buckets}, \texttt{done\_bucket}, and \texttt{ans} arrays in sorting of strings, it is an out-of-place sorting. With the same recursion tree analysis for space, we have linear space complexity too. - -% The connection of BST/Trie to Quciksort/Radix Sort. - -% \textcolor{red}{Need further understanding}. A binary search tree is a dynamic version of what happens during quicksort. The root represents an arbitrary (but hopefully not too far off from the median) pivot element from the collection. The left subtree is then everything less than the root, and the right subtree is everything greater than the root. The left and right collections are then again ordered in the same manner, i.e. the data structure is defined recursively. - -% A trie is a dynamic version of what happens during radix sort. 
You look at the first bit or digit of a number (or first letter of a string) to determine which subtree the value belongs in. You then repeat the procedure recursively using the next character or digit to determine which of the subtree's children it belongs in, and so on. - - -% %%%%%%%%%%%%%%%% -% % Section: bucket sort % -% %%%%%%%%%%%%%%%% -% \section{$O(n)$ Sorting} -% \label{o_n_sorting} - - -% \paragraph{Use Counting Sort} Counting sort is one of the most efficient sorting algorithm for lexicographical ordered sorting. -% \begin{lstlisting}[language = Python] -% def countSort(arr): - -% # The output character array that will have sorted arr -% output = [0 for i in range(256)] - -% # Create a count array to store count of inidividul -% # characters and initialize count array as 0 -% count = [0 for i in range(256)] - -% # For storing the resulting answer since the -% # string is immutable -% ans = ["" for _ in arr] - -% # Store count of each character -% for i in arr: -% count[ord(i)] += 1 - -% # Change count[i] so that count[i] now contains actual -% # position of this character in output array -% for i in range(256): -% count[i] += count[i-1] - -% # Build the output character array -% for i in range(len(arr)): -% output[count[ord(arr[i])]-1] = arr[i] -% count[ord(arr[i])] -= 1 - -% # Copy the output array to arr, so that arr now -% # contains sorted characters -% for i in range(len(arr)): -% ans[i] = output[i] -% return ans - -% # Driver program to test above function -% arr = "geeksforgeeks" -% ans = countSort(arr) -% print "Sorted character array is %s" %("".join(ans)) -% \end{lstlisting} - - -%%%%%%%%%%%%%%%% -% Section: Python sort % -%%%%%%%%%%%%%%%% -\section{Python Built-in Sort} -\label{python_built_in_sort} -There are two built-in functions to sort \texttt{list} and other iterable objects in Python 3, and both of them are stable. In default, they use \texttt{<} comparisons between items and sort items in increasing order. 
-\begin{itemize} - \item Built-in method \texttt{list.sort(key=None, reverse=False)} of \texttt{list} which sorts the items in the list in-place, and returns \texttt{None}. - \item Built-in function \texttt{sorted(iterable, key=None, reverse=False)} works on \textit{any iterable object}, including \texttt{list}, \texttt{string}, \texttt{tuple}, \texttt{dict}, and so on. It sorts the items out-of-place; returning another \texttt{list} and keeps the original input unmodified. -\end{itemize} -\subsubsection{Basics} -To use the above two built-in methods to sort a list of integers is just as simple as: -\begin{lstlisting}[language=Python] -lst = [4, 5, 8, 1, 2, 7] -lst.sort() -\end{lstlisting} -Printing out \texttt{lst} shows that the sorting happens in-place within \texttt{lst}. -\begin{lstlisting}[language=Python] -[1, 2, 4, 5, 7, 8] -\end{lstlisting} -Now, use \texttt{sorted()} for the same list: -\begin{lstlisting}[language=Python] -lst = [4, 5, 8, 1, 2, 7] -new_lst = sorted(lst) -\end{lstlisting} -We print out: -\begin{lstlisting}[language=Python] -new_lst, lst -([1, 2, 4, 5, 7, 8], [4, 5, 8, 1, 2, 7]) -\end{lstlisting} -Let's try to sort other iterable object, and try sort a tuple of strings: -\begin{lstlisting}[language=Python] -fruit = ('apple', 'pear', 'berry', 'peach', 'apricot') -new_fruit = sorted(fruit) -\end{lstlisting} -Print out \texttt{new\_fruit}, and we also see that it returned a \texttt{list} instead of \texttt{tuple}. -\begin{lstlisting}[language=Python] -['apple', 'apricot', 'berry', 'peach', 'pear'] -\end{lstlisting} -Note: For \texttt{list}, \texttt{list.sort()} is faster than \texttt{sorted()} because it doesn't have to create a copy. For any other iterable, we have no choice but to apply \texttt{sorted()} instead. - -\paragraph{Change Comparison Operator} What if we want to redefine the behavior of comparison operator \texttt{<}? 
Other than writing a class and defining \texttt{\_\_lt\_\_()}, in Python 2, these two built-in functions have another argument, \texttt{cmp}, but it is totally dropped in Python 3. We can use \texttt{functools}'s \texttt{cmp\_to\_key} method to convert it to a key in Python 3. For example, if we want to sort [4, 5, 8, 1, 2, 7] in reverse order, we can define a \texttt{cmp} function that reverses the order of items to be compared: -\begin{lstlisting}[language=Python] -def cmp(x, y): - return y - x -\end{lstlisting} -And then we call this function as: -\begin{lstlisting}[language=Python] -from functools import cmp_to_key -lst.sort(key=cmp_to_key(cmp)) -\end{lstlisting} -The printout of \texttt{lst} is: -\begin{lstlisting}[language=Python] -[8, 7, 5, 4, 2, 1] -\end{lstlisting} - -\paragraph{Timsort} -These two methods both use the same sorting algorithm -- \textit{Timsort} -- and have the same parameters. Timsort is a hybrid, stable sorting algorithm (it needs up to $O(n)$ temporary space for merging, so it is not strictly in-place), derived from merge sort and insertion sort, designed to perform well on many kinds of real-world data. It uses techniques from Peter McIlroy's ``Optimistic Sorting and Information Theoretic Complexity'', January 1993. It was implemented by Tim Peters in 2002 for use in the Python programming language. The algorithm finds subsequences of the data that are already ordered, and uses that knowledge to sort the remainder more efficiently. - -\subsubsection{Arguments} -They both take two keyword-only arguments, \texttt{key} and \texttt{reverse}, which default to \texttt{None} and \texttt{False}, respectively. -\begin{itemize} - \item Argument \texttt{key}: it specifies a function of one argument that is used to extract a comparison key from each list item (for example, \texttt{key=str.lower}). If not set, the default value \texttt{None} means that the list items are sorted directly. - \item Argument \texttt{reverse}: a boolean value.
If set to \texttt{True}, then the list or iterable is sorted as if each comparison were reversed (i.e., as if \texttt{>} were used instead of \texttt{<}), so the sorted list is in descending order. The default value is \texttt{False}. -\end{itemize} -\paragraph{Sort in Reverse Order} Setting \texttt{reverse=True} will sort a list of integers in decreasing order: -\begin{lstlisting}[language=Python] -lst = [4, 5, 8, 1, 2, 7] -lst.sort(reverse=True) -\end{lstlisting} -Print out \texttt{lst}, we see: -\begin{lstlisting}[language=Python] -[8, 7, 5, 4, 2, 1] -\end{lstlisting} -This is equivalent to customizing a class \texttt{Int} and overriding its \texttt{\_\_lt\_\_()} special method as: -\begin{lstlisting}[language=Python] -class Int(int): - def __init__(self, val): - self.val = val - def __lt__(self, other): - return other.val < self.val -\end{lstlisting} -Now, sorting the same list without setting \texttt{reverse} will get us exactly the same result: -\begin{lstlisting}[language=Python] -lst = [Int(4), Int(5), Int(8), Int(1), Int(2), Int(7)] -lst.sort() -\end{lstlisting} - -\paragraph{Customize \texttt{key}} -We have mainly two options to customize the \texttt{key} argument: (1) through a \texttt{lambda} function, (2) through a pre-defined function. Either way, the function takes only one argument.
For example, to sort the following list of tuples by using the second item in the tuple as key: -\begin{lstlisting}[language=Python] -lst = [(8, 1), (5, 7), (4, 1), (1, 3), (2, 4)] -\end{lstlisting} -We can write a function, and set \texttt{key} argument to this function -\begin{lstlisting}[language=Python] -def get_key(x): - return x[1] -new_lst = sorted(lst, key = get_key) -\end{lstlisting} -The sorted result is: -\begin{lstlisting}[language=Python] -[(8, 1), (4, 1), (1, 3), (2, 4), (5, 7)] -\end{lstlisting} -The same result can be achieved via lambda function which is more convenient: -\begin{lstlisting}[language=Python] -new_lst = sorted(lst, key = lambda x: x[1]) -\end{lstlisting} - - -Same rule applies to objects with named attributes. For example, we have the following class named \texttt{Student} that comes with three attributes: \texttt{name}, \texttt{grade}, and \texttt{age}, and we want to sort a list of \texttt{Student} class only through the age. -\begin{lstlisting}[language = Python] -class Student(object): - def __init__(self, name, grade, age): - self.name = name - self.grade = grade - self.age = age - - # To support indexing - def __getitem__(self, key): - return (self.name, self.grade, self.age)[key] - - def __repr__(self): - return repr((self.name, self.grade, self.age)) -\end{lstlisting} -We can do it through setting \texttt{key} argument still: -\begin{lstlisting}[language = Python] -students = [Student('john', 'A', 15), Student('jane', 'B', 12), Student('dave', 'B', 10)] -sorted(students, key=lambda x: x.age) -\end{lstlisting} -which outputs the following result: -\begin{lstlisting}[language=Python] -[('dave', 'B', 10), ('jane', 'B', 12), ('john', 'A', 15)] -\end{lstlisting} - -The key-function patterns shown above are very common, so Python provides convenience functions to make accessor functions easier and faster. 
The \texttt{operator} module has \texttt{itemgetter()} and \texttt{attrgetter()} to get the attributes by index and name, respectively. For the sorting above, we can do it like this: -\begin{lstlisting}[language = Python] -from operator import attrgetter -sorted(students, key=attrgetter('age')) -\end{lstlisting} -\texttt{attrgetter} can take multiple arguments, for example, we can sort the list first by `grade' and then by `age', we can do it as: -\begin{lstlisting}[language = Python] -sorted(students, key=attrgetter('grade', 'age')) -\end{lstlisting} -which outputs the following result: -\begin{lstlisting}[language=Python] -[('john', 'A', 15), ('dave', 'B', 10), ('jane', 'B', 12)] -\end{lstlisting} -If our object supports indexing, which is why we defined \texttt{\_\_getitem\_\_()} in the class, we can use \texttt{itemgetter()} to do the same thing: -\begin{lstlisting}[language=Python] -from operator import itemgetter -sorted(students, key=itemgetter(2)) -\end{lstlisting} - - -%%%%%%%%%%Summary and Bonus%%%%%%%%%%%%%%%%%%%%% -\section{Summary and Bonus} -Here, we give a comprehensive summary of the time complexity for different sorting algorithms. -\begin{figure}[h] - \centering - \includegraphics[width=1\columnwidth]{fig/time_complexity_sorting.png} - \caption{The time complexity for common sorting algorithms} - \label{fig:sort_complexity} -\end{figure} -%%%%%%%%%%exercise -\section{LeetCode Problems} - -\paragraph{Problems} - -\begin{examples} - \item \textbf{Insertion Sort List (147).} Sort a linked list using insertion sort. A graphical example of insertion sort. The partial sorted list (black) initially contains only the first element in the list. With each iteration one element (red) is removed from the input data and inserted in-place into the sorted list - - Algorithm of Insertion Sort: Insertion sort iterates, consuming one input element each repetition, and growing a sorted output list. 
- At each iteration, insertion sort removes one element from the input data, finds the location it belongs within the sorted list, and inserts it there. It repeats until no input elements remain. - \begin{lstlisting}[numbers=none] -Example 1: -Input: 4->2->1->3 -Output: 1->2->3->4 - -Example 2: -Input: -1->5->3->4->0 -Output: -1->0->3->4->5 - \end{lstlisting} - - \item \textbf{Merge Intervals (56, medium).} Given a collection of intervals, merge all overlapping intervals. -\begin{lstlisting}[numbers=none] -Example 1: -Input: [[1,3],[2,6],[8,10],[15,18]] -Output: [[1,6],[8,10],[15,18]] -Explanation: Since intervals [1,3] and [2,6] overlaps, merge them into [1,6]. - -Example 2: -Input: [[1,4],[4,5]] -Output: [[1,5]] -Explanation: Intervals [1,4] and [4,5] are considered overlapping. -\end{lstlisting} - -\item \textbf{Valid Anagram (242, easy).} Given two strings s and t , write a function to determine if t is an anagram of s. -\begin{lstlisting}[numbers=none] -Example 1: -Input: s = "anagram", t = "nagaram" -Output: true - -Example 2: -Input: s = "rat", t = "car" -Output: false -\end{lstlisting} -\textit{Note: You may assume the string contains only lowercase alphabets.} - -Follow up: \textit{What if the inputs contain unicode characters? How would you adapt your solution to such case?} - -\item \textbf{Largest Number (179, medium).} - -\item \textbf{Sort Colors (leetcode: 75).} Given an array with n objects colored red, white or blue, sort them so that objects of the same color are adjacent, with the colors in the order red, white and blue. Here, we will use the integers 0, 1, and 2 to represent the color red, white, and blue respectively. -\textit{Note: You are not suppose to use the library's sort function for this problem.} - -\item \textbf{ -148. 
Sort List (sort linked list using merge sort or quick sort).} -\end{examples} - -\paragraph{Solutions} -\begin{enumerate} - \item Solution: the insertion sort is easy, we need to compare current node with all previous sorted elements. However, to do it in the linked list, we need to know how to iterate elements, how to build a new list. In this algorithm, we need two while loops to iterate: the first loop go through from the second node to the last node, the second loop go through the whole sorted list to compare the value of the current node to the sorted element, which starts from having one element. There are three cases for the comparison: if the comp\_node does not move, which means we need to put the current node in front the previous head, and the cur\_node become the new head; if the comp\_node stops at the back of it, so current node is the end, we set its value to 0, and we save the pre\_node in case; if it stops in the middle, we need to put cur\_node in between pre\_node and cur\_node. -\begin{lstlisting}[language = Python] -def insertionSortList(self, head): - """ - :type head: ListNode - :rtype: ListNode - """ - if head is None: - return head - sorted_head = head - cur_node = head.next - head.next = None #sorted list only has one node, a new list - while cur_node: - next_node = cur_node.next #save the next node - cmp_node = head - #compare node with previous all - pre_node = None - while cmp_node and cmp_node.val <= cur_node.val: - pre_node = cmp_node - cmp_node = cmp_node.next - - if cmp_node == head: #put in the front - cur_node.next = head - head = cur_node - elif cmp_node == None: #put at the back - cur_node.next = None #current node is the end, so set it to None - pre_node.next = cur_node - #head is not changed - else: #in the middle, insert - pre_node.next = cur_node - cur_node.next = cmp_node - cur_node = next_node - return head -\end{lstlisting} - -\item Solution: Merging intervals is a classical case that use sorting. 
If we do the sorting at first, and keep track our merged intervals in a heap (which itself its sorted too), we just iterate into the sorted intervals, to see if it should be merged in the previous interval or just be added into the heap. Here the code is tested into Python on the Leetcode, however for the python3 it needs to resolve the problem of the heappush with customized class as iterable item. -\begin{lstlisting}[language = Python] -# Definition for an interval. -# class Interval(object): -# def __init__(self, s=0, e=0): -# self.start = s -# self.end = e -from heapq import heappush, heappop - -class Solution(object): - def merge(self, intervals): - """ - :type intervals: List[Interval] - :rtype: List[Interval] - """ - if not intervals: - return [] - #sorting the intervals nlogn - intervals.sort(key=lambda x:(x.start, x.end)) - h = [intervals[0]] - # iterate the intervals to add - for i in intervals[1:]: - s, e = i.start, i.end - bAdd = False - for idx, pre_interal in enumerate(h): - s_before, e_before = pre_interal.start, pre_interal.end - if s <= e_before: #overlap, merge to the same interval - h[idx].end = max(e, e_before) - bAdd = True - break - if not bAdd: - #no overlap, push to the heap - heappush(h, i) - return h -\end{lstlisting} - -\item Solution: there could have so many ways to do it, the most easy one is to sort the letters in each string and see if it is the same. Or we can have an array of 26, and save the count of each letter, and check each letter in the other one string. -\begin{lstlisting}[language = Python] -def isAnagram(self, s, t): - """ - :type s: str - :type t: str - :rtype: bool - """ - return ''.join(sorted(list(s))) == ''.join(sorted(list(t))) -\end{lstlisting} -The second solution is to use a fixed number of counter. 
-\begin{lstlisting}[language = Python] -def isAnagram(self, s, t): - """ - :type s: str - :type t: str - :rtype: bool - """ - if len(s) != len(t): - return False - table = [0]*26 - start = ord('a') - for c1, c2 in zip(s, t): - print(c1, c2) - table[ord(c1)-start] += 1 - table[ord(c2)-start] -= 1 - for n in table: - if n != 0: - return False - return True -\end{lstlisting} -For the follow up, use a hash table instead of a fixed size counter. Imagine allocating a large size array to fit the entire range of unicode characters, which could go up to more than 1 million. A hash table is a more generic solution and could adapt to any range of characters. - -\item Solution: from instinct, we know we need sorting to solve this problem. From the above example, we can see that sorting them by integer value does not work, because if we do this, with 30, 3, we get 303, while the right answer is 330. Recalling the built-in sort function, we supply a key class that overrides \texttt{\_\_lt\_\_()}: to decide whether $a$ should come before $b$, we compare the concatenations $a+b$ and $b+a$, and put $a$ first if $a+b$ is larger. The time complexity here is $O(n\log n)$. -\begin{lstlisting}[language = Python] -class LargerNumKey(str): - def __lt__(x, y): - return x+y > y+x - -class Solution: - def largestNumber(self, nums): - largest_num = ''.join(sorted(map(str, nums), key=LargerNumKey)) - return '0' if largest_num[0] == '0' else largest_num -\end{lstlisting} -\end{enumerate} -\end{document} \ No newline at end of file diff --git a/Easy-Book/chapters/chapter_15_bit-manipulation.tex b/Easy-Book/chapters/chapter_15_bit-manipulation.tex deleted file mode 100644 index 7720a5d..0000000 --- a/Easy-Book/chapters/chapter_15_bit-manipulation.tex +++ /dev/null @@ -1,652 +0,0 @@ -\documentclass[../main.tex]{subfiles} -\begin{document} -\chapter{Bit Manipulation} -Many books on algorithmic problem solving seem to forget about one topic -- bits and bit manipulation. Bits are how data is represented and saved on the hardware.
Thus knowing this concept and how to do bit manipulation in Python can sometimes also help us devise more efficient algorithms, in terms of either space or time complexity, in later chapters. - - -For example, how to convert a char or integer to bit, how to get each bit, set each bit, and clear each bit. Also, some more advanced bit manipulation operations. After this, we will see some examples to show how to apply bit manipulation in real-life problems. -%%%%%%%%%%%%%%%%%%%%%%Bit operators%%%%%%%%%%%% -\section{Python Bitwise Operators} -\label{sec_basic_bit_operator} -Bitwise operators include \texttt{<<}, \texttt{>>}, \texttt{\&}, \texttt{|}, \texttt{\textasciitilde}, and \texttt{\textasciicircum}. All of these operators operate on signed or unsigned numbers, but instead of treating that number as if it were a single value, they treat it as if it were a string of bits. Twos-complement binary is used for representing signed numbers. - - - - -Now, we introduce the six bitwise operators. -\paragraph{x \texttt{<<} y} Returns $x$ with the bits shifted to the left by $y$ places (and new bits on the right-hand-side are zeros). This is the same as multiplying $x$ by $2^y$. - -\paragraph{x \texttt{>>} y} Returns $x$ with the bits shifted to the right by $y$ places. This is the same as dividing $x$ by $2^y$ with floor division, i.e., the same result as the \texttt{//} operator. This right shift is also called \textit{arithmetic right shift}; it fills in the new bits with the value of the sign bit. - -\paragraph{x \texttt{\&} y} ``Bitwise and''. Each bit of the output is 1 if the corresponding bits of $x$ and of $y$ are both 1, otherwise it's 0. It has the following property: -\begin{lstlisting}[language=Python] -# keep 1 or 0 the same as original -1 & 1 = 1 -0 & 1 = 0 -# set to 0 with & 0 -1 & 0 = 0 -0 & 0 = 0 -\end{lstlisting} - -\paragraph{x \texttt{|} y} ``Bitwise or''. Each bit of the output is 0 if the corresponding bits of $x$ and of $y$ are both 0, otherwise it's 1.
-\begin{lstlisting}[language=Python] -# set to 1 with | 1 -1 | 1 = 1 -0 | 1 = 1 - -# keep 1 or 0 the same as original -1 | 0 = 1 -0 | 0 = 0 -\end{lstlisting} - -\paragraph{$\thicksim x -$} Returns the bitwise complement of $x$ -- the number you get by switching each 1 for a 0 and each 0 for a 1. This is the same as $-x - 1$, because in two's complement $-x = \thicksim x + 1$. - -\paragraph{x $\wedge$ y} ``Bitwise exclusive or''. Each bit of the output is the same as the corresponding bit in $x$ if that bit in $y$ is 0, and it's the complement of the bit in $x$ if that bit in $y$ is 1. It has the following basic properties: -\begin{lstlisting}[language=Python] -# toggle 1 or 0 with ^ 1 -1 ^ 1 = 0 -0 ^ 1 = 1 - -# keep 1 or 0 with ^ 0 -1 ^ 0 = 1 -0 ^ 0 = 0 -\end{lstlisting} -Some examples shown: -\begin{lstlisting} -A = 5 = 0101, B = 3 = 0011 - A ^ B = 0101 ^ 0011 = 0110 = 6 - \end{lstlisting} -More advanced properties of XOR operator include: - \begin{lstlisting} -a ^ b = c -c ^ b = a - -n ^ n = 0 -n ^ 0 = n -eg. a=00111011, b=10100000 , c= 10011011, c ^b= a - \end{lstlisting} - -\paragraph{Logical right shift} The logical right shift differs from the above right shift in that, after shifting, it always puts a 0 in the most significant bit. It is indicated with the $>>>$ operator in Java. However, in Python, there is no such operator, but we can implement one easily using the \textbf{bitstring} module, whose \texttt{>>=} operator pads with zeros. -\begin{lstlisting}[language=Python] ->>> a = BitArray(int=-1000, length=32) ->>> a.int --1000 ->>> a >>= 3 ->>> a.int -536870787 -\end{lstlisting} - -%%%%%%%%%%%%%%%%%%%%%%%Useful Python function%%%%%%%%%%%%%%%%%%%% -\section{Python Built-in Functions} -\label{sec_bitwise_built_in_function} -\paragraph{bin()} The bin() method takes a single parameter \textbf{num} -- an integer -- and returns its \textit{binary string}. If \textbf{num} is not an integer, it raises a TypeError exception.
-\begin{lstlisting}[language=Python] -a = bin(88) -print(a) -# output -# 0b1011000 -\end{lstlisting} -However, bin() doesn't return \textit{binary bits} that applies the two's complement rule. For example, for the negative value: -\begin{lstlisting}[language=Python] -a1 = bin(-88) -# output -# -0b1011000 -\end{lstlisting} -\paragraph{int(x, base = 10)} The int() method takes either a string x to return an integer with its corresponding base. The common base are: 2, 10, 16 (hex). -\begin{lstlisting}[language=Python] -b = int('01011000', 2) -c = int('88', 10) -print(b, c) -# output -# 88 88 -\end{lstlisting} - -\paragraph{chr()} The chr() method takes a single parameter of integer and return a character (a string) whose Unicode code point is the integer. If the integer i is outside the range, ValueError will be raised. -\begin{lstlisting}[language=Python] -d = chr(88) -print(d) -# output -# X -\end{lstlisting} -\paragraph{ord()} The ord() method takes a string representing one Unicode character and return an integer representing the Unicode code point of that character. -\begin{lstlisting}[language=Python] -e = ord('a') -print(e) -# output -# 97 -\end{lstlisting} - -%%%%%%%%%%%%%%%%Two's complement Binary%%%%%%%%%%%%%%%%%%%%%% -\section{Twos-complement Binary} -Given 8 bits, if it is unsigned, it can represent the values 0 to 255 (1111,1111). However, a two's complement 8-bit number can only represent positive integers from 0 to 127 (0111,1111) because the most significant bit is used as sign bit: '0' for positive, and '1' for negative. -\begin{equation} - \sum_{i=0}^{N-1} 2^i = 2^{(N-1)}+2^{(N-2)}+...+2^2+2^1+2^0= 2^N-1 -\end{equation} -The twos-complement binary is the same as the classical binary representation for positive integers and differs slightly for negative integers. Negative integers are represented by performing Two's complement operation on its absolute value: it would be $(2^N-n)$ for representing $-n$ with N-bits. 
-Here, we show Two's complement binary for eight-bit signed integers in Fig.~\ref{fig:twos_complement}. -\begin{figure} - \centering - \includegraphics[width=0.6\columnwidth]{fig/eight_bit_two_complement.png} - \caption{Two's Complement Binary for Eight-bit Signed Integers.} - \label{fig:twos_complement} -\end{figure} -\paragraph{Get Two's Complement Binary Representation} -In Python, to get the two's complement binary representation of a given integer, we do not really have a built-in function to do it directly for negative number. Therefore, if we want to know how the two's complement binary look like for negative integer we need to write code ourselves. The Python code is given as: -\begin{lstlisting}[language=Python] -bits = 8 -ans = (1 << bits) -2 -print(ans) -# output -# '0b11111110' -\end{lstlisting} -There is another method to compute: inverting the bits of n (this is called \textbf{One's Complement}) and adding 1. For instance, use 8 bits integer 5, we compute it as the follows: -\begin{align} -\label{five} - 5_{10} &= {0000, 0101}_2, \\ - {-5}_{10} &= {1111, 1010}_2 + 1_2, \\ - {-5}_{10} &= {1111, 1011}_2 -\label{five_complement} -\end{align} -To flip a binary representation, we need expression x XOR '1111,1111', which is $2^N-1$. The Python Code is given: -\begin{lstlisting}[language=Python] -def twos_complement(val, bits): - # first flip implemented with xor of val with all 1's - flip_val = val ^ (1 << bits - 1) - #flip_val = ~val we only give 3 bits - return bin(flip_val + 1) -\end{lstlisting} - -\paragraph{Get Two's Complement Binary Result} -In Python, if we do not want to see its binary representation but just the result of two's complement of a given positive or negative integer, we can use two operations $-x$ or $\thicksim +1$. 
For input 8, the output is just the negative integer -8 instead of its binary representation: -\begin{lstlisting}[language=Python] -def twos_complement_result(x): - ans1 = -x - ans2 = ~x + 1 - print(ans1, ans2) - print(bin(ans1), bin(ans2)) - return ans1 -# output -# -8 -8 -# -0b1000 -0b1000 -\end{lstlisting} -This is helpful if we just need two's complement result instead of getting the binary representation. - - -%%%%%%%%%%%%%%%%%%%%%%%Useful operation%%%%%%%%%%%%%%%%%%%% -\section{Useful Combined Bit Operations} -\label{sec_useful_bit_combination} - -For operations that handle a single bit, we first need a \textit{mask} that sets only that bit to 1 and all the others to 0. This can be implemented with the left shift operator by shifting 1 to the left by $i$ steps, where $i$ ranges from 0 to $n-1$ for $n$ bits: -\begin{lstlisting}[language=Python] -mask = 1 << i -\end{lstlisting} -\paragraph{Get ith Bit} In order to do this, we use the property of the AND operator: a bit (either 0 or 1) ANDed with 1 stays the same as the original, while a bit ANDed with 0 is set to 0. -\begin{lstlisting}[language=Python] -# for n bit, i in range [0,n-1] -def get_bit(x, i): - mask = 1 << i - if x & mask: - return 1 - return 0 -print(get_bit(5,1)) -# output -# 0 -\end{lstlisting} -Alternatively, we can right shift $x$ by $i$ and AND the result with a single 1. -\begin{lstlisting}[language=Python] -def get_bit2(x, i): - return x >> i & 1 -print(get_bit2(5,1)) -# output -# 0 -\end{lstlisting} - -\paragraph{Set ith Bit} We either need to set it to 1 or 0. To set this bit to 1, we need the matching relation: $1 \rightarrow 1, 0 \rightarrow 1$. Therefore, we use operator |. To set it to 0: $1 \rightarrow 0, 0 \rightarrow 0$. Because 0 \& 0/1 = 0, 1 \& 0 = 0, and 1 \& 1 = 1, we need a mask with that bit set to 0 and all the others set to 1 (that is, $\thicksim mask$), and we AND it with $x$. -\begin{lstlisting}[language=Python] -# set it to 1 -x = x | mask - -# set it to 0 -x = x & (~mask) -\end{lstlisting} - -\paragraph{Toggle ith Bit} Toggling means to turn a bit to 1 if it was 0 and to turn it to 0 if it was 1.
We will be using 'XOR' operator here due to its properties. -\begin{lstlisting}[language=Python] -x = x ^ mask -\end{lstlisting} - -\paragraph{Clear Bits} In some cases, we need to clear a range of bits and set them to 0, our base mask need to put 1s at all those positions, Before we solve this problem, we need to know a property of binary subtraction. Check if you can find out the property in the examples below, -\begin{lstlisting}[numbers=none] -1000-0001 = 0111 -0100-0001 = 0011 -1100-0001 = 1011 -\end{lstlisting} - -The property is, the difference between a binary number n and 1 is all the bits on the right of the rightmost 1 are flipped including the rightmost 1. Using this amazing property, we can create our mask as: -\begin{lstlisting}[language=Python] -# base mask -i = 5 -mask = 1 << i -mask = mask -1 -print(bin(mask)) -# output -# 0b11111 -\end{lstlisting} -With this base mask, we can clear bits: (1) All bits from the most significant bit till i (leftmost till ith bit) by using the above mask. (2) All bits from the lest significant bit to the ith bit by using $\thicksim mask$ as mask. The Python code is as follows: -\begin{lstlisting}[language=Python] -# i i-1 i-2 ... 2 1 0, keep these positions -def clear_bits_left_right(val, i): - print('val', bin(val)) - mask = (1 << i) -1 - print('mask', bin(mask)) - return bin(val & (mask)) -\end{lstlisting} -\begin{lstlisting}[language=Python] -# i i-1 i-2 ... 
2 1 0, erase these positions -def clear_bits_right_left(val, i): - print('val', bin(val)) - mask = (1 << i) -1 - print('mask', bin(~mask)) - return bin(val & (~mask)) -\end{lstlisting} -Run one example: -\begin{lstlisting}[numbers=none] -print(clear_bits_left_right(int('11111111',2), 5)) -print(clear_bits_right_left(int('11111111',2), 5)) -val 0b11111111 -mask 0b11111 -0b11111 -val 0b11111111 -mask -0b100000 -0b11100000 -\end{lstlisting} -\paragraph{Get the lowest set bit } Suppose we are given '0010,1100', we need to get the lowest set bit and return '0000,0100'. And for 1100, we get 0100. If we try to do an AND between 5 and its two's complement as shown in Eq.~\ref{five} and \ref{five_complement}, we would see only the right most 1 bit is kept and all the others are cleared to 0. This can be done using expression $x \&(-x)$, $-x$ is the two's complement of $x$. -\begin{lstlisting}[language=Python] -def get_lowest_set_bit(val): - return bin(val & (-val)) -print(get_lowest_set_bit(5)) -# output -# 0b1 -\end{lstlisting} -Or, optionally we can use the property of subtracting by 1. -\begin{lstlisting} -x ^ (x & (x -1)) -\end{lstlisting} -\paragraph{Clear the lowest set bit} In many situations we want to strip off the lowest set bit for example in Binary Indexed tree data structure, counting number of set bit in a number. 
We use the following operations: -\begin{lstlisting}[language=Python] -def strip_last_set_bit(val): - print(bin(val)) - return bin(val & (val - 1)) -print(strip_last_set_bit(5)) -# output -# 0b101 -# 0b100 -\end{lstlisting} - - -% \paragraph{Update Bits:} $mask = ~(1<> i for num in nums} # shift right for n, divide/2^i, get the first (32-i) bits - answer += any((answer+1) ^ p in prefixes for p in prefixes) - return answer -\end{lstlisting} - -\textbf{Solution 2: Use Trie.} -\begin{lstlisting}[language=Python] -def findMaximumXOR(self, nums): - def Trie(): - return collections.defaultdict(Trie) - - root = Trie() - best = 0 - - for num in nums: - candidate = 0 - cur = this = root - for i in range(32)[::-1]: - curBit = num >> i & 1 - this = this[curBit] - if curBit ^ 1 in cur: - candidate += 1 << i - cur = cur[curBit ^ 1] - else: - cur = cur[curBit] - best = max(candidate, best) - return best -\end{lstlisting} -\end{examples} -\paragraph{With Mask} -\begin{examples}[resume] -\item \textbf{190. Reverse Bits (Easy).}Reverse bits of a given 32 bits unsigned integer. -\begin{lstlisting}[numbers=none] -Example 1: - -Input: 00000010100101000001111010011100 -Output: 00111001011110000010100101000000 -Explanation: The input binary string 00000010100101000001111010011100 represents the unsigned integer 43261596, so return 964176192 which its binary representation is 00111001011110000010100101000000. - -Example 2: - -Input: 11111111111111111111111111111101 -Output: 10111111111111111111111111111111 -Explanation: The input binary string 11111111111111111111111111111101 represents the unsigned integer 4294967293, so return 3221225471 which its binary representation is 10101111110010110010011101101001. -\end{lstlisting} - -\textbf{Solution: Get Bit and Set bit with mask.} We first get bits from the most significant position to the least significant position. 
And get the bit at that position with mask, and set the bit in our 'ans' with a mask indicates the position of (31-i): -\begin{lstlisting}[language=Python] -# @param n, an integer -# @return an integer -def reverseBits(self, n): - ans = 0 - for i in range(32)[::-1]: #from high to low - mask = 1 << i - set_mask = 1 << (31-i) - if (mask & n) != 0: #get bit - #set bit - ans |= set_mask - return ans -\end{lstlisting} - -\item \textbf{201. Bitwise AND of Numbers Range (medium).}Given a range [m, n] where $0 \leq m \leq n \leq 2147483647$, return the bitwise AND of all numbers in this range, inclusive. -\begin{lstlisting}[numbers=none] -Example 1: - -Input: [5,7] -Output: 4 - -Example 2: - -Input: [0,1] -Output: 0 -\end{lstlisting} - -\textbf{Solution 1: O(n) do AND operation.} We start a 32 bit long 1s. The solution would receive LTE error. -\begin{lstlisting}[language=Python] -def rangeBitwiseAnd(self, m, n): - """ - :type m: int - :type n: int - :rtype: int - """ - ans = int('1'*32, 2) - for c in range(m, n+1): - ans &= c - return ans -\end{lstlisting} - -\textbf{Solution 2: Use mask, check bit by bit. } Think, if we AND all, the resulting integer would definitely smaller or equal to $m$. For example 1: -\begin{lstlisting}[numbers=none] -0101 5 -0110 6 -0111 7 -\end{lstlisting} -We start from the least significant bit at 5, if it is 1, then we check the closest number to 5 that has 0 at the this bit. It would be 0110. If this number is in the range, then this bit is offset to 0. We then move on to check the second bit. To make this closest number: first we clear the least i+1 positions in m to get 0100 and then we add it with $1 << (i+1)$ as 0010 to get 0110. 
-\begin{lstlisting}[language=Python] -def rangeBitwiseAnd(self, m, n): - ans = 0 - mask = 1 - for i in range(32): # [::-1]: - bit = mask & m != 0 - if bit: - # clear i+1, ..., 0 - mask_clear = (mask<<1)-1 - left = m & (~mask_clear) - check_num = (mask << 1) + left - if check_num < m or check_num > n: - ans |= 1 << i - mask = mask << 1 - return ans - -\end{lstlisting} - -\textbf{Solution 3: Use While Loop.} We can start do AND of n with (n-1). If the resulting integer is still larger than m, then we keep do such AND operation. -\begin{lstlisting}[language=Python] -def rangeBitwiseAnd(self, m, n): - ans=n - while ans>m: - ans=ans&(ans-1) - return ans -\end{lstlisting} -\end{examples} - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%%%%%%Exerciese -\section{Exercises} -\begin{enumerate} -\item Write a function to determine the number of bits required to convert integer A to integer B. -\begin{lstlisting}[language = Python] -def bitswaprequired(a, b): - count = 0 - c = a^b - while(c != 0): - count += c & 1 - c = c >> 1 - return count -print(bitswaprequired(12, 7)) -\end{lstlisting} - -\item \textbf{389. Find the Difference (easy).} Given two strings $s$ and $t$ which consist of only lowercase letters. String $t$ is generated by random shuffling string s and then add one more letter at a random position. Find the letter that was added in $t$. -\begin{lstlisting}[numbers=none] -Example: -Input: -s = "abcd" -t = "abcde" - -Output: -e -Explanation: -'e' is the letter that was added. -\end{lstlisting} -\textbf{Solution 1: Use Counter Difference.} This way we need $O(M+N)$ space to save the result of counter for each letter. 
-\begin{lstlisting}[language=Python] -def findTheDifference(self, s, t): - s = collections.Counter(s) - t = collections.Counter(t) - diff = t - s - return list(diff.keys())[0] -\end{lstlisting} -\textbf{ Solution 2: Single Number with XOR.} Using bit manipulation and with $O(1)$ we can find it in $O(M+N)$ time, which is the best BCR: -\begin{lstlisting}[language=Python] -def findTheDifference(self, s, t): - """ - :type s: str - :type t: str - :rtype: str - """ - v = 0 - for c in s: - v = v ^ ord(c) - for c in t: - v = v ^ ord(c) - return chr(v) - -\end{lstlisting} - -\item \textbf{50. Pow(x, n) (medium).} for n, such as 10, we represent it as 1010, if we have a base and an result, we start from the least significant position, each time we move, the base because base*base, and if the value if 1, then we multiple the answer with the base. - - - - -% Now consider a range - -% [m = 0bxyz0acd, n=0bxyz1rst] - -% here xyzpacdrst all are digits in base 2. - -% We can find two numbers that are special in the range [m, n] -% \begin{lstlisting} -% (1) m' = 0bxyz0111 -% (2) n' = 0bxyz1000 -% \end{lstlisting} - -% The bitwise AND of all the numbers in range [m, n] is just the bitwise AND of the two special number -% \begin{lstlisting} -% rangeBitwiseAnd(m, n) = m' & n' = 0bxyz0000 -% \end{lstlisting} - -% This tells us, the bitwise and of the range is keeping the common bits of m and n from left to right until the first bit that they are different, padding zeros for the rest. 
-% \begin{lstlisting}[language = Python] -% def rangeBitwiseAnd(self, m, n): -% """ -% :type m: int -% :type n: int -% :rtype: int -% """ -% i = 0 -% while m != n: -% m >>= 1 -% n >>= 1 #find the common bits, i counts how many zeros we need -% i += 1 -% return n << i # common bits then we shift i left -% \end{lstlisting} -\end{enumerate} -\end{document} \ No newline at end of file diff --git a/Easy-Book/chapters/chapter_16_math.tex b/Easy-Book/chapters/chapter_16_math.tex deleted file mode 100644 index b70e492..0000000 --- a/Easy-Book/chapters/chapter_16_math.tex +++ /dev/null @@ -1,609 +0,0 @@ -\documentclass[../main.tex]{subfiles} -\begin{document} -In this chapter, we will specifically talk math related problems. Normally, for the problems appearing in this section, they can be solved using our learned programming methodology. However, it might not inefficient (we will get LTE error on the LeetCode) due to the fact that we are ignoring their math properties which might help us boost the efficiency. Thus, learning some of the most related math knowledge can make our life easier. -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -% sorting -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -% GCD -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -\section{Numbers} -\subsection{Prime Numbers} -A prime number is an integer greater than 1, which is only divisible by 1 and itself. First few prime numbers are : 2 3 5 7 11 13 17 19 23 ... - -Some interesting facts about Prime numbers: -\begin{enumerate} - \item 2 is the only even Prime number. - \item 2, 3 are only two consecutive natural numbers which are prime too. - \item \label{divide}Every prime number except 2 and 3 can represented in form of 6n+1 or 6n-1, where n is natural number. 
- \item \label{godld} Goldbach Conjecture: Every even integer greater than 2 can be expressed as the sum of two primes. In addition, every integer greater than 1 can be decomposed into a product of primes. - \item The GCD of a prime $p$ and any natural number that is not a multiple of $p$ is always one. - \item Fermat’s Little Theorem: If $n$ is a prime number, then for every $a$ with $1 \leq a < n$, $a^{n-1} \equiv 1 \pmod{n}$. % check if it is useful in real situation - \item Prime Number Theorem: The probability that a given, randomly chosen number $n$ is prime is inversely proportional to its number of digits, or to the logarithm of $n$. -\end{enumerate} -\subsubsection{Check Single Prime Number} -Learning to check if a number is a prime number is necessary: the naive solution comes directly from the definition. For a number $n$, we check whether it can be divided by any number in the range $[2, n-1]$; if such a divisor exists, then it is not a prime number. -\begin{lstlisting}[language=Python] -def isPrime(n): - # Corner case - if (n <= 1): - return False - # Check from 2 to n-1 - for i in range(2, n): - if (n % i == 0): - return False - return True -\end{lstlisting} -There is actually a lot of room for us to optimize the algorithm. First, instead of checking up to $n$, we can check up to $\sqrt{n}$, because if $n$ has a factor larger than $\sqrt{n}$, the matching cofactor is smaller than $\sqrt{n}$ and has already been checked. Also, because even numbers bigger than 2 are not prime, we can set the step to 2. The algorithm can be improved further by using feature \ref{divide}, that all primes are of the form $6k \pm 1$, with the exception of 2 and 3, together with feature \ref{godld}, which implicitly states that every non-prime integer is divisible by a prime number smaller than itself. So a more efficient method is to test if $n$ is divisible by 2 or 3, and then to check through all the numbers of the form $6k \pm 1$. 
-\begin{lstlisting}[language=Python] -def isPrime(n): - # corner cases - if n <= 1: - return False - if n<= 3: - return True - - if n % 2 == 0 or n % 3 == 0: - return False - - for i in range(5, int(n**0.5)+1, 6): # 6k+1 or 6k-1, step 6, up till sqrt(n), when i=5, check 5 and 7, (k-1, k+1) - if n%i == 0 or n%(i+2)==0: - return False - return True -return True -\end{lstlisting} - -\subsubsection{Generate A Range of Prime Numbers} -\paragraph{Wilson theorem} says if a number k is prime then $((k-1)! + 1) \% k$ must be 0. Below is Python implementation of the approach. Note that the solution works in Python because Python supports large integers by default therefore factorial of large numbers can be computed. -\begin{lstlisting}[language=Python] -# Wilson Theorem -def primesInRange(n): - fact = 1 - rst = [] - for k in range(2, n): - fact *= (k-1) - if (fact + 1)% k == 0: - rst.append(k) - return rst - -print(primesInRange(15)) -# output -# [2, 3, 5, 7, 11, 13] -\end{lstlisting} - -\paragraph{Sieve Of Eratosthenes} To generate a list of primes. It works by recognizing \textit{Goldbach Conjecture} that all non-prime numbers are divisible by a prime number. An optimization is to only use odd number in the primes list, so that we can save half space and half time. The only difference is we need to do index mapping. -\begin{lstlisting}[language=Python] -def primesInRange(n): - primes = [True] * n - primes[0] = primes[1] = False - for i in range(2, int(n ** 0.5) + 1): - #cross off remaining multiples of prime i, start with i*i - if primes[i]: - for j in range(i*i,n,i): - primes[j] = False - rst = [] # or use sum(primes) to get the total number - for i, p in enumerate(primes): - if p: - rst.append(i) - return rst - -print(primesInRange(15)) -\end{lstlisting} -\subsection{Ugly Numbers} -Ugly numbers are positive numbers whose prime factors only include 2, 3, 5. We can write it as $ugly number = 2^i3^j5^k, i>=0, j>=0, k>=0$. Examples of ugly numbers: 1, 2, 3, 5, 6, 10, 15, ... 
The concept of ugly number is quite simple. Now let us use the LeetCode problems as example to derive the algorithms to identify ugly numbers. -\subsubsection{Check a Single Number} -263. Ugly Number (Easy) -\begin{lstlisting} -Ugly numbers are positive numbers whose prime factors only include 2, 3, 5. For example, 6, 8 are ugly while 14 is not ugly since it includes another prime factor 7. - -Note: - 1 is typically treated as an ugly number. - Input is within the 32-bit signed integer range. -\end{lstlisting} -Analysis: because the ugly number is only divisible by $2, 3, 5$, so if we keep dividing the number by these factors ($num/f$), eventually we would get $1$, if the reminder ($num\%f$) is $0$ (divisible), otherwise we stop the loop to check the number. -\begin{lstlisting}[language = Python] -def isUgly(self, num): - """ - :type num: int - :rtype: bool - """ - if num ==0: - return False - factor = [2,3,5] - for f in factor: - while num%f==0: - num/=f - return num == 1 -\end{lstlisting} -\subsubsection{Generate A Range of Number} -264. Ugly Number II (medium) -\begin{lstlisting} -Write a program to find the n-th ugly number. - -Ugly numbers are positive numbers whose prime factors only include 2, 3, 5. For example, 1, 2, 3, 4, 5, 6, 8, 9, 10, 12 is the sequence of the first 10 ugly numbers. - -Note that 1 is typically treated as an ugly number, and n does not exceed 1690. -\end{lstlisting} -Analysis: The first solution is we use the rules $ugly number = 2^i3^j5^k, i>=0, j>=0, k>=0$, using three for loops to generate at least 1690 ugly numbers that is in the range of $2^32$, and then sort them, the time complexity is $O(nlogn)$, with $O(n)$ in space. However, if we need to constantly make request, it seems resasonable to save a table, and once the table is generated and saved, each time we would only need constant time to check. 
-\begin{lstlisting}[language = Python] -from math import log, ceil -class Solution: - ugly = [2**i * 3**j * 5**k for i in range(32) for j in range(ceil(log(2**32, 3))) for k in range(ceil(log(2**32, 5)))] - ugly.sort() - def nthUglyNumber(self, n): - """ - :type n: int - :rtype: int - """ - return self.ugly[n-1] -\end{lstlisting} -The second way is only generate the nth ugly number, with -\begin{lstlisting}[language=Python] -class Solution: - n = 1690 - ugly = [1] - i2 = i3 = i5 = 0 - for i in range(n-1): - u2, u3, u5 = 2 * ugly[i2], 3 * ugly[i3], 5 * ugly[i5] - umin = min(u2,u3,u5) - ugly.append(umin) - if umin == u2: - i2 += 1 - if umin == u3: - i3 += 1 - if umin == u5: - i5 += 1 - - def nthUglyNumber(self, n): - """ - :type n: int - :rtype: int - """ - return self.ugly[n-1] -\end{lstlisting} -%%%%%%%%%Combinatorics%%%% -\subsection{Combinatorics} -\begin{enumerate} - \item 611. Valid Triangle Number -\end{enumerate} -\begin{examples}[resume] -\item \textbf{Pascal's Triangle II(L119, *).} Given a non-negative index k where k <= 33, return the kth index row of the Pascal's triangle. Note that the row index starts from 0. In Pascal's triangle, each number is the sum of the two numbers directly above it. -\begin{lstlisting}[numbers=none] -Example: -Input: 3 -Output: [1,3,3,1] -\end{lstlisting} -Follow up: Could you optimize your algorithm to use only O(k) extra space? -\textbf{Solution: Generate from Index 0 to K}. -\begin{lstlisting}[language=Python] -def getRow(self, rowIndex): - if rowIndex == 0: - return [1] - # first, n = rowIndex+1, if n is even, - ans = [1] - for i in range(rowIndex): - tmp = [1]*(i+2) - for j in range(1, i+1): - tmp[j] = ans[j-1]+ans[j] - ans = tmp - return ans -\end{lstlisting} -Triangle Counting - -\end{examples} -%%%%%%%%%%%Others%%%%%%%%%%% -\subsubsection{Smallest Larger Number} -556. 
Next Greater Element III -\begin{lstlisting} -Given a positive 32-bit integer n, you need to find the smallest 32-bit integer which has exactly the same digits existing in the integer n and is greater in value than n. If no such positive 32-bit integer exists, you need to return -1. - -Example 1: - -Input: 12 -Output: 21 - -Example 2: - -Input: 21 -Output: -1 -\end{lstlisting} -Analysis: The first solution is to get all digits [1,2], and generate all the permutation [[1,2],[2,1]], and generate the integer again, and then sort generated integers, so that we can pick the next one that is larger. But the time complexity is O(n!). - -Now, let us think about more examples to find the rule here: -\begin{lstlisting} -435798->435879 -1432->2134 -\end{lstlisting} -If we start from the last digit, we look to its left, find the cloest digit that has smaller value, we then switch this digit, if we cant find such digit, then we search the second last digit. If none is found, then we can not find one. Like 21. return -1. This process is we get the first larger number to the right. 
-\begin{lstlisting} -[5, 5, 7, 8, -1, -1] -[2, -1, -1, -1] -\end{lstlisting} -After the this we switch 8 with 7: we get -\begin{lstlisting} -4358 97 -2 431 -\end{lstlisting} -For the reminding digits, we do a sorting and put them back to those digit to get the smallest value -\begin{lstlisting}[language=Python] -class Solution: - def getDigits(self, n): - digits = [] - while n: - digits.append(n%10) # the least important position - n = int(n/10) - return digits - def getSmallestLargerElement(self, nums): - if not nums: - return [] - rst = [-1]*len(nums) - -for i, v in enumerate(nums): - smallestLargerNum = sys.maxsize - index = -1 - for j in range(i+1, len(nums)): - if nums[j]>v and smallestLargerNum > nums[j]: - index = j - smallestLargerNum = nums[j] - if smallestLargerNum < sys.maxsize: - rst[i] = index - return rst - - - def nextGreaterElement(self, n): - """ - :type n: int - :rtype: int - """ - if n==0: - return -1 - - digits = self.getDigits(n) - digits = digits[::-1] - # print(digits) - - rst = self.getSmallestLargerElement(digits) - # print(rst) - stop_index = -1 - - # switch - for i in range(len(rst)-1, -1, -1): - if rst[i]!=-1: #switch - print('switch') - stop_index = i - digits[i], digits[rst[i]] = digits[rst[i]], digits[i] - break - if stop_index == -1: - return -1 - -# print(digits) - - # sort from stop_index+1 to the end - digits[stop_index+1:] = sorted(digits[stop_index+1:]) - print(digits) - -#convert the digitialized answer to integer - nums = 0 - digit = 1 - for i in digits[::-1]: - nums+=digit*i - digit*=10 - if nums>2147483647: - return -1 - - - return nums -\end{lstlisting} -\section{Intersection of Numbers} -In this section, intersection of numbers is to find the ``common" thing between them, for example Greatest Common Divisor and Lowest Common Multiple. -\subsection{Greatest Common Divisor} -GCD (Greatest Common Divisor) or HCF (Highest Common Factor) of two numbers $a$ and $b$ is the largest number that divides both of them. 
An example is shown as follows: -\begin{lstlisting} -The divisors of 36 are: 1, 2, 3, 4, 6, 9, 12, 18, 36 -The divisors of 60 are: 1, 2, 3, 4, 5, 6, 10, 12, 15, 20, 30, 60 -GCD = 12 -\end{lstlisting} -A special case is when one number is zero: the GCD is then the value of the other, $gcd(a, 0) = a$. - -The basic algorithm is: we get all divisors of each number, and then find the largest common value. Now, let's see how we can improve this algorithm. We can reformulate the last example as: -\begin{lstlisting} -36 = 2 * 2 * 3 * 3 -60 = 2 * 2 * 3 * 5 -GCD = 2 * 2 * 3 - = 12 -\end{lstlisting} -Consider the difference $60-36 = 2*2*3*5 - 2*2*3*3 = (2*2*3)*(5-3) = 2*2*3*2$: it is still a multiple of the GCD. So we can derive the principle that the GCD of two numbers does not change if the larger number is replaced by its difference with the smaller number. The features of GCD: -\begin{enumerate} - \item $gcd(a, 0) = a$, - \item $gcd(a, a) = a$, - \item $gcd(a, b) = gcd(a-b, b)$, if $a>b$. -\end{enumerate} -Based on the above features, we can use the Euclidean Algorithm to compute the GCD: -\begin{lstlisting} -def euclid(a, b): - while a != b: - # replace larger number by its difference with the smaller number - if a > b: - a = a - b - else: - b = b - a - return a - -print(euclid(36, 60)) -\end{lstlisting} -The only problem with this version of the Euclidean Algorithm is that it can take many subtraction steps to find the GCD if one of the given numbers is much bigger than the other. A more efficient algorithm replaces the subtraction with the remainder operation. The algorithm stops when reaching a zero remainder, and it never requires more steps than five times the number of digits (base 10) of the smaller integer. 
- -The recursive version code: -\begin{lstlisting}[language = Python] -def euclidRemainder(a, b): - if a == 0 : - return b - return euclidRemainder(b%a, a) -\end{lstlisting} -The iterative version code: -\begin{lstlisting}[language = Python] -def euclidRemainder(a, b): - while a > 0: - # replace one number with remainder between them - a, b = b%a, a - return b - -print(euclidRemainder(36, 60)) -\end{lstlisting} - - -\subsection{Lowest Common Multiple} -Lowest Common Multiple (LCM) is the smallest number that is a multiple of both $a$ and $b$. For example, for 6 and 8: -\begin{lstlisting} -The multiples of 6 are: 6, 12, 18, 24, 30, ... -The multiples of 8 are: 8, 16, 24, 32, 40, ... -LCM = 24 -\end{lstlisting} -Computing LCM is dependent on the GCD with the following formula: -\begin{equation} - lcm(a, b) = \frac{a\times b}{gcd(a, b)} -\end{equation} - -\section{Arithmetic Operations} -Because the computer only understands the binary representation, as we learned in Bit Manipulation (Chapter~\ref{chapter_bit}), the most basic arithmetic operations it supports are binary addition and subtraction. (Of course, it can execute the bit manipulation too.) The other common arithmetic operations such as multiplication, division, modulus, and exponentiation are all implemented/coded with addition and subtraction as the basis or in a dominant fashion. As a software engineer, having a sense of how we can implement the other operations from the given basis is reasonable and a good practice of coding skills. Also, sometimes if an operand is an extra large number, which is to say one the computer can not represent natively, we can still compute the result by treating these numbers as strings. - -In this section, we will explore operations including multiplication and division. There are different algorithms that we can use; we learn the standard ones, called long multiplication and long division. I am assuming you know the algorithms and focusing on the implementation of the code instead. 
- -\paragraph{Long Multiplication} - -\paragraph{Long Division} We treat the dividend as a string, e.g. dividend = 3456, and the divisor = 12. We start with 34, which has the same number of digits as the divisor. 34/12 = 2, 10, where 2 is the integer part and 10 is the remainder. Next step, we take the remainder and join it with the next digit in the dividend, and we get 105/12 = 8, 9. Similarly, 96/12 = 8, 0. Therefore we get the result by joining the results of each division operation, '288'. To see the coding, let us code it the way required by the following LeetCode Problem. In the process we need (n-m) (n, m are the total number of digits of the dividend and divisor, respectively) division operations. Each division operation takes at most 9 steps. This makes the time complexity $O(n-m)$. -\begin{examples}[resume] -\item \textbf{29. Divide Two Integers (medium)} Given two integers dividend and divisor, divide two integers without using multiplication, division and mod operator. Return the quotient after dividing dividend by divisor. The integer division should truncate toward zero. -\begin{lstlisting}[numbers=none] -Example 1: - -Input: dividend = 10, divisor = 3 -Output: 3 - -Example 2: - -Input: dividend = 7, divisor = -3 -Output: -2 -\end{lstlisting} - -\textbf{Analysis:} we can get the sign of the result first, and then convert the dividend and divisor into their absolute values. Also, we had better handle the boundary condition that the divisor is larger than the dividend, in which case we get 0 directly. 
The code is given: -\begin{lstlisting}[language=Python] -def divide(self, dividend, divisor): - def divide(dd): # the last position that divisor* val < dd - s, r = 0, 0 - for i in range(9): - tmp = s + divisor - if tmp <= dd: - s = tmp - else: - return str(i), str(dd-s) - return str(9), str(dd-s) - - if dividend == 0: - return 0 - sign = -1 - if (dividend >0 and divisor >0 ) or (dividend < 0 and divisor < 0): - sign = 1 - dividend = abs(dividend) - divisor = abs(divisor) - if divisor > dividend: - return 0 - ans, did, dr = [], str(dividend), str(divisor) - n = len(dr) - pre = did[:n-1] - for i in range(n-1, len(did)): - dd = pre+did[i] - dd = int(dd) - v, pre = divide(dd) - ans.append(v) - - ans = int(''.join(ans))*sign - - if ans > (1<<31)-1: - ans = (1<<31)-1 - return ans -\end{lstlisting} -\end{examples} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -% Probability Theory -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\section{Probability Theory} -In programming tasks, such problems are either solvable with some closed-form formula or one has no choice than to enumerate the complete search space. -\section{Linear Algebra} -\textit{Gaussian Elimination} is one of the several ways to find the solution for a system of linear euqations. - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -% geometry -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\section{Geometry} -In this section, we will discuss coordinate related problems. - -939. Minimum Area Rectangle(Medium) - -Given a set of points in the xy-plane, determine the minimum area of a rectangle formed from these points, with sides parallel to the x and y axes. - -If there isn't any rectangle, return 0. 
-\begin{lstlisting} -Example 1: - -Input: [[1,1],[1,3],[3,1],[3,3],[2,2]] -Output: 4 - -Example 2: - -Input: [[1,1],[1,3],[3,1],[3,3],[4,1],[4,3]] -Output: 2 -\end{lstlisting} -\textbf{Combination}. This at first it is a combination problem, we pick four points and check if it is a rectangle and then what is the size. However the time complexity can be $C_n^k$, which will be $O(n^4)$. The following code implements the best combination we get, however, we receive LTE: -\begin{lstlisting}[language=Python] -def minAreaRect(self, points): - def combine(points, idx, curr, ans): # h and w at first is -1 - if len(curr) >= 2: - lx, rx = min([x for x, _ in curr]), max([x for x, _ in curr]) - ly, hy = min([y for _, y in curr]), max([y for _, y in curr]) - size = (rx-lx)*(hy-ly) - if size >= ans[0]: - return - xs = [lx, rx] - ys = [ly, hy] - for x, y in curr: - if x not in xs or y not in ys: - return - - if len(curr) == 4: - ans[0] = min(ans[0], size) - return - - for i in range(idx, len(points)): - if len(curr) <= 3: - combine(points, i+1, curr+[points[i]], ans) - return - - ans=[sys.maxsize] - combine(points, 0, [], ans) - return ans[0] if ans[0] != sys.maxsize else 0 -\end{lstlisting} -\textbf{Math: Diagonal decides a rectangle}. We use the fact that if we know the two diagonal points, say (1, 2), (3, 4). Then we need (1, 4), (3, 2) to make it a rectangle. If we save the points in a hashmap, then the time complexity can be decreased to $O(n^2)$. The condition that two points are diagonal is: x1 != x2, y1 != y2. If one of them is equal, then they form a vertical or horizontal line. If both equal, then its the same points. 
-\begin{lstlisting}[language = Python] -class Solution(object): - def minAreaRect(self, points): - S = set(map(tuple, points)) - ans = float('inf') - for j, p2 in enumerate(points): # decide the second point - for i in range(j): # decide the firs point - p1 = points[i] - if (p1[0] != p2[0] and p1[1] != p2[1] and # avoid - (p1[0], p2[1]) in S and (p2[0], p1[1]) in S): - ans = min(ans, abs(p2[0] - p1[0]) * abs(p2[1] - p1[1])) - return ans if ans < float('inf') else 0 -\end{lstlisting} -\textbf{Math: Sort by column}. Group the points by x coordinates, so that we have columns of points. Then, for every pair of points in a column (with coordinates (x,y1) and (x,y2)), check for the smallest rectangle with this pair of points as the rightmost edge. We can do this by keeping memory of what pairs of points we've seen before. -\begin{lstlisting}[language=Python] -def minAreaRect(self, points): - columns = collections.defaultdict(list) - for x, y in points: - columns[x].append(y) - lastx = {} # one-pass hash - ans = float('inf') - - for x in sorted(columns): # sort by the keys - column = columns[x] - column.sort() # sort column - for j, y2 in enumerate(column): # right most edge, up point - for i in xrange(j): # right most edge, lower point - y1 = column[i] - if (y1, y2) in lastx: # 1: [1, 3], will be saved, when we were at 3: [1, 3], we can get the answer - ans = min(ans, (x - lastx[y1,y2]) * (y2 - y1)) - lastx[y1, y2] = x # y1, y2 form a tuple - return ans if ans < float('inf') else 0 -\end{lstlisting} - -\section{Miscellaneous Categories} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%%%%% rabbit and turtle to find circle or repeat number -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\subsection{Floyd’s Cycle-Finding Algorithm} - -Without this we detect cycle with the following code: -\begin{lstlisting}[language = Python] -def detectCycle(self, A): - visited=set() - head=point=A - while point: - if point.val 
in visited: - return point - visited.add(point) - point=point.next - return None -\end{lstlisting} - -Traverse linked list using two pointers. Move one pointer by one and other pointer by two. If these pointers meet at some node then there is a loop. If pointers do not meet then linked list doesn’t have loop. Once you detect a cycle, think about finding the starting point. -\begin{figure}[h!] - \centering - \includegraphics[width = 0.98\columnwidth]{fig/floyd.png} - \caption{Example of floyd’s cycle finding} - \label{fig:floyd} -\end{figure} - -\begin{lstlisting}[language = Python] -def detectCycle(self, A): - #find the "intersection" - p_f=p_s=A - while (p_f and p_s and p_f.next): - p_f = p_f.next.next - p_s = p_s.next - if p_f==p_s: - break - #Find the "entrance" to the cycle. - ptr1 = A - ptr2 = p_s; - while ptr1 and ptr2: - if ptr1!=ptr2: - ptr1 = ptr1.next - ptr2 = ptr2.next - else: - return ptr1 - return None -\end{lstlisting} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -% exercise -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\section{Exercise} -\subsection{Number} -313. Super Ugly Number -\begin{lstlisting} -Super ugly numbers are positive numbers whose all prime factors are in the given prime list primes of size k. For example, [1, 2, 4, 7, 8, 13, 14, 16, 19, 26, 28, 32] is the sequence of the first 12 super ugly numbers given primes = [2, 7, 13, 19] of size 4. - -Note: - (1) 1 is a super ugly number for any given primes. - (2) The given numbers in primes are in ascending order. - (3) 0 < k <= 100, 0 < n <= 106, 0 < primes[i] < 1000. - (4) The nth super ugly number is guaranteed to fit in a 32-bit signed integer. 
-\end{lstlisting} -\begin{lstlisting}[language=Python] -def nthSuperUglyNumber(self, n, primes): - """ - :type n: int - :type primes: List[int] - :rtype: int - """ - nums=[1] - idexs=[0]*len(primes) #first is the current idex - for i in range(n-1): - min_v = maxsize - min_j = [] - for j, idex in enumerate(idexs): - v = nums[idex]*primes[j] - if v\mu$. The problem now becomes finding the longest path from any node to any node in this directed graph. - \item Model the problem as a tree. The tree starts from empty root node, at each level i, the tree has n-i possible children: nums[i+1], nums[i+2], ..., nums[n-1]. There will only be an edge if the child's value is larger than its parent. Or we can model the tree as a multi-choice tree: for combination problem, each element can either be chosen or not chosen. We would end up with two branch, and the nodes would become a path of the LIS, therefore, the longest LIS exist at the leaf nodes which has the longest length. - \item Model it with divide and conquer and optimal substructure. -\end{enumerate} - -\end{document} \ No newline at end of file diff --git a/Easy-Book/chapters/chapter_3_iteration_recursion.tex b/Easy-Book/chapters/chapter_3_iteration_recursion.tex deleted file mode 100644 index 790fd8c..0000000 --- a/Easy-Book/chapters/chapter_3_iteration_recursion.tex +++ /dev/null @@ -1,209 +0,0 @@ -\documentclass[../main.tex]{subfiles} -\begin{document} -\chapter{Iteration and Recursion} -\label{chapter_iteration_recursion} -\begin{chapquote} -{Niklaus Wirth, \textit{Algorithms + Data Structures = Programs, 1976}} -``The power of recursion evidently lies in the possibility of defining an infinite set of objects by a finite statement. 
In the same manner, an infinite number of computations can be described by a finite recursive program, even if this program contains no explicit repetitions.'' -\end{chapquote} - -\section{Introduction} -\label{iteration_recursion_introduction} -\begin{figure}[h] - \centering - \includegraphics[width = 0.7\columnwidth]{fig/iteration_recursion.png} - \caption{Iteration vs recursion: in recursion, the line denotes the top-down process and the dashed line is the bottom-up process. } - \label{fig:iteration_vs_recursion} -\end{figure} -In computer science, software programs can be categorized as either \textit{iteration} or \textit{recursion}, making iteration and recursion the topmost-level concepts in software development and the very first base for us to study computer science techniques. Iteration refers to a \textit{looping} process which repeats some part of the code until a certain condition is met. Recursion, similarly, needs to stop at a certain condition, but it replaces the loop with recursive function calls, meaning a function calls itself from within its own code. The process is shown in Fig.~\ref{fig:iteration_vs_recursion}. - -Do you have the feeling that you seemingly already understand iteration even without code, but wonder what recursion is exactly? Recursion can be a bit challenging for beginners, as it differs from our normal way of thinking. It is a bit similar to standing in a restroom which has two mirrors abreast on each side and facing each other: we see multiple images of the things in front of each mirror, and these images usually appear from large to small. This is similar to recursion. The relation between these recurred images can be called a \textit{recurrence relation}. - -Understanding recursion and learning basic rules to solve recurrence relations are two of the main purposes of this chapter. 
Thus, we organize the content of this chapter following this trend: -\begin{enumerate} - \item Section.~\ref{sec_iteration_recursion} will first address our question by analyzing the recursion mechanism within the computer program, and we further understand the different between by seeing example of factorial series and examines the pros and cons of each. - \item Section.~\ref{sec_recursion} advances our knowledge about recursion by studying the recurrence relation, including its definition, categorization and addressing how to solve recurrence relation. - \item Section.~\ref{sec_iter_recur_examples} gives us two examples to see how iteration and recursion works in real practice. -\end{enumerate} -\begin{importantnote} -Deduce(find) the recurrence relation and sometimes solves it is a key step in algorithm design and problem solving, solving the recurrence time relation is important to algorithm analysis. -\end{importantnote} -%we will first andwer the question: recursion handles function calls in two passes -- \textit{top-down} and \textit{bottom-up}. In the top-down process, recursion starts from the entrance of the program, call itself it is composed of two passes:Thus, in this chapter, we The purpose of this chapter is to understand how iteration and recursion works in algorithms and software developing. Especially, to understand how recursion works, and we also include the most common usage of being recursion. - -% \section{Iteration and Recursion} -% \label{sec_iteration_recursion} -In this section, we first learn iteration and Python Syntax that can be used to implement. We then examine a classic and elementary example--Factorial sequence to catch a glimpse of how iteration and recursion can be applied to solve this problem. Then, we discuss more details about recursion. We end this section by comparing iteration and recursion; their pros and cons and their relation between. 
- -\section{Iteration} -In simple terms, an iterative function is one that loops to repeat some part of the code. In Python, the loops can be expressed with \texttt{for} and \texttt{while} loop. - -Enumerating the number from $1$ to $10$ is a simple iteration. Implementation wise: -\begin{itemize} - \item \texttt{for} usually is used together with function \texttt{range(start, stop, step)} which creates a sequence of numbers from \texttt{start} to \texttt{stop} in range $[start, stop)$, and increments by $step$ (1 by default). Thus, we need to set \texttt{start} as 1, and \texttt{stop} as 11 to get numbers from 1 to 10. -\begin{lstlisting}[language=Python] -# enumerate 1 to 10 with for loop -for i in range(1, 11): - print(i, end=' ') -\end{lstlisting} -\item \texttt{while} is used with syntax -\begin{lstlisting}[numbers=none] -while expression - statement -\end{lstlisting} -In our case, we need to set start condition which is $i=1$, and the expression will be limiting $i <= 10$. In the statement, we need to manually increment the variable \texttt{i} so that we do not end up with an infinite loop. -\begin{lstlisting}[language=Python] -i = 1 -while i <= 10: - print(i, end = ' ') - i += 1 -\end{lstlisting} -\end{itemize} - -\section{Factorial Sequence} The factorial of a positive integer $n$, denoted by $n!$, is the product of all positive integers less than or equal to $n$: -\begin{lstlisting}[numbers=none] -For example: -5! = 5 \times 4 \times 3 \times 2 \times 1 = 120. -0! = 1 -\end{lstlisting} -% In this section, we are going to approach the problem using both iteration and recursion, conceptually and implementally. -To compute the factorial sequence at $n$, we need to know the factorial sequence at $n-1$, which can be expressed as a \textit{recurrence relation}, namely $n!= n \times (n-1)!$. -\begin{itemize} - \item Solving with iteration: we use a \texttt{for} loop that starts at 1 and runs up to $n$ so that we eventually build up our answer at $n$. 
We use a variable \texttt{ans} to save the factorial result for each number, and once the program stops, \texttt{ans} gives the result of our factorial for $n$. -\begin{lstlisting}[language=Python] -def factorial_iterative(n): - ans = 1 - for i in range(1, n+1): - ans = ans * i - return ans -\end{lstlisting} -\item Solving with recursion: we start by calling a recursive function at $n$; within this function, the function calls itself, but with $n-1$, just as shown in the recurrence relation. We then multiply the result of this recursive call with $n$. We need to define a bottom, which is the end condition for the recursive function calls, to avoid infinite recursion. In this case, it bottoms out at $n=1$, whose answer we know to be $1$, thus we return 1 to stop further function calls and recursively return to the upmost level. Note that, as written, the function therefore assumes $n \geq 1$; calling it with $n=0$ would never reach the base case. -\begin{lstlisting}[language=Python] -def factorial_recursive(n): - if n == 1: - return 1 - return n * factorial_recursive(n-1) -\end{lstlisting} -\end{itemize} - -\section{Recursion} -In this section, we reveal how the recursion mechanism works: function calls and stack, two passes. -\label{sec_recursion} -\begin{figure}[!ht] - \centering - \includegraphics[width=0.6\columnwidth]{fig/activation_records.png} - \caption{Call stack of recursion function} - \label{fig:call_stack_recursion_function} -\end{figure} -\paragraph{Two Elements} -When a routine calls itself either directly or indirectly, it is said to be making a recursive function call. The basic idea behind solving problems via recursion is to break the instance of the problem into smaller and smaller instances until the instances are so small they can be solved trivially. We can view a recursive routine as consisting of two parts. -\begin{itemize} - \item Recursive Calls: As in the factorial sequence, when the instance of the problem is still too large to solve directly, we recursively call the function itself to solve problems of smaller size. 
Then the results returned from the recursive calls are used to build the result of the upper level using the \textit{recurrence relation}. For example, if we use $f(n)$ to denote the factorial at $n$, the recurrence relation would be $f(n)=n\times f(n-1), n>0$. - \item End/Base Cases: The above recursive call needs to bottom out; it stops when the instance is small enough to be solved directly. This stop condition is called the end/base case. Without this case, the recursion will continue to dive infinitely deep and eventually we run out of memory and get a crash. A recursive function can have one or more base cases. In the example of factorial, the base case is when $n=0$, by definition $0!=1$. -\end{itemize} - - -\paragraph{Recursive Calls and Stacks} -The recursive function calls of the recursive factorial we implemented in the last section can be demonstrated as Fig.~\ref{fig:call_stack_recursion_function}. - -The execution of recursive function $f(n)$ will pay two visits to each recursive function $f(i), i \in [1, n]$ through two passes: \textit{top-down} and \textit{bottom-up} as we have illustrated in Fig.~\ref{fig:iteration_vs_recursion}. The recursive function handles this process via a \textit{stack} data structure which follows a Last In First Out (LIFO) principle to record all function calls. - -\begin{itemize} - \item In the top-down pass, each recursive function's execution context is ``pushed'' into the stack in the order of $f(n)$, $f(n-1)$, ..., $f(1)$. The process continues until it hits the end case $f(0)$, which will not be ``pushed'' into the stack but executes some code and \texttt{returns} value(s). The end case marks the start of the bottom-up process. - -\item In the bottom-up pass, the recursive function's execution context in the stack is ``popped'' off the stack in a reversed order: $f(1)$, ..., $f(n-1)$, $f(n)$. And $f(1)$ takes the returned value from the base case to construct its value using the recurrence relation. 
Then it returns its value up to the next recursive function $f(2)$. This whole process ends at $f(n)$ which returns its value. -\end{itemize} - -\paragraph{How Important Is Recursion?} -Recursion is a very powerful and fundamental technique, and it is the basis for several other design principles, such as: -\begin{itemize} - \item Divide and Conquer (Chapter.~\ref{chapter_divide_conquer}). - \item Recursive Search, such as Tree Traversal and graph search. - \item Dynamic Programming (Chapter.~\ref{chapter_dynamic-programming}). - \item Combinatorics such as enumeration (permutation and combination) and branch and bound etc. - \item Some classes of greedy algorithms. -\end{itemize} -It also supports the proof of correctness of algorithms via mathematical induction, and consistently arises in the algorithm complexity analysis. We shall see this throughout this book and will end up drawing this conclusion ourselves. - -\paragraph{Practical Guideline} -In real algorithmic problem solving, the two passes normally serve different purposes. - -In top-down process we do: -\begin{enumerate} - \item Break problems into smaller problems. There are different ways of ``breaking'', and depending on which one is used, the approach is either \textit{divide and conquer} or \textit{decrease and conquer} which we will further expand in Chapter.~\ref{} and \ref{}. Divide and conquer will divide the problems into disjoint subproblems, whereas in decrease and conquer, the problem is reduced to a single smaller subproblem. - \item Searching: visit nodes in non-linear data structures (graph/tree), visit nodes in linear data structures. Also, at the same time, we can use \textbf{pass by reference} to track the state change such as the traveled path in the path related graph algorithms. -\end{enumerate} - - -In bottom-up process, we can either return \texttt{None} or \texttt{variables}. If we already used \textbf{pass by reference} to track the change of state, then it is not necessary to return variables. 
In some scenarios, tracking state by passing by reference can be easier and more intuitive. For example, in graph algorithms, we are most likely to use this method. - -\paragraph{Tail Recursion} -\textit{Tail recursion} is the case where the function calls itself at the end (``tail'') of the function, so that no computation is done after the return of the recursive call. Many compilers optimize to change a recursive call to a tail recursive or an iterative call. - -%Returning variables -% \begin{enumerate} -% \item Using rreturn None: Simply return to the upper level if we already used \textbf{pass by reference} to tack the change of state. -% \item return variables: We get the result of the next recursive function call and using the recurrence relation to construct the result of current function. if we have return result, we do process of these results with current state and return to the upper level. In divide and conquer, we mostly likely need to merge its results. For iteration, this process gives the iteration process the backward traveling process. -% \end{enumerate} - -% \paragraph{Examples of Applications} -% For iteration, the top-down process is visiting nodes in 'forwarding' direction, and the bottom-up process on the other hand functions as a reverse visiting process. This makes a linear data structures function as a doubly linked list, and make a one direction tree structure function as one with parent. Here we list some examples that used recursive so that we can go backward: -% \begin{enumerate} -% \item 2. Add Two Numbers -% \end{enumerate} - -\section{Iteration VS Recursion} - -\paragraph{Stack Overflow Problem} -In our example, if we call function \texttt{factorial\_recursive()} with $n=1000$, Python raises an error: -\begin{lstlisting} -RecursionError: maximum recursion depth exceeded in comparison -\end{lstlisting} -which is a \textit{stack overflow} problem. 
-A stack overflow is when we run out of memory to hold items in the stack. These situations can incur the stack overflow problem: -\begin{enumerate} - \item No base case is defined. - \item The recursion is so deep that it exceeds the memory assigned to the call stack on the executing machine. -\end{enumerate} -\paragraph{Stack Overflow for Recursive Function and Iterative Implementation} According to Wikipedia, in software, a stack overflow occurs if the call stack pointer exceeds the stack bound. The call stack may consist of a limited amount of address space, often determined at the start of the program depending on many factors, including the programming language, machine architecture, multi-threading, and amount of available memory. When a program attempts to use more space than is available on the call stack, the stack is said to \textit{overflow}, typically resulting in a program crash. A very deep recursive function is faced with the threat of stack overflow. A reliable way to fix it is by transforming the recursion into a loop and storing the function arguments in an explicit stack data structure; this is often called the iterative implementation which corresponds to the recursive implementation. - -We need to follow these points: -\begin{enumerate} - \item End condition, Base Cases and Return Values: either return an answer for base cases or None, and used to end the recursive calls. - \item Parameters: parameters include: data needed to implement the function, current paths, the global answers and so on. - \item Variables: what the \textbf{local} and \textbf{global} variables are. In Python, any mutable (reference-type) object can be used as a global result by passing it in the parameters. - \item Construct current result: when to collect the results from subtree and combine to get the result for current node. - \item Check the depth: if the program will lead to a stack overflow. 
-\end{enumerate} - - -\paragraph{Conversion} For a given problem, conversion between iteration and recursion is possible, but the difficulty of the conversion is highly dependable on specific problem context. For example, the iteration of a range of numbers can be represented with recurrence relation $T(n)=T(n-1)+1$. On the side of implementation, some recursion and iteration can be easily converted between such as linear search; in some other cases, it takes more tricks and requires more sophisticated data structures to assist the conversion, such as in the iterative implementation of the recursive depth-first-search, it uses stack. Do not worry about these concepts here, as you flip more pages in the book, you will know and start to think better. - -\paragraph{Tail recursion and Optimization} - -In a typical recursive function, we usually make the recursive calls first, and then take the return value of the recursive call to calculate the result. Therefore, we only get the final result after all the recursive calls have returned some value. But in a tail recursive function, the various calculations and statements are performed first and the recursive call to the function is made after that. By doing this, we pass the results of the current step to the next recursive call to the function. Hence, the last statement in a Tail recursive function is the recursive call to the function. -This means that when we perform the next recursive call to the function, the current stack frame (occupied by the current function call) is not needed anymore. This allows us to optimize the code. We Simply reuse the current stack frame for the next recursive step and repeat this process for all the other function calls. - -Using regular recursion, each recursive call pushes another entry onto the call stack. When the functions return, they are popped from the stack. 
In the case of tail recursion, we can optimize it so that only one stack entry is used for all the recursive calls of the function. This means that even on large inputs, there can be no stack overflow. This is called Tail recursion optimization. - -Languages such as lisp and c/c++ have this sort of optimization. But, the Python interpreter doesn’t perform tail recursion optimization. Due to this, the recursion limit of python is usually set to a small value (approx, $10^3$). This means that when you provide a large input to the recursive function, you will get an error. This is done to avoid a stack overflow. The Python interpreter limits the recursion depth so that infinite recursions are avoided. - - -\paragraph{Handling recursion limit} -The ``sys module'' in Python provides a function called \texttt{setrecursionlimit()} to modify the recursion limit in Python. It takes one parameter, the value of the new recursion limit. By default, this value is usually $10^3$. If you are dealing with large inputs, you can raise it, e.g.\ to $10^6$, so that deeper recursion is allowed; be aware that a limit that is too high for the platform can still crash the interpreter. - - -% \section{Hands-on Examples} -% \label{sec_iter_recur_examples} - -\section{Exercises} -\begin{enumerate} - \item Compute factorial sequence using \texttt{while} loop. -\end{enumerate} - -\section{Summary} -If a recursive algorithm can be further optimized, the optimization method can either be divide and conquer or decrease and conquer. We have put much effort into solving recurrence relation of both: the linear recurrence relation for decrease and conquer, the divide and conquer recurrence relation for divide and conquer. Right now, do not worry if you are eager to know what divide or decrease and conquer is; it will be explained in the next two chapters. - % A conditional statement decides the termination of recursion while a control variable’s value decide the termination of the iteration statement (except in the case of a while loop). 
- % Infinite recursion can lead to system crash whereas, infinite iteration consumes CPU cycles. - % Recursion repeatedly invokes the mechanism, and consequently the overhead, of method calls. This can be expensive in both processor time and memory space while iteration doesn’t. - % Recursion makes code smaller while iteration makes it longer. -\end{document} \ No newline at end of file diff --git a/Easy-Book/chapters/chapter_5_algorithm_analysis.tex b/Easy-Book/chapters/chapter_5_algorithm_analysis.tex deleted file mode 100644 index 9716cf2..0000000 --- a/Easy-Book/chapters/chapter_5_algorithm_analysis.tex +++ /dev/null @@ -1,521 +0,0 @@ -\documentclass[../main.tex]{subfiles} - -\begin{document} - -\chapter{Algorithm Complexity Analysis} -\label{chapter_algorithm_analysis} -When a software program runs on a machine, we genuinely care about the \textit{hardware space} and the \textit{running time} that it takes to complete the execution of the program; space and running time is the cost we need to pay to get the problem solved. The lower the cost, the happier we would be. Thus, \texttt{space} and \texttt{running time} are two metrics we use to evaluate the performance of programs, or rather say, algorithms. - -Now, if I ask you the question, "How to evaluate the performance of algorithms?" Do not go low and tell me, "You just write the code and run it on a computer?" Because here is the reality: (a) These two metrics are mostly possible to vary as using different the physical machine and the programming languages, and (b) The cost will be too high. First, when we are solving a problem, we would always try to come up with many possible solutions--algorithms. Implementing and running all candidates just boost your cost of labor and finance. 
Second, even at the best case, you only have one candidate, but what if your designated machine can not load the program due to the memory limit, what if your algorithm takes millions of years to run, would you prefer to sit and wait? - -With these situation, it is obvious that we need to \textit{predict} algorithm's performance--running time and space--without implementing or running on a particular machine, and meanwhile the prediction should be independent of the hardwares. In this chapter, we will study the complexity analysis method that strives to enable us such ability. The space complexity is mostly obvious and way easier to obtain compared with its counterpart-time complexity. This decides that in this chapter, the analysis of time complexity will outweigh the pages we spent on space complexity. Before we dive into a plethora of algorithms and data structures, learning the complexity analysis techniques can help us evaluate each algorithm. - - -% We organize the chapter: -% \begin{enumerate} -% \item Introduction -% \item Asymptotic notations -% \item Amortized Analysis -% \item Hands-on Examples -% \end{enumerate} - -\section{Introduction} -In reality, it is impossible to predict the exact behavior of an algorithm, thus complexity analysis only try to extract the main influencing factors and ignore some trivial details. The complexity analysis is thus only \textit{approximate}, but it works. - -\paragraph{What are the main influencing factors? } - -Imagine sorting an array of integers with size 10 and size 10,000,000. The time and space it takes to these two input size will mostly be a huge difference. Thus, the number of items in the \textit{input size} is a straightforward factor. Assume we use $n$ to denote the size of the input, and the complexity analysis will define an expression of the running time as $T(n)$ and the space as $S(n)$. 
- -In complexity analysis, RAM model is based upon, where instructions/operators are executed one after another, without concurrency. Therefore, the running time of algorithm on a particular input can be expressed as counting the number of \textit{operations or ``steps''} to run. - -\paragraph{What are the difference cases?} - -Yet, when two input instance has exactly the same size, but with different values, such that one array where the input array is already sorted, and the other is totally random, the time it takes to these two cases will possibly vary, depending on the sorting algorithm that you chose. In complexity analysis, \textit{best-case}, \textit{worst-case}, \textit{average-case} complexity analysis is used to differentiate the behavior of the same algorithm applied on different input instance. -\begin{enumerate} - \item \textbf{Worst-case}: The behavior of the algorithm or an operation of a data structure with respect to the worst possible case of input instance. This gave us a way to measure the upper bound on the running time for any input, which is denoted as $O$. Knowing it gives us a guarantee that the algorithm will never take any longer. - \item \textbf{Average-case}: The expected behavior when the input is randomly drawn from a given distribution. Average case running time is used as an estimate complexity for a normal case. The expected case here offers us asymptotic bound $\Theta$. Computation of average-case running time entails knowing all possible input sequences, the probability distribution of occurrence of these sequences, and the running times for the individual sequences. Often it is assumed that all inputs of a given size are equally likely. - \item \textbf{Best-case}: The possible best behavior when the input data is arranged in a way, that your algorithms run least amount of time. Best case analysis can lead us to the lower bound $\Omega$ of an algorithm or data structure. 
-\end{enumerate} - -\paragraph{Toy Example: Selection Sort} Given a list of integers, sort the item incrementally. -\begin{lstlisting}[numbers=none] -For example, given the list A=[10, 3, 9, 2, 8, 7, 9], the sorted list will be: -A=[2, 3, 7, 8, 9, 9, 10]. -\end{lstlisting} -There are many sorting algorithms, in this case, let us examine the \textit{selection sort}. Given the input array $A$, and size to be $n$, we have index $[0, n-1]$. In selection sort, each time we select the current largest item and swap it with item at its corresponding position in the sorted list, thus dividing the list into two parts: unsorted list on the left and sorted list on the right. For example, at the first pass, we choose 10 from $A[0,n-1]$ and swap it with $A[n-1]$, which is 9; at the second pass, we choose the largest item 9 from $A[0,n-2]$ and swap it with 7 at $A[n-2]$, and so. Totally, after $n-1$ passes we will get an incrementally sorted array. More details of selection sort can be found in Chapter~\ref{chapter_sorting}. - -In the implementation, we use \texttt{ti} to denote the target position and \texttt{li} the index of the largest item which can only get by scanning. We show the Python code: -\begin{lstlisting}[language=Python] -def selectSort(a): cost times - '''Implement selection sort''' - n = len(a) - for i in range(n - 1): #n-1 passes, - ti = n - 1 -i c n-1 - li = 0 c n-1 - for j in range(n - i): - if a[j] > a[li]: c \sum_{i=0}^{n-2}(n-i) - li = j c \sum_{i=0}^{n-2}(n-i) - # swap li and ti - print('swap', a[li], a[ti]) - a[ti], a[li] = a[li], a[ti] c n-1 - print(a) - return a -\end{lstlisting} -First, we ignore the distinction between different operation types and treat all alike with a cost of $c$. In the above code, the line that comes with notations--\texttt{cost} and \texttt{times}--are operations. In line $5$, we first point at the target position \texttt{ti}. Because of the \texttt{for} loop above it, this operation will be called $n-1$ times. 
Same for line $6$ and $12$. For operation in line $8$ and $9$, the times it operated is denoted as $\sum_{i=0}^{n-2}(n-i)$ due to two nested \texttt{for} loops. And the range of $j$ is dependent on the outer loop with $i$. We get our running time $T(n)$ by summing up these cost on the variable of $i$. -\begin{align} -\label{complexity_eq_1} - T(n) &= 3c*(n-1) + \sum_{i=0}^{n-2} {2c(n-i)}\\ - &= 3c*(n-1) + 2c (n+(n-1)+(n-2)+...+2) \notag \\ - &= 3c*(n-1) + 2c (\frac{(n-1)*(2+n)}{2})\notag \\ - &=cn^2+cn-2c+3cn-3c\notag \\ - &=cn^2+4cn-5c\label{complexity_eq_1_2} \\ - &=an^2+bn+c\label{complexity_eq_1_3} -\end{align} -We use three constants $a, b, c$ to rewrite Eq. \ref{complexity_eq_1_2} with Eq.\ref{complexity_eq_1_3}; note that from here on $c$ denotes the constant term of the polynomial, not the unit cost of one operation. - -In the case of sorting, an incrementally sorted array will potentially be the best-case that takes the least running time and on the other hand a decrementally sorted array will be the worst-case. However, in the example of selection sort, even if the input is perfectly sorted, the algorithm does not consider this case; it still runs n-1 passes, and in each pass it still scans a fixed size of window to find the largest item (you would only know it is the largest by looking at all candidates). Thus, in this case, the best-case, worst-case, and average-case all happen to have the same running time shown in Eq.~\ref{complexity_eq_1_3}. - - -%%%%%%%%%%%%%%%%%%%%%%%%%%%Asymptotic notations%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\section{Asymptotic Notations} -\paragraph{Order of Growth and Asymptotic Running Time} In Equation~\ref{complexity_eq_1_3} we end up with three constants $a$, $b$, $c$ and two terms with order $n^2$ and $n$. When the input is large enough, all the lower order terms, even if with large constant, will become relatively insignificant to the highest term; we thus neglect the lower terms and end up with $an^2$. Further, we neglect the constant coefficient $a$ for the same reason. 
However, we can not say $T(n)=n^2$, because we know mathematically speaking, it is wrong. -\begin{figure}[h!] - \centering - \includegraphics[width=0.8\columnwidth]{fig/big_o_complexity_chart.png} - \caption{Order of Growth of Common Functions} - \label{fig:big_o_complexity_chart} -\end{figure} - -Instead, since we are only interested in the behavior of $T(n)$ when $n$ is large enough, we say the original complexity function $an^2+bn+c$ is ``asymptotically equivalent to'' $n^2$, which reads ``$T(n)$ is asymptotic to $n^2$'' and is denoted as $T(n)=an^2+bn+c \asymp n^2$. From Fig.~\ref{fig:big_o_complexity_chart}, we can visualize that when $n$ is large enough, the term $n$ is trivial compared with $n^2$. - -In this way, we manage to classify our complexity into a group of families, say, exponential $2^n$ or polynomial $n^2$.% For example, if the input size is $n$, then we can have functions like $f_1 = an+b$ and $f_2=an^2+bn+c$. But normally, we would only get the highest order of function, and simplified them to $g_1=n$ and $g_2=n^2$, with different notation. Function $f_1$ and $g_1$ are of the same order of magnitude or growth and $g_1$ can approach the curve of $f_1$ arbitrarily closely. The relation of them is \textbf{asymptotic}, and denoted as $f_1 \asymp g_1$. - -\subsubsection{Definition of Asymptotic Notations} We mentioned ``asymptotically equivalent'' relation, which can be formalized and defined with $\Theta$-Notation as $T(n)=\Theta(n^2)$, one of the main three asymptotic notations--asymptotically equivalent, smaller, and larger--we will cover in this section. -\paragraph{$\Theta$-Notation} For a given function $g(n)$, we define $\Theta(g(n))$(pronounced as ``big theta'') as a set of functions $\Theta(g(n))=\{f(n)\}$, that each $f(n)$ can be bounded by $g(n)$ by $0 \leq c_1g(n)\leq f(n)\leq c_2g(n)$ for all $n\geq n_0$ for positive constant $c_1$, $c_2$ and $n_0$. We show this relation in Fig.~\ref{fig:asym_notation}. 
Strictly speaking, we would write $f(n)\in\Theta(g(n))$ to indicate that $f(n)$ is just one member of the set of functions that $\Theta(g(n))$ can represent. However, in the field of computer science, we write $f(n)=\Theta(g(n))$ instead. - -We say $g(n)$ is an \textit{asymptotically tight bound} of $f(n)$. For example, we can say $n^2$ is asymptotically tight bound for $2n^2+3n+4$ or $5n^2+3n+4$ or $3n^2$ or any other similar functions. We can denote our running time as $T(n)=\Theta(n^2)$. - -\begin{figure}[!ht] - \centering - \includegraphics[width = 1\columnwidth]{fig/notations.png} - \caption{Graphical examples for asymptotic notations. Replace f(n) with T(n) } - \label{fig:asym_notation} -\end{figure} - -\paragraph{$O$-Notation} Further, we define the \textit{asymptotically upper bound} of a set of functions $\{f(n)\}$ as $O(g(n))$(pronounced as ``big oh'' of $f(n)$), with $0 \leq f(n)\leq cg(n)$ for all $n\geq n_0$ for positive constant $c$ and $n_0$. We show this relation in Fig.~\ref{fig:asym_notation}. - -Note that $T(n) = \Theta(g(n))$ implies that $T(n)=O(g(n))$, but not the other way around. With $2n^2+3n+4$ or $5n^2+3n+4$ or $3n^2$, it also be denoted as $T(n)=O(n^2)$. Big Oh notation is widely applied in computer science to describe either the running time or the space complexity. - -\paragraph{$\Omega$-Notation} It provides \textbf{asymptotic lower bound} running time. With $T(n)=\Omega(g(n))$(pronounced as ``big omega'') we represent a set of functions that $0 \leq cg(n) \leq f(n)$ for all $n\geq n_0$ for positive constant $c$ and $n_0$. - - -\begin{bclogo}[couleur = blue!30, arrondi=0.1,logo=\bccrayon,ombre=true]{Does it mean that $O$ is worst-case, $\Theta$ is the average-case and $\Omega$ is the best-case? How does it relate to this three cases. } -\end{bclogo} - -\subsubsection{Properties of Asymptotic Comparisons} -We should note that only if $f(n)=O(g(n))$ and $f(n)=\Omega(g(n))$, we can have $f(n)=\Theta(g(n))$. 
- -\begin{table}[!ht] -\begin{small} -\centering -\noindent\captionof{table}{ Analog of Asymptotic Relation} - \noindent \begin{tabular}{|p{0.4\columnwidth}|p{0.4\columnwidth}| } - \hline - Notation & Similar Relations \\ \hline -$f(n)=\Theta(g(n))$ & $f(n)=g(n)$ \\\hline -$f(n)=O(g(n))$ &$f(n)\leq g(n)$\\ \hline -$f(n)=\Omega(g(n))$ & $f(n)\geq g(n)$\\\hline -\end{tabular} - \label{tab:asymptotic_notation_property} - \end{small} -\end{table} - -It is fair to denote the relation of $g(n)$ and $f(n)$ to similar relation as between real numbers as shown in Table.~\ref{tab:asymptotic_notation_property}. Thus the properties of real numbers, such as transitivity, reflexivity, symmetry, transpose symmetry all holds for asymptotic notations. - -\section{Practical Guideline} -The previous two sections, we introduced the complexity function $T(n)$, how it is influenced by different cases of input instance--worst, average, and best cases, and how that we can use asymptotic notations to focus the complexity only on the dominant term in function $T(n)$. In this section, we would like to provide some practical guideline that arise in real application. -\paragraph{Input Size and Running Time} -In general, the time taken by an algorithm grows with the size of the input, so it is universal to describe the running time of a program as a function of the size of its input. $f(n)$, with the input size denoted as $n$. - -The notation of \textbf{input size} depends on specific problems and data structures. For example, the size of the array can be denoted as integer $n$, the total numbers of bits when it come to binary notation, and sometimes, if the input is matrix or graph, we need to use two integers such as $(m, n)$ for a two-dimensional matrix or $(V, E)$ for the vertices and edges in a graph. - -We use function $T$ to denote the running time. With input size of $n$, our running time can be denoted as $T(n)$. Given $(m, n)$, it can be $T(m, n)$. 
- -\paragraph{Worst-case Analysis is Preferred} -In reality, worst-case input is chosen as our indicator over the best input and average input because: (a) the best input is not representative; there is usually an input for which the algorithm becomes trivial; (b) the average input is sometimes very hard to define and measure; (c) in some cases, the worst-case input is very close to the average and to the observational input; (d) the algorithm with the best efficiency on the worst case usually achieves the best performance. - -\paragraph{Relate Asymptotic Notations to Three Cases of Input Instance} -It might seem confusing how the asymptotic notation relates to the three cases of input instance--worst-case, best-case, and average case. - -Think about it this way: asymptotic notations apply to any function, and they abstract away the lower-order terms to characterize the property of the function when the input is large or infinite. Therefore, by themselves they have nothing to do with these three cases. - -However, assume we are trying to characterize the complexity of an algorithm, and we analyzed its best-case and worst case input: -\begin{itemize} - \item Worst-case: $T(n)=an^2+bn+c$, now we can say $T(n)=\Theta(n^2)$, which indicates that $T(n)=\Omega(n^2)$ and $T(n)=O(n^2)$. - \item Best-case: $T(n)=an$, we can say $T(n)=\Theta(n)$, which indicates that $T(n)=\Omega(n)$ and $T(n)=O(n)$. -\end{itemize} -In order to describe the complexity of our algorithm in general, we put aside the particular input instance. As with the average case analysis, where it is typically hard to ``average'' between different inputs, we can come up with an estimation, and safely say that the time complexity in general satisfies $an\leq T(n)\leq an^2+bn+c$. 
This can be further expanded as: -\begin{equation} - c_1n\leq an\leq T(n)\leq an^2+bn+c \leq c_2n^2 -\end{equation} -Equivalently, we are safe to characterize a lower-bound based on best-case and an upper-bound based on the worst-case, thus we say the time complexity of our algorithm as $T(n)=\Omega(n), T(n)=O(n^2)$. - - -\paragraph{Big Oh is a Popular Notation to Complexity Analysis} -As we have concluded that the worst-case analysis is both easy to get and good indicator of the overall complexity. Big Oh as the absolute upper bound of the worst-case would also indicate the upper bound of the algorithm in general. - -Even if we can get a tight bound for the algorithm as in the case of selection sort, it is always right to say that its an upper bound because $\Theta(g(n))$ is a subset of $O(g(n))$. This is like, we know dog is categorized as canine, and canine is in the type of mammal, thus, we are right to say that dog is a species of mammal. - - - - -% \subsubsection{Four Types of Complexity Analysis} -% As we will see in the remaining content of the book, different algorithm is affected differently with its input data distribution. We category this influence in three levels: worst-case, average-case, and best case. -% \begin{enumerate} -% \item \textbf{Worst-case}: The behavior of the algorithm or an operation of a data structure with respect to the worst possible case of input instance. This gave us a way to measure the upper bound on the running time for any input, which is denoted as $O$. Knowing it gives us a guarantee that the algorithm will never take any longer. -% \item \textbf{Average-case}: The expected behavior when the input is randomly drawn from a given distribution. Average case running time is used as an estimate complexity for a normal case. The expected case here offers us asymptotic bound $\Theta$. 
Computation of average-case running time entails knowing all possible input sequences, the probability distribution of occurrence of these sequences, and the running times for the individual sequences. Often it is assumed that all inputs of a given size are equally likely. -% \item \textbf{Best-case}: The possible best behavior when the input data is arranged in a way, that your algorithms run least amount of time. Best case analysis can lead us to the lower bound $\Omega$ of an algorithm or data structure. -% \end{enumerate} -% The selection algorithm's complexity will not be affected by its input, because no matter what data distribution is, we need to traverse the fixed range of array to find the largest item. For these type of algorithms, usually we have $T(n)=O(g(n))=\Theta(g(n))=\Omega(g(n))$. However, there are a lot of other algorithms that differs from different inputs. - -% All of these notations are applied to functions. However, in the practical interviews, when the interviewer asks you to give the time and space complexity, you do not necessarily to give them the answer for each notation, you can just use $O$ to denote, with regarding to different cases introduced in the next section. Here, we provide the most likely used growth rate plotted in Fig.~\ref{fig:big_o_complexity_chart}. - - -\section{Time Recurrence Relation} -We have studied recurrence relation throughly in Chapter.~\ref{chapter_recurrence_relation}. How does it relate to complexity analysis? We can represent either recursive function or iterative function with time recurrence relation. Therefore, the complexity analysis can be done in two steps: (1) get the recurrence relation and (2) solve the recurrence relation. -\begin{itemize} - \item For recursive function, this representation is natural. 
For example, in the merge sort, it can be easily represented as $T(n)=2T(n/2)+O(n)$: at each step it divides a problem of size $n$ into two subproblems each with half size, and the cost to combine the solution of these two subproblems will be at most $n$, that is why we add $O(n)$. - \item A time recurrence relation can be easily applied on iterative program too. Say, in the simple task where we try to search a target in a list array, we can write a recurrence relation function to it as $T(n)=T(n-1)+1$. Because, in the scanning process, one move reduces the problem to a smaller size, and the cost of it is 1. Using the asymptotic notation, we can further write it as $T(n)=T(n-1)+O(1)$. Solving this recurrence relation straightforwardly through iteration method, we can have $T(n)=O(n)$. -\end{itemize} - -As in the chapter.~\ref{chapter_divide_and_conquer}, there are generally two ways of reducing a problem: divide and conquer and Reduce by Constant size, which is actually a non-homogenous recurrence relation. - - In Chapter.~\ref{chapter_recurrence_relation}, we showed how to solve linear recurrence relation and get absolute answer, it was seemingly complex and terrifying. Good news: as complexity analysis is about estimating the cost, we can loosen up a bit and sometimes a lower/upper bound is good enough, and the base case will almost always be $T(1)=O(1)$. - - \subsection{General Methods to Solve Recurrence Relation} - We have shown in Chapter.~\ref{chapter_recurrence_relation} there are iterative method and mathematical induction as general methods to try to solve an easy recurrence relation. We demonstrate how these two methods can be used in solving time recurrence relations first. Additionally, we introduce recursion tree method. - \subsubsection{Iterative Method} -The most straightforward method for solving a recurrence relation, whether it is linear or non-linear, is the \textit{iterative method}. 
Iterative method is a technique or procedure in computational mathematics that it iteratively replace/substitute each $a_n$ with its recurrence relation $\Psi(n, a_{n-1}, a_{n-2}, ..., a_{n-k})$ till all items ``disappear'' other than the initial values. Iterative method is also called substitution method. - -We demonstrate iteration with a simple non-overlapping recursion. -\begin{align} -\label{complexity_eq_binary_search} - T(n)&=T(n/2)+O(1)\\ - &=T(n/2^2)+O(1)+O(1)\notag\\ - &=T(n/2^3)+3O(1)\notag\\ - &=...\notag\\ - &=T(1)+kO(1) -\end{align} -We have $\frac{n}{2^k}=1$, we solve this equation and will get $k=\log_2 n$. Most likely $T(1)=O(1)$ will be the initial condition, we replace this, and we get $T(n)=O(\log_2 n)$. - -However, when we try to apply iteration on the third recursion: $T(n)=3T(n/4)+O(n)$. It might be tempting to assume that $T(n)=O(n\log n)$ due to the fact that $T(n)=2T(n/2)+O(n)$ leads to this time complexity. -\begin{align} -\label{complexity_non_overlap_1} - T(n)&=3T(n/4)+O(n)\\ - &=3(3T(n/4^2)+n/4)+n=3^2T(n/4^2)+n(1+3/4)\notag\\ - &=3^2(3T(n/4^3)+n/4^2)+n(1+3/4)=3^3T(n/4^3)+n(1+3/4+3/4^2)\\ - &=...\\ - &=3^kT(n/4^k)+n\sum_{i=0}^{k-1}(\frac{3}{4})^{i} -\end{align} -\subsubsection{Recursion Tree} -Since the term of T(n) grows, the iteration can look messy. We can use recursion tree to better visualize the process of iteration. In a recursive tree, each node represents the value of a single subproblem, and a leaf would be a subproblem. As a start, we expand $T(n)$ as a node with value $n$ as root, and it would have three children each represents a subproblem $T(n/4)$. We further do the same with each leaf node, until the subproblem is trivial and be a base case. In practice, we just need to draw a few layers to find the rule. The cost will be the sum of costs of all layers. The process can be seen in Fig.~\ref{fig:recursive_tree}. 
-\begin{figure}[!ht] - \centering - \includegraphics[width=0.98\columnwidth]{fig/recursion_tree_non_overlap.png} - \caption{The process to construct a recursive tree for $T(n) = 3T(\floor*{n/4}) + O(n)$. There are $k+1$ levels in total. Use a better figure. } - \label{fig:recursive_tree} -\end{figure} - In this case, it is the base case $T(1)$. Through the expansion with iteration and recursion tree, our time complexity function becomes: -\begin{align} -\label{complexity_non_overlap_2} - T(n)&=\sum_{i=1}^{k}L_i + L_{k+1}\\ - &=n\sum_{i=1}^{k}(3/4)^{i-1}+3^kT(n/4^k) -\end{align} - -In the process, we can see that Eq.~\ref{complexity_non_overlap_2} and Eq.~\ref{complexity_non_overlap_1} are the same. Because $T(n/4^k)=T(1)=1$, we have $k=\log_4 n$. -\begin{align} -\label{complexity_non_overlap_3} - T(n)&\leq n\sum_{i=1}^{\infty}(3/4)^{i-1}+3^kT(n/4^k)\\ - &\leq \frac{1}{1-3/4}n+3^{\log_4 n} T(1)= 4n+n^{\log_4 3}\\ - &\leq 5n \\ - &=O(n) -\end{align} - - - -\subsubsection{Mathematical Induction} -Mathematical induction is a mathematical proof technique, and is essentially used to prove that a property $P(n)$ holds for every natural number $n$, i.e. for $n=0, 1, 2, 3$, and so on. Therefore, in order to use induction, we need to make a \textit{guess} of the closed-form solution for $a_n$. Induction requires two cases to be proved. -\begin{enumerate} - \item - \textit{Base case:} proves that the property holds for the number $0$. -\item \textit{Induction step:} proves that, if the property holds for one natural number $n$, then it holds for the next natural number $n+1$. -\end{enumerate} - -For $T(n)=2\times T(n-1) +1, T_0 = 0$, we can have the following result by expanding $T(i), i \in [0, 7]$. -\begin{lstlisting}[numbers=none] -n 0 1 2 3 4 5 6 7 -T_n 0 1 3 7 15 31 63 127 -\end{lstlisting} -It is not hard to find the rule and guess $T(n) = 2^n-1$. Now, we prove this equation by induction: -\begin{enumerate} - \item Show that the basis is true: $T(0) = 2^0 -1 = 0$. 
- \item Assume it holds true for $T(n-1)$. By induction, we get - \begin{align} - T(n)&=2T(n-1) + 1 \\ - &=2 (2^{n-1} - 1) + 1 \\ - &= 2^n -1 - \end{align} - Now we have shown that the induction step holds true too. -\end{enumerate} - -\begin{bclogo}[couleur = blue!30, arrondi=0.1,logo=\bccrayon,ombre=true]{Solve $T(n)=T(n/2)+O(1)$ and $T(2n)\leq2T(n)+2n-1, T(2)=1$.} -\end{bclogo} - - \subsection{Solve Divide-and-Conquer Recurrence Relations} -All the previous recurrence relations, either homogeneous or non-homogeneous, fall into the bucket of \textit{decrease and conquer} (maybe not right), and there is yet another type of recursion--Divide and Conquer. Same here, we ignore how we get such recurrence but focus on how to solve it. - -We write our divide and conquer recurrence relation using the time complexity function; there are two types as shown in Eq.~\ref{divide_conquer_eq1} ($n$ is divided equally) and Eq.~\ref{divide_conquer_eq2} ($n$ is divided unequally): -\begin{equation} - T(n)=aT(n/b)+f(n) - \label{divide_conquer_eq1} -\end{equation} -where $a\geq 1, b>1$, and $f(n)$ is a given function, which usually has the form $f(n)= cn^k$. -\begin{equation} - T(n)=\sum_{i=1}^{k}a_iT(n/b_i)+f(n) - \label{divide_conquer_eq2} -\end{equation} -Considering that the first type is much more commonly seen than the other, we only learn how to solve the first type; in fact, at least, I assure you that within this book, the second type will never appear. - -\paragraph{Sit and Deduct} For simplicity, we assume $n=b^m$, so that $n/b$ is always an integer. 
First, let us use the iterative method, and expand Eq.~\ref{divide_conquer_eq1} $m$ times, until $T(n/b^m)$ becomes $T(1)$: -\begin{align} - T(n)&=aT(n/b)+cn^k\\ - &=a(aT(n/b^2)+c(n/b)^k)+cn^k\\ - &=a(a(aT(n/b^3)+c(n/b^2)^k)+c(n/b)^k)+cn^k\\ - &\vdots\\ - &=a(a(\ldots (aT(n/b^m)+c(n/b^{m-1})^{k})+\ldots)+c(n/b)^k)+cn^k\\ - &=a(a(\ldots (aT(1)+cb^{k})+\ldots)+c(n/b)^k)+cn^k -\end{align} -Now, assume $T(1)=c$ for simplicity and for getting rid of this constant part in our sequence. Then, -\begin{equation} - T(n)=ca^m+ca^{m-1}b^k+ca^{m-2}b^{2k}+\ldots+cb^{mk}, -\end{equation} -which implies that -\begin{align} -T(n)&=c\sum_{i=0}^ma^{m-i}b^{ik}\\ -&=ca^m\sum_{i=0}^m(\frac{b^k}{a})^i -\end{align} -So far, we get a geometric series, which is a good sign to get the closed-form expression. We first summarize all possible substitutions that will help our further analysis. -\begin{align} - f(n)&=cn^k\\ - n&=b^m\\ - & \xrightarrow{} \\ - \label{eq_divide_conquer_sub_2} - m&=\log_b n\\ - f(n)&=cb^{mk}\\ - a^m&=a^{\log_b n}=n^{\log_b a}\label{eq_divide_conquer_sub_1} -\end{align} -Depending on the relation between $a$ and $b^k$, there are three cases: -\begin{enumerate} - \item $b^k < a$: In this case, $\frac{b^k}{a}<1$, so the geometric series converges to a constant even if $m$ goes to infinity. Then, we have an upper bound for $T(n)$, $T(n) < \frac{c}{1-b^k/a}a^m$, which means $T(n)=O(a^m)$. Using Eq.~\ref{eq_divide_conquer_sub_1}, we get $T(n)=O(n^{\log_b a})$. - \item $b^k = a$: In this case, $\frac{b^k}{a}=1$, so every term of the series equals $1$ and $T(n)=ca^m(m+1)=O(a^m m)$. Using Eq.~\ref{eq_divide_conquer_sub_2} and Eq.~\ref{eq_divide_conquer_sub_1}, we get $T(n)=O(n^{\log_b a}\log_b n)$. - \item $b^k > a$: In this case, we denote $\frac{b^k}{a}=d$ ($d$ is a constant and $d>1$). Use the standard formula for summing over a geometric series: - \begin{align} - T(n)&=ca^m\frac{d^{m+1}-1}{d-1}=O(a^m\frac{d^{m+1}-1}{d-1})\\ - &=O(b^{mk})=O(n^k)=O(f(n)) - \end{align} -\end{enumerate} - - - -\subsubsection{Master Method} Comparison between $b^k$ and $a$ is equivalent to the comparison between $b^{km}$ and $a^m$. From the above substitution, it is further equivalent to comparing $f(n)$ to $n^{\log_b a}$. This is when master method kicks in and we will see how it helps us to apply these three cases to real situations. - -Compare $f(n)/c=n^k$ with $n^{\log_b a}$. 
Intuitively, the larger of the two functions would dominate the solution to the recurrence. Now, we rephrase the three cases using the master method for the easiness of memorization. -\begin{enumerate} - \item If $n^k < n^{\log_b a}$, and it is polynomially smaller by a factor of $n^{\epsilon}$ for some constant $\epsilon > 0$, we have: - \begin{equation} - T(n)=O(n^{\log_b a}) - \end{equation} - \item If $n^k>n^{\log_b a}$, similarly, we need it to be polynomially larger by a factor of $n^{\epsilon}$ for some constant $\epsilon > 0$, we have: - \begin{equation} - T(n)=O(f(n)) - \end{equation} - \item If $n^k=n^{\log_b a}$, then: - \begin{equation} - T(n)=O(n^{\log_b a}\log_b n) - \end{equation} - -\end{enumerate} - - - - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%%%%%%%%%%%%% Complexity Analysis -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\subsection{Hands-on Example: Insertion Sort} -\label{sec_time_complexity} -In this section, we will see an example that has different asymptotic bounds as the input differs, where we focus more on the worst-case and average-case analysis. Along the analysis of complexity, we will also see how asymptotic notation can be used in equations or inequalities to assist the process. - -Because most of the time, the average-case running time will be asymptotically equal to the worst-case, we do not really try to analyze it in the first place. In the case of best-case, it would only matter if you know your application context fits right in, otherwise, it will be trivial and non-helpful in the comparison of multiple algorithms. We will see an example below. - -\paragraph{Insertion Sort: Worst-case and Best-case} There is another sorting algorithm--insertion sort--it sets aside another array $S$ to save the sorted items. At first, we put the first item in, which by itself is already sorted. At the second pass, we put A[1] into the right position in $S$. Once the last item is handled, we return the sorted list. 
The code is: -\begin{lstlisting}[language=Python] -def insertionSort(a): - '''implement insertion sort''' - if not a or len(a) == 1: - return a - n = len(a) - sl = [a[0]] + [None] *(n-1) # sorted list - for i in range(1, n): # items to be inserted into the sorted - key = a[i] - j = i-1 - - while j >= 0 and sl[j] > key: # compare key from the last sorted element - sl[j+1] = sl[j] # shift a[j] backward - j -= 1 - sl[j+1] = key - print(sl) - return sl -\end{lstlisting} -For the first \texttt{for} loop in line 7, it will sure has $n-1$ passes. However, for the inner \texttt{while} loop, the real times of execution of statement in line 12 and 13 depends on the state between \texttt{sl} and \texttt{key}. If we try to sort the input array \texttt{a} incrementally such that A=[2, 3, 7, 8, 9, 9, 10], and if the input array is already sorted, then there will be no items in the sorted list can be larger than our key which result only the execution of line 14. This is the best case, we can denote the running time of the \texttt{while} loop by $\Omega(1)$ because it has constant running time at its best case. However, if the input array is a reversed as the desired sorting, which means it is decreasing sorted such as A=[10, 9, 9, 9, 7, 3, 2], then the inner \texttt{while} loop will has $n-i$, we denote it by $O(n)$. We can denote our running time equation as: -\begin{align} - T(n)&=T(n-1)+O(n)\\ - &=O(n^2) \notag -\end{align} -And, -\begin{align} - T(n)&=T(n-1)+\Omega(1)\\ - &=\Omega(n)\notag -\end{align} -Using simple iteration, we can solve the math formula and have the asymptotic upper bound and lower bound for the time complexity of insertion sort. 
- -For the average case, we can assume that each time, we need half time of comparison of $n-i$, we can have the following equation: -\begin{align} - T(n)&=T(n-1)+\Theta(n/2)\\ - &=T(n-2)+\Theta(\frac{n}{2}+\frac{n-1}{2}) \notag\\ - &=\Theta(n^2)\notag -\end{align} -For algorithm that is stable in complexity, we conventionally analyze its average performance, and it is better to use $\Theta$-notation in the running time equation and give the asymptotic tight bound like in the selection sort. For algorithm such as insertion sort, whose complexity varies as the input data distribution we conventionally analyze its worst-case and use $O$-notation. - - - -% \paragraph{Simple example} These are just straightforward for us to analyze the running time. Sometimes, things become more obscure. Then, we need more advanced techniques to help us handle. For example, we use recurrence function to represent the the time we need when the problem decrease the size. Such that for one for loop, we can use $T(n)=T(n-1)+O(1)$, and for two nested for loops, normally $T(n)=T(n-1)+O(n)$ is enough to represent this situation. For a divide and conquer problem, we might get a recurrence function as $T(n) = T(n/2)+O(1)$. With recurrence function, the time complexity analysis is conveniently converted to a math problem and things get to be more interesting. We can divide the recurrence function into two types: non-overlapping as $T(n) = T(a*n/b)+f(n)$, and with over-lapping as $T(n) = T(a*n/b)+f(n)$. - - -%%%%%%%%%%%%%%%%%%Amortiled analysis%%%%%%%%%%%%% -\section{*Amortized Analysis} -\label{sec_amortized_analysis} -There are two different ways to evaluate an algorithm/data structure: - -\begin{enumerate} - \item Consider each operation separately: one that look each operation incurred in the algorithm/data structure separately and offers worst-case running time $O$ and average running time $\Theta$ for each operation. 
For the whole algorithm, it sums up on these two cases by how many times each operation is incurred. - \item Amortized among a sequence of (related) operations: Amortized analysis can be used to show that the average cost of an operation is small, if one averages over a sequence of operations, even though a simple operation might be expensive. Amortized analysis guarantees the average performance of each operation in the worst case. -\end{enumerate} - -Amortized analysis does not purely look each operation on a given data structure separately, it averages time required to perform a sequence of different data structure operations over all performed operations. With amortized analysis, we might see that even though one single operation might be expensive, the amortized cost of this operation on all operations is small. Different from average-case analysis, probability will not be applied. From the example later we will see that amortized analysis view the data structure in applicable scenario, to complete this tasks, what is the average cost of each operation, and it is acheviable given any input. Therefore, the same time complexity, say $O(f(n))$, worst-case > amortized > average. - -There are three types of amortized analysis: -\begin{enumerate} - \item Aggregate Analysis: - \item Accounting Method: - \item Potential method: -\end{enumerate} -%%%%%%%%%%%%%%%%%%space complexity%%%%%%%%%%%%%%% -\section{Space Complexity} -The analysis of space complexity is more straightforward, given that we are essentially the one who allocate space for the application. We simply link it to the size of items in the data structures. The only obscure is with \textit{recursive program} which takes space from stack but is hidden from the users by the programming language compiler or interpreter. The recursive program can be represented as a recursive tree, the maximums stack space it needs is decided by the height of the recursive tree, thus $O(h)$, given $h$ as the height. 
- -\paragraph{Space and Time Trade-off} In the field of algorithm design, we can usually trade space for time efficiency or trade time for space efficiency. For example, if you put your algorithm on a backend server and need to respond to users' requests, then decreasing the response time is especially useful here. Normally we want to decrease the time complexity by sacrificing more space if the extra space is not a problem for the physical machine. But in some cases, decreasing the space complexity is more important and needed, thus we might need to go for alternative algorithms that use less space but might have a higher time complexity. - -\section{Summary} -For your convenience, we provide a table that shows the time complexity of frequently used recurrence equations. -\begin{figure}[h] - \centering - \includegraphics[width=1\columnwidth] {fig/complexity_cheatsheet.png} - \caption{The cheat sheet for time and space complexity with recurrence function. If T(n) = T(n-1)+T(n-2)+...+T(1)+O(n-1) = $3^n$. They are called factorial, exponential, quadratic, linearithmic, linear, logarithmic, constant. } - \label{fig:cheat_sheet} -\end{figure} - -%%%%%%%%%%%%%%%%%%%%%%%%%Examples%%%%%%%%%%%%%%%%%%%%%%%%% -% \subsection{More Examples} -% \begin{examples}[resume] -% \item \textbf{ Pow(x, n) (50).} - -% Solution: T(n)= T(n/2)+O(1), the complexity is the same as the binary search, $O(logn)$. 
-% \begin{lstlisting}[language=Python] -% def myPow(self, x, n): -% """ -% :type x: float -% :type n: int -% :rtype: float -% """ -% if n==0: -% return 1 -% if n<0: -% n=-n -% x=1.0/x -% def helper(n): -% if n==1: -% return x - -% h = n//2 -% r = n-h -% value = helper(h) #T(n/2), then we have O(1) -% if r==h: -% return value*value -% else: #r is going to be 1 bigger -% return value*value*x -% return helper(n) -% \end{lstlisting} -% \end{examples} - - -%%%%%%%%%%%%%%Cheat sheet%%%%%%%%%%%%%%%%%%%%%% -% \subsection{Big-O Cheat Sheet} -% \label{complexity_subsec_cheat_sheet} -% In this section, we provide the plotting of common seen time complexity functions (shown in Fig~\ref{fig:big_o_complexity_chart}): including $log_2{n}$, $n$, $n\log_2{n}$, $n^2$, $2^n$, and $n!$, so that we can sense the complexity change as the input size n increase. Resource found on \url{http://bigocheatsheet.com/}. - - -% \begin{table}[h] -% \begin{small} -% \centering -% \noindent\captionof{table}{ Explanation of Common Growth Rate} -% \noindent \begin{tabular}{|p{0.15\columnwidth}|p{0.2\columnwidth}| p{0.65\columnwidth}|} -% \hline -% Growth Rate & Name & Example operations \\ \hline -% $O(1)$ & Constant& append, get item, set item \\\hline -% $O(\log{n})$ &Logarithmic& binary search in the sorted array\\ \hline -% $O(n)$ & Liner & Copy, iteration\\ \hline -% $O(n\log{n})$ & Linear-Logarithmic& MergeSort, QuickSort\\ \hline -% $O(n^2)$ & Quadratic& Nested Loops\\ \hline -% $O(n^3)$ &Cubic& Matrix Multiplication\\ \hline -% $O(2^n)$ & Exponential& Backtracking, Combination\\ \hline -% $O(n!)$ & factorial & Permutation\\ \hline -% \end{tabular} -% \label{tab:single_sequence} -% \end{small} -% \end{table} - -% Also, we provide the average and worst time and space complexity for the some classical data structure's operations (shown in Fig.~\ref{fig:data_structure_complexity}) and of algorithms (shown in Fig.~\ref{fig:data_structure_complexity}). -% \begin{figure}[h!] 
-% \centering -% \includegraphics[width=0.9\columnwidth]{fig/common_data_structure_operations.png} -% \includegraphics[width=0.9\columnwidth]{fig/array_sorting_algorithms.png} -% \caption{Complexity of Common Data structures} -% \label{fig:data_structure_complexity} -% \end{figure} - -%%%%%%%%%%%%%%%%%%%%%%Exercise -\section{Exercises} -\subsection{Knowledge Check} -\begin{enumerate} - \item Use iteration and recursion tree to get the time complexity of $T(n)=T(n/3)+2T(2n/3)+O(n)$. - \item Get the time complexity of $T(n)=2T(n/2)+O(n^2)$. - \item $T(n)=T(n-1)+T(n-2)+T(n-3)+...+T(1)+O(1)$. -\end{enumerate} -\end{document} \ No newline at end of file diff --git a/Easy-Book/chapters/chapter_7_graph_implementation.tex b/Easy-Book/chapters/chapter_7_graph_implementation.tex deleted file mode 100644 index bd2c472..0000000 --- a/Easy-Book/chapters/chapter_7_graph_implementation.tex +++ /dev/null @@ -1,71 +0,0 @@ -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%%%%%%%%%%%%% Graph -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\documentclass[../main.tex]{subfiles} -\begin{document} -\begin{figure}[!ht] - \centering - % \includegraphics[width=\columnwidth]{fig/graph_1.png} - % \caption{An undirected graph} - % \label{fig:graph_1} - - \includegraphics[width=\columnwidth]{fig/example_undirected_directed_graph.png} - \caption{Example of undirected and directed graph: mark the vertices and edges} - \label{fig:graph_2} -\end{figure} -Graph is a natural way to represent connections and reasoning between things or events. A graph is made up of \textit{vertices} (or nodes, points) are connected by \textit{edges} (arcs, or lines). A graph structure is shown in Fig.~\ref{fig:graph_2}. 
There are many fields that heavily rely on the graph, such as the probabilistic graphical models applied in computer vision, route problems, network flow in network science, link structures of a website in social media in computer science. - -In this chapter, we present graph as a data structure. However, graph is really a broad way to model problems; for example, we can model the possible solution space as a graph and apply graph search to find the possible solution to a problem. So, do not let the physical graph data structures limit our imagination. - - - -As the first chapter related to graph, we mainly focus on explaining related concepts, terminologies, graph representation, and the basic exhaustive search methods on real graph data structures: -\begin{itemize} - \item Knowing the definition and the \textbf{terminologies} commonly used in Section~\ref{graph_terminology}. - \item \textbf{Representing the graph} data structures and implementing these representations in Python in Section~\ref{graph_representation}. - \item Learn the most basic graph search: Breadth-first search and Depth-first search in Section~\ref{sec_bfs}. -\end{itemize} - -\paragraph{Arrangement of Graph in the Book} Searching in graphs lies at the heart of the field of graph algorithms; therefore, we put more effort in this book into explaining its behavior and properties than a lot of other books do. More advanced searching techniques and applications will be detailed in Chapter~\ref{chapter_non_linear_searching} in the part ~\ref{part_complete_searching}. And more advanced graph algorithms that build upon the basic searching techniques will be taught in Chapter ~\ref{chapter_advanced_non_linear_search} in part ~\ref{part_advanced_topics}. And graph related questions instead will be categorized in Chapter ~\ref{chapter_graph_problem} in Part ~\ref{part_question}. 
- -%\section{Graph Traversal} -% \begin{figure}[h] -% \centering -% % \includegraphics[width=0.6\columnwidth]{fig/dfs_bfs.png} -% % \caption{BFS VS DFS} -% % \label{fig:bfs_dfs} -% \includegraphics[width=0.6\columnwidth]{fig/example_graph.png} -% \caption{Example Graph} -% \label{example_graph} -% \end{figure} -% The breadth first search (BFS) and the depth first search (DFS) are the two algorithms used for traversing and searching a node in a graph. They can also be used to find out whether a node is reachable from a given node or not. %In Fig.~\ref{fig:bfs_dfs} shows the BFS and DFS traverse ordering. Starting from a given vertex $u$, BFS will traverse all of its adjacency nodes $v_i$ and print out them, and then continue to the adjacency nodes of $v_i$, while the DFS will traverse all of its adjacency nodes, but in a recursively way, which it recursively traverse the adjacency nodes of the current node untill reaching to a node that has no outgoing nodes. - -% Both BFS and DFS has a time complexity of $O(V+E)$ with adjacency list and $O(V^2)$ with adjacency matrix. - - - -% A basic DFS and BFS implementation will only need state (1) and (3). While in some advanced extension of DFS and BFS, state (2) might be needed. Searching is an universal approach in problem solving. With searching, it literally search in the solution space and find the solutions. In this chapter, we focus on the basic Breadth-first and Depth-first Search algorithms executed on real graph data structure, and then exploring how to apply the DFS and BFS techniques on problem solution space, and introduce an optimized searching technique called \textit{Backtracking}. -% \begin{enumerate} -% \item Breath-first Search in Section~\ref{sec_bfs}. -% \item Depth-first Search in Section~\ref{searching_dfs}. 
-% \item Graph Search for Problem Solving -% \item Backtracking -% \end{enumerate} - -\paragraph{Graph in Interviews} Graph solution for some problems are most likely to be the naive solution, and it is a nice first step to give the naive algorithm design and analysis before moving on to more advanced solutions, such as divide and conquer, dynamic programming, or greedy algorithm. For some problems, graph search might be the only solution, so learning how to pruning the searching space with techniques like bidirectional search and backtracking would become handy. - -\section{Introduction and Terminologies} -\label{graph_terminology} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -% matrix and graph -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -% \section{Graphs} -% \label{chapter_graph_matrix} -Graph is a widely used data structure to model real-world problems. A graph is a collection of \textit{vertices} and \textit{edges} (which connects two vertices). - - - -%%%%%%%%%%%%%graph representation%%%%%%%%%%%%%%%%%%%%%%%%%% - - -\end{document} \ No newline at end of file diff --git a/Easy-Book/chapters/chapter_8_heap_priority_queue.tex b/Easy-Book/chapters/chapter_8_heap_priority_queue.tex deleted file mode 100644 index c403ab8..0000000 --- a/Easy-Book/chapters/chapter_8_heap_priority_queue.tex +++ /dev/null @@ -1,478 +0,0 @@ -\documentclass[main.tex]{subfiles} -\begin{document} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%%%%%%%%%%%%% heap and priprity queue -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -In this chapter, we introduce heap data structures which is essentially an array object but it can be viewed as a nearly complete binary tree. 
The concept of the data structures in this chapter is between linear and non-linear, that is, using linear data structures to mimic non-linear data structures and their behavior for higher efficiency under certain contexts. -%%%%%%%%%%%%%%%%%heap%%%%%%%%%%%%%%%%% -\section{Heap} -\label{sec_heap} -A heap is a tree-based data structure that satisfies the \textbf{heap property} but is implemented as an array. There are two kinds of heaps: \textbf{max-heaps} and \textbf{min-heaps}. In both kinds, the values in the nodes satisfy a \textbf{heap property}. For a max-heap, the property states that for each node $i$ other than the root, $A[p[i]] \geq A[i]$, where $p[i]$ is the index of the parent of node $i$; for a min-heap the inequality is reversed, $A[p[i]] \leq A[i]$. Normally, a heap is based on a binary tree, which makes it a binary heap. Fig.~\ref{fig:max-heap-1} shows a binary max-heap and how it looks as a binary tree. In the following content, we assume our heap is a binary heap. Thus, the largest element in a max-heap is stored at the root. For a heap of $n$ elements the height is $O(\log n)$. % as for every node i other than root. $A[PARENT(i)]>= A[i]$. The unique usage of Heap, including miniHeap and maxiHeap, Monotic Heap. The (binary) heap data structure is an array that we can view as a nearly complete binary tree. The tree is completely filled on all levels except possibly the lowest, which is filled from left up to a point. -% \subsection{Introduction} -\begin{figure}[h!] - \centering - \includegraphics[width = 0.98\columnwidth]{fig/binary_tree.png} - \caption{Max-heap visualized as a binary tree on the left, and implemented with an array on the right.} - \label{fig:max-heap-1} -\end{figure} - -As we can see, we can implement a heap as an array due to the fact that the tree is complete. A complete binary tree is one in which each level must be fully filled before starting to fill the next level. An array-based heap is more space efficient than a tree-based one because no child pointers need to be stored for each node. 
To make the math easy, we number the nodes in the tree starting from the root, level by level and from left to right, with 1 as the beginning index (shown in Fig.~\ref{fig:max-heap-1}). According to this numbering rule, each node in the tree is mapped to and saved in the array at its assigned index (shown in Fig.~\ref{fig:max-heap-1}). In a heap, we can traverse the imaginary binary tree in two directions: \textbf{root-to-leaf} and \textbf{leaf-to-root}. Given a parent node with index $p$, its left child can be found at position $2p$ in the array. Similarly, the right child of the parent is at position $2p + 1$ in the array. To find the parent of any node with index $p$ in the tree, we can simply use $\lfloor p/2\rfloor$. In Python3, use integer division $p//2$. \textit{Note: we can also start the index at 0, as is done in the \textbf{heapq} library introduced later in this section. Given a node $x$, the left and right children will be $2*x+1$ and $2*x+2$, and the parent node will have index $(x-1)//2$.} - -The common applications of the heap data structure include: -\begin{itemize} - \item Implementing a priority-queue data structure, which will be detailed in the next section, so that insertion and deletion can be implemented in $O(\log n)$; a priority queue is an important component in algorithms like Kruskal's for the minimum spanning tree (MST) problem and Dijkstra's for the single-source shortest paths (SSSP) problem. - \item Implementing the heapsort algorithm. -\end{itemize} - -Normally, there is no notion of 'search' in a heap, but only insertion and deletion, which can be done by traversing a leaf-to-root or root-to-leaf path of length $O(\log n)$. -%%%%%%%%%%%%%%%%%%Basic Implementation%%%%%%%%%%%%%%%%%%%%%%%% -\subsection{Basic Implementation} -The basic methods of a heap class should include: \textbf{pop}, \textbf{push}, and \textbf{heapify}. \textbf{push} adds an item to the heap and \textbf{pop} removes the root item from the heap, both while still maintaining the heap property. 
And \textbf{heapify} denotes the operation needed for an given array, to convert it to a heap directly and efficiently. - -Let's implement a heap class using list. Because the first element of the heap is actually empty, we define our class as follows: -\begin{lstlisting}[language=Python] -class Heap: - def __init__(self): - self.heap = [None] - self.size = 0 - def __str__(self): - out = '' - for i in range(1, self.size + 1): - out += str(self.heap[i]) + ' ' - return out -\end{lstlisting} -Assuming we already have got a heap shown in Fig.~\ref{fig:max-heap-1}, push or pop an item from the current heap requires us to do post-processing in order to maintain the heap property. Let's discuss the two cases. Change it to use max heap as example. - -\paragraph{Push with Floating} When we push an item into, to maintain the complete binary tree property, the new item goes to the end of the heap(array) first. Assuming the new item a[i] is the smallest item up till now, there will be violation of the heap property through the \textbf{a[i]->root path}. To correct the potential violation, we traverse the path a[i]->root, and compare each node and its parent to decide if a swap operation is needed. For a min-heap, if the child node is smaller than the parent, that is a violation, and we swap these two nodes to let a[i] \textbf{float up} to make sure the subtree of a[i].parent obey the min-heap property. For example, in the min-heap. The time complexity is the same as the height of the complete tree, which is $O(\log n)$. 
-\begin{lstlisting}[language=Python] - def _float(self, index): # enforce min-heap, leaf-to-root - while index // 2: # while parent exist - p_index = index // 2 - print('p', p_index, index) - if self.heap[index] < self.heap[p_index]: # a violation - # swap - self.heap[index], self.heap[p_index] = self.heap[p_index], self.heap[index] - else: - break - index = p_index # move up the node - def insert(self, val): - self.heap.append(val) - self.size += 1 - self._float(index = self.size) -\end{lstlisting} - -\paragraph{Pop with Sinking} When we pop out the item at root node, or delete any item a[i], an empty spot appears at that position. To maintain the complete binary tree, we first simply use the last item to fill in this spot. However, in a min-heap, the last item will mostly not be the smallest item among the subtree rooted at a[i]. The smallest item will appear anywhere in the subtree. We simply do a search starts from node a[i] and compare its value with left and right child. The left and right subtree obey the min-heap property already, therefore the smallest item is among a[i], left, right. If the node is larger than its smaller child node, we swap the parent with the smaller child, and move our pointer to the smaller child node and repeat the above process until the current node is the smallest among these three nodes. This process is called like sinking down a[i] along the \textbf{path a[i]->leaf}. Same as the insert in the case of complexity, $O(\log n)$. -\begin{lstlisting}[language=Python] - def _sink(self, index): # enforce min-heap, root-to-leaf - while 2 * index <= self.size: - li = 2 * index - ri = li + 1 - mi = index - if self.heap[li] < self.heap[mi]: - mi = li - if ri <= self.size and self.heap[ri] < self.heap[mi]: - mi = ri - if mi != index: - # swap index with mi - self.heap[index], self.heap[mi] = self.heap[mi], self.heap[index] - else: - break - index = mi - def pop(self): - val = self.heap[1] - self.heap[1] = self. 
heap.pop() - self.size -= 1 - self._sink(index = 1) - return val -\end{lstlisting} -Now, let us run an example: -\begin{lstlisting}[language=Python] -h = Heap() -lst = [21, 1, 45, 78, 3, 5] -for v in lst: - h.insert(v) -print('heapify with insertion: ', h) -h.pop() -print('after pop(): ', h) -\end{lstlisting} -The output is listed as: -\begin{lstlisting} -heapify with insertion: 1 3 5 78 21 45 -after pop(): 3 21 5 78 45 -\end{lstlisting} - -\paragraph{Heapify with Bottom-up Sinking} Heapify is a procedure that converts a list into a heap data structure. We have learned the insert procedure. To heapify a list, we can do it through a series of inserts, iterating through the items in the list, which gives an upper-bound complexity of $O(n\log n)$. However, a more efficient way to do it is to treat the given list as a tree and to heapify directly on the list. %There are two possibly two ways to do this: (1) through sinking and (2) through floating. -To satisfy the heap property, we need to first start from the smallest subtrees. Leaf nodes have no children, so they satisfy the heap property naturally. Therefore we can jump to the last parent node, which is at position a[n//2]. We apply the sinking process as used in \textbf{pop} so that the subtree rooted at the current node obeys the heap property. By iterating through all the parent nodes a[1...n//2] in reversed order, we can guarantee that the final complete binary tree obeys the heap property. This follows a bottom-up, divide-and-conquer fashion: instead of heapifying A[1...n] at once, we first heapify A[n], then A[n-1...n], A[n-2...n], ..., and finally A[1...n]. The process is shown in Fig.~\ref{fig:heapify}. This process gives us a tighter upper bound of $O(n)$. -\begin{figure}[h!] 
- \centering - \includegraphics[width = 0.98\columnwidth]{fig/heapify.png} - \caption{Heapify for a given list.} - \label{fig:heapify} -\end{figure} -\begin{lstlisting}[language=Python] - def heapify_sink(self, lst): - self.heap = [None] + lst - self.size = len(lst) - for i in range(self.size//2, 0, -1): - self._sink(i) -\end{lstlisting} - -Now, run the following code: -\begin{lstlisting}[language=Python] -h = Heap() -h.heapify(lst) -print('heapify with heapify:', h) -\end{lstlisting} -Out put is: -\begin{lstlisting} -heapify with heapify: 1 5 21 78 3 45 -\end{lstlisting} -\begin{bclogo}[couleur = blue!30, arrondi=0.1,logo=\bccrayon,ombre=true]{Which way is more efficient building a heap from a list?} Using insertion or heapify? What is the efficiency of each method? The experimental result can be seen in the code. -\end{bclogo} -When we are solving a problem, unless specifically required for implementation, we can always use an existent Python module/package. Here, we introduce one Python module: heapq that implements heap data structure for us. -% \begin{enumerate} -% \item MAX-HEAPIFY, runs in $O(lgn)$, is the key to maintaining the max-heap property -% \item BUILD-MAX-HEAP, runs in linear time, produces a maxheap from an unordered input array -% \item MAX-HEAP-INSERT, HEAP-EXTRACT-MAX, HEAP-INCREASE-KEY, and HEAP-MAXIMUM, runs in $O(lgn)$ time, allow the heap data structure to implement a priority queue -% \end{enumerate} -%%%%%%%%%%%%%%%%%%Python Built-in Module: heapq%%%%%%%%%%%%%%%%%%%%%%%% -\subsection{Python Built-in Library: heapq} -\textbf{heapq}: heapq is a built-in library in Python that implements relevant functions to carry out various operations on heap data structure. These functions are listed and described in Table~\ref{tab:functions_in_heapq}. 
\textit{To note that heapq is not a data type like queue.Queue() or collections.deque(), it is a library (or class) that can do operations like it is on a heap.} %, which can be used to maintain a priority queue. Operations include heappush, heappop, and nsmallest. heapq in python to maintain a priority queue with $O(logn)$. -\begin{table}[h] -\begin{small} -\centering -\noindent\captionof{table}{ Methods of \textbf{heapq}} - \noindent \begin{tabular}{|p{0.25\columnwidth}|p{0.75\columnwidth}| } - \hline -Method & Description \\ \hline -heappush(h, x) & Push the value item onto the heap, maintaining the heap invariant. \\\hline -heappop(h) &Pop and return the \textit{smallest} item from the heap, maintaining the heap invariant. If the heap is empty, IndexError is raised.\\ \hline -heappushpop(h, x) &Push item on the heap, then pop and return the smallest item from the heap. The combined action runs more efficiently than heappush() followed by a separate call to heappop().\\ \hline -heapify(x) & Transform list x into a heap, in-place, in linear time.\\ \hline -heapreplace(h, x) & Pop and return the smallest item from the heap, and also push the new item. The heap size doesn’t change. If the heap is empty, IndexError is raised. This is more efficient than heappop() followed by heappush(), and can be more appropriate when using a fixed-size heap.\\ \hline -nlargest(k, iterable, key = fun) & This function is used to return the k largest elements from the iterable specified and satisfying the key if mentioned. \\ \hline -nsmallest(k, iterable, key = fun) & This function is used to return the k smallest elements from the iterable specified and satisfying the key if mentioned. \\ \hline -\end{tabular} - \label{tab:functions_in_heapq} - \end{small} -\end{table} -heapq has some other functions like merge(), nlargest(), nsmallest() that we can use. Check out \url{https://docs.python.org/3.0/library/heapq.html} for more detials. 
- -\paragraph{Min-Heap} Now, let us try to heapify the same examplary list as used in the last section, [21, 1, 45, 78, 3, 5], we use need to call the function heapify(). The time complexity of heapify is $O(n)$ -\begin{lstlisting}[language = Python] -'''implementing with heapq''' -from heapq import heappush, heappop, heapify -h = [21, 1, 45, 78, 3, 5] -heapify(h) # inplace -print('heapify with heapq: ', h) -\end{lstlisting} -The print out is: -\begin{lstlisting} -heapify with heapq: [1, 3, 5, 78, 21, 45] -\end{lstlisting} - - Here we demonstrate how to use function nlargest() and nsmallest() if getting the first n largest or smallest is what we need, we do not need to heapify() the list as we needed in the heap and pop out the smallest. The step of heapify is built in these two functions. -\begin{lstlisting}[language=Python] -''' use heapq to get nlargest and nsmallest''' -li1 = [21, 1, 45, 78, 3, 5] -# using nlargest to print 3 largest numbers -print("The 3 largest numbers in list are : ", end="") -print(heapq.nlargest(3, li1)) - -# using nsmallest to print 3 smallest numbers -print("The 3 smallest numbers in list are : ", end="") -print(heapq.nsmallest(3, li1)) -\end{lstlisting} -The print out is: -\begin{lstlisting} -The 3 largest numbers in list are : [78, 45, 21] -The 3 smallest numbers in list are : [1, 3, 5] -\end{lstlisting} - - -\paragraph{Max-Heap} As we can see the default heap implemented in the heapq library is forcing the heap property of the min-heap. What if we want a max-heap instead? In heapq library, it does offer us function, but it is intentionally hided from users. It can be accessed like: heapq.\_[function]\_max(). Now, let us implement a max-heap instead. 
-\begin{lstlisting}[language = Python] -# implement a max-heap -h = [21, 1, 45, 78, 3, 5] -heapq._heapify_max(h) # inplace -print('heapify max-heap with heapq: ', h) -\end{lstlisting} -The print out is: -\begin{lstlisting} -heapify max-heap with heapq: [78, 21, 45, 1, 3, 5] -\end{lstlisting} - -Also, in practise, a simple hack for the max-heap is to save data as negative. Also, in the priority queue. -% What is we want a max-heap which returns the largest number instead of the smallest each time? -% \subsubsection{Max-heap and Min-heap} -% We can write our own MinHeap and MaxHeap class wrapper as follows so that it can be easier to use: -% \begin{lstlisting}[language = Python] -% class MaxHeapObj(object): -% def __init__(self,val): self.val = val -% def __lt__(self,other): return self.val > other.val -% def __eq__(self,other): return self.val == other.val -% def __str__(self): return str(self.val) - -% class MinHeap(object): -% def __init__(self): self.h = [] -% def heappush(self,x): heapq.heappush(self.h,x) -% def heappop(self): return heapq.heappop(self.h) -% def __getitem__(self,i): return self.h[i] -% def __len__(self): return len(self.h) - -% class MaxHeap(MinHeap): -% def heappush(self,x): heapq.heappush(self.h,MaxHeapObj(x)) -% def heappop(self): return heapq.heappop(self.h).val -% def __getitem__(self,i): return self.h[i].val -% \end{lstlisting} -\paragraph{More Private Functions} -\begin{table}[h] -\begin{small} -\centering -\noindent\captionof{table}{ Private Methods of \textbf{heapq}} - \noindent \begin{tabular}{|p{0.25\columnwidth}|p{0.75\columnwidth}| } - \hline -Method & Description \\ \hline -heappush(h, x) & Push the value item onto the heap, maintaining the heap invariant. \\\hline -heappop(h) &Pop and return the \textit{smallest} item from the heap, maintaining the heap invariant. If the heap is empty, IndexError is raised.\\ \hline -heappushpop(h, x) &Push item on the heap, then pop and return the smallest item from the heap. 
The combined action runs more efficiently than heappush() followed by a separate call to heappop().\\ \hline -heapify(x) & Transform list x into a heap, in-place, in linear time.\\ \hline -heapreplace(h, x) & Pop and return the smallest item from the heap, and also push the new item. The heap size doesn’t change. If the heap is empty, IndexError is raised. This is more efficient than heappop() followed by heappush(), and can be more appropriate when using a fixed-size heap.\\ \hline -nlargest(k, iterable, key = fun) & This function is used to return the k largest elements from the iterable specified and satisfying the key if mentioned. \\ \hline -nsmallest(k, iterable, key = fun) & This function is used to return the k smallest elements from the iterable specified and satisfying the key if mentioned. \\ \hline -\end{tabular} - \label{tab:functions_in_heapq} - \end{small} -\end{table} - -\paragraph{With Tuple/List or Customized Object as Elements} -Any object that supports comparison (\texttt{\_cmp\_()}) can be used in heap with \texttt{heapq}. When we want our item includes information as (priority, task), we can either put it in tuple or list. In the heap, we can change the value of any item just as in the list. However, the problem occurs after the change that the list will violate the heap priority. What we can do is use function such as \texttt{\_siftdown(heap, 0, len(heap)-1)} (used to implement heappush, and called with decreased priority ) and \texttt{\_siftup(heap, 0)} (used to implement heappop, and called with increased priority). 
-\begin{lstlisting}[language=Python] -import heapq - -heap = [[3, 'a'], [10, 'b'], [5,'c']] -heapq.heapify(heap) -print(heap) - -heap[0] = [6, 'a'] -print(heap) -heapq._siftup(heap, 0) #simlar to remove heap[0], put this item at the end -print(heap) -\end{lstlisting} - -%%%%%%%%%%%%%%%%%%%%priority Queue%%%%%%%%%%%%%%%%%%% -\section{Priority Queue} -\label{sec_priority_queue} -A priority queue is an abstract data type(ADT) and an extension of queue with properties: (1) additionally each item has a priority associated with it. (2) In a priority queue, an item with high priority is served (dequeued) before an item with low priority. (3) If two items have the same priority, they are served according to their order in the queue. - -Heap is generally preferred for priority queue implementation because of its better performance compared with arrays or linked list. Also, in Python queue module, we have \texttt{PriorityQueue()} class that provided us the implementation. Beside, we can can implement priority queue with \texttt{heapq} library too. These contents will be covered in the next two subsection. - -Applications of Priority Queue: -\begin{enumerate} - \item CPU Scheduling - \item Graph algorithms like Dijkstra’s shortest path algorithm, Prim’s Minimum Spanning Tree, etc - \item All queue applications where priority is involved. -\end{enumerate} - -\subsubsection{Implement with \texttt{heapq} Library} The core function is the ones used to implement the heap: \texttt{heapify()}, \texttt{push()}, and \texttt{pop()}. The official document:\url{https://docs.python.org/2/library/heapq.html} gave the exact implementation. However, we are still going to summarize and organize this information in our book. In order to implement priority queue, our binary heap needs to have the following features: -\begin{enumerate} - \item Sort stability: when we get two tasks with equal priorities, we return them in the order as of they were originally added. 
A potential solution is to modify the original 2-element list (priority, task) into a 3-element list as (priority, count, task). The entry \texttt{count} serves as a tie-breaker so that two tasks with the same priority are returned in the order they were added. Also, since no two entry counts are the same, the comparison will never attempt to directly compare two tasks. - \item Find a task in the heap, and either remove it or update its priority. This is needed in situations such as when the priority of a task changes or when a pending task needs to be removed. We understand how inconvenient it can be to find a non-root item and update its value. Normally, finding the item is a linear search which takes $O(n)$, and updating its value using either \texttt{\_siftdown()} or \texttt{\_siftup()} can be $O(\log n)$. The solution is: (1) do not remove a task other than through the \texttt{pop} operation, but mark it as REMOVED instead; (2) define a dictionary that uses \texttt{task} as key and the 3-element list as value. We name it \texttt{entry\_finder}. Because each entry is a list, the heap and the dictionary both hold references to the same entry object, so a change made to an entry through \texttt{entry\_finder} is also visible in the heap. Therefore, we can execute the find/mark-as-removed operation using task as key and do it in the \texttt{entry\_finder} instead. 
-\end{enumerate} -Python code: -\begin{lstlisting}[language=Python] -from heapq import heappush, heappop, heapify -from typing import List -import itertools -class PriorityQueue: - def __init__(self, items:List[List]=[]): - self.pq = [] # list of entries arranged in a heap - self.entry_finder = {} # mapping of tasks to entries - self.REMOVED = '' # placeholder for a removed task - self.counter = itertools.count() # unique sequence count - # add count to items - for p, t in items: - item = [p, next(self.counter), t] - self.entry_finder[t] = item - self.pq.append(item) - heapify(self.pq) - - def add_task(self, task, priority=0): - 'Add a new task or update the priority of an existing task' - if task in self.entry_finder: - self.remove_task(task) - count = next(self.counter) - entry = [priority, count, task] - self.entry_finder[task] = entry - heappush(self.pq, entry) - - def remove_task(self, task): - 'Mark an existing task as REMOVED. Raise KeyError if not found.' - entry = self.entry_finder.pop(task) - entry[-1] = self.REMOVED - - def pop_task(self): - 'Remove and return the lowest priority task. Raise KeyError if empty.' 
- while self.pq: - priority, count, task = heappop(self.pq) - if task is not self.REMOVED: - del self.entry_finder[task] - return task - raise KeyError('pop from an empty priority queue') -\end{lstlisting} -Let's run an example with our customized \texttt{PriorityQueue} class: -\begin{lstlisting}[language=Python] -pq = PriorityQueue(items=[[6, 'task 6'], [5, 'task5'], [19, 'task19']]) -print(pq.pq) -pq.add_task('task 10', 10) -print(pq.pq) -pq.remove_task('task5') -print(pq.pq) -pq.pop_task() -\end{lstlisting} -With output as: -\begin{lstlisting}[numbers=none] -[[5, 1, 'task5'], [6, 0, 'task 6'], [19, 2, 'task19']] -[[5, 1, 'task5'], [6, 0, 'task 6'], [19, 2, 'task19'], [10, 3, 'task 10']] -[[5, 1, ''], [6, 0, 'task 6'], [19, 2, 'task19'], [10, 3, 'task 10']] -'task 6' -\end{lstlisting} - -\subsubsection{Implement with \texttt{PriorityQueue} class} Class \texttt{PriorityQueue()} is the same as \texttt{Queue()}, \texttt{LifoQueue()}, they have same member functions as shown in Table~\ref{tab:methods_of_queue}. Therefore, we skip the semantic introduction. \texttt{PriorityQueue()} normally thinks that the smaller the value is the higher the priority is. We use a similar example as above to demonstrate its function. -\begin{lstlisting}[language=Python] -import queue -pq = queue.PriorityQueue() -items=[[6, 'task 6'], [5, 'task5'], [19, 'task19']] -for item in items: - pq.put(item) - -print(pq.queue) -next_job = pq.get() -print('processing job:', next_job) -print(pq.queue) -\end{lstlisting} -The output is: -\begin{lstlisting} -[[5, 'task5'], [6, 'task 6'], [19, 'task19']] -processing job: [5, 'task5'] -[[6, 'task 6'], [19, 'task19']] -\end{lstlisting} -If we want to give the number with larger value as higher priority, a simple hack is to pass by negative value. Another more professional way is to pass by a customized object and rewrite the comparison operator: < and == in the class with \_\_lt\_\_() and \_\_eq\_\_(). 
In the following code, we show how to use higher value as higher priority. -\begin{lstlisting}[language = Python] -class Job(object): - def __init__(self, priority, description): - self.priority = priority - self.description = description - print('New job:', description) - return - # def __cmp__(self, other): - # return cmp(self.priority, other.priority) - '''customize the comparison operators ''' - def __lt__(self, other): # < - try: - return self.priority > other.priority - except AttributeError: - return NotImplemented - def __eq__(self, other): # == - try: - return self.priority == other.priority - except AttributeError: - return NotImplemented - -q = Queue.PriorityQueue() - -q.put( Job(3, 'Mid-level job') ) -q.put( Job(10, 'Low-level job') ) -q.put( Job(1, 'Important job') ) - -while not q.empty(): - next_job = q.get() - print('Processing job:', next_job.priority) -\end{lstlisting} -The print out is: -\begin{lstlisting} -Processing job: 10 -Processing job: 3 -Processing job: 1 -\end{lstlisting} -If we want the priority queue to be able to update the priority of a task, we can apply similar wrapper in the \texttt{heapq} section. -\begin{bclogo}[couleur = blue!30, arrondi=0.1,logo=\bccrayon,ombre=true]{In single thread programming, is \textbf{heapq} or \textbf{PriorityQueue} more efficient?} In fact, the PriorityQueue implementation uses heapq under the hood to do all prioritisation work, with the base Queue class providing the locking to make it thread-safe. While heapq module offers no locking, and operates on standard list objects. This makes the heapq module faster; there is no locking overhead. In addition, you are free to use the various heapq functions in different, noval ways, while the PriorityQueue only offers the straight-up queueing functionality. -\end{bclogo} -Let us take these knowledge into practice with a LeetCode Problem: -347. Top K Frequent Elements (medium). Given a non-empty array of integers, return the k most frequent elements. 
-\begin{lstlisting}[numbers=none] -Example 1: - -Input: nums = [1,1,1,2,2,3], k = 2 -Output: [1,2] - -Example 2: - -Input: nums = [1], k = 1 -Output: [1] -\end{lstlisting} - -Analysis: to solve this problem, we need to first using a hashmap to get information as: item and its freqency. Then, we need to obtain the top frequent elements. The second step can be down with sorting, or using heap we learned. - -\textbf{Solution 1: Use Counter().} Counter() has a function most\_common(k) that will return the top k most frequent items. However, its complexity will be $O(n \log n)$. -\begin{lstlisting}[language=Python] -from collections import Counter -def topKFrequent(self, nums, k): - return [x for x, _ in Counter(nums).most_common(k)] -\end{lstlisting} - -\textbf{Solution 2: Use dict and heapq.nlargest()}. The complexity should be better than $O(n \log n)$. -\begin{lstlisting}[language=Python] -from collections import Counter -import heapq -def topKFrequent(self, nums, k): - count = collections.Counter(nums) - return heapq.nlargest(k, count.keys(), key=count.get) -\end{lstlisting} - -We can also use PriorityQueue(). -\begin{lstlisting}[language=Python] -from queue import PriorityQueue -class Solution: -def topKFrequent(self, nums, k): - h = PriorityQueue() - - # build a hashmap (element, frequency) - temp = {} - for n in nums: - if n not in temp: - temp[n] = 1 - else: - temp[n] += 1 - # put them as (-frequency, element) in the queue or heap - for key, item in temp.items(): - h.put((-item, key)) - - # get the top k frequent ones - ans = [None]*k - for i in range(k): - _, ans[i] = h.get() - return ans -\end{lstlisting} - -\section{Bonus} -\label{heap_sec_bonus} -\paragraph{Fibonacci heap} With fibonacc heap, insert() and getHighestPriority() can be implemented in O(1) amortized time and deleteHighestPriority() can be implemented in O(Logn) amortized time. 
-%%%%%%%%%%%%%%%%%%%%%%%LeetCode problems%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\section{Exercises} - -\textbf{selection with key word: kth. These problems can be solved by sorting, using heap, or use quickselect} -\begin{enumerate} -\item 703. Kth Largest Element in a Stream (easy) - \item 215. Kth Largest Element in an Array (medium) - \item 347. Top K Frequent Elements (medium) - \item 373. Find K Pairs with Smallest Sums (Medium - \item 378. Kth Smallest Element in a Sorted Matrix (medium) -\end{enumerate} -\textbf{priority queue or quicksort, quickselect} -\begin{enumerate} - \item 23. Merge k Sorted Lists (hard) - \item 253. Meeting Rooms II (medium) - \item 621. Task Scheduler (medium) -\end{enumerate} -\end{document} \ No newline at end of file diff --git a/Easy-Book/chapters/chapter_abstract_data_strctures.tex b/Easy-Book/chapters/chapter_abstract_data_strctures.tex deleted file mode 100644 index fb5af51..0000000 --- a/Easy-Book/chapters/chapter_abstract_data_strctures.tex +++ /dev/null @@ -1,330 +0,0 @@ -\documentclass[../main.tex]{subfiles} -\begin{document} -\chapter{Abstract Data Structures} -\label{chapter_abstract_data_structure} -(put a figure here) - - -\section{Introduction} -\label{chapter_abstract_data_structure_introduction} -Leaving alone statements that ``data structures are building blocks of algorithms'', they are just mimicking how things and events are organized in real-world in the digital sphere. Imagine that a data structure is an old-schooled file manager that has some basic operations: searching, modifying, inserting, deleting, and potentially sorting. In this chapter, we are simply learning how a file manager use to `lay out' his or her files (structures) and each `lay out's corresponding operations to support his or her work. - -We say the data structures introduced in this chapter are \textit{abstract} or idiomatic, because they are conventionally defined structures. 
Understanding these abstract data structures are like the terminologies in computer science. We further provide each abstract data structure's corresponding Python data structure in Part.~\ref{part_program_and_python}. - -There are generally three broad ways to organize data: Linear, tree-like, and graph-like, which we introduce in the following three sections. - -\paragraph{Items} We use the notion of \textbf{items} throughout this book as a generic name for unspecified data type. - -\paragraph{Records} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%Linear%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\section{Linear Data Structures} -\label{chapter_abstract_data_structure_linear_data_Strcuture} -\subsection{Array} -\begin{figure}[h!] - \centering - \includegraphics[width=0.7\columnwidth]{fig/array_representation.png} - \caption{Array Representation} - \label{fig:array_representation} -\end{figure} -\paragraph{Static Array} An array or static array is container that holds a \textbf{fixed size} of sequence of items stored at \textbf{contiguous memory locations} and each item is identified by \textit{array index} or \textit{key}. The Array representation is shown in Fig.~\ref{fig:array_representation}. Since using contiguous memory locations, once we know the physical position of the first element, an offset related to data types can be used to access any item in the array with $O(1)$, which can be characterized as \textbf{random access}. Because of these items are physically stored contiguous one after the other, it makes array the most efficient data structure to store and access the items. Specifically, array is designed and used for fast random access of data. - - -\paragraph{Dynamic Array} In the static array, once we declared the size of the array, we are not allowed to do any operation that would change its size; saying we are banned from either inserting or deleting any item at any position of the array. In order to be able to change its size, we can go for \textit{dynamic array}. 
That is to say, a static array and a dynamic array differ in whether their size is fixed. A simple dynamic array can be constructed by allocating a static array, typically larger than the number of elements immediately required. The elements of the dynamic array are stored contiguously at the start of the underlying array, and the remaining positions towards the end of the underlying array are reserved, or unused. Elements can be added at the end of a dynamic array in constant time by using the reserved space, until this space is completely consumed. When all space is consumed, and an additional element is to be added, then the underlying fixed-sized array needs to be increased in size. Typically resizing is expensive because it involves allocating a new underlying array and copying each element from the original array. Elements can be removed from the end of a dynamic array in constant time, as no resizing is required. The number of elements used by the dynamic array contents is its logical size or size, while the size of the underlying array is called the dynamic array's capacity or physical size, which is the maximum possible size without relocating data. Moreover, if the memory size of the array is beyond the memory size of your computer, it could be impossible to fit the entire array in, and then we would resort to other data structures that do not require physical contiguity, such as \textit{linked list}, \textit{trees}, \textit{heap}, and \textit{graph}, which we introduce next. 
- -\paragraph{Operations} To summarize, array supports the following operations: -\begin{itemize} - \item Random access: it takes $O(1)$ time to access one item in the array given the index; - \item Insertion and Deletion (for dynamic array only): it consumes Average $O(n)$ time to insert or delete an item from the middle of the array due to the fact that we need to shift all other items; - \item Search and Iteration: $O(n)$ time for array to iterate all the elements in the array. Similarly to search an item by value through iteration takes $O(n)$ time too. -\end{itemize} -No matter it's static or dynamic array, they are static data structures; the underlying implementation of dynamic array is static array. When frequent need of insertion and deletion, we need dynamic data structures, The concept of static array and dynamic array exist in programming languages such as C--for example, we declare \texttt{int a[10]} and \texttt{int* a = new int[10]}, but not in Python, which is fully dynamically typed(need more clarification). -% Arrays are the basic units implementing other data structures, such as hashtables, heaps, queues, stacks. - -\subsection{Linked List} -Dynamic data structures, on the other hand, is designed to support flexible size and efficient insertion and deletion. Linked List is one of the simplest dynamic data structures; it achieves the flexibility by abandoning the idea of storing items at contiguous location. Each item is represented separately--meaning it is possible to have item of different data types, and all items are linked together through \textit{pointers}. A pointer is simply a variable that holds the address of an item as a value. Normally we define a record data structure, namely \texttt{node}, to include two variables: one is the value of the item and the other is a pointer that addressing the next \texttt{node}. 
- -\paragraph{Why is it a highly dynamic data structure?} Imagine each node as a 'signpost' which says two things: the name of the stop and address of the next stop. Suppose you start from the first stop, you can head to the next stop since the first signpost tells you the address. You would only know the total number of stops by arriving at the end signpost, wherein no sign of the address. To add a stop, you can just put it at the end, at the head or anywhere in the middle by modifying any possible signpost before or after the one you add. - -\begin{figure}[ht!] - \centering - \includegraphics[width=.7\columnwidth]{fig/linked_list1.png} - \caption{Singly Linked List} - \label{fig:singly_linkedlist} - \includegraphics[width=0.9\columnwidth]{fig/DLL1.png} - \caption{Doubly Linked List} -\end{figure} - -\paragraph{Singly and Doubly Linked List} When the \texttt{node} has only one pointer, it is called \textit{singly linked list} which means we can only scan nodes in one direction; when there is two pointers, one pointer to its predecessor and another to its successor, it is called \textit{doubly linked list} which supports traversal in both forward and backward directions. - -\paragraph{Operations and Disadvantages} -\begin{itemize} - \item No Random access: in linked list, we need to start from some pointer and to find one item, we need to scan all items sequentially in order to find it and access it; - \item Insertion and Deletion: only $O(1)$ to insert or delete an item if we are given the node after where to insert or the node the delete. - \item Search and Iteration: $O(n)$ time for linked list to iterate all items. Similarly to search an item by value through iteration takes $O(n)$ time too. - \item Extra memory space for a pointer is required with each element of the list. -\end{itemize} - -\paragraph{Recursive} A linked list data structure is actually a \textit{recursive data} structure; any node can be treated as a head node thus making it a sub-linked list. 
-\subsection{Stack and Queue} -\begin{figure}[!ht] - \centering - \includegraphics[width=0.7\columnwidth]{fig/stack_queue_1.png} - \caption{Stack VS Queue} - \label{fig:stack_queue_1} -\end{figure} -Stacks and queues are \textbf{dynamic arrays} with restrictions on the deleting operation. Adding and deleting items in a stack follows the ``Last in, First out (LIFO)'' rule, and in a queue, the rule is ``First in, First out (FIFO)''; this process is shown in Fig.~\ref{fig:stack_queue_1}. We can simply think of a stack as a stack of plates: we always put back and fetch a plate from the top of the pile. A queue is just like a real-life queue in any line: to be first served with your delicious ice cream, you need to be at the head of the line. - - -Implementation-wise, stacks and queues are simply dynamic arrays to which we add items by appending at the end of the array, and they differ only in the delete operation: for a stack, we delete the item from the end; for a queue, we delete the item from the front instead. Of course, we can also implement them with any other linear data structure, such as a linked list. Conventionally, the add and delete operations are called ``push'' and ``pop'' in a stack, and ``enqueue'' and ``dequeue'' in a queue. - -\paragraph{Operations} Stacks and queues support limited access and limited insertion and deletion, and the search and iteration rely on the underlying data structure. - - -Stacks and queues are widely used in computer science. First, they are used to implement the three fundamental searching strategies--Depth-first, Breadth-first, and Priority-first Search. Also, a stack is a recursive data structure as it can be defined as: -\begin{itemize} - \item a stack is either empty or - \item it consists of a top and the rest which is a stack; - \end{itemize} - -\subsection{Hash Table} -\begin{figure}[h!] 
- \centering - \includegraphics[width=0.6\columnwidth]{fig/hash_table_1.png} - \caption{Example of Hashing Table, replace key as index} - \label{fig:hash_table_1} -\end{figure} -A hash table is a data structure that (a) stores items formed as \{key: value\} pairs, (b) and uses a \textit{hash function} $index=h(key)$ to compute an index into an array of buckets or slots, from which the mapping value will be stored and accessed; for users, ideally, the result is given a key we are expected to find its value in constant time--only by computing the hash function. An example is shown in Fig.~\ref{fig:hash_table_1}. Hashing will not allow two pairs that has the same key. - -First, the key needs to be of real number; when it is not, a conversion from any type it is to a real number is necessary. Now, we assume the keys passing to our hash function are all real numbers. We define a \textit{universe} set of keys $U=\{0,1,2,...,|U-1|\}$. To frame hashing as a math problem: given a set of keys drawn from $U$ that has n \{key: value\} pairs, a hash function needs to be designed to map each pair to a key in a set in range $\{0,..,m-1\}$ so that it fits into a table with size m (denoted by $T[0...m-1]$), usually $n>m$. We denote this mapping relation as $h:U\xrightarrow{}\{0,...,m-1\}$. The simplest hashing function is $h=key$, called \textit{direct hashing}, which is only possible when the keys are drawn from $\{0,...,m-1\}$ and it is usually not the case in reality. - -Continue from the hashing problem, when two keys are mapped into the same slot, which will surely happen given $n>m$, this is called \textit{collision}. In reality, a well-designed hashing mechanism -should include: (1) a hash function which minimizes the number of collisions -and (2) a efficient collision resolution if it occurs. -\subsubsection{Hashing Functions} -The essence of designing hash functions is uniformity and randomness. 
We further use $h(k,m)$ to represent our hash function, which points out that it takes two variables as input, the key as $k$, and $m$ is the size of the table where values are saved. One essential rule for hashing is if two keys are equal, then a hash function should produce the same key value ($h(s, m)=h(t, m)$, if $s=t$). And, we try our best to minimize the collision to make it unlikely for two distinct keys to have the same value. Therefore our expectation for average collision times for the same slot will be $\alpha= \frac{n}{m}$, which is called \textbf{loading factor} and is a critical statistics for design hashing and analyze its performance. Besides, -a good hash function satisfied the condition of simple uniform hashing: each key is equally likely to be mapped to any of the $m$ slots. But usually it is not possible to check this condition because one rarely knows the probability distribution according to which the keys are drawn. There are generally four methods: -\begin{enumerate} - \item \textbf{The Direct addressing method}, $h(k, m) = k$, and $m=n$. Direct addressing can be impractical when n is beyond the memory size of a computer. Also, it is just a waste of spaces when $m<=0$ disjoint trees. - -\begin{figure}[!ht] - \centering - \includegraphics[width=0.4\columnwidth, height=5cm]{fig/542px-Graph_theory_tree.png} - \includegraphics[width=0.55\columnwidth, height=5cm]{fig/tree_property.png} - \caption{Example of Trees. Left: Free Tree, Right: Rooted Tree with height and depth denoted} - \label{fig:tree_property} -\end{figure} -However, free trees are not commonly seen and applied in computer science (not in coding interviews either) and there are better ways--\textit{rooted trees}. In a {rooted tree}, a special node is singled out which is called the \textit{root} and all the edges are oriented to point away from the root. 
The rooted node and one-way structure enable the rooted tree to indicate a hierarchy relation between nodes whereas not so in the free tree. A comparison between free tree and the rooted tree is shown in Fig.~\ref{fig:tree_property}. - - - -\subsubsection{Rooted Trees} A rooted tree introduces a \textbf{parent-child}, \textbf{sibling relationship} between nodes to indicate the hierarchy relation. -%In general purposed programming and the coding interviews, a rooted tree is more widely used compared with the free tree. Thus, the rooted trees are one of the well-known non-linear data structures. They organize data hierarchically other than in the linear way. - -\paragraph{Three Types of Nodes} Just like a real tree, we have the root, branches, and finally the leaves. The first node of the tree is called the \textbf{root node}, which will likely to be connected to its several underlying children node(s), making the root node the parent node of its children. Besides the root node, there are another two kinds of nodes: \textit{inner nodes} and \textit{leaf nodes}. A leaf node can be found at the last level of the tree which has no further children. An inner node is any node in the tree that has both parent node and children, which is also any node that can not be characterized as either leaf or root node. A node can be both root and leaf node at the same time, if it is the only node that composed of the tree. - -\paragraph{Terminologies of Nodes} We define the following terminologies to characterize nodes in a tree. -\begin{itemize} - \item \textbf{Depth:} The \textit{depth} (or level) of a node is the number of edges from the node to the tree's root node. The depth of the root node is $0$. % and the depth of all nodes can be obtained from up-down level-by-level traversal. - \item \textbf{Height:} The \textit{height} of a node is the number of edges on the \textit{longest path} from the node to a leaf. A leaf node will have a height of $0$. 
- \item \textbf{Descendant:} The \textit{descendant} of a node is any node that is reachable by repeated proceeding from parent to child starting from this node. They are also known as \textit{subchild}. - \item \textbf{Ancestor:} The \textit{ancestor} of a node is any node that is reachable by repeated proceeding from child to parent starting from this node. - \item \textbf{Degree:} The \textbf{degree} of a node is the number of its children. A leaf is necessarily degreed zero. -\end{itemize} - -\paragraph{Terminologies of Trees} Following the characteristics of nodes, we further define some terminologies to describe a tree. -\begin{itemize} - \item \textbf{Height:} The \textit{height}(or \textit{depth}) of a tree would be the height of its root node, or equivalently, the depth of its deepest node. - \item \textbf{Diameter:} The \textit{diameter} (or \textit{width}) of a tree is the number of nodes (or edges) on the longest path between any two leaf nodes. - \item \textbf{Path:} A \textit{path} is defined as a sequence of nodes and edges connecting a node with a descendant. We can classify them into three types: -\begin{enumerate} - \item Root->Leaf Path: the starting and ending node of the path is the root and leaf node respectively; - \item Root->Any Path: the starting and ending node of the path is the root and any node (inner, leaf node) respectively; - \item Any->Any Path: the starting and ending node of the path is both any node (Root, inner, leaf node) respectively. -\end{enumerate} -\end{itemize} - -\paragraph{Representation of Trees} Like linked list, which chains nodes together via pointers--once the first node is given, we can get hold of information of all nodes, a rooted tree can be represented with nodes consisting of pointers and values too. Because in a tree, a node would have multiple children, indicating a node can have multiple pointers. 
Such representation makes a rooted tree a \textit{recursive} data structure: each node can be viewed as a root node, making this node and all the nodes that reachable from this node a subtree of its parent. This recursive structure is the main reason we separate it from graph field, and make it one of its own data structure. The advantages are summarized as: -\begin{itemize} - \item A tree is an easier data structure that can be recursively represented as a root node connected with its children. - \item Trees can be always used to organize data and can come with efficient information retrieval. Because of the recursive tree structure, divide and conquer can be easily applied on trees (a problem can be most likely divided into subproblems related to its subtrees). For example, Segment Tree, Binary Search Tree, Binary heap, and for the pattern matching, we have the tries and suffix trees. -\end{itemize} - -The recursive representation is also called \textit{explicit} representation. The counterpart--\textit{implicit} representation will not use pointer but with array, wherein the connections are implied by the positions of the nodes. We will see how it works in the next section. - -\paragraph{Applications of Trees} -Trees have various applications due to its convenient recursive data structures which related the trees and one fundamental algorithm design methology-Divide and Conquer. We summarize the following important applications of trees: - -\begin{enumerate} - \item Unlike arrays and linked list, tree is hierarchical: (1) we can store information that naturally forms hierarchically, e.g., the file systems on a computer, the employee relation in at a company. (2) If we organize keys of the tree with ordering, e.g. Binary Search Tree, Segment Tree, Trie used to implement prefix lookup for strings. 
- \item Trees are relevant to the study of analysis of algorithms not only because they implicitly model the behavior of recursive programs but also because they are involved explicitly in many basic algorithms that are widely used. - \item Algorithms applied on graph can be analyzed with the concept of tree, such as the BFS and DFS can be represented as a tree data structure, and a spanning tree that include all of the vertices in the graph. These trees are the basis of other kind of computational problems in the field of graph. -\end{enumerate} - -\begin{importantnote} -Tree is a recursive structure, it can almost used to visualize any recursive based algorithm design or even computing the complexity in which case it is specifically called \textit{recursion tree}. -\end{importantnote} - -\subsection{N-ary Tres and Binary Tree} -\begin{figure}[!ht] - \centering - \includegraphics[width = 0.6\columnwidth]{fig/n-ary_binary_tree.png} - \caption{A 6-ary Tree Vs a binary tree.} - \label{fig:nary_vs_binary} -\end{figure} -For a rooted tree, if each node has no more than $N$ children, it is called \textit{N-ary} Tree. When $N=2$, it is further distinguished as a \textit{binary tree}, where its possible two children are typically called \textit{left child} and \textit{right child}. Fig.~\ref{fig:nary_vs_binary} shows a comparison of a 6-ary tree and a binary tree. Binary tree is more common than N-ary tree because it is simplier and more concise, thus making it more popular for coding interviews. -% \subsubsection{N-ary Tree} - -% \subsubsection{Binary Tree} -% A binary tree is one of the most typical tree structure. A binary tree is made of nodes which has at most two branches--the ``left child" and the ``right child"--and a data element. The ``root" node is the topmost node in the tree. The left and right child recursively point to smaller ``subtrees" on either side. 
-\begin{figure}[!ht] - \centering - \includegraphics[width = 0.8\columnwidth]{fig/full_complete_perfect_binary_tree.png} - \caption{Example of different types of binary trees} - \label{fig:binary_tree_type} -\end{figure} -\paragraph{Types of Binary Tree} -There are four common types of Binary Tree: -\begin{enumerate} - \item \textbf{Full Binary Tree:} A binary tree is full if every node has either 0 or 2 children. We can also say that a \textbf{full} binary tree is a binary tree in which all nodes except leaves have two children. In full binary tree, the number of leaves ($|L|$) and the number of all other non-leaf nodes ($|NL|$) has relation: $|L| = |NL| + 1$. The total number of nodes compared with the height $h$ will be: - \begin{align} - n &=2^0+2^1+2^2+...+2^h\\ - &= 2^{h+1}-1 - \end{align} - \item \textbf{Complete Binary Tree:} A Binary Tree is \textbf{complete} if all levels are completely filled except possibly the last level and the last level has all keys as left as possible. - - \item \textbf{Perfect Binary Tree:} A Binary tree is \textbf{perfect} in which all internal nodes have two children and all leaves are at the same level. This also means a perfect binary tree is both a full and complete binary tree. - - \item \textbf{Balanced Binary Tree:} A binary tree is balanced if the height of the tree is $O(\log n)$ where $n$ is the number of nodes. For Example, \textit{AVL tree} maintains $O(\log n)$ height by making sure that the difference between heights of left and right subtrees is at most 1. - - \item Degenerate (or pathological) tree: A Tree where every internal node has one child. Such trees are performance-wise same as linked list. -\end{enumerate} -And each we show one example in Fig.~\ref{fig:binary_tree_type}. 
- -Complete tree and a perfect tree can be represented with an array, and we assign index 0 for root node, and given a node with index $i$, the children will be $2*i+1$ and $2*i+2$, this is called \textit{implicit} representation, wherein its counterpart recursive representation is called \textit{explicit} representation. - - - - - - -% \section{Geometric Data Structures} -% A single data point such as a real number 8 is called a \textit{scalar}, and an array of items can be a \textit{vector}, which is one dimensional, a two dimensional data such as an image, needed to be represented with a \textit{matrix}. To find where each data point is in the dimensional data structures, we build up \textit{coordinates} with dimensions and use \textit{point} to mark the position in the geometry field. - -\end{document} \ No newline at end of file diff --git a/Easy-Book/chapters/chapter_advanced_data_structures.tex b/Easy-Book/chapters/chapter_advanced_data_structures.tex deleted file mode 100644 index d8a8b30..0000000 --- a/Easy-Book/chapters/chapter_advanced_data_structures.tex +++ /dev/null @@ -1,504 +0,0 @@ -\documentclass[../main.tex]{subfiles} -\begin{document} -In this chapter, we extend the data structure learned from the first part with more advanced data structures. These data structures are not as widely used as the basic data structures, however, they can be often seen to implement more advanced algorithms or they can be more efficient compared with algorithms that relies on a more basic version. - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%%%%%%%%%%%%% Monotonic Stack -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\section{Monotone Stack} -\label{section_mono_stack} -\textit{A monotone Stack is a data structure the elements from the front to the end is strictly either increasing or decreasing. 
} For example, there is a line at the hair salon, and you would naturally start from the end of the line. However, if you are allowed to kick out any person that you can beat in a fight, and everyone follows the rule, then the line would start with the most powerful man and end with the weakest one. This is an example of a monotonic decreasing stack. -\begin{itemize} - \item Monotonically Increasing Stack: to push an element $e$, starting from the rear element, we pop out every element $r>=e$ (violation); - \item Monotonically Decreasing Stack: we pop out every element $r<=e$ (violation). -\end{itemize} -The process of the monotone decreasing stack is shown in Fig.~\ref{fig:mono_stack}. -\begin{figure}[h!] - \centering - \includegraphics[width=0.9\columnwidth]{fig/monotone_stack_fig.png} - \caption{The process of decreasing monotone stack} - \label{fig:mono_stack} -\end{figure} -\textit{Sometimes, we can relax the strict monotonic condition, and allow the stack or queue to have repeated values. } - -To get the features of the monotonic stack, take $[5, 3, 1, 2, 4]$ as an example and build both the increasing and the decreasing stack: -\begin{lstlisting}[numbers=none] -index v Increasing stack Decreasing stack -1 5 [5] [5] -2 3 [3] 3 kick out 5 [5, 3] #3->5 -3 1 [1] 1 kick out 3 [5, 3, 1] #1->3 -4 2 [1, 2] #2->1 [5, 3, 2] 2 kick out 1 -5 4 [1, 2, 4] #4->2 [5,4] 4 kick out 2, 3 -\end{lstlisting} -By observing the above process, what features can we get? -\begin{itemize} - \item Pushing in to get smaller/larger item to the left: When we push an element in, if there exists one element right in front of it, 1) for increasing stack, we find the \textbf{nearest smaller item to the left} of current item, 2) for decreasing stack, we find the \textbf{nearest larger item} to the left instead. In this case, we get [-1, -1, -1, 1, 2], and [-1, 5, 3, 3, 5] respectively. 
- \item Popping out to get smaller/larger item to the right: when we pop one element out, for the kicked out item, such as in step of 2, increasing stack, 3 forced 5 to be popped out, for 5, 3 is the first smaller item to the right. Therefore, if one item is popped out, for this item, the current item that is about to be push in is 1) for increasing stack, \textbf{the nearest smaller item to its right}, 2) for decreasing stack, \textbf{the nearest larger item to its right}. In this case, we get [3,1, -1, -1, -1], and [-1, 4, 2, 4, -1] respectively. -\end{itemize} -The conclusion is with monotone stack, we can search for smaller/larger items of current item either to its left/right. - -\paragraph{Basic Implementation} - -This monotonic queue is actually a data structure that needed to add/remove element from the end. In some application we might further need to remove element from the front. Thus Deque from collections fits well to implement this data structure. Now, we set up the example data: -\begin{lstlisting}[language=Python] -A = [5, 3, 1, 2, 4] -import collections -\end{lstlisting} - -\paragraph{Increasing Stack} We can find first smaller item to left/right. 
def increasingStack(A):
    """Scan A once while maintaining a monotonically increasing stack of indices.

    Returns a tuple (firstSmallerToLeft, firstSmallerToRight, stack):
      * firstSmallerToLeft[i]  -- value of the nearest item to the left of i
        that is strictly smaller than A[i], or -1 when there is none;
      * firstSmallerToRight[i] -- value of the nearest item to the right of i
        that is smaller than or equal to A[i], or -1 when there is none;
      * stack -- deque of the indices left on the stack after the scan
        (their values are strictly increasing from bottom to top).

    Note: -1 is used as the "not found" marker, so the answer is ambiguous
    when A itself contains -1.
    """
    stack = collections.deque()
    firstSmallerToLeft = [-1] * len(A)
    firstSmallerToRight = [-1] * len(A)
    for i, v in enumerate(A):
        # Popping out: v is the first item to the right that is <= the popped one.
        while stack and A[stack[-1]] >= v:
            firstSmallerToRight[stack.pop()] = v
        # Pushing in: whatever remains on top is strictly smaller than v.
        if stack:
            firstSmallerToLeft[i] = A[stack[-1]]
        stack.append(i)
    return firstSmallerToLeft, firstSmallerToRight, stack


def decreasingStack(A):
    """Scan A once while maintaining a monotonically decreasing stack of indices.

    Returns a tuple (firstLargerToLeft, firstLargerToRight, stack):
      * firstLargerToLeft[i]  -- value of the nearest item to the left of i
        that is strictly larger than A[i], or -1 when there is none;
      * firstLargerToRight[i] -- value of the nearest item to the right of i
        that is larger than or equal to A[i], or -1 when there is none;
      * stack -- deque of the indices left on the stack after the scan
        (their values are strictly decreasing from bottom to top).

    Note: -1 is used as the "not found" marker, so the answer is ambiguous
    when A itself contains -1.
    """
    stack = collections.deque()
    firstLargerToLeft = [-1] * len(A)
    firstLargerToRight = [-1] * len(A)
    for i, v in enumerate(A):
        # Popping out: v is the first item to the right that is >= the popped one.
        while stack and A[stack[-1]] <= v:
            firstLargerToRight[stack.pop()] = v
        # Pushing in: whatever remains on top is strictly larger than v.
        if stack:
            firstLargerToLeft[i] = A[stack[-1]]
        stack.append(i)
    return firstLargerToLeft, firstLargerToRight, stack


if __name__ == "__main__":
    # Demo on the running example; the function is increasingStack
    # (the original demo called a non-existent increasingQueue).
    A = [5, 3, 1, 2, 4]
    firstSmallerToLeft, firstSmallerToRight, stack = increasingStack(A)
    for i in stack:
        print(A[i], end=' ')
    print('\n')
    print(firstSmallerToLeft)
    print(firstSmallerToRight)
-% Let us look at an example, -% \begin{lstlisting} -% Given an array [5, 3, 1, 2, 4], our target is to return an array of the same size that each element denotes the relative index we need to move to the right to find the first element that is larger than the current element, if we can not find, then we use -1. For this example the return would be [-1, 3, 1, 1, -1]. -% \end{lstlisting} - - -% Solution: If we do it with brute force, then use one for loop to point at the current element, and another embedding for loop to look for the first element that is larger than current, which gives us $O(n^2)$ time complexity. If we think about the BCR, and try to trade space for efficiency, we can use a decreasing monotonic queue. The first elment to the right that is larger than current is equaivalent to find the first element in the left that is smaller than the element(this means we need to use decreasing queue). First we have $[5]$, then $[5, 3]$, $[5, 3 ,1]$, then when $2$ comes in, we need to kick out $1$, so for $1$ the first larger element to its right size is $2$, we record $index(2)-index(1) = 1$. Then we have $4$, which could kick out $2$, so $4$ is the required one, then we set $r[index(2)] = index(4)-index(2) = 1$, then $r[index(3)] = index(4)-index(3) = 3$, Finally there would only $[5, 4]$, so we set them to be $-1$. 
-% \begin{lstlisting} -% index v decreasing queue -% 1 5 [5] -% 2 3 [5,3] -% 3 1 [5,3,1] -% 4 2 [5, 3, 2], kick out 1, we found the first larger number to the right of 1, which is 2 -% 5 4 [5,4], kick out 2, for 2, we found 4, kick out 3, for 3 we found 4 -% \end{lstlisting} -% \begin{lstlisting}[language = Python] -% a = [5, 3, 1, 2, 4] -% def firstLagerNumToRight(num): -% if not num: -% return [] -% monoStack = [] #decreasing monotonic stack -% rst = [-1]*len(num) -% for i, v in enumerate(num): -% while monoStack and v >= num[monoStack[-1]]: -% index = monoStack.pop() -% rst[index] = i-index -% monoStack.append(i) -% return rst -% print(firstLagerNumToRight(a)) -% # [-1, 3, 1, 1, -1] -% \end{lstlisting} -Monotone stack is especially useful in the problem of subarray where we need to find smaller/larger item to left/right side of an item in the array. To better understand the features and applications of monotone stack, let us look at some examples. First, we recommend the audience to practice on these obvious applications shown in LeetCode Problem Section before moving to the examples: - -There is one problem that is pretty interesting: - -\paragraph{Sliding Window Maximum/Minimum } Given an array nums, there is a sliding window of size k which is moving from the very left of the array to the very right. You can only see the k numbers in the window. Each time the sliding window moves right by one position. Return the max sliding window. (LeetCode Probelm: 239. 
import collections

def maxSlidingWindow(self, nums, k):
    """Return the maximum of every length-k window of nums, left to right.

    A deque of indices is kept so that the referenced values are strictly
    decreasing from front to back; the front is therefore always the index
    of the current window's maximum.

    Args:
        self: unused (kept for the LeetCode-style method signature).
        nums: sequence of comparable values.
        k: window size.

    Returns:
        A list with len(nums) - k + 1 maxima; an empty list when nums is
        empty, k is not positive, or k exceeds len(nums).
    """
    if k <= 0:
        # No window can be formed; avoids reporting meaningless prefix maxima.
        return []
    ds = collections.deque()
    ans = []
    for i in range(len(nums)):
        # Anything not larger than nums[i] can never be a window maximum again.
        while ds and nums[i] >= nums[ds[-1]]:
            ds.pop()  # was `indices.pop()`: an undefined name (NameError)
        ds.append(i)
        if i >= k - 1:
            # The first full window ends at index k - 1; record its maximum.
            ans.append(nums[ds[0]])
        if i - k + 1 == ds[0]:
            # The front index leaves the window on the next step; evict it now.
            ds.popleft()
    return ans
\textit{Note: 1 <= A.length <= 30000, 1 <= A[i] <= 30000.} -\begin{lstlisting}[numbers=none] -Example 1: - -Input: [3,1,2,4] -Output: 17 -Explanation: Subarrays are [3], [1], [2], [4], [3,1], [1,2], [2,4], [3,1,2], [1,2,4], [3,1,2,4]. -Minimums are 3, 1, 2, 4, 1, 1, 2, 1, 1, 1. Sum is 17. -\end{lstlisting} - -\textbf{Analysis:} For this problem, using the naive solution to enumerate all possible subarrays, we end up with $n^2$ subarrays and the time complexity would be $O(n^2)$, and we will receive TLE (Time Limit Exceeded). For this problem, we just need to sum over the minimum in each subarray. Try to consider the problem from another angle: what if we can figure out how many times each item is used as the minimum value of its corresponding subarrays? Then res = sum(A[i]*f(i)). If there is no duplicate in the array, then to get f(i), we need to find out: -\begin{itemize} - \item left[i], the length of strict bigger numbers on the left of A[i], - \item right[i], the length of strict bigger numbers on the right of A[i]. -\end{itemize} -For the given examples, if A[i] = 1, then the left item is 3, and the right item is 4, we add 1*(left\_len*right\_len) to the result. However, if there is a duplicate such as [3, 1, 4, 1], for the first 1, we need the subarrays [3,1], [1], [1,4], [1, 4,1], and for the second 1, we need [4,1], [1] instead. Therefore, we let the right length also cover the >= items, while the left length covers only the strictly bigger items, so that every subarray is counted exactly once. Now, the problem is converted to finding the first smaller or equal item on the left side and the first strictly smaller item on the right side. From the features we drew above, we need to use an increasing stack: as we know, from the pushing in, we find the first smaller (or equal) item on the left side, and from the popping out, for the popped out item, the current item is the first smaller item on the right side. 
The code is as: -\begin{lstlisting}[language=Python] -def sumSubarrayMins(self, A): - n, mod = len(A), 10**9 + 7 - left, s1 = [1] * n, [] - right = [n-i for i in range(n)] - for i in range(n): # find first smaller to the left from pushing in - while s1 and A[s1[-1]] > A[i]: # can be equal - index = s1.pop() - right[index] = i-index # kicked out - if s1: - left[i] = i-s1[-1] - else: - left[i] = i+1 - s1.append(i) - return sum(a * l * r for a, l, r in zip(A, left, right)) % mod -\end{lstlisting} -The above code, we can do a simple improvement, by adding 0 to each side of the array. Then eventually there will only have [0, 0] in the stack. All of the items originally in the array they will be popped out, each popping, we can sum up the result directely: -\begin{lstlisting}[language=Python] -def sumSubarrayMins(self, A): - res = 0 - s = [] - A = [0] + A + [0] - for i, x in enumerate(A): - while s and A[s[-1]] > x: - j = s.pop() - k = s[-1] - res += A[j] * (i - j) * (j - k) - s.append(i) - return res % (10**9 + 7) -\end{lstlisting} -\end{examples} -\section{Disjoint Set} -\textbf{Disjoint-set data structure} (aka union-find data structure or merge-find set) maintains a collection $S = \{S_1, S_2, ..., S_k\}$ of disjoint \textit{dynamic} sets by partitioning a set of elements. We identify each set by a \textbf{representative}, which is some member of the set. It does matter which member is used only if we get the same answer both times if we ask for the representative twice without modifying the set. Choosing the smallest member in a set as representative is an examplary prespecified rule. According to its typical applications such as implementing Kruskal's minimum spanning tree algorithm and tracking connected components dynamically, disjoint-set should support the following operations: -\begin{enumerate} - \item \texttt{make\_set(x)}: create a new set whose only member is $x$. To keep these sets to be disjoint, this member should not already be in some existent sets. 
- \item \texttt{union(x, y)}: unites the two dynamic sets that contain $x$ and $y$, say $S_x \cup S_y$ into a new set that is the union of these two sets. In practice, we merge one set into the other say $S_y$ into $S_x$, we then remove/destroy $S_y$. This will be more efficient than create a new one that unions and destroy the other two. - \item \texttt{find\_set(x)}: returns a pointer to the representative of the set that contains $x$. - -\end{enumerate} -\paragraph{Applications} Disjoint sets are applied to implement union-find algorithm where perfoms \texttt{find\_set} and \texttt{union}. Union-find algorithms can be used into some basic graph algorithms, such as cycle detection, tracking connected components in the graph dynamically, ~\footnote{where new edge will be added and the search based algorithm each time will be rerun to find them again}, Krauskal's MST algorithm, and Dijkstra's Shortest path algorithm. - -\paragraph{Connected Component} Before we move to the implementation, let us first see how disjoint set can be applied to connected components. -\begin{figure}[h] - \centering - \includegraphics[width=0.98\columnwidth] {fig/disjoint_set.png} - \caption{The connected components using disjoint set.} - \label{fig:cc_undirected_disjoint_set} -\end{figure} -At first, we assign a set id for each vertex in the graph. Then we traverse each edge, and if the two endpoints of the edge belongs to different set, then we union the two sets. As shown in the process, first vertex 0 and 1 has different set id, then we update 1's id to 0. For edge (1, 2), we update 2's id to 0. For edge(0, 2), they are already in the same set, no update needed. We apply the same process with edge (2, 4), (3, 4), and (5, 6). - -\subsection{Basic Implementation with Linked-list or List} -Before we head off to more efficient and complex implementation, we first implement a baseline for the convenience of comparison. 
The key for the implementation is two dictionaries named \texttt{item\_set} (saves the mapping between item and its set id, which will only be one to one) and \texttt{set\_item} (the value of the key will be a list, because one set will have one to multiple relation). - -If our coding is right, each item must have an item when \texttt{find\_set} function is called, if not we will call \texttt{make\_set}. For each existing \texttt{set}, it will have at least one item. For function \texttt{union}, we choose the set that has less items to merge to the one that with more items. - -\begin{lstlisting}[language=Python] -class DisjointSet(): - '''Implement a basic disjoint set''' - def __init__(self, items): - self.n = len(items) - self.item_set = dict(zip(items, [i for i in range(self.n)])) # first each set only has one item [i], this can be one->multiple match - self.set_item = dict(zip([i for i in range(self.n)], [[item] for item in items])) # each item will always belong to one set - - def make_set(self, item): - '''make set for new incoming set''' - if item in self.item_set: - return - - self.item_set[item] = self.n - self.n += 1 - - def find_set(self, item): - if item in self.item_set: - return self.item_set[item] - else: - print('not in the set yet: ', item) - return None - - def union(self, x, y): - id_x = self.find_set(x) - id_y = self.find_set(y) - if id_x == id_y: - return - - sid, lid = id_x, id_y - if len(self.set_item[id_x]) > len(self.set_item[id_y]): - sid, lid = id_y, id_x - # merge items in sid to lid - for item in self.set_item[sid]: - self.item_set[item] = lid - self.set_item[lid] += self.set_item[sid] - del self.set_item[sid] - return -\end{lstlisting} - -\paragraph{Complexity} For $n$ items, we spend $O(n)$ time to initialize the two hashmaps. With the help of hashmap, function \texttt{find\_set} tasks only $O(1)$ time, accumulating it will give us $O(n)$. For function \texttt{union}, it takes more effort to analyze. 
From another angle, for one item $x$, it will only update its set id when we are unioning it into another set $x_1$. The first time, the resulting set $x_1$ will have at least two items. The second update will be the union of $x_1$ into $x_2$. Because the merged one always has the smaller length, the resulting set $x_2$ will have at least 4 items. Then comes the third, ..., up to $k$ updates. Because a resulting set has at most $n$ items, for each item at most $\log n$ updates will be needed. For $n$ items, this makes the upper bound for \texttt{union} $O(n\log n)$. - -However, for our implementation, we have an additional cost in \texttt{union}, where we merge the lists. This cost can easily be limited to constant by using a linked list. However, even with \texttt{list}, there are different ways to concatenate one list to another: - -\begin{enumerate} -\item Use $+$ operator: The time complexity of the concat operation for two lists, A and B, is $O(|A| + |B|)$. This is because you aren't adding to one list, but instead are creating a whole new list and populating it with elements from both A and B, requiring you to iterate through both. - -\item \texttt{extend(lst)}: Use extend which doesn't create a new list but adds to the original. The time complexity is $O(k)$, where $k$ is the length of the appended list \texttt{lst}, regardless of the length of the original list. Likewise, \texttt{l += [i]} modifies the original list and behaves like extend. -\end{enumerate} - -\subsection{Implementation with Disjoint-set Forests} -Instead of using a linear linked list, we use a tree structure. Unlike the trees we have introduced before, where a node points to its children, an item here only points to its parent. A tree represents a set, and the root node is the representative and it points to itself. The straightforward algorithms that use this structure are not faster than the linked-list version. By introducing two heuristics--``Union by rank'' and ``path compression''--we can achieve an asymptotically optimal disjoint-set data structure. 
-\begin{figure}[h] - \centering - \includegraphics[width=0.98\columnwidth] {fig/disjoint_set.png} - \caption{A disjoint forest} - \label{fig:disjoint_forest_1} -\end{figure} -\subsubsection{Naive Version} We first need to create a \texttt{Node} class which stores \texttt{item} and another parent pointer \texttt{parent}. An \texttt{item} can be any immutable data structure with necessary information represents a node. -\begin{lstlisting}[language=Python] -class Node: - def __init__(self, item): - self.item = item # save node information - self.parent = None -\end{lstlisting} - -We need one dict data structure \texttt{item\_finder} and one set data structure \texttt{sets} to track nodes and set. From \texttt{item\_finder} we can do (item, node) map to find node, and then from the node further we can find its set representative node or execute \texttt{union} operation. \texttt{sets} is used to track all the representative nodes. When we union two sets, the one merged to the other will be deleted in \texttt{sets}. At the easy version, \texttt{make\_set} will create tree with only one node. \texttt{find\_set} will start from the node and traverse all the way back to its final parent which is when \texttt{node.parent==node}. And a \texttt{union} operation will simply point one tree's root node to the root of another through \texttt{parent}. 
The code is as follows: -\begin{lstlisting}[language=Python] -class DisjointSet(): - '''Implement with disjoint-set forest''' - def __init__(self, items): - self.n = len(items) - self.item_finder = dict() - self.sets = set() # sets will have only the parent node - - for item in items: - node = Node(item) - node.parent = node - self.item_finder[item] = node # from item we can find the node - self.sets.add(node) - - def make_set(self, item): - '''make set for new incoming set''' - if item in self.item_finder: - return - - node = Node(item) - node.parent = node - self.item_finder[item] = node - self.sets.add(node) - self.n += 1 - - def find_set(self, item): - # from item->node->parent to set representative - if item not in self.item_finder: - print('not in the set yet: ', item) - return None - node = self.item_finder[item] - while node.parent != node: - node = node.parent - return node - - def union(self, x, y): - node_x = self.find_set(x) - node_y = self.find_set(y) - if node_x.item == node_y.item: - return - - #the root of one tree to point to the root of the other - # merge x to y - node_x.parent = node_y - #remove one set - self.sets.remove(node_x) - return - - def __str__(self): - ans = '' - for root in self.sets: - ans += 'set: '+ str(root.item) + '\n' - return ans - - def print_set(self, item): - if item in self.item_finder: - node = self.item_finder[item] - print(node.item, '->', end='') - while node.parent != node: - node = node.parent - print(node.item, '->', end='') -\end{lstlisting} -Let's run an example: -\begin{lstlisting}[language=Python] -ds = DisjointSet(items=[i for i in range(5)]) -ds.union(0,1) -ds.union(1,2) -ds.union(2,3) -ds.union(3, 4) -print(ds) -for item in ds.item_finder.keys(): - ds.print_set(item) - print(' ') -\end{lstlisting} -The output is: -\begin{lstlisting}[numbers=none] -set: 4 - -0 ->1 ->2 ->3 ->4 -> -1 ->2 ->3 ->4 -> -2 ->3 ->4 -> -3 ->4 -> -4 -> -\end{lstlisting} -The above implementation, both \texttt{make\_set} and 
\texttt{union} take $O(1)$ time (not counting the two \texttt{find\_set} calls that \texttt{union} makes). The main time complexity is incurred at \texttt{find\_set}, which traverses a path from node to root. If we assume each tree in the disjoint-set forest is balanced, the upper bound of this operation will be $O(\log n)$. However, if the tree is as bad as a linear linked list, the time complexity will go to $O(n)$. This raises the total time complexity of $n$ operations from $O(n\log n)$ to $O(n^2)$. - -\subsubsection{Heuristics} -\paragraph{Union by Rank} -As we have seen from the above example, a sequence of $n-1$ \texttt{union} operations may create a tree that is just a linear chain of $n$ nodes. Union by rank, which is similar to the weighted-union heuristic we used with the linked list implementation, is applied to avoid the worst case. For each node, other than the parent pointer, it adds \texttt{rank} to track the upper bound of the height of the associated node (the number of edges in the longest simple path between the node and a descendant leaf). In union by rank, we make the root with smaller rank point to the root with larger rank. - -In the initialization and in the \texttt{make\_set} operation, a single-node tree has an initial rank of 0. In \texttt{union(x, y)}, there are three cases: -\begin{lstlisting}[numbers=none] -Case 1: x.rank == y.rank: - join x to y - y.rank += 1 -Case 2: x.rank < y.rank: - join x to y - y's rank stays unchanged -Case 3: x.rank > y.rank: - join y to x - x's rank stays unchanged -\end{lstlisting} -Now, with adding \texttt{rank} to the node. 
We modify the naive implementation: -\begin{lstlisting}[language=Python] -class Node: - def __init__(self, item): - self.item = item # save node information - self.parent = None - self.rank = 0 -\end{lstlisting} -The updated implementation of \texttt{union}: -\begin{lstlisting}[language=Python] - def union(self, x, y): - node_x = self.find_set(x) - node_y = self.find_set(y) - if node_x.item == node_y.item: - return - - # link - if node_x.rank > node_y.rank: - node_y.parent = node_x - #remove one set - self.sets.remove(node_y) - elif node_x.rank < node_y.rank: - node_x.parent = node_y - self.sets.remove(node_x) - else: - node_x.parent = node_y - node_y.rank += 1 - self.sets.remove(node_x) - return -\end{lstlisting} - -\paragraph{Path Compression} -In our naive implementation, \texttt{find\_set} took the most time. With path compression, during the process of \texttt{find\_set}, it simply make each node on the find path point directly to its root. Path Compression wont affect the rank of each node. 
Now, we modify this function: -\begin{lstlisting}[language=Python] - def _find_parent(self, node): - while node.parent != node: - node = node.parent - return node - - def find_set(self, item): - '''modified to do path compression''' - # from item->node->parent to set representative - if item not in self.item_finder: - print('not in the set yet: ', item) - return None - node = self.item_finder[item] - node.parent = self._find_parent(node) # change node's parent to the root node - return node.parent -\end{lstlisting} -The same example, the output will be: -\begin{lstlisting}[numbers=none] -set: 1 - -0 ->1 -> -1 -> -2 ->1 -> -3 ->1 -> -4 ->1 -> -\end{lstlisting} -\begin{lstlisting}[language=Python] -import time, random -t0 = time.time() -n = 100000 -ds = DisjointSet(items=[i for i in range(n)]) -for _ in range(n): - i, j = random.randint(0, n-1), random.randint(0, n-1) #[0,n] - ds.union(i, j) -print('time: ', time.time()-t0) -\end{lstlisting} -\begin{bclogo}[couleur = blue!30, arrondi=0.1,logo=\bccrayon,ombre=true]{Experiment to the running time of Linked-list VS naive forest VS heuristic forest} We run the disjoint set with n=100,000, and with n times of union: -\begin{lstlisting}[language=Python] -import time, random -t0 = time.time() -n = 100000 -ds = DisjointSet(items=[i for i in range(n)]) -for _ in range(n): - i, j = random.randint(0, n-1), random.randint(0, n-1) #[0,n] - ds.union(i, j) -print('time: ', time.time()-t0) -\end{lstlisting} -The resulting time is: 1.09s, 50.4s, 1.19s -\end{bclogo} -\paragraph{Note} As we see, in our implementation, we have never removed any item from disjoint-set structure. Also, from the above implementation, we know the sets of the nodes, but we cant track items from the root node. How can we further improve this? -\section{Fibonacci Heap} -\section{Exercises} -\subsection{Knowledge Check} -\subsection{Coding Practice} -\paragraph{Disjoint Set} -\begin{enumerate} - \item 305. 
Number of Islands II (hard) -\end{enumerate} -\end{document} \ No newline at end of file diff --git a/Easy-Book/chapters/chapter_advanced_graph_algorithm.pdf b/Easy-Book/chapters/chapter_advanced_graph_algorithm.pdf deleted file mode 100644 index a5f90df..0000000 Binary files a/Easy-Book/chapters/chapter_advanced_graph_algorithm.pdf and /dev/null differ diff --git a/Easy-Book/chapters/chapter_advanced_graph_algorithm.synctex.gz b/Easy-Book/chapters/chapter_advanced_graph_algorithm.synctex.gz deleted file mode 100644 index 162cc6f..0000000 Binary files a/Easy-Book/chapters/chapter_advanced_graph_algorithm.synctex.gz and /dev/null differ diff --git a/Easy-Book/chapters/chapter_advanced_graph_algorithm.tex b/Easy-Book/chapters/chapter_advanced_graph_algorithm.tex deleted file mode 100644 index ae2663c..0000000 --- a/Easy-Book/chapters/chapter_advanced_graph_algorithm.tex +++ /dev/null @@ -1,1569 +0,0 @@ -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%%%%%%%%%%%%% Advanced graph search -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\documentclass[../main.tex]{subfiles} -% \usepackage[pdf]{graphviz} -%\usepackage{subcaption} -\begin{document} -Our standing at graph algorithms: -\begin{enumerate} - \item \textcolor{gray}{Search Strategies} (Chapter) - \item \textcolor{gray}{Combinatorial Search}(Chapter) - \item \underline{Advanced Graph Algorithm}(Current) - \item Graph Problem Patterns(Future Chapter) -\end{enumerate} -This chapter is more to apply the basic search strategies and two advanced algorithm design methodologies--Dynamic Programming and Greedy Algorithms-- on a variety of classical graph problems: -\begin{itemize} - \item Cycle Detection (Section~\ref{sec_cycle_dection}), Topological Sort(Section~\ref{sec_topological_sort}), and Connected Components(Section~\ref{sec_connected_component}) which all require a through understanding to properties of basic graph search, 
especially Depth-first graph search. - \item On the other hand, Minimum Spanning Tree (MST) and Shortest Path Algorithm on the entails our mastering of Breath-first Graph Search. - \item Moreover, to achieve better efficiency, Dynamic Programming and Greedy Algorithms has to be leveraged in the graph search process. For example, Bellman-Ford algorithm uses the Dynamic Programming to avoid recomputing intermediate paths while searching the shortest paths from a single source to all other targets. The classical Prim's and Kruskal's MST algorithm both demonstrates how greedy algorithm can be applied, each in a different way. -\end{itemize} -%%%%%%%%%%%%%%%%%%%%%%Cycle detection%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%Source: https://www.cs.cornell.edu/courses/cs2112/2012sp/lectures/lec24/lec24-12sp.html -\section{Cycle Detection} -\label{sec_cycle_dection} -\begin{figure}[!ht] - \centering - \includegraphics[width=0.49\columnwidth] {fig/chapter_advanced_graph/undirected_cyclic_graph.png} - \caption{Undirected Cyclic Graph. $(0, 1, 2, 0)$ is a cycle} - \label{fig:advanced_graph_ucg} - \includegraphics[width=0.49\columnwidth] {fig/chapter_advanced_graph/directed_cyclic_graph.png} - \caption{Directed Cyclic Graph, $(0, 1, 2, 0)$ is a cycle.} - \label{fig:cc_cycle} -\end{figure} -\paragraph{Problem Definition} -Detect cycles in both directed and undirected graph. Specifically, given a path with $k+1$ vertices, denoted as ${v_0, v_1, ..., v_k}$ in graph $G$: -\begin{enumerate} - \item When $G$ is directed: a cycle is formed if $v_0=v_k$ and the path contains at least one edge. For example, there is a cycle ${0, 1, 2, 0}$ shown in the directed graph of Fig.~\ref{fig:cc_cycle}. - \item When $G$ is undirected: the path forms a cycle only if $v_0=v_k$ and the path length is at least three (i.e., there are at least three distinct vertices within the path). 
For example, in the undirected graph of Fig.~\ref{fig:advanced_graph_ucg}, we couldn't say $(0, 2)$ is a cycle even though there is a path ${0, 2, 0}$, but the path ${0, 2, 1, 0}$ is one, as its path length is $\geq 3$. -\end{enumerate} - -\paragraph{DFS to Solve Cycle Detection} Recall the process of DFS graph search where a vertex has three possible states--white, gray, and black. A back edge appears when, from the current vertex $u$, we reach an adjacent vertex $v$ which is in the gray state. -Since $v$ is an ancestor of $u$, following the back edge from $u$ to $v$ closes a cycle if the graph is directed. -When the graph is undirected, we have discussed that it has only tree edges and back edges. -Thus, we will use two states: visited and not visited. For edge $(u, v)$, we check two conditions: -\begin{enumerate} - \item if $v$ is visited already. In Fig.~\ref{fig:advanced_graph_ucg}, when we are at $1$, we first visit $0$. - \item avoiding counting a single existing edge, traversed back and forth, as a cycle. -We can easily achieve this by tracking the predecessor $p$ of the exploring vertex during the search, and making sure the visited neighbor is not the predecessor of the current vertex: $v\neq p$. %we need to distinguish between cycle of two nodes and cycle of more nodes. If this path includes two nodes as of $v_0, v_1, v_2$, when we are visiting $v_1$, the predecessor of $v_1$, which is $v_0$ will be the same as of $v_2$ which is a node with gray state. So, in addition to a normal DFS based cycle detection for the directed graph, we track the predecessor of the current visiting vertex. -\end{enumerate} - -\paragraph{Cycle Detection for Directed Graph} -We define a function \texttt{hasCycleDirected} with $g$ as the adjacency list of the graph, $state$ as a list to track the state of each vertex, and $s$ as the exploring vertex. The function returns a boolean value to indicate if there is a cycle or not. The function is essentially a DFS graph search along with an extra condition check on the back edge. 
-\begin{lstlisting}[language=Python] - def hasCycleDirected(g, s, state): - state[s] = STATE.gray # first be visited - for v in g[s]: - if state[v] == STATE.white: - if hasCycleDirected(g, v, state): - print(f'Cycle found at node {v}.') - return True - elif state[v] == STATE.gray: # aback edge - print(f'Cycle starts at node {v}.') - return True - else: - pass - state[s] = STATE.black # mark it as complete - return False -\end{lstlisting} -Because a graph can be disconnected with multiple components, we run \texttt{hasCycleDirected} on each unvisited vertex within the graph in a main function. -\begin{lstlisting}[language=Python] - def cycleDetectDirected(g): - n = len(g) - state = [STATE.white] * n - for i in range(n): - if state[i] == STATE.white: - if hasCycleDirected(g, i, state): - return True - return False -\end{lstlisting} - -\paragraph{Cycle Detection for Undirected Graph} First, we add another variable \texttt{p} to track the predecessor. -$p$ will first be initialized to $-1$ because the root in the rooted search tree has no predecessor (or ancestor). - We can use the three coloring state as we did in directed graph, but it is a slight overkill. - In the implementation, we only use boolean value to mark its state: -\begin{lstlisting}[language=Python] -def hasCycleUndirected(g, s, p, visited): - visited[s] = True - for v in g[s]: - if not visited[v]: - if hasCycleUndirected(g, v, s, visited): - print(f'Cycle found at node {v}.') - return True - else: - if v != p: # both black and gray - print(f'Cycle starts at node {v}.') - return True - return False -\end{lstlisting} -The main function: -\begin{lstlisting}[language=Python] -def cycleDetectUndirected(g): - n = len(g) - visited = [False] * n - for i in range(n): - if not visited[i]: - if hasCycleUndirected(g, i, -1, visited): - print(f'Cycle found at start node {i}.') - return True - - return False -\end{lstlisting} -Please check the source code to try out the examples. 
- -\begin{bclogo}[couleur = blue!30, arrondi=0.1,logo=\bccrayon,ombre=true]{How to find all cycles? {First, we need to enumerate all paths while searching in order to get all cycles. This requires us to retreat to less efficient search strategy: depth-first tree search. Second, for each path, we find where the cycle starts by comparing each $v_i$ with current vertex $u$: in directed graph, once $v_i==u$, the cycle is $v_i, v_{i+1}, ..., v_k, v_i$; in undirected graph, the cycle is found only if the length of $v_i, ..., v_k$ $\geq$ 3.}} -\end{bclogo} - -\section{Topological Sort} -\label{sec_topological_sort} -\paragraph{Problem Definition} In a given Directed Acyclic Graph (DAG) $G=(V, E)$, \textit{topological sort/ordering} of is a linear ordering of the vertices $V$, such that for each edge $e \in E, e = (u, v)$, $u$ comes before $v$. If a vertex represents a task to be completed and each directed edge denotes the order between two tasks, then topological sort is a way of linearly ordering a number of tasks in a completable sequence. -\begin{figure}[h] - \centering - \includegraphics[width=0.7\columnwidth]{fig/chapter_advanced_graph/ts_2.png} - \caption{DAG 1} - \label{fig:dag_1} -\end{figure} - -Every DAG has at least one topological ordering. For example, the topological ordering of Fig~\ref{fig:dag_1} can be \texttt{[0, 1, 3, (2, 4, 5), 6]}, where $(2, 4, 5)$ can be of any order, i.em., $(2, 4, 5), (2, 5, 4), (4, 2, 5), (4, 5, 2), (5, 2, 4), (5, 4, 2)$. - -A topological ordering is only possible if there is no cycle existing in the graph. Thus, a cycle detection should be applied first when we are given a possible cyclic graph. - -% \begin{figure}[h] -% \centering -% \includegraphics[width=0.7\columnwidth]{fig/chapter_advanced_graph/ts_1.png} -% \caption{DAG 2} -% \label{fig:dag_2} -% \end{figure} -\subsubsection{Kahn's algorithm (1962)} In topological sort, the first vertex is always ones with in-degree 0 (a vertex with no incoming edges). 
A naive algorithm is to decide the first node (with in-degree 0), add it to the resulting order $S$, and remove all outgoing edges from this node. Repeat this process until: -\begin{itemize} - \item $V-S$ is empty, i.e., $|S|=|V|$, which indicates we found a valid topological ordering. - \item no node with 0 in-degree is found in the remaining graph $G = (V-S, E^{'})$ where $E^{'}$ are the remaining edges from $E$ after the removal, i.e., $|S| < |V|$, indicating a cycle exists in $V-S$ and no valid answer exists. -\end{itemize} -For example, with the digraph in Fig.~\ref{fig:dag_1}, the process is: -\begin{lstlisting}[numbers=none] -S Removed Edges -0, 3 are the in-degree 0 nodes -Add 0 (0, 1) -1, 3 are the current in-degree 0 nodes -Add 1 (1, 2) -3 is the only in-degree 0 node -Add 3 (3, 2), (3, 4), (3,5) -2, 4, 5 are the in-degree 0 nodes -Add 2 -Add 4 -Add 5 (5, 6) -6 is the only in-degree 0 node -Add 6 -V-S empty, stop -\end{lstlisting} -In this process, we see that at some point 2, 4, 5 are all in-degree 0 nodes at the same time, which is why their orderings can be permuted, resulting in multiple topological orderings. - -In implementation, instead of removing edges from the graph explicitly, a better option is to track $V-S$ with each vertex's in-degree: whenever an in-degree 0 vertex $u$ is added into $S$, $\forall v, u\rightarrow v$, decrease the in-degree of $v$ by one. We also keep a queue $Q$ of all the nodes with in-degree zero. Whenever a vertex in $V-S$ is detected with zero in-degree, add it into $Q$. Accumulatively, the cost of decreasing the in-degree for vertices in $V-S$ is $|E|$ as from the start to end, ``all edges are removed.'' The cost of removing vertices from $V-S$ is $|V|$ as all nodes are removed at the end. With the initialization of the in-degree for vertices in $V-S$, we have a total of $O(2|E|+|V|)$, i.e., $O(|E|+|V|)$ as the time complexity. 
Python code: -\begin{lstlisting}[language=Python] -from collections import defaultdict -import heapq -def kahns_topo_sort(g): - S = [] - V_S =[(0, node) for node in range(len(g))] # initialize node with 0 as in-degree - indegrees = defaultdict(int) - # Step 1: count the in-degree - for u in range(len(g)): - indegrees[u] = 0 - for u in range(len(g)): - for v in g[u]: - indegrees[v]+= 1 - print(f'initial indegree : {indegrees}') - V_S = [(indegree, node) for node, indegree in indegrees.items()] - heapq.heapify(V_S) - - # Step 2: Kan's algorithm - while len(V_S) > 0: - indegree, first_node = V_S.pop(0) - if indegree != 0: # cycle found, no topological ordering - return None - S.append(first_node) - # Remove edges - for v in g[first_node]: - indegrees[v] -= 1 - # update V_S - for idx, (indegree, node) in enumerate(V_S): - if indegree != indegrees[node]: - V_S[idx] = (indegrees[node], node) - heapq.heapify(V_S) - return S -\end{lstlisting} -Calling the function using graph in Fig.~\ref{fig:dag_1} gives result: -\begin{lstlisting} -initial indegree : defaultdict(, {0: 0, 1: 1, 2: 2, 3: 0, 4: 1, 5: 1, 6: 1}) -[0, 1, 3, 2, 4, 5, 6] -\end{lstlisting} - -\subsubsection{Linear Topological Sort with Depth-first Graph Search} In depth-first graph search, if there is an edge $u\rightarrow v$, the recursive search from $v$ will always be completed ahead of the search of $u$. With a simple reverse of the finishing ordering of vertices in depth-first graph search, the topological ordering takes $O(|E|+|V|)$ time. The time complexity equates to that of Kahn's algorithm, but this process is more efficient as it does not require the counting and updates of node in-degrees. The whole process is exactly the same as Cycle Detection with additional complete ordering tracking. 
- -First, the code of the DFS is: -\begin{lstlisting}[language=Python] -def dfs(g, s, colors, complete_orders): - colors[s] = STATE.gray - for v in g[s]: - if colors[v] == STATE.white: - if dfs(g, v, colors, complete_orders): - return True - elif colors[v] == STATE.gray: # a cycle appears - print(f'Cycle found at node {v}.') - return True - colors[s] = STATE.black - complete_orders.append(s) - return False -\end{lstlisting} -Then the main function is: -\begin{lstlisting}[language=Python] -def topo_sort(g): - n = len(g) - complete_orders = [] - colors = [STATE.white] * n - for i in range(n): # run dfs on all the node - if colors[i] == STATE.white: - ans = dfs(g, i, colors, complete_orders) - if ans: - print('Cycle found, no topological ordering') - return None - return complete_orders[::-1] -\end{lstlisting} -Calling \texttt{topo\_sort} on the graph, we will have the sorted ordering as: -\begin{lstlisting}[language=Python] -[3, 5, 6, 4, 0, 1, 2] -\end{lstlisting} -which is another linear topological ordering. -\subsubsection{Example: Course Schedule (L210, m)} -There are a total of $n$ courses that you have to take. Some courses may have prerequisites, for example course 1 has to be taken before course 0, which is expressed as $[0, 1]$. Given the total number of courses and the prerequisite pairs, return the ordering of courses you should take to finish all courses. If it is impossible to finish, return an empty array. - -\paragraph{Analysis} Viewing a pair $[u, v]$ as a directed edge $v\rightarrow u$, we have a directed graph with $n$ vertices and we solve the ordering of courses as getting the topological sort of vertices in the resulting digraph. 
- -%%%%%%%%%%%%%%%%%%%%%%Connected Components%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\section{Connected Components} -\label{sec_connected_component} -\paragraph{Problem Definition} -In graph theory, a \textit{connected component}(or simply component) is defined as a subgraph where all vertices are mutually connected, i.e., where there exists a path between any two vertices in it. A graph $G = (V, E)$ is thus composed of separate connected components(sets) which are mutually exclusive and include all the vertices, .i.e., $V = V_0 \cup V_1 \cup ... \cup V_{m-1}, V_i \cap V_{j\neq i} = \emptyset $. A connected component algorithm should be able to cluster vertices of each single connected component. -\begin{figure}[h] - \centering - \includegraphics[width=0.6\columnwidth] {fig/undirected_cc_example.png} - \caption{The connected components in undirected graph, each dashed read circle marks a connected component.} - \label{fig:cc_undirected} -\end{figure} -% of an undirected graph in which any two vertices are connected to each other by paths, and which is connected to no additional vertices in the supergraph. -For example, the undirected graph in Fig.~\ref{fig:cc_undirected} has two connected components: $\{0, 1, 2, 3, 4\}$ and $\{5, 6\}$. - -Given a directed graph, -\begin{itemize} - \item the term \textit{Strongly Connected Component (SCC)} or \textit{diconnected} is used to refer to the same definition-- where in a SCC any two vertices are reachable to each other by paths. In the leftest directed graph shown in Fig.~\ref{fig:connected_components}, there is a total of five SCCs: $\{0, 1, 2\}$, $\{3\}$,$\{4\}$, $\{5\}$, and $\{6\}$. Vertex 5 and 6 is only connected in one way, resulting into two separate SCCs. - \item ignoring the direction of edges, a \textit{weakly connected component (WCC)} equates to a connected component in the resulting undirected graph. 
-\end{itemize} -\begin{figure}[!ht] - \centering - \includegraphics[width=0.8\columnwidth] {fig/directed_scc_example.png} - \caption{The strongly connected components in a directed graph; each dashed red circle marks a strongly connected component.} - \label{fig:connected_components} -\end{figure} - -\paragraph{Cycles and Strongly Connected Components} A directed graph is acyclic if and only if it has no strongly connected subgraphs with more than one vertex. We call SCCs with at least two vertices nontrivial SCCs. A nontrivial SCC contains at least one directed cycle; more specifically, a nontrivial SCC is composed of a set of directed cycles, as we have observed that there are two directed cycles in our above example and they share at least one common vertex. The shared common vertex acts as a ``transferring stop'' between these directed cycles, thus they all compose one component. Therefore, SCC algorithms can be indirectly used to detect cycles. If there exists a nontrivial SCC, the directed graph contains cycles. - -\subsection{Connected Components Detection} -In general, there are two ways to detect connected components in an undirected graph: graph search and union-find, each suiting different needs. -\paragraph{Graph Search and Search Tree} In an undirected graph $G$, executing a BFS or DFS starting at some vertex $u$ will result in a rooted search tree. As the edges are undirected or bidirectional, all vertices in the search tree belong to the same connected component. To find all connected components, we simply loop through all vertices in $V$; for each vertex $u$: -\begin{itemize} - \item if $u$ is not visited yet, we start a new DFS/BFS. Mark all vertices along the traversal as the same component. - \item otherwise, $u$ is already included in a previously found connected component, continue. -\end{itemize} -The time complexity will be $O(|V|+|E|)$ and the space complexity will be $O(|V|)$. Since the code is trivial, we only demonstrate it in the notebook. 
-\subsubsection{Union Find} -We represent each connected component as a set. For the exemplary graph in Fig.~\ref{fig:cc_undirected}, we have two sets: $\{0, 1, 2, 3, 4\}$ and $\{5, 6\}$. Unlike the graph-search based approach, where the edges are visited in a certain order, in the union-find approach, the ordering of edges to be visited can be arbitrary. The algorithm using union-find is: -\begin{itemize} - \item Initialize in total $|V|$ sets, one for each vertex in $V$. - \item For each edge $(u, v)$ in $E$, \texttt{union} the two sets that vertices $u$ and $v$ currently belong to. -\end{itemize} -Implementing it with Python: -\begin{lstlisting}[language=Python] -from collections import defaultdict -def connectedComponent(g): - n = len(g) - # initialize disjoint set - ds = DisjointSet(n) - - for i in range(n): - for j in g[i]: # for edge i<->j - ds.union(i, j) - return ds.get_num_sets(), ds.get_all_sets() -\end{lstlisting} -How we implement the union-find data structure decides the complexity of this approach. For example, if we use a linked-list-based structure, the complexity will be $O(|E|\times|V|)$ as we traverse $|E|$ edges and each step in the worst case can take $O(|V|)$ to find the set that a vertex belongs to. However, if path compression and union by rank are used for optimization, the time complexity could be lowered to $O(|E|\times \log|V|)$. -\paragraph{Dynamic Graph} Since union-find has worse time complexity compared with graph search, then why do we care about it? The answer is: if we use graph search, whenever new edges and vertices are added to the graph, we have to rerun the graph search algorithm. Imagine that if we double $|V|$ and $|E|$, the worst time complexity will be $O(|V|\times (|V|+|E|))$, bringing up the complexity to a polynomial in the number of edges. However, for each additional edge, union-find adds only a single merge operation to address the change, keeping the time complexity unchanged. - -In detail, we adapt the union-find structure dynamically. 
Set up a \texttt{dict} to track vertex and its index in the union find. Set \texttt{index=0}. When a new edge $(u, v)$ comes, union find includes: -\begin{itemize} - \item check if $u$ and $v$ exists in \texttt{dict}. If not, (a) add a key-value into the node tracker, (b) append \texttt{index} into the list of vertex-set, (c) \texttt{index+=1}. - \item \texttt{find} the sets where $u$ and $v$ belongs to. -\end{itemize} -\paragraph{Implementation} Here we demonstrate how to implement a dynamic connected component detection algorithm. First, convert the graph representation from adjacent list to a list of edges: -\begin{lstlisting}[language=Python] -ug_edges = [(0, 1), (0, 2), (1, 2), (2, 4), (4, 3), (4, 3), (5, 6)] -\end{lstlisting} -Then, we implement a class \texttt{DynamicConnectedComponent} offering all functions needed. -\begin{lstlisting}[language=Python] -class DynamicConnectedComponent(): - def __init__(self): - self.ds = DisjointSet(0) - self.node_index= defaultdict(int) - self.index_node = defaultdict(int) - self.index = 0 - - def add_edge(self, u, v): - if u not in self.node_index: - self.node_index[u], self.index_node[self.index] = self.index, u - self.ds.p.append(self.index) - self.ds.n += 1 - self.index += 1 - - if v not in self.node_index: - self.node_index[v], self.index_node[self.index] = self.index, v - self.ds.p.append(self.index) - self.ds.n += 1 - self.index += 1 - u, v = self.node_index[u], self.node_index[v] - self.ds.union(u, v) - return - - def get_num_sets(self): - return self.ds.get_num_sets() - - def get_all_sets(self): - sets = self.ds.get_all_sets() - return {self.index_node[key] : set([self.index_node[i] for i in list(value)]) for key, value in sets.items()} -\end{lstlisting} -Now, to find the connected components dynamically based on incoming edges, we can run: -\begin{lstlisting}[language=Python] -dcc = DynamicConnectedComponent() -for u, v in ug_edges: - dcc.add_edge(u, v) -dcc.get_num_sets(), dcc.get_all_sets() -\end{lstlisting} 
-The output is consistent with previous result, which is: -\begin{lstlisting} -(2, {3: {0, 1, 2, 3, 4}, 6: {5, 6}}) -\end{lstlisting} -\subsubsection{Examples} -\begin{enumerate} -\item 547. Number of Provinces(medium) - \item 128. Longest Consecutive Sequence (hard), union find solution: \url{https://leetcode.com/problems/longest-consecutive-sequence/discuss/1109808/Python-Clean-Union-Find-with-explanation} -\end{enumerate} -% Therefore, the additional cost for a newly added edge is constant, making the algorithm stable and linear all the time. Using the \texttt{DisjointSet} data structure implemented in Section \ref{}, our algorithm for running the above algorithm in a dynamic graph is implemented as: -% \begin{lstlisting}[language=Python] -% class UGraph(): -% def __init__(self, vertices, edges): -% self.vertices = vertices -% self.edges = edges -% self.ds = DisjointSet(self.vertices) -% self._init_connected_component() -% print(f'The initial connected components are: {self.ds.set_item}') - -% def _init_connected_component(self): -% for u, v in self.edges: -% self.ds.union(u, v) - -% def add_edge(self, u, v): -% # track nodes and make new set -% for node in [u, v]: -% if node not in self.vertices: -% self.vertices.add(node) -% self.ds.make_set(node) - -% # track edges -% if {u, v} not in self.edges: -% self.edges.append({u, v}) - -% # update ds -% self.ds.union(u, v) -% print(f'The connected components are after adding edge ({u}, {v}) are: {self.ds.set_item}') -% \end{lstlisting} -\begin{bclogo}[couleur = blue!30, arrondi=0.1,logo=\bccrayon,ombre=true]{Implement WCC detection algorithm in directed graph? { }} -\end{bclogo} -% \paragraph{Example: } - -\subsection{Strongly Connected Components} -In graph theory, two nodes $u, v \in V$ are called strongly -connected iff $v$ is reachable from $u$ and -$u$ is reachable from $v$. If we contract each SCC into a single vertex, the resulting graph will be a DAG. 
Denoting the contracted DAG as $G^{SCC} = (V^{SCC}, E^{SCC})$, the vertices in $V^{SCC}$ are the SCCs of $G$ and $E^{SCC}$ is defined as follows: - -$(C_1, C_2)$ is an edge in $G^{SCC}$ iff $\exists u \in C_1, v \in C_2$ such that $(u, v)$ is an edge in $G$. - -In other words, if there is an edge in $G$ -from any node in $C_1$ to any node in $C_2$, there is an edge in $G^{SCC}$ from $C_1$ to $C_2$. -\begin{figure}[!ht] - \centering - \includegraphics[width=0.85\columnwidth] {fig/chapter_advanced_graph/scc.png} - \caption{A graph with four SCCs.} - \label{fig:dfs_not_enough} -\end{figure} - -\paragraph{Kosaraju's Algorithm} If we were to do a DFS in $G$, and $C_1\rightarrow C_2$ is an edge in $G^{SCC}$, then at least one vertex in $C_1$ will finish after all vertices in $C_2$ have finished. If we first start with vertex $0$, the finishing order of all vertices is [1, 6, 5, 4, 3, 2, 0]. $0$ finished later than $4$ from $C_2$, satisfying the claim. If we look purely at the last node from each SCC to turn dark, we get a topological sort of $G^{SCC}$ in reverse ([1, \textcolor{red}{6}, \textcolor{red}{5}, \textcolor{red}{4}, 3, 2, \textcolor{red}{0}]), which is $[C_4, C_3, C_2, C_1]$. How to find the last node in each SCC? We can reverse the DFS finishing order, having [0, 2, 3, 4, 5, 6, 1]. - -If we reverse the order, we have [0, 2, 3, 4, 5, 6, 1]. What happens if we do another round of DFS on the given ordering? First, starting from 0 (last node), we can (1) reach all vertices in $C_1$ as they are strongly connected, (2) also reach the vertices in $C_2$ whenever there are edges from $C_1$ to $C_2$ (between two SCCs there are either no edges or edges in one direction only). If we can reverse the edges in between, then we can avoid (2) and still keep (1). The way we do this is: reverse the direction of all edges in graph $G$. Run DFS on the reversed finishing ordering, then an SCC will include any vertex along the traversal that hasn't been put into an SCC yet. 
In our example, the process is: -\begin{lstlisting}[numbers=none] -0: find {0, 1, 2, 3} -4: find {4} -5: find {5} -6: find {6} -\end{lstlisting} -We formalize Kosaraju's algorithm into three steps: -\begin{enumerate} -\item Retrieve the reversed finishing order $L$ of vertices from a DFS on $G$. This step is similar to topological sort in a DAG. -\item Transpose the original graph $G$ to $G^T$ by reversing the direction of edges in $G$. -\item Run another DFS on $G^T$ following the ordering $L$; any DFS tree starting from a vertex that hasn't been put into an SCC yet makes up another SCC. -\end{enumerate} - -\paragraph{Implementation} The main function \texttt{scc} calls two functions: \texttt{topo\_sort\_scc} and \texttt{reverse\_graph} to get $L$ and $G^T$. The topological-ordering-like function is: -\begin{lstlisting}[language=Python] -# DFS traversal with reversed complete orders -def dfs(g, s, colors, complete_orders): - colors[s] = STATE.gray - for v in g[s]: - if colors[v] == STATE.white: - dfs(g, v, colors, complete_orders) - colors[s] = STATE.black - complete_orders.append(s) - return - -# topologically sort in terms of the last node of each scc -def topo_sort_scc(g): - v = len(g) - complete_orders = [] - colors = [STATE.white] * v - for i in range(v): # run dfs on all the node - if colors[i] == STATE.white: - dfs(g,i, colors, complete_orders) - return complete_orders[::-1] -\end{lstlisting} -The main scc is straightforward: -\begin{lstlisting}[language=Python] -# get conversed graph -def reverse_graph(g): - rg = [[] for i in range(len(g))] - for u in range(len(g)): - for v in g[u]: - rg[v].append(u) - return rg - -def scc(g): - rg = reverse_graph(g) - orders = topo_sort_scc(g) - - # track states - colors = [STATE.white] * len(g) - sccs = [] - - # traverse the reversed graph - for u in orders: - if colors[u] != STATE.white: - continue - scc = [] - dfs(rg, u, colors, scc) - sccs.append(scc) - return sccs -\end{lstlisting} - -\begin{bclogo}[couleur = blue!30, 
arrondi=0.1,logo=\bccrayon,ombre=true]{Try to take a look at Tarjan's algorithm for SCC} -\end{bclogo} -\subsubsection{Examples} -\begin{enumerate} - \item 1520. Maximum Number of Non-Overlapping Substrings (hard): set up 26 nodes for all letters. A node represents a substring from start to end. Given a string abacdb, for a(0-2), add an edge from a to every other letter between start and end. Then we will have a directed graph. There is an scc (loop) between a and b, meaning substring a has an occurrence of b and substring b has an occurrence of a, which conflicts with condition 2, so that they have to be combined. All results are sccs that are leaves in the contracted scc graph. We can think of the scc graph as acyclic, which is a forest. If we choose an internal node, we can't choose any of the leaves, which makes choosing the leaves give the maximum number. Another solution is using two pointers: \url{https://zxi.mytechroad.com/blog/greedy/leetcode-1520-maximum-number-of-non-overlapping-substrings/} -\end{enumerate} -%%%%%%%%%%%%%%%%%%%%MST%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\section{Minimum Spanning Trees} - \begin{figure}[!ht] - \centering - \includegraphics[width=0.99\columnwidth]{fig/mst_example.png} - \caption{Example of a minimum spanning tree in an undirected graph; the green edges are edges of the tree, and the yellow filled vertices are vertices of the MST (change this to a graph with multiple spanning trees, and highlight the one with the minimum cost).} - \label{fig:prim_example} - \end{figure} -\paragraph{Problem Definition} A \textit{spanning tree} in an undirected -graph $G=(V, E)$ is a set of edges, with -no cycles, that connects all vertices. There can exist many spanning trees in a graph. Given a weighted graph, we are particularly interested in the \textit{minimum spanning tree (MST)}--a spanning tree with the least total edge cost. - -One example is shown in Fig.~\ref{fig:prim_example}. 
This graph can represent a collection of houses, and possible wires that we can lay. How we lay wires -to connect all houses with the least total cost is equivalent to an MST problem. - -\paragraph{Spanning Tree} -To obtain a tree from a graph, the essence is to select edges iteratively until we have $|V|-1$ edges which form a tree connecting $V$. We have two general approaches: -\begin{itemize} - \item Start with a forest consisting of $|V|$ trees, each containing only one node. We design a method to merge these trees into a final connected MST by selecting one edge at a time. This is the path taken by Kruskal's algorithm. - \item Start with a root node which can be any vertex selected from $G$, grow the tree by spanning to more nodes iteratively. In the process, we maintain two disjoint sets of vertices: one containing vertices that are in the growing spanning tree $S$ and the other to track all remaining vertices $V-S$. This is the path taken by Prim's algorithm. -\end{itemize} -We denote the edges in the growing tree as $A$. In this section, we explain two greedy algorithms to find an MST. - - - -\subsection{Kruskal's Algorithm} - \begin{figure}[!ht] - \centering - \includegraphics[width=0.99\columnwidth]{fig/mst_kruskal.png} - \caption{The process of Kruskal's Algorithm} - \label{fig:prim_kruskal} - \end{figure} -Kruskal's algorithm starts with $|V|$ trees, each of which has only one node. The main process of the algorithm is to merge these trees into a single one by iterating through all edges. - -\paragraph{Generate Spanning Tree with Union-Find} For each edge $(u, v)$: -\begin{itemize} - \item if $u$ and $v$ belong to the same tree, adding this edge will form a cycle, thus we discard this edge. - \item otherwise, combine these two trees and add this edge into $A$. -\end{itemize} -This process will result in a single spanning tree. Implementation-wise, we can do this easily by using the union-find data structure. A tree is a set. 
Adding one edge merges two sets/trees into a single one if they belong to different sets. - -\paragraph{Being Greedy with MST } - At each step $i$, we have $|E|-i$ edges to choose from. Applying the greedy principle, we can try to choose the edge with the minimum cost among the $|E|-i$ options. That is to say, we iterate over the edges in increasing order of their weights in the process of generating a spanning tree. Doing so ensures that we obtain an MST, and this algorithm is the so-called Kruskal's algorithm. - -Fig.~\ref{fig:prim_kruskal} demonstrates a run of Kruskal's algorithm on the input undirected graph. Here, the edges are ordered increasingly, i.e., [(1,2), (3, 5), (2, 3), (2, 5), (3, 4), (4, 5), (1, 3)]. As initialization, we assign a set id for each vertex that is marked in red and placed above its corresponding vertex. The process is: -\begin{lstlisting}[numbers=none] -edge logic action -(1,2) 1's set_id 1 != 2's set_id 2 merge set 2 to set 1 -(3,5) 3's set_id 3 != 5's set_id 5 merge set 5 to set 3 -(2,3) 2's set_id 1 != 3's set_id 3 merge set 3 to set 1 -(2,5) 2's set_id 1 == 5's set_id 1 continue -(3,4) 3's set_id 1 != 4's set_id 4 merge set 4 to set 1 -(4,5) 4's set_id 1 == 5's set_id 1 continue -(1,3) 1's set_id 1 == 3's set_id 1 continue -\end{lstlisting} -This process produces edges $[(1,2), (3,5), (2,3), (3, 4)]$ as the edges of the final MST. We can have slightly better performance if we stop iterating through edges once we have selected $|V|-1$ edges. 
The implementation is as simply as: -\begin{lstlisting}[language=Python] -from typing import Dict -def kruskal(g: Dict): - # g is a dict with node: adjacent nodes - vertices = [i for i in range(1, 1 + len(g))] - vertices = g.keys() - n = len(vertices) - ver_idx = {v: i for i, v in enumerate(vertices)} - - # initialize a disjoint set - ds = DisjointSet(n) - - # sort all edges - edges = [] - for u in vertices: - for v, w in g[u]: - if (v, u, w) not in edges: - edges.append((u, v, w)) - edges.sort(key=lambda x: x[2]) - - # main section - A = [] - for u, v, w in edges: - if ds.find(ver_idx[u]) != ds.find(ver_idx[v]): - ds.union(ver_idx[u], ver_idx[v]) - print(f'{u} -> {v}: {w}') - A.append((u, v, w)) - return A -\end{lstlisting} -For the exemplary graph, we denote an weighted edge as a (key, value) pair, where the value is a tuple of two with the first item being the other endpoint from the key vertex and the second item being the weight of the edge. The graph will thus be represented by a dictionary, $\{$1:[(2, 2), (3, 12)], 2:[(1, 2), (3, 4), (5, 5)], 3:[(1, 12), (2, 4), (4, 6), (5, 3)], 4:[(3, 6), (5, 7)], 5:[(2, 5), (3, 3), (4, 7)]$\}$. Running \texttt{kruskal(a)} will return the following edges: -\begin{lstlisting}[numbers=none] -[(1, 2, 2), (3, 5, 3), (2, 3, 4), (3, 4, 6)] -\end{lstlisting} -\paragraph{Complexity Analysis} The sorting takes $O(|E|\log|E|)$ big oh time. The cost of checking each edge's belonging set id and merging two trees into a single one is decided by the complexity of the disjoint set, it can range from $O(\log|V|)$ to $O(|V|)$. Therefore, we can conclude the time complexity will be bounded by the sorting time, i.e., $O(|E|\log|E|)$. - - - \subsection{Prim's Algorithm} -% \paragraph{Minimum Spanning Tree} With the above process, we are able to find an arbitrary spanning tree. To make sure this tree is the minimum among all, we have to make sure each time the edge we add is a \textit{safe edge}. 
A safe edge is an edge that may be added to $A$ without violating the invariant that $A$ is a subset of some minimum spanning tree. - \begin{figure}[!ht] - \centering - \includegraphics[width=0.45\columnwidth]{fig/graph_cut.png} - \caption{A cut denoted with red curve partition V into \{1,2,3\} and \{4,5\}.} - \label{fig:graph_cut} - \end{figure} - In graph theory, a \textit{cut} is a partition of $V$ into $S$ and $V-S$. For example, in Fig.~\ref{fig:graph_cut} a cut is marked by red curve, removing three edges $(2, 5), (3, 5), (3, 4)$ partitions the set into two subgraph with subsets $\{1,2,3\}$ and $\{4,5\}$. A \textit{cross edge} $(u, v) \in E$ crosses the cut $(S, V-S)$ if one of its endpoint is in $S$ and the other is in $V-S$. A \textit{light edge} is the minimum edge among all cross edges, such as edge $(3,5)$ is the light edge in our example. We say, a cut \textit{respects} a set of edges $A$ if no edge in $A$ crosses the cut, such as the marked cut in the example respects the set of edges $(1,2), (2, 3), (1, 3)$. - - Prim's algorithm starts with a randomly chosen root node and be put into a set $S$, leaving us with two sets of vertices, $S$ and $V-S$. Next, it iteratively grows the partial and connected MST by adding an edge from the cross edges between the cut of $(S, V-S)$. Prim's algorithm is greedy in the sense that it chooses a light edge among its options to form the final MST. \textcolor{red}{This process simulates the uniform-cost search which compose the Dijkstra's shortest path algorithm.} - - \begin{figure}[!ht] - \centering - \includegraphics[width=0.99\columnwidth]{fig/mst_prim_2.png} - \caption{Prim's Algorithm, at each step, we manage the cross edges.} - \label{fig:prim_1} - \end{figure} - Fig.~\ref{fig:prim_1} demonstrates the process of Prim's algorithm. We start from vertex $1$. 
With the sets $A$, $S$, and $V-S$, the cross edges at each step denoted as $CE$, and a decision being valid if it does not form a cycle within $A$, we list the process as: - \begin{lstlisting}[numbers=none] - A S V-S CE light edge - 1 2,3,4,5 (1,2), (1,3) (1,2) -(1,2) 1,2 3,4,5 (1,3),(2,3),(2,5) (2,3) -(1,2),(2,3) 1,2,3 4,5 (3,4),(3,5),(2,5) (3,5) -(1,2),(2,3),(3,5) 1,2,3,5 4 (3,4),(5,4) (3,4) -(1,2),(2,3),(3,5),(3,4) 1,2,3,4,5 -\end{lstlisting} - -\subsubsection{Implementation} One key step is to track all valid cross edges and be able to select the minimum edge from the set. Naturally, we use a priority queue \texttt{pq}. \texttt{pq} can be implemented in two ways: -\begin{itemize} - \item \textbf{Priority Queue by Edges}--Considering the set $S$ as a frontier set, \texttt{pq} maintains all edges expanded from the frontier set. - \item \textbf{Priority Queue by Vertices}--\texttt{pq} maintains the minimum cross edge cost from vertices in $S$ to the current vertex, which is in $V-S$. This is an optimization over the first approach as it reduces multiple cross edges between $S$ and the current vertex $v$ into a single cost -- the minimum. - %We can represent the cost update for $v$ as $\min_u c(u, v), u \in S, (u, v) \in E$. -\end{itemize} -\paragraph{Priority Queue by Edges} For the example shown in Fig.~\ref{fig:prim_1}, at first, the frontier set has only $1$, then we have edges $(1,2), (1,3)$ in \texttt{pq}. Once edge $(1,2)$ is popped out as it has the smallest weight, we explore all outgoing edges of vertex 2 to nodes in $V-S$, adding $(2,3), (2,5)$ in \texttt{pq}, resulting in $pq=(2,3),(2,5),(1,3)$. Then we pop out edge $(2,3)$, and explore outgoing edges of vertex 3 and add $(3,4),(3,5)$ into \texttt{pq}, with $pq=(2,5),(1,3),(3,4),(3,5)$. At this moment, we can see that edge $(1,3)$ is no longer a cross edge. Therefore, whenever we are about to add the light edge into the expanding tree, we check if both of its endpoints are in set $S$ already. 
If true, we skip this edge and use the next valid light edge. Repeating this process will get us the set of edges $A$ forming an MST. The Python code is as follows: -\begin{lstlisting}[language=Python] -import queue - -def _get_light_edge(pq, S): - while pq: - # Pick the light edge - w, u, v = pq.get() - # Filter out non-cross edge - if v not in S: - S.add(v) - return (u, v, w) - return None - -def prim(g): - cur = 1 - n = len(g.items()) - S = {cur} #spanning tree set - pq = queue.PriorityQueue() - A = [] - - while len(S) < n: - # Expand edges for the exploring vertex - for v, w in g[cur]: - if v not in S: - pq.put((w, cur, v)) - - le = _get_light_edge(pq, S) - if le: - A.append(le) - cur = le[1] #set the exploring vertex - else: - print(f'Graph {g} is not connected.') - break - return A -\end{lstlisting} -In line 24, we use a 3-item tuple representing the edge cost, the first endpoint in the set $S$ and the second endpoint in $V-S$ to align with the fact that the \texttt{PriorityQueue()} uses the first item of a tuple as the key for sorting. The \texttt{while} loop is similar to our breadth-first search and can be terminated under the following two conditions: -\begin{itemize} - \item when the set $S$ is as large as the set $V$ by checking the size of set $S$ - \item when we can not find a light edge which happens when the graph is not connected. -\end{itemize} -Calling \texttt{prim(a)} will return us the following $A$: -\begin{lstlisting}[numbers=none] -[(1, 2, 2), (2, 3, 4), (3, 5, 3), (3, 4, 6)] -\end{lstlisting} -\paragraph{Complexity Analysis} The main cost of this implementation is on the priority queue, which has a maximum of $|E|$ items. In the worst case we have to enqueue and dequeue all edges, making the complexity $O(|E|\log|E|)$. Since in a graph, generally, $|E| < |V|^2$, the complexity becomes $O(|E|\log |V|)$. 
-\paragraph{Priority Queue by Vertices} -Instead of tracking cross edges in the priority queue explicitly, we reduce all cross edges that reaches to a vertex in $V-S$ into the smallest cost and a predecessor which is to track the node in $S$ that resulted in the smallest cost, saving us some additional space and time in the queue operations. -% \begin{itemize} -% \item If a vertex $v$ in $V-S$ is not connected to any vertex in $S$, it has a $\infty$ cost. -% \item If this vertex $v$ is connected to one vertex $u$ in $S$ through edge $(u, v)$ which has an edge cost $w$, this vertex will have a cost $w$. -% \item If this vertex $v$ is connected to multiple vertices in $S$, keep the smallest edge cost as its cost. -% \end{itemize} - - \begin{figure}[!ht] - \centering - \includegraphics[width=0.99\columnwidth]{fig/mst_prim_1.png} - \caption{Prim's Algorithm} - \label{fig:prim_2} - \end{figure} - As shown in Fig.~\ref{fig:prim_2}, we first initialize a priority queue with $|V|$ items, each has a task id same as the vertex id, a predecessor vertex $p=-1$, and a cost initialized with $\infty$. We start by pointing vertex $1$ as the root node, setting $S={1}$ and modify the task 1's cost to 0 and points its predecessor to itself. Then, we repeatedly pop out the vertex in the queue that has the smallest weight, along with the predecessor of this node, we are choosing the light edge. With this chosen node, we are able to reach out to adjacent nodes that are still in $V-S$ and see if we are able to find an even ``lighter'' edge. Applying this process on the given example: - \begin{enumerate} - \item First, we have the start vertex 1 with the smallest cost, pop it out, and explore edges $(1,2), (1, 3)$, resulting in (a) modifying task 2 and 3's cost to 2 and 12, respectively and (b) set 2 and 3's predecessor to 1. 
- \item Pop out vertex 2, explore edges $(2,3), (2,5)$, resulting in (a) modifying task 3 and 5's cost to 4 and 5, respectively and (b) set 3 and 5's predecessor to 2. - \item Pop out vertex 3, explore edges $(3, 5), (3,4)$, resulting in (a) modifying task 5 and 4's cost to 3 and 6, respectively and (b) set 5 and 4's predecessor to 3. - \item Pop out vertex 5, explore edge $(5, 4)$: since the new cross edge $(5,4)$ has a larger cost than the previously reduced cross edge reaching vertex 4, the vertex 4 in the queue is not modified. - \item Pop out vertex 4, no more new edges to expand, terminate the program. - \end{enumerate} - This process results in exactly the same MST as the implementation by edges. However, it adds additional challenges into the implementation of the priority queue: we have to modify an enqueued item's record during the life cycle of the queue. In the Python implementation, we use our customized \texttt{PriorityQueue()} in Section~\ref{} (also included in the notebook). The main process of the algorithm is: - \begin{lstlisting}[language=Python] -def prim2(g): - n = len(g.items()) - pq = PriorityQueue() - S = {} - A = [] - # Initialization - for i in range(n): - pq.add_task(task=i+1, priority=float('inf'), info=None) # task: vertex, priority: edge cost, info: predecessor vertex - - S = {1} - pq.add_task(1, 0, info=1) - - while len(S) < n: - u, p, w = pq.pop_task() - if w == float('inf'): - print(f'Graph {g} is not connected.') - break - A.append((p, u, w)) - S.add(u) - for v, w in g[u]: - if v not in S and w < pq.entry_finder[v][0]: - pq.add_task(v, w, u) - - return A - \end{lstlisting} - Calling function \texttt{prim2(a)} will output the following $A$: - \begin{lstlisting}[language=Python] - [(1, 1, 0), (1, 2, 2), (2, 3, 4), (3, 5, 3), (3, 4, 6)] - \end{lstlisting} - - \subsubsection{Examples} - \begin{enumerate} - \item 1584. Min Cost to Connect All Points (medium) - \item 1579. 
Remove Max Number of Edges to Keep Graph Fully Traversable (hard) - \end{enumerate} - -\begin{bclogo}[couleur = blue!30, arrondi=0.1,logo=\bccrayon,ombre=true]{Try to prove the correctness of Kruskal's Algorithm.} -\end{bclogo} - - - -%%%%%%%%%%%%%%%%%%%%Shortest-Paths Algorithms%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\section{Shortest-Paths Algorithms} -\label{sec_single_source_shortest_paths} -\paragraph{Problem Definition} Given a weighted, directed graph $G=(V, E)$, with weight function $w:E\rightarrow R$ that maps edges to real-valued weights, the weight of a path $p = (v_0, v_1, ..., v_k)$ is the summation over its constituent edge weights, denoted as $w(p)$: -\begin{equation} - w(p)=\sum_{i=1}^{k} w(v_{i-1}, v_i) -\end{equation} -\textit{The shortest path} problem between $v_i$ and $v_j$ is to find the shortest path weight $\sigma(v_i, v_j)$ along with the shortest path $p$. -\begin{equation} -\sigma(v_i, v_j) = \left\{ -\begin{array}{ll} -\min\{w(p):v_i\xrightarrow{\text{p}} v_j\} & \mbox{if there is a path from } v_i \mbox{ to } v_j \\ -\infty &\mbox{otherwise} -\end{array} -\right. -\end{equation} -\begin{figure}[!ht] - \centering - \includegraphics[width=0.6\columnwidth]{fig/directed_graph_negative.png} - \caption{A weighted and directed graph.} - \label{fig:sp_udg} -\end{figure} -For example, for the graph shown in Fig.~\ref{fig:sp_udg}, the shortest-path weight and its corresponding shortest-path between $s$ to any other vertex in $V$ is listed as: -\begin{lstlisting}[numbers=none] -(source, target) shortest-path weight shortest path -(s, s) 0 s -(s, y) 7 (s, y) -(s, x) 4 (s, y, x) -(s, t) 2 (s, y, x, t) -(s, z) -2 (s, y, x, t, z) -\end{lstlisting} - -\paragraph{Variants of Shortest-path Problems} Generally, there exists a few variants of shortest path problems: -\begin{enumerate} - \item \textit{Single-source shortest-path:} Find a shortest path from a given source $s$ vertex to each vertex $v \in V$. 
- \item \textit{Single-target shortest-path:} Find a shortest path to a given target $t$ from each vertex $v \in V$. By reversing the direction of each edge in the graph, we can reduce this problem to a single-source shortest-path problem. - \item \textit{Single-pair shortest-path problem:} Find a shortest path from $u$ to $v$ for given vertices $u$ and $v$. If we solve the single-source problem with source vertex $u$, we solve this problem too. %Moreover, all known algorithms for this problem have the same worst-case asympototic running time as the best single-source algorithms. - \item \textit{All-pairs shortest-path problem:} Find a shortest path from $u$ to $v$ for every pair of vertices $u$ and $v$ in $V$ if there exists one. Although we can solve this problem by running a single-source algorithm once for each vertex, we usually can solve it faster with algorithms addressed in Section (Sec.~\ref{sec_all_pairs_shortest_paths}). -\end{enumerate} - -% As we can see, the single-source shortest-path problem is essential to any other mutants, we first discuss the algorithm design and analysis for this type. With its solution at hand, we further address the all-pairs shortest path problem. - -\subsection{Algorithm Design} -\label{chapter_advanced_graph_sec_algorithm_design} -In this section, we discuss the shortest path problem, and analyze it by using both graph theory and the fundamental algorithm design principle--Dynamic Programming. -\paragraph{Shortest path and Cycle} From our experience in Combinatorial Search, we have to detect cycles within a path in the graph-based tree search to avoid being stuck in infinite recursion. So, how will cycle affect the detection of shortest paths? For example, in Fig.~\ref{fig:sp_udg}, a path $p=(s, t, x, t)$ contains the cycle $(t, x, t)$. Because the cycle has a positive path weight $5+(-2)$, the path $(s, t)$ remains smaller than the path that comes with the cycle. 
However, if we flip the signs of the weights of edges $(t, x)$ and $(x, t)$, then the same cycle $(t, x, t)$ will have a negative path weight $(-5)+2=-3$; by repeating the cycle within the path indefinitely, we can drive the cost to $-\infty$. Therefore, for a graph where the weights can be both negative and positive, one requirement posed on the single-source shortest-path algorithm, recursive or iterative, is to detect any negative-weight cycle that is reachable from the source. Once we rule out all negative-weight cycles, the remainder of the algorithm can focus only on shortest paths of at most $|V|-1$ edges, and the resulting shortest paths will contain neither negative- nor positive-weight cycles. - -\subsubsection{Exponential Naive Solution} -% Therefore, when we are designing shortest-path algorithms for our given problems, it is important to address questions: -% \begin{enumerate} -% \item Does the graph have a weight function that is non-negative? If true, we can use \textbf{Dijkstra's algorithm} detailed in Subsection~\ref{subsec_dijkstra}. -% \item If not, is the graph acyclic? If it is acyclic, then a negative-weight cycle will never exist in the graph. We solve our problems with \textbf{Single-source shortest paths in directed acyclic graphs} in Subsection~\ref{subsec_general_shortest_path}. -% \item If the graph is potentially cyclic, and potentially end up with negative-weight cycle, we go for \textbf{Bellman-Ford Algorithm} (subsection~\ref{subsec_bellman_ford}) for answer. -% \end{enumerate} -Assuming the given graph has no negative-weight cycle, a naive solution to obtain the shortest path and its weight is simply a tree search that starts from a source vertex $s$ and enumerates all possible paths from $s$ to any other vertex in $V$. The search tree will have a maximum height of $|V|-1$, making the time complexity of this naive solution $O(b^{|V|})$, where $b$ is the maximum branching factor (out-degree) of a vertex.
def all_paths(g, s, path, cost, ans):
    """Enumerate every simple path that extends ``path`` from vertex ``s``.

    Args:
        g: adjacency list mapping a vertex to a list of
            ``(neighbor, weight)`` pairs. A vertex without outgoing edges
            may be omitted from ``g``.
        s: the vertex the current partial path ends at.
        path: vertices of the current partial path, ending with ``s``. It is
            extended and restored in place while the search backtracks.
        cost: total weight of the edges along ``path``.
        ans: output list; one ``{'path': [...], 'cost': ...}`` record is
            appended for every path found, including ``path`` itself.

    Returns:
        None. Results are accumulated in ``ans``.
    """
    # Store a copy: `path` keeps changing as the search backtracks.
    ans.append({'path': path[:], 'cost': cost})
    # `.get` lets a sink vertex that only appears as an edge target (and is
    # therefore not a key of `g`) end a path instead of raising KeyError.
    for v, w in g.get(s, ()):
        # Skip vertices already on the path: only simple paths are produced
        # and the recursion cannot loop forever on a cycle.
        if v in path:
            continue
        path.append(v)
        # Pass the extended cost down instead of adding and then undoing it.
        all_paths(g, v, path, cost + w, ans)
        path.pop()


# Example: enumerate all simple paths that start from vertex 's'.
g = {
    't': [('x', 5), ('y', 8), ('z', -4)],
    'x': [('t', -2)],
    'y': [('x', -3), ('z', 9)],
    'z': [('x', 7)],
    's': [('t', 6), ('y', 7)],
}
ans = []
all_paths(g, 's', ['s'], 0, ans)
-\end{enumerate} -\paragraph{Predecessor Rule} -The shortest-paths tree makes it possible for us to track shortest paths with the predecessor rule: Given a graph $G=(V,E)$, and in the single-source shortest path problem, we maintain for each vertex $v\in V$ a predecessor $\pi$ that is either another vertex or empty, as is the case for the root node. The shortest path between $s$ and another vertex $v$ can be obtained by following the chained predecessors starting from $v$ all the way backward to the source $s$. To summarize, each vertex $v$ in the graph stores -two values, $d(v)$ and $\pi(v)$, which (inductively) describe a tentative shortest path from $s$ to $v$. - -% If there is negative-weight cycle, the above naive solution would fail us to detect that cycle -\subsubsection{Optimization} -As we see, the shortest-path problem is a truly combinatorial optimization problem, making it one of the best demonstrations of the algorithm design principles--Dynamic Programming and Greedy Algorithm. On the other hand, depending on the characteristics of the target graph--whether it is dense or sparse, a directed acyclic graph (DAG) or not--we can further optimize the efficiency beyond the choice of design principle. However, in this chapter, we focus on the gist: \textit{how to solve all-pair shortest path problems with dynamic programming?} - -% in this section, we will recall the properties of Dynamic Programming in the context of solving the shortest path problem: overlapping subproblems and optimal substructures. -% \begin{figure}[!ht] -% \centering -% \includegraphics[width=0.95\columnwidth]{fig/shortest_path_property_1.png} -% \caption{Flattening graph for better observation} -% \label{fig:shortest_path_pro_1} -% \end{figure} - -First, we use an adjacency matrix to represent our weight matrix $W$ of size $|V|\times |V|$. In the process, we track shortest-path weight estimate $D$ and additionally the predecessor $\Pi$. Both $D$ and $\Pi$ are of the same size as $W$.
$w_{ij}$ indicates the weight of each edge with startpoint $i$ and endpoint $j$, -\begin{equation} - W{(i, j)} = \left\{ - \begin{array}{lll} - 0 & \mbox{if } i=j \\ - w_{ij} & \mbox{if $i\ne j$, and $(i, j)\in E$ } \\ - \infty & \mbox{if $i\ne j$, and $(i, j)\notin E$ } - \end{array} - \right. -\end{equation} -With this definition, we show a naive directed graph in Fig.~\ref{fig:naive_graph} along with its $W$. -\begin{figure}[!ht] - \centering - \includegraphics[width=0.9\columnwidth]{fig/naive_graph.png} - - \includegraphics[width=0.4\columnwidth]{fig/DP_shortest_paths.png} - \caption{The simple graph and its adjacency matrix representation (changing it to lower letter)} - \label{fig:naive_graph} -\end{figure} - -\paragraph{Overlapping Subproblems and Optimal Substructures} For all-pair shortest paths, we have $|V|^2$ optimal subproblems, each subproblem $D{(i,j)}$ is defined as the shortest-path between $v_i$ and $v_j$. Optimal Substructures states ``\textit{the optimal solution to a problem has the optimal solutions to subproblems in it}.'' -%Use the bottom-up approach, the core of dynamic programming is the smaller subproblems constituent larger subproblems, and the subproblems overlap in the sense that they can be used multiple times to constituent multiple larger subproblems. -All of this boils down to how to define the ``subproblem'' and how a larger subproblem is divided into smaller subproblems (the recurrence relation). - -With our naive directed graph, the shortest path between $a$ and $d$ come from the shortest path between $a$ to an intermediate node $x$ or the shortest path between $a$ and $d$ found so far. First, we define the subproblem as the shortest path between $a$ and $d$ with maximum path length(MPL) $m$. With this definition, we show two possible ways of dividing the subproblem: -\begin{enumerate} - \item We divide a subproblem with MPL $m$ into a subproblem with MPL $m-1$ and an edge. 
Therefore, the shortest path at this maximum length $m$ is either the shortest path found so far or equals to the shortest path between $a$ and $x$ plus the weight of edge $(x,d)$, our recurrence relation is: - \begin{align} - D^{m}{(a,d)} = \min_x(D^{m-1}{(a,d)}, D^{m-1}{(a,x)}+W{(x,d)}) - \label{eq:dp_1} - \end{align} - As we can see, each update for an item in distance matrix $D$ takes $O(|V|)$ time as it has to check all possible intermediate nodes. Furthermore, it takes $|V|-1$ passes to update $D^{0}$ all the way to $D^{|V|-1}$. Therefore, this approach has a time complexity of $O(|V|^4)$. We demonstrate the update process in Fig.~\ref{fig:dp_1} for our naive example. - \begin{figure}[!ht] - \centering - \includegraphics[width=0.8\columnwidth]{fig/DP_shortest_paths (1).png} - \caption{DP process using Eq. ~\ref{eq:dp_1} for Fig.~\ref{fig:naive_graph}} - \label{fig:dp_1} -\end{figure} - \begin{figure}[!ht] - \centering - \includegraphics[width=0.9\columnwidth]{fig/DP_shortest_paths (2).png} - \caption{DP process using Eq. ~\ref{eq:dp_2} for Fig.~\ref{fig:naive_graph}} - \label{fig:dp_2} -\end{figure} - \item We divide a subproblem with MPL $m$ into two equal sized subproblem, each with MPL $m/2$. Therefore, the shortest path at this maximum length $m$ is either the shortest path found so far or equals to the shortest path between $a$ and $x$ of length $m/2$ plus the shortest path between $x$ and $d$ of length $m/2$. With recurrence relation: - \begin{align} - D^{m}{(a, d)} = \min_x(D^{m/2}{(a,d)}, D^{m/2}{(a,x)}+D^{m/2}{(x,d)}) - \label{eq:dp_2} - \end{align} - Similarly, each update takes $|V|$ time. Differently, it only takes $\log |V|$ updates to get the final optimal subproblems. Thus, this approach gives a better time complexity, $O(|V|^3\log|V|)$. The process is demonstrated in Fig.~\ref{fig:dp_2}. 
-\end{enumerate} - -Alternatively, we define the subproblem as the shortest path between $a$ and $d$ with $x$ as an intermediate node along the path; the number of candidate intermediate nodes is $|V|$. Here, we use $k$ to index the intermediate node, and $i$, $j$ to index the start and end node. Then a subproblem $D^{k}{(i,j)}$ can be either the shortest path between $i$ and $j$ with intermediate nodes $\{0, 1, ..., k-1\}$ or the shortest path between $i$ and $k$ with all previous intermediate nodes plus the shortest path between $k$ and $j$ with all previous intermediate nodes. The recurrence relation is:%between $A$ and $D$ with $|V|$ possible intermediate nodes. - \begin{align} - D^{k}{(i,j)} = \min(D^{\{0, ..., k-1\}}{(i,j)}, D^{\{0,..., k-1\}}{(i,k)}+D^{\{0,..., k-1\}}{(k,j)}) - \label{eq:dp_3} - \end{align} -As we see, each recurrence update only takes constant time. At the end, after we consider all possible intermediate nodes, we reach the optimal solution. This approach results in the best time complexity so far, $O(|V|^3)$. We demonstrate the update process in Fig.~\ref{fig:dp_3}. At pass $C$, using $C$ as the intermediate node, we end up using only the $C$-th row and the $C$-th column to update our matrix. -\begin{figure}[!ht] - \centering - \includegraphics[width=0.8\columnwidth]{fig/chapter_advanced_graph/DP_shortest_paths (3).png} - \caption{DP process using Eq.~\ref{eq:dp_3} for Fig.~\ref{fig:naive_graph}} - \label{fig:dp_3} -\end{figure} - -As we shall see later, the first way is similar to Bellman-Ford, the second is a repeated squaring version of Bellman-Ford, and the third is the Floyd-Warshall algorithm. - -\paragraph{Greedy algorithms} For $|V|^2$ subproblems, solving each subproblem takes at least $|V|$ time using the Floyd-Warshall algorithm. A greedy approach would look for ways to decide the optimal solution to each subproblem in one try, making it $|V|^2$ or $|V|+|E|$. We will see Dijkstra's algorithm, which is only applicable when all weights in $W$ are non-negative.
- - - -% \paragraph{Overlapping Subproblems} For any path $(v_0,..., v_{k})$ with $k$ edges, it can be split into subpaths. There exists many ways to do it: we can split it into 2 to $k$ subpaths, and each one's path length varies. If we set the number of subpaths to 2: -% \begin{itemize} -% \item One subpath be $(v_0, ..., v_{k-1})$ will be of length $k-1$ and the other subpath is left with length $1$. -% \item Assume $k$ is an even number, we decompose the path into half and half, each of length $k/2$. -% \end{itemize} - -% In the single-source shortest-path problem, there lies $|V|-1$ number of subproblems, denoted as: -% \begin{align} -% \{ s \xrightarrow[]{\text{p}} v \text{ } | \text{ } v \in V, v\neq s \} -% \end{align} -% Each subproblem $ s \xrightarrow[]{\text{p}} v$ represents all paths between $s$ and $v$ and along each one's path weight. Using the first type of decomposition shown above, this subproblem can be resolved through subproblems: -% \begin{align} -% s \xrightarrow[]{\text{p}} v \text{ } = s \xrightarrow[]{\text{p}} u, (u, v) \text{ }, \text{ } u \in V, -% \label{eq_sp_subp} -% \end{align} -% where $(u, v)$ is edge incident to $v$ (explain incident in graph). For example, in Fig.~\ref{fig:sp_udg}, vertex $z$ has two incident edges $(y, z)$ and $(t, z)$, the paths between $s$ and $z$ can be obtained from subpaths: $s \xrightarrow[]{\text{p}} y, (y, z)$ and $s \xrightarrow[]{\text{p}} t, (t, z)$. From the search tree, we can also clearly see that conclusion. - -% Also, in our problem, some subproblems are used repeatedly to reconstruct a larger subproblem. For example, $s \xrightarrow[]{\text{p}} y$ is used to get $s \xrightarrow[]{\text{p}} x$, $s \xrightarrow[]{\text{p}} z$. These are the overlapping subproblems property applied in shortest-path context. 
-% \paragraph{Optimal Substructures} -% Optimal Substructures states ``\textit{the optimal solution to a problem has the optimal solutions to subproblems in it}.'' Using our example, as shown in the shortest paths in Fig.~\ref{fig:sp_udg}, the optimal solution between $s$ and $z$ has other optimal solutions: $\sigma(s, t), \sigma(s, x), \sigma(s, y)$. But, specifically with the decomposition shown in Eq.~\ref{eq_sp_subp}, our optimal substructure is: -% \begin{align} -% \sigma(s, v) = \min_u (\sigma(s, u) + w(u, v)) -% \label{eq_optimal_substructure} -% \end{align} - - - - -% \paragraph{Greedy algorithms} For $|V|^2$ subproblems, solving each subproblem takes at least $|V|$ using bell-man ford. Greedy approach would think ways to decide the optional solution to each subproblem in one try, making it $|V|^2$ or $|V|+|E|$. We will see Dijkstra algorithm which is only applicable on all positive weighted $W$. - -In the following section, we start with going through algorithms solving single-source shortest path problem before we put up more details to the all-pair shortest path algorithms introduced above. - -\subsection{The Bellman-Ford Algorithm} -\label{subsec_bellman_ford} -Bellman-ford algorithm addresses single-source shortest path problem using a single-source version of DP approach one. -\paragraph{Dynamic Programming Representation} -Given a single source node $s$ in graph $G$, we define $D$ and $\Pi$ as just a one-dimensional vector instead of a matrix in all-pair shortest paths. $D^{m}_i$ represents the shortest path between $s$ and $i$ with maximum path length $m$. When $m=0$, there is a shortest path from $s$ to $v$ with no edge iff $s=v$. -\begin{align} - D^{0}_{i} = \left\{ - \begin{array}{ll} - 0 & \mbox{if } s=i \\ - \infty &\mbox{otherwise} - \end{array} - \right. -\label{eq_shortest_path_start} -\end{align} -Similarly, $\Pi^0$ is initialized as \texttt{None}. 
def bellman_ford_dp(s, W, verbose=True):
    """Single-source shortest paths by dynamic programming on path length.

    Pass ``m`` computes, for every vertex ``i``, the best path weight from
    ``s`` to ``i`` that uses at most ``m`` edges, reading only the estimates
    of the previous pass.

    Args:
        s: index of the source vertex.
        W: ``n x n`` weight matrix; ``W[k][i]`` is the weight of edge
            ``(k, i)``. Missing edges are expected to be ``float('inf')``.
        verbose: when True (the default, matching the original behavior),
            print the estimate vector after every pass.

    Returns:
        ``(D, P)`` where ``D[i]`` is the shortest-path weight estimate of
        vertex ``i`` and ``P[i]`` is the index of its predecessor (``None``
        when it was never improved).
    """
    n = len(W)
    inf = float('inf')
    D = [0 if i == s else inf for i in range(n)]
    P = [None] * n
    # Without a negative-weight cycle a shortest path has at most n-1 edges.
    for m in range(n - 1):
        # Write into a copy so that pass m only reads the results of pass m-1.
        newD = D[:]
        for i in range(n):  # endpoint
            for k in range(n):  # intermediate node
                candidate = D[k] + W[k][i]
                if candidate < newD[i]:
                    newD[i] = candidate
                    P[i] = k
        D = newD
        if verbose:
            print(f'D{m+1}: {D}')
    return D, P
def get_path(P, s, u, path):
    """Rebuild the path from source ``s`` to target ``u`` from predecessors.

    Args:
        P: predecessor table; ``P[v]`` is the vertex that precedes ``v`` on
            its path from the source, or ``None`` when ``v`` has none.
        s: the source vertex.
        u: the target vertex.
        path: list the visited vertices are appended to, in target-to-source
            order (callers normally pass an empty list).

    Returns:
        The vertices from ``s`` to ``u`` in order, or ``[]`` when following
        the predecessors from ``u`` never reaches ``s``.
    """
    target = u  # keep the original target: `u` is overwritten while walking
    hops = 0
    while u is not None:
        path.append(u)
        if u == s:
            print('Reached to the source vertex, stop!')
            return path[::-1]
        hops += 1
        # A predecessor chain over len(P) vertices that is longer than len(P)
        # must repeat a vertex; stop instead of looping forever.
        if hops > len(P):
            break
        u = P[u]
    # Report the requested target rather than the `None` the walk ended on.
    print(f"No path found between {s} and {target}.")
    return []
We explain the tree like this: if we are at most one edge away from $s$, we get $t$ as small as $6$; if we are three edges away, $t$ is able to gain a smaller value through its predecessor $x$, which is at most 2 edges away. After the last round of updates, when the tree reaches height $|V|-1$, the predecessor vector $\Pi$ gives the shortest-path tree: each edge in the shortest-path tree can be obtained by connecting each vertex in the graph with its predecessor. The shortest-path tree is marked in Fig.~\ref{fig:bellman_ford_2} in red color. -\begin{figure}[!ht] - \centering - \includegraphics[width=0.7\columnwidth]{fig/shortest_path_dp_1.png} - \caption{The tree structure indicates the updates on $D$, and the shortest path tree marked by red arrows. } - \label{fig:bellman_ford_2} -\end{figure} -\paragraph{Formal Bellman-Ford Algorithm} -In the above implementation, at each round, we made a copy of $D$, which is named \texttt{newD}. However, we can actually reuse the original $D$ and update directly on it. The difference is: we would update \texttt{D[i]} at step $m$ with other \texttt{D[k]} at step $m$ instead of at step $m-1$, so some nodes' estimates may improve sooner. In the previous implementation, we can guarantee that after iteration $m$, for all $i \in [0, n-1]$, $D_i$ is at most the weight of every path from $s$ to $i$ using at most $m$ edges. In the new version, we may reach the optimal value even earlier, but it still takes $n-1$ passes to guarantee it. -% \begin{align} -% d_v \leq \min(w(p)): |p| \leq i-1, -% \end{align}%However, this doesnt matter since once it reaches to $|V|-1$ updates, the result will never change again. -% Updating in the same $D$ would only decrease $d_v$, so that the above inequation still remains true. - -Second, the inner two \texttt{for} loops are equivalently enumerating edges: for each possible edge $(k, i)$, we update the best estimate for node $i$.
def bellman_ford(g: dict, s: str, verbose: bool = True):
    """Bellman-Ford single-source shortest paths on an adjacency list.

    Args:
        g: adjacency list mapping a vertex to a list of
            ``(neighbor, weight)`` pairs. Vertices without outgoing edges
            may be omitted from the keys.
        s: the source vertex; must occur in ``g`` (as a key or as a
            neighbor), otherwise ``KeyError`` is raised.
        verbose: when True (the default, matching the original behavior),
            print the estimate vector after every pass.

    Returns:
        ``(D, P, ver2idx, idx2ver)``: ``D[i]`` is the shortest-path weight
        estimate of the vertex with index ``i``, ``P[i]`` the index of its
        predecessor (or ``None``), and the two dicts translate between
        vertex keys and indices.
    """
    # Index the keys of `g` first, in their given order, so indices are
    # unchanged for graphs whose vertices are all keys ...
    ver2idx = {u: i for i, u in enumerate(g)}
    # ... then index vertices that only occur as edge targets; previously
    # these raised KeyError and were missing from the vertex count.
    for edges in g.values():
        for v, _ in edges:
            ver2idx.setdefault(v, len(ver2idx))
    idx2ver = {i: u for u, i in ver2idx.items()}
    n = len(ver2idx)

    # Initialize the estimates and predecessors.
    si = ver2idx[s]
    inf = float('inf')
    D = [0 if i == si else inf for i in range(n)]
    P = [None] * n

    # n-1 passes; each pass relaxes every edge, updating D in place.
    for i in range(n - 1):
        for u, edges in g.items():
            ui = ver2idx[u]
            for v, w in edges:
                vi = ver2idx[v]
                candidate = D[ui] + w
                if candidate < D[vi]:
                    D[vi] = candidate
                    P[vi] = ui
        if verbose:
            print(f'D{i+1}: {D}')
    return D, P, ver2idx, idx2ver
programming version. - -\paragraph{Time Complexity} The first dynamic programming solution takes $O(|V|^3)$, and the formal Bellman-Ford takes $O(|V||E|)$. The latter is more efficient than the former when the graph is sparse ($|E| \ll |V|^2$); on a dense graph, where $|E|=O(|V|^2)$, the two bounds coincide. -% When the ordering of edges to be relaxed e each pass is -% \begin{lstlisting}[numbers=none] -% g = { -% 't':[('x', 5), ('y', 8), ('z', -4)], -% 'x':[('t',-2)], -% 'y':[('x',-3), ('z',9)], -% 'z':[('x',7)], -% 's':[('t', 6), ('y', 7)], -% }, -% \end{lstlisting} -% The process of Bellman-Ford algorithm is visualized in Fig.~\ref{fig:bellman_ford_1} on a directed graph shown in Fig.~\ref{fig:sp_udg} that contains no negative-weight cycle. - -\paragraph{Detect Negative-weight Cycle} -% \begin{figure} -% \centering -% \includegraphics[width=0.95\columnwidth]{fig/path_cycle.png} -% \caption{A path that has at least one cycle.} -% \label{fig:path_cycle} -% \end{figure} -If the graph contains no negative-weight cycle, after $|V|-1$ passes of relaxation, $D$ will reach the minimum path value. Thus, if we run an additional pass of relaxation, no vertex will be updated further. However, if there exists at least one negative-weight cycle that is reachable from the source, the $|V|^{th}$ pass will still decrease the estimate of at least one vertex in $D$. -%%%%%%%%Proof%%%%%%%%%% -% Assume on a graph there is one negative-weight cycle that is reachable from source $s$; let this cycle be $c=(v_0, v_1, ..., v_{k}), v_0=v_{k}$ and $\sum_{i=1}^{k} w(v_{i-1}, v_i) <0$. Assume at the $|V|^{th}$ pass, no additional updated is detected, making $v_{i}.d \leq v_{i-1}.d + w(v_{i-1}, v_i), i\in [1,k]$. Summing the inequalities, we get: -% \begin{equation} -% \sum_{i=1}^{k} d_{v_{i}} \leq \sum_{i=1}^{k} d_{v_{i-1}} + \sum_{i=1}^{k}w(v_{i-1}, v_i) -% \end{equation} -% Since $v_0=v_k$, we cancel the summation of $v.d$ on each side, and get -% \begin{equation} -% 0 \leq \sum_{i=1}^{k}w(v_{i-1}, v_i) -% \end{equation} -% which contradicts with the fact that it is a negative-weight cycle.
Therefore, there will be at least one vertex on the cycle that will keep decreasing its value on the $|V|^{th}$ pass. - - -\subsubsection{Special Cases and Further Optimization} -From the perspective of optimization, there are at least two approaches we can try to further boost the time efficiency, such as -\begin{enumerate} - \item a special linear ordering of vertices for relaxing their leaving edges that leads us to the shortest paths in just one pass of the Bellman-Ford algorithm, - \item and some greedy approach that takes only one pass of relaxation, which can be similar to breadth-first graph search or Prim's algorithm. -\end{enumerate} -In Fig.~\ref{fig:bellman_ford_1}, suppose we are relaxing leaving edges of vertices in linear order $[s, t, y, z, x]$, the process will be as follows: -\begin{lstlisting}[numbers=none] -vertex edges relaxed vertices -s (s,t),(s,y) {t:6, y:7} -t (t,x),(t,y),(t,z) {x:11, z:2, t:6, y:7} -y (y,x), (y,z) {x:4, z:2, t:6, y:7} -z (z,x) {x:4, z:2, t:6, y:7} -x (x,t) {t:2, x:4, z:2, y:7} -\end{lstlisting} -\begin{figure}[!ht] - \centering - \begin{subfigure}{.45\textwidth} - \includegraphics[width=0.99\columnwidth]{fig/bellman_ford_0_tp.png} - \caption{Initialization} - \end{subfigure} - \begin{subfigure}{.45\textwidth} - \includegraphics[width=0.99\columnwidth]{fig/bellman_ford_1_tp.png} - \caption{After $1^{st}$ Pass} - \end{subfigure} - \caption{The execution of Bellman-Ford's Algorithm with ordering $[s, t, y, z, x]$.} - \label{fig:bellman_ford_3} -\end{figure} -The process is also visualized in Fig.~\ref{fig:bellman_ford_3}. -We see that only vertex $z$ did not find its shortest-path weight. Why? From $s$ to $z$, there are paths: $(s, t, z), (s, y, z), (s, t, y, z), (s, y, x, t, z)$. If we want to make sure that after one pass of updates vertex $z$ reaches its shortest-path weight, we have to make sure that its predecessors, vertices $y$ and $t$, have reached their own shortest-path weights first. The same rule applies to its predecessors.
In this graph, the predecessors that each vertex depends on are: -\begin{lstlisting}[numbers=none] -vertex predecessor -s None -t s, x -y s, t -x t, y, z -z y, t -\end{lstlisting} -From the listing, we see that the pair $t$ and $x$ conflict with each other: $t$ needs $x$ as predecessor and $x$ needs $t$ as predecessor. Tracking down this clue, we will find out that it is due to the fact that $t$ and $x$ coexist in a cycle. -\paragraph{Order Vertices with Topological Sort} -\begin{figure}[!ht] - \centering - \begin{subfigure}{.45\textwidth} - \includegraphics[width=0.99\columnwidth]{fig/bellman_ford_0_dag.png} - \caption{Initialization} - \end{subfigure} - \begin{subfigure}{.45\textwidth} - \includegraphics[width=0.99\columnwidth]{fig/bellman_ford_1_dag.png} - \caption{After $1^{st}$ Pass} - \end{subfigure} - \caption{The execution of Bellman-Ford's Algorithm on DAG using topologically sorted vertices. The red color marks the shortest-paths tree.} - \label{fig:bellman_ford_dag} -\end{figure} -Taking away edge $(x, t)$, we are able to obtain a topological ordering of the vertices, which is $[s, t, y, z, x]$. Relaxing the leaving edges of the vertices in this order is guaranteed to reach the globally optimal shortest-path weights in a single pass, which would otherwise take up to $|V|-1$ passes of the Bellman-Ford algorithm using an arbitrary ordering of vertices. The shortest-paths tree is shown in Fig.~\ref{fig:bellman_ford_dag}. - -So far, we have discovered an $O(|V|+|E|)$ linear-time algorithm for the single-source shortest-path problem when the given graph is directed, weighted, and acyclic. The algorithm consists of two steps: topological sorting of the vertices in $G$ and one pass of the Bellman-Ford algorithm using the reordered vertices instead of an arbitrary ordering.
def bellman_ford_dag(g, s):
    """Single-source shortest paths on a DAG with one relaxation pass.

    The vertices are relaxed in the order returned by ``topo_sort`` so that
    a single pass over the edges suffices.

    Args:
        g: adjacency list mapping a vertex to a list of
            ``(neighbor, weight)`` pairs. Vertices without outgoing edges
            may be omitted from the keys.
        s: the source vertex.

    Returns:
        A list ``dp`` indexed by vertex index, where ``dp[i]`` is the tuple
        ``(shortest-path estimate, predecessor index or None)``.
    """
    # Index the keys of `g` first (unchanged indices for graphs whose
    # vertices are all keys), then vertices that only occur as edge targets,
    # which previously raised KeyError.
    ver2idx = {u: i for i, u in enumerate(g)}
    for edges in g.values():
        for v, _ in edges:
            ver2idx.setdefault(v, len(ver2idx))
    idx2ver = {i: u for u, i in ver2idx.items()}
    n = len(ver2idx)

    # Unweighted, index-based adjacency list handed to the topological sort.
    ng = [[] for _ in range(n)]
    for u, edges in g.items():
        ui = ver2idx[u]
        for v, _ in edges:
            ng[ui].append(ver2idx[v])
    # NOTE(review): `topo_sort` is defined elsewhere; this presumes it
    # returns the vertex indices of `ng` in topological order -- confirm.
    V = topo_sort(ng)

    # Initialize the (estimate, predecessor) table.
    si = ver2idx[s]
    inf = float('inf')
    dp = [(inf, None) for _ in range(n)]
    dp[si] = (0, None)

    # Relax the leaving edges of each vertex, in topological order.
    for ui in V:
        u = idx2ver[ui]
        for v, w in g.get(u, ()):
            vi = ver2idx[v]
            candidate = dp[ui][0] + w
            if candidate < dp[vi][0]:
                dp[vi] = (candidate, ui)
    return dp
This process is repeated in a loop until $V-S$ is empty. This devised approach indeed follows the principle of greedy algorithm just as Prim's algorithm does, this algorithm is called \textit{Dijkstra's}. - -\paragraph{How is it greedy?} Dijkstra's is the ``greedy'' version of Bellman-ford Algorithm. At each step, dynamic programming uses Eq.~\ref{eq_sp_dp} to update $D^{m}_i$ by trying all possible edges that extend the paths between $s$ and $i$ one at a time. Bellman-ford can only guarantee to achieve the optimal solution at the very end of running all passes. However, in Dijkstra algorithm, it reaches to the optimal solution in only one step--whenever a vertex is added into $S$, it adds a vertex in the shortest-path tree with only ``local'' information. -%-optimal substructures indicates that we can extend from vertices that have reached to its minimum path estimate to its adjacent vertices through outgoing edges. - -\paragraph{Correctness Condition: Non-negative Weight} But, how to make sure that whenever the vertex was added into set $S$, it reaches to its shortest-path weight? Specifically, how to ensure our locally optimal decision is global optimal? -%, that when a vertex $u$ is added into $S$, its estimated shortest-path weight is $\sigma(s, u)$. -This also means after this step, no matter how many additional paths with larger path length can reach to $i$, they shall never have less distance. This requires all of graph edges to be non-negative. 
-\begin{figure}[!ht] - \centering - \begin{subfigure}{.32\textwidth} - \includegraphics[width=0.99\columnwidth]{fig/dijkstra_0.png} - \caption{$s$ enters Queue} - \end{subfigure} - \begin{subfigure}{.32\textwidth} - \includegraphics[width=0.99\columnwidth]{fig/dijkstra_0.5.png} - \caption{$s$ enters $S$} - \end{subfigure} - \begin{subfigure}{.32\textwidth} - \includegraphics[width=0.99\columnwidth]{fig/dijkstra_1.png} - \caption{$t$ and $y$ enter Queue} - \end{subfigure} - - - \begin{subfigure}{.32\textwidth} - \includegraphics[width=0.99\columnwidth]{fig/dijkstra_1.5.png} - \caption{$t$ enters $S$} - \end{subfigure} - \begin{subfigure}{.32\textwidth} - \includegraphics[width=0.99\columnwidth]{fig/dijkstra_2.png} - \caption{$z$ and $x$ enter Queue} - \end{subfigure} - \begin{subfigure}{.32\textwidth} - \includegraphics[width=0.99\columnwidth]{fig/dijkstra_2.5.png} - \caption{$y$ enters $S$} - \end{subfigure} - \begin{subfigure}{.32\textwidth} - - \includegraphics[width=0.99\columnwidth]{fig/dijkstra_3.png} - \caption{$x$ is modified in Queue} - \end{subfigure} - \begin{subfigure}{.32\textwidth} - \includegraphics[width=0.99\columnwidth]{fig/dijkstra_3.5.png} - \caption{$z$ enters $S$} - \end{subfigure} - \begin{subfigure}{.32\textwidth} - \includegraphics[width=0.99\columnwidth]{fig/dijkstra_4.png} - \caption{Queue is not modified} - \end{subfigure} - - \begin{subfigure}{.32\textwidth} - \includegraphics[width=0.99\columnwidth]{fig/dijkstra_4.5.png} - \caption{$x$ enters $S$} - \end{subfigure} - \caption{The execution of Dijkstra's Algorithm on non-negative weighted graph. Red circled vertices represent the priority queue, and blue circled vertices represent the set $S$. Eventually, the blue colored edges represent the shortest-paths tree.} - \label{fig:dijikstra} -\end{figure} -\paragraph{Implementation} - -The implementation relies on the \texttt{PriorityQueue()} customized data structure once again, where we can modify an existing item in the queue. 
There are two ways to apply the priority queue: -\begin{itemize} - \item Add all vertices into the queue all at once at the beginning. Then only \texttt{deque} and modification operations are needed. - \item Add vertex in the queue only when it is relaxed and has a non-$\infty$ shortest-path estimate. The process of Dijkstra algorithm on a non-negative weighted graph that takes this approach of queue is demonstrated in Fig.~\ref{fig:dijikstra} and the code is as follows: -\end{itemize} -\begin{lstlisting}[language=Python] -def dijkstra(g, s): - Q = PriorityQueue() - S = [] - # task: vertex id, priority: shortest-path estimate, info: predecessor - Q.add_task(task=s, priority=0, info=None) - visited = set() - while not Q.empty(): - # Use the light vertex - u, up, ud = Q.pop_task() - visited.add(u) - S.append((u, ud, up)) - - # Relax adjacent vertice - for v, w in g[u]: - # Already found the shortest path for this id - if v in visited: - continue - - vd, vp = Q.get_task(v) - # First time to add the task or already in the queue, but need update - if not vd or ud + w < vd: - Q.add_task(task=v, priority=ud + w, info=u) - return S -\end{lstlisting} - - -% \paragraph{Correctness of Dijkstra's Algorithm} -% To prove the correctness of a greedy algorithm is not easy matter, and often the contradiction method can be used. In this process, we need to show that for each vertex $u\in V$, we have $u.d = \sigma(s, u)$ at the time when $u$ is added to set $S$. -% \begin{figure}[!ht] -% \centering -% \includegraphics[width=0.5\columnwidth]{fig/dijkstra_proof.png} -% \caption{The proof of correctness of Dijkstra's Algorithm} -% \label{fig:dijkstra_proof} -% \end{figure} -% \begin{enumerate} -% \item Initialization: First, $S=\emptyset$, the conclusion holds true. -% \item Maintenance: We wish to show that in each iteration, $u.d=\sigma(s,u)$ for the vertex added to set $S$. We use contradiction, let $u$ be the first vertex for which $u.d\neq\sigma(s,u)$. 
For this to work, $u$ cant be $s$ and it also needs to be a vertex that is reachable from $s$. Importantly, if $u.d\neq\sigma(s,u)$, then there shall exist another path, namely $s\rightarrow x \rightarrow y \rightarrow u$ is shorter. We set the context of one step before, that prior to adding $u$ to $S$, path $p$ connects $s$ to a vertex is in $S$, namely $x$ in $V-S$. At this step, because of induction, $p_1 = s\rightarrow x$ is $\sigma(s,x)$. Edge (x, y) is relaxed at that time. - -% First, because $y$ appears before $u$ on the shortest-path on shortest path $s \rightarrow u$, and all edges are non-negative, we have: -% \begin{align} -% \sigma(s,y) &\leq \sigma(s, u), \\ -% y.d &= \sigma(s,y) \\ -% y.d &\leq u.d -% \end{align} - -% Because, both vertices $u$ and $y$ were in $V-S$ when $u$ was added, we have $u.d\leq y.d$ since we chose $u$ instead. Thus we get an equality -% $y.d = \sigma(s, y) = \sigma(s, u) = u.d$ -% \end{enumerate} - -% There is another version reasoned with non-decreasing property: -% \begin{enumerate} -% \item At the first pass, we start from the source vertex, which is a shortest path itself with weight $0$. The next we do it to relax edges that out of the source vertex, and among them, we choose the shortest path. Because of the non-decreasing property, that if $v^{*}_i$ is the minimum at this step, then all other possible paths that reaches to $v^{*}_i$ through ${S_{v_l}-v^{*}_i}$ will has longer length, there weight will only increase, such that it will never come back to harness the current decision. -% \item At the second step, we follow the same rule, relax the edges out of $v^{*}_i$. Then we have a selection of paths are one and two length away. we would be able to finalize the shortest path from $s$ to one vertex $v^{*}_i$ by comparing the minimum weights of all of the reachable vertices. 
-% \end{enumerate} -% , Once we make the decision, we do not need to relax weights through the whole vertex set other than relaxing the weights of reachable vertices through the current smallest vertex. This is due to the optimal substructure property. - -% For example, in Fig.~\ref{}, shown in (b) at the first pass, we can decide the shortest-path between $s$ and $y$. The next pass, we only relax the path weights of $t,x, z$ that is reachable through $t$. Now, we have information of the path weights for those paths that have maximum length of 2. Then vertex $z$'s shortest path will be finalized. And so on. - - - -\paragraph{Complexity Analysis} -Once again, the complexity of Dijkstra's relies on the specific implementation of the priority queue. In our implementation, we used a customized \texttt{PriorityQueue()} which takes $|V|$ to initialize the queue. In this queue, we did not really remove the task from the queue but instead marked it as ``REMOVED,'' so we can end up having a maximum of $|E|$ entries in the queue, making the cost of extracting the minimum item be $O(\log |E|)$. For the update, the main cost comes from inserting a new entry through \texttt{heappush-like} operation, which is $O(\log |E|)$ too. In all, every edge is relaxed at most once, so there are $O(|E|)$ insertions or updates, and because outdated entries stay in the heap until they are popped, there are $O(|V|+|E|)$ \texttt{pops}, of which $|V|$ return a valid vertex. We end up with a worst-case time complexity of $O((|V|+|E|)\log |E|)$. - -\begin{bclogo}[couleur = blue!30, arrondi=0.1,logo=\bccrayon,ombre=true]{Try to prove the correctness of Dijkstra's using greedy algorithms' two approaches on proving.} -\end{bclogo} -% \subsubsection{Implementation} -% Same as of Bellman-Ford's algorithm with the initialization, an relaxation process. - -% In Dijkstra, we separate two sets of vertices. One set $S$ whose final shortest-path weights from the source $s$ have already been decided. And the other set $V-S$ waiting to be decided.
Each step, we only need to relax edges reachable to the current smallest vertices, instead of relax edges reachable through all previous relaxed edges. Once we decide it is the smallest, this vertex will be removed from set $V-S$, and add into $S$. We can use a pseudo-code to represent it simply first: -% \begin{lstlisting} -% Dijkstra(G, w, s): -% initialize-single-source(G, s) -% S = {} -% Q = G.V -% while Q not empty: -% u=extract_min(Q) -% S = S + {u} -% for each vertex v in G.adj[u]: -% Relax(u, v, w) -% \end{lstlisting} -% Now, let us implement Dijkstra's algorithm with Python. - - - -\subsection{All-Pairs Shortest Paths} -\label{sec_all_pairs_shortest_paths} -In this section, we first summarize the solutions to the single-source shortest-path problem, because the all-pairs shortest-path problem can be naturally decomposed into $|V|$ such single-source subproblems. Next, we systematically build up the three all-pairs shortest-path algorithms we are about to learn: - -\paragraph{Summary to Single-source Shortest-Path Algorithms} The solutions vary with the type of weighted graph $G$ that we are dealing with: -\begin{itemize} - \item if (1) each weight $w \in R$ and (2) the graph has no negative-weight cycle, we can apply the generalist dynamic programming approach--Bellman-Ford Algorithm, - \item if each weight is non-negative, i.e., $w \in R^{+}$, we take the greedy approach--``Dijkstra's Algorithm'' - \item and if the graph is directed and acyclic, so that it has no cycle at all and in particular no negative-weight cycle, we can run one pass of Bellman-Ford algorithm with vertices being relaxed in topologically sorted linear ordering. -\end{itemize} -Depending on which category the given graph $G$ falls into, a naive and natural solution to the all-pairs shortest-path problem is to run the corresponding algorithm $|V|$ times--once with each vertex viewed as the source--which scales its complexity by a factor of $|V|$.
-%To serve the general case, such extended Bellman-ford algorithm will have a running time as of $O(|V|^2|E|)$, which will goes up to $O(|V|^4)$ if $G$ is dense. -\subsubsection{Extended Bellman-Ford's Algorithm} We leverage the first DP approach in Section ~\ref{chapter_advanced_graph_sec_algorithm_design}. Define weight matrix $W$, shortest-path weight estimate matrix $D$, and predecessor matrix $\Pi$. We have recurrence relation: -\begin{align} - D^{m}{(i, j)} &= \min_{k\in [0, n-1] }(D^{m-1}{(i, k)}+W{(k, j)}), - \label{eq_recurrence_extended_bellman} -\end{align} -$\Pi^{m}{(i,j)}$ is updated by: -\begin{align} - \Pi^{m}{(i,j)} = \left\{ - \begin{array}{ll} - \mbox{None,} &\mbox{if } D^{m}{(i, j)=0} \mbox{ or } D^{m}{(i, j)} = \infty,\\ - \operatorname*{argmin}_{k \in [0, n-1]} (D^{m-1}{(i, k)}+W{(k, j)}), &\mbox{otherwise. } - \end{array} - \right. - \label{eq_bellman_predecessor} -\end{align} -with initialization: -\begin{align} - D^{0}{(i, j)} = \left\{ - \begin{array}{ll} - 0, & \mbox{if } i=j, \\ - \infty, &\mbox{otherwise.} - \end{array} - \right. -\label{eq_extend_bellman_ford} -\end{align} -\begin{align} - \Pi^{0}{(i,j)} = None - \label{eq_bellman_predecessor} -\end{align} - -% \begin{equation} -% W{(i, j)} = \left\{ -% \begin{array}{lll} -% 0 & \mbox{if } i=j \\ -% w_{ij} & \mbox{if $i\ne j$, and $(i, j)\in E$ } \\ -% \infty & \mbox{if $i\ne j$, and $(i, j)\notin E$ } -% \end{array} -% \right. -% \end{equation} - -% , from a vector to a matrix of size $|V| \times |V|$. Each $w_{ij}$, $d_{ij}$, $\pi_{ij}$, and $\sigma_{ij}$ is defined as follows: -% \begin{itemize} -% \item $w_{ij}$ indicates the weight of each edge with startpoint $i$ and endpoint $j$, -% \begin{equation} -% w{(i, j)} = \left\{ -% \begin{array}{lll} -% 0 & \mbox{if } i=j \\ -% w_{ij} & \mbox{if $i\ne j$, and $(i, j)\in E$ } \\ -% \infty & \mbox{if $i\ne j$, and $(i, j)\notin E$ } -% \end{array} -% \right. 
-% \end{equation} -% \item Compared with Eq.~\ref{eq_optimal_substructure}, single source $s$ is replaced by $i$, which can be any vertex from $V$, -% \begin{align} -% \sigma(i, j) = \min_{u \in V} (\sigma(i, u) + w{(u, j)}) -% \end{align} -% \item Similarly, compared with Eq.~\ref{eq_sp_dp}, $d$ adds additional index $i$, representing the starting vertex for the path $i\rightarrow j$. The following recurrence relation shows how to reach to the shortest-path weight estimate between vertices $i$ and $j$ that has at most $m$ edges from its subproblem that has at most $m-1$ edges by checking all of $j$'s incoming edges. The extended equation is as follows: -% \begin{align} -% d^{(m)}{(i, j)} &= \min_{u\in V}(d^{(m-1)}{(i, u)}+w{(u, j)}), -% \label{eq_recurrence_extended_bellman} -% \end{align} -% where $d^{(0)}{(i, j)}$ is initalized as: -% \begin{align} -% d^{(0)}{(i, j)} = \left\{ -% \begin{array}{ll} -% 0, & \mbox{if } i=j, \\ -% \infty, &\mbox{otherwise.} -% \end{array} -% \right. -% \label{eq_extend_bellman_ford} -% \end{align} -% \item $\pi^{(m)}{(i,j)}$ represents the predecessor of $j$ in some shortest-path from vertex $i$ to vertex $j$ that has at most $m$ edges. Only if $i=j$ or when there is no path between vertex $i$ and $j$ will $\pi^{(m)}{(i,j)}$ be empty. This definition is depicted as: -% \begin{align} -% \pi^{(m)}{(i,j)} = \left\{ -% \begin{array}{ll} -% \mbox{NULL,} &\mbox{if } d^{(m)}{(i, j)=0} \mbox{ or } d^{(m)}{(i, j)} = \infty,\\ -% \operatorname*{argmin}_{u \in V} (d^{(m-1)}{(i, u)}+w{(u, j)}), &\mbox{otherwise. } -% \end{array} -% \right. -% \label{eq_bellman_predecessor} -% \end{align} -% \end{itemize} -In detail, out extended Bellman-ford algorithm consists of these main steps: -\begin{enumerate} - \item Initialization: we initialize $d$ and $\pi$ using Eq.~\ref{eq_extend_bellman_ford} and \ref{eq_bellman_predecessor}. 
- \item For every pair of vertices $i$ and $j$, we update the $d$ and $\pi$ using recurrence relation in Eq.~\ref{eq_recurrence_extended_bellman} and \ref{eq_bellman_predecessor}, respectively, for $|V|-1$ passes. - \item Run the ${|V|}^{th}$ pass to decide if any negative-weight cycle exists in each rooted shortest-path tree. -\end{enumerate} -Notice that after one pass of update on $D$ since it is initialized, $D^{(1)} = W$; thus, in our implementation, only $|V|-2$ passes of updates are actually needed. Assume we have converted the graph shown in Fig.~\ref{fig:sp_udg} into a $W$ adjacency matrix representation and a dictionary \texttt{key2idx} that maps each key to a numerical index from $0$ to $|V|-1$. This extended Bellman-ford algorithm is implemented in main function \texttt{extended\_bellman\_ford\_with\_predecessor} which calls a subfunction \texttt{bellman\_ford\_with\_predecessor} that does one pass of relaxation and does not detect negative-weight cycles. The code is as: -\begin{lstlisting}[language=Python] -import copy -def bellman_ford_with_predecessor(W, L, P): - n = len(W) - for i in range(n): # source - for j in range(n): # endpoint - for k in range(n): # extend one edge - if L[i][k] + W[k][j] < L[i][j]: - L[i][j] = L[i][k] + W[k][j] # set d - P[i][j] = k # set predecessor - -def extended_bellman_ford_with_predecessor(W): - n = len(W) - # initialize L, first pass - L = copy.deepcopy(W) - print(f'L1 : {L} \n') - P = [[None for _ in range(n)] for _ in range(n)] - for i in range(n): - for j in range(n): - if L[i][j] != 0 and L[i][j] != float('inf'): - P[i][j] = i - # n-2 passes - for i in range(n-2): - bellman_ford_with_predecessor(W, L, P) - print(f'L{i+2}: {L} \n') - return L, P -\end{lstlisting} -The \texttt{L} matrix will be having all zeros along the diagonal, in this case, it is -\begin{lstlisting}[numbers=none] -[ [0, 2, 4, 7, -2], - [inf, 0, 3, 8, -4], - [inf, -2, 0, 6, -6], - [inf, -5, -3, 0, -9], - [inf, 5, 7, 13, 0]], -\end{lstlisting}
-\begin{figure}[!ht] - \centering - \begin{subfigure}{.32\textwidth} - \includegraphics[width=0.99\columnwidth]{fig/shortest_path_trees_0.png} - \caption{$s$ as source} - \end{subfigure} - \begin{subfigure}{.32\textwidth} - \includegraphics[width=0.99\columnwidth]{fig/shortest_path_trees_1.png} - \caption{$t$ as source} - \end{subfigure} - \begin{subfigure}{.32\textwidth} - \includegraphics[width=0.99\columnwidth]{fig/shortest_path_trees_2.png} - \caption{$x$ as source} - \end{subfigure} - - - \begin{subfigure}{.32\textwidth} - \includegraphics[width=0.99\columnwidth]{fig/shortest_path_trees_3.png} - \caption{$y$ as source} - \end{subfigure} - \begin{subfigure}{.32\textwidth} - \includegraphics[width=0.99\columnwidth]{fig/shortest_path_trees_4.png} - \caption{$z$ as source} - \end{subfigure} - \caption{All shortest-path trees starting from each vertex.} - \label{fig:shortest_path_tree_all} -\end{figure} -We reconstruct the shortest-path trees and visualize them in Fig.~\ref{fig:shortest_path_tree_all}. -% \paragraph{What will learn?} -% Just as in previous section the predecessor subgraph $G_\pi$ forms a shortest-paths tree from a given source vertex, the subgraph induced by the $i$-th row of the $\Pi$ matrix should be a shortest-paths tree with root $i$. -% The standard All Pair Shortest Path algorithms such as matrix multiplication like shortest paths algorithm and Floyd–Warshall algorithm are just like Bellman–Ford, and are also application of Dynamic Programming. -% For $n$ nodes in a graph, if there exists no negative-weight cycles, then for every pair of vertices there is a shortest path that is simple and thus contains at most $n-1$ paths. Therefore, we end up having $\sigma(i,j)=l^{(n-1)}_{ij}=l^{(n)}_{ij}=l^{(n+1)}_{ij}$. -\subsubsection{Repeated Squaring Extended Bellman-Ford Algorithm} -We leverage the second DP approach in Section ~\ref{chapter_advanced_graph_sec_algorithm_design}. 
This approach bears resemblance to the repeated squaring optimization in matrix multiplication. % in the case of with extra outer loop over $|V|$ vertices as source vertex. The benefits from being matrix multiplication like is we can further improve its efficiency using matrix multiplication optimization which is called \textbf{Repeated Squaring}. -Repeated squaring is a general method for fast computation of exponentiation with large powers of a number or more generally of a polynomial or a square matrix. The underlying algorithm design methodology is divide and conquer. Assume our input is $x^{n}$, where $x$ is an expression; repeated squaring computes this in $O(\log n)$ steps by repeatedly squaring an intermediate result. The repeated squaring method is used in many advanced algorithms; we will see another application in string algorithms. -\paragraph{Repeated Squaring Applied on Extended Bellman-Ford Algorithm} -If we observe the one-pass function \texttt{bellman\_ford\_with\_predecessor}, it has three for loops, and it shows a similar pattern to matrix multiplication. Suppose $A$ and $B$ are both $n\times n$ matrices, and we compute $C=A\times B$, the formulation is $c_{ij} = \sum_{k=0}^{n-1} a_{ik}\cdot b_{kj}$ which has the same pattern as Eq.~\ref{eq_recurrence_extended_bellman}, with $\min$ in place of the sum and $+$ in place of the product. If we use $\cdot$ to mark the one-pass \texttt{bellman\_ford\_with\_predecessor} operation on $L$ and $W$, we will have the following relations: -\begin{align} - L^{1} &= L^{0}\cdot W = W,\\\nonumber - L^{2 }&= L^{1}\cdot W = W^2, \\\nonumber - L^{3 }&= L^{2}\cdot W = W^3, \\\nonumber - &\vdots\\\nonumber - L^{n-1 }&= L^{n-2}\cdot W = W^{n-1}\nonumber -\end{align} - - -With the repeated squaring technique, we can compute $L^{n-1}$ with only $\lceil \log_2 (n-1) \rceil$ rounds of the one-pass operation -\begin{align} - L^{1} &= W,\\\nonumber - L^{2 }&= W \cdot W, \\\nonumber - L^{4 }&= W^2 \cdot W^2, \\\nonumber - &\vdots\nonumber -\end{align} -The above repetition stops once the exponent $m$ of $L^{m}$ satisfies $m \geq n-1$.
The implementation is: -\begin{lstlisting}[language=Python] -import copy -import math -def bellman_ford_repeated_square(L): - n = len(L) - for i in range(n): # source - for j in range(n): # endpoint - for k in range(n): # double the extending length - L[i][j] = min(L[i][j], L[i][k]+L[k][j]) - -def extended_bellman_ford_repeated_square(W): - n = len(W) - # initialize L, first pass - L = copy.deepcopy(W) - print(f'L1 : {L} \n') - # ceil(log2 n) passes, each pass doubles the path length covered - for i in range(math.ceil(math.log2(n))): - bellman_ford_repeated_square(L) - print(f'L{2**(i+1)}: {L} \n') - return L -\end{lstlisting} - -\subsubsection{The Floyd-Warshall Algorithm} -We leverage the third DP approach in Section ~\ref{chapter_advanced_graph_sec_algorithm_design}, this approach is called \textit{The Floyd-Warshall Algorithm}. We directly put the code here: -\begin{lstlisting}[language=Python] -def floyd_warshall(W): - L = copy.deepcopy(W) #L0 - n = len(W) - for k in range(n): # intermediate node - for i in range(n): # start node - for j in range(n): # end node - L[i][j] = min(L[i][j], L[i][k] + L[k][j]) - return L -\end{lstlisting} -\end{document} \ No newline at end of file diff --git a/Easy-Book/chapters/chapter_advanced_linear_search.tex b/Easy-Book/chapters/chapter_advanced_linear_search.tex deleted file mode 100644 index 73a1d16..0000000 --- a/Easy-Book/chapters/chapter_advanced_linear_search.tex +++ /dev/null @@ -1,525 +0,0 @@ -\documentclass[../main.tex]{subfiles} -\begin{document} -\begin{figure}[h!] - \centering - \includegraphics[width=0.7\columnwidth]{fig/two_pointers.png} - \caption{Two pointer Technique} - \label{fig:two pointer} -\end{figure} -On linear data structures, or on implicit linear state space, either a particular targeted item or a consecutive substructure such as a subarray and substring can be searched. - -To find a single item on linear space, we can apply linear search in general, or binary search if the data structure is ordered/sorted with logarithmic cost.
In this chapter, we introduce two pointer techniques that are commonly used to solve two types of problems: -\begin{enumerate} - \item Searching: To search for an item such as median, a predefined substructure, and a substructure that satisfy certain conditions such as finding the minimum subarray length wherein the subarray equals to a targeted sum. Or find a substructure satisfy a string pattern. - \item Adjusting: To adjust ordering or arrangement of items in the data structure such as removing duplicates from sorted array. -\end{enumerate} - -% \section{Introduction to Two Pointers} -As the name suggests, Two pointers technique involves two pointers that start and move with the following two patterns: -\begin{enumerate} - \item Equi-directional: Both pointers start from the beginning of the array, and usually one moves faster and the other slower. Sliding window algorithm can be put into this category. - \item Opposite-directional: One pointer start at the start position and conversely the other pointer starts at the end. These two oppositely posed pointers move toward each other and usually meet in the middle. -\end{enumerate} -In the following sections, we will detail on two-pointer technique exemplified on real interview questions. - -\section{Slow-Faster Pointers} -Suppose we have two pointers, $i$ and $j$, which may or may not start at the start position in the linear data structures, but one move slower ($i$) and the other faster ($j$). Two pointers can decide either a pair or a subarray to solve related problems. For the case of subarray, the algorithm is called sliding window algorithm. On the span of the array, and at most of three potential sub-spaces exist: from start index to $i$ ($[0, i]$), from $i$ to $j$ ($[i, j]$), and from $j$ to the end index ($[j, n]$). - -Even though slow-faster pointers technique rarely given formal introduction in book, it is widely used in algorithms. 
In sorting, Lomuto's partition in the QuickSort used the slow-faster pointers to divide the whole region into three parts according to the comparison result to the pivot: Smaller Items region, Larger Items region, and the unrestricted region. In string pattern matching, both the fixed-size sliding window and the one we will introduce in this chapter rely on the same technique. - -In this section, we explain how two pointers work on two types of linear data structures: Array and Linked List. -\subsection{Array} -\subsubsection{Remove Duplicates from Sorted Array(L26)} Given a sorted array $a=[0,0,1,1,1,2,2,3,3,4]$, remove the duplicates in-place such that each element appears only once and return the new length. Do not allocate extra space for another array, you must do this by modifying the input array in-place with O(1) extra memory. In the given example, there are 5 unique items in total and 5 is returned. -\paragraph{Analysis} We set both slower pointer $i$ and the faster pointer $j$ at the first item in the array. Recall that slow-fast pointers cut the space of the sorted array into three parts, we can define them as: -\begin{enumerate} - \item unique items in region $[0, i]$, - \item untouched items in region $[i+1, j]$, - \item and unprocessed items in region $[j+1, n)$. -\end{enumerate} -In the process, we compare the items pointed by two pointers; once these two items are not equal, we have found a new unique item. We copy this unique item at the faster pointer to the position right next to the slower pointer, and move the slow pointer forward by one position so that it points at the copied value. The faster pointer keeps moving forward and thereby skips the duplicates of our copied value. - - -With our example, at first, $i=j=0$, region one has one item which is naively unique and region two has zero item.
Part of the process is illustrated as: -\begin{lstlisting}[numbers=none] -i j [0, i] [i+1, j] process -0 0 [0] [] item 0==0, j+1=1 -0 1 [0] [0] item 0==0, j+1=2 -0 2 [0] [0, 1] item 0!=1, i+1=1, copy 1 to index 1, j+1=3 -1 3 [0, 1] [1, 1] item 1==1, j+1=4 -1 4 [0, 1] [1, 1, 1] item 1==1, j+1=5 -1 5 [0, 1] [1, 1, 1, 2] item 1!=2, i+1=2, copy 2 to index 2, j+1=6 -2 6 [0, 1, 2] [1, 1, 2, 2] -\end{lstlisting} -The code is given as: -\begin{lstlisting}[language=Python] -def removeDuplicates(nums) -> int: - i, j = 0, 0 - while j < len(nums): - if nums[i] != nums[j]: - # Copy j to i+1 - i += 1 - nums[i] = nums[j] - j += 1 - return i + 1 -\end{lstlisting} -After calling the above function on our given example, array $a$ becomes $[0, 1, 2, 3, 4, 2, 2, 3, 3, 4]$. Check the source code for the whole visualized process. - -\subsubsection{Minimum Size Subarray Sum(L209)} Given an array of $n$ positive integers and a positive integer $s$, find the minimal length of a contiguous subarray of which the $sum \geq s$. If there isn't one, return 0 instead. -\begin{lstlisting}[numbers=none] -Example: - -Input: s = 7, nums = [1,4,1,2,4,3] -Output: 2 -Explanation: the subarray [4,3] has the minimal length under the problem constraint. -\end{lstlisting} -\paragraph{Analysis} In this problem, we need to secure a substructure--subarray--that not only satisfies a condition($sum \geq s$) but also has the minimal length. Naively, we can enumerate all subarrays and search through them to find the minimal length, which requires at least $O(n^2)$ time complexity using prefix sum. The code is as: -\begin{lstlisting}[language=Python] -\end{lstlisting} -However, we can use two pointers $i$ and $j$ ($i\leq j)$ that both point at the first item. In this case, these two pointers define a subarray $a[i:j+1]$ and we care about the region $[i, j]$. As we increase pointer $j$, we keep adding positive items into the sum of the subarray, making the subarray sum monotonically increasing.
Oppositely, if we increase pointer $i$, we remove positive items from the subarray, making the sum of the subarray monotonically decreasing. The detailed steps of the two pointer technique in this case are: -\begin{enumerate} - \item Get the optimal subarray for all subproblems(subarrays) that start from current $i$, which is $0$ at first. We accomplish this by forwarding $j$ pointer to include enough items until $sum \geq s$ that we pause and go to the next step. Let's assume pointer $j$ stops at $e_0$. - \item Get the optimal subarray for all subproblems(subarrays) that end with current j, which is $e_0$ at the moment. We do this by forwarding pointer $i$ this time to shrink the window size until $sum\geq s$ no longer holds. Let's assume pointer $i$ stops at index $s_0$. Now, we find the optimal solution for subproblems $a[0:i,0:j]$ (denoting subarrays with the start point in range $[0, i]$ and the end point in range $[0,j]$). - \item Now that $i=s_0$ and $j=e_0$, we repeat step 1 and 2. -\end{enumerate} -In our example, we first move $j$ until $j=3$ with a subarray sum of 8. Then we move pointer $i$ until $i=2$, when the subarray sum drops to 3, which is less than 7. For subarray $[1, 4, 1, 2]$, we find its optimal solution to have a length 3. The Python code is given as: -\begin{lstlisting}[language=Python] -def minSubArrayLen(s: int, nums) -> int: - i, j = 0, 0 - acc = 0 - ans = float('inf') - while j < len(nums): - acc += nums[j] - # Shrink the window - while acc >= s: - ans = min(ans, j - i + 1) - acc -= nums[i] - i += 1 - j += 1 - - return ans if ans < float('inf') else 0 -\end{lstlisting} - -Because both pointers $i$ and $j$ move at most $n$ steps, the total number of operations is at most $2n$, making the time complexity $O(n)$. The above question would be trivial if the maximum subarray length is asked.
- -\subsection{Minimum Window Substring (L76, hard)} - Given a string $S$ and a string $T$, find all the minimum windows in $S$ which will contain all the characters in $T$ in complexity $O(n)$. -\begin{lstlisting}[numbers=none] -Example: -Input: S = "AOBECDBANC", T = "ABC" -Output: ["CDBA", "BANC"] -\end{lstlisting} -\begin{figure}[h!] - \centering - \includegraphics[width=0.5\columnwidth]{fig/minimum_window_substring.png} - \caption{The data structures to track the state of window.} - \label{fig:minimum_window} -\end{figure} -\paragraph{Analysis} Applying two pointers, with the region between pointer $i$ and $j$ to be our testing substring. For this problem, the condition for the window $[i, j]$ it will at most have all characters from $T$. The intuition is we keep expanding the window by moving forward $j$ until all characters in $T$ is found. Afterwards, we contract the window so that we can find the minimum window with the condition satisfied. Instead of using another data structure to track the state of the current window, we can depict the pattern $T$ as a dictionary data structure where all unique characters comprising the keys and with the number of occurrence of each character as value. We use another variable \texttt{count} to track how the number of unique characters. In all, they are used to track the state of the moving window in $[i, j]$, with the value of the dictionary to indicate how many occurrence is short of, and the \texttt{count} represents how many unique characters is not fully found, and we depict the state in Fig.~\ref{fig:minimum_window}. - -\begin{figure}[h!] - \centering - \includegraphics[width=1.1\columnwidth]{fig/minimum_window_substring_process.png} - \caption{The partial process of applying two pointers. 
The grey shaded arrow indicates the pointer that is on the move.} - \label{fig:minimum_window_process} -\end{figure} - -Along the expanding and shrinking of the window that comes with the movement of pointer $i$ and $j$, we track the state with: -\begin{itemize} - \item When forwarding $j$, we encompass $S[j]$ in the window. If $S[j]$ is a key in the dictionary, decrease the value by one. Further, if the value reaches the threshold $0$, we decrease \texttt{count} by one, meaning we are short of one less character in the window. - \item Once \texttt{count=0}, our window satisfies the condition for contracting. We then forward $i$, removing $S[i]$ from the window if it is an existing key in the dictionary by increasing this key's value, meaning the window is short of one more character. Once the value reaches the threshold of $1$, we increase \texttt{count}. -\end{itemize} -Part of this process with our example is shown in Fig.~\ref{fig:minimum_window_process}. And the Python code is given as: -\begin{lstlisting}[language=Python] -from collections import Counter -def minWindow(s, t): - dict_t = Counter(t) - count = len(dict_t) - i, j = 0, 0 - ans = [] - minLen = float('inf') - while j < len(s): - c = s[j] - if c in dict_t: - dict_t[c] -= 1 - if dict_t[c] == 0: - count -= 1 - # Shrink the window - while count == 0 and i <= j: - curLen = j - i + 1 - if curLen < minLen: - minLen = j - i + 1 - ans = [s[i:j+1]] - elif curLen == minLen: - ans.append(s[i:j+1]) - - c = s[i] - if c in dict_t: - dict_t[c] += 1 - if dict_t[c] == 1: - count += 1 - i += 1 - - j += 1 - return ans -\end{lstlisting} - -% until When the window has all the desired characters, we contract (if possible) and save the smallest window till now. The only difference compared with the above problem is the definition of desirable: we need to compare the state of current window with the required state in T. They can be handled as a hashmap with character as key and frequency of characters as value.
-% \begin{lstlisting}[language=Python] -% def minWindow(self, s, t): -% dict_t = Counter(t) -% state = Counter() -% required = len(dict_t) - -% # left and right pointer -% i, j = 0, 0 - -% formed = 0 -% ans = float("inf"), None # min len, and start pos - -% while j < len(s): -% char = s[j] -% # record current state -% if char in dict_t: -% state[char] += 1 -% if state[char] == dict_t[char]: -% formed += 1 - -% # Try and contract the window till the point where it ceases to be 'desirable'. -% # bPrint = False -% while i<=j and formed == required: -% # if not bPrint: -% # print('found:', s[i:j+1], i, j) -% # bPrint = True -% char = s[i] -% if j-i+1 < ans[0]: -% ans = j - i + 1, i -% # change the state -% if char in dict_t: -% state[char] -= 1 -% if state[char] == dict_t[char]-1: -% formed -= 1 - -% # Move the left pointer ahead, -% i += 1 - -% # Keep expanding the window -% j += 1 -% # if bPrint: -% # print('move to:', s[i:j+1], i, j) -% return "" if ans[0] == float("inf") else s[ans[1] : ans[1] + ans[0]] -% \end{lstlisting} - -% The process would be: -% \begin{lstlisting}[numbers=none] -% found: ADOBEC 0 5 -% move to: DOBECO 1 6 -% found: DOBECODEBA 1 10 -% move to: ODEBAN 6 11 -% found: ODEBANC 6 12 -% move to: ANC 10 13 -% \end{lstlisting} -\subsection{When Two Pointers do not work} Two pointer does not always work on subarray related problems. -\begin{bclogo}[couleur = blue!30, arrondi=0.1,logo=\bccrayon,ombre=true]{What happens if there exists negative number in the array? } Since the sum of the subarray is no longer monotonically increasing with the number of items between two pointers, we can not figure out how to move two pointers each step. Instead (1) we can use prefix sum and organize them in order, and use binary search to find all possible start index. (2) use monotone stack (see LeetCode probelm: 325. Maximum Size Subarray Sum Equals k, 325. 
Maximum Size Subarray Sum Equals k (hard))) -\end{bclogo} - -\begin{bclogo}[couleur = blue!30, arrondi=0.1,logo=\bccrayon,ombre=true]{What if we are to check the maximum average subarray? } -644. Maximum Average Subarray II (hard). Similarly, the average of subarray does not follow a certain order with the moving of two pointers at each side, making it impossible to decide how to make the two pointers. - -\end{bclogo} - -\subsection{Linked List} -The complete code to remove cycle is provided in google colab together with running examples. -\subsubsection{Middle of the Linked List(L876)} -%The simplest example of slow-fast pointers on linked list is to get the middle node of a given linked list. -Given a non-empty, singly linked list with head node $head$, return a middle node of linked list. When the linked list is of odd length, there exists one and only middle node, but when it is of even length, two exists and we return the second middle node. -\begin{lstlisting}[numbers=none] -Example 1 (odd length): - -Input: [1,2,3,4,5] -Output: Node 3 from this list (Serialization: [3,4,5]) - -Example 2 (even length): - -Input: [1,2,3,4,5,6] -Output: Node 4 - - from this list (Serialization: [4,5,6]) -\end{lstlisting} - -\paragraph{Analysis} If the data structure is array, we can compute the position of the middle item simply with the total length. Following this method, if only one pointer is applied, we can first iterate over the whole linked list in $O(n)$ time to get the length. Then we do another iteration to obtain the middle node. $n + \frac{n}{2}$ times of operations needed, making the time complexity $O(n)$. - -However, we can apply two pointers simultaneously at the head node, each one moves at different paces: the slow pointer moves one step at a time and the fast moves two steps instead. When the fast pointer reached the end, the slow pointer will stop at the middle. 
This slow-faster pointers technique requires only $\frac{n}{2}$ times of operations, which is three times faster than our naive method, although the big Oh time complexity still remains $O(n)$. -\paragraph{Implementation} -\begin{figure}[h] - \centering - \includegraphics[width = 0.98\columnwidth]{fig/middle_of_linked_list.png} - \caption{Slow-fast pointer to find middle} - \label{fig:slow-faster} -\end{figure} -Simply, we illustrate the process of running the two pointers technique on our two examples in Fig.~\ref{fig:slow-faster}. As we can see, when the slow pointer reaches to item 3, the faster pointer is at item 5, which is the last item in the first example that comes with odd length. Further, when the slow pointer reaches to item 4, the faster pointer reaches to the empty node after the last item in the second example that comes with even length. Therefore, in the implementation, we check two conditions in the \texttt{while} loop: -\begin{enumerate} - \item For example 1: if the fast pointer has no successor (\texttt{fast.next==None}), the loop terminates. - \item For example 2: if the fast pointer is invalid (\texttt{fast==None}), the loop terminates. -\end{enumerate} -The Python code is as: -\begin{lstlisting}[language=Python] -def middleNode(head): - slow = fast = head - while fast and fast.next: - fast = fast.next.next - slow = slow.next - return slow -\end{lstlisting} - -\subsubsection{Floyd's Cycle Detection (Floyd's Tortoise and Hare)} -\begin{figure}[h!] - \centering - \includegraphics[width=0.8\columnwidth]{fig/circular_linked_list.png} - \caption{Circular Linked List} - \label{fig:circular_linked_list} -\end{figure} -When a linked list has a cycle, as shown in Fig.~\ref{fig:circular_linked_list}, iterating items over the list will make the program get stuck in an infinite loop. The pointer starts from the head, traverses to the start of the loop, and then comes back to the start of the loop again and continues this process endlessly. 
To avoid being stuck in a ``trap'', we have to possibly solve the following three problems: -\begin{enumerate} - \item Check if there exists a cycle. - \item Check where the cycle starts. - \item Remove the cycle once it is detected. -\end{enumerate} -The solution encompasses the exact way of slow faster pointers traversing through the linked list as our last example. With the slow pointer iterating one item at a time, and the faster pointer in double pace, these two pointers will definitely meet at one item in the loop. In our example, they will meet at node 6. So, is it possible that they will meet in the non-loop region that starts from the head and ends at the start node of the loop? The answer is No, because the faster pointer will only traverse through the non-loop region once and it is always faster than the slow pointer, making it impossible to meet in this region. This method is called Floyd's Cycle Detection, aka Floyd's Tortoise and Hare Cycle Detection. Let's see more details at how to solve our mentioned three problems with this method. -\paragraph{Check Linked List Cycle(L141)} -Compared with the code in the last example, we only need to check if the \texttt{slow} and \texttt{fast} pointers are pointing at the same node: If they are, we are certain that there must be a loop in the list and return \texttt{True}, otherwise return \texttt{False}. -\begin{lstlisting}[language=Python] -def hasCycle(head): - slow = fast = head - while fast and fast.next: - slow = slow.next - fast = fast.next.next - if slow == fast: - return True - return False -\end{lstlisting} -\paragraph{Check Start Node of Linked List Cycle(L142)} Given a linked list, return the node where the cycle begins. If there is no cycle, return \texttt{None}. -\begin{figure}[h!] 
- \centering - \includegraphics[width=0.6\columnwidth]{fig/TQoyH.png} - \caption{Floyd's Cycle finding Algorithm} - \label{fig:floyd_cycle_1} -\end{figure} - -For a given linked list, assume the slow and fast pointers meet at node somewhere in the cycle. As shown in Fig.~\ref{fig:floyd_cycle_1}, we denote three nodes: head ($h$), start node of cycle ($s$), and meeting node in the cycle ($m$). We denote the distance between $h$ and $s$ to be $x$, the distance between $s$ and $m$ to be $y$, and the distance between $m$ and $s$ to be $z$. Because the faster pointer traverses through the list in double speed, when it meets up with the slow pointer, the distance that it traveled ($x+y+z+y$) is two times the distance traveled by the slow pointer ($x+y$). -\begin{align} - x + y + z + y &= 2(x + y) \\ - x &= z -\end{align} -From the above equation, we obtain the equal relation between $x$ and $z$. The distance of the starting node of the cycle from the head is $x$, and $y$ is the distance from the start node to the slow and fast pointer's node, and $z$ is the remaining distance from the meeting point to the start node. Therefore, after we have detected the cycle from the last example, we can reset the slow pointer to the head of the linked list. Then we make the slow and the fast pointer both traverse at the same pace--one node at a time--until they meet at a node, where we stop the traversal. The node where they stop at is the start node of the cycle. The code is given as: - -% the meeting point, and making both slow and fast pointer to move one node at a time, they will meet at the starting node of the cycle. - - -% Now, let's try to device the algorithm. Both slow and fast pointer starts at position 0, the node index they travel each step is: [0,1,2,3,...,k] and [0,2,4,6,...,2k] for slow and fast pointer respectively. Therefore, the total distance traveled by the slow pointer is half of the distance travelled by the fast pointer. 
From the above figure, we have the distance travelled by slow pointer to be $d_s = x+y$, and for the fast pointer $d_f = x+y+z+y = x+2y+z$. With the relation $2*d_s = d_f$. We will eventually get $x = z$. Therefore, by moving slow pointer to the start of the linked list after the meeting point, and making both slow and fast pointer to move one node at a time, they will meet at the starting node of the cycle. (LeetCode problem: 142. Linked List Cycle II (medium)). -\begin{lstlisting}[language=Python] -def detectCycle(head): - slow = fast = head - - def getStartNode(slow, fast, head): - # Reset slow pointer - slow = head - while fast and slow != fast: - slow = slow.next - fast = fast.next - return slow - - while fast and fast.next: - slow = slow.next - fast = fast.next.next - # A cycle is detected - if slow == fast: - return getStartNode(slow, fast, head) - - return None -\end{lstlisting} - -\paragraph{Remove Linked List Cycle} -We can remove the cycle by recirculing the last node in the cycle, which in example in Fig.~\ref{fig:circular_linked_list} is node 6 to an empty node. Therefore, we have to modify the above code to make the \texttt{slow} and {fast} pointers stop at the last node instead of the start node of the loop. This subroutine is implemented as: -\begin{lstlisting}[language=Python] -def resetLastNode(slow, fast, head): - slow = head - while fast and slow.next != fast.next: - slow = slow.next - fast = fast.next - fast.next = None -\end{lstlisting} -The complete code to remove cycle is provided in google colab together with running examples. -\begin{bclogo}[couleur = blue!30, arrondi=0.1,logo=\bccrayon,ombre=true]{What if there has not only one, but multiple cycles in the Linked List? } -\end{bclogo} - -\section{Opposite-directional Pointers} -Another variant of two pointers technique is to place these two pointers oppositely: one at the beginning and the other at the end of the array. 
Through the process, they move toward each other until they meet in the middle. Details such as how much each pointer moves or which pointer to move at each step are decided by our specific problems to solve. We just have to make sure when we are applying this technique, we have considered its whole state space, and will not miss out some area which makes the search incomplete. - -The simplest example of this two pointers method is to reverse an array or a string around. For example, when the list $a=[1, 2, 3, 4, 5]$ is reversed, it becomes $[5, 4, 3, 2, 1]$. Of course we can simply assign a new list and copy the items in reversed orders. But, with two pointers, we are able to reverse it in-place and using only $O(\frac{n}{2})$ times of operations through the following code: -\begin{lstlisting}[language=Python] -def reverse(a): - i, j = 0, len(a) - 1 - while i < j: - # Swap items - a[i], a[j] = a[j], a[i] - i += 1 - j -= 1 -\end{lstlisting} -Moreover, binary search can be viewed as an example of opposite-directional pointers. At first, these two pointers are the first and the last item in the array. Then, depending on which side of the middle item the target falls, one of the pointers moves either forward or backward to the middle point, reducing the search space to half of where it started at each step. We also explore another example with this technique. -\subsubsection{Two Sum on Sorted Array(L167)} -Given an array of integers that is already sorted in ascending order, find two numbers such that they add up to a specific target number. -\begin{lstlisting}[numbers=none] -Input: numbers = [2,7,11,15], target = 9 -Output: [0,1] -Explanation: The sum of 2 and 7 is 9. Therefore index1 = 0, index2 = 1. -\end{lstlisting} -\paragraph{Analysis} If we simply enumerate all possible pairs, we have to take $O(n^2)$ to solve this problem. However, with the opposite-directional two pointers, it gives out linear performance. 
- -Denote the list as $A=[a_1, a_2, ..., a_{n-1}, a_{n}]$, and for the sorted array we have $a_1\leq a_2 \leq...\leq a_{n-1} \leq a_n$. The range of the sum of any two items in the array is within two possible ranges: $[a_1+a_2, a_1+a_n]$ and $[a_1+a_n, a_{n-1}+a_n]$. By placing one pointer $i$ at $a_1$ and the other $j$ at $a_n$ to start with, we can get $a_1+a_n$ as the sum. Pointer $i$ can only move forward, accessing larger items. On the other hand, pointer $j$ can only move backward, accessing smaller items. Now there are three scenarios according to the comparison between the target and the current sum of the two pointers: -\begin{enumerate} - \item If $t == a[i] + a[j]$, target sum found. - \item If $t > a[i] + a[j]$, we have to increase the sum, we can only do this by moving pointer $i$ forward. - \item If $t < a[i] + a[j]$, we have to decrease the sum, we can only do this by moving pointer $j$ backward. -\end{enumerate} -The Python code is as: -\begin{lstlisting}[language=Python] -def twoSum(a, target): - n = len(a) - i, j = 0, n-1 - while i < j: - temp = a[i] + a[j] - if temp == target: - return [i, j] - elif temp < target: - i += 1 - else: - j -= 1 - return [] -\end{lstlisting} -\section{Follow Up: Three Pointers} -Sometimes, manipulating two pointers is not even enough to distinguish different subspaces, we might need the assistance of another pointer to make things work. -\subsubsection{Binary Subarrays With Sum (L930)} In an array $A$ of $0$s and $1$s, how many non-empty subarrays have sum $S$? -\begin{lstlisting}[numbers=none] -Example 1: -Input: A = [1,0,1,0,1], S = 2 -Output: 4 -Explanation: -The 4 subarrays are listed below: -[1,0,1], index (0, 2) -[1,0,1,0], index (0, 3) -[0,1,0,1], index (1, 4) -[1,0,1], index (2, 4) -\end{lstlisting} -\paragraph{Analysis} -This problem is highly similar to the minimum length subarray problem we encountered before. 
We naturally start with two pointers $i$ and $j$, and restrict the subarray in range $[i, j]$ to satisfy condition $sum\leq S$. The window is contracted when the condition is violated. We would have write the following code: -\begin{lstlisting}[language=Python] -def numSubarraysWithSum(a, S): - i, j = 0, 0 - win_sum = 0 - ans = 0 - while j < len(a): - win_sum += a[j] - while i < j and win_sum > S: - win_sum -= a[i] - i += 1 - if win_sum == S: - ans += 1 - print('({}, {})'.format(i, j)) - j += 1 - return ans -\end{lstlisting} -However, the above code only returns $3$, instead of $4$ as shown in the example. By printing out pointers $i$ and $j$, we can see the above code is missing case $(2, 4)$. Why? Because we are restricting the subarray sum in range $[i, j]$ to be smaller than or equal to $S$, with the occruence of $0$s that might appear in the front or in the rear of the subarray: -\begin{itemize} -\item In the process of expanding the subarray, pointer $j$ is moved one at a time. Thus, even though $0$s appear in the rear of the subarray, the counting is correct. -\item However, in the process of shrinking the subarray while the restriction is violated($sum > S$), we stop right away once $sum \leq S$. And in the code, we end up only counting it as one occurrence. With $0$s at the beginning of the subarray, such as the subarray $[0, 1, 0, 1]$ with index $1$ and $4$, there count should be two instead of one. -\end{itemize} -The solution is to add another pointer $i_h$ to handle the missed case: When the $sum=S$, count the total occurrence of $0$ in the front. Compared with the above solution, the code only differs slightly with the additional pointer and one extra \texttt{while} loop to deal the case. Also we need to pay attention that $i_h \leq j$, otherwise, the \texttt{while} loop would fail with example with only zeros and a targeting sum $0$. 
-\begin{lstlisting}[language=Python] -def numSubarraysWithSum(a, S): - i, i_h, j = 0, 0, 0 - win_sum = 0 - ans = 0 - while j < len(a): - win_sum += a[j] - while i < j and win_sum > S: - win_sum -= a[i] - i += 1 - # Move i_h to count all zeros in the front - i_h = i - while i_h < j and win_sum == S and a[i_h] == 0: - ans += 1 - i_h += 1 - - if win_sum == S: - ans += 1 - j += 1 - return ans -\end{lstlisting} - -We noticed that in this case, we have to explicitly restrict $i < j$ and $i_h < j$ due to the special case, while in all our previous examples, we do not have to. - -\section{Summary} -Two pointers is a powerful tool for solving problems on liner data structures, such as ``certain'' subarray and substring problems as we have shown in the examples. The ``window'' secluded between the two pointers can be viewed as sliding window: It can move slide forwarding with the forwarding the slower pointer. Two important properties are generally required for this technique to work: -\begin{enumerate} -\begin{figure}[h!] - \centering - \includegraphics[width=0.7\columnwidth]{fig/Sliding_window_property.png} - \caption{Sliding Window Property} - \label{fig:slide_window} -\end{figure} - \item Sliding window property: Either we move the faster pointer $j$ forward by one, or move the slower pointer $i$, we can get the state of current window in $O(1)$ cost knowing the state of the last window. - - For example, given an array, imagine that we have a fixed size window as shown in Fig.~\ref{fig:slide_window}, and we can slide it forward one position at a time, compute the sum of each window. The bruteforce solution would be of $O(kn)$ complexity where $k$ is the window size and $n$ is the array size by using two nested \texttt{for} loops: one to set the starting point, and the other to compute the sum in $O(k)$. However, the sum of the current window ($S_c$) can be computed from the last window ($S_l$), and the items that just slid out and in as $a_j$ and $a_i$ respectively. 
Then $S_c = S_l-a_i+a_j$. Getting the state of of the window between two pointers in $O(1)$ as shown in the example is our called Sliding Window Property. - - Usually, for an array with numerical value, it satisfies the sliding window property if we are to compute its sum or product. For substring, as shown in our minimum window substring example, we can get the state of current window referring to the state of the last window in $O(1)$ with the assist of dictionary data structure. In substring, this is more obscure, and the general requirement is that the state of the substring does not relate to the order of the characters(anagram-like state). - - \item Monotonicity: For subarray sum/product, the array should only comprise all positive/negative values so that the prefix sum/product has monotonicity: moving the faster pointer and the slower pointer forward results into opposite change to the state. The same goes for the substring problems where we see from the minimum window substring example the change of the state: \texttt{count} and the value of the dictionary is monotonic, and each either increases or decreases with the moving of two pointers. -% \begin{lstlisting}[language=Python] -% def fixedSlideWindow(A, k): -% n = len(A) -% if k >= n: -% return sum(A) -% # compute the first window -% acc = sum(A[:k]) -% ans = acc -% # slide the window -% for i in range(n-k): # i is the start point of the window -% j = i + k # j is the end point of the window -% acc = acc - A[i] + A[j] -% ans = max(ans, acc) -% return ans -% \end{lstlisting} -\end{enumerate} - -% The steps of using sliding windows: -% \begin{enumerate} -% \item Initialize the left and right pointer; -% \item Handle the right pointer and record the state of the current window; -% \item While the window is in the state of desirable: record the optimal solution, move the left pointer and record the state (change or stay unchanged). -% \item Up till here, the state is not desirable. 
Move the right pointer in order to find a desirable window; -% \end{enumerate} -\section{Exercises} -\begin{enumerate} -\item 3. Longest Substring Without Repeating Characters -\item 674. Longest Continuous Increasing Subsequence (easy) -\item 438. Find All Anagrams in a String -\item 30. Substring with Concatenation of All Words -\item 159. Longest Substring with At Most Two Distinct Characters -\item 567. Permutation in String -\item 340. Longest Substring with At Most K Distinct Characters -\item 424. Longest Repeating Character Replacement -\end{enumerate} -% that data strArray Search is to find a \textbf{sub-structure} on a given linear data structure( Chapter~\ref{chapter_linear_data_structure}) or a virtual linear search space. Categorized by the definition of sub-structure: -% \begin{itemize} -% \item Define the sub-structure as a \textbf{particular item}: Usually the worst and average performance is $O(n)$. \textbf{Binary search} (Section~\ref{sec_binary_search}) finds an item within an ordered data structure, each time, the search space is elimilated by half in size, which makes the worst time complexity $O(\log n)$. Using hashmap can gain us the best complexity of $O(1)$. -% \item Define the sub-structure as a \textbf{consecutive substructure} indexed by a start and end index (subarray) in the linear data structure, we introduce the \textbf{Sliding Window Algorithm} (Section~\ref{sec_pointer_sliding_window}). Compared with the brute force solution, it decrease the complexity from $O(n^2)$ to $O(n)$. If the sub-structure is \textbf{predefined pattern}, we need pattern matching algorithms. This usually exists in string data structure, and we do string pattern matching (Section~\ref{}). 
-% \end{itemize} - -% \subfile{chapters/learning/search/sliding_window} - - - - - - - \end{document} \ No newline at end of file diff --git a/Easy-Book/chapters/chapter_code_data_structure.tex b/Easy-Book/chapters/chapter_code_data_structure.tex deleted file mode 100644 index 1b870bd..0000000 --- a/Easy-Book/chapters/chapter_code_data_structure.tex +++ /dev/null @@ -1,2614 +0,0 @@ -\documentclass[../main.tex]{subfiles} -\begin{document} -\chapter{Python Data Structures} - -\section{Introduction} -Python is object-oriented programming language where each object is implemented using \texttt{C++} in the backend. The built-in data types of \texttt{C++} follows more rigidly to the abstract data structures. We would get by just learning how to use Python data types alone: its \textbf{property}--immutable or mutable, its \textbf{built-in in-place operations}--such as \texttt{append()}, \texttt{insert()}, \texttt{add()}, \texttt{remove()}, \texttt{replace()} and so, and \textbf{built-in functions} and \textbf{operations} that offers additional ability to manipulate data structure--an object here. However, some data types' behaviors might confuse us with abstract data structures, making it hard to access and evaluate its efficiency. - -In this chapter and the following three chapters, we starts to learn Python data structures by relating its \texttt{C++} data structures to our learned abstract data structures, and then introduce each's property, built-in operations, built-in functions and operations. \textit{Please read the section Understanding Object in the Appendix--Python Knowledge Base to to study the properties of Built-in Data Types first if Python is not your familiar language}. - -\paragraph{Python Built-in Data Types} In Python 3, we have four built-in scalar data types: \texttt{int}, \texttt{float}, \texttt{complex}, \texttt{bool}. 
At higher level, it includes four sequence types: \texttt{str}--string type, \texttt{list}, \texttt{tuple}, and \texttt{range}; one mapping type: \texttt{dict} and two set types: \texttt{set} and \texttt{frozenset}. Among these 12 built-in data types, other than the scalar types, the others represent some of our introduced abstract data structures. - - -\paragraph{Abstract Data Types with Python Data Types/Modules} -To relate the abstract data types to our built-in data types we have: -\begin{itemize} - \item Sequence type corresponds to Array data structure: includes \texttt{string}, \texttt{list}, \texttt{tuple}, and \texttt{range} - \item \texttt{dict}, \texttt{set}, and \texttt{frozenset} map to the hash tables. - \item For linked list, stack, queue, we either need to implement it with built-in data types or we have Python Modules. -\end{itemize} -%%%%%%%%%%%%%%%% -% Array : list, string, and tuple! % -%%%%%%%%%%%%%%%% -% \chapter{Basic Data Structure} -\section{Array and Python Sequence} -\label{chapter_array_string} - We will see from other remaining contents of this part that how array-based Python data structures are used to implement the other data structures. On the LeetCode, these two data structures are involved into $25\%$ of LeetCode Problems. -\subsection{Introduction to Python Sequence} -In Python, \textit{sequences} are defined as ordered sets of objects indexed by non-negative integers, we use \textit{index} to refer and in Python it defaultly starts at $0$. Sequence types are \textbf{iterable}. Iterables are able to be iterated over. Iterators are the agents that perform the iteration, where we have \texttt{iter()} built-in function. -\begin{itemize} - \item \texttt{string} is a sequence of characters, it is immutable, and with \textbf{static array} as its backing data structure in C++. 
- \item \texttt{list} and \texttt{tuple} are sequences of \textbf{arbitrary} objects--meaning it accepts different types of objects including the 12 built-in data types and any other objects. This sounds fancy and like magic! However, it does not change the fact that its backing abstract data structure is \textbf{dynamic array}. They are able to have arbitrary type of objects through the usage of pointers to objects, pointing to object's physical location, and each pointer takes fixed number of bytes in space (in 32-bit system, 4 bytes, and for a 64-bit system, 8 bytes instead). - \item \texttt{range}: In Python 3, \texttt{range()} is a type. But range does not have backing array data structure to save a sequence of value, it computes on demand. Thus we will first introduce range and get done with it before we focus on other sequence types. -\begin{lstlisting}[language=Python] ->>> type(range) -<class 'type'> -\end{lstlisting} - \end{itemize} - -All these sequence type data structures share the most common methods and operations shown in Table~\ref{tab:common_method_sequence_python} and \ref{tab:common_operation_sequence_python}. To note that in Python, the indexing starts from 0. - -Let us examine each type of sequence further to understand its performance, and relation to array data structures. -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%STring%%%%%%%%%%%%%%% -\subsection{Range} -\subsubsection{Range Syntax} -The range object has three attributes: \texttt{start}, \texttt{stop}, \texttt{step}, and a range object can be created as \texttt{range(start, stop, step)}. These attributes need to be integers--both negative and positive work--to define a range, which will be $[start, stop)$. The default value for \texttt{start} is 0 and for \texttt{step} is 1; \texttt{stop} must always be given. For example: -\begin{lstlisting}[language=Python] ->>> a = range(10) ->>> b = range(0, 10, 2) ->>> a, b -(range(0, 10), range(0, 10, 2)) -\end{lstlisting} -Now, we print it out: -\begin{lstlisting}[language=Python] ->>> for i in a: -... 
print(i, end=' ') -... -0 1 2 3 4 5 6 7 8 9 -\end{lstlisting} -And for \texttt{b}, it will be: -\begin{lstlisting}[language=Python] ->>> for i in b: -... print(i, end=' ') -... -0 2 4 6 8 -\end{lstlisting} -Like any other sequence types, \texttt{range} is iterable, can be indexed and sliced. -\begin{lstlisting}[language=Python] -\end{lstlisting} -\subsubsection{What you do not see} -The range object might be a little bizarre when we first learn it. Is it an iterator, a generator? The answer to both questions are NO. What is it then? It is more like a sequence type that differs itself without other counterparts with its own unique properties: -\begin{itemize} - \item It is ``lazy'' in the sense that it doesn’t generate every number that it ``contain'' when we create it. Instead it gives those numbers to us as we need them when looping over it. Thus, it saves us space: -\begin{lstlisting}[language=Python] ->>> a = range(1_000_000) ->>> b = [i for i in a] ->>> a.__sizeof__(), b.__sizeof__() -(48, 8697440) -\end{lstlisting} -This is just how we define the behavior of the range class back in the C++ code. We does not need to save all integers in the range, but be generated with function that specifically asks for it. - \item It is not an iterator; it won't get consumed. We can iterate it multiple times. This is understandable given how it is implemented. -\end{itemize} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%STring%%%%%%%%%%%%%%% -\subsection{String} -String is \textbf{static array} and its items are just characters, represented using ASCII or Unicode \footnote{In Python 3, all strings are represented in Unicode. In Python 2 are stored internally as 8-bit ASCII, hence it is required to attach 'u' to make it Unicode. It is no longer necessary now.}. String is immutable which means once its created we can no longer modify its content or extent its size. 
String is more compact compared with storing the characters in \texttt{list} because its backing array won't be assigned any extra space. - -\subsubsection{String Syntax} strings can be created in Python by wrapping a sequence of characters in single or double quotes. Multi-line strings can easily be created using three quote characters. - -\paragraph{New a String} - - -We specially introduce some commonly used and useful functions. -\paragraph{Join} -The \texttt{str.join()} method will concatenate two strings, but in a way that passes one string through another. For example, we can use the \texttt{str.join()} method to add whitespace to that string, which we can do like so: -\begin{lstlisting}[language=Python] -balloon = "Sammy has a balloon." -print(" ".join(balloon)) -#Output -S a m m y h a s a b a l l o o n . -\end{lstlisting} -The \texttt{str.join()} method is also useful to combine a list of strings into a new single string. -\begin{lstlisting}[language=Python] -print(",".join(["a", "b", "c"])) -#Output -a,b,c -\end{lstlisting} -\paragraph{Split} -Just as we can join strings together, we can also split strings up using the \texttt{str.split()} method. This method separates the string by whitespace if no other parameter is given. -\begin{lstlisting}[language=Python] -print(balloon.split()) -#Output -['Sammy', 'has', 'a', 'balloon.'] -\end{lstlisting} -We can also use str.split() to remove certain parts of an original string. For example, let’s remove the letter 'a' from the string: -\begin{lstlisting}[language=Python] -print(balloon.split("a")) -#Output -['S', 'mmy h', 's ', ' b', 'lloon.'] -\end{lstlisting} -Now the letter a has been removed and the strings have been separated where each instance of the letter a had been, with whitespace retained. - -\paragraph{Replace}The \texttt{str.replace()} method can take an original string and return an updated string with some replacement. - -Let’s say that the balloon that Sammy had is lost. 
Since Sammy no longer has this balloon, we will change the substring "has" from the original string balloon to "had" in a new string: -\begin{lstlisting}[language=Python] -print(balloon.replace("has","had")) -#Output -Sammy had a balloon. -\end{lstlisting} -We can use the replace method to delete a substring: -\begin{lstlisting}[language=Python] -balloon.replace("has", '') -\end{lstlisting} -Using the string methods \texttt{str.join()}, \texttt{str.split()}, and \texttt{str.replace()} will provide you with greater control to manipulate strings in Python. - -\paragraph{Conversion between Integer and Character} -Function \texttt{ord()} would get the int value (ASCII) of the char. And in case you want to convert back after playing with the number, function \texttt{chr()} does the trick. -\begin{lstlisting}[language = Python] -print(ord('A'))# Given a string of length one, return an integer representing the Unicode code point of the character. -print(chr(65)) -\end{lstlisting} - -\subsubsection{String Functions} - Because string is one of the most fundamental built-in data types, this makes managing its built-in common methods shown in Table~\ref{tab:common_operation_string} and \ref{tab:common_operation_string_boolean} necessary. Using boolean methods to check whether characters are lower case, upper case, or title case can help us to sort our data appropriately, as well as provide us with the opportunity to standardize data we collect by checking and then modifying strings as needed. 
-\begin{table}[!ht] -\begin{small} -\centering -\noindent\captionof{table}{ Common Methods of String} - \noindent \begin{tabular}{|p{0.45\columnwidth}|p{0.55\columnwidth}| } - \hline -Method & Description \\ \hline -count(substr, [start, end]) & Counts the occurrences of a substring with optional start and end position \\\hline -find(substr, [start, end]) &Returns the index of the first occurrence of a substring or returns -1 if the substring is not found\\ \hline -join(t) &Joins the strings in sequence t with current string between each item\\ \hline -lower()/upper() &Converts the string to all lowercase or uppercase\\ \hline -replace(old, new) &Replaces old substring with new substring\\ \hline -strip([characters]) &Removes withspace or optional characters\\ \hline -split([characters], [maxsplit]) &Splits a string separated by whitespace or an optional separator. Returns a list\\ \hline -expandtabs([tabsize]) & Replaces tabs with spaces. -\end{tabular} - \label{tab:common_operation_string} - - \centering -\noindent\captionof{table}{ Common Boolean Methods of String} - \noindent \begin{tabular}{|p{0.45\columnwidth}|p{0.55\columnwidth}| } - \hline -Boolean Method & Description \\ \hline - -isalnum() & String consists of only alphanumeric characters (no symbols)\\ \hline -isalpha() & String consists of only alphabetic characters (no symbols)\\ \hline -islower() & String's alphabetic characters are all lower case\\ \hline -isnumeric() & String consists of only numeric characters\\ \hline -isspace() & String consists of only whitespace characters\\ \hline -istitle() & String is in title case\\ \hline -isupper() & String's alphabetic characters are all upper case\\ \hline -\end{tabular} - \label{tab:common_operation_string_boolean} - \end{small} -\end{table} - - - -%%%%%%%%%%%%%%%%%%%List%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\subsection{List} -The underlying abstract data structure of \texttt{list} data types is \textbf{dynamic array}, meaning we can add, delete, modify 
items in the list. It supports random access by indexing. List is the most widely one among sequence types due to its mutability. - - Even if list supports data of arbitrary types, we do not prefer to do this. Use \texttt{tuple} or \texttt{namedtuple} for better practice and offers better clarification. -\subsubsection{What You see: List Syntax} -\textbf{New a List:} We have multiple ways to new either empty list or with initialized data. List comprehension is an elegant and concise way to create new list from an existing list in Python. -\begin{lstlisting}[language = Python] -# new an empty list -lst = [] -lst2 = [2, 2, 2, 2] # new a list with initialization -lst3 = [3]*5 # new a list size 5 with 3 as initialization -print(lst, lst2, lst3) -# output -# [] [2, 2, 2, 2] [3, 3, 3, 3, 3] -\end{lstlisting} - -We can use \textbf{list comprehension} and use \texttt{enumerate} function to loop over its items. -\begin{lstlisting}[language=Python] -lst1 = [3]*5 # new a list size 5 with 3 as initialization -lst2 = [4 for i in range(5)] -for idx, v in enumerate(lst1): - lst1[idx] += 1 -\end{lstlisting} - -\paragraph{Search} We use method \texttt{list.index()} to obtain the index of the searched item. -\begin{lstlisting}[language = Python] -print(lst.index(4)) #find 4, and return the index -# output -# 3 -\end{lstlisting} -If we print(lst.index(5)) will raise ValueError: 5 is not in list. Use the following code instead. -\begin{lstlisting}[language=Python] -if 5 in lst: - print(lst.index(5)) -\end{lstlisting} -\textbf{Add Item } We can add items into list through \texttt{insert(index, value)}--inserting an item at a position in the original list or \texttt{list.append(value)}--appending an item at the end of the list. 
-\begin{lstlisting}[language = Python] -# INSERTION -lst.insert(0, 1) # insert an element at index 0, and since it is empty lst.insert(1, 1) has the same effect -print(lst) - -lst2.insert(2, 3) -print(lst2) -# output -# [1] -# [2, 2, 3, 2, 2] -# APPEND -for i in range(2, 5): - lst.append(i) -print(lst) -# output -# [1, 2, 3, 4] -\end{lstlisting} - -\textbf{Delete Item} - -\textbf{Get Size of the List} We can use \texttt{len} built-in function to find out the number of items stored in the list. -\begin{lstlisting}[language=Python] -print(len(lst2)) -# 5 -\end{lstlisting} - - - - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%List and Array%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\subsubsection{What you do not see: Understand List} -To understand list, we need to start with its C implementation; we do not introduce the C source code, but instead use functions to access and evaluate its properties. -\paragraph{List Object and Pointers} In a 64-bits (8 bytes) system, such as in Google Colab, a pointer is represented with 8 bytes space. In Python3, the list object itself takes $64$ bytes in space. And any additional element takes $8$ bytes. In Python, we can use \texttt{getsizeof()} from \texttt{sys} module to get its memory size, for example: -\begin{lstlisting}[language=Python] -lst_lst = [[], [1], ['1'], [1, 2], ['1', '2']] -\end{lstlisting} -And now, let us get the memory size of \texttt{lst\_lst} and each list item in this list. -\begin{lstlisting}[language=Python] -import sys -for lst in lst_lst: - print(sys.getsizeof(lst), end=' ') -print(sys.getsizeof(lst_lst)) -\end{lstlisting} -The output is: -\begin{lstlisting}[numbers=none] -64 72 72 80 80 104 -\end{lstlisting} -We can see a list of integers takes the same memory size as of a list of strings with equal length. -\paragraph{insert and append} Whenever insert and append is called, and assume the original length is $n$, Python could compare $n+1$ with its allocated length. 
If you append or insert to a Python list and the backing array isn't big enough, the backing array must be expanded. When this happens, the backing array is grown by approximately 12\% the following formula (comes from C++): -\begin{lstlisting}[language=C] -new_allocated = (size_t)newsize + (newsize >> 3) + - (newsize < 9 ? 3 : 6); -\end{lstlisting} -Do an experiment, we can see how it works. Here we use \texttt{id()} function to obtain the pointer's physical address. We compare the size of the list and its underlying backing array's real additional size in space (with 8 bytes as unit). -\begin{lstlisting}[language=Python] -a = [] -for size in range(17): - a.insert(0, size) - print('size:', len(a), 'bytes:', (sys.getsizeof(a)-64)//8, 'id:', id(a)) -\end{lstlisting} -The output is: -\begin{lstlisting}[numbers=none] -size: 1 bytes: 4 id: 140682152394952 -size: 2 bytes: 4 id: 140682152394952 -size: 3 bytes: 4 id: 140682152394952 -size: 4 bytes: 4 id: 140682152394952 -size: 5 bytes: 8 id: 140682152394952 -size: 6 bytes: 8 id: 140682152394952 -size: 7 bytes: 8 id: 140682152394952 -size: 8 bytes: 8 id: 140682152394952 -size: 9 bytes: 16 id: 140682152394952 -size: 10 bytes: 16 id: 140682152394952 -size: 11 bytes: 16 id: 140682152394952 -size: 12 bytes: 16 id: 140682152394952 -size: 13 bytes: 16 id: 140682152394952 -size: 14 bytes: 16 id: 140682152394952 -size: 15 bytes: 16 id: 140682152394952 -size: 16 bytes: 16 id: 140682152394952 -size: 17 bytes: 25 id: 140682152394952 -\end{lstlisting} -The output addresses the growth patterns as [0, 4, 8, 16, 25, 35, 46, 58, 72, 88, ...]. - -Amortizely, \texttt{append} takes $O(1)$. However, it is $O(n)$ for \texttt{insert} because it has to first shift all items in the original list from [pos, end] by one position, and put the item at pos with random access. - -\subsubsection{Common Methods of List} We have already seen how to use \texttt{append}, \texttt{insert}. 
Now, Table~\ref{tab:common_operation_list} shows us the common List Methods, and they will be used as \texttt{list.methodName()}. -\begin{table}[h] -\begin{small} -\centering -\noindent\captionof{table}{ Common Methods of List} - \noindent \begin{tabular}{|p{0.45\columnwidth}|p{0.55\columnwidth}| } - \hline -Method & Description \\ \hline -append() & Add an element to the end of the list \\\hline -extend(l) &Add all elements of a list to the another list\\ \hline -insert(index, val) &Insert an item at the defined index $s$\\ \hline -pop(index) &Removes and returns an element at the given index\\ \hline -remove(val) & Removes an item from the list\\ \hline -clear() &Removes all items from the list\\ \hline -index(val) &Returns the index of the first matched item\\ \hline -count(val) &Returns the count of number of items passed as an argument\\ \hline -sort() &Sort items in a list in ascending order\\ \hline -reverse() &Reverse the order of items in the list (same as list[::-1])\\ \hline -copy() &Returns a shallow copy of the list (same as list[::])\\ \hline -\end{tabular} - \label{tab:common_operation_list} - \end{small} -\end{table} - - -\subsubsection{Two-dimensional List} -Two dimensional list is a list within a list. In this type of array the position of an data element is referred by two indices instead of one. So it represents a table with rows and columns of data. For example, we can declare the following 2-d array: -\begin{lstlisting}[language=Python] -ta = [[11, 3, 9, 1], [25, 6,10], [10, 8, 12, 5]] -\end{lstlisting} -The scalar data in two dimensional lists can be accessed using two indices. One index referring to the main or parent array and another index referring to the position of the data in the inner list. If we mention only one index then the entire inner list is printed for that index position. The example below illustrates how it works. 
-\begin{lstlisting}[language=Python] -print(ta[0]) -print(ta[2][1]) -\end{lstlisting} -And with the output -\begin{lstlisting}[numbers=none] -[11, 3, 9, 1] -8 -\end{lstlisting} -In the above example, we new a 2-d list and initialize them with values. There are also ways to new an empty 2-d array or fix the dimension of the outer array and leave it empty for the inner arrays: -\begin{lstlisting}[language=Python] -# empty two dimensional list -empty_2d = [[]] - -# fix the outer dimension -fix_out_d = [[] for _ in range(5)] -print(fix_out_d) -\end{lstlisting} -All the other operations such as delete, insert, update are the same as of the one-dimensional list. - -\paragraph{Matrices} We are going to need the concept of matrix, which is defined as a collection of numbers arranged into a fixed number of rows and columns. For example, we define $3 \times 4$ (read as 3 by 4) order matrix is a set of numbers arranged in 3 rows and 4 columns. And for $m_1$ and $m_2$, they are doing the same things. -\begin{lstlisting}[language=Python] -rows, cols = 3, 4 -m1 = [[0 for _ in range(cols)] for _ in range(rows)] # rows * cols -m2 = [[0]*cols for _ in range(rows)] # rows * cols -print(m1, m2) -\end{lstlisting} -The output is: -\begin{lstlisting}[numbers=none] -[[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]] [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]] -\end{lstlisting} -We assign value to m1 and m2 at index (1, 2) with value $1$: -\begin{lstlisting}[language=Python] -m1[1][2] = 1 -m2[1][2] = 1 -print(m1, m2) -\end{lstlisting} -And the output is: -\begin{lstlisting}[numbers=none] -[[0, 0, 0, 0], [0, 0, 1, 0], [0, 0, 0, 0]] [[0, 0, 0, 0], [0, 0, 1, 0], [0, 0, 0, 0]] -\end{lstlisting} -However, we can not declare it in the following way, because we end up with some copies of the same inner lists, thus modifying one element in the inner lists will end up changing all of the them in the corresponding positions. Unless the feature suits the situation. 
-\begin{lstlisting}[language=Python] -# wrong declaration -m4 = [[0]*cols]*rows -m4[1][2] = 1 -print(m4) -\end{lstlisting} -With output: -\begin{lstlisting}[numbers=none] -[[0, 0, 1, 0], [0, 0, 1, 0], [0, 0, 1, 0]] -\end{lstlisting} - -\paragraph{Access Rows and Columns} In real problem solving, we might need to access rows and columns. Accessing rows is quite easy since it follows the declaration of two-dimensional array. -\begin{lstlisting}[language=Python] -# accessing row -for row in m1: - print(row) -\end{lstlisting} -With the output: -\begin{lstlisting}[numbers=none] -[0, 0, 0, 0] -[0, 0, 1, 0] -[0, 0, 0, 0] -\end{lstlisting} -However, accessing columns will be less straightforward. To get each column, we need another inner for loop or list comprehension through all rows and obtain the value from that column. This is usually a lot slower than accessing each row due to the fact that each row is a pointer, while each column has to be assembled by visiting every row. -\begin{lstlisting}[language=Python] -# accessing col -for i in range(cols): - col = [row[i] for row in m1] - print(col) -\end{lstlisting} -The output is: -\begin{lstlisting}[numbers=none] -[0, 0, 0] -[0, 0, 0] -[0, 1, 0] -[0, 0, 0] -\end{lstlisting} -There's also a handy ``idiom'' for transposing a nested list, turning 'columns' into 'rows': -\begin{lstlisting}[language=Python] -transposedM1 = list(zip(*m1)) -print(transposedM1) -\end{lstlisting} -The output will be: -\begin{lstlisting}[numbers=none] -[(0, 0, 0), (0, 0, 0), (0, 1, 0), (0, 0, 0)] -\end{lstlisting} - -% \begin{bclogo}[couleur = blue!30, arrondi=0.1,logo=\bccrayon,ombre=true]{Try use \texttt{numpy} third party module?} -% \begin{lstlisting}[language=Python] -% import numpy as np -% b = np.array(list) # convert list to array -% col = b[:, 0] # access the first col -% row = b[0, :] # access the first row -% \end{lstlisting} -% \end{bclogo} - -\subsection{Tuple} -A \texttt{tuple} has \textbf{static array} as its backing abstract data structure in C, 
which is immutable--we can not add, delete, or replace items once its created and assigned with value. You might think if \texttt{list} is a dynamic array and has no restriction same as of the tuple, why would we need \texttt{tuple} then? - -\paragraph{Tuple VS List} We list how we use each data type and why is it. The main benefit of tuple's immutability is it is hashable, we can use them as keys in the hash table--\texttt{dictionary types}, whereas the mutable types such as list and range can not be applied. Besides, in the case that the data does not to change, the tuple's immutability will guarantee that the data remains write-protected and iterating an immutable sequence is faster than a mutable sequence, giving it slight performance boost. Also, we generally use tuple to store a variety of data types. For example, in a class score system, for a student, we might want to have its name, student id, and test score, we can write \texttt{('Bob', 12345, 89)}. - - - -\subsubsection{Tuple Syntax} -\paragraph{New and Initialize Tuple} Tuples are created by separating the items with a comma. It is commonly wrapped in parentheses for better readability. Tuple can also be created via a built-in function \texttt{tuple()}, if the argument to \texttt{tuple()} is a sequence then this creates a tuple of elements of that sequences. This is also used to realize type conversion. - -An empty tuple: -\begin{lstlisting}[language=Python] -tup = () -tup3 = tuple() -\end{lstlisting} -When there is only one item, put comma behind so that it wont be translated as \texttt{string}, which is a bit bizarre! -\begin{lstlisting}[language=Python] -tup2 = ('crack', ) -tup1 = ('crack', 'leetcode', 2018, 2019) -\end{lstlisting} -Converting a string to a tuple with each character separated. -\begin{lstlisting}[language=Python] -tup4 = tuple("leetcode") # the sequence is passed as a tuple of elements ->> tup4: ('l', 'e', 'e', 't', 'c', 'o', 'd', 'e') -\end{lstlisting} -Converting a list to a tuple. 
-\begin{lstlisting}[language=Python] -tup5 = tuple(['crack', 'leetcode', 2018, 2019]) # same as tup1 -\end{lstlisting} -If we print out these tuples, it will be -\begin{lstlisting} -tup1: ('crack', 'leetcode', 2018, 2019) -tup2: ('crack',) -tup3: () -tup4: ('l', 'e', 'e', 't', 'c', 'o', 'd', 'e') -tup5: ('crack', 'leetcode', 2018, 2019) -\end{lstlisting} - -\paragraph{Changing a Tuple} Assume we have the following tuple: -\begin{lstlisting}[language=Python] -tup = ('a', 'b', [1, 2, 3]) -\end{lstlisting} -If we want to change it to \texttt{('c', 'b', [4,2,3])}, we can not do the following operation, because, as we said, a tuple cannot be changed in-place once it has been assigned. -\begin{lstlisting}[language=Python] -tup = ('a', 'b', [1, 2, 3]) -#tup[0] = 'c' #TypeError: 'tuple' object does not support item assignment -\end{lstlisting} -Instead, we initialize another tuple and assign it to \texttt{tup} variable. -\begin{lstlisting}[language=Python] -tup=('c', 'b', [4,2,3]) -\end{lstlisting} -However, for its items which are mutable themselves, we can still manipulate them. For example, we can use index to access the list item at the last position of a tuple and modify the list. -\begin{lstlisting}[language=Python] -tup[-1][0] = 4 -#('a', 'b', [4, 2, 3]) -\end{lstlisting} - - -% \paragraph{Deleting a Tuple} Because tuple is immutable, it also means we cannot delete or remove items from a tuple. But deleting a tuple entirely is possible using the keyword \texttt{del}. -% \begin{lstlisting}[language=Python] -% del tup -% print(tup) -% \end{lstlisting} -% After del, when try to use tup again it returns NameError. -% \begin{lstlisting} -% NameError: name 'tup' is not defined -% \end{lstlisting} -\subsubsection{Understand Tuple} -The backing structure is \textbf{static array} which states that the way the tuple is structured is similar to list, other than being write-protected. We will just brief on its property. -\paragraph{Tuple Object and Pointers} Tuple object itself takes $48$ bytes. 
And all the others are similar to corresponding section in list. -\begin{lstlisting}[language=Python] -lst_tup = [(), (1,), ('1',), (1, 2), ('1', '2')] -import sys -for tup in lst_tup: - print(sys.getsizeof(tup), end=' ') -\end{lstlisting} -The output will be: -\begin{lstlisting}[numbers=none] -48 56 56 64 64 -\end{lstlisting} - -\subsubsection{Named Tuples}In named tuple, we can give all records a name, say ``Computer\_Science'' to indicate the class name, and we give each item a name, say 'name', 'id', and 'score'. We need to import \texttt{namedtuple} class from module \texttt{collections}. For example: -\begin{lstlisting}[language=Python] -record1 = ('Bob', 12345, 89) -from collections import namedtuple -Record = namedtuple('Computer_Science', 'name id score') -record2 = Record('Bob', id=12345, score=89) -print(record1, record2) -\end{lstlisting} -The output will be: -\begin{lstlisting}[language=Python] -('Bob', 12345, 89) Computer_Science(name='Bob', id=12345, score=89) -\end{lstlisting} - - - - - - -\subsection{Summary} -All these sequence type data structures share the most common methods and operations shown in Table~\ref{tab:common_method_sequence_python} and \ref{tab:common_operation_sequence_python}. To note that in Python, the indexing starts from 0. 
-\begin{table}[h] -\begin{small} -\centering -\noindent\captionof{table}{ Common Methods for Sequence Data Type in Python} - \noindent \begin{tabular}{|p{0.45\columnwidth}|p{0.55\columnwidth}| } - \hline - Function Method& Description \\ \hline -len(s) & Get the size of sequence s \\\hline -min(s, [,default=obj, key=func]) &The minimum value in s (alphabetically for strings)\\ \hline -max(s, [,default=obj, key=func]) &The maximum value in s (alphabetically for strings)\\ \hline - sum(s, [,start=0) &The sum of elements in s(return $TypeError$ if $s$ is not numeric)\\ \hline -all(s) & Return $True$ if all elements in $s$ are True (Similar to $and$)\\ \hline -any(s) &Return $True$ if any element in $s$ is True (similar to $or$)\\ \hline -\end{tabular} - \label{tab:common_method_sequence_python} -\centering -\noindent\captionof{table}{ Common out of place operators for Sequence Data Type in Python} - \noindent \begin{tabular}{|p{0.45\columnwidth}|p{0.55\columnwidth}| } - \hline -Operation & Description \\ \hline -s + r & Concatenates two sequences of the same type \\\hline -s * n &Make $n$ copies of $s$, where $n$ is an integer\\ \hline -$v_1, v_2, ..., v_n = s$ &Unpack $n$ variables from $s$\\ \hline -s[i] &Indexing-returns $i$th element of $s$\\ \hline -s[i:j:stride] & Slicing-returns elements between $i$ and $j$ with optinal stride\\ \hline -x in s &Return $True$ if element $x$ is in $s$\\ \hline -x not in s &Return $True$ if element $x$ is not in $s$\\ \hline -\end{tabular} - \label{tab:common_operation_sequence_python} - \end{small} -\end{table} - - - -%%%%%%%%%%%%%%%% -% Circular Array! % -%%%%%%%%%%%%%%%% -\subsection{Bonus} -\paragraph{Circular Array} -The corresponding problems include: -\begin{enumerate} - \item 503. Next Greater Element II -\end{enumerate} - - -\subsection{Exercises } -\begin{enumerate} - \item 985. Sum of Even Numbers After Queries (easy) - \item 937. Reorder Log Files - -You have an array of logs. 
Each log is a space delimited string of words. - -For each log, the first word in each log is an alphanumeric identifier. Then, either: - - Each word after the identifier will consist only of lowercase letters, or; - Each word after the identifier will consist only of digits. - -We will call these two varieties of logs letter-logs and digit-logs. It is guaranteed that each log has at least one word after its identifier. - -Reorder the logs so that all of the letter-logs come before any digit-log. The letter-logs are ordered lexicographically ignoring identifier, with the identifier used in case of ties. The digit-logs should be put in their original order. - -Return the final order of the logs. -\begin{lstlisting} -Example 1: - -Input: ["a1 9 2 3 1","g1 act car","zo4 4 7","ab1 off key dog","a8 act zoo"] -Output: ["g1 act car","a8 act zoo","ab1 off key dog","a1 9 2 3 1","zo4 4 7"] - - - -Note: - - 0 <= logs.length <= 100 - 3 <= logs[i].length <= 100 - logs[i] is guaranteed to have an identifier, and a word after the identifier. 
-\end{lstlisting} -\begin{lstlisting}[language=Python] -def reorderLogFiles(self, logs): - letters = [] - digits = [] - for idx, log in enumerate(logs): - splited = log.split(' ') - id = splited[0] - type = splited[1] - - if type.isnumeric(): - digits.append(log) - else: - letters.append((' '.join(splited[1:]), id)) - letters.sort() #default sorting by the first element and then the second in the tuple - - return [id + ' ' + other for other, id in letters] + digits -\end{lstlisting} -\begin{lstlisting}[language=Python] -def reorderLogFiles(logs): - digit = [] - letters = [] - info = {} - for log in logs: - if '0' <= log[-1] <= '9': - digit.append(log) - else: - letters.append(log) - index = log.index(' ') - info[log] = log[index+1:] - - letters.sort(key= lambda x: info[x]) - return letters + digit -\end{lstlisting} -\end{enumerate} - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -% Linked List -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\section{Linked List} -\label{chapter_linked_list} -Python does not have built-in data type or modules that offers the Linked List-like data structures, however, it is not hard to implement it ourselves. -%%%%%%%%%%Singly Linked List%%%%%%%%%%%% -\subsection{Singly Linked List} -\label{linked_list_subsec_singly} -\begin{figure}[h!] - \centering - \includegraphics[width=.98\columnwidth]{fig/linked_list1.png} - \caption{Linked List Structure} - \label{fig:singly_linkedlist} -\end{figure} - -Linked list consists of \texttt{nodes}, and each \texttt{node} consists of at least two variables for singly linked lit: \texttt{val} to save data and \texttt{next}, a pointer that points to the successive node. 
The \texttt{Node} class is given as: -\begin{lstlisting}[language=Python] -class Node(object): - def __init__(self, val = None): - self.val = val - self.next = None -\end{lstlisting} - -In Singly Linked List, usually we can start to with a \textbf{head} node which points to the first node in the list; only with this single node we are able to trace other nodes. For simplicity, demonstrate the process without using class, but we provide a class implementation with name \texttt{SinglyLinkeList} in our online python source code. Now, let us create an empty node named \texttt{head}. -\begin{lstlisting}[language=Python] -head = None -\end{lstlisting} -We need to implement its standard operations, including insertion/append, delete, search, clear. However, if we allow to the head node to be \texttt{None}, there would be special cases to handle. Thus, we implement a \textbf{dummy node}--a node but with \texttt{None} as its value as the head, to simplify the coding. Thus, we point the head to a dummy node: -\begin{lstlisting}[language=Python] -head = Node(None) -\end{lstlisting} -\paragraph{Append Operation} As the append function in list, we add node at the very end of the linked list. If without the dummy node, then there will be two cases: -\begin{itemize} - \item When \texttt{head} is an empty node, we assign the new \texttt{node} to \texttt{head}. - \item When it is not empty, we because all we have that is available is the head pointer, thus, it we need to first traverse all the nodes up till the very last node whose \texttt{next} is \texttt{None}, then we connect \texttt{node} to the last node through assigning it to the last node's \texttt{next} pointer. -\end{itemize} -The first case is simply bad: we would generate a new node and we can not track the head through in-place operation. However, with the dummy node, only the second case will appear. 
The code is: -\begin{lstlisting}[language=Python] -def append(head, val): - node = Node(val) - cur = head - while cur.next: - cur = cur.next - cur.next = node - return -\end{lstlisting} -Now, let us create the same exact linked list in Fig.~\ref{fig:singly_linkedlist}: -\begin{lstlisting}[language=Python] -for val in ['A', 'B', 'C', 'D']: - append(head, val) -\end{lstlisting} -\paragraph{Generator and Search Operations} In order to traverse and iterate the linked list using syntax like \texttt{for ... in} statement like any other sequence data types in Python, we implement the \texttt{gen()} function that returns a generator of all nodes of the list. Because we have a dummy node, we always start at \texttt{head.next}. -\begin{lstlisting}[language=Python] -def gen(head): - cur = head.next - while cur: - yield cur - cur = cur.next -\end{lstlisting} -Now, let us print out the linked list we created: -\begin{lstlisting}[language=Python] -for node in gen(head): - print(node.val, end = ' ') -\end{lstlisting} -Here is the output: -\begin{lstlisting}[numbers=none] -A B C D -\end{lstlisting} -In the search operation, we find a node by value and return this node; otherwise, we return \texttt{None}. -\begin{lstlisting}[language=Python] -def search(head, val): - for node in gen(head): - if node.val == val: - return node - return None -\end{lstlisting} -Now, we search for value `B' with: -\begin{lstlisting}[language=Python] -node = search(head, 'B') -\end{lstlisting} - -\paragraph{Delete Operation} For deletion, there are two scenarios: deleting a node by value when we are given the head node and deleting a given node such as the node we got from searching 'B'. - -The first case requires us to first locate the node, and rewire the pointers between the predecessor and successor of the deleting node. 
Again here, if we do not have a dummy node, we would have two cases: if the node is the head node, we repoint the head to the next node; otherwise, we connect the previous node to the deleting node's next node, and the head pointer remains untouched. With dummy node, we would only have the second situation. In the process, we use an additional variable \texttt{prev} to track the predecessor. -\begin{lstlisting}[language=Python] -def delete(head, val): - cur = head.next # start from dummy node - prev = head - while cur: - if cur.val == val: - # rewire - prev.next = cur.next - return - prev = cur - cur = cur.next -\end{lstlisting} -Now, let us delete one more node--'A' with this function. -\begin{lstlisting}[language=Python] -delete(head,'A') -for n in gen(head): - print(n.val, end = ' ') -\end{lstlisting} -Now the output will indicate we only have two nodes left: -\begin{lstlisting}[language=Python] -C D -\end{lstlisting} - -The second case might seem a bit impossible--we do not know its previous node, the trick we do is to copy the value of the next node to current node, and we delete the next node instead by pointing current node to the node after next node. However, that only works when the deleting node is not the last node. When it is, we have no way to completely delete it; but we can make it ``invalid'' by setting value and \texttt{next} to \texttt{None}. -\begin{lstlisting}[language=Python] -def deleteByNode(node): - # pull the next node's value into the current node - if node.next: - node.val = node.next.val - node.next = node.next.next - else: # last node: cannot be unlinked, so mark it invalid - node.val = None - node.next = None - return node -\end{lstlisting} -Now, let us try deleting the node 'B' via our previously found \texttt{node}. 
-\begin{lstlisting}[language=Python] -deleteByNode(node) -for n in gen(head): - print(n.val, end = ' ') -\end{lstlisting} -The output is: -\begin{lstlisting}[language=Python] -A C D -\end{lstlisting} - -\paragraph{Clear} When we need to clear all the nodes of the linked list, we just set the node next to the dummy head to \texttt{None}. -\begin{lstlisting}[language=Python] - def clear(self): - self.head = None - self.size = 0 -\end{lstlisting} - -Question: Some linked lists only allow inserting a node at the tail, which is Append; some others might allow insertion at any location. To get the length of the linked list easily in O(1), we need a variable to track the size. - - -\subsection{Doubly Linked List} -\label{linked_list_subsec_doubly} -\begin{figure}[h] - \centering - \includegraphics[width=0.9\columnwidth]{fig/DLL1.png} - \caption{Doubly Linked List} - \label{fig:dll1} -\end{figure} -On the basis of Singly linked list, doubly linked list (dll) contains an extra pointer in the node structure which is typically called \texttt{prev} (short for previous) and points back to its predecessor in the list. We define the \texttt{Node} class as: -\begin{lstlisting}[language=Python] -class Node: - def __init__(self, val, prev = None, next = None): - self.val = val - self.prev = prev # reference to previous node in DLL - self.next = next # reference to next node in DLL -\end{lstlisting} -Similarly, let us start with setting the dummy node as head: -\begin{lstlisting}[language=Python] -head = Node(None) -\end{lstlisting} - -Now, instead of me continuing to implement all operations that are slight variants of the singly linked list, why don't you implement them yourself? Do not worry, try it first, and also I have the answer covered in the google colab, enjoy! - -Now, I assume that you have implemented those operations and/or checked up the solutions. 
We would notice in \texttt{search()} and \texttt{gen()}, the code is exactly the same, and for other operations, there is only one or two lines of code that differs from SLL. Let's quickly list these operations: - -\paragraph{Append Operation} In DLL, we have to set the appending node's \texttt{prev} pointer to the last node of the linked list. The code is: -\begin{lstlisting}[language=Python] -def append(head, val): - node = Node(val) - cur = head - while cur.next: - cur = cur.next - cur.next = node - node.prev = cur ## only difference - return -\end{lstlisting} - -\paragraph{Generator and Search Operations} There is no much difference if we just search through \texttt{next} pointer. However, with the extra \texttt{prev} pointer, we can have two options: either search forward through \texttt{next} or backward through \texttt{prev} if the given starting node is any node. Whereas for SLL, this is not an option, because we would not be able to conduct a complete search--we can only search among the items behind from the given node. When the data is ordered in some way, or if the program is parallel--situations that bidirectional search would make sense. -\begin{lstlisting}[language=Python] -def gen(head): - cur = head.next - while cur: - yield cur - cur = cur.next -\end{lstlisting} -\begin{lstlisting}[language=Python] -def search(head, val): - for node in gen(head): - if node.val == val: - return node - return None -\end{lstlisting} - -\paragraph{Delete Operation} -To delete a node by value, we first find it in the linked list, and the rewiring process needs to deal with the next node's \texttt{prev} pointer if the next node exists. 
-\begin{lstlisting}[language=Python] -def delete(head, val): - cur = head.next # start from dummy node - while cur: - if cur.val == val: - # rewire - cur.prev.next = cur.next - if cur.next: - cur.next.prev = cur.prev - return - cur = cur.next -\end{lstlisting} -For \texttt{deleteByNode}, because we are cutting off \texttt{node.next}, we need to connect node to \texttt{node.next.next} in two directions: first point \texttt{prev} of later node to current node, and set point current node's \texttt{next} to the later node. -\begin{lstlisting}[language=Python] -def deleteByNode(node): - # pull the next node to current node - if node.next: - node.val = node.next.val - if node.next.next: - node.next.next.prev = node - node.next = node.next.next - else: #last node - node.prev.next = None - return node -\end{lstlisting} - -\paragraph{Comparison} We can see there is some slight advantage of dll over sll, but it comes with the cost of handing the extra \texttt{prev}. This would only be an advantage when bidirectional searching plays dominant factor in the matter of efficiency, otherwise, better stick with sll. - -\paragraph{Tips} From our implementation, in some cases we still need to worry about if it is the last node or not. The coding logic can further be simplified if we put a dummy node at the end of the linked list too. - - - -\subsection{Bonus} -% \paragraph{Tail Pointer} -% For both singly and doubly linked list, if we add another \textbf{tail} pointer to its class, which points at the last node in the list, can simplify some operations of the linked list from $O(n)$ to $O(1)$. - -\paragraph{Circular Linked List} -A circular linked list is a variation of linked list in which the first node connects to last node. 
To make a circular linked list from a normal linked list: in singly linked list, we simply set the last node's \texttt{next} pointer to the first node; in doubly linked list, other than setting the last node's \texttt{next} pointer, we set the \texttt{prev} pointer of the first node to the last node making the circular in both directions. - -Compared with a normal linked list, circular linked list saves time for us to go to the first node from the last (both sll and dll) or go to the last node from the first node (in dll) by doing it in a single step through the extra connection. Because it is a circle, when ever a search with a \texttt{while} loop is needed, we need to make sure the end condition: just make sure we searched a whole cycle by comparing the iterating node to the starting node. - -\paragraph{Recursion} Recursion offers additional pass of traversal--bottom-up on the basis of the top-down direction and in practice, it offers clean and simpler code compared with iteration. - -\subsection{Hands-on Examples} -\paragraph{Remove Duplicates (L83)} Given a sorted linked list, delete all duplicates such that each element appear only once. -\begin{lstlisting}[numbers=none] -Example 1: - -Input: 1->1->2 -Output: 1->2 - -Example 2: - -Input: 1->1->2->3->3 -Output: 1->2->3 -\end{lstlisting} - -\subsubsection{Analysis} This is a linear complexity problem, the most straightforward way is to iterate through the linked list and compare the current node's value with the next's to check its equivalency: (1) if YES: delete one of the nodes, here we go for the next node; (2) if NO: we can move to the next node safely and sound. - -\paragraph{Iteration without Dummy Node} We start from the \texttt{head} in a \texttt{while} loop, if the next node exists and if the value equals, we delete next node. 
However, after the deletion, we can not move to next directly; say if we have 1->1->1, when the second 1 is removed, if we move, we will be at the last 1, and would fail removing all possible duplicates. The code is given: -\begin{lstlisting}[language=Python] -def deleteDuplicates(self, head): - """ - :type head: ListNode - :rtype: ListNode - """ - if not head: - return None - - def iterative(head): - current = head - while current: - if current.next and current.val == current.next.val: - # delete next - current.next = current.next.next - else: - current = current.next - return head - - return iterative(head) -\end{lstlisting} -\paragraph{With Dummy Node} We see with a dummy node, we put \texttt{current.next} in the whole loop, because only if the next node exists, would we need to compare the values. Besides, we do not need to check this condition within the \texttt{while} loop. -\begin{lstlisting}[language=Python] -def iterative(head): - dummy = ListNode(None) - dummy.next = head - current = dummy - while current.next: - if current.val == current.next.val: - # delete next - current.next = current.next.next - else: - current = current.next - return head -\end{lstlisting} -\paragraph{ Recursion}Now, if we use recursion and return the node, thus, at each step, we can compare our node with the returned node (locating behind the current node), same logical applies. A better way to help us is drawing out an example. With 1->1->1. The last 1 will return, and at the second last 1, we can compare them, because it equals, we delete the last 1, now we backtrack to the first 1 with the second last 1 as returned node, we compare again. The code is the simplest among all solutions. -\begin{lstlisting}[language=Python] - def recursive(node): - if node.next is None: - return node - - next = recursive(node.next) - if next.val == node.val: - node.next = node.next.next - return node -\end{lstlisting} - - - -\subsection{Exercises} -Basic operations: -\begin{enumerate} - \item 237. 
Delete Node in a Linked List (easy, delete only given current node) - \item 2. Add Two Numbers (medium) - \item 92. Reverse Linked List II (medium, reverse in one pass) - \item 83. Remove Duplicates from Sorted List (easy) - \item 82. Remove Duplicates from Sorted List II (medium) - \item Sort List - \item Reorder List -\end{enumerate} - -Fast-slow pointers: -\begin{enumerate} - \item 876. Middle of the Linked List (easy) - \item Two Pointers in Linked List - \item Merge K Sorted Lists -\end{enumerate} - -Recursive and linked list: -\begin{enumerate} -\item 369. Plus One Linked List (medium) -\end{enumerate} - - - - - -% this is the advanced data structure - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%%%%%%%%%%%%% stack and queue -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\section{Stack and Queue} -\label{chapter_queue_stack} -The stack data structure fits well for tasks that require us to check previous states from the closest level to the furthest level. Here are some exemplary applications: (1) reversing an array, (2) implementing DFS iteratively as we will see in Chapter~\ref{chapter_non_linear_searching}, (3) keeping track of the return address during function calls, (4) recording the previous states for backtracking algorithms. - -The queue data structure can be used to: (1) implement BFS as shown in Chapter~\ref{chapter_non_linear_searching}, (2) implement a queue buffer. - -In the remainder of this section, we will discuss implementations that use the built-in data types or built-in modules. After this, we will learn more advanced queues and stacks: the priority queue and the monotone queue, which can be used to solve medium to hard problems on LeetCode. - -\subsection{Basic Implementation} - -For the Queue and Stack data structures, the two essential operations are adding an item and removing an item. In Stack, they are usually called \textbf{PUSH} and \textbf{POP}.
PUSH will add one item, and POP will remove one item and return its value. These two operations should only take $O(1)$ time. Sometimes, we need another operation called PEEK, which just returns the element that is next to be removed from the queue or stack without removing it. In Queue, the adding and removing operations are named \textbf{Enqueue} and \textbf{Dequeue}. - -The simplest implementation is to use a Python list with the methods $insert()$ (inserts an item at a given position), $pop()$ (removes the element at the given index, updates the list, and returns the value; the default is to remove the last item), and $append()$. However, the list data structure can not always meet the time complexity requirement, as operations at the front of the list, such as $insert(0, x)$ and $pop(0)$, take $O(n)$. We still present it because the code is simple and thus saves you from using a specific module or implementing a more complex data structure. - -\paragraph{Stack} The implementation for stack is simply adding and deleting elements from the end. -\begin{lstlisting}[language = Python] -# stack -s = [] -s.append(3) -s.append(4) -s.append(5) -s.pop() -\end{lstlisting} - -\paragraph{Queue} For queue, we can always append at the end and pop from the first index. Or we can insert at the first index and pop the last element. -\begin{lstlisting}[language = Python] -# queue -# 1: use append and pop -q = [] -q.append(3) -q.append(4) -q.append(5) -q.pop(0) -\end{lstlisting} -Running the above code will give us the following output: -\begin{lstlisting}[language=Python] -print('stack:', s, ' queue:', q) -stack: [3, 4] queue: [4, 5] -\end{lstlisting} - -The other way to implement them is to write a class for each, using the concept of a node that shares the same definition as the linked list node. Such an implementation can satisfy the $O(1)$ time restriction. For both the stack and queue, we utilize the singly linked list data structure.
%Here we just give the code and comments directly: - -\paragraph{Stack and Singly Linked List with top pointer} Because in stack, we only need to add or delete item from the rear, using one pointer pointing at the rear item, and the linked list's next is connected to the second toppest item, in a direction from the top to the bottom. -\begin{lstlisting}[language=Python] -# stack with linked list -'''a<-b<-c<-top''' -class Stack: - def __init__(self): - self.top = None - self.size = 0 - - # push - def push(self, val): - node = Node(val) - if self.top: # connect top and node - node.next = self.top - # reset the top pointer - self.top = node - self.size += 1 - - def pop(self): - if self.top: - val = self.top.val - if self.top.next: - self.top = self.top.next # reset top - else: - self.top = None - self.size -= 1 - return val - - else: # no element to pop - return None -\end{lstlisting} - -\paragraph{Queue and Singly Linked List with Two Pointers} For queue, we need to access the item from each side, therefore we use two pointers pointing at the head and the tail of the singly linked list. And the linking direction is from the head to the tail. -\begin{lstlisting}[language=Python] -# queue with linked list -'''head->a->b->tail''' -class Queue: - def __init__(self): - self.head = None - self.tail = None - self.size = 0 - - # push - def enqueue(self, val): - node = Node(val) - if self.head and self.tail: # connect top and node - self.tail.next = node - self.tail = node - else: - self.head = self.tail = node - - self.size += 1 - - def dequeue(self): - if self.head: - val = self.head.val - if self.head.next: - self.head = self.head.next # reset top - else: - self.head = None - self.tail = None - self.size -= 1 - return val - - else: # no element to pop - return None -\end{lstlisting} - -Also, Python provide two built-in modules: \textbf{Deque} and \textbf{Queue} for such purpose. We will detail them in the next section. 
-%%%%%%%%%%%%%%%%%%%%Deque and Queue%%%%%%%%%%%%%%%%%% -\subsection{Deque: Double-Ended Queue} -The deque object is a supplementary container data type from Python's \textbf{collections} module. It is a generalization of stacks and queues, and the name is short for ``double-ended queue''. Deque is optimized for adding/popping items from both ends of the container in $O(1)$. Thus it is preferred over \textbf{list} in some cases. To create a new deque object, we use \textbf{deque([iterable[, maxlen]])}. This returns a new deque object initialized left-to-right with data from iterable. If maxlen is not specified or is set to None, the deque may grow to an arbitrary length. Before implementing it, we first learn the methods of the \textbf{deque class} in Table~\ref{tab:common_operation_deque}. -\begin{table}[h] -\begin{small} -\centering -\noindent\captionof{table}{ Common Methods of Deque} - \noindent \begin{tabular}{|p{0.25\columnwidth}|p{0.75\columnwidth}| } - \hline -Method & Description \\ \hline -append(x) & Add x to the right side of the deque. \\\hline -appendleft(x) &Add x to the left side of the deque.\\ \hline -pop() &Remove and return an element from the right side of the deque. If no elements are present, raises an IndexError.\\ \hline -popleft() &Remove and return an element from the left side of the deque. If no elements are present, raises an IndexError.\\ \hline -maxlen & Deque objects also provide one read-only attribute: the maximum size of a deque, or None if unbounded.\\ \hline -count(x) &Count the number of deque elements equal to x.\\ \hline -extend(iterable) &Extend the right side of the deque by appending elements from the iterable argument.\\ \hline -extendleft(iterable) &Extend the left side of the deque by appending elements from iterable. Note, the series of left appends results in reversing the order of elements in the iterable argument.\\ \hline -remove(value) &Remove the first occurrence of value.
If not found, raises a ValueError.\\ \hline -reverse() &Reverse the elements of the deque in-place and then return None.\\ \hline -rotate(n=1) &Rotate the deque n steps to the right. If n is negative, rotate to the left.\\ \hline -\end{tabular} - \label{tab:common_operation_deque} - \end{small} -\end{table} - -In addition to the above, deques support iteration, pickling, len(d), reversed(d), copy.copy(d), copy.deepcopy(d), membership testing with the in operator, and subscript references such as d[-1]. %Indexed access is O(1) at both ends but slows to O(n) in the middle. For fast random access, use lists instead. - -Now, we use deque to implement a basic stack and queue,the main methods we need are: append(), appendleft(), pop(), popleft(). -\begin{lstlisting}[language = Python] -'''Use deque from collections''' -from collections import deque -q = deque([3, 4]) -q.append(5) -q.popleft() - -s = deque([3, 4]) -s.append(5) -s.pop() -\end{lstlisting} -Printing out the q and s: -\begin{lstlisting}[language=Python] -print('stack:', s, ' queue:', q) -stack: deque([3, 4]) queue: deque([4, 5]) -\end{lstlisting} - -\paragraph{Deque and Ring Buffer} Ring Buffer or Circular Queue is defined as a linear data structure in which the operations are performed based on FIFO (First In First Out) principle and the last position is connected back to the first position to make a circle. This normally requires us to predefine the maximum size of the queue. To implement a ring buffer, we can use deque as a queue as demonstrated above, and when we initialize the object, set the maxLen. Once a bounded length deque is full, when new items are added, a corresponding number of items are discarded from the opposite end. - -\subsection{Python built-in Module: Queue} -The \textbf{queue module} provides thread-safe implementation of Stack and Queue like data structures. It encompasses three types of queue as shown in Table~\ref{tab:three_classes_queue}. 
\textit{In python 3, we use lower case queue, but in Python 2.x it uses Queue, in our book, we learn Python 3.} -\begin{table}[h] -\begin{small} -\centering -\noindent\captionof{table}{ Datatypes in Queue Module, maxsize is an integer that sets the upperbound limit on the number of items that can be places in the queue. Insertion will block once this size has been reached, until queue items are consumed. If maxsize is less than or equal to zero, the queue size is infinite.} - \noindent \begin{tabular}{|p{0.45\columnwidth}|p{0.45\columnwidth}| } - \hline -Class & Data Structure \\ \hline -class queue.Queue(maxsize=0) & Constructor for a FIFO queue. \\\hline -class queue.LifoQueue(maxsize=0) & Constructor for a LIFO queue. \\ \hline -class queue.PriorityQueue(maxsize=0) & Constructor for a priority queue.\\ \hline -\end{tabular} - \label{tab:three_classes_queue} - \end{small} -\end{table} - -Queue objects (Queue, LifoQueue, or PriorityQueue) provide the public methods described below in Table~\ref{tab:methods_of_queue}. -\begin{table}[h] -\begin{small} -\centering -\noindent\captionof{table}{ Methods for Queue's three classes, here we focus on single-thread background.} - \noindent \begin{tabular}{|p{0.45\columnwidth}|p{0.45\columnwidth}| } - \hline -Class & Data Structure \\ \hline -Queue.put(item[, block[, timeout]]) & Put item into the queue. \\\hline -Queue.get([block[, timeout]]) & Remove and return an item from the queue. \\ \hline -Queue.qsize() & Return the approximate size of the queue.\\ \hline -Queue.empty() & Return True if the queue is empty, False otherwise.\\ \hline -Queue.full() & Return True if the queue is full, False otherwise. 
\\ \hline -\end{tabular} - \label{tab:methods_of_queue} - \end{small} -\end{table} - -Now, using Queue() and LifoQueue() to implement queue and stack respectively is straightforward: -\begin{lstlisting}[language = Python] -# python 3 -import queue -# implementing queue -q = queue.Queue() -for i in range(3, 6): - q.put(i) -\end{lstlisting} -\begin{lstlisting}[language = Python] -import queue -# implementing stack -s = queue.LifoQueue() - -for i in range(3, 6): - s.put(i) -\end{lstlisting} -Now, using the following printing: -\begin{lstlisting}[language=Python] -print('stack:', s, ' queue:', q) -stack: queue: -\end{lstlisting} -Instead we print with: -\begin{lstlisting}[language=Python] -print('stack: ') -while not s.empty(): - print(s.get(), end=' ') -print('\nqueue: ') -while not q.empty(): - print(q.get(), end = ' ') -stack: -5 4 3 -queue: -3 4 5 -\end{lstlisting} - -% \subsubsection{Obvious Application} -% 496. Next Greater Element I -% \begin{lstlisting} -% You are given two arrays (without duplicates) nums1 and nums2 where nums1's elements are subset of nums2. Find all the next greater numbers for nums1's elements in the corresponding places of nums2. - -% The Next Greater Number of a number x in nums1 is the first greater number to its right in nums2. If it does not exist, output -1 for this number. - -% Example 1: - -% Input: nums1 = [4,1,2], nums2 = [1,3,4,2]. -% Output: [-1,3,-1] -% Explanation: -% For number 4 in the first array, you cannot find the next greater number for it in the second array, so output -1. -% For number 1 in the first array, the next greater number for it in the second array is 3. -% For number 2 in the first array, there is no next greater number for it in the second array, so output -1. - -% Example 2: - -% Input: nums1 = [2,4], nums2 = [1,2,3,4]. -% Output: [3,-1] -% Explanation: -% For number 2 in the first array, the next greater number for it in the second array is 3. 
-% For number 4 in the first array, there is no next greater number for it in the second array, so output -1. - -% Note: - -% All elements in nums1 and nums2 are unique. -% The length of both nums1 and nums2 would not exceed 1000. -% \end{lstlisting} -% Analysis: The naive solution is: one for loop in nums1, another embedded loop in nums2 to first identify the number and then find the answer. The time complexity will be $O(m*n)$ the length of each array. - -% This is also a straightforward application of the features of monotonic queue, use the decreasing queue doing popping out to find the first larger number to its right. But, after we get the result for the nums2, we need to map it to nums1, converting nums1 to a hashmap, specifically a dict can help us achieve $O(m+n)$ time complexity. -% \begin{lstlisting}[language=Python] -% class Solution: -% def nextGreaterElement(self, nums1, nums2): -% """ -% :type nums1: List[int] -% :type nums2: List[int] -% :rtype: List[int] -% """ -% if not nums1: -% return [] -% rst = [-1]*len(nums1) -% # converting nums1 to hashmap -% nums = collections.defaultdict(int) #value and index, value as key because has no duplicate -% for i, v in enumerate(nums1): -% nums[v] = i - -% dq = collections.deque() # -% # implementing a decresing queue in nums2 -% for i, v in enumerate(nums2): -% while dq and dq[-1] <= v: -% if dq[-1] in nums: #check if it is in nums1 -% rst[nums[dq[-1]]] = v -% dq.pop() -% dq.append(v) -% return rst -% \end{lstlisting} - -% % can be put into exercise -% 503. Next Greater Element II -% \begin{lstlisting} -% Given a circular array (the next element of the last element is the first element of the array), print the Next Greater Number for every element. The Next Greater Number of a number x is the first greater number to its traversing-order next in the array, which means you could search circularly to find its next greater number. If it doesn't exist, output -1 for this number. 
- -% Example 1: - -% Input: [1,2,1] -% Output: [2,-1,2] -% Explanation: The first 1's next greater number is 2; -% The number 2 can't find next greater number; -% The second 1's next greater number needs to search circularly, which is also 2. - -% Note: The length of given array won't exceed 10000. -% \end{lstlisting} -% for a circular array we can use $nums = nums*2$, each time we save the result, we just need to check the index range. -% \begin{lstlisting}[language = Python] -% class Solution: -% def nextGreaterElements(self, nums): -% """ -% :type nums: List[int] -% :rtype: List[int] -% """ -% if not nums: -% return [] -% n = len(nums) -% nums = nums*2 -% dq = collections.deque() #save index because we need to check if this is in the original nums -% rst = [-1]*n - -% for i, v in enumerate(nums): -% while dq and nums[dq[-1]] < v: -% if 0 <= dq[-1] < n: -% rst[dq[-1]] = v -% dq.pop() -% dq.append(i) - -% return rst -% \end{lstlisting} -% 121. Best Time to Buy and Sell Stock -% \begin{lstlisting} -% Say you have an array for which the ith element is the price of a given stock on day i. - -% If you were only permitted to complete at most one transaction (i.e., buy one and sell one share of the stock), design an algorithm to find the maximum profit. - -% Note that you cannot sell a stock before you buy one. - -% Example 1: - -% Input: [7,1,5,3,6,4] -% Output: 5 -% Explanation: Buy on day 2 (price = 1) and sell on day 5 (price = 6), profit = 6-1 = 5. -% Not 7-1 = 6, as selling price needs to be larger than buying price. - -% Example 2: - -% Input: [7,6,4,3,1] -% Output: 0 -% Explanation: In this case, no transaction is done, i.e. max profit = 0. -% \end{lstlisting} -% Analysis: the naive solution is using two embedded for loops, one for buy, another later for sell. We have $O(n^2)$ time complexity. Suppose we are construct an increasing monotone queue for the prices. 
Each time when we kick out previous larger one, for that number we can check the price difference of it with the first element in the queue which is the smallest. At last, for the constructed queue, for the first and the last element, there is another price difference. -% \begin{lstlisting}[language = Python] -% def maxProfit(self, prices): -% if len(prices)<=1: -% return 0 - -% iq = collections.deque() -% ans = 0 -% for p in prices: -% while iq and iq[-1] >= p: -% sell = iq.pop() -% if iq: -% ans = max(ans, sell-iq[0]) -% iq.append(p) -% if len(iq) >= 2: -% ans = max(ans, iq[-1] - iq[0]) -% return ans -% \end{lstlisting} - -%%%%%%%%%%%%%Bonus%%%%%%%%%%%% -\subsection{Bonus} -\paragraph{Circular Linked List and Circular Queue} The circular queue is a linear data structure in which the operations are performed based on the FIFO principle and the last position is connected back to the first position to make a circle. It is also called ``Ring Buffer''. A circular queue can be implemented with either a list or a circular linked list. If we use a list, we initialize it with a fixed size, filling every slot with None. To find the position for enqueue(), we use $rear = (rear +1 ) \% size $. Similarly, for dequeue(), we use $front = (front+1) \% size$ to find the next front position. - -%%%%%%%%%%%%%LeetCode Problems%%%%%%%%%%%% -\subsection{Exercises} -\paragraph{Queue and Stack} -\begin{enumerate} - \item 225. Implement Stack using Queues (easy) - \item 232. Implement Queue using Stacks (easy) - \item 933. Number of Recent Calls (easy) -\end{enumerate} -Queues fit well for buffering problems. -\begin{enumerate} - \item 933. Number of Recent Calls (easy) - \item 622. Design Circular Queue (medium) -\end{enumerate} -\begin{lstlisting} -Write a class RecentCounter to count recent requests. - -It has only one method: ping(int t), where t represents some time in milliseconds. - -Return the number of pings that have been made from 3000 milliseconds ago until now.
- -Any ping with time in [t - 3000, t] will count, including the current ping. - -It is guaranteed that every call to ping uses a strictly larger value of t than before. - - - -Example 1: - -Input: inputs = ["RecentCounter","ping","ping","ping","ping"], inputs = [[],[1],[100],[3001],[3002]] -Output: [null,1,2,3,3] -\end{lstlisting} -Analysis: This is a typical buffer problem. If the data exceeds the size of the buffer, we squeeze out the earliest data. Thus, a queue can be used to save each t, and on every call we squeeze out any time that is not in the range of [t-3000, t]: -\begin{lstlisting}[language=Python] -class RecentCounter: - - def __init__(self): - self.ans = collections.deque() - - def ping(self, t): - """ - :type t: int - :rtype: int - """ - self.ans.append(t) - while self.ans[0] < t-3000: - self.ans.popleft() - return len(self.ans) -\end{lstlisting} - -\paragraph{Monotone Queue} -\begin{enumerate} - \item 84. Largest Rectangle in Histogram - \item 85. Maximal Rectangle - \item 122. Best Time to Buy and Sell Stock II - \item 654. Maximum Binary Tree -\end{enumerate} -Obvious applications: -\begin{enumerate} - \item 496. Next Greater Element I - \item 503. Next Greater Element II - \item 121. Best Time to Buy and Sell Stock -\end{enumerate} - -\begin{enumerate} - \item 84. Largest Rectangle in Histogram - \item 85. Maximal Rectangle - \item 122. Best Time to Buy and Sell Stock II - \item 654. Maximum Binary Tree - \item 42. Trapping Rain Water - \item 739. Daily Temperatures - \item 321. Create Maximum Number -\end{enumerate} - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -% hash map -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\section{Hash Table} -\label{chapter_hashmap} - -\subsection{Implementation} -In this section, we practice the concepts and methods we have learned by implementing a hash set and a hash map. - -\paragraph{Hash Set} Design a HashSet without using any built-in hash table libraries.
To be specific, your design should include these functions: (705. Design HashSet) -\begin{lstlisting}[numbers=none] -add(value): Insert a value into the HashSet. -contains(value) : Return whether the value exists in the HashSet or not. -remove(value): Remove a value in the HashSet. If the value does not exist in the HashSet, do nothing. -\end{lstlisting} -For example: -\begin{lstlisting}[numbers=none] -MyHashSet hashSet = new MyHashSet(); -hashSet.add(1); -hashSet.add(2); -hashSet.contains(1); // returns true -hashSet.contains(3); // returns false (not found) -hashSet.add(2); -hashSet.contains(2); // returns true -hashSet.remove(2); -hashSet.contains(2); // returns false (already removed) -\end{lstlisting} -\textit{Note: Note: (1) All values will be in the range of [0, 1000000]. (2) The number of operations will be in the range of [1, 10000].} - -\begin{lstlisting}[language=Python] -class MyHashSet: - - def _h(self, k, i): - return (k+i) % 10001 - - def __init__(self): - """ - Initialize your data structure here. 
- """ - self.slots = [None]*10001 - self.size = 10001 - - def add(self, key: 'int') -> 'None': - i = 0 - while i < self.size: - k = self._h(key, i) - if self.slots[k] == key: - return - elif not self.slots[k] or self.slots[k] == -1: - self.slots[k] = key - return - i += 1 - # double size - self.slots = self.slots + [None]*self.size - self.size *= 2 - return self.add(key) - - - def remove(self, key: 'int') -> 'None': - i = 0 - while i < self.size: - k = self._h(key, i) - if self.slots[k] == key: - self.slots[k] = -1 - return - elif self.slots[k] == None: - return - i += 1 - return - - def contains(self, key: 'int') -> 'bool': - """ - Returns true if this set contains the specified element - """ - i = 0 - while i < self.size: - k = self._h(key, i) - if self.slots[k] == key: - return True - elif self.slots[k] == None: - return False - i += 1 - return False -\end{lstlisting} - -\paragraph{Hash Map} Design a HashMap without using any built-in hash table libraries. To be specific, your design should include these functions: (706. Design HashMap (easy)) -\begin{itemize} - \item put(key, value) : Insert a (key, value) pair into the HashMap. If the value already exists in the HashMap, update the value. - \item get(key): Returns the value to which the specified key is mapped, or -1 if this map contains no mapping for the key. - remove(key) : Remove the mapping for the value key if this map contains the mapping for the key. 
-\end{itemize} -Example: -\begin{lstlisting}[numbers=none] -hashMap = MyHashMap() -hashMap.put(1, 1); -hashMap.put(2, 2); -hashMap.get(1); // returns 1 -hashMap.get(3); // returns -1 (not found) -hashMap.put(2, 1); // update the existing value -hashMap.get(2); // returns 1 -hashMap.remove(2); // remove the mapping for 2 -hashMap.get(2); // returns -1 (not found) -\end{lstlisting} - -\begin{lstlisting}[language=Python] -class MyHashMap: - def _h(self, k, i): - return (k+i) % 10001 # [0, 10001] - def __init__(self): - """ - Initialize your data structure here. - """ - self.size = 10002 - self.slots = [None] * self.size - - - def put(self, key: 'int', value: 'int') -> 'None': - """ - value will always be non-negative. - """ - i = 0 - while i < self.size: - k = self._h(key, i) - if not self.slots[k] or self.slots[k][0] in [key, -1]: - self.slots[k] = (key, value) - return - i += 1 - # double size and try again - self.slots = self.slots + [None]* self.size - self.size *= 2 - return self.put(key, value) - - - def get(self, key: 'int') -> 'int': - """ - Returns the value to which the specified key is mapped, or -1 if this map contains no mapping for the key - """ - i = 0 - while i < self.size: - k = self._h(key, i) - if not self.slots[k]: - return -1 - elif self.slots[k][0] == key: - return self.slots[k][1] - else: # if its deleted keep probing - i += 1 - return -1 - - - def remove(self, key: 'int') -> 'None': - """ - Removes the mapping of the specified value key if this map contains a mapping for the key - """ - i = 0 - while i < self.size: - k = self._h(key, i) - if not self.slots[k]: - return - elif self.slots[k][0] == key: - self.slots[k] = (-1, None) - return - else: # if its deleted keep probing - i += 1 - return -\end{lstlisting} -\subsection{Python Built-in Data Structures} -\subsubsection{SET and Dictionary} In Python, we have the standard build-in data structure \textit{dictionary} and \textit{set} using hashtable. 
The set classes are implemented using dictionaries. Accordingly, the requirements for set elements are the same as those for dictionary keys; namely, that the object defines both $\_\_eq\_\_()$ and $\_\_hash\_\_()$ methods. The Python built-in function $hash(object)$ implements the hashing function and returns an integer hash value if the object has defined $\_\_eq\_\_()$ and $\_\_hash\_\_()$ methods. As a result, an object used as a key must be hashable; for the built-in types this means it must be immutable and comparable (has an \_\_eq\_\_() or \_\_cmp\_\_() method). % Set and Dictionary cannot contain mutable elements such as lists or dictionaries. However, they can contain immutable collections such as tuples or instances of ImmutableSet. For convenience in implementing sets of sets, inner sets are automatically converted to immutable form, for example, $Set([Set(['dog'])])$ is transformed to $Set([ImmutableSet(['dog'])])$. - -\paragraph{Python 2.X VS Python 3.X} -In Python 2.X, we can index or slice the result of keys() or items() of a dictionary directly. However, in Python 3.X, the same syntax will give us TypeError: 'dict\_keys' object does not support indexing. Instead, we need to use the function list() to convert it to a list and then index or slice it.
For example: -\begin{lstlisting} -# Python 2.x -dict.keys()[0] - -# Python 3.x -list(dict.keys())[0] -\end{lstlisting} - -\paragraph{\texttt{set} Data Type} -Method Description -Python Set remove() Removes an Element from the Set (raises KeyError if the element is absent) -Python Set add() Adds an Element to the Set -Python Set copy() Returns Shallow Copy of a Set -Python Set clear() Removes All Elements from the Set -Python Set difference() Returns Difference of Two Sets -Python Set difference\_update() Removes from the Calling Set All Elements Found in the Other Sets -Python Set discard() Removes an Element from the Set if It Is Present (no error if absent) -Python Set intersection() Returns Intersection of Two or More Sets -Python Set intersection\_update() Updates Calling Set With Intersection of Sets -Python Set isdisjoint() Checks Disjoint Sets -Python Set issubset() Checks if a Set is Subset of Another Set -Python Set issuperset() Checks if a Set is Superset of Another Set -Python Set pop() Removes an Arbitrary Element -Python Set symmetric\_difference() Returns Symmetric Difference -Python Set symmetric\_difference\_update() Updates Set With Symmetric Difference -Python Set union() Returns Union of Sets -Python Set update() Adds Elements to the Set. - -If we want to put a string in a set, we need to be careful about which form we use: -\begin{lstlisting}[language = Python] ->>> a = set('aardvark') ->>> -{'d', 'v', 'a', 'r', 'k'} ->>> b = {'aardvark'}# or set(['aardvark']), convert a list of strings to set ->>> b -{'aardvark'} -#or put a tuple in the set -a =set([tuple]) or {(tuple)} -\end{lstlisting} -Note the difference between the two forms when given a single word: the set literal \{'aardvark'\} contains the whole string as one element, while set('aardvark') iterates over the string and contains its individual characters. 
- -\paragraph{\texttt{dict} Data Type} -Method Description -clear() Removes all the elements from the dictionary -copy() Returns a shallow copy of the dictionary -fromkeys() Returns a dictionary with the specified keys, all mapped to the same specified value -get() Returns the value of the specified key -items() Returns a view containing a (key, value) tuple for each key-value pair -keys() Returns a view containing the dictionary's keys -pop() Removes the element with the specified key and returns its value -popitem() Removes and returns the last inserted key-value pair -setdefault() Returns the value of the specified key. If the key does not exist: insert the key, with the specified value -update() Updates the dictionary with the specified key-value pairs -values() Returns a view of all the values in the dictionary - -See use cases at \url{https://www.programiz.com/python-programming/dictionary}. - - -\subsubsection{Collection Module} -\paragraph{OrderedDict} Before Python 3.7, standard dictionaries were unordered, which means that any time you looped through a dictionary, you would go through every key, but you were not guaranteed to get them in any particular order (since Python 3.7, the built-in dict is guaranteed to preserve insertion order). -The OrderedDict from the collections module is a special type of dictionary that keeps track of the order in which its keys were inserted. Iterating the keys of an OrderedDict has predictable behavior. This can simplify testing and debugging by making all the code deterministic. - -\paragraph{defaultdict} Dictionaries are useful for bookkeeping and tracking statistics. One problem is that when we try to update an element, we have no idea whether the key is present or not, which requires us to check this condition every time. -\begin{lstlisting}[language = Python] -dict = {} -key = "counter" -if key not in dict: - dict[key]=0 -dict[key] += 1 -\end{lstlisting} -The defaultdict class from the collections module simplifies this process by supplying a default value when a key is not present. 
For different value type it has different default value, for example, for int, it is 0 as the default value. A defaultdict works exactly like a normal dict, but it is initialized with a function (“default factory”) that takes no arguments and provides the default value for a nonexistent key. Therefore, a defaultdict will never raise a KeyError. Any key that does not exist gets the value returned by the default factory. For example, the following code use a lambda function and provide 'Vanilla' as the default value when a key is not assigned and the second code snippet function as a counter. -\begin{lstlisting}[language=Python] -from collections import defaultdict -ice_cream = defaultdict(lambda: 'Vanilla') -ice_cream['Sarah'] = 'Chunky Monkey' -ice_cream['Abdul'] = 'Butter Pecan' -print ice_cream['Sarah'] -# Chunky Monkey -print ice_cream['Joe'] -# Vanilla -\end{lstlisting} - -%https://www.accelebrate.com/blog/using-defaultdict-python/ -\begin{lstlisting}[language = Python] -from collections import defaultdict -dict = defaultdict(int) # default value for int is 0 -dict['counter'] += 1 -\end{lstlisting} -There include: -Time Complexity for Operations Search, Insert, Delete: $O(1)$. - -\paragraph{Counter} -\subsection{Exercises} -\begin{enumerate} - \item 349. Intersection of Two Arrays (easy) - \item 350. Intersection of Two Arrays II (easy) -\end{enumerate} - 929. Unique Email Addresses -\begin{lstlisting} - Every email consists of a local name and a domain name, separated by the @ sign. - -For example, in alice@leetcode.com, alice is the local name, and leetcode.com is the domain name. - -Besides lowercase letters, these emails may contain '.'s or '+'s. - -If you add periods ('.') between some characters in the local name part of an email address, mail sent there will be forwarded to the same address without dots in the local name. For example, "alice.z@leetcode.com" and "alicez@leetcode.com" forward to the same email address. 
(Note that this rule does not apply for domain names.) - -If you add a plus ('+') in the local name, everything after the first plus sign will be ignored. This allows certain emails to be filtered, for example m.y+name@email.com will be forwarded to my@email.com. (Again, this rule does not apply for domain names.) - -It is possible to use both of these rules at the same time. - -Given a list of emails, we send one email to each address in the list. How many different addresses actually receive mails? - -Example 1: - -Input: ["test.email+alex@leetcode.com","test.e.mail+bob.cathy@leetcode.com","testemail+david@lee.tcode.com"] -Output: 2 -Explanation: "testemail@leetcode.com" and "testemail@lee.tcode.com" actually receive mails - -Note: - 1 <= emails[i].length <= 100 - 1 <= emails.length <= 100 - Each emails[i] contains exactly one '@' character. -\end{lstlisting} -Answer: Use hashmap simply Set of tuple to save the corresponding sending exmail address: local name and domain name: -\begin{lstlisting}[language=Python] -class Solution: - def numUniqueEmails(self, emails): - """ - :type emails: List[str] - :rtype: int - """ - if not emails: - return 0 - num = 0 - handledEmails = set() - for email in emails: - local_name, domain_name = email.split('@') - local_name = local_name.split('+')[0] - local_name = local_name.replace('.','') - handledEmails.add((local_name,domain_name) ) - return len(handledEmails) -\end{lstlisting} - -%%%%%%%%%%%%%%%%%%%%%%%Graph Representation -\section{Graph Representations} -\label{graph_representation} -Graph data structure can be thought of a superset of the array and the linked list, and tree data structures. In this section, we only introduce the presentation and implementation of the graph, but rather defer the searching strategies to the principle part. 
Searching strategies in the graph makes a starting point in algorithmic problem solving, knowing and analyzing these strategies in details will make an independent chapter as a problem solving principle. -\subsection{Introduction} -Graph representations need to show users full information to the graph itself, $G = (V, E)$, including its vertices, edges, and its weights to distinguish either it is directed or undirected, weighted or unweighted. There are generally four ways: (1) Adjacency Matrix, (2) Adjacency List, (3) Edge List, and (4) optionally, Tree Structure, if the graph is a free tree. Each will be preferred to different situations. An example is shown in Fig~\ref{fig:graph_represent}. -\begin{figure}[!ht] - \centering - \includegraphics[width=\columnwidth]{fig/graph_representation.png} - \caption{Four ways of graph representation, renumerate it from 0. Redraw the graph} - \label{fig:graph_represent} -\end{figure} - -\paragraph{Double Edges in Undirected Graphs} In directed graph, the number of edges is denoted as $|E|$. However, for the undirected graph, because one edge $(u, v)$ only means that vertex $u$ and $v$ are connected; we can reach to $v$ from $u$ and it also works the other way around. To represent undirected graph, we have to double its number of edges shown in the structure; it becomes $2|E|$ in all of our representations. %From now on, for a given graph, $G = (V, E)$, for directed graph, we have $|V|$, $|E|$ - -\subsubsection{Adjacency Matrix} -An adjacency matrix of a graph is a 2-D matrix of size $|V|\times|V|$: each dimension, row and column, is vertex-indexed. Assume our matrix is \texttt{am}, if there is an edge between vertices 3,4, and if its unweighted graph, we mark it by setting \texttt{am[3][4]=1}, we do the same for all edges and leaving all other spots in the matrix zero-valued. 
For an undirected graph, it will be a matrix symmetric about the main diagonal, as shown in A of Fig.~\ref{fig:graph_represent}; the matrix is its own transpose: $am = {am}^T$. We can choose to store only the entries on and above the diagonal of the matrix, thereby cutting the memory need in half. For an unweighted graph, our adjacency matrix is typically zero-and-one valued. For a weighted graph, the adjacency matrix becomes a weight matrix, with $w(i, j)$ denoting the weight of edge $(i, j)$; the weight can be negative, positive, or even zero in practice, thus we might need to figure out how to distinguish the non-edge relation from the edge relation when the situation arises. - -The Python code that implements the adjacency matrix for the graph in the example is: -\begin{lstlisting}[language=Python, numbers=none] -am = [[0]*7 for _ in range(7)] - -# set 8 edges -am[0][1] = am[1][0] = 1 -am[0][2] = am[2][0] = 1 -am[1][2] = am[2][1] = 1 -am[1][3] = am[3][1] = 1 -am[2][4] = am[4][2] = 1 -am[3][4] = am[4][3] = 1 -am[4][5] = am[5][4] = 1 -am[5][6] = am[6][5] = 1 -\end{lstlisting} - -\paragraph{Applications} An adjacency matrix usually fits a dense graph well, where the number of edges is close to $|V|^2$, leaving only a small fraction of the matrix blank and unused. Checking if an edge exists between two vertices takes only $O(1)$. However, an adjacency matrix always requires $\Theta(|V|)$ time to enumerate the neighbors of a vertex $v$--an operation commonly used in many graph algorithms--even if vertex $v$ only has a few neighbors. Moreover, when the graph is sparse, an adjacency matrix will be inefficient in both space and iteration cost; a better option is the adjacency list. %For the above example, our matrix will be: - -%%%%%%%%%%%%%%%%%%%List%%%%%%%%%% -\subsubsection{Adjacency List} -An adjacency list is a more compact and space efficient form of graph representation compared with the above adjacency matrix. 
In an adjacency list, we have a vertex-indexed list of $|V|$ entries, and for each vertex $v$ we store another list of its neighboring nodes, with their vertex indices as the values; this inner list can be represented with an array or a linked list. For example, with adjacency list as $[[1,2,3],[3,1],[4,6,1]]$, node 0 connects to 1,2,3, node 1 connects to 3,1, node 2 connects to 4,6,1. - -In Python, we can use a normal 2-d array to represent the adjacency list; for the same graph in the example, it is represented with the following code: -\begin{lstlisting}[language=Python, numbers=none] -al = [[] for _ in range(7)] - -# set 8 edges -al[0] = [1, 2] -al[1] = [2, 3] -al[2] = [0, 4] -al[3] = [1, 4] -al[4] = [2, 3, 5] -al[5] = [4, 6] -al[6] = [5] -\end{lstlisting} - -\paragraph{Applications} -The space complexity of an adjacency list is $O(|V| + |E|)$, which has an upper bound of $O(|V|^2)$ when the graph is dense. However, with an adjacency list, checking if there is an edge between node $u$ and $v$ takes $O(|V|)$ time in the worst case, with a linear scan of the list \texttt{al[u]}. If the graph is static, meaning we do not add more vertices but can modify the current edges and their weights, we can use a Python set or dictionary for the second dimension of the adjacency list. This change enables $O(1)$ search of an edge, just as in the adjacency matrix. - -%%%%%%%%%%%%%%%%%%%%%%%%Edge List%%%%%%%%%%%%%%% -\subsubsection{Edge List} -The edge list is a list of edges (one-dimensional), where the index of the list does not relate to any vertex and each edge is usually in the form of (starting vertex, ending vertex, weight). We can use either a \texttt{list} or a \texttt{tuple} to represent an edge. 
The edge list representation of the example is given: -\begin{lstlisting}[language=Python, numbers=none] -el = [] -el.extend([[0, 1], [1, 0]]) -el.extend([[0, 2], [2, 0]]) -el.extend([[1, 2], [2, 1]]) -el.extend([[1, 3], [3, 1]]) -el.extend([[3, 4], [4, 3]]) -el.extend([[2, 4], [4, 2]]) -el.extend([[4, 5], [5, 4]]) -el.extend([[5, 6], [6, 5]]) -\end{lstlisting} -\paragraph{Applications} -The edge list is not as widely used as the AM and AL, and is usually only needed in a subroutine of an algorithm implementation--such as in Kruskal's algorithm to find the Minimum Spanning Tree (MST)--where we might need to order the edges by their weights. - -% Function to generate the list of all edges from either Adjacency Matrix or Adjacency List is similar to the following code: -% \begin{lstlisting}[language=Python] -% def generate_edges(graph): -% edges = [] -% for node in graph: -% for neighbour in graph[node]: -% edges.append((node, neighbour)) - -% return edges - -% print(generate_edges(graph)) -% \end{lstlisting} -\subsubsection{Tree Structure} -If a connected graph has no cycle and its number of edges satisfies $|E| = |V|-1$, it is essentially a tree. We can choose to represent it with any one of the three representations above. Optionally, we can use a tree structure formed as a rooted tree with \texttt{nodes}, each of which has a value and pointers to its children. We will see later how this type of tree is implemented in Python. - -\subsection{Use Dictionary} -In the last section, we always used the vertex-indexed structure. It works but might not be human-friendly to work with; in practice a vertex usually comes with a ``name''--such as in a system of cities, where a vertex should be a city's name. Another inconvenience is that when we have no idea of the total number of vertices, using the index-numbering system requires us to first figure out all vertices and number each, which is an overhead. 
- -To avoid these two inconveniences, we can replace the adjacency list, which is a list of lists, with an embedded dictionary structure, which is a dictionary of dictionaries or sets. -\paragraph{Unweighted Graph}For example, we demonstrate how to give a ``name'' to each vertex of the exemplary graph; we replace $0$ with `a', $1$ with `b', and the others with $\{'c', 'd', 'e', 'f', 'g'\}$. We declare \texttt{ defaultdict(set)}, the outer list is replaced by the dictionary, and the inner neighboring node list is replaced with a \texttt{set} for $O(1)$ access to any edge. - -In the demo code, we simply construct this representation from the edge list. -\begin{lstlisting}[language=Python] -from collections import defaultdict - -d = defaultdict(set) -for v1, v2 in el: - d[chr(v1 + ord('a'))].add(chr(v2 + ord('a'))) -print(d) -\end{lstlisting} -And the printed graph is as follows: -\begin{lstlisting}[numbers=none] -defaultdict(<class 'set'>, {'a': {'b', 'c'}, 'b': {'d', 'c', 'a'}, 'c': {'b', 'e', 'a'}, 'd': {'b', 'e'}, 'e': {'d', 'c', 'f'}, 'f': {'e', 'g'}, 'g': {'f'}}) -\end{lstlisting} -\paragraph{Weighted Graph} If we need weights for each edge, we can use a two-dimensional dictionary. We use $10$ as the weight of all edges just to demonstrate. -\begin{lstlisting}[language=Python] -dw = defaultdict(dict) -for v1, v2 in el: - vn1 = chr(v1 + ord('a')) - vn2 = chr(v2 + ord('a')) - dw[vn1][vn2] = 10 -print(dw) -\end{lstlisting} -We can access the edge and its weight through \texttt{dw[v1][v2]}. The output of this structure is given: -\begin{lstlisting}[numbers=none] -defaultdict(<class 'dict'>, {'a': {'b': 10, 'c': 10}, 'b': {'a': 10, 'c': 10, 'd': 10}, 'c': {'a': 10, 'b': 10, 'e': 10}, 'd': {'b': 10, 'e': 10}, 'e': {'d': 10, 'c': 10, 'f': 10}, 'f': {'e': 10, 'g': 10}, 'g': {'f': 10}}) -\end{lstlisting} - - - - - - -% Path: here we only need a vector to save each path. - -% \subsubsection{Implementation} -% If we implement DFS in tree, then no need a hashmap to save the visited spot. 
If it is in the graph, which would very necessary. -% \begin{lstlisting}[language = Python] -% #Recursive -% def DFS(root): -% #END Condition -% if not root: -% return -% visit(root) -% for node in root.adjacent: -% if not node.visited: -% DFS(node) -% \end{lstlisting} -% \begin{lstlisting}[language = Python] -% #Iterative, implemented using a stack -% def DFS_iter(): -% root.visited = 1 -% stack = [] -% stack.append(root) -% while stack: -% n=stack.pop() -% visit(n) -% n.visited=1 -% for node in n.adjacent: -% if not node.visited: -% stack.append(node) -% \end{lstlisting} - - - -%%%%%%%%%%%%%%%%%%%%%%%Tree Representation -\section{Tree Data Structures} - -In this section, we focus on implementing a \textbf{recursive} tree structure, since a free tree just works the same way as of the graph structure. Also, we have already covered the implicit structure of tree in the topic of heap. In this section, we first implement the recursive tree data structure and the construction of a tree. In the next section, we discuss the searching strategies on the tree--tree traversal, including its both recursive and iterative variants. -% A binary tree is made of nodes which has at most two branches--the ``left child" and the ``right child"--and a data element. The ``root" node is the topmost node in the tree. The left and right child recursively point to smaller ``subtrees" on either side. - - -put an figure here of a binary and n-ary tree. - -%\subsubsection{Nodes and Tree Construction} -Because a tree is a hierarchical--here which is represented recursively--structure of a collection of nodes. We define two classes each for the N-ary tree node and the binary tree node. A node is composed of a variable \texttt{val} saving the data and children pointers to connect the nodes in the tree. - - -\paragraph{Binary Tree Node} In a binary tree, the children pointers will at at most two pointers, which we define as \texttt{left} and \texttt{right}. 
The binary tree node is defined as: -\begin{lstlisting}[language = Python] -class BinaryNode: - def __init__(self, val): - self.left = None - self.right = None - self.val = val -\end{lstlisting} - -\paragraph{N-ary Tree Node} For N-ary node, when we initialize the length of the node's children with additional argument \texttt{n}. -\begin{lstlisting}[language = Python] -class NaryNode: - def __init__(self, n, val): - self.children = [None] * n - self.val = val -\end{lstlisting} -In this implementation, the children is ordered by each's index in the list. In real practice, there is a lot of flexibility. It is not necessarily to pre-allocate the length of its children, we can start with an empty list \texttt{[]} and just append more nodes to its children list on the fly. Also -we can replace the list with a dictionary data type, which might be a better and more space efficient way. - -\paragraph{Construct A Tree} Now that we have defined the tree node, the process of constructing a tree in the figure will be a series of operations: -\begin{lstlisting}[numbers=none] - 1 - / \ - 2 3 - / \ \ -4 5 6 -\end{lstlisting} -\begin{lstlisting}[language=Python] -root = BinaryNode(1) -left = BinaryNode(2) -right = BinaryNode(3) -root.left = left -root.right = right -left.left = BinaryNode(4) -left.right = BinaryNode(5) -right.right = BinaryNode(6) -\end{lstlisting} - -We see that the above is not convenient in practice. A more practice way is to represent the tree with the heap-like array, which treated the tree as a complete tree. For the above binary tree, because it is not complete in definition, we pad the left child of node 3 with \texttt{None} in the list, we would have array \texttt{[1, 2, 3, 4, 5, None, 6]}. The root node will have index $0$, and given a node with index $i$, the children nodes of it will be indexed with $n*i+j, j \in [1, ..., n]$. 
Thus, a better way to construct the above tree is to start from the array and and traverse the list recursively to build up the tree. - -We define a recursive function with two arguments: \texttt{a}--the input array of nodes and \texttt{idx}--indicating the position of the current node in the array. At each recursive call, we construct a \texttt{BinaryNode} and set its \texttt{left} and \texttt{right} child to be a node returned with two recursive call of the same function. Equivalently, we can say these two subprocess--\texttt{constructTree(a, 2*idx + 1)} and \texttt{constructTree(a, 2*idx + 2)} builds up two subtrees and each is rooted with node \texttt{2*idx+1} and \texttt{2*idx+2} respectively. When there is no items left in the array to be used, it natually indicates the end of the recursive function and return \texttt{None} to indicate its an empty node. We give the following Python code: -\begin{lstlisting}[language=Python] -def constructTree(a, idx): - ''' - a: input array of nodes - idx: index to indicat the location of the current node - ''' - if idx >= len(a): - return None - if a[idx]: - node = BinaryNode(a[idx]) - node.left = constructTree(a, 2*idx + 1) - node.right = constructTree(a, 2*idx + 2) - return node - return None -\end{lstlisting} -Now, we call this function, and pass it with out input array: -\begin{lstlisting}[language=Python] -nums = [1, 2, 3, 4, 5, None, 6] -root = constructTree(nums, 0) -\end{lstlisting} - -\begin{bclogo}[couleur = blue!30, arrondi=0.1,logo=\bccrayon,ombre=true]{Please write a recursive function to construct the N-ary tree given in Fig.~\ref{}?} -\end{bclogo} -In the next section, we discuss tree traversal methods, and we will use those methods to print out the tree we just build. - - -% \section{Time complexity of Binary Tree} -% If we spent O(n) to convert $T(n)$ to $2T(n/2)$. 
We have the following deduction: -% \begin{equation} \label{bt_time} -% \begin{split} -% T(n) & = 2T(n/2) + O(n)\\ -% & = 2 * 2T(n/4) + O(n) + O(n)\\ -% & = O(nlogn) -% \end{split} -% \end{equation} -% which is the same as merge sort. If the divide cost is only $O(1)$. -% \begin{equation}\label{bt_time2} -% \begin{split} -% T(n) &= 2T(n/2) + O(1)\\ -% & = 2 * 2T(n/4) + O(1) + O(1)\\ -% &= n + (1 + 2 + 4 +...+ n)\\ -% &\approx n + 2n\\ -% &\approx O(n) -% \end{split} -% \end{equation} - -\subsection{LeetCode Problems} - -To show the nodes at each level, we use LevelOrder function to print out the tree: -\begin{lstlisting}[language=Python] -def LevelOrder(root): - q = [root] - while q: - new_q = [] - for n in q: - if n is not None: - print(n.val, end=',') - if n.left: - new_q.append(n.left) - if n.right: - new_q.append(n.right) - q = new_q - print('\n') -LevelOrder(root) -# output -# 1, - -# 2,3, - -# 4,5,None,6, -\end{lstlisting} -\textbf{Lowest Common Ancestor}. The lowest common ancestor is defined between two nodes p and q as the lowest node in T that has both p and q as descendants (where we allow a node to be a descendant of itself). There will be two cases in LCA problem which will be demonstrated in the following example. -\begin{examples}[resume] -\item \textbf{Lowest Common Ancestor of a Binary Tree (L236).} Given a binary tree, find the lowest common ancestor (LCA) of two given nodes in the tree. Given the following binary tree: root = [3,5,1,6,2,0,8,null,null,7,4] -\begin{lstlisting}[numbers=none] - _______3______ - / \ - ___5__ ___1__ - / \ / \ - 6 _2 0 8 - / \ - 7 4 - -Example 1: -Input: root = [3,5,1,6,2,0,8,null,null,7,4], p = 5, q = 1 -Output: 3 -Explanation: The LCA of of nodes 5 and 1 is 3. - -Example 2: -Input: root = [3,5,1,6,2,0,8,null,null,7,4], p = 5, q = 4 -Output: 5 -Explanation: The LCA of nodes 5 and 4 is 5, since a node can be a descendant of itself - according to the LCA definition. 
-\end{lstlisting} -\textbf{Solution: Divide and Conquer}. There are two cases for LCA: 1) two nodes each found in different subtree, like example 1. 2) two nodes are in the same subtree like example 2. If we compare the current node with the p and q, if it equals to any of them, return current node in the tree traversal. Therefore in example 1, at node 3, the left return as node 5, and the right return as node 1, thus node 3 is the LCA. In example 2, at node 5, it returns 5, thus for node 3, the right tree would have None as return, thus it makes the only valid return as the final LCA. The time complexity is $O(n)$. -\begin{lstlisting}[language=Python] -def lowestCommonAncestor(self, root, p, q): - """ - :type root: TreeNode - :type p: TreeNode - :type q: TreeNode - :rtype: TreeNode - """ - if not root: - return None - if root == p or root == q: - return root # found one valid node (case 1: stop at 5, 1, case 2:stop at 5) - left = self.lowestCommonAncestor(root.left, p, q) - right = self.lowestCommonAncestor(root.right, p, q) - if left is not None and right is not None: # p, q in the subtree - return root - if any([left, right]) is not None: - return left if left is not None else right - return None -\end{lstlisting} -\end{examples} -%%%%%%%%%%%%%%%%%%%%%%Heap and priority queue -%%%%%%%%%%%%%%%%%%Basic Implementation%%%%%%%%%%%%%%%%%%%%%%%% -%%%%%%%%%%%%%%%%%heap%%%%%%%%%%%%%%%%% -%%%%%%%%%%%%%%%%%heap%%%%%%%%%%%%%%%%% -\section{Heap} - count = Counter(nums) -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -% In this section, we introduce heap data structures which is essentially an array object but it can be viewed as a nearly complete binary tree. The concept of the data structures in this chapter is between liner and non-linear, that is using linear data structures to mimic the non-linear data structures and its behavior for higher efficiency under certain context. - -Heap is a tree based data structure that satisfies \textit{the heap ordering property}. 
The ordering can be one of two types: -\begin{itemize} - \item the min-heap property: the value of each node is greater than or equal ($\geq$) to the value of its parent, with the minimum-value element at the root. -\item the max-heap property: the value of each node is less than or equal to ($\leq$) the value of its parent, with the maximum-value element at the root. -\end{itemize} -\paragraph{Binary Heap} -\begin{figure}[h!] - \centering - \includegraphics[width = 0.98\columnwidth]{fig/binary_tree.png} - \caption{A max-heap visualized as a binary tree structure on the left, and implemented with an array on the right.} - \label{fig:max-heap-1} -\end{figure} -A heap is not a sorted structure but can be regarded as partially ordered. The maximum number of children of a node in a heap depends on the type of heap. However, in the more commonly-used heap type, there are at most two children of a node and it's known as a Binary heap. A binary max-heap is shown in Fig.~\ref{fig:max-heap-1}. Throughout the rest of this section, however, the word ``heap'' will always refer to a min-heap. - -A heap is commonly used to implement a priority queue, where each time the item of the highest priority is popped out -- this can be done in $O(\log n)$. As we go through the book, we will find how often a priority queue is needed to solve our problems. It can also be used in sorting, such as the heapsort algorithm. - -\paragraph{Heap Representation} -A binary heap is always a complete binary tree, in which each level is fully filled before the next level starts to be filled. Therefore it has a height of $O(\log n)$ given a binary heap with $n$ nodes. A complete binary tree can be uniquely represented by storing its level order traversal in an array. The array representation is more space efficient due to the non-existence of the children pointers for each node. - -In the array representation, index 0 is skipped for convenience of implementation. Therefore, the root is located at index 1. 
Consider the $k$-th item of the array; its parent and children relations are: -\begin{itemize} - \item its left child is located at index $2*k$, - \item its right child is located at index $2*k+1$, - \item and its parent is located at index $\lfloor k/2 \rfloor$ (in Python3, use integer division $k//2$). -\end{itemize} - - -% In Python3, use integer division $n//2$. \textit{Note: we can start index with 0 as used in \textbf{heapq} library introduced later in this section. Given a node $x$, the left and right child will be $2*x+1$, $2*x+2$, and the parent node will have index $(x-1)//2$.} - - -% Normally, there is usually no notion of 'search' in heap, but only insertion and deletion, which can be done by traversing a $O(\log n)$ leaf-to-root or root-to-leaf path. - - -\subsection{Basic Implementation} -The basic methods of a heap class should include: \texttt{push}--push an item into the heap, \texttt{pop}--pop out the first item, and \texttt{heapify}--convert an arbitrary array into a heap. In this section, we use the heap shown in Fig.~\ref{fig:min-heap} as our example. -\begin{figure}[h!] - \centering - \includegraphics[width = 0.8\columnwidth]{fig/min_heap_push.png} - \caption{A Min-heap.} - \label{fig:min-heap} -\end{figure} -\paragraph{Push: Percolation Up} The new element is initially appended to the end of the heap (as the last element of the array). The heap property is repaired by comparing the added element with its parent and moving the added element up a level (swapping positions with the parent). This process is called \textit{percolation up}. In a min-heap, the comparison is repeated until the parent is smaller than or equal to the percolating element, or the element reaches the root. When we push an item in, the item is initially appended to the end of the heap. Assume the new item is smaller than the existing items in the heap, such as $5$ in our example; then there will be a violation of the heap property along the path from the end of the heap to the root. 
To repair the violation, we traverse through the path and compare the added item with its parent: -\begin{itemize} - \item if parent is smaller than the added item, no action needed and the traversal is terminated, e.g. adding item 18 will lead to no action. - \item otherwise, swap the item with the parent, and set the node to its parent so that it can keep traverse. -\end{itemize} -Each step we fix the heap ordering property for a substree. - The time complexity is the same as the height of the complete tree, which is $O(\log n)$. - - To generalize the process, a \texttt{\_float()} function is first implemented which enforce min heap ordering property on the path from a given index to the root. -\begin{lstlisting}[language=Python] -def _float(idx, heap): - while idx // 2: - p = idx // 2 - # Violation - if heap[idx] < heap[p]: - heap[idx], heap[p] = heap[p], heap[idx] - else: - break - idx = p - return -\end{lstlisting} -With \texttt{\_float()}, function \texttt{push} is implemented as: -\begin{lstlisting}[language=Python] -def push(heap, k): - heap.append(k) - _float(idx = len(heap) - 1, heap=heap) -\end{lstlisting} - -\paragraph{Pop: Percolation Down} When we pop out the item, no matter if it is the root item or any other item in the heap, an empty spot appears at that location. We first move the last item in the heap to this spot, and then start to repair the heap ordering property by comparing the new item at this spot to its children: -\begin{itemize} - \item if one of its children has smaller value than this item, swap this item with that child and set the location to that child's location. And then continue. - \item otherwise, the process is done. -\end{itemize} -\begin{figure}[h!] - \centering - \includegraphics[width = 0.44\columnwidth]{fig/min_heap_insert.png} - \includegraphics[width = 0.44\columnwidth]{fig/min_heap_insert_step1.png} - \caption{Left: delete node 5, and move node 12 to root. 
Right: 6 is the smallest among 12, 6, and 7, swap node 6 with node 12.} - \label{fig:min-heap-pop} -\end{figure} -Similarly, this process is called \textit{percolation down}. Same as the insert in the case of complexity, $O(\log n)$. We demonstrate this process with two cases: -\begin{itemize} - \item if the item is the root, which is the minimum item $5$ in our min-heap example, we move 12 to the root first. Then we compare 12 with its two children, which are 6 and 7. Swap 12 with 6, and continue. The process is shown in Fig.~\ref{fig:min-heap-pop}. - \item if the item is any other node instead of root, say node 7 in our example. The process is exactly the same. We move 12 to node 7's position. By comparing 12 with children 10 and 15, 10 and 12 is about to be swapped. With this, the heap ordering property is sustained. -\end{itemize} - -We first use a function \texttt{\_sink} to implement the percolation down part of the operation. -\begin{lstlisting}[language=Python] -def _sink(idx, heap): - size = len(heap) - while 2 * idx < size: - li = 2 * idx - ri = li + 1 - mi = idx - if heap[li] < heap[mi]: - mi = li - if ri < size and heap[ri] < heap[mi]: - mi = ri - if mi != idx: - # swap index with mi - heap[idx], heap[mi] = heap[mi], heap[idx] - else: - break - idx = mi -\end{lstlisting} -The \texttt{pop} is implemented as: -\begin{lstlisting}[language=Python] -def pop(heap): - val = heap[1] - # Move the last item into the root position - heap[1] = heap.pop() - _sink(idx=1, heap=heap) - return val -\end{lstlisting} - - -\paragraph{Heapify} Heapify is a procedure that converts a list to a heap. To heapify a list, we can naively do it through a series of insertion operations through the items in the list, which gives us an upper-bound time complexity : $O(n\log n)$. However, a more efficient way is to treat the given list as a tree and to heapify directly on the list. %There are two possibly two ways to do this: (1) through sinking and (2) through floating. 
- -To satisfy the heap property, we need to first start from the smallest subtrees, which are leaf nodes. Leaf nodes have no children, so they satisfy the heap property naturally. Therefore we can jump to the last parent node, which is at position \texttt{n//2} with 1-based indexing. We apply the percolation down process as used in the \texttt{pop} operation, which works downwards by comparing the node with its children nodes and applies swapping if the heap property is violated. At the end, the subtree rooted at this particular node obeys the heap ordering property. We then repeat the same process for all parent nodes in the list in range $[n/2, 1]$--in reversed order of $[1, n/2]$, which guarantees that the final complete binary tree is a binary heap. This follows a dynamic programming fashion. The leaf nodes $a[n/2+1, n]$ are naturally a heap. Then the subarrays are heapified in order of $a[n/2, n]$, $a[n/2-1, n], ..., a[1, n]$ as we work on nodes $[n/2, 1]$. In other words, the suffixes $a[n, n], a[n-1, n], a[n-2, n], ..., a[1, n]$ come to satisfy the heap ordering property one after another. Such a process gives us a tighter upper bound, which is $O(n)$. - -We show how the heapify process is applied on $a=[21, 1, 45, 78, 3, 5]$ in Fig.~\ref{fig:heapify}. -% \begin{figure}[h!] -% \centering -% \includegraphics[width = 0.98\columnwidth]{fig/heapify.png} -% \caption{Heapify for a given list.} -% \label{fig:heapify} -% \end{figure} -\begin{figure}[h!] 
- \centering - \includegraphics[width = 0.3\columnwidth]{fig/min_heap_heapify_1.png} - \includegraphics[width = 0.3\columnwidth]{fig/min_heap_heapify_2.png} - \includegraphics[width = 0.3\columnwidth]{fig/min_heap_heapify_3.png} - \caption{Heapify: The last parent node 45.} - \includegraphics[width = 0.3\columnwidth]{fig/min_heap_heapify_4.png} - \caption{Heapify: On node 1} - - \includegraphics[width = 0.3\columnwidth]{fig/min_heap_heapify_5.png} - \includegraphics[width = 0.3\columnwidth]{fig/min_heap_heapify_6.png} - \includegraphics[width = 0.3\columnwidth]{fig/min_heap_heapify_7.png} - \caption{Heapify: On node 21.} - \label{fig:heapify} -\end{figure} - -Implementation-wise, the \texttt{heapify} function call \texttt{\_sink} as its subroutine. The code is shown as: -\begin{lstlisting}[language=Python] -def heapify(lst): - heap = [None] + lst - n = len(lst) - for i in range(n//2, 0, -1): - _sink(i, heap) - return heap -\end{lstlisting} - -% Now, run the following code: -% \begin{lstlisting}[language=Python] -% h = Heap() -% h.heapify(lst) -% print('heapify with heapify:', h) -% \end{lstlisting} -% Out put is: -% \begin{lstlisting} -% heapify with heapify: 1 5 21 78 3 45 -% \end{lstlisting} -\begin{bclogo}[couleur = blue!30, arrondi=0.1,logo=\bccrayon,ombre=true]{Which way is more efficient building a heap from a list?} Using insertion or heapify? What is the efficiency of each method? The experimental result can be seen in the code. -\end{bclogo} - -\begin{bclogo}[couleur = blue!30, arrondi=0.1,logo=\bccrayon,ombre=true]{Try to use the percolation up process to heaipify the list. 
} -\end{bclogo} - -% \begin{enumerate} -% \item MAX-HEAPIFY, runs in $O(lgn)$, is the key to maintaining the max-heap property -% \item BUILD-MAX-HEAP, runs in linear time, produces a maxheap from an unordered input array -% \item MAX-HEAP-INSERT, HEAP-EXTRACT-MAX, HEAP-INCREASE-KEY, and HEAP-MAXIMUM, runs in $O(lgn)$ time, allow the heap data structure to implement a priority queue -% \end{enumerate} -%%%%%%%%%%%%%%%%%%Python Built-in Module: heapq%%%%%%%%%%%%%%%%%%%%%%%% -\subsection{Python Built-in Library: \texttt{heapq}} -When we are solving a problem, unless specifically required for implementation, we can always use an existing Python module/package. \texttt{heapq} is one of the most frequently used libraries in problem solving. - -\texttt{heapq}~\footnote{\url{https://docs.python.org/3/library/heapq.html}} is a built-in library in Python that implements the heap queue algorithm. The \texttt{heapq} module implements a minimum binary heap on top of a plain Python list and it provides three main functions: \texttt{heappush}, \texttt{heappop}, and \texttt{heapify}, similar to what we have implemented in the last section. The API differs from our last section in one aspect: it uses zero-based indexing. There are three other functions: \texttt{nlargest}, \texttt{nsmallest}, and \texttt{merge} that come in handy in practice. These functions are listed and described in Table~\ref{tab:functions_in_heapq}.% \textit{To note that heapq is not a data type like queue.Queue() or collections.deque(), it is a library (or class) that can do operations like it is on a heap.} %, which can be used to maintain a priority queue. Operations include heappush, heappop, and nsmallest. heapq in python to maintain a priority queue with $O(logn)$. 
-\begin{table}[h] -\begin{small} -\centering -\noindent\captionof{table}{ Methods of \textbf{heapq}} - \noindent \begin{tabular}{|p{0.25\columnwidth}|p{0.75\columnwidth}| } - \hline -Method & Description \\ \hline -{heappush(h, x)} & Push the \texttt{x} onto the heap, maintaining the heap invariant. \\\hline -{heappop(h)} &Pop and return the \textit{smallest} item from the heap, maintaining the heap invariant. If the heap is empty, \texttt{IndexError} is raised.\\ \hline -{heappushpop(h, x)} &Push \texttt{x} on the heap, then pop and return the smallest item from the heap. The combined action runs more efficiently than {heappush()} followed by a separate call to \texttt{heappop()}.\\ \hline -{heapify(x)} & Transform list \texttt{x} into a heap, in-place, in linear time.\\ \hline -{nlargest(k, iterable, key = fun)} & This function is used to return the \texttt{k} largest elements from the iterable specified and satisfying the key if mentioned. \\ \hline -{nsmallest(k, iterable, key = fun)} & This function is used to return the \texttt{k} smallest elements from the iterable specified and satisfying the key if mentioned. \\ \hline - -{merge(*iterables, key=None, reverse=False)} & Merge multiple sorted inputs into a single sorted output. Returns a \textit{generator} over the sorted values. \\ \hline -{heapreplace(h, x)} & Pop and return the smallest item from the heap, and also push the new item.\\ \hline -\end{tabular} - \label{tab:functions_in_heapq} - \end{small} -\end{table} - -Now, lets see some examples. -\paragraph{Min-Heap} Given the exemplary list $a =[21, 1, 45, 78, 3, 5]$, we call the function \texttt{heapify()} to convert it to a min-heap. -\begin{lstlisting}[language = Python] -from heapq import heappush, heappop, heapify -h = [21, 1, 45, 78, 3, 5] -heapify(h) -\end{lstlisting} -The heapified result is $h=[1, 3, 5, 78, 21, 45]$. 
Let's try \texttt{heappop} and \texttt{heappush}: -\begin{lstlisting}[language=Python] -heappop(h) -heappush(h, 15) -\end{lstlisting} -The print out for \texttt{h} is: -\begin{lstlisting}[language=Python] -[3, 21, 5, 78, 45, 15] -\end{lstlisting} - -\paragraph{\texttt{nlargest} and \texttt{nsmallest}} -Getting the $n$ largest or smallest items with these two functions does not require the list to be heapified first with \texttt{heapify}, because the heap is built inside them. -\begin{lstlisting}[language=Python] -from heapq import nlargest, nsmallest -h = [21, 1, 45, 78, 3, 5] -nl = nlargest(3, h) -ns = nsmallest(3, h) -\end{lstlisting} -The print out for \texttt{nl} and \texttt{ns} is: -\begin{lstlisting} -[78, 45, 21] -[1, 3, 5] -\end{lstlisting} - -\paragraph{Merge Multiple Sorted Arrays} Function \texttt{merge} merges multiple iterables into a single sorted output that is returned as a generator. It assumes all the inputs are sorted. For example: -\begin{lstlisting}[language=Python] -from heapq import merge -a = [1, 3, 5, 21, 45, 78] -b = [2, 4, 8, 16] -ab = merge(a, b) -\end{lstlisting} -The print out of $ab$ directly can only give us a generator object with its address in the memory: -\begin{lstlisting}[language=Python] -<generator object merge at 0x...> -\end{lstlisting} -We can use list comprehension and iterate through $ab$ to save the sorted array in a list: -\begin{lstlisting}[language=Python] -ab_lst = [n for n in ab] -\end{lstlisting} -The print out for \texttt{ab\_lst} is: -\begin{lstlisting}[language=Python] -[1, 2, 3, 4, 5, 8, 16, 21, 45, 78] -\end{lstlisting} - - -\paragraph{Max-Heap} As we can see, the default heap implemented in \texttt{heapq} enforces the heap property of the min-heap. What if we want a max-heap instead? The library does offer such functions, but they are intentionally hidden from users as private functions. They can be accessed like: \texttt{heapq.\_[function]\_max()}. Now, we can heapify a max-heap with function \texttt{\_heapify\_max}. 
-\begin{lstlisting}[language = Python] -from heapq import _heapify_max -h = [21, 1, 45, 78, 3, 5] -_heapify_max(h) -\end{lstlisting} -The print out for \texttt{h} is: -\begin{lstlisting} -[78, 21, 45, 1, 3, 5] -\end{lstlisting} - -Also, in practise, a simple hack for the max-heap is to save data as negative. Whenever we use the data, we convert it to the original value. For example: -\begin{lstlisting}[language=Python] -h = [21, 1, 45, 78, 3, 5] -h = [-n for n in h] -heapify(h) -a = -heappop(h) -\end{lstlisting} -\texttt{a} will be 78, as the largest item in the heap. - -% \paragraph{More Private Functions} -% \begin{table}[h] -% \begin{small} -% \centering -% \noindent\captionof{table}{ Private Methods of \textbf{heapq}} -% \noindent \begin{tabular}{|p{0.25\columnwidth}|p{0.75\columnwidth}| } -% \hline -% Method & Description \\ \hline -% heappush(h, x) & Push the value item onto the heap, maintaining the heap invariant. \\\hline -% heappop(h) &Pop and return the \textit{smallest} item from the heap, maintaining the heap invariant. If the heap is empty, IndexError is raised.\\ \hline -% heappushpop(h, x) &Push item on the heap, then pop and return the smallest item from the heap. The combined action runs more efficiently than heappush() followed by a separate call to heappop().\\ \hline -% heapify(x) & Transform list x into a heap, in-place, in linear time.\\ \hline -% heapreplace(h, x) & Pop and return the smallest item from the heap, and also push the new item. The heap size doesn’t change. If the heap is empty, IndexError is raised. This is more efficient than heappop() followed by heappush(), and can be more appropriate when using a fixed-size heap.\\ \hline -% nlargest(k, iterable, key = fun) & This function is used to return the k largest elements from the iterable specified and satisfying the key if mentioned. 
\\ \hline -% nsmallest(k, iterable, key = fun) & This function is used to return the k smallest elements from the iterable specified and satisfying the key if mentioned. \\ \hline -% \end{tabular} -% \label{tab:functions_in_heapq} -% \end{small} -% \end{table} - -\paragraph{With Tuple/List or Customized Object as Items for Heap} -Any object that supports the less-than comparison (\texttt{\_\_lt\_\_()}) can be used in a heap with \texttt{heapq}. When we want our item to include information such as ``priority'' and ``task'', we can either put it in a tuple or a list. For example, our item is a list, where the first element is the priority and the second denotes the task id. -\begin{lstlisting} -heap = [[3, 'a'], [10, 'b'], [5,'c'], [8, 'd']] -heapify(heap) -\end{lstlisting} -The print out for \texttt{heap} is: -\begin{lstlisting}[language=Python] -[[3, 'a'], [8, 'd'], [5, 'c'], [10, 'b']] -\end{lstlisting} -However, if we have multiple tasks that have the same priority, the relative order of these tied tasks cannot be sustained. This is because the list items are compared with the whole list as key: it first compares the first item, and whenever there is a tie, it compares the next item. For example, consider the case when our example has multiple items with $3$ as the first value in the list. -\begin{lstlisting}[language=Python] -h = [[3, 'e'], [3, 'd'], [10, 'c'], [5,'b'], [3, 'a']] -heapify(h) -\end{lstlisting} -The printout indicates that the relative ordering of items [3, 'e'], [3, 'd'], [3, 'a'] is not kept: -\begin{lstlisting}[language=Python] -[[3, 'a'], [3, 'd'], [10, 'c'], [5, 'b'], [3, 'e']] -\end{lstlisting} -Keeping the relative order of tasks with the same priority is a requirement for the \textit{priority queue} abstract data structure. We will see in the next section how a priority queue can be implemented with \texttt{heapq}. - -\paragraph{Modify Items in \texttt{heapq}} -In the heap, we can change the value of any item just as we can in a list. 
However, such a change may violate the heap ordering property, so we need a way to repair it. We have the following two private functions to use according to the case of change: -\begin{itemize} - \item\texttt{\_siftdown(heap, startpos, pos)}: \texttt{pos} is where the new violation is. \texttt{startpos} is the position up to which we want to restore the heap invariant, which is usually set to $0$. Because \texttt{\_siftdown()} goes backwards to compare this node with its parents, we can call this function to fix the heap when an item's value is decreased. - \item \texttt{\_siftup(heap, pos)}: In \texttt{\_siftup()} items starting from \texttt{pos} are compared with their children so that smaller items are sifted up along the way. Thus, we can call this function to fix the heap when an item's value is increased. -\end{itemize} -We show one example: -\begin{lstlisting}[language=Python] -import heapq -heap = [[3, 'a'], [10, 'b'], [5,'c'], [8, 'd']] -heapify(heap) -print(heap) - -heap[0] = [6, 'a'] -# Increased value -heapq._siftup(heap, 0) -print(heap) -#Decreased Value -heap[2] = [3, 'a'] -heapq._siftdown(heap, 0, 2) -print(heap) -\end{lstlisting} -The printout is: -\begin{lstlisting}[language=Python] -[[3, 'a'], [8, 'd'], [5, 'c'], [10, 'b']] -[[5, 'c'], [8, 'd'], [6, 'a'], [10, 'b']] -[[3, 'a'], [8, 'd'], [5, 'c'], [10, 'b']] -\end{lstlisting} - -%%%%%%%%%%%%%%%%%%%%priority Queue%%%%%%%%%%%%%%%%%%% -\section{Priority Queue} -\label{sec_priority_queue} -A priority queue is an abstract data type (ADT) and an extension of queue with the following properties: -\begin{enumerate} - \item Each item in the queue has a priority associated with it. - \item In a priority queue, an item with higher priority is served (dequeued) before an item with lower priority. - \item If two items have the same priority, they are served according to their order in the queue. 
-\end{enumerate} -Priority Queue is commonly seen applied in: -\begin{enumerate} - \item CPU Scheduling, - \item Graph algorithms like Dijkstra’s shortest path algorithm, Prim’s Minimum Spanning Tree, etc. - \item All queue applications where priority is involved. -\end{enumerate} -The properties of priority queue demand sorting stability to our chosen sorting mechanism or data structure. Heap is generally preferred over arrays or linked list to be the underlying data structure for priority queue. In fact, the Python class \texttt{PriorityQueue()} from Python module \texttt{queue} uses \texttt{heapq} under the hood too. -We later will see how to implement priority queue with \texttt{heapq} and how to use \texttt{PriorityQueue()} class for our purpose. In default, the lower the value is, the higher the priority is, making min-heap the underlying data structure. -\subsubsection{Implement with \texttt{heapq} Library} - -The core functions: \texttt{heapify()}, \texttt{push()}, and \texttt{pop()} within \texttt{heapq} lib are used in our implementation. In order to implement priority queue, our binary heap needs to have the following features: -\begin{itemize} - \item \textbf{Sort stability:} when we get two tasks with equal priorities, we return them in the same order as they were originally added. A potential solution is to modify the original 2-element list \texttt{[priority, task]} into a 3-element list as \texttt{[priority, count, task]}. \texttt{list} is preferred because \texttt{tuple} does not allow item assignment. The entry \texttt{count} indicates the original order of the task in the list, which serves as a tie-breaker so that two tasks with the same priority are returned in the same order as they were added to preserve the sort stability. Also, since no two entry counts are the same so that in the tuple comparison the task will never be directly compared with the other. 
For example, use the same example as in the last section: -\begin{lstlisting}[language=Python] -import itertools -counter = itertools.count() -h = [[3, 'e'], [3, 'd'], [10, 'c'], [5,'b'], [3, 'a']] -h = [[p, next(counter), t] for p, t in h] -\end{lstlisting} -The printout for \texttt{h} is: -\begin{lstlisting}[language=Python] -[[3, 0, 'e'], [3, 1, 'd'], [10, 2, 'c'], [5, 3, 'b'], [3, 4, 'a']] -\end{lstlisting} -If we \texttt{heapify} \texttt{h} will gives us the same order as the original \texttt{h}. The relative ordering of ties in the sense of priority is sustained. -\item \textbf{Remove arbitrary items or update the priority of an item:} In situations such as the priority of a task changes or if a pending task needs to be removed, we have to update or remove an item from the heap. we have seen how to update an item's value in $O(\log n)$ time cost with two functions: \texttt{\_siftdown()} and \texttt{\_siftup()} in a heap. However, how to remove an arbitrary item? We could have found and replaced it with the last item in the heap. Then depending on the comparison between the value of the deleted item and the last item, the two mentioned functions can be applied further. - -However, there is a more convenient alternative: whenever we ``remove'' an item, we do not actually remove it but instead simply mark it as ``removed''. These ``removed'' items will eventually be popped out through a normally \texttt{pop} operation that comes with heap data structure, and which has the same time cost $O(\log n)$. With this alternative, when we are updating an item, we mark the old item as ``removed'' and add the new item in the heap. Therefore, with the special mark method, we are able to implement a heap wherein arbitrary removal and update is supported with just three common functionalities: \texttt{heapify}, \texttt{heappush}, and \texttt{heappop}. - -Let's use the same example here. We first remove task `d' and then update task `b''s priority to 14. 
Then we use another list \texttt{vh} to get the relative ordering of tasks in the heap according to the priority. -\begin{lstlisting}[language=Python] -REMOVED = '' -# Remove task 'd' -h[1][2] = REMOVED -# Update task 'b''s priority to 14 -h[3][2] = REMOVED -heappush(h, [14, next(counter), 'b']) -vh = [] -while h: - item = heappop(h) - if item[2] != REMOVED: - vh.append(item) -\end{lstlisting} -The printout for \texttt{vh} is: -\begin{lstlisting}[language=Python] -[[3, 0, 'e'], [3, 4, 'a'], [10, 2, 'c'], [14, 5, 'b']] -\end{lstlisting} - -\item \textbf{Search in constant time:} Searching the heap for an arbitrary item--non-root item or root item--takes linear time. In practice, tasks should have unique task ids to distinguish them from each other, which makes it possible to use a \texttt{dictionary} where \texttt{task} serves as key and the 3-element list as value (for a list, the value is just a pointer pointing to the starting position of the list). With the dictionary to help search, the time cost is thus decreased to constant. We name this dictionary here as \texttt{entry\_finder}. Now, we modify the previous code. The following code shows how to add items into a heap that is associated with \texttt{entry\_finder}: -\begin{lstlisting}[language=Python] -# A heap associated with entry_finder -counter = itertools.count() -entry_finder = {} -h = [[3, 'e'], [3, 'd'], [10, 'c'], [5,'b'], [3, 'a']] -heap = [] -for p, t in h: - item = [p, next(counter), t] - heap.append(item) - entry_finder[t] = item -heapify(heap) -\end{lstlisting} -Then, we execute the remove and update operations with \texttt{entry\_finder}. 
-\begin{lstlisting}[language=Python] -REMOVED = '' -def remove_task(task_id): - if task_id in entry_finder: - entry_finder[task_id][2] = REMOVED - entry_finder.pop(task_id) # delete from the dictionary - return - -# Remove task 'd' -remove_task('d') -# Updata task 'b''s priority to 14 -remove_task('b') -new_item = [14, next(counter), 'b'] -heappush(heap, new_item) -entry_finder['b'] = new_item -\end{lstlisting} -\end{itemize} -In the notebook, we provide a comprehensive class named \texttt{PriorityQueue} that implements just what we have discussed in this section. - -\subsubsection{Implement with \texttt{PriorityQueue} class} Class \texttt{PriorityQueue()} class has the same member functions as class \texttt{Queue()} and \texttt{LifoQueue()} which are shown in Table~\ref{tab:methods_of_queue}. Therefore, we skip the introduction. First, we built a queue with: -\begin{lstlisting}[language=Python] -from queue import PriorityQueue -data = [[3, 'e'], [3, 'd'], [10, 'c'], [5,'b'], [3, 'a']] -pq = PriorityQueue() -for d in data: - pq.put(d) - -process_order = [] -while not pq.empty(): - process_order.append(pq.get()) -\end{lstlisting} -The printout for \texttt{process\_order} shown as follows indicates how \texttt{PriorityQueue} works the same as our \texttt{heapq}: -\begin{lstlisting} -[[3, 'a'], [3, 'd'], [3, 'e'], [5, 'b'], [10, 'c']] -\end{lstlisting} -\paragraph{Customized Object} If we want the higher the value is the higher priority, we demonstrate how to do so with a customized object with two comparison operators: \texttt{<} and \texttt{==} in the class with magic functions \texttt{\_\_lt\_\_()} and \texttt{\_\_eq\_\_()}. 
The code is as follows: -\begin{lstlisting}[language = Python] -class Job(): - def __init__(self, priority, task): - self.priority = priority - self.task = task - return - - def __lt__(self, other): - try: - return self.priority > other.priority - except AttributeError: - return NotImplemented - def __eq__(self, other): - try: - return self.priority == other.priority - except AttributeError: - return NotImplemented -\end{lstlisting} - -Similarly, if we apply the wrapper shown in the section on heapq, we can have a priority queue that has sort stability, supports removing and updating items, and offers constant search time. -\begin{bclogo}[couleur = blue!30, arrondi=0.1,logo=\bccrayon,ombre=true]{In single thread programming, is \textbf{heapq} or \textbf{PriorityQueue} more efficient?} In fact, the PriorityQueue implementation uses heapq under the hood to do all prioritisation work, with the base Queue class providing the locking to make it thread-safe. The heapq module, in contrast, offers no locking and operates on standard list objects. This makes the heapq module faster; there is no locking overhead. In addition, you are free to use the various heapq functions in different, novel ways, while the PriorityQueue only offers the straight-up queueing functionality. -\end{bclogo} -\subsubsection{Hands-on Example} -\paragraph{Top K Frequent Elements (L347, medium)} Given a non-empty array of integers, return the k most frequent elements. -\begin{lstlisting}[numbers=none] -Example 1: -Input: nums = [1,1,1,2,2,3], k = 2 -Output: [1,2] - -Example 2: -Input: nums = [1], k = 1 -Output: [1] -\end{lstlisting} - -\paragraph{Analysis:} We first use a hashmap to record each item and its frequency. Then, the problem becomes obtaining the top k most frequent items in our counter: we can either use sorting or use a heap. Our exemplary code here is for the purpose of getting familiar with related Python modules. 
-\begin{itemize} - \item \textbf{Counter().} \texttt{Counter()} has function \texttt{most\_common(k)} that will return the top $k$ most frequent items. The time complexity is $O(n \log n)$. -\begin{lstlisting}[language=Python] -from collections import Counter -def topKFrequent(nums, k): - return [x for x, _ in Counter(nums).most_common(k)] -\end{lstlisting} - -\item \textbf{\texttt{heapq.nlargest()}}. The complexity is $O(n \log k)$, which is better than $O(n \log n)$ when $k$ is small. -\begin{lstlisting}[language=Python] -from collections import Counter -import heapq -def topKFrequent(nums, k): - count = Counter(nums) - # Use the value to compare with - return heapq.nlargest(k, count.keys(), key=lambda x: count[x]) -\end{lstlisting} -\texttt{key=lambda x: count[x]} can also be replaced with \texttt{key=count.get}. - -\item \textbf{PriorityQueue():} We put the negative count into the priority queue so that it can perform as a max-heap. -\begin{lstlisting}[language=Python] -from queue import PriorityQueue -def topKFrequent(self, nums, k): - count = Counter(nums) - pq = PriorityQueue() - for key, c in count.items(): - pq.put((-c, key)) - return [pq.get()[1] for i in range(k)] -\end{lstlisting} -\end{itemize} -\section{Bonus} -\label{heap_sec_bonus} -\paragraph{Fibonacci heap} With a Fibonacci heap, insert() and getHighestPriority() can be implemented in $O(1)$ amortized time and deleteHighestPriority() can be implemented in $O(\log n)$ amortized time. -%%%%%%%%%%%%%%%%%%%%%%%LeetCode problems%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\section{Exercises} - -\textbf{selection with key word: kth. These problems can be solved by sorting, using heap, or use quickselect} -\begin{enumerate} -\item 703. Kth Largest Element in a Stream (easy) - \item 215. Kth Largest Element in an Array (medium) - \item 347. Top K Frequent Elements (medium) - \item 373. Find K Pairs with Smallest Sums (medium) - \item 378. 
Kth Smallest Element in a Sorted Matrix (medium) -\end{enumerate} -\textbf{priority queue or quicksort, quickselect} -\begin{enumerate} - \item 23. Merge k Sorted Lists (hard) - \item 253. Meeting Rooms II (medium) - \item 621. Task Scheduler (medium) -\end{enumerate} - -\end{document} \ No newline at end of file diff --git a/Easy-Book/chapters/chapter_combinatorial_search.tex b/Easy-Book/chapters/chapter_combinatorial_search.tex deleted file mode 100644 index 3812786..0000000 --- a/Easy-Book/chapters/chapter_combinatorial_search.tex +++ /dev/null @@ -1,788 +0,0 @@ -\documentclass[../main.tex]{subfiles} - -\begin{document} -\chapter{Combinatorial Search} -So far, we have learned the most fundamental search strategies on general data structures such as array, linked list, graph, and tree. In this chapter, instead of searching on explicit and well defined data structures, we extend and discuss more \textit{exhaustive search} algorithms that can solve rather obscure and challenging \textit{combinatorial problems}, such as sudoku and the famous Travelling Salesman Problem. For combinatorial problems, we have to figure out the potential search space, and rummage through it for a solution. -\section{Introduction} -A combinatorial search problem consists of $n$ items and a requirement to find a solution, i.e., a set of $L < n$ items that satisfy specified conditions or constraints. For example, in a sudoku problem, a $9\times 9$ grid is partially filled with numbers between 1 and 9, and we need to fill the empty spots with numbers that satisfy the following conditions: -\begin{figure}[!ht] - \centering - \includegraphics[width= 0.35\columnwidth]{fig/250px-Sudoku-by-L2G-20050714.png} - \includegraphics[width= 0.35\columnwidth]{fig/250px-Sudoku-by-L2G-20050714_solution.png} - \caption{A Sudoku puzzle and its solution} - \label{fig:backtrack_puzzle_1} -\end{figure} -\begin{enumerate} - \item Each row has all numbers from 1 to 9. - \item Each column has all numbers from 1 to 9. 
- \item Each sub-grid ($3 \times3$) has all numbers from 1 to 9. -\end{enumerate} -This sudoku together with one possible solution is shown in Fig.~\ref{fig:backtrack_puzzle_1}. In this case, we have $81$ items, and we are required to fill 51 empty items with the above three constraints. -\paragraph{Model Combinatorial Search Problems} -We can model the combinatorial search solution as a vector $s = (s_0, s_1, ..., s_{L-1})$, where each variable $s_i$ is selected from a finite set $A$, which is called the \textit{domain} for each variable. Such a vector might represent an arrangement where $s_i$ contains the $i$-th item of a permutation; in a combination problem, $s_i$ can be a boolean that denotes whether the $i$-th item is selected; or it can represent a path in a graph or a sequence of moves in a game. In the sudoku problem, each $s_i$ takes a number in the range $[1, 9]$. - - -\paragraph{Problem Categories} Combinatorial search problems arise in many areas of computer science such as artificial intelligence, operations research, bioinformatics, and electronic commerce. These problems typically involve finding a \textit{grouping}, \textit{ordering}, or \textit{assignment} of a discrete, finite set of objects that satisfy given conditions or constraints. We introduce two well-studied types of problems that are more likely to be NP-hard and of at least exponential complexity: -% in computer science mainly studies algorithms that solve exponential or even NP-hard problems, such as: -\begin{enumerate} - \item Constraint Satisfaction Problems (CSP) are mathematical questions defined as a set of variables whose state must satisfy a number of constraints or limitations (mathematical equations or inequalities), such as Sudoku, N-queen, map coloring, Crosswords, and so on. The size of the search space of CSPs can be roughly given as: - \begin{align} - O(cd^L) - \end{align} - where there are $L$ variables, each with domain size $d$, and there are $c$ constraints to check. 
- \item Combinatorial optimization problems consist of searching for maxima or minima of an objective function $F$ whose domain is a discrete but large configuration space. Some classic examples are: - \begin{itemize} - \item Travelling Salesman Problems (TSP): given position $(x, y)$ of $n$ different cities, find the shortest possible path that visits each city exactly once. - \item Integer Linear Programming: maximize a specified linear combination of a set of integers $X_1, .., X_{n}$ subject to a set of linear constraints each of the form: - \begin{align} - a_1X_1 + ... + a_{n}X_{n} \leq c - \end{align} - \item Knapsack Problems: Given a set of items, each with a weight and a value, determine the number of each item to include in a collection so that the total weight is less than or equal to a given limit and the total value is as large as possible. - \end{itemize} -\end{enumerate} - - -\paragraph{Search Strategies} From Chapter Discreet Programming, we have learned the basic enumerative combinatorics, including counting principles and knowledge on permutations, combinations, partitions, subsets, and subsequences. Combinatorial Search builds atop this subject, and together through different search strategies such as depth-first search and best-first search, it is able to enumerate the search space and find the solution(s) with necessary speedup methods. In this chapter, we only discuss about complete search and only acknowledge the existence of approximate search techniques. - -\textit{Backtracking} is a process of depth-first based search where it ``builds'' the search tree on the fly incrementally instead of having a tree/graph structure beforehand to search through. Backtracking fits to solve combinatorial search problems because: -\begin{enumerate} - \item It is space efficient for the usage of a DFS and the candidates are built incrementally and their validity to fit a solution is checked right away. 
- \item It is time efficient because some partial candidates can be pruned if the algorithm believes that they will not lead to our final complete solution. -\end{enumerate} - - The ordering of variables $s_0, ..., s_{L-1}$ can potentially affect the size of the search space. Thus, backtracking search relies on one or more heuristics to select which variable to consider next. \textit{Look-ahead} is one such heuristic that is preferably applied to check the effects of choosing a given variable to evaluate or to decide the order of values to give to it. - - There are other Breadth-first Search based strategies that might work better than backtracking; for example, for combinatorial optimization problems, best-first branch and bound search might be more efficient than its depth-first counterpart. - -\paragraph{Speedups} - -The speedup methods are well studied in computer science, and we list two general ways to prune unqualified or unnecessary branches during the search of backtracking: -\begin{enumerate} - \item Branch and Prune: This method prunes the unqualified branches with constraints of the problems. This is usually applied to solve constraint satisfaction problems (CSPs). - \item Branch and Bound: This method prunes unnecessary branches via comparing an estimation of a partial candidate with a found global best solution. If the estimation states that the partial candidate will never lead us to a better solution, we cut off this branch. This technique can be applied to solve general optimization problems, such as Travelling Salesman Problems (TSP), knapsack problems, and so on. -\end{enumerate} -% To notice that these speedup techniques are general for searching, not specifically for Backtracking. For example, branch and bound often be linked to best-first search. 
- -\section{Backtracking} -In this section, we first introduce the technique of backtracking, and then demonstrate it by implementing common enumerative combinatorics seen in Chapter Discrete Programming. -\subsection{Introduction} -Backtracking search is an exhaustive search algorithm (depth-first search) that systematically assigns all possible combinations of values to the variables and checks if these assignments constitute a solution. Backtracking is all about choices and consequences and it shows the following two properties: - \begin{enumerate} - \item \textbf{No Repetition and Completion:} It is a systematic generating method that enumerates each possible state at most once: it will not miss any valid solution but avoids repetitions. If there exists ``correct'' solution(s), it is guaranteed to be found. This property makes it ideal for solving combinatorial problems where the search space has to be constructed and enumerated. Therefore, the worst-case running time of backtracking search is exponential in the length of the state ($b^L$, where $b$ is the average number of choices for each variable in the state).% such as combination and permutation which requires us to enumerate all possible solutions. We focus on demonstrating this property in this section. - \item \textbf{Search Pruning:} Along the way of working with partial solutions, in some cases, it is possible for us to decide if they will lead to a valid \textit{complete solution}. As soon as the algorithm is confident that the partial configuration is either invalid or nonoptimal, it abandons this \textit{partial candidate}, and then ``backtracks'' (returns to the upper level), and resets to the upper level's state so that the search process can continue to explore the next branch for the sake of efficiency. This is called \textit{search pruning}, with which the algorithm ends up visiting each vertex less than once on average.
This property makes backtracking the most promising way to solve CSPs and combinatorial optimization problems. - \end{enumerate} - -Solving sudoku problem with backtracking algorithm, each time at a level in the DFS, it tries to extend the last partial solution $s = (s_0, s_1, ..., s_{k})$ by trying out all 9 numbers at $s_{k+1}$, say we choose 1 at this step. It testifies the partial solution with the desired solution: -\begin{enumerate} - \item If the partial solution $s = (s_0, s_1, ..., s_{k}, 1)$ is still valid, move on to the next level and work on trying out $s_{k+2}$. - \item If the partial solution is invalid and is impossible to lead to a complete solution, it ``backtracks'' to the last level and resets the state as $s = (s_0, s_1, ..., s_{k})$ so that it can try our other choices if there are some left(which in our example, we will try $s_{k+1}=2$) or keep ``backtracking'' to even upper level. -\end{enumerate} -The process should be way clearer once we have learned the examples in the following subsections. - -\subsection{Permutations} -Given a list of items, generate all possible permutations of these items. If the set has duplicated items, only enumerate all unique permutations. -\subsubsection{No Duplicates(L46. Permutations)} When there are no duplicates, from Chapter Discreet Programming, we know the number of all permutations are: -\begin{equation} - p(n, m) = \frac{n!}{(n-m)!} -\end{equation} -where $m$ is the number of items we choose from the total $n$ items to make the permutations. -\begin{lstlisting}[numbers=none] -For example: -a = [1, 2, 3] -There are 6 total permutations: -[1, 2, 3], [1, 3, 2], -[2, 1, 3], [2, 3, 1], -[3, 1, 2], [3, 2, 1] -\end{lstlisting} -\paragraph{Analysis} Let us apply the philosophy of backtracking technique. We have to build a state with length $3$, and each variable in the state has three choices: 1, 2, and 3. 
The constraint here comes from permutation which requires that no two variables in the state have the same value. To build this incrementally with backtracking, we start with an empty state \texttt{[]}. At first, we have three options, and we get three partial results $[1], [2]$, and $[3]$. Next, we handle the second variable in the state: for $[1]$, we can choose either 2 or 3, getting $[1,2]$ and $[1,3]$; same for $[2]$, where we end up with $[2,1]$ and $[2,3]$; for $[3]$, we have $[3,1]$ and $[3,2]$. At last, each partial result has only one option, and we get all permutations as shown in the example. We visualize this incremental building of candidates in Fig.~\ref{fig:backtrack_permutation}. -\begin{figure}[h] - \centering - \includegraphics[width= 0.8\columnwidth]{fig/permutation.png} - \caption{The search tree of permutation} - \label{fig:backtrack_permutation} -\end{figure} - -However, we only managed to enumerate the search space, but not systematically or recursively with the Depth-first search process. With DFS, we depict the traversal order of the vertices in the virtual search space with red arrows in Fig.~\ref{fig:backtrack_permutation}. The backward arrows mark the ``backtracking'' process, where we have to reset the state to the upper level. - -\paragraph{Implementation} We use a list of booleans \texttt{used} to track which item is used in the search process. -\texttt{n} is the total number of items, \texttt{d} is the depth of the depth-first search process, \texttt{curr} is the current state, and \texttt{ans} is to save all permutations.
The following code, we generate $p(n, m)$ -\begin{lstlisting}[language=Python] -def p_n_m(a, n, m, d, used, curr, ans): - if d == m: #end condition - ans.append(curr[::]) - return - - for i in range(n): - if not used[i]: - # generate the next solution from curr - curr.append(a[i]) - used[i] = True - print(curr) - # move to the next solution - p_n_m(a, n, m, d + 1, used, curr, ans) - #backtrack to previous partial state - curr.pop() - used[i] = False - return -\end{lstlisting} -Check out the running process in the source code. - -\paragraph{Alternative: Swapping Method} -\begin{figure}[h] - \centering - \includegraphics[width= 0.8\columnwidth]{fig/permutation_swap.png} - \caption{The search tree of permutation by swapping. The indexes of items to be swapped are represented as a two element tuple. } - \label{fig:backtrack_permutation_swap} -\end{figure} -We first start with a complete state, such that $s=[1, 2, 3]$ in our case. By swapping $1$ and $2$, we get $[2, 1, 3]$ and $[2, 3, 1]$ can be obtained by swapping $1$ and $3$ on top of $[2, 1, 3]$. With all permutations as leaves in the search space, the generating process is similar to Fig.~\ref{fig:backtrack_permutation}. We show this alternative process in Fig.~\ref{fig:backtrack_permutation_swap}. At first, we swap index 0 with all other indexes, including 0, 1, and 2. At the second layer, we move on to swap index 1 with all other successive indexes, and so on for all other layers. The Python code is as: -\begin{lstlisting}[language=Python] -ans = [] -def permutate(a, d): - global ans - if d == len(a): - ans.append(a[::]) - for i in range(d, len(a)): - a[i], a[d] = a[d], a[i] - permutate(a, d+1) - a[i], a[d] = a[d], a[i] - return -\end{lstlisting} -There is Johnson-Trotter algorithm that utilizes such swapping method, which avoids recursion, and instead computes the permutations by an iterative method. -\subsubsection{With Duplicates(47. 
Permutations II)} -We already know that $p(n, n)$ is further decided by the duplicates within the $n$ items. Assume that in total $d$ distinct items are repeated, and the $i$-th such item appears $x_i$ times; then the number of all arrangements $pd(n, n)$ is: -\begin{align} - pd(n, n) &= \frac{p(n, n)}{x_0!x_1!...x_{d-1}!}, \\ - \text{s.t. } & \sum_{i=0}^{d-1} x_i \leq n -\end{align} -For example, when $a = [1, 2, 2, 3]$, there are $\frac{4!}{2!}$ unique permutations, which is 12 in total, and are listed below: -\begin{lstlisting}[numbers=none] -[1, 2, 2, 3], [1, 2, 3, 2], [1, 3, 2, 2], -[2, 1, 2, 3], [2, 1, 3, 2], [2, 2, 1, 3], -[2, 2, 3, 1], [2, 3, 1, 2], [2, 3, 2, 1], -[3, 1, 2, 2], [3, 2, 1, 2], [3, 2, 2, 1] -\end{lstlisting} -\paragraph{Analysis} -\begin{figure}[h] - \centering - \includegraphics[width= 1.2\columnwidth]{fig/permutation_repeat.png} - \caption{The search tree of permutation with repetition} - \label{fig:backtrack_permutation_duplicates} -\end{figure} -The enumeration of all of these possible permutations can be obtained with backtracking exactly the same as if there are no duplicates. However, this is not efficient since it has doubled the search space with repeated permutations. This is our first time applying the Branch and Prune method: we avoid repetition by pruning off redundant branches. - -One main advantage of backtracking is not to save all intermediate states, thus we should find a mechanism that avoids generating these redundant intermediate states in the first place. One solution is that we sort all $n$ items, making all repeated items adjacent to each other. We know if the current intermediate state is redundant by simply comparing this item with its predecessor: if they are equal, we move on from building state with this item to the next item in line. The search tree of our example is shown in Fig.~\ref{fig:backtrack_permutation_duplicates}.
- -\paragraph{Implementation} The implementation is highly similar to previous standard permutation code other than three different points: -\begin{enumerate} -\item Before the items are called by \texttt{permutate}, they are sorted first. -\item A simple condition check to avoid generating repeat states. - \item We used a dictionary data structure \texttt{tracker} which has all unique items as keys and each item's corresponding occurrence as values to replace the boolean vector \texttt{used} for slightly better space efficiency. -\end{enumerate} -The Python code is as: -\begin{lstlisting}[language=Python] -from collections import Counter -def permuteDup(nums, k): - ans = [] - def permutate(d, n, k, curr, tracker): - nonlocal ans - if d == k: - ans.append(curr) - return - for i in range(n): - if tracker[nums[i]] == 0: - continue - if i - 1 >= 0 and nums[i] == nums[i-1]: - continue - tracker[nums[i]] -= 1 - curr.append(nums[i]) - - permutate(d+1, n, k, curr[:], tracker) - curr.pop() - tracker[nums[i]] += 1 - return - - nums.sort() - permutate(0, len(nums), k, [], Counter(nums)) - return ans -\end{lstlisting} - -\begin{bclogo}[couleur = blue!30, arrondi=0.1,logo=\bccrayon,ombre=true]{Can you extend the swap method based permutation to handle duplicates?} -\end{bclogo} - -\subsubsection{Discussion} -From the example of permutation, we have demonstrated how backtracking works to construct candidates with an implicit search tree structure: the root node is the initial state, any internal node represents intermediate states, and all leaves are our candidates which in this case there are $n!$ for $p(n, n)$ permutation. In this subsection, we want to point out the unique properties and its computational and space complexities. -\paragraph{Two Passes} Backtracking builds an implicit search tree on the fly, and it does not memorize any intermediate state. 
It visits the vertices - in the search tree in two passes: - \begin{enumerate} - \item Forward pass: it builds the solution incrementally and reaches the leaf nodes in a DFS fashion. One example of forward pass is $[] \to [1] \to [1,2] \to [1,2,3]$. - \item Backward pass: as the returning process from recursion of DFS, it also backtracks to previous state. One example of backward pass is $[1,2,3] \to [1,2] \to [1]$. - \end{enumerate} The forward pass is what builds the solution \textbf{incrementally}. -The change of \texttt{curr} in the source code indicates all vertices and the process of backtracking: it starts with $[]$ and ends with $[]$. This is the core characteristic of backtracking. We print out the process for the example as: -\begin{lstlisting}[numbers=none] -[]->[1]->[1, 2]->[1, 2, 3]->backtrack: [1, 2] -backtrack: [1] -[1, 3]->[1, 3, 2]->backtrack: [1, 3] -backtrack: [1] -backtrack: [] -[2]->[2, 1]->[2, 1, 3]->backtrack: [2, 1] -backtrack: [2] -[2, 3]->[2, 3, 1]->backtrack: [2, 3] -backtrack: [2] -backtrack: [] -[3]->[3, 1]->[3, 1, 2]->backtrack: [3, 1] -backtrack: [3] -[3, 2]->[3, 2, 1]->backtrack: [3, 2] -backtrack: [3] -backtrack: [] -\end{lstlisting} - -\paragraph{Time Complexity of Permutation} -In the search tree of permutation in Fig.~\ref{fig:backtrack_permutation}, there are in total $|V|$ nodes, which equals $\sum_{k=0}^{n} p(n, k)$. Because in a tree the number of edges $|E|$ is $|V|-1$, the time complexity $O(|V|+|E|)$ is the same as $O(|V|)$. Since $p(n, n)$ alone takes $n!$ time, enumerating permutations requires factorial time, which is intractable for large $n$. - -\paragraph{Space Complexity} A standard depth-first search consumes $O(bd)$ space in the worst case to execute, where $b$ is the branching factor and $d$ is the depth of the search tree. In the combinatorial search problems, usually depth and branching are decided by the total number of variables in the state, making $b \sim d \sim n$. In backtracking, we have space complexity $O(n^2)$.
However, in standard DFS, the input--tree or graph data structure--is given and not attributed to space complexity. For an NP-hard combinatorial search problem, this input is often exponential. Backtracking search outcompetes the standard DFS by avoiding such space consumption; it only keeps a dynamic data structure (\texttt{curr}) to construct nodes on the fly. % The slight different can be critical for problems with large state description. - -\subsection{Combinations} -Given a list of $n$ items, generate all possible combinations of these items. If the input has duplicated items, only enumerate unique combinations. -\subsubsection{No Duplicates (L78. Subsets -)} -From Chapter Discrete Programming, we list the powerset--all $m$-subsets, $m\in[0, n]$ as: -\begin{align} - C(n, m) = \frac{P(n, m)}{P(m, m)} = \frac{n!}{(n-m)!m!} -\end{align} -For example, when $a=[1, 2, 3]$, there are in total $8$ $m$-subsets, they are: -\begin{lstlisting}[numbers=none] -C(3, 0): [] -C(3, 1): [1], [2], [3] -C(3, 2): [1, 2], [1, 3], [2, 3] -C(3, 3): [1, 2, 3] -\end{lstlisting} -\paragraph{Analysis} -\begin{figure}[!ht] - \centering - \includegraphics[width= 0.5\columnwidth]{fig/combination.png} - \caption{The Search Tree of Combination.} - \label{fig:backtrack_combination} -\end{figure} -We can simply reuse the method of permutation, but with a problem that it generates lots of duplicates. For example, $P(3, 2)$ includes $[1, 2]$ and $[2, 1]$ which are indeed the same subset. Of course, we can check redundancy with saved $m$-subsets, but it is not ideal. A systematic solution that avoids duplicates all along is preferred. If we limit the items we put into the $m$-subsets to be only increasing (of indexes of items or of values of items), then $[2, 1]$, $[3,1]$, and $[3,2]$ will never be generated. The enumeration of combination through backtracking search is shown in Fig.~\ref{fig:backtrack_combination}.
- -\paragraph{Implementation} Two modifications based on permutation code: -\begin{enumerate} - \item \texttt{for} loop: in the loop to iterate all possible candidates, we limit the candidates to be having larger indexes only. - \item We do not have to use a data structure to track the state of each candidate because any candidate that has larger index is a valid candidate. -\end{enumerate} -We use \texttt{start} to track the starting position of valid candidates. The code of combination is: -\begin{lstlisting}[language=Python] -def C_n_k(a, n, k, start, d, curr, ans): - if d == k: #end condition - ans.append(curr[::]) - return - - for i in range(start, n): - curr.append(a[i]) - C_n_k(a, n, k, i+1, d+1, curr, ans) - curr.pop() - return -\end{lstlisting} - -\paragraph{Alternative: 0 and 1 Selection} We have discussed that a powerset written as $P(S)$. With each item either being appear or not appear in the resulting set makes the value set $\{0, 1\}$, resulting $|P(S)| = 2^n$. Follow this pattern, with our given example, we can alternatively generate a powerset like this: -\begin{lstlisting}[numbers=none] -s sets -1 {1}, {} -2 {1,2}, {1}, {2}, {} -3 {1,2,3}, {1,2}, {1, 3}, {3}, {2, 3}, {2}, {3}, {} -\end{lstlisting} -This process can be better visualized in a tree as in Fig.~\ref{}. We can see this process results $2^n$ leaves compared with our previous implementation which has a total of $2^n$ nodes is slightly less efficient. The code is as: -\begin{lstlisting}[language=Python] -def powerset(a, n, d, curr, ans): - if d == n: - ans.append(curr[::]) - return - - # Case 1: select item - curr.append(a[d]) - powerset(a, n, d + 1, curr, ans) - # Case 2: not select item - curr.pop() - powerset(a, n, d + 1, curr, ans) - return -\end{lstlisting} - -\paragraph{Time Complexity} The total nodes within the implicit search space of combination shown in Fig.~\ref{fig:backtrack_combination} is $\sum_{k=0}^{n}C_{n}^{k} =2^n$, which was explained in Chapter Discreet Programming. 
Thus, the time complexity of enumerating the powerset is $O(2^n)$, which is lower than the $O(n!)$ that comes with the permutation. - -\paragraph{Space Complexity} Similarly, combination with backtracking search uses slightly less space. But, we can still claim the upper bound to be $O(n^2)$. - -\subsubsection{With Duplicates(L90. Subsets II)} - Assume we have $m$ unique items, and the frequency of each is marked as $x_i$, with $\sum_{i=0}^{m-1}x_i = n$. -\begin{align} -\sum_{k=0}^{n} c(n, k) = \prod_{i=0}^{m-1}(x_i + 1) -\end{align} - -For example, when $a=[1, 2, 2, 3]$, there are $2\times 3 \times 2 = 12$ combinations in the powerset, they are listed below: -\begin{lstlisting}[numbers=none] -[], [1], [2], [3], [1, 2], [1, 3], [2, 2], [2, 3], -[1, 2, 2], [1, 2, 3], [2, 2, 3], -[1, 2, 2, 3] -\end{lstlisting} -However, counting $c(n, k)$ with duplicates in the input relies on the specific input with specific distribution of these items. We are still able to count by enumerating with backtracking search. -\paragraph{Analysis and Implementation} The enumeration of the powerset with backtracking search is the same as handling the iterations of choice in the enumeration of permutation with duplicates. We first sort our items in increasing order of the values. Then we replace the \texttt{for} loop from the above code with the following code snippet to handle the repetition of items from the input: -\begin{lstlisting}[language=Python] - for i in range(start, n): - if i - 1 >= start and a[i] == a[i-1]: - continue - ... -\end{lstlisting} -\subsection{More Combinatorics} -In this section, we supplement more use cases of backtracking search in the matter of other types of combinatorics. -\subsubsection{All Paths in Graph} -\begin{figure}[ht!] - \centering - \includegraphics[width=0.4\columnwidth]{fig/all_path_demo.png} - \caption{Acyclic graph} - \label{fig:my_label} -\end{figure} -For a given acyclic graph, enumerate all paths from a starting vertex $s$.
For example, for the graph shown in Fig.~\ref{fig:my_label}, and a starting vertex $0$, print out the following paths: -\begin{lstlisting}[numbers=none] -0, 0->1, 0->1->2, 0->1->2->5, 0->1->3, 0->1->4, 0->2, 0->2->5 -\end{lstlisting} -\paragraph{Analysis} The backtracking search here is the same as how to apply a DFS on an explicit graph, with rather one extra point: a state $path$ which might have up to $n$ items ( the total vertices of a graph). In the implementation, the \texttt{path} vector will dynamically be modified to track all paths constructed as the go of the DFS. The code is offered as: -% Backtracking technique can be naturally used in graph path traversal. One example is to find all possible paths from a source to the target. One simpler occasion is when the graph has no cycles. Backtrack technique can enumerate all paths in the graph exactly once for each. - -% The implementation is as follow: we still use dfs, because there has no cycles, we have no need to track the visiting state of each node. We generate the possible answer with backtracking technique through the \texttt{path} variable to track each state. -\begin{lstlisting}[language=Python] -def all_paths(g, s, path, ans): - ans.append(path[::]) - for v in g[s]: - path.append(v) - all_paths(g, v, path, ans) - path.pop() -\end{lstlisting} -You can run the above code in the Goolge Colab to see how it works on our given example. -% Feed in the above network and run the following code: -% \begin{lstlisting}[language=Python] -% al = [[1], [2], [4], [], [3, 5], [6], []] -% ans = [] -% path = [0] -% all_paths(al, 0, path, ans) -% \end{lstlisting} -% With the printing, we can see the whole process, \texttt{path} changes as the description of backtrack. 
\begin{lstlisting}[numbers=none] -% [0, 1] -% [0, 1, 2] -% [0, 1, 2, 4] -% [0, 1, 2, 4, 3] -% [0, 1, 2, 4] backtrack -% [0, 1, 2, 4, 5] -% [0, 1, 2, 4, 5, 6] -% [0, 1, 2, 4, 5] backtrack -% [0, 1, 2, 4] backtrack -% [0, 1, 2] backtrack -% [0, 1] backtrack -% [0] backtrack -% \end{lstlisting} -% We can see each state, we can always have a matching backtrack state. -% \begin{bclogo}[couleur = blue!30, arrondi=0.1,logo=\bccrayon,ombre=true]{What to do if there is a cycle?} -% \end{bclogo} - -\subsubsection{Subsequences(940. Distinct Subsequences II)} Given a string, list all unique subsequences. There may or may not exist duplicated characters in the string. For example, when $s='123'$, there are in total 8 subsequences, which are: -\begin{lstlisting}[numbers=none] -'', '1', '2', '3', '12', '13', '23', '123' -\end{lstlisting} -When $s='1223'$ which comes with duplicates, there are 12 subsequences: -\begin{lstlisting}[numbers=none] -'', '1', '2', '3', '12', '13', '22', '23', -'122', '123', '223', -'1223' -\end{lstlisting} -\paragraph{Analysis} From Chapter Discrete Programming, we have explained that we can count the number of unique subsequences through recurrence relation and pointed out the relation of subsequences with subsets (combinations). Let $seq(n)$ denote the set of unique subsequences of a sequence and $set(n)$ the set of unique subsets of a set, with $n$ items in the input. Every subset corresponds to some subsequence, so the set of subsequences has cardinality at least that of the subsets, $|seq(n)|\geq |set(n)|$. From the above example, we can also see that when there are only unique items in the sequence or when there are duplicates but all duplicates of an item are adjacent to each other: -\begin{itemize} - \item The cardinalities of subsequences and subsets are equal, $|seq(n)| = |set(n)|$. - \item The subsequences and subsets share the same items when the ordering of the subsequences is ignored.
-\end{itemize} -This indicates that the process of enumerating subsequences is almost the same as that of enumerating a powerset. This should give us a good start. - -\paragraph{Implementation} However, if we change the ordering of the duplicated characters in the above string as $s='1232'$, there are in total 14 subsequences instead: -\begin{lstlisting}[numbers=none] -'', '1', '2', '3', '12', '13', '23', '22', '32', -'123', '122', '132', '232', -'1232' -\end{lstlisting} -\begin{figure}[!ht] - \centering - \includegraphics[width= 1.0\columnwidth]{fig/subsequence.png} - \caption{The Search Tree of subsequences. The red circled nodes are redundant nodes. Each node has a variable $s$ to indicate the starting index of candidates to add to current subsequence. $i$ indicates the candidate to add to the current node.} - \label{fig:backtrack_subsequence} -\end{figure} -Therefore, our code to handle duplicates should differ from that of a powerset. In the case of powerset, the algorithm first sorts items so that all duplicates are adjacent to each other, making the checking of repetition as simple as checking the equality of item with its predecessor. However, in a given sequence, the duplicated items are not adjacent most of the time, so we have to do things differently. We draw the search tree of enumerating all subsequences of string ``1232'' in Fig.~\ref{fig:backtrack_subsequence}. From the figure, we can observe that to avoid redundant branches, we simply check if a current new item in the subsequence is repeating by comparing it with all of its predecessors in range $[s, i)$.
The code for checking repetition is as: -\begin{lstlisting}[language=Python] -def check_repetition(start, i, a): - for j in range(start, i): - if a[i] == a[j]: - return True - return False -\end{lstlisting} -And the code to enumerate subsequences is: -\begin{lstlisting}[language=Python] -def subseqs(a, n, start, d, curr, ans): - ans.append(''.join(curr[::])) - if d == n: - return - - for i in range(start, n): - if check_repetition(start, i, a): - continue - curr.append(a[i]) - subseqs(a, n, i+1, d+1, curr, ans) - curr.pop() - return -\end{lstlisting} - - - -\subsection{Backtracking in Action} -So far, we have applied backtracking search to enumerate combinatorics. In this section, we shall see how backtracking search along with search pruning speedup methods solve two types of challenging NP-hard problems: Constraint Satisfication Problems (CSPs) and Combinatorial Optimization Problems. - -As we have briefly introduced the speedup methods needed to solve larger scale of CSPs and COPs. For example, assume within the virtual search tree, the algorithm is currently at level 2 with state $s=[s_0, s_1]$. If there are $c$ choices for state $s_1$, and if one choice is testified to be invalid, this will prune off $\frac{1}{c}$ of the whole search space. In this section, we demonstrate backtracking search armored with Branch and Prune method solving CSPs and Branch and Bound solving COPs. - - -\section{Solving CSPs} -Officially, a constraint satisfaction problem(CSP) consists of a set of $n$ variables, each denoted as $s_i$, $i\in[0, n-1]$; their respective value domains, each denoted as $d_i$; and a set of $m$ constraints, each denoted as $c_j$, $j \in [0, m-1]$. A \textit{solution} to a CSP is an assignment of values to all the variables such that no constraint is violated. A \textit{binary} CSP is one in which each of the constraints involves at most two variables. 
A CSP can be represented by a \textit{constraint graph} which has a node for each variable and each constraint, and an arc connecting variable nodes contained in a constraint to the corresponding constraint node. - -We explain a few strategies from the CSP-solver's arsenal that can potentially speedup the process: -\begin{enumerate} - \item Forward Checking: The essential idea is that when a variable $X$ from $s_i$ is instantiated with a value $x$ from its domain $d_i$, the domain of each future uninstantiated variable $Y$ is examined. If a value $y$ is found such that $X=x$ conflicts with $Y=y$, then $y$ is temporarily removed from the domain of $Y$. - \item Variable Ordering: The order in which variables are considered while solving a CSP method can have a substantial impact on the search space. One effective ordering is always select the next variable with the smallest remaining domain. In a dynamic variable ordering, the order of variables is determined as the search progresses, and often goes with forward checking which keeps updating the uninstantiated variables' domains. Selecting variable with the minimal domain first can pinpoint the solution quickly given the fact that the branch is still early on, and branch pruning at this stage is more rewarding. Another reasoning is that each step, when we are multiplying $d_i$ to the cost, we are adding the least expensive one, making this a greedy approach. 
-\end{enumerate} -\subsubsection{Sudoku (L37)} -\begin{figure}[!ht] - \centering - \includegraphics[width= 0.35\columnwidth]{fig/250px-Sudoku-by-L2G-20050714.png} - \includegraphics[width= 0.35\columnwidth]{fig/250px-Sudoku-by-L2G-20050714_solution.png} - \caption{A Sudoku puzzle and its solution} - \label{fig:backtrack_puzzle_2} -\end{figure} -A Sudoku grid shown in Fig.~\ref{fig:backtrack_puzzle_2} is a $n^2\times n^2$ grid, arranged into $n^2$ $n\times n$ mini-grids each containing the values $1,...,n^2$ such that no value is repeated in any row, column (or mini-grid). -\paragraph{Search Space} -First, we analyze the number of distinct states in the search space which relies on how we construct the intermediate states and our knowledge in Enumerative combinatorics. We discuss two different formulations on $9 \times 9$ grid: -\begin{enumerate} - \item For each empty cell in the puzzle, we create a set by taking values $1, ..., 9$ and removing from it those values that appear as a given in the same row, column, or mini-grid as that cell. Assume we have $m$ spots and the size of the corresponding candidate set of each spot is $c_i$; an initial cost estimate can then be obtained, which is: - \begin{align} - T(n) = \prod_{i=0}^{m-1} c_i - \end{align} - \item Each row can be represented by a 9-tuple, and there will be 9 rows in total, resulting in 9 9-tuples to represent the search state. With $c_i$ as the number of non-given values in the $i$-th 9-tuple, there are $c_i!$ ways of ordering these values by permuting. The number of different states in the search space is thus: - \begin{align} - T(n) = \prod_{i=0}^{8} c_i! - \end{align} -\end{enumerate} -The two different ways each take a different approach to formulate the state space, making their corresponding backtracking searches differ too. We mainly focus on the first formulation with backtracking search.
-\paragraph{Speedups} -% \begin{figure}[h] -% \centering -% \includegraphics[width= 0.5\columnwidth]{fig/sudoku_backtracking.png} -% \caption{Partial search tree of Sudoku in Fig.~\ref{fig:backtrack_puzzle_2}} -% \label{fig:backtrack_sudoku} -% \end{figure} -Assume we know all empty spots (variables) to fill in and we construct the search tree using backtracking. In our source code, we did an experiment comparing the effect of ordering variables with the minimal-domain-first rule against arbitrary ordering. The experiment shows that the first method is more than 100 times faster than the second when solving our exemplary Sudoku puzzle. Therefore, we decide to always select the variable that has the smallest domain set to proceed next in the backtracking. - -Further, we apply forward checking: for the current variable and a value we are able to assign, we recompute all the remaining empty spots' domain sets, and use the updated domain sets to decide: -\begin{itemize} - \item Whether this assignment will lead to an empty domain for any of the other remaining spots; if so, we terminate the search and backtrack. - \item The spot to select next time with the ordering rule we choose. -\end{itemize} -\paragraph{Implementation} We set aside \texttt{row\_state} and \texttt{col\_state}, two lists of length 9, and \texttt{block\_state}, a $3\times 3$ nested list, to track the state of all 9 rows, columns, and mini-grids. Each list has \texttt{set()} data structures as items, saving the numbers filled already in that row, col, and grid respectively. There are two stages in the implementation: -\begin{enumerate} - \item Initialization: We scan each spot in the whole $9\times 9$ grid to record the states of the filled spots and to find all empty spots that are waiting to be filled in. With $(i,j)$ to denote the position of a spot, it corresponds to \texttt{row\_state[i]}, \texttt{col\_state[j]}, and \texttt{block\_state[i//3][j//3]}. We also write two functions to set and reset state with one assignment in the backtracking.
The Python code is as follows: -\begin{lstlisting}[language=Python] -from copy import deepcopy -class Sudoku(): - def __init__(self, board): - self.org_board = deepcopy(board) - self.board = deepcopy(board) - - def init(self): - self.A = set([i for i in range(1,10)]) - self.row_state = [set() for i in range(9)] - self.col_state = [set() for i in range(9)] - self.block_state = [[set() for i in range(3)] for i in range(3)] - self.unfilled = [] - - for i in range(9): - for j in range(9): - c = self.org_board[i][j] - if c == 0: - self.unfilled.append((i, j)) - else: - self.row_state[i].add(c) - self.col_state[j].add(c) - self.block_state[i//3][j//3].add(c) - - def set_state(self, i, j, c): - self.board[i][j] = c - self.row_state[i].add(c) - self.col_state[j].add(c) - self.block_state[i//3][j//3].add(c) - - def reset_state(self, i, j, c): - self.board[i][j] = 0 - self.row_state[i].remove(c) - self.col_state[j].remove(c) - self.block_state[i//3][j//3].remove(c) -\end{lstlisting} - -\item Backtracking search with speedups: In the initialization, we have another variable $A$ used as the domain set of the current processing spot. To get the domain set according to the constraints, a simple set operation is executed as: $A-(row\_state[i]|col\_state[j]|block\_state[i//3][j//3])$. In the solver, each time, to pick a spot, we first update all remaining spots in the \texttt{unfilled} and then choose the one with minimal domain. This process takes $O(n)$ which is trivial compared with the cost of the searching, with $9$ for computing domain set of a single spot, $9n$ for $n$ spots, and adding another $n$ to $9n$ to choose the one with the smallest size. 
The solver is implemented as: -\begin{lstlisting}[language=Python] - def _ret_len(self, args): - i, j = args - option = self.A - (self.row_state[i] | self.col_state[j] | self.block_state[i//3 ][j//3]) - return len(option) - - def solve(self): - if len(self.unfilled) == 0: - return True - # Dynamic variables ordering - i, j = min(self.unfilled, key = self._ret_len) - # Forward looking - option = self.A - (self.row_state[i] | self.col_state[j] | self.block_state[i//3 ][j//3]) - if len(option) == 0: - return False - self.unfilled.remove((i, j)) - for c in option: - self.set_state(i, j, c) - if self.solve(): - return True - # Backtracking - else: - self.reset_state(i, j, c) - # Backtracking - self.unfilled.append((i, j)) - return False -\end{lstlisting} -\end{enumerate} - - -\section{Solving Combinatorial Optimization Problems} -% Combinatorial optimization problems are characterized by an \textit{input}, i.e., a general description of conditions and parameters, and an \textit{objective} defining - -Combinatorial optimization is an emerging field at the forefront of combinatorics and theoretical computer science that aims to use combinatorial techniques to solve discrete optimization problems. From a combinatorics perspective, it interprets complicated questions in terms of a fixed set of objects about which much is already known: sets, graphs, polytopes, and matroids. From the perspective of computer science, combinatorial optimization seeks to improve algorithms by using mathematical methods either to reduce the size of the set of possible solutions or to make the search itself faster. - -Genuinely, the inner complexity of a COP is at least of exponential, and its solutions fall into two classes: exact methods and heuristic methods. 
In some cases, we may be able to find efficient exact algorithms with either greedy algorithms or the dynamic programming technique; for example, finding the shortest paths on a graph can be solved by Dijkstra's algorithm (greedy) or the Bellman-Ford algorithm (dynamic programming), which provide exact optimal solutions in polynomial running time. For more complex problems, a COP can be mathematically formulated as a Mixed Integer Linear Programming (MILP) model, which is generally solved using a linear-programming based branch-and-bound algorithm. But in other cases no exact algorithms are feasible, and the following randomized heuristic search algorithms, though we do not cover them in this section, should be applied: -\begin{enumerate} - \item Random-restart hill-climbing. - \item Simulated annealing. - \item Genetic Algorithms. - \item Tabu search. -\end{enumerate} - -\paragraph{Model Combinatorial Optimization Problems} It is a good practice to formulate COPs with mathematical equations/inequations, which includes three steps: -\begin{enumerate} - \item Choose the decision variables that typically encode the result we are interested in; for example, in a subset problem, each item is a variable, and each variable includes two decisions: take or not take, making its value set $\{0, 1\}$. - \item Express the problem constraints in terms of these decision variables to specify what the feasible solutions of the problem are. - \item Express the objective function to specify the quality of each solution. -\end{enumerate} -There are generally many ways to model a COP. - - -\paragraph{Branch and Bound} Branch and bound (BB, B$\&$B, or BnB) is an algorithm design paradigm for discrete and combinatorial optimization problems, as well as mathematical optimization. A branch-and-bound algorithm consists of a systematic enumeration of candidate solutions by means of state space search: the set of candidate solutions is thought of as forming a rooted tree with the full set at the root. 
The algorithm explores branches of this tree, which represent subsets of the solution set. Before enumerating the candidate solutions of a branch, the branch is checked against upper and lower estimated bounds on the optimal solution, and is discarded if it cannot produce a better solution than the best one found so far by the algorithm. ``Branching'' is to split the problem into a number of subproblems, and ``bounding'' is to find an optimistic estimation of the best solution to the subproblems: an upper bound for a maximization problem or a lower bound for a minimization problem. To get the optimistic estimation, we have to \textit{relax constraints}. In this section, we will exemplify both a minimization problem (TSP) and a maximization problem (Knapsack). - -\paragraph{Search Strategies} In practice, we can apply different search strategies to enumerate the search space of the problem, such as depth-first, best-first, and least-discrepancy search. The way each listed strategy is applied in combinatorial optimization problems is: -\begin{itemize} - \item Depth-First: it prunes when a node estimation is worse than the best found solution. - \item Best-First: it selects the node with the best estimation among the frontier set to expand each time. In the worst scenario, the whole search tree has to be saved, when the estimation is extremely optimistic and not a single branch is pruned in the process. - \item Least-Discrepancy: it trusts a greedy heuristic, and then moves away from the heuristic in a very systematic fashion. -\end{itemize} - -In this section, we discuss exact algorithms using Branch and Bound with a variation of search strategies. During the interviews, questions that have polynomial exact solutions are more likely to appear, proving your mastery of dynamic programming or greedy algorithms design methodologies. However, it is still good to discuss this option. 
- -%%%%%%%%%%%%%%%%%%%% -\subsection{Knapsack Problem} -% In this section we want to showcase more searching strategies applied in solving optimization problems: comparing backtracking and a chance to use best-first search strategy. - -We are given $n$ items with weight and value indicated by two vectors $W$ and $V$ respectively. Now, given a knapsack with capacity $c$, maximize the value of items selected into the knapsack with the total weight being bounded by $c$. Each item can be used at most once. For example, given the following data, the optimal solution is to choose item 1 and 3, with total weight of 8, and optimal value of 80. -\begin{lstlisting}[numbers=none] -c = 10 -W = [5, 8, 3] -V = [45, 48, 35] -\end{lstlisting} -\paragraph{Search Space} In this problem, $x_i$ denotes each item, and $w_i$, $v_i$ for its corresponding weight and value, with $i\in[0, n-1]$. Each item can either be selected or left behind, indicating $x_i\in\{0, 1\}$. The selected items can not exceed the capacity, making $\sum_{i=0}^{n-1} w_i x_i \leq c$. And we capture the total value of the selected items as $\sum_{i=0}^{n-1} v_i x_i$. Putting it all together: -\begin{align} - \max_{x} \quad & \sum_{i=0}^{n-1} v_i x_i\\ - \textrm{s.t.} \quad & \sum_{i=0}^{n-1} w_i x_i \leq c\\ - & x_i\in\{0, 1\} -\end{align} -With each variable having two choices, our search space is as large as $2^n$. - -\paragraph{Branch and Bound} - -To bound the search, we have to develop a heuristic function to estimate an optimistic--maximum--total value a branch can lead to. - -In the case of knapsack problem, the simplest estimation is summing up the total values of selected items so far, and estimating the maximum value by adding the accumulated values of all remaining unselected items along the search. - -% And a branch is checked -% \begin{align} -% \texttt{if } estimate < found, -% \end{align} - -A tighter heuristic function can be obtained with \textbf{constraint relaxation}. 
By relaxing the condition of choosing $x_i$ from $\{0, 1\}$ to $[0, 1]$, a fraction of an item can be chosen at any time. By sorting the items by the value per unit $\frac{v_i}{w_i}$, a better estimate can be obtained by filling the remaining capacity of the knapsack with unselected items, with larger unit values considered first. A branch's estimated upper bound is checked against the best solution found so far, and the branch is discarded if it cannot produce a better solution than the best one found so far by the algorithm. Both heuristic functions are more optimistic compared with the true value, but the latter is a tighter bound, being able to prune more branches along the search and making it more time efficient. We demonstrate branch and bound with two different search strategies: DFS (backtracking) and Best-First search. - - - -% the items while the capacity is not exhausted, then select a fraction of the last item to fill the remaining capacity, which is a closer estimate of the real one but still optimisitc compared with selecting the remaining items. What if we are allowed to get part of an item, so that we can fit the knapsack as full as possible. We can sort the items by their unit value, and take items in the order of decreasing unit values. Another bool vector is used to indicate if a certain item can be used or not. At first, all items are allowed, we can get an estimation of 92 in this case. For branches that decide not to take an item, that item is excluded using the bool vector. We compare the estimation with the best found value, if the estimated value will never be better, then this branch will prunned. - - -\subsubsection{Depth-First Branch and Bound} -\begin{figure}[h] - \centering - \includegraphics[width= 0.98\columnwidth]{fig/branch_and_bound_backtracking.png} - \caption{Depth-First Branch and bound} - \label{fig:knapsack_backtracking} -\end{figure} -We set up a class \texttt{BranchandBound} to implement this algorithm. 
First, in the initiation, we add additional $\frac{v_i}{w_i}$ to mark each item's value per unit, and sort these items by this value in decreasing order. Second, we have a function \texttt{estimate} which takes three parameters: \texttt{idx} as start index of the remaining items, \texttt{curval} is the total value based on all previous decision, and \texttt{left\_cap} as the left capacity of the knapsack. The code snippet is: -\begin{lstlisting}[language=Python] -import heapq - -class BranchandBound: - def __init__(self, c, v, w): - self.best = 0 - self.c = c - self.n = len(v) - self.items = [(vi/wi, wi, vi) for _, (vi, wi) in enumerate(zip(v, w))] - self.items.sort(key=lambda x: x[0], reverse=True) - - def estimate(self, idx, curval, left_cap): - est = curval - # use the v/w to estimate - for i in range(idx, self.n): - ratio, wi, _ = self.items[i] - if left_cap - wi >= 0: # use all - est += ratio * wi - left_cap -= wi - else: # use part - est += ratio * (left_cap) - left_cap = 0 - return est -\end{lstlisting} -In the Depth-first search process, it consists of two main branches: one considering to choose the current item, and the other to handle the case while the item is not selected. For the first branch, it has to be bounded by the capacity and the comparison of the best found solution against to the estimation. Additional \texttt{status} is to assist to visualize the process of the search, which tracks the combination of items. The process is shown in Fig.~\ref{fig:knapsack_backtracking}. 
And the code is as: -\begin{lstlisting}[language=Python] - def dfs(self, idx, est, val, left_cap, status): - if idx == self.n: - self.best = max(self.best, val) - return - print(status, val, left_cap, est ) - - _, wi, vi = self.items[idx] - # Case 1: choose the item - if left_cap - wi >= 0: # prune by constraint - # Bound by estimate, increase value and volume - if est > self.best: - status.append(True) - nest = self.estimate(idx+1, val+vi, left_cap - wi) - self.dfs(idx+1, nest, val+vi, left_cap - wi, status) - status.pop() - - # Case 2: not choose the item - if est > self.best: - status.append(False) - nest = self.estimate(idx+1, val, left_cap) - self.dfs(idx+1, nest, val, left_cap, status) - status.pop() - return -\end{lstlisting} -\subsubsection{Best-First Branch and Bound} -Within Best-First search, we use priority queue with the estimated value, and each time the one with the largest estimated value within the frontier set is expanded first. Similarly, with branch and bound, we prune branch that has estimated value that would never surpass the best solution up till then. The search space is the same as in Fig.~\ref{fig:knapsack_backtracking} except that the search process is different from depth-first. In the implementation, the priority queue is implemented with a min-heap where the minimum value is firstly popped out, thus we use the negative estimated value to make it always pop out the largest value conveniently instead of write code to implement a max-heap. 
-\begin{lstlisting}[language=Python] - def bfs(self): - # track val, cap, and idx is which item to add next - q = [(-self.estimate(0, 0, self.c), 0, self.c, 0)] # estimate, val, left_cap, idx - self.best = 0 - while q: - est, val, left_cap, idx = heapq.heappop(q) - est = -est - _, wi, vi = self.items[idx] - if idx == self.n - 1: - self.best = max(self.best, val) - continue - - # Case 1: choose the item - nest = self.estimate(idx + 1, val + vi, left_cap - wi) - if nest > self.best: - heapq.heappush(q, (-nest, val + vi, left_cap - wi, idx + 1)) - - # Case 2: not choose the item - nest = self.estimate(idx + 1, val, left_cap) - if nest > self.best: - heapq.heappush(q, (-nest, val, left_cap, idx + 1)) - return -\end{lstlisting} - -\subsection{Travelling Salesman Problem} -\begin{figure}[!ht] - \centering - \includegraphics[width= 0.4\columnwidth]{fig/tsp_graph.png} - \caption{A complete undirected weighted graph.} - \label{fig:tsp_graph} -\end{figure} -Given a set of cities and the distances between every pair, find the shortest possible path that visits every city exactly once and returns to the origin city. For example, with the graph shown in Fig.~\ref{fig:tsp_graph}, such shortest path is $[0, 1, 3, 2, 0]$ with a path weight $80$. -% \begin{lstlisting}[numbers=none] -% Assume our graph is a two dimensional list: -% g = [[(1, 10), (2, 15), (3, 20)], -% [(0, 10), (2, 35),(3,25)], -% [(0, 15),(1,35),(3,30)], -% [(0,20),(1,25),(2,30)]] -% g[0][0]=(1,10), means the edge between 0 and 1 with cost 10. -% \end{lstlisting} - -\paragraph{Search Space} In TSP, a possible complete solution is a \textit{Hamiltonian cycle}, a graph cycle that visits each vertex exactly once. Since it is a cycle, it does not matter where it starts. For convenience, we choose vertex $0$ as the origin city. Therefore, in our example, our path starts and ends at $0$, and the remaining $n-1$ vertices between will be a permutation of these vertices, making the complexity as $(n-1)!$. 
- -Because this is a complete graph, it might be tempting to apply backtracking on the graph to enumerate all possible paths and find and check possible solutions. However, this path searching will build a $n-1$-ary search tree with height equals to $n-1$, making the complexity as $\frac{(n-1)^{n}-1}{n-2}$, which is larger than the space of permutation among $n-1$ items. Therefore, in our implementation, we apply backtracking to enumerate all permutations of $n-1$ vertices and check its corresponding cost. - -\paragraph{Speedups} Since we only care about the minimum cost, then any partial result that has cost larger than the minimum cost of all known complete solutions can be prunned. This is the \textit{Branch and bound} method that we have introduced that is often used in the combinatorial optimization. -% solution is path with vertices forming a cycleneed to construct a list of vertices (\texttt{path}) and its total cost (\texttt{cost}) of all edges between as the state $s=(p, c)$, $p, c$ is short for path and cost respectively. A possible complete solution for path will have $n+1$ vertices which start with a vertex and end with the same, and $n-1$ vertices in between. Now, put together about constraints. -% \begin{itemize} -% \item ``Visits every city exactly once'' means the first vertex will be a permutation of all cities, we get $n!$ combination (the last vertex does not matter). -% \item We have $n!$ possible states. We can further spot redundant states. For a cycle, it does not matter where it starts, it is always the same cycle. For convenience, we choose vertex $0$ as the starting path, and there will only be $n-1$ vertex to permutate with, making the size of the state space to $(n-1)!$. -% \item We only care about the minimum cost, then any partial result that has cost larger than the minimum cost of all known complete solution can be prunned. 
This is called \textit{Branch and bound} method, which is the extension of backtracking into the optimization problems. -% \end{itemize} - -\paragraph{Implementation} We built the graph as a list of dictionaries, each dictionary stores the indexed vertex's other cites and its corresponding distance as key and value respectively. Compared with standard permutation with backtracking, we add four additional variables: \texttt{start} to track the starting vertex, \texttt{g} to pass the graph to refer the distance information, \texttt{mincost} to save the minimum complete solution so far found, and \texttt{cost} to track the current partial path's cost. The code is shown as: -\begin{lstlisting}[language=Python] -def tsp(a, d, used, curr, ans, start, g, mincost, cost): - if d == len(a): - # Add the cost from last vertex to the start - c = g[curr[-1]][start] - cost += c - if cost < mincost[0]: - mincost[0] = cost - ans[0] = curr[::] + [start] - return - - for i in a: - if not used[i] and cost + g[curr[-1]][i] < mincost[0] : - cost += g[curr[-1]][i] - curr.append(i) - used[i] = True - tsp(a, d + 1, used, curr, ans, start, g, mincost, cost) - curr.pop() - cost -= g[curr[-1]][i] - used[i] = False - return -\end{lstlisting} -TSP is a NP-hard problem, and there is no known polynomial time solution so far. - - - -\subsubsection{Other Solutions} -Whenever we are faced with optimization, we are able to consider the other two algorithm design paradigm--Dynamic Programming and Greedy Algorithms. In fact, the above two problems both have its corresponding dynamic programming solutions: for knapsack problem, polynomial solution is possible; for TSP, though it is still of exponential time complexity, it is much better than $O(n!)$. We will further discuss these two problems in Chapter Dynamic Programming. -\section{Exercises} -\begin{enumerate} -\item 77. Combinations -\item 17. Letter Combinations of a Phone Number -\item 797. 
All Paths From Source to Target - \item N-bit String: enumerate all n-bit strings with backtracking algorithm, for example: -\begin{lstlisting}[numbers=none] -n = 3, all 3-bit strings are: -000, 001, 010, 011, 100, 101, 110, 111 -\end{lstlisting} -\item 940. Distinct Subsequences II -\item N-queen -\item Map-coloring -\item 943. Find the Shortest Superstring (hard). Can be moduled as traveling salesman problem and dynamic programming -\end{enumerate} -\end{document} \ No newline at end of file diff --git a/Easy-Book/chapters/chapter_combinatorial_search_old.tex b/Easy-Book/chapters/chapter_combinatorial_search_old.tex deleted file mode 100644 index 2396162..0000000 --- a/Easy-Book/chapters/chapter_combinatorial_search_old.tex +++ /dev/null @@ -1,972 +0,0 @@ -\documentclass[../main.tex]{subfiles} - -\begin{document} -\chapter{Combinatorial Search} -What is the most straightforward way to solve problems? We form it as a search problem in \textit{search space}--a simple example is to enumerate all possibilities--and search among them what we need. We introduce the general searching strategies and learn some math--combinatorics that we strongly need. -\section{Introduction} -Combinatorial search problems consists of $n$ items and a requirement to find a solution, i.e., a set of $L < N$ items that satisfy specified conditions or constraints. For example, a sudoku problem where a $9\times 9$ grid is partially filled with number between 1 and 9, fill the empty spots with numbers that satisfy the following conditions: -\begin{figure}[!ht] - \centering - \includegraphics[width= 0.35\columnwidth]{fig/250px-Sudoku-by-L2G-20050714.png} - \includegraphics[width= 0.35\columnwidth]{fig/250px-Sudoku-by-L2G-20050714_solution.png} - \caption{Example sudoku puzzle and its solution} - \label{fig:backtrack_puzzle_1} -\end{figure} -\begin{enumerate} - \item Each row has all numbers form 1 to 9. - \item Each column has all numbers form 1 to 9. 
- \item Each sub-grid ($3 \times3$) has all numbers from 1 to 9. -\end{enumerate} -The sudoku problem together with one possible solution is shown in Fig.~\ref{fig:backtrack_puzzle_1}. In this case, we have $81$ items, and we are required to fill 51 empty items with the above three constraints. Combinatorial search in computer science mainly studies algorithms that solve exponential or even NP-hard problems, such as: -\begin{enumerate} - \item Constraint Satisfaction Problems (CSP) such as sudoku, N-queen, and so on. - \item Optimization problems such as the Travelling Salesman Problem (TSP) and knapsack problems. -\end{enumerate} - -\paragraph{Techniques} From Chapter Discrete Programming, we have learned the basics of enumerative combinatorics, including counting principles and knowledge on permutations, combinations, partitions, subsets, and subsequences. Combinatorial search builds on top of this subject, and together with the technique in computer science which is called ``backtracking'', it is able to enumerate the search space and find the solution effectively and efficiently with necessary speedup methods. - - - -\paragraph{Search Problem} A search problem is defined as a problem for which there is an algorithmic way to verify its answer. Finding a certain integer in an array of integers is the simplest example. - -For searching on an input instance with a particular data structure, the search space itself is all items in this data structure, and searching in this space is simply applying searching strategies specified by the data structure and situation, which we have already covered in Chapter. Searching on Data Structures. However, for problems that are not explicitly a searching on a data structure, to form it as a search problem, we need to define \textit{state}, \textit{state space}, and \textit{goal test}. - -\subsubsection{State, State Space, and Goal Test} What is a state? A state can be imagined as a container that holds all information of it. 
A state Space is a set of all possible states in a problem domain. For a discrete problem, this set will be finite, which is good news. We use $S$ to make the state, and $V_s$ to mark the state space. Let's look at an example about subarray. -\paragraph{Example: Subarray Problem (L560)} Given an array of integers and an integer $t$, find the total number of continuous subarrays whose sum equals to $t$. -\begin{lstlisting}[numbers=none] -a=[1,1,1], t=2 -Return 2 -\end{lstlisting} -In this question, we care about information about subarray and its sum, state here can be further named as `a subarray start at index $i$ and end at index $j$, and its sum is $value$'. We use $a_{ij}, i\leq j, j\in[0,n-1]$ to make a subarray, and $s_{ij}=v$ a state. Simple math tells us we have total number of $n+(n-1)+(n-2)+...+1=\frac{n\times(n+1)}{2}$ subarries, which makes $|V_s|$. A goal test can be set as ``checking if any subarray has a sum value same as $t$''. Beside, there should always be an \textit{initial state}, wherein this case it should be an empty array with value of $0$, we mark it as $\empty$. - -So far, we can use the Python code to solve this problem by generating all states and do goal testing: -\begin{lstlisting}[language=Python] -# Generate all subarries -def naive_subarray_sum(a, t): - if not a: - return 0 - n = len(a) - ans = 0 - # simple enumeration - for i in range(n): - for j in range(i, n): - # define the state and compute its value - s_ij = 0 - for k in range(i,j+1): - s_ij += a[k] - # goal test - if s_ij == t: - ans += 1 - return ans -\end{lstlisting} - -\subsubsection{State Transfer Model} The problem of the above implementation is that we did just blindly compute each sate, but never think about connections between state. A easy one that comes to our mind is $s_{ij}=s_{ij-1}+a_j$. From $s_{ij-1}$ to $s_{ij}$ we need only one addition; whereas in our previous way, we treated the states independently and spend $j-i$ additions to compute its state. 
This is called a \textit{state transfer model}. With this prior knowledge, we can completely cut off the innermost \texttt{for} loop. We can draw the state transfer model as a graph, where there will be an arc $a\xrightarrow{}b$ if state $a$ can be converted to state $b$: -\begin{lstlisting}[numbers=none] -[]->a_00->a_01->a_02 -[]->a_11->a_12 -[]->a_22 -\end{lstlisting} -The code can be shown: -\begin{lstlisting}[language=Python] -# State transfer model -def state_transfer_subarray_sum(a, t): - if not a: - return 0 - n = len(a) - ans = 0 - # simple enumeration - for i in range(n): - s_ij = 0 - for j in range(i, n): - # a sate only depends on its previous state - s_ij = s_ij + a[j] - if s_ij == t: - ans += 1 - return ans -\end{lstlisting} -The state transfer actually used the \textit{reduce and conquer} principle. Each state is considered as a subproblem of the original problem. And a larger problem can be reduced into a set of smaller subproblems. The state transfer model is equivalently using this method. -\subsubsection{Reduce the State Space Further} There is actually an even better way to do this. We can define a state as ``the number of subarrays that have sum equal to $t$ in the array $a_{0,j}$, and we have the sum of the whole array as $sum$'', we short it for $s(j,sum,count)$. We need to use two data structures to track this state: $dp[j]=count$ and $sum[j]$. In this definition, there is only one variable $j$ to indicate the total number of states. Now, what is the recurrence relation between $dp[j]$ and $dp[j-1]$? There are two options for the item $a_j$. First, it can be part of a qualified subarray, which means we need to find a previous state say $0-k$, then our current state is $0,1,..., k, k+1, ..,j$. We need a previous state $k$ which has $sum[k]=sum[j]-t$. If we denote the number of such previous states as $c$, then $c$ qualified subarrays end at $a_j$. Second, when $a_j$ is not considered, we simply have $dp[j-1]$ qualified subarrays. Summing up both cases: $dp[j]=c+dp[j-1]$. 
The problem here become how to find $c$ efficiencly, and possibly just constant operations. We can save information $sum$ of a state into a hasing table (dictionary), which uses $sum$ as key and $c$ as values. The Python code is: -\begin{lstlisting}[language=Python] -from collections import defaultdict -def subarraySum(a, t): - """ - :type nums: List[int] - :type t: int - :rtype: int - """ - sum_i = 0 - dict = defaultdict(int) #sum 0, count = 0 - dict[0] = 1 - dp = [0]*(len(a)+1) - for idx, v in enumerate(a): - sum_i += v - if sum_i - t in dict: - dp[idx+1] = dict[sum_i-t] + dp[idx] - else: - dp[idx+1] = dp[idx] - dict[sum_i] += 1 - - return dp[-1] -\end{lstlisting} -\subsubsection{Problem Solving Guideline***} Heretofore, we learned the five key components of formulating a search problem: state, initial state, state space, state transfer, and goal test. - -\paragraph{Exhaustive Search} To solve a search problem, there are some directions: -\begin{itemize} -\item if it is explicit data structure, apply particular searching strategy will do the problem easily! -\item if it is implicit data structure, we generate and save our state such as in our example where enumerate the state and compute its values in \texttt{s\_{ij}}. This also depends on the data structure that the state space forms. This chapter focuses on solving problems in this stage via enumeration and goal test. To be able to enumerate and count the size of the state space, we need combinatorics and its implementation with depth-first search based \textbf{``backtracking''} technique. -\end{itemize} -\paragraph{Optimization via Searching} To optimize a search problem within a search strategy, we need to prune as much unqualified or unnecessary branch as possible. We introduce two ways: -\begin{itemize} - \item Branch and Prune: This method prunes the unqualified branches with constraints of the problems. 
This is usually applied to solve constraint satisfaction problems (CSPs), and backtracking is a top technique to do it. - \item Branch and Bound: This method prunes unnecessary branches by comparing an estimation of a node with the global best solution found so far; if the estimation will never lead us to a better solution, we cut off this branch. Either backtracking or best-first search can be applied. This technique can be applied to solve general optimization problems, such as the Travelling Salesman Problem (TSP), knapsack problems, and so on. -\end{itemize} -Therefore, these searching and pruning techniques are widely applied to solve combinatorial problems, constraint satisfaction problems (CSPs), and optimization problems. - -\paragraph{Other Optimization Techniques} -There are three directions to get better efficiency, which are trailers for the remaining chapters sitting in this part. So, do not worry if you do not understand it all; read it first and come back to check it out later after you have learned these chapters. -\begin{itemize} -\item Reducing the cost of computing each state by finding connections between states. Such a connection appears as an arc in the state transfer graph and can usually be obtained by the \textbf{Reduce and Conquer} method, which reduces problems to subproblems and uses a recurrence relation to denote the conversion. We will detail this method in Chapter~\ref{chapter_divide_conquer}. The recurrence relation guarantees lower cost to get a state from a previous state. We showed the example of computing \texttt{s\_{ij}} in the section of State Transfer Model. And here the time complexity is reduced from $O(n^3)$ to $O(n^2)$. -\item Reducing the state space, which might require us to define the state in another way. As we have seen in the above section, we further reduced the cost to $O(n)$. 
-\item If the recurrence relation shows some states appear in the search tree multiple times or in the graph there exist vertices that have indegree larger than 1, we can use \textbf{dynamic programming} to cut off the redundancy. Or else, we can make a greedy choice via \textbf{Greedy Algorithms}. -\end{itemize} - - - - -%%%%%%%%%%%%%%%%%%%%%%Backtracking%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\section{Backtracking} - -We know how to count the possibilities and how to enumerate them, manually. Time to learn how to program them efficiently. If we were thinking about iterative programming, we would encounter at least $n$ levels of \texttt{for} loops, which is not manageable and a pain to write. This is where recursion is great! The recursive method we apply here is \textit{backtracking}. - -\paragraph{Introduction to Backtracking} Backtracking is a general problem-solving technique for finding all (or some) solutions to some computational problems, that \textit{incrementally} builds candidates to the solutions. Backtracking is all about choices and consequences. It shows the following two properties: - \begin{enumerate} - \item \textbf{No Repetition and Completion:} It is a systematic generating method that enumerates every possible state at most once: it will not miss any possible right solution but avoids repetitions. If there is a ``correct'' solution, it is guaranteed to be found. This property makes it ideal for solving combinatorial problems such as combination and permutation which require us to enumerate all possible solutions. We focus on demonstrating this property in this section. - \item \textbf{Search Pruning:} Along the way of working with partial solutions, in some cases, it is possible for us to decide if they will lead to a valid \textit{complete solution}. 
As soon as it is confident to judge its invalid configuration or it will not be optimal, it abandons this \textit{partial candidate}, ``backtracks'' (return to the upper level), and reset to the upper level's state so that the search process can continue to explore the next branch to provide more efficiency. This is called \textit{search pruning} among searching algorithms. With search pruning -and we end up amortizely visiting each vertex less than once which is more -efficient compared with an exhaustive graph search such as DFS and BFS. This property makes backtracking the most promising way to solve \textit{constraint satisfaction problem (CSP)}\footnote{CSPs are mathematical questions defined as a set of objects whose state must satisfy a number of constraints or limitations, visit \url{https://en.wikipedia.org/wiki/Constraint_satisfaction_problem} for more information}, where the goal is to find a set of value assignments to certain variables that will satisfy specific mathematical equations and inequations. For example, Eight Queens puzzle, Map Coloring problem, Sudoku, Crosswords, and many other logic puzzles. We show examples in Section.~\ref{chapter_combinatorics_backtracking_csp}. - \end{enumerate} - -\paragraph{Model Backtracking} -We can model the combinatorial search solution as a vector $s = (s_0, s_1, ..., s_{n-1})$, where each $s_i$ is selected from a finite ordered set $A$. Such a vector might represent an arrangement where $s_i$ contains the i-th item of the permutation. Or in the combination problem, a boolean denotes if the i-th item is selected already. Or it can represent a path in a graph or a sequence of moves in a game. At each step in the backtracking algorithm, we try to extend the last partial solution $s = (s_0, s_1, ..., s_{k})$ by adding another event at the end. 
And then we testify our partial solution with the desired solution to decide to (1) either collect this partial solution; and or (2) add $s_{k+1}$ to the state; or (3) backtrack and reset to previous state and go to next branch. The relation between partial state and its next state can be easily viewed as a recurrence relation. - - - - - -\subsection{Permutation} -In the case of permutate [1,2,3]. At first, we have three options: 1 or 2 or 3, we can get three possible partial result [1],[2],[3]. Second, we expand the option on the second position, for [1], we have option 2 and 3, we get [1,2], [1,3], same for [2]->[2,1],[2,3], for [3]->[3,1],[3,2]. At least, each partial result has only one option, we would get all permutations as: [1,2,3],[1,3,2],[2,1,3],[2,3,1],[3,1,2],[3,2,1]. We shall use a tree structure to better visualize this process. It is shown in Fig.~\ref{fig:backtrack_permutation}. -\begin{figure}[h] - \centering - \includegraphics[width= 0.8\columnwidth]{fig/permutation.png} - \caption{The search tree of permutation} - \label{fig:backtrack_permutation} -\end{figure} - -We start from an empty list, at this time, we have three possible options (moves), each edge represents a move, and we can go from state [], to state [1],[2],[3] with different moves. Now, how would you program to implement this? Here is one naive way: we implement it recursively as in the DFS. We use \texttt{state} to track the current partial result, \texttt{k} is used to mark the level of the recursion, it will be the same as the length of the \texttt{state}. \texttt{ans} is just used to collect the answer. -\begin{lstlisting}[language=Python] -def naive_recursion(a, state, k, ans): - if k == len(a): - ans.append(state[::]) - return - for i in range(len(a)): - if a[i] not in state: - naive_recursion(a, state + [a[i]], k+1, ans) -\end{lstlisting} -The problem with the above implementation that line $6$ takes $O(n)$ to check if an item is valid to put into the state or not. 
Another thing is, each time we call \texttt{naive\_recursion}, we make a copy of \texttt{state}.This can be further avoided. - -\texttt{state} is a list, when it is passed to function, it is just a pointer. However, in the case of \texttt{state + [a[i]]}, a new list is generated and passed to its recursive call. We can avoid this by appending \texttt{a[i]} at the end of \texttt{state}, and after the recursive call, we need to set the state back to its original, so that it can continue with the first \texttt{for} loop, and we can generate the next option. For example, if we are at [1], when the recursive call returned back from [1,2], if we do not set it back to [1], it can not be built to [1,3]. To avoid the second \texttt{for} loop, we can use a list of boolean \texttt{bUsed} to track which item is used. Same rule apply here, after the recursive call, we need to set its value back to False. For generality, we modify the code to generate $p(n,k)$ instead of $p(n,n)$. A better version is: -\begin{lstlisting}[language=Python] -def P_n_k(a, bUsed, state, d, k, ans): - ''' - state start from [] - d: the level of the traversal, starts from 0 - bUsed: mark if corresponding item in a is used or not - ''' - # reach to the last level - if d == k: - ans.append(state[::]) - return - # move the state - for i in range(len(a)): - if not bUsed[i]: - state.append(a[i]) - print(state) - bUsed[i] = True - P_n_k(a, bUsed, state, d+1, k, ans) - bUsed[i] = False - state.pop() - print('backtrack: ', state) -\end{lstlisting} -Some of the process being printed out shows the process of backtracking: -\begin{lstlisting}[language=Python] -[] -[1] -[1] -[1, 2] -[1, 2] -[1, 2, 3] -backtrack: [1, 2] -backtrack: [1] -[1, 3] -[1, 3] -[1, 3, 2] -backtrack: [1, 3] -backtrack: [1] -backtrack: [] -\end{lstlisting} - -\paragraph{Discussion} In this case, we can not prune any branch, because for the permutation, we need a full enumeration. 
- -\paragraph{Two Passes} Therefore, we can say backtracking visits these implicit vertices -in two passes: First, the forward pass to build the solution \textbf{incrementally}. -Second, the backward pass to \textbf{backtrack} to the previous state. We can see that within -these two passes, the \texttt{state} list takes on the value of every vertex in the search tree, and -it starts with [] and ends with []. This is the core characteristic of backtracking. - -\paragraph{Time Complexity of Permutation} -In the example of permutation, we can see that backtracking visits each state only once. The complexity of this is similar to the graph traversal of $O(|V|+|E|)$, where $|V| = \sum_{k=0}^{n}{A_{n}^{k}}$ and, because it is a tree structure, $|E| = |V|-1$. Because $|V|$ grows factorially with $n$, enumerating all permutations is inherently expensive and only feasible for small $n$. - -\paragraph{Space Complexity} The implementation is depth-first search, which has $O(bd)$ as the space complexity, where $b$ is the branching factor and $d$ is the depth of the tree. However, backtracking saves even more space because only one state is generated at a time rather than saving all of the states that belong to the same predecessor state at once; once it returns to the predecessor, the \texttt{for} loop there can always generate the next state. Further, in our second solution, we reuse a single state vector (\texttt{state} in our case) for enumerating all states by applying a modification and undoing it once we go back to the predecessor to generate the next successor. This slight difference can be critical for problems with a large state description. -\subsection{Combination} -\begin{figure}[!ht] - \centering - \includegraphics[width= 0.7\columnwidth]{fig/combination.png} - \caption{The Search Tree of Combination. Change each vertex to a set instead of a list} - \label{fig:backtrack_combination} -\end{figure} -Similarly, we try to build the combination of [1,2,3] incrementally, with initial state starting at $[]$ at the first level. 
Then three options give us three possible combination of $C(3,1)$: [1], [2], and [3]. For [1], we have 2 and 3, to get [1, 2], [1,3]. One option is to get exactly the same code as in permutation, but using a \texttt{set} for \texttt{state}, the second time a set appears, say for [2], because [2,1] is the same as of [1,2] in this case, we check if it already exist. But this step can be avoided. - -It makes better sense that in the search tree, we only visit each state exactly once. At [2] we need to avoid checking any item ahead of it, if you really want to use the permutation code, we can achieve the ``no repetion'' property by prunning branches: checking if one item is smaller than items in \texttt{state}, we prune the branch. From this prunning rule, you would say, wait for it, why do not we just add item that is behind the position of the very last item we added. For each recursive call, we pass it a \texttt{start} variable which starts at $0$ to point the recursive function to use items from the right location. Because just to get the combination of 3 items out of 3 is 1 option, we get all combinations instead, a superset. This process is illustrated in Fig.~\ref{fig:backtrack_combination}. The code is as following: -\begin{lstlisting}[language=Python] -def powerset(a, s, k, state, ans): - # Save the state - ans.append(state[::]) - # reach to the last level - if k == len(a): - return - for i in range(s, len(a)): - state.append(a[i]) - powerset(a, i+1, k+1, state, ans) - state.pop() -\end{lstlisting} -One thing I want to mention: algorithms are mostly obsessed with orders. Right ordering makes things more organized, easier to find a solution and potentially more efficient. - -\paragraph{Time Complexity of Combination} -Because backtracking ensures efficiency by visiting each state no more than once. For the combination(subset) problem, the total nodes of the implicit search graph/tree is $\sum_{k=0}^{n}C_{n}^{k} =2^n$. 
We can look it as another way, there are in total n objects, and each object we can make two decisions: inside of the subset or not, therefore, this makes $2^n$. - -%%%%%%%%%%%%%%%%%%%%%Others%%%%%%%%%%%%%%%%%%%%%%%%% -\subsection{Other Combinatorics} -%include all paths, subsequence, and so. -\subsubsection{All Paths In Graph} -\label{subsec_all_paths} -% Try an example, we compute the $C_{4}^{2}$ from $[1, 2, 3, 4]$. We can get the following result. -% \begin{figure}[h] -% \centering -% \includegraphics[width = 0.8\columnwidth]{fig/combination_rslt.png} -% \end{figure} -% Actually, the above code has redundency, each time we do not need to set the range from $s$ to $n$, we can set it to $n-k+1$ (need furthe modification). - -% If we want all the results from $k=0$ to $k=n$, we can accumulate set $k=n$, and accumulate results all the time. Actually if -\begin{figure}[ht!] - \centering - \includegraphics[width=0.4\columnwidth]{fig/all_paths.png} - \caption{All paths from 0, include 0->1, 0->1->2,0->1->2->4, 0->1->2->4->3, 0->1->2->4->5, 0->1->2->4->5->6 } - \label{fig:my_label} -\end{figure} -Backtracking technique can be naturally used in graph path traversal. One example is to find all possible paths from a source to the target. One simpler occasion is when the graph has no cycles. Backtrack technique can enumerate all paths in the graph exactly once for each. - -The implementation is as follow: we still use dfs, because there has no cycles, we have no need to track the visiting state of each node. We generate the possible answer with backtracking technique through the \texttt{path} variable to track each state. 
-\begin{lstlisting}[language=Python] -def all_paths(g, s, path, ans): - '''generate all pahts with backtrack''' - ans.append(path[::]) - for v in g[s]: - path.append(v) - print(path) - all_paths(g, v, path, ans) - path.pop() - print(path) -\end{lstlisting} -Feed in the above network and run the following code: -\begin{lstlisting}[language=Python] -al = [[1], [2], [4], [], [3, 5], [6], []] -ans = [] -path = [0] -all_paths(al, 0, path, ans) -\end{lstlisting} -With the printing, we can see the whole process, \texttt{path} changes as the description of backtrack. \begin{lstlisting}[numbers=none] -[0, 1] -[0, 1, 2] -[0, 1, 2, 4] -[0, 1, 2, 4, 3] -[0, 1, 2, 4] backtrack -[0, 1, 2, 4, 5] -[0, 1, 2, 4, 5, 6] -[0, 1, 2, 4, 5] backtrack -[0, 1, 2, 4] backtrack -[0, 1, 2] backtrack -[0, 1] backtrack -[0] backtrack -\end{lstlisting} -We can see each state, we can always have a matching backtrack state. -\begin{bclogo}[couleur = blue!30, arrondi=0.1,logo=\bccrayon,ombre=true]{What to do if there is a cycle?} -\end{bclogo} -\subsubsection{Subsequence} -If we observe carefully, the process of enumerating the subsequence is exactly showing the same search tree as in generateing the powerset (the better version). The only difference is in this case, the ordering actually matters. Therefore, our code will be exactly the same as we used to enumerate the powerset. - -We know the cost is $2^n$, this is correct in the poweset too; when we are scanning the list of item there is only two options: either end up in one of the subset or not. - -\section{Prune Search Space in Backtracking} -\label{chapter_combinatorics_backtracking_csp} -So far, curious and meticulous readers might ask a question: ``Seriously, Li, I dont see the difference of the backtracking you showed me with DFS''. It's a good question, and it is worth to clarify before we move on. -\paragraph{Backtracking VS DFS} All the above examples, we used DFS to implement our backtracking algorithm. 
Think of backtracking as a problem-solving approach with a hard requirement--for the sake of efficiency, we force ourselves to visit each state or configuration at most once (once for enumeration and less than once if branch pruning is applied). This implies that our search needs to happen on a tree, a free tree if you want to be more specific, but we know where to start--the initial state. How to set a rule to define the free tree? This is what we engineers need to do. How to search on a free tree? That is what DFS does. Why not BFS? The space is the main issue that blocks us from BFS. BFS cannot backtrack, so we would need to save a copy of every state; with DFS and backtracking, in contrast, we only need to dynamically append to and pop from a single \texttt{state} to visit all states. For the case of the permutation, DFS is preferred because it only takes $O(n)$ space for the recursion stack (the depth of the search tree), while if we use BFS, the number of vertices saved in the queue can be close to $n!$. - - -\paragraph{Search Space Pruning} In this section, we demonstrate that backtracking can be optimized via pruning the search space--pruning branches in the search tree--to solve CSPs or optimization problems. Suppose we are at level 2 with state $s=(s_0, s_1)$, and we know that this state will never lead to a valid or optimal solution; then we do not need to traverse through this branch but can backtrack to the previous state at level one. This prunes the entire subtree rooted at this state from the search. We show the pruning techniques through examples and summarize them in a section. We demonstrate search space pruning via backtracking through two examples: TSP for branch and bound with estimates, and sudoku for branch and prune with constraints. - - - - -\subsection{Sudoku} -Sudoku is one of the most popular brain teasers seen in magazines, newspapers, or even books of its own. I bet it can take a lot of effort to crack the game. 
In this section, we would learn how to apply backtracking and search pruning to solve this problem. -\paragraph{Sudoku Problem (L37)} -\begin{figure}[!ht] - \centering - \includegraphics[width= 0.35\columnwidth]{fig/250px-Sudoku-by-L2G-20050714.png} - \includegraphics[width= 0.35\columnwidth]{fig/250px-Sudoku-by-L2G-20050714_solution.png} - \includegraphics[width= 0.25\columnwidth]{fig/sudoku_grid.png} - \caption{Example sudoku puzzle and its solution,( put a coloring to each block)} - \label{fig:backtrack_puzzle} -\end{figure} -Given a partially filled grid of size $n\times n$, completely fill the grid with number between 1 and n. The constraint is defined as: -\begin{enumerate} - \item Each row has all numbers form 1 to ‘n’. - \item Each column has all numbers form 1 to ‘n’. - \item Each sub-grid ($\sqrt{n}\times\sqrt{n}$) has all numbers form 1 to n. -\end{enumerate} -An example of a $9\times9$ sudoku problem is shown in Fig.~\ref{fig:backtrack_puzzle}. - -\subsubsection{Analysis and Solution} -\paragraph{State Space} We start by examining its state space. A state here can be defined as a $n\times n$ grid filled with numbers in range $[1,n]$. The brute force is to try 1 to n at each grid in this table, we have a state space of size $n^{n^2}$. This is beyond our current machine can handle. We apply rules: -\begin{enumerate} - \item Each row is essentially a permutation of integers in range $[1,n]$, this gives us $n!$ for a row, with $n$ rows, it would make the state space to $n\times n!$. - \item Further, each column needs to be a permutation of integers too. If we are at row 1, col 0, we would only have $n-1$ options, for position (1,1), this goes to $n-2$ options. This means our total possibility further decrease $n!+(n-1)!+...+2!+1!$. - \item Moreover, there is restriction in each subgrid, it becomes hard to get the exact possibility\footnote{This site \url{http://pi.math.cornell.edu/~mec/Summer2009/Mahmood/Count.html}offer some insights}. 
The number of possibilities for $n=9$ is actually $6670903752021072936960$, which is approximately $6.671\times 10^{21}$. It is a hard problem to solve for sure. -\end{enumerate} - - Good news: we are almost always given some prior integers in some grids, which narrows down the possibilities. Applying backtracking, we first find all empty spots, and fill up these spots recursively. An initial cost estimation can be offered: assume we have $m$ empty spots, and spot $i$ has $c_i$ candidates; then the upper bound of the cost is $c_0\times c_1\times\cdots\times c_{m-1}$. Does the filling order of these empty spots matter? For completeness, not so much; for efficiency, SURE! Consider two approaches: in one we visit these spots arbitrarily, and in the other we always choose the spot that has the fewest possible candidates to fill in (not considering the implementation right now). In the second approach, starting with fewer candidates can help us quickly pinpoint the right answer and cut branches that are invalid. Because such a branch is early on in the search tree, and all the others have more candidates, this pruning makes sure the branch we cut off at this stage is rewarding. Another way to think about it: this is essentially a greedy approach, making sure that when we are multiplying $c_i$ into the total cost, we are adding the least expensive ones first, with a larger probability of guessing right, and as it accumulates, we can outrun the arbitrary ordering by hundreds of times. - -\paragraph{Implement Sudoku with Backtracking} - -In the implementation, we need to track which numbers each row, column, and sub-grid already contains during the process. We set aside three data structures \texttt{row\_state}, \texttt{col\_state}, and \texttt{block\_state} for this purpose so that we can validate a candidate. -% \begin{enumerate} -% \item Initialization: we initialize the three states and prepare data structure to track empty spots. 
-% \item Backtracking and prunning: we use DFS to fill up empty spots, and each type we choose whichever in the remaining spots that has the least number of candidates. -% \end{enumerate} - - -\textbf{Step 1: Initialization} We scan the whole grid shown in Fig.~\ref{fig:backtrack_puzzle} and find all empty spots that waiting for filling in. -% From the constraints, we use the following data structures to record the state of each row, each column, and each grid. -% \begin{lstlisting}[language=Python] -% row_state = [0]*9 -% col_state = [0]*9 -% grid_state = [0]*9 -% \end{lstlisting} -We use (i,j) to denote the position of a grid. It correspond position $i$ in \texttt{row\_state[i]}, and $j$ in \texttt{col\_state[j]}, and \texttt{block\_state[i//3][j//3]} for corresponding sub-grid. In this stage, we iterate through the -\texttt{board} to record these states. -\begin{lstlisting}[language=Python] -from copy import deepcopy -class Sudoku(): - def __init__(self, board): - self.org_board = deepcopy(board) - self.board = deepcopy(board) - - def init(self): - self.A = set([i for i in range(1,10)]) - self.row_state = [set() for i in range(9)] - self.col_state = [set() for i in range(9)] - self.block_state = [[set() for i in range(3)] for i in range(3)] - self.unfilled = [] - - for i in range(9): - for j in range(9): - c = self.org_board[i][j] - if c == 0: - self.unfilled.append((i, j)) - else: - self.row_state[i].add(c) - self.col_state[j].add(c) - self.block_state[i//3][j//3].add(c) - - def set_state(self, i, j, c): - self.board[i][j] = c - self.row_state[i].add(c) - self.col_state[j].add(c) - self.block_state[i//3][j//3].add(c) - - def reset_state(self, i, j, c): - self.board[i][j] = 0 - self.row_state[i].remove(c) - self.col_state[j].remove(c) - self.block_state[i//3][j//3].remove(c) -\end{lstlisting} -% We see 5 we need to set \texttt{row\_state[0]}, \texttt{col\_state[0]} and \texttt{grid\_state[0]}. We treat each state as a serial of bits of maximum length of 9. 
For 5, we set the 5th bit into 1 for each state, which is through XOR with mask that left shift 1 by 4 digits. The details can be found in Chapter\ref{chapter_bit}. To check if 5 is in the row, or column or current grid we just need to get corresponding bit value, if it is one or not. Through the bit manipulation, we can use 27 extra spaces and able to check if a number if valid here in $O(1)$ time. Therefore, for each empty spot, we can find all possible values. The functions used to set/reset/check state is implemented as follows: -% \begin{lstlisting}[language=Python] -% def setState(i, j, v, row_state, col_state, grid_state): -% row_state[i] |= 1 << v -% col_state[j] |= 1 << v -% grid_index = (i//3)*3 + (j//3) -% grid_state[grid_index] |= 1 << v - -% def resetState(i, j, v, row_state, col_state, grid_state): -% row_state[i] &= ~(1 << v) -% col_state[j] &= ~(1 << v) -% grid_state[grid_index] &= ~(1 << v) - -% def checkState(i, j, v, row_state, col_state, grid_state): -% row_bit = (1 << v) & row_state[i] != 0 -% col_bit = (1 << v) & col_state[j] != 0 -% grid_index = (i//3)*3 + (j//3) -% grid_bit = (1 << v) & grid_state[grid_index] != 0 -% return not row_bit and not col_bit and not grid_bit -% \end{lstlisting} - -% The following function is implement to find empty spots and state record. 
-% \begin{lstlisting}[language=Python] -% def getEmptySpots(board, rows, cols, row_state, col_state, grid_state): -% ''' get empty spots and find its corresponding values in O(n*n)''' -% empty_spots = {} -% # initialize the state, and get empty spots -% for i in range(rows): -% for j in range(cols): -% if board[i][j]: -% # set that bit to 1 -% setState(i, j, board[i][j]-1, row_state, col_state, grid_state) -% else: -% empty_spots[(i,j)] = [] - -% # get possible values for each spot -% for i, j in empty_spots.keys(): -% for v in range(9): -% if checkState(i, j, v, row_state, col_state, grid_state): -% empty_spots[(i, j)].append(v+1) - -% return empty_spots -% \end{lstlisting} - -%The result is: -% \begin{lstlisting}[numbers=none] -% [((4, 4), [5]), ((6, 5), [7]), ((6, 8), [4]), ((7, 7), [3]), ((0, 3), [2, 6]), ((2, 0), [1, 2]), ((2, 3), [2, 3]), ((2, 4), [3, 4]), ((2, 5), [2, 4]), ((4, 1), [2, 5]), ((5, 1), [1, 5]), ((5, 3), [5, 9]), ((5, 5), [1, 4]), ((6, 4), [3, 5]), ((7, 0), [2, 3]), ((7, 6), [3, 6]), ((8, 5), [2, 6]), ((0, 2), [1, 2, 4]), ((0, 8), [2, 4, 8]), ((1, 1), [2, 4, 7]), ((1, 2), [2, 4, 7]), ((1, 7), [2, 3, 4]), ((2, 8), [2, 4, 7]), ((3, 1), [1, 2, 5]), ((3, 3), [5, 7, 9]), ((3, 5), [1, 4, 7]), ((4, 6), [5, 7, 9]), ((4, 7), [2, 5, 9]), ((5, 7), [4, 5, 9]), ((6, 0), [1, 3, 9]), ((6, 3), [3, 5, 7]), ((7, 1), [2, 7, 8]), ((7, 2), [2, 3, 7]), ((8, 0), [1, 2, 3]), ((0, 5), [2, 4, 6, 8]), ((0, 6), [1, 4, 8, 9]), ((0, 7), [1, 2, 4, 9]), ((1, 6), [3, 4, 7, 8]), ((1, 8), [2, 4, 7, 8]), ((3, 2), [1, 2, 5, 9]), ((3, 6), [4, 5, 7, 9]), ((3, 7), [2, 4, 5, 9]), ((4, 2), [2, 5, 6, 9]), ((5, 2), [1, 3, 5, 9]), ((5, 6), [4, 5, 8, 9]), ((8, 1), [1, 2, 4, 5]), ((8, 3), [2, 3, 5, 6]), ((8, 6), [1, 3, 4, 6]), ((2, 6), [1, 3, 4, 5, 7]), ((8, 2), [1, 2, 3, 4, 5]), ((6, 2), [1, 3, 4, 5, 7, 9])] -% \end{lstlisting} -\textbf{Step 2: Backtracking and Search Space Pruning} -\begin{lstlisting}[language=Python] - def solve(self): - '''implement solver restricted spot selection and 
look ahead''' - if len(self.unfilled) == 0: - return True - i, j = min(self.unfilled, key = self._ret_len) - option = self.A - (self.row_state[i] | self.col_state[j] | self.block_state[i//3 ][j//3]) - #print(option) - if len(option) == 0: - return False - self.unfilled.remove((i, j)) - for c in option: - self.set_state(i, j, c) - if self.solve(): - return True - else: - self.reset_state(i, j, c) - # no candidate is valid, backtrack - self.unfilled.append((i, j)) - return False -\end{lstlisting} -In the backtracking, at each recursive call, we first choose the spot that has the least number of candidates. This requires us to compute the spot in real-time, we do it through a set union and set difference computation \texttt{A-(row\_state[i]|col\_state[j]|block\_state[i//3][j//3])}. We set the time cost for this is O(9), and each time the time cost to pick the best one is $O(9n)$, where $n$ is the number of total empty spots. The total time complexity is $O(n^2)$. Compared with the time complexity of $c^n$, where $c$ is the average number of candidate for each spot, the time spent here is trivial. Then we remove it from \texttt{unfilled} list, try each candidate with a \texttt{for} loop. We record this option in the state, and do a recursive call: if it returns with a valid configuration that all spots are filled up and valid, we end the program by return True; otherwise, we reset the state to try the next option. At the end of the \texttt{for} loop, this means no candidates at this spot would lead to a valid solution, we can only keep searching by returning to its parent branch and leave this spot unfilled by putting it back to \texttt{unfilled} list, or say resetting its state. we iterate through the empty spots and for each spot, we iterate through its candidates and fill in one at a time. Before we call recursive function to fill the next one, we record the state. 
If the recursive call returns True, then we just need to return; otherwise, we recover the state and backtrack to the previous state. - -% each time we choose to fill in the spot that with the least number of candidates. - -% In this problem, backtrack happens if the current path can not lead to valid solution. First, for an empty spot following on the path that has no candidate to choose from (line 5-6), then it is an invalid path, and requires backtrack to line 16. Second, for an empty spot, if none of its candidates can lead to valid solution, as shown in code line 11-16, it backtrack to previous empty spot. - -\begin{lstlisting}[language=Python] - def naive_solve(self): - '''implement naitve solver without restricted spot selection or look ahead''' - if len(self.unfilled) == 0: - return True - i, j = self.unfilled.pop() - option = self.A - (self.row_state[i] | self.col_state[j] | self.block_state[i//3 ][j//3]) - for c in option: - self.set_state(i, j, c) - if self.naive_solve(): - return True - else: - self.reset_state(i, j, c) - # no candidate is valid, backtrack - self.unfilled.append((i, j)) - return False - def _ret_len(self, args): - i, j = args - option = self.A - (self.row_state[i] | self.col_state[j] | self.block_state[i//3 ][j//3]) - return len(option) -\end{lstlisting} -% We prun braches that appears invalid already and only handle brachs that up till now are valid. - -\paragraph{Time Complexity} Assume we have $n$ empty spots, and the numbers of possible values for the spots are $spot=[a_0, a_1, ..., a_{n-1}]$. To fill each spot, we need to search the possibility tree. The search tree has a height of $n$: at the first level the width of the tree is $a_0$, at the second level it is at most $a_0 a_1$, and in general level $k$ has at most $\prod_{i=0}^{k} a_i$ nodes. This results in a worst-case time complexity of $O(\sum_{k=0}^{n-1} \prod_{i=0}^{k} a_i)$. 
- -\paragraph{Experiment: Different Ordering of Empty Slots} Let us do an experiment: with the same input board, we time the solver with sorted and with unsorted empty spots and compare the running times. The code is provided in colab. The time is 0.025 seconds for unsorted and 0.0005 seconds for sorted. - -\subsection{Traveling Salesman Problem (TSP)} -\begin{figure}[!ht] - \centering - \includegraphics[width= 0.5\columnwidth]{fig/Euler12-300x225.png} - \caption{Example of a fully connected graph.} - \label{fig:tsp_graph} -\end{figure} -\paragraph{Traveling Salesman Problem Definition} Given a set of cities and the distance between every pair of cities, the problem is to find the shortest possible route that visits every city exactly once and returns to the starting point. -\begin{lstlisting}[numbers=none] -Assume our graph is a two dimensional list: -g = [[(1, 10), (2, 15), (3, 20)], - [(0, 10), (2, 35),(3,25)], - [(0, 15),(1,35),(3,30)], - [(0,20),(1,25),(2,30)]] -g[0][0]=(1,10), means the edge between 0 and 1 with cost 10. -\end{lstlisting} -We show the above example in Fig.~\ref{fig:tsp_graph}. -\subsubsection{Analysis and Solution} -\paragraph{State Space} In TSP, we need to construct a list of vertices (\texttt{path}) and the total cost (\texttt{cost}) of all edges along it as the state $s=(p, c)$, where $p$ and $c$ are short for path and cost respectively. A possible complete solution for path will have $n+1$ vertices, which starts with a vertex and ends with the same one, with $n-1$ vertices in between. Now, let us put the constraints together. -\begin{itemize} - \item ``Visits every city exactly once'' means the first $n$ vertices form a permutation of all cities, so we get $n!$ arrangements (the last vertex does not matter because it must equal the first). - \item We have $n!$ possible states. We can further spot redundant states. For a cycle, it does not matter where it starts, it is always the same cycle. 
For convenience, we choose vertex $0$ as the starting path, and there will only be $n-1$ vertex to permutate with, making the size of the state space to $(n-1)!$. - \item We only care about the minimum cost, then any partial result that has cost larger than the minimum cost of all known complete solution can be prunned. This is called \textit{Branch and bound} method, which is the extension of backtracking into the optimization problems. -\end{itemize} -\paragraph{Implementation} The implementation is a combination of \textbf{all paths} and \textbf{permutation}. We set the start vertex as $0$. \texttt{bused} will be a list of boolean to track if the element is in the path so that the first $n$ vertices will be a permutation. \texttt{cost} is used to record the current total cost. \texttt{mincost} tracks the minimum known cost of complete paths. The Python code is as follows: -\begin{lstlisting}[language=Python] -def tsp(g, cv, path, mincost, cost, bused, ans): - ''' - cv represents the current node - path: a list to track all vertices, we start from vertex 0, or we can use ordered set. - ''' - if len(path) == len(g): # we can only choose 0 - cost += g[cv][0][1] - if cost < mincost[0]: - mincost[0] = cost - ans[0] = path[::] - return - for v, c in g[cv]: - # Bound the search by an estimation - if (not bused[v]) and (cost + c < mincost[0]): - bused[v] = True - path.append(v) - cost += c - tsp(g, v, path, mincost, cost, bused, ans) - bused[v] = False - path.pop() - cost -= c - return -\end{lstlisting} -In this example, we need constraint of permutation and the branch and bound, which is shown in line 14. The end condition of the problem is when we already have the first $n$ unique vertices, the last one must be the same as the start vertex. At this step, we track the \texttt{mincost} and save the path in \texttt{ans}. 
- -%%%%%%%%%%%%%%%%%%%% -\section{Knapsack Problem} -In this section we want to showcase more searching strategies applied in solving optimization problems: we compare backtracking with a best-first search strategy. - -\paragraph{Define Knapsack Problem} We are given $n$ items with a weight vector $w$ and a value vector $v$, and a knapsack with capacity $c$. Our goal is to choose a subset of items that maximizes the total value while the total weight is bounded by $c$. Each item can be used at most once. -\begin{lstlisting}[numbers=none] -For example, -c = 10 -w = [5, 8, 3] -v = [45, 48, 35] - -The best would be choosing item 1 and 3, with total weight of 8, and value of 80. -\end{lstlisting} -\subsubsection{Analysis} This is essentially a combination problem: we have to search for a leaf node that is both feasible--bounded by capacity--and optimal--has the largest value. - -To bound the search, we have to develop a heuristic function to estimate the maximum total value a branch can lead to. The simplest estimation is based on the total value of all items. At first in our case it will be $45+48+35=128$. - -A tighter heuristic function can come from \textbf{constraint relaxation}. What if we are allowed to take part of an item, so that we can fill the knapsack as full as possible? We can sort the items by their unit value, and take items in the order of decreasing unit values. Another bool vector is used to indicate if a certain item can be used or not. At first, all items are allowed, and we can get an estimation of 92 in this case. For branches that decide not to take an item, that item is excluded using the bool vector. We compare the estimation with the best found value; if the estimated value will never be better, then this branch will be pruned. -\subsubsection{Branch and Bound with Backtracking} -In this process, we only search in depth-first order and bound by the estimation. The process is shown in Fig.~\ref{}. 
-\begin{lstlisting}[language=Python]
-class dfsBound:
-    '''
-    Depth-first branch and bound for the 0/1 knapsack problem.
-
-    c: capacity of the knapsack
-    v: list of item values
-    w: list of item weights, aligned with v
-    The search runs inside the constructor and leaves the best total value
-    in self.best.
-    '''
-    def __init__(self, c, v, w):
-        self.best = 0
-        self.c = c
-        self.v = v
-        self.w = w
-        self.n = len(v)
-        # (-value per unit weight, weight, item index): sorting ascending puts the
-        # item with the highest unit value first
-        self.items = [(-vi/wi, wi, i) for i, (vi, wi) in enumerate(zip(v, w))]
-        self.items.sort(key=lambda x: x[0])
-        self.dfs(0, self.estimate([True]*self.n), 0, 0, [True]*self.n)
-
-    def estimate(self, blist):
-        '''
-        Upper bound on the total value: fill the knapsack in order of decreasing
-        unit value and allow a fraction of the last item (constraint relaxation).
-        blist[i] == False excludes item i from the estimate.
-        '''
-        est = 0
-        left = self.c
-        for ratio, wi, i in self.items:
-            if left <= 0:
-                break
-            if not blist[i]:
-                continue
-            if wi <= left:  # the whole item fits
-                est += self.v[i]  # exact value, avoids float round-off of ratio * wi
-                left -= wi
-            else:  # only a fraction fits
-                est += -ratio * left
-                left = 0
-        return est
-
-    def dfs(self, idx, est, val, cap, blist):
-        '''
-        Decide item idx. est is the bound of this branch, val and cap are the value
-        and weight collected so far, blist marks the items that are still allowed.
-        '''
-        if idx == self.n:
-            self.best = max(self.best, val)
-            return
-        # take item idx: prune by the capacity constraint, then bound by the estimate
-        if cap + self.w[idx] <= self.c and est > self.best:
-            self.dfs(idx+1, est, val+self.v[idx], cap + self.w[idx], blist)
-
-        # skip item idx: excluding it can only lower the estimate, so recompute it
-        if est > self.best:
-            blist[idx] = False
-            nest = self.estimate(blist)
-            self.dfs(idx+1, nest, val, cap, blist)
-            blist[idx] = True
-        return
-\end{lstlisting}
-\subsection{Branch and Bound with Best-First Search}
-We can decide to expand branch that has the most optimistic estimation first, instead of blindly in order of depth-first, hoping it will find a good enough global value with a higher bar to serve as a good start for the bounding; a higher bar will help us prune more worse branches faster. Supposedly, best-first search will guide us to find the optimal solution faster than backtracking, but we know it comes with higher cost in space usage. The process is shown in Fig.~\ref{}.
-\begin{lstlisting}[language=Python]
-import heapq
-def bfs(c, v, w):
-    '''
-    Best-first branch and bound for the 0/1 knapsack problem.
-
-    c: capacity, v: list of item values (assumed non-negative), w: list of weights.
-    Returns the largest total value that fits into the knapsack.
-    '''
-    n = len(v)
-    best = 0
-    # Heap entries are (-est, val, cap, idx): idx is the next item to decide and
-    # est = val + sum(v[idx:]) is an upper bound on what this branch can reach.
-    # Negating est makes the min-heap pop the most optimistic branch first.
-    q = [(-sum(v), 0, 0, 0)]
-    while q:
-        neg_est, val, cap, idx = heapq.heappop(q)
-        est = -neg_est
-        best = max(best, val)  # every node on the heap respects the capacity
-        if idx == n:
-            continue
-        if est <= best:
-            # the most optimistic branch cannot beat best, so no other branch can
-            break
-        if cap + w[idx] <= c:  # prune by constraint
-            # taking the item moves its value from "potential" to val: est is unchanged
-            heapq.heappush(q, (-est, val+v[idx], cap + w[idx], idx+1))
-        # skipping the item removes its value from the potential
-        heapq.heappush(q, (-(est - v[idx]), val, cap, idx+1))
-    return best
-\end{lstlisting}
-
-
-%%%%%%%%%summary%%%%%%%%%%%%%%%%%%%%%%
-\subsection{Summary}
-
-\paragraph{Backtracking Implementation} At first, backtracking might sounds scary and not many books out there did great job to ease it down. If we were to write down a standard template for backtracking, what are the elements? First, imagine that we are building and traverse a tree.
- \begin{enumerate}
- \item \textbf{State Vector $s$}: state vector is something we use to track the solution. In permutation and combination, it is \texttt{curr} and \texttt{path} in all paths problem. For the sudoku solver, this can be implemented in \texttt{unfilled} with the value and position. However, instead, it is easier to directly tracking it on the \texttt{board}.The state vector tells us the height of the tree, the candidates for each level is based on the constraint before.
- \item \textbf{State Map and Candidates}: This is a good example of trading space for efficiency. The constraint of what candidates we have for current node depends on the previous state. We can get previous state by looking at the current result as in permutation in \texttt{curr} or the \texttt{board} in sudoku. However, each lookup took $O(n)$ time to complete.
A smarter choice is to set aside another space with boolean or dictionary-like data structure to track the state along with the result data structure. Such as \texttt{used} in help of permutation and \texttt{row\_state}, \texttt{col\_state}, and \texttt{block\_state} to assist tracking the constraint in sudoku. Now to lookup if a candidate is possible we only need $O(1)$. - \end{enumerate} - - \paragraph{Time and Space Complexity} The time complexity analysis is straightforward which is the same as the DFS, $O(b^d)$. For the space, we have analyzed in the permutation example, however, it is important enough to summarize it here for emphasize. The backtracking techniques improves the space efficiency on the basis of DFS, which has $O(bd)$ using in two possible ways: - \begin{enumerate} - \item The backtracking mechanism itself that only generate one state a time on the fly, and not worrying about its sibling states brings the space complexity down to $O(d)$. - \item In our practice, we have seen another trick that we do not pass a new state to our recursive function each time. Instead, we make modification that takes $O(1)$ time complexity instead of $O(d)$ to copy the state, and undo the modification once we returned from the recursive call. This reduces the memory requirement to just one state vector and $O(d)$ actions to modify the state. This trick is both time and space saving. - \end{enumerate} - -\paragraph{Search Space Prunning Methods} We summarize the following three methods that can be potentially applied in a CSP or an optimization problem. -\begin{enumerate} - \item \textbf{Make Global Decision:} The backtracking works correctly if we do not update the each slot's candidates since initialization, we only make decision based on current node's validity. 
However, it would be more wise if each time we try an option, we check how this decision change remaining slots' candidates; if any of the remaining slots have zero candidates, we should better off stopping this shot and simply go to next option. - \item \textbf{Be Greedy about Ordering:} Each time we choose the spot that has the least number of candidates among the remaining spots list to update state with. This can be carried out with or without executing candidates updating for all remaining spots at each step. In our example of Suduku, we did update for all slots, but in some cases the cost might be too much or you just simply don't have enough time to write the code. - \item \textbf{*Symmetry:} Exploiting symmetry is another avenue for reducing combinatorial searches, prunning away partial solutions identical to those previously considered requires recognizing underlying symmetries in the search space. - \item \textbf{Branch and Bound:} Branch and bound is the idea of backtracking extended to optimization problems. We are minimizing a function with this useful property: A partial solution is pruned if its cost >= cost of best known complete solution. -\end{enumerate} - - -% \paragraph{Backtracking VS Exhaustive Search} Backtracking helps in solving an overall problem by incrementally builds candidates {(implicit search tree)}. which is equivalent to finding a solution to the first sub-problem and then recursively attempting to resolve other sub-problems bases on the solution of the first sub-problem. Therefore, \textbf{Backtracking can be considered as a Divide-and-conquer method for exhaustive search.} Problems which are typically solved using backtracking technique have such property in common: they can only be solved by trying every possible configuration and each configuration is tried only once(every node one time). A Naive exhaustive search solution is to generate all configurations and “pick” a configuration that follows given problem constraints. 
Backtracking however works in incremental way and \textbf{prunes} branches that can not give a result. It is an optimization over the exhaustive search where all possible(possible still with constraints) configurations are generated and tried. This comparison is called named as \textbf{Generating VS Filtering}. Backtracking can be viewed as a smart exhaustive search. - -% Similarly, the difference between backtracking and DFS is the same. DFS is a searching technique applied on graph or tree data structures (more likely on explicit data structures). - - There is an interesting questions. -\begin{bclogo}[couleur = blue!30, arrondi=0.1,logo=\bccrayon,ombre=true]{As we explained DFS itself is an incomplete search technique, then why would backtracking search be complete?} -\end{bclogo} - - -% \section{Bonus} -% Generating combinations, permutation uniformly at random. - - -\section{Exercises} -\subsection{Knowledge Check} - -\subsection{Coding Practice} -\paragraph{Cycle Detection} -\begin{enumerate} - \item 207. Course Schedule (medium) -\end{enumerate} - -\paragraph{Topological Sort} - -\paragraph{Connected Component} -\begin{enumerate} - \item 323. Number of Connected Components in an Undirected Graph (medium). - \item 130. Surrounded Regions(medium) - \item -\end{enumerate} - -\paragraph{Mix} -\begin{enumerate} - \item 210. Course Schedule II (medium, cycle detection and topological sort). -\end{enumerate} - -\paragraph{Backtracking} -\begin{enumerate} -\item 77. Combinations -\begin{lstlisting} -Given two integers n and k, return all possible combinations of k numbers out of 1 ... n. - -Example: - -Input: n = 4, k = 2 -Output: -[ - [2,4], - [3,4], - [2,3], - [1,2], - [1,3], - [1,4], -] -\end{lstlisting} - -\item 17. Letter Combinations of a Phone Number -\begin{lstlisting} -Given a digit string, return all possible letter combinations that the number could represent. - -A mapping of digit to letters (just like on the telephone buttons) is given below. 
- -Input:Digit string "23" -Output: ["ad", "ae", "af", "bd", "be", "bf", "cd", "ce", "cf"]. - -Note: -Although the above answer is in lexicographical order, your answer could be in any order you want. -\end{lstlisting} - -\item 797. All Paths From Source to Target (medium). - -\item \textbf{37. Sudoku Solver (hard).} -Write a program to solve a Sudoku puzzle by filling the empty cells. - -A sudoku solution must satisfy all of the following rules: -\begin{enumerate} - \item Each of the digits 1-9 must occur exactly once in each row. - \item Each of the digits 1-9 must occur exactly once in each column. - \item Each of the the digits 1-9 must occur exactly once in each of the 9 3x3 sub-boxes of the grid. -\end{enumerate} -Empty cells are indicated by the character '.'. - -\item %%%%%%%%%%%%%%%%%%%%%%Eight Queen%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\subsection{Eight Queen} -\begin{figure}[!ht] - \centering - \includegraphics[width=0.6\columnwidth]{fig/8-queens.png} - \caption{An examplary solution to the eight-queen problem.} - \label{fig:solution_eight_queen} -\end{figure} -\paragraph{Eight Queen Problem Definition} -Given a chessboard which is of size $8\times 8$, how many distinct ways to position eight queens on the chessboard such that no two queens threaten each other. According to the chess rules: a queen can move any step either horizontally, or vertically, or diagonally. Which is kind of similar to the rules of sudoku. There is another type of question which asks to return all distinct solutions. -\begin{enumerate} - \item Each row can only has one queen. - \item Each column can only has one queen. - \item Each diagonal can only has one queen. -\end{enumerate} -One examplary solution is shown in Fig.~\ref{fig:solution_eight_queen}. The problem can be extend to $n$-queen problem, which states on any size of $n\times n$ chessboard, how many ways to place $n$ queens that they are mutually non-attacking. 
- -\subsubsection{Analysis and Solution} -\paragraph{State Space} For the $n$ queens, it does not matter about the ordering; like the example, if we switch the positions of them, we will not get another solution. Therefore, it is a combination problem instead of permutation. For combination, we just care the position and for each position, it only differs if there is a queen or not (two choices), while not 9 (no queen plus any of the other queen). We have different ways to arrange these queens: -\begin{enumerate} - \item No constraint: (1) if even no constraint of number of queens, for 64 positions, each has two states, this gives us $N=2^{64}$, (2) put the constraint of only 8 queens, we simply try to come out the possible combination of 8 queens on $8\times 8$ chessboard, it is going to be $N = C_{64}^{8} =4426, 165,368$. - \item Add constraint One: Now for the first row, we can have 8 different states, one and only one will have a queen, same for any other rows followed by. We end up with $N=8^8$. - \item Add constraint Two: If each column can only have one queen, then for the first row, it will have 8 possible states, while the second can only have 7, thus making $N=8!$, which is less that $10^6$ and is possible for programs to run. -\end{enumerate} -\begin{figure}[!ht] - \centering - \includegraphics[width=0.6\columnwidth]{fig/n_queen_four.png} - \caption{Solutions shown of $4 \times 4$ chessboard} - \label{fig:n_queen_four} -\end{figure} -The above analysis reveals that our state vector should be of size of the total number of rows, $S=[None]*8$, with the index to represent the row in the chessboard, and the value to be an integer from 0 to n-1 to track the column that has a queen. It is similar to a permutation problem. For $4\times 4$ board, we have the following two solutions, represented with $S$, it is $S=[1, 3, 0, 2], [2, 0, 3, 1]$. Therefore, our search tree will be of height 8. 
For edges which represent possible candidate, we can have two ways to generate candidates: (1) easy one that we iterate through all $n$ columns for each row and we just need to validate each candidate through \texttt{assist\_state\_tracker}. (2) generate candidate based on previous state vector $s$.
-
-\paragraph{Implementation}
-\begin{figure}[!ht]
- \centering
- \includegraphics[width=0.6\columnwidth]{fig/n_queen_diag.png}
- \caption{Caption}
- \label{fig:my_n_queen_diag}
-\end{figure}
-
-We use \texttt{n\_queen} a list whose index indicates the row of the chessboard and the value represents the column that we put the queen in. It starts as an empty list and with a possible maximum length of $n$. Because in the backtracking, each level represents the row, thus we do not need to track the row state. We need to have \texttt{col\_state} to track if a column has a queen already. As indicates in Fig.~\ref{fig:my_n_queen_diag}, for each possible position, we need to check the left and right diagonal if a queen already put there. Here, we use two lists \texttt{left\_diag} and \texttt{right\_diag} to track them. For position $(r, c)$, the position at the \texttt{left\_diag} will be (r-1, c-1), (r+1, c+1), the rule is bit hidden that r-c = (r-1)-(c-1) = (r-2)-(c-2). For the \texttt{right\_diag}, it will be (r-1, c+1), (r+1, c-1), thus the same diag has the same value of (r+c). The implementation is as follows:
-\begin{lstlisting}[language=Python]
-def solveNQueens(self, n):
-    """
-    Return every way to place n mutually non-attacking queens on an n x n board.
-    Each solution is a list of n strings, 'Q' for a queen and '.' for an empty cell.
-
-    :type n: int
-    :rtype: List[List[str]]
-    """
-    # A queen attacks along its row, its column and both diagonals. Each recursion
-    # level is one row, so only the columns and the diagonals need state.
-    col_state = [False]*n
-    left_diag = [False]*(2*n-1)   # r-c is constant on it; shifted by n-1 to start at 0
-    right_diag = [False]*(2*n-1)  # r+c is constant on it
-    n_queen = []  # n_queen[r] is the column of the queen in row r
-    ans = []
-
-    def collect_solution():
-        # render the current placement, one string per row
-        return [''.join('Q' if c == col else '.' for c in range(n)) for col in n_queen]
-
-    def is_valid(r, c):
-        return not (col_state[c] or left_diag[r-c+n-1] or right_diag[r+c])
-
-    def set_state(r, c, val):
-        col_state[c] = val
-        left_diag[r-c+n-1] = val
-        right_diag[r+c] = val
-
-    def backtrack(n_queen, k):
-        if k == n:  # a valid result
-            ans.append(collect_solution())
-            return
-        # generate candidates for kth queen
-        for col in range(n):
-            if is_valid(k, col):
-                set_state(k, col, True)
-                n_queen.append(col)
-                backtrack(n_queen, k+1)
-                set_state(k, col, False)
-                n_queen.pop()
-
-    backtrack(n_queen, 0)
-    return ans
-\end{lstlisting}
-
-There is another way to generate candidates based on \texttt{n\_queen}. At the first row, we have candidates of [0, 1, 2, 3]. Assume we choose 1 here, and at row 1, we generate candidates based on previous rows. We remove the ones on the diagnal and columns. The code is implemented as:
-\begin{lstlisting}[language=Python]
-def solveNQueens2(self, n):
-    """
-    Same result as solveNQueens, but the candidate columns of a row are derived
-    from the queens already placed instead of from separate state arrays.
-
-    :type n: int
-    :rtype: List[List[str]]
-    """
-    n_queen = []  # n_queen[r] is the column of the queen in row r
-    ans = []
-
-    def collect_solution():
-        # render the current placement, one string per row
-        return [''.join('Q' if c == col else '.' for c in range(n)) for col in n_queen]
-
-    def generate_candidate(n_queen, k, n):
-        # start from all columns and remove what the queens of rows 0..k-1 attack in row k
-        candidates = set(range(n))
-        for r, c in enumerate(n_queen):
-            # the queen's own column and the two squares its diagonals hit in row k
-            candidates.difference_update((c, c-(k-r), c+(k-r)))
-        return candidates
-
-    def backtrack(n_queen, k):
-        if k == n:  # a valid result
-            ans.append(collect_solution())
-            return
-        # generate candidates for kth queen; sorted so the output order is deterministic
-        for c in sorted(generate_candidate(n_queen, k, n)):
-            n_queen.append(c)
-            backtrack(n_queen, k+1)
-            n_queen.pop()
-
-    backtrack(n_queen, 0)
-    return ans
-\end{lstlisting}
-\paragraph{Symmetry}
-\begin{figure}[!ht]
- \centering
- \includegraphics[width=0.6\columnwidth]{fig/n_queen_symmetry.png}
- \caption{Mirroring can cut the search space into half.}
- \label{fig:n_queen_symmetry}
-\end{figure}
-\begin{figure}[!ht]
- \centering
- \includegraphics[width=0.6\columnwidth]{fig/n_queen_oddPicture1.png}
- \caption{Mirroring can cut the search space into half.}
- \label{fig:n_queen_odd}
-\end{figure}
-\url{https://www.aaai.org/Papers/AAAI/2006/AAAI06-257.pdf}. Start from an easy one, we can observe that we can obtain the second solution of $4\times 4$ chessboard by flipping around the first around the red axis. Assume our first solution is $S=[1, 3, 0, 2]$. We represent the $S=[a_0, a_1, a_2, a_3]$. Now the mirroring relation can be represented as $m_i+a_i=n-1$, thus $m_i=n-1-a_i$. If n is even, then at the first level of backtracking, we can elimiate the second half of candidates, which cut of search cost into half of the previous.
If we just need to count the total number of distinct solutions, we can just double the number we find. The process is shown in Fig.~\ref{fig:n_queen_symmetry}. For odd one, for the middle spot at the first row, if we follow the same rule as in the even $n$, some solutions shown in follows will be doubled. So we need to distinguish the middle spot situation in odd $n$ case. If we place a queen in the middle spot of the first row, then for the following $n-1$ rows, no one will be in the middle column any more. For the second row, we can eliminate the other half of candidates on the right side as shown in Fig.~\ref{fig:n_queen_odd}. Our code become:
-\begin{lstlisting}[language=Python]
-def solveNQueensSymmetry(n):
-    """
-    Count the solutions of the n-queen problem, searching only one half of the
-    mirror-symmetric placements and doubling the count.
-
-    :type n: int
-    :rtype: int
-    """
-    if n in (0, 1):
-        # The single placement is its own mirror image; doubling would count it twice.
-        return 1
-
-    def generate_candidate(n_queen, s, k, n):
-        # row s is the row where symmetry is applied: keep only its left half
-        if k == s:
-            candidates = set(range(n//2))
-        else:
-            candidates = set(range(n))
-        for r, c in enumerate(n_queen):
-            # the queen's own column and the two squares its diagonals hit in row k
-            candidates.difference_update((c, c-(k-r), c+(k-r)))
-        return candidates
-
-    def backtrack(n_queen, s, k):
-        '''count the completions of n_queen; s is the row restricted to the left half'''
-        if k == n:  # a valid result
-            return 1
-        total = 0
-        for c in generate_candidate(n_queen, s, k, n):
-            n_queen.append(c)
-            total += backtrack(n_queen, s, k+1)
-            n_queen.pop()
-        return total
-
-    # first queen in the left half of the first row; the mirror image covers the right half
-    ans = backtrack([], 0, 0)*2
-
-    # odd n: first queen in the middle column, second queen in the left half of the second row
-    if n%2 == 1:
-        ans += backtrack([n//2], 1, 1)*2
-    return ans
-\end{lstlisting}
-\end{enumerate}
-\end{document}\paragraph{Alternative: Swapping Method}
-
diff --git a/Easy-Book/chapters/chapter_combinatorics.pdf b/Easy-Book/chapters/chapter_combinatorics.pdf
deleted file mode 100644 index ca98c09..0000000 Binary files a/Easy-Book/chapters/chapter_combinatorics.pdf and /dev/null differ diff --git a/Easy-Book/chapters/chapter_combinatorics.synctex.gz b/Easy-Book/chapters/chapter_combinatorics.synctex.gz deleted file mode 100644 index e989df7..0000000 Binary files a/Easy-Book/chapters/chapter_combinatorics.synctex.gz and /dev/null differ diff --git a/Easy-Book/chapters/chapter_combinatorics.tex b/Easy-Book/chapters/chapter_combinatorics.tex deleted file mode 100644 index 4fa4b94..0000000 --- a/Easy-Book/chapters/chapter_combinatorics.tex +++ /dev/null @@ -1,226 +0,0 @@ -\documentclass[../main.tex]{subfiles} -\begin{document} -\chapter{Introduction to Combinatorics} - -In discrete optimization, some or all of the variables in a model are required to belong to a discrete set; -this is in contrast to continuous optimization in which the variables are allowed to take on any value within a range of values. -There are two branches of discrete optimization: integer programming and combinatorial optimization where the discrete set is a set of objects, -or combinatorial structures, such as assignments, combinations, routes, schedules, or sequences. -Combinatorial optimization is the process of searching for maxima (or minima) of an objective function $F$ whose domain is a discrete but -large configuration space (as opposed to an N-dimensional continuous space). Typical combinatorial optimization problems are the travelling -salesman problem (``TSP''), the minimum spanning tree problem (``MST''), and the knapsack problem. -We start with basic combinatorics which is able to enumerate the all solutions exhaustively. -Later on, other chapters we will dive into different combinatorial/disrete optimization problems. 
- -%https://www.cs.cmu.edu/afs/cs.cmu.edu/project/learn-43/lib/photoz/.g/web/glossary/comb.html -%https://en.wikipedia.org/wiki/Combinatorial_optimization - - -%%%%%%%%%%%%%%%%%%%%%%%%%Combinatorics%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -Combinatorics, as a branch in mathematics that mainly concerns with counting and enumerating, is a means in obtaining results, and certain properties of finite structures. Combinatorics is used frequently in computer science to obtain formulas and estimates in both the design and analysis of algorithms. It is a broad and thus seemingly hard to define topic that can solve the following types of questions: -\begin{itemize} - \item The counting or enumerating of specified structures, sometimes referred to as arrangements or - configurations in a very general sense, associated with finite systems, - \item the existence of such structures that satisfy certain given criteria, - this is usually called Contraint Restricted Problems (CSPs). - \item optimization, finding the ``best'' structure or solution among several possibilities, - be it the ``largest'',``smallest'' or satisfying some other optimality criterion. -\end{itemize} -In this section, we introduce common combinatorics that can help us come up with the simplest which potentially be quite large state space. At least, this is the first step, and solving a small problem in this way might offer us more insights on continuing finding a better solution. - -When the situation is easy, we can mostly figure out the counting with some logic and get a closed-form solution; when the situation is more complex such as in the \textit{partition} section, we detour by using recurrence relation and math induction. -\section{Permutation} -Given a list of integer $[1,2,3]$, how many way can we order these three numbers? Imagine that we have three positions for these three integers. For the first position, it can choose 3 integers, leaving the second position with 2 options. 
Further, when it reaches to the last position, it can only choose whatever that is left, we have 1. The total count will be $3\times2\times1$. - -Similarly, for $n$ distinct numbers, we will get the number of permutation easily as $n\times(n-1)\times...\times1$. A factorial, denoted as $n!$, is used to abbreviate it. Worth to notice, the factorial sequence grows even quicker than the exponential sequence, such as $2^n$. - -\subsection{$n$ Things in $m$ positions} Permutation of $n$ things on $n$ positions is denoted as $p(n,n)$. Think about what if we have $m\in[1, n-1]$ positions instead? How to get a closed-form function for $p(n,m)$. The process is the same: we fix each position and consider the number of choice of things each one has. -\begin{align} - p(n,m) &= n\times(n-1)\times...\times (n-(m-1)) \\ - &=\frac{n\times(n-1)\times...\times (n-m+1)\times (n-m)\times...\times 1}{(n-m)\times...\times 1}\\ - &=\frac{n!}{(n-m)!} -\end{align} -If we want $p(n,n)$ to follow the same form, it would require us to define $0!=1$. -\begin{bclogo}[couleur = blue!30, arrondi=0.1,logo=\bccrayon,ombre=true]{What if there are repeated things, that things are not distinct? } -\end{bclogo} -% \paragraph{What if things are not distinct?} For example, how to get the permutation of [1,2,2,3]. - -\subsection{Recurrence Relation and Math Induction} The number or the full set of permutations can be generated incrementally. We demonstrate how with recurrence relation and math induction. We start from $P(0,0)=1$. Easily, we get $P(i, 0)=1, i\in[1, n]$. With math induction, now assume we know $P(n, m-1)$, the number of ways to fill the first $m-1$ positions; for the m-th position, what choice does it have? It has to pick one of the $n-(m-1)$ things that are still unused, and every such choice extends every arrangement of the first $m-1$ positions into a different arrangement, resulting $P(n, m) = (n-m+1)P(n, m-1)$.
- -Now, we can use iterative method to obtain the closed-form solution: -\begin{align} - P(n, m) &= (n-m+1)P(n, m-1)\\ - &=(n-m+1)(n-m+2)P(n, m-2) \\ - &...\\ - &=(n-m+1)(n-m+2)\times...\times n\,P(n, 0)\\ - &=\frac{n!}{(n-m)!} -\end{align} - -\subsection{See Permutation in Problems} Suppose we want to sort an array of integers incrementally, say the array is $A=[4,8,2]$. The right order is $[2,4,8]$, which is trivial to obtain in this case. If we are about to form it as a search problem, we need to define a \textit{search space}. Using our knowledge in combinatorics, we know all possible ordering of these numbers are [4,8,2],[4,2,8],[2,4,8],[2,8,4],[8,2,4],[8,4,2]. Generating all possible ordering and save it in an array maybe. Then this sorting problem is converted into checking which array is incrementally sorted. However, it comes with large price on space usage, since for $n$ numbers there, the number of possible orderings are $n!$. A smarter way to do it is to check the ordering as we are generating the ordering set. - -% Another problem is the one we mentioned, the subarray sum problem. The subarray if be represented as $a_{i,j}$, it is like we need to select two $n$ items and put them at two positions: $i$ and $j$. For position $i$ we have $n$ options, for $j$, there is only -\section{Combination} -%\paragraph{Problem Definition} -Same as before, we have to choose $m$ things out of $n$ but with one difference--the order does not matter, how many ways we have? This problem is called \textbf{combination}, and it is denoted as $C(n,m)$. For example, for [1,2,3], $C(3, 2)=[1, 2], [2, 3], [1, 3]$. Comparatively, P(3, 2) = [1, 2], [2, 1], [2, 3], [3, 2], [1, 3], [3, 1]. - -To get combination, we can leverage and apply permutation first. However, this results over-counting. As shown in our example, when there are two things in the combination, a permutation would double count it. If there are $m$ things, we over count by $m!$ times.
Therefore, if we divide the permutation by all permutation of $m$ things, we get out formula for combination: -\begin{align} - C(n,m)&=\frac{P(n,m)}{P(m,m)}\\ - &=\frac{n!}{(n-m)!m!} -\end{align} -\begin{bclogo}[couleur = blue!30, arrondi=0.1,logo=\bccrayon,ombre=true]{Back to the last question, when there are repeats in the permutation. We can use the same idea. Assume we have $n,m$ that $n$ things in total but only $m$ types and $a_i, i\in[1,m]$ to denote the number of each type, this means $a_1+ a_2+ ... + a_m= n$. The number of ways to linearly order these objects is $\frac{n!}{a_1!a_2!...a_m!}$. } -\end{bclogo} - - -The combination of k things out of n, will be the same as choosing (n-k) things. - \begin{align} - C(n,k)=C(n, n-k) - \end{align} - -\subsection{Recurrence Relation and Math Induction} We also show how the combination can be generated incrementally. We start from $C(0,0)=1$, and easily we get $C(n, 0)=1$. Assume we know $C(n-1, k-1)$, now we need to add the k-th item into the combination? : -\begin{itemize} - \item Use k-th item, then we just need to put the the k-th item into any sets in $C(n-1,k-1)$, resulting $C(n-1,k-1)$. - \item Not use k-th item, this means we need to pick $k$ items from the other $n-1$ items, resulting $C(n-1,k)$. -\end{itemize} -Thus, we have $C(n, k) = C(n-1, k-1) + C(n-1, k)$, this is called \textbf{Pascal's Identity}. - -\paragraph{What if things are not distinct?} - -%%%%%%%%%%%%%%%%%%Partition%%%%%%%%%%%%%%% -\section{Partition} -We discuss three types of partitions: (1) integer partition, (2) set partition, and (3) array partition. In this section, counting becomes less obvious compared with combination and permutation, this is where we rely more on \textbf{recurrence relation} and \textbf{math induction}. - -% {14, 23},{1, 234},{1, 23, 4},{14,2,3},{1,24,3},{1,2,34}, and {1,2,3,4} for parts from 1 to 4. Vetex or edge coloring, and connected components are one of the set partition examples. 
Set partition is actually a combination problem. We will show how we can do set parititon with backtrack and dynamic programming sometimes. - -\subsection{Integer Partition} -\paragraph{Integer Partition Definition} Integer partitions is to partition a given integer $n$ into distinct subsets that add up to $n$. -\begin{lstlisting}[numbers=none] -For example, given n=5, the resulting partitioned subsets are these 7 subsets: -{5} -{4, 1}, -{3, 2} -{3, 1, 1}, -{2, 2, 1}, -{2, 1, 1, 1}, -{1, 1, 1, 1, 1} -\end{lstlisting} - -\paragraph{Analysis} Let us assume the resulting sequence is $(a_1, a_2, ..., a_k)$, and $ a_1\geq a_2\geq...\geq a_k\geq 1$, and $a_1+ a_2+ ... + a_k= n$. The ordering is simply to help us to track the sequence. We use - -The easiest way to generate integer partition is to construct them incrementally. We first start from the partition of {n}. For n=5, we get {5} first. Then we subtract one from the largest item that is larger than 1, and add it to the smallest item if it exists and that the resulting s+1 < l, s < l-1 , and other option is to put it aside. For {5}, there is no other item, so that it becomes {4, 1}. For {4,1}, following the same rule, we get {3, 2}, for {3, 2}, we get {3,1,1}. -\begin{lstlisting} -{5}, no other smaller item, put it aside -{4, 1}, satisfy s1. - - - -\subsection{Set Partition} -\paragraph{Set Partition Problem Definition} How many ways exist to partition a set of $n$ distinct items ${a_1, a_2, ..., a_n}$ into $k$ nonempty subsets, $k<=n$. -\begin{lstlisting}[numbers=none] -Here are 7 ways that we can partition the set {a1, a2, a3,a4} into 2 nonempty subsets. They are -{a1}, {a2,a3,a4}; -{a2}, {a1,a3,a4}; -{a3}, {a1, a2, a4}; -{a4}, {a1, a2, a3} -{a1, a2}, {a3, a4}; -{a1, a3}, {a2, a4}; -{a1, a4}, {a2, a3}; -\end{lstlisting} -Let us denote the total ways as $s(n,k)$. As seen in the example, given 2 groups and 4 items, there are two combination of each group's size: 1+3 and 2+2. 
For combination {1,3}, this is equivalent to choose one item from the set to put at the first subset $C(4,1)$, and then choose 3 items for the other subset $C(3, 3)$. For combination {2, 2}, we have $C(4,2)$ for one subset and $C(2,2)$ for the other subset. However, because the ordering of the subsets does not matter, we need to divide it by $2!$. The set partition problem thus consists of two steps: -\begin{itemize} - \item Partition $n$ into $k$ integers: This subroutine can be solved with integer partition we just learned. We have $b_1+b_2+...+b_k = n$. - \item For each combination of integer partition, we compute the number of ways choosing $b_i$ items for that set, we get $C(n, b_1)\times C(n-b_1, b_2)\times C(n-b_1-b_2, b_3)\times...\times C(b_k, b_k)$. Now, we find the distinct $b_i$ and its number of appearance in the sequence. If we have $m$ distinct number denoted as $b_i$, and its count $c_i$, then we divide the above ways by $c_1!c_2!...c_m!$. -\end{itemize} - -From this solution, it is hard to get a closed form for $s(n,k)$. - - - - - -\paragraph{Find Recurrence Relation} There is just one way to handle this problem, let us try the incremental method--find a recurrence relation. We first start with $s(0, 0)=1$ (the empty set has exactly one partition, the one with no subsets), and we can also easily get $s(n,0)=0$ for $n\geq 1$. Now, with the mathematical induction, we assume we solved a subproblem, say $s(n-1, k-1)$, can we induce $s(n,k)$? What do we need? - -Now we have n-1 items in k-1 groups, now there is one additional group and one additional item. There are two ways: -\begin{itemize} - \item put the additional item into the additional group. In this way, $s(n,k)$ is simply the same as of $s(n-1,k-1)$.
- \item spread the n-1 items from the original k-1 groups into k groups, that is $s(n-1, k)$ and our additional item has k options now, making $k\times s(n-1,k)$ in total -\end{itemize} -Combining the counts of these two ways, we get the recurrence relation -\begin{align} - s(n,k)=s(n-1,k-1)+k s(n-1,k) -\end{align} - -\section{Array Partition} -\paragraph{Problem Definition} How many ways exist to partition an array of $n$ items $\{a_1, a_2, ..., a_n\}$ into subarrays. There are different subtypes depending on the number of subarrays, say $m$: -\begin{enumerate} - \item When the number $m$ is as flexible as $m\in[1, n-1]$. - \item When the number $m$ is fixed as a number in range $[2, n-1]$. -\end{enumerate} - -\paragraph{When the number of subarray is fixed} For example, it is common to partition an array into 2 or 3 subarrays. First, we find an item $a_i$ as a partition point, getting the last subarray $a[i:n]$ and leaving the array $a[0:i]$ to consider further. If $m=2$, $a[0:i]$ is the first subarray and the partition process is done. This gives out $n$ ways of partition. When $m=3$, we need to further partition $a[0:i]$ into two parts. This can be represented with recurrence relation: -\begin{align} - d(n, m) = (d(i, m-1), a[i:n]), i \in[0, n-1] -\end{align} -Further, for $d(i, m-1)$: -\begin{align} - d(i, m-1) = (d(j, m-2), a[j:i]), j \in[0, i-1] -\end{align} -This can be done recursively: we will have a recursive function with depth $m$. - -\paragraph{When the number of subarray is flexible} The process is the same other than $m$ can be as large as $n-1$. If we are about to use dynamic programming, for all these states, we need to come up with an ordering of the state $(i, j)$, where $i$ is the subproblem $a[0:i]$, and $j$ is the number of partitions. 
We imagine it as a matrix with $i, j$ as row and column respectively: -\begin{lstlisting}[numbers=none] - 0 1 2 n-1: partition -0 X - - - -1 X X - - -2 X X X - -n-1 -n X X X X X -\end{lstlisting} -Does the ordering of the \texttt{for} loop matter? Actually it does not. - -\paragraph{Applications} There are many applications that involve splitting an array/string or cutting a rod. This relates to spliting type of dynamic programming. - -\section{Merge} - - -\section{More Combinatorics} -Combinatorics is about enuemrating specified structures, there are some structures are of our main interests through this book and often appears in the interviews, they are: \textit{subarray}, \textit{subsequence}, and \textit{subsets}. -\paragraph{Subarray} We have solved one example with subarray. Subarray is defined as a contigious sequence in the array, which can be represented as $a[i,...,j]$. The number of subarray exist in an array of size $n$ will be: -\begin{align} - sa=\sum_{i=1}^{i=n} i = n*(n+1)/2 -\end{align} -A substring is a contiguous sequence of characters within a string. For instance, "the best of" is a substring of "It was the best of times". This is not to be confused with subsequence, which is a generalization of substring. For example, "Itwastimes" is a subsequence of "It was the best of times", but not a substring. - -Prefix and suffix are special cases of substring. A prefix of a string S S is a substring of S that occurs at the beginning of S. A suffix of a string S is a substring that occurs at the end of S. -\paragraph{Subsequence} For a subsequence means any sequence we can find the array, which is not required to be contiguous, but the ordering still matters. For example, in the array of [ABCD], the subsequence will be -\begin{lstlisting} - [], -[A], [B], [C],[D], -[AB],[AC],[AD], [BC],[BD], [CD], -[ABC],[ABD], [ACD], [BCD], -[ABCD] -\end{lstlisting} -You would actually see for $n=4$, there are 16 possible subsequence, which is $2^4$. 
This is not a coincidence. Imagine that each item in the array has two options, either be chosen into the possible sequence or not chosen, which makes it $2^n$. -\begin{align} - ss=2^n -\end{align} -\paragraph{Subset} A subset B of a set A is defined as a set in which all elements are from set A. In other words, the subset B is contained inside the set A, $B \subseteq A$. There are two kinds of subsets: if the order of the subset doesn't matter, it is a combination problem, otherwise, it is a permutation problem. - -If it is the case that ordering does not matter, for $n$ distinct things, the number of possible subsets, which is the size of \textit{the power set}, will be: -\begin{align} - power\_set = C(n,0)+C(n,1)+...+C(n,n) = 2^n -\end{align} - - - - -\end{document} \ No newline at end of file diff --git a/Easy-Book/chapters/chapter_decrease_and_conquer.tex b/Easy-Book/chapters/chapter_decrease_and_conquer.tex deleted file mode 100644 index e8719f5..0000000 --- a/Easy-Book/chapters/chapter_decrease_and_conquer.tex +++ /dev/null @@ -1,1149 +0,0 @@ -\documentclass[main.tex]{subfiles} -\begin{document} -\chapter{Decrease and Conquer} -Want to do even better than linear complexity? Decrease and conquer reduces one problem into one smaller subproblem only, and the most common case is to reduce the state space into half of its original size. If the combining step takes only constant time, we get an elegant recurrence relation as: -\begin{equation} - T(n) = T(n/2) + O(1), -\end{equation} -which gives us logarithmic time complexity! - -We introduce three classical algorithms--binary search in array, binary search tree, and segment tree to reinforce our understanding of decrease and conquer. Importantly, binary search and binary search tree make up \textbf{10\%} of the total interview questions. 
-\section{Introduction} -All the searching we have discussed before never assumed any ordering between the items, and searching an item in an unordered space is doomed to have a time complexity linear to the space size. This case is about to change in this chapter. - -Think about these two questions: What if we have a sorted list instead of an arbitrary one? What if the parent and children nodes within a tree are ordered in some way? With such special ordering between items in a data structures, can we increase its searching efficiency and be better than the blind one by one search in the state space? The answer is YES. - -Let's take advantage of the ordering and the decrease and conquer methodology. To find a target in a space of size $n$, we first divide it into two subspaces and each of size $n/2$, say from the middle of the array. If the array is increasingly ordered, all items in the left subspace are smaller than all items in the right subspace. If we compare our target with the item in the middle, we will know if this target is on the left or right side. With just one step, we reduced our state space by half size. We further repeat this process on the reduced space until we find the target. This process is called \textbf{Binary Search}. Binary search has recurrence relation: -\begin{equation} - T(n) = T(n/2) + O(1) , -\end{equation} -which decreases the time complexity from $O(n)$ to $O(\log n)$. - - -%%%%%%%%%Binary search%%%%%%%%% -\section{Binary Search} -Binary search can be easily applied in sorted array or string. -\begin{lstlisting}[numbers=none] -For example, given a sorted and distinct array -nums = [1, 3, 4, 6, 7, 8, 10, 13, 14, 18, 19, 21, 24, 37, 40, 45, 71] -Find target t = 7. 
-\end{lstlisting} -\paragraph{Find the Exact Target} -\begin{figure}[H] - \centering - \includegraphics[width=0.7\columnwidth]{fig/Binary_Search_Depiction.png} - \caption{Example of Binary Search} - \label{fig:binary_search_eg_1} -\end{figure} - -This is the most basic application of binary search. We can set two pointers, \texttt{l} and \texttt{r}, which point to the first and last position, respectively. Each time we compute the middle position \texttt{m = (l+r)//2}, and check if the item $nums[m]$ is equal to the target \texttt{t}. -\begin{itemize} -\item If it equals the target, the target is found and we return the position. -\item If it is smaller than the target, move to the right half by setting the left pointer to the position right after the middle position, $l = m + 1$. -\item If it is larger than the target, move to the left half by setting the right pointer to the position right before the middle position, $r = m - 1$. -\end{itemize} -Repeat the process until we find the target or we have searched the whole space. The criterion of finishing the whole space is when \texttt{l} starts to be larger than \texttt{r}. Therefore, in the implementation we use a \texttt{while} loop with condition \texttt{l$\leq$ r} to make sure we scan the search space only once. The process of applying binary search on our exemplary array is depicted in Fig.~\ref{fig:binary_search_eg_1} and the Python code is given as: -\begin{lstlisting}[language=Python] -def standard_binary_search(lst, target): - l, r = 0, len(lst) - 1 - while l <= r: - mid = l + (r - l) // 2 - if lst[mid] == target: - return mid - elif lst[mid] < target: - l = mid + 1 - else: - r = mid - 1 - return -1 # target is not found -\end{lstlisting} -In the code, we compute the middle position with \texttt{mid = l + (r - l) // 2} instead of just \texttt{mid = (l + r) //2} because these two always give the same computational result but the latter is more likely to lead to overflow with its addition operator in languages that use fixed-width integers. 
- -\subsection{Lower Bound and Upper Bound} -\paragraph{Duplicates and Target Missing} What if there are duplicates in the array: -\begin{lstlisting}[numbers=none] -For example, -nums = [1, 3, 4, 4, 4, 4, 6, 7, 8] -Find target t = 4 -\end{lstlisting} -Applying the first standard binary search will return \texttt{4} as the target position, which is the third $4$ in the array. This does not seem like a problem at first. However, what if you want to know the predecessor or successor (3 or 6) of this target? In a distinct array, the predecessor and successor would be adjacent to the target. However, when the target has duplicates, the predecessor is before the first target and the successor is next to the last target. Therefore, returning an arbitrary one will not be helpful. - -Another case, what if our target is 5, and we first want to see if it exists in the array. If it does not, we would like to insert it into the array and still keep the array sorted. The above implementation simply returns $-1$, which is not helpful at all. - -The \textbf{lower and upper bound} of a binary search are the lowest and highest position where the value could be inserted without breaking the ordering. -% However, if we design our algorithm to find (1) find the first position that has value larger or equals to the target, and (2) find the last position that has value smaller or equals to the target. This might be bit confusing, let us see it through examples. - -\begin{figure}[H] - \centering - \includegraphics[width=0.9\columnwidth, height=4cm]{fig/binary_search_lower_bound.png} - \caption{Binary Search: Lower Bound of target 4.} - \label{fig:binary_search_eg_lower_bound} - \includegraphics[width=0.9\columnwidth, height=4cm]{fig/binary_search_upper_bound.png} - \caption{Binary Search: Upper Bound of target 4.} - \label{fig:binary_search_eg_upper_bound} -\end{figure} -For example, if our $t=4$, the first position it can insert is at index 2 and the last position is at index 6. 
-\begin{itemize} - \item -With index 2 as the lower bound, -items in $i \in [0, l-1], a[i] < t$, and items in $i \in [l, n-1], a[i] \geq t$. A lower bound is also the first position that has a value \texttt{v >= t}. This case is shown in Fig.~\ref{fig:binary_search_eg_lower_bound}. - \item With index 6 as the upper bound, items in $i \in [0, u-1], a[i] \leq t$, and items in $i \in [u, n-1], a[i] > t$. An upper bound is also the first position that has a value \texttt{v > t}. This case is shown in Fig.~\ref{fig:binary_search_eg_upper_bound}. -\end{itemize} -\begin{figure}[H] - \centering - \includegraphics[width=0.9\columnwidth, height=4cm]{fig/binary_search_lower_upper.png} - \caption{Binary Search: Lower and Upper Bound of target 5 is the same.} - - \label{fig:binary_search_lower_upper} -\end{figure} - - -If $t=5$, the only position it can insert is at index 6, which indicates $l = u$. We show this case in Fig.~\ref{fig:binary_search_lower_upper}. - -Now that we know the meaning of the upper and lower bound, here comes the question, ``How to implement them?'' - -\paragraph{Implement Lower Bound} If the target equals the value at the middle index, we have to move to the left half to find the leftmost position of the same value. Therefore, the logic is that we move as far left as possible until we cannot go further. When it stops, $l>r$, and \texttt{l} points to the first position whose value $v$ satisfies $v\geq t$. Another way to think about the return value is with an assumption: Assume the middle pointer $m$ is at the first position that equals the target in the case of target 4, which is index 2. According to the searching rule, it goes to the left search space and changes the right pointer as $r=m-1$. At this point, in the valid search space, there will never be a value that can be larger than or equal to the target, which means the search will only move to the right side, increasing the \texttt{l} pointer and leaving the \texttt{r} pointer untouched until $l > r$ and the search stops. The first time that $l > r$, the left pointer will be $l = r + 1 = m$, which is the first position whose value equals the target. 
- -The search process for target 4 is described as follows: -\begin{lstlisting}[numbers=none] -0: l = 0, r = 8, mid = 4 -1: mid = 4, 4==4, l = 0, r = 3 -2: mid = 1, 4>3, l = 2, r = 3 -3: mid = 2, 4==4, l = 2, r = 1 -return l=2 -\end{lstlisting} -Similarly, we run the case for target 5. -\begin{lstlisting}[numbers=none] -0: l = 0, r = 8, mid = 4 -1: mid = 4, 5>4, l = 5, r = 8 -2: mid = 6, 5<6, l = 5, r = 5 -3: mid = 5, 5>4, l = 6, r = 5 -return l=6 -\end{lstlisting} -The Python code is as follows: -\begin{lstlisting}[language=Python] -def lower_bound_bs(nums, t): - l, r = 0, len(nums) - 1 - while l <= r: - mid = l + (r - l) // 2 - if t <= nums[mid]: # move as left as possible - r = mid - 1 - else: - l = mid + 1 - return l -\end{lstlisting} -\paragraph{Implement Upper Bound} To be able to find the upper bound, we need to move the left pointer to the right as much as possible. Assume we have the middle index at 5, with target as 4. The binary search moves to the right side of the state space, making $l=mid+1=6$. Now, in the right state space, the middle pointer will always have values larger than 4, thus it will only move to the left side of the space, which only changes the right pointer \texttt{r} and leaves the left pointer \texttt{l} untouched until the program ends. Therefore, \texttt{l} will still return our final upper bound index. The Python code is as follows: -\begin{lstlisting}[language=Python] -def upper_bound_bs(nums, t): - l, r = 0, len(nums) - 1 - while l <= r: - mid = l + (r - l) // 2 - if t >= nums[mid]: # move as right as possible - l = mid + 1 - else: - r = mid - 1 - return l -\end{lstlisting} - - -\paragraph{Python Module \texttt{bisect}} -% Binary search is usually carried out on a Static sorted array or 2D matrix. 
There are three basic cases: (1) find the exact target that value = target; If there are duplicates, we are more likely to be asked to (2) find the first position that has value >= target; (3) find the first position that has value <= target. Here, we use two example array: one without duplicates and the other has duplicates. -% \begin{lstlisting}[language=Python] -% a = [2, 4, 5, 9] -% b = [0, 1, 1, 1, 1, 1] -% \end{lstlisting} - - -% From the example, we can see that multiple \textbf{duplicates} of the target exist, it can possibly return any one of them. And for the case when the target does not exist, it simply returns -1. In reality, we might need to find a position where we can potentially insert the target to keep the sorted array sorted. There are two cases: (1) the first position that we can insert, which is the first position that has value>= target (2) and the last position we can insert, which is the first position that has value > target. For example, if we try to insert 3 in a, and 1 in b, the first position should be 1 and 1 in each array, and the last position is 1 and 6 instead. -Conveniently, we have a Python built-in Module \texttt{bisect} that offers two methods: \texttt{bisect\_left()} for obtaining the lower bound and \texttt{bisect\_right()} to obtain the upper bound. For example, we can use it as: -\begin{lstlisting}[language=Python] -from bisect import bisect_left,bisect_right, bisect -l1 = bisect_left(nums, 4) -r1 = bisect_right(nums, 5) -l2 = bisect_right(nums, 4) -r2 = bisect_right(nums, 5) -\end{lstlisting} -It offers six methods as shown in Table~\ref{tab:method_bisect}. 
-\begin{table}[h] -\begin{small} -\centering -\noindent\captionof{table}{ Methods of \textbf{bisect}} - \noindent \begin{tabular}{|p{0.25\columnwidth}|p{0.75\columnwidth}| } - \hline -Method & Description \\ \hline -\texttt{bisect\_left(a, x, lo=0, hi=len(a)} & The parameters lo and hi may be used to specify a subset of the list; the function is the same as bisect\_left\_raw \\\hline -\texttt{bisect\_right(a, x, lo=0, hi=len(a)} & The parameters lo and hi may be used to specify a subset of the list; the function is the same as bisect\_right\_raw \\\hline -\texttt{bisect(a, x, lo=0, hi=len(a))} &Similar to bisect\_left(), but returns an insertion point which comes after (to the right of) any existing entries of x in a.\\ \hline -\texttt{insort\_left(a, x, lo=0, hi=len(a))} &This is equivalent to a.insert(bisect.bisect\_left(a, x, lo, hi), x).\\ \hline -\texttt{insort\_right(a, x, lo=0, hi=len(a))} & This is equivalent to a.insert(bisect.bisect\_right(a, x, lo, hi), x).\\ \hline -\texttt{insort(a, x, lo=0, hi=len(a))} & Similar to insort\_left(), but inserting x in a after any existing entries of x.\\ \hline -\end{tabular} - \label{tab:method_bisect} - \end{small} -\end{table} - -\paragraph{Bonus} For the lower bound, if we return the position as l-1, then we get the last position that \texttt{value < target}. Similarily, for the upper bound, we get the last position \texttt{value <= target}. - -% \paragraph{Python Built-in Module bisect} This module provides support for maintaining a list in sorted order without having to sort the list after each insertion. 
I -% Let's see come examplary code: -% \begin{lstlisting}[language=Python] -% from bisect import bisect_left,bisect_right, bisect -% print("bisect left: find 3 in a :", bisect_left(a,3), 'find 1 in b: ', bisect_left(b, 1)) # lower_bound, the first position that value>= target -% print("bisect right: find 3 in a :", bisect_right(a, 3), 'find 1 in b: ', bisect_right(b, 1)) # upper_bound, the last position that value <= target -% \end{lstlisting} -% The print out is: -% \begin{lstlisting} -% bisect left: find 3 in a : 1 find 1 in b: 1 -% bisect right: find 3 in a : 1 find 1 in b: 6 -% \end{lstlisting} -\subsection{Applications} -\label{concept_binary_search_in_array} -Binary Search is a powerful problem solving tool. Let's go beyond the sorted array: How about when the array is sorted in a way that is not as monotonic as what we are familiar with, or how about solving math functions with binary search, whether they are continuous or discrete, equations or inequations? -\paragraph{First Bad Version(L278)} You are a product manager and currently leading a team to develop a new product. Unfortunately, the latest version of your product fails the quality check. Since each version is developed based on the previous version, all the versions after a bad version are also bad. - -Suppose you have $n$ versions [1, 2, ..., n] and you want to find out the first bad one, which causes all the following ones to be bad. You are given an API \texttt{bool isBadVersion(version)} which will return whether version is bad. Implement a function to find the first bad version. You should minimize the number of calls to the API. -\begin{lstlisting}[numbers=none] -Given n = 5, and version = 4 is the first bad version. - -call isBadVersion(3) -> false -call isBadVersion(5) -> true -call isBadVersion(4) -> true - -Then 4 is the first bad version. -\end{lstlisting} -\paragraph{Analysis and Design} In this case, we have a search space in range $[1, n]$. 
Think the value at each position is the result from function \texttt{isBadVersion(i)}. Assume the first bad version is at position $b$, then the values from the positions are of such pattern: [F,..., F, ..., F, T, ..., T]. We can totally apply the binary search in the search space $[1, n]$: to find the first bad version is the same as finding the first position that we can insert a value \texttt{True}--the lower bound of value \texttt{True}. Therefore, whenever the value we find is \texttt{True}, we move to the left space to try to get its first location. The Python code is given below: -\begin{lstlisting}[language = Python] -def firstBadVersion(n): - l, r = 1, n - while l <= r: - mid = l + (r - l) // 2 - if isBadVersion(mid): - r = mid - 1 - else: - l = mid + 1 - return l -\end{lstlisting} -\subsubsection{Search in Rotated Sorted Array} -``How about we rotate the sorted array?'' -\paragraph{Problem Definition(L33, medium)} Suppose an array (without duplicates) is first sorted in ascending order, but later is rotated at some pivot unknown to you beforehand--it takes all items before the pivot to the end of the array. For example, an array [0, 1, 2, 4, 5, 6, 7] be rotated at pivot 4, will become [4, 5, 6, 7, 0, 1, 2]. If the pivot is at 0, nothing will be changed. If it is at the end of the array, say 7, it becomes [7, 0, 1, 2, 4, 5, 6]. You are given a target value to search. If found in the array return its index, otherwise return -1. -\begin{lstlisting}[numbers=none] -Example 1: -Input: nums = [3,4,5,6,7,0,1,2], target = 0 -Output: 5 - -target = 8 -Output: -1 -\end{lstlisting} -\paragraph{Analysis and Design} -In the rotated sorted array, the array is not purely monotonic. Instead, there will be at most one drop in the array because of the rotation, which we denote the high and the low item as $a_h, a_l$ respectively. This drop cuts the array into two parts: $a[0:h+1]$ and $a[l:n]$, and both parts are ascending sorted. 
If the middle point falls within the left part, the left side of the state space will be sorted, and if it falls within the right part, the right side of the state space will be sorted. Therefore, at any situation, there will always be one side of the state space that is sorted. To check which side is sorted, simply compare the value of middle pointer with that of left pointer. -\begin{itemize} - \item If \texttt{nums[l] < nums[mid]}, then the left part is sorted. - \item If \texttt{nums[l] > nums[mid]}, then the right part is sorted. - \item Otherwise when they equal to each other, which is only possible that there is no left part left, we have to move to the right part. For example, when \texttt{nums=[1, 3]}, we move to the right part. -\end{itemize} - -With a sorted half of state space, we can check if our target is within the sorted half: if it is, we switch the state space to the sorted space; otherwise, we have to move to the other half that is unknown. The Python code is shown as: -\begin{lstlisting}[language=Python] -def RotatedBinarySearch(nums, t): - l, r = 0, len(nums)-1 - while l <= r: - mid = l + (r-l)//2 - if nums[mid] == t: - return mid - # Left is sorted - if nums[l] < nums[mid]: - if nums[l] <= t < nums[mid]: - r = mid - 1 - else: - l = mid + 1 - # Right is sorted - elif nums[l] > nums[mid]: - if nums[mid] < t <= nums[r]: - l = mid + 1 - else: - r = mid - 1 - # Left and middle index is the same, move to the right - else: - l = mid + 1 - return -1 -\end{lstlisting} -\begin{bclogo}[couleur = blue!30, arrondi=0.1,logo=\bccrayon,ombre=true]{What happens if there are duplicates in the rotated sorted array? } In fact, the same comparison rule applies, with one minor change. When \texttt{nums=[1, 3, 1, 1, 1]}, the middle pointer and the left pointer has the same value, and in this case, the right side will only consist of a single value, making us to move to the left side instead. 
However, if \texttt{nums=[1,1,3]}, we need to move to the right side instead. Moreover, for \texttt{nums=[1, 3]}, it is because there is no left side we have to search the the right side. Therefore, in this case, it is impossible for us to decide which way to go, a simple strategy is to just move the left pointer forward by one position and retreat to the linear search. -\begin{lstlisting}[language=Python] -# The left half is sorted -if nums[mid] > nums[l]: -# The right half is sorted -elif nums[mid] < nums[l]: -# For third case -else: - l +=1 -\end{lstlisting} -\end{bclogo} - - - - -%%%%%%%%%%%%%%binary search on result space%%%%%%% -\subsubsection{Binary Search to Solve Functions} -Now, let's see how it can be applied to solve equations or inequations. Assume, our function is $y = f(x)$, and this function is monotonic, such as $y = x, y = x^2+1, y = \sqrt{x}$. To solve this function is the same as finding a solution $x_t$ to a given target $y_t$. We generally have three steps to solve such problems: % If the question gives us the context: the target is in the range [left, right], we need to search the first or last position that satisfy a condition function. We can apply the concept of standard binary search and bisect\_left and bisect\_right and its mutant. Where we use the condition function to replace the value comparison between target and element at middle position. The steps we need: -\begin{enumerate} - \item Set a search space for $x$, say it is $[x_l, x_r]$. - \item If the function is equation, we find a $x_t$ that either equals to $y_t$ or close enough such as $|y_t - y| <= 1e-6$ using standard binary search. - \item If the function is inequation, we see if it wants the first or the last $x_t$ that satisfy the constraints on $y$. It is the same as of finding the lower bound or upper bound. 
-\end{enumerate} - -\paragraph{Arranging Coins (L441, easy)} You have a total of n coins that you want to form in a staircase shape, where every $k$-th row must have exactly $k$ coins. Given n, find the total number of full staircase rows that can be formed. n is a non-negative integer and fits within the range of a 32-bit signed integer. -\begin{lstlisting}[numbers=none] -Example 1: -n = 5 -The coins can form the following rows: -* -* * -* * - -Because the 3rd row is incomplete, we return 2. -\end{lstlisting} - -\paragraph{Analysis and Design} Each row $x$ has $x$ coins, summing it up, we get $1+2+...+x= \frac{x(x+1)}{2}$. The problem is equvalent to find the last integer $x$ that makes $\frac{x(x+1)}{2}\leq n$. Of course, this is just a quadratic equation which can be easily solved if you remember the formula, such as the following Python code: -\begin{lstlisting}[language=Python] -import math -def arrangeCoins(n: int) -> int: - return int((math.sqrt(1+8*n)-1) // 2) -\end{lstlisting} -However, if in the case where we do not know a direct closed-form solution, we solicit binary search. First, the function of $x$ is monotonically increasing, which indicates that binary search applies. We set the range of $x$ to $[1, n]$, what we need is to find the last position that the condition of $\frac{x(x+1)}{2}\leq n$ satisfies, which is the position right before the upper bound. The Python code is given as: -\begin{lstlisting}[language=Python] -def arrangeCoins(n): - def isValid(row): - return (row * (row + 1)) // 2 <= n - - def bisect_right(): - l, r = 1, n - while l <= r: - mid = l + (r-l) // 2 - # Move as right as possible - if isValid(mid): - l = mid + 1 - else: - r = mid - 1 - return l - return bisect_right() - 1 -\end{lstlisting} - -% \subsection{Bisection Method} (second edition) -% The binary search principle can be used to find the root of a function that may be difficult to compute mathematically. 
We have not seen any problems that require this method on LeetCode yet. Thus we define the problem as: - -% Find the monthly payment for a loan: You want to buy a car using loan and want to pay in monthly installment of d d -% \subsection{Python Library} -% Python has \textbf{bisect} module for binary search. -% \begin{lstlisting}[numbers=none] -% bisect.bisect_left(a, x): Return the leftmost index where we can insert x into a to maintain sorted order! Leftmost rl that satisfy: x<=a[rl] - -% bisect.bisect_right(a, x): Return the rightmost index where we can insert x into a to maintain sorted order! Right most rr that satisfy: x>=a[rr] -% \end{lstlisting} -% For example: -% \begin{lstlisting}[language=Python] -% from bisect import bisect_left,bisect_right -% a = [1, 2, 3, 3, 3, 4, 5] -% p1, p2= bisect_left(a,3), bisect_right(a, 3) -% print(p1, p2) -% # output -% # 2, 5 -% \end{lstlisting} - - -%%%%%%%%%%%%%%%%%%%%%binary search tree%%%%%%%%%%%%%%%%%%%%%% -\section{Binary Search Tree} -\label{sec_binary_search_tree} - -A sorted array supports logarithmic query time with binary search, however it still takes linear time to update--delete or insert items. A binary search tree (BST), a type of binary tree designed for fast access and updates to items, on the other hand, only takes $O(\log n)$ time to update when the tree is balanced. How does it work? - -In the array data structure, we simply sort the items, but how to apply sorting in a binary tree? Reviewing the min-heap data structure, which recursively defines a node to have the smallest value among the nodes that belong to the subtree of that node, will give us a clue. In the binary search tree, we define that for any given node \texttt{x}, all nodes in the left subtree of \texttt{x} have keys smaller than \texttt{x} while all nodes in the right subtree of \texttt{x} have keys larger than \texttt{x}. An example is shown in Fig.~\ref{fig:bst}. 
With this definition, simply comparing a search target with the root can point us to half of the search space, given the tree is balanced enough. Moreover, if we do in-order traversal of nodes in the tree from the root, we end up with a nice and sorted keys in ascending order, making binary search tree one member of the sorting algorithms. -\begin{figure}[H] - \centering - \includegraphics[width = 0.6\columnwidth]{fig/bst_example.png} - \caption{Example of Binary search tree of depth 3 and 8 nodes.} - \label{fig:bst} -\end{figure} - - - - -% The advantage of search trees is their efficient search time ( $O(\log n)$) given the tree is reasonably balanced, which is to say the leaves at either end are of comparable depths as we introduced the \textbf{balanced binary tree}. -Binary search tree needs to support many operations, including searching for a given key, the minimum and maximum key, and a predecessor or successor of a given key, inserting and deleting items while maintaining the binary search tree property. Because of its efficiency of these operations compared with other data structures, binary search tree is often used as a dictionary or a priority queue. - - -% Search trees are often used to implement an associative array. The search tree algorithm uses the key from the key-value pair to find a location, and then the application stores the entire key–value pair at that location. - -% In this section, we will introduce the most commonly used two types of searching trees: binary searching tree (BST) and Trie where the keys are usually numeric numbers and strings respectively. - -% \subsection{Binary Searching Tree} -% \label{concept_binary_search_tree} -With $l$ and $r$ to represent the left and right child of node $x$, there are two other definitions other than the binary search tree definition we just introduced: (1)$l.key \leq x.key < r.key$ and (2) $l.key < x.key \leq r.key$. In these two cases, our resulting BSTs allows us to have duplicates. 
The exemplary implementation follow the definition that does not allow duplicates. - -% an organized searching tree structure in binary tree, as the name suggests. Binary search trees whose internal nodes each store a key (and optionally, an associated value), each node have two distinguished sub-trees (if only one sub-tree the other is None). - -% BST keep their keys in sorted order, so that lookup and other operations can use the \textit{principle of binary search tree}: - -% \indent Let $x$ be a node in a binary search tree, if $y$ is a node in the left subtree of x, them $y.key \leq x.key$. If $y$ is a node in the right subtree of $x$, then $y.key \geq x.key$. - - - -\subsection{Operations} -% When looking for a key in a tree (or a place to insert a new key), we traverse the tree from root to leaf, making comparisons to keys stored in the nodes of the tree and deciding, on the basis of the comparison, to continue searching in the left or right subtrees. On average, this means that each comparison allows the operations to skip about half of the tree, so that each SEARCH, INSERT or DELETE takes time proportional to the logarithm of the number of items stored in the tree. This is much better than the linear time required to find items by key in an (unsorted) array, but slower than the corresponding operations on hash tables. - -% \textbf{Definition} A binary search tree is a rooted binary tree, whose internal nodes each store a key (and optionally, an associated value) and each have two distinguished sub-trees, commonly denoted left and right. The tree additionally satisfies the binary search property, which states that the key in each node must be greater than or equal to any key stored in the left sub-tree, and less than or equal to any key stored in the right sub-tree.[1]:287 The leaves (final nodes) of the tree contain no key and have no structure to distinguish them from one another. 
- -In order to build a BST, we need to insert a series of items in the tree organized by the search tree property. And in order to insert, we need to search for a proper position first and then insert the new item while sustaining the search tree property. Thus, we introduce these operations in the order of search, insert and generate. - -\paragraph{Search} - -The search is highly similar to the binary search in the array. It starts from the root. Unless the node's value equals the target, the search proceeds to either the left or right child depending upon the comparison result. The search process terminates when either the target is found or when an empty node is reached. It can be implemented either recursively or iteratively with a time complexity $O(h)$, where $h$ is the height of the tree, which is roughly $\log n$ if the tree is balanced enough. The recursive search is shown as: -\begin{lstlisting}[language = Python] -def search(root, t): - if not root: - return None - if root.val == t: - return root - elif t < root.val: - return search(root.left, t) - else: - return search(root.right, t) -\end{lstlisting} -Because this is a tail recursion, it can easily be converted to iteration, which saves the call-stack space used by the recursive calls. The iterative code is given as: -\begin{lstlisting}[language = Python] -# iterative searching -def iterative_search(root,key): - while root is not None and root.val != key: - if root.val < key: - root = root.right - else: - root = root.left - return root -\end{lstlisting} - -\begin{bclogo}[couleur = blue!30, arrondi=0.1,logo=\bccrayon,ombre=true]{Write code to find the minimum and maximum key in the BST. } The minimum key is located at the leftmost node of the BST, while the maximum key is located at the rightmost node of the tree. 
-\end{bclogo} - -%%%%%%%%%%%%Insertion%%%%%%%%%%%%%%%%%%%%%% -\paragraph{Insert} -\begin{figure}[H] - \centering - \includegraphics[width=0.6\columnwidth]{fig/bst_insert_9.png} - \caption{The red colored path from the root down to the position where the key 9 is inserted. The dashed line indicates the link in the tree that is added to insert the item. } - \label{fig:bst_insert} -\end{figure} -Assume we are inserting a node with key 9 into the tree shown in Fig.~\ref{fig:bst}. We start from the root, compare 9 with 8, and go to node 10. Next, the search process will lead us to the left child of node 10, and this is where we should put node 9. The process is shown in Fig.~\ref{fig:bst_insert}. - -The process itself is easy and clean. Now we come to the implementation. We treat each node as a subtree: whenever the search goes into that node, the algorithm hands over the insertion task totally to that node, and assumes it has inserted the new node and returned its updated subtree. The caller simply resets its left or right child with the return value from that child. The insertion of the new node happens when the search hits an empty node: it returns a new node with the target value. The implementation is given as: -\begin{lstlisting}[language = Python] -def insert(root, t): - if not root: - return BiNode(t) - if root.val == t: - return root - elif t < root.val: - root.left = insert(root.left, t) - return root - else: - root.right = insert(root.right, t) - return root -\end{lstlisting} -In the notebook, I offer a variant of this implementation; check it out if you are interested. To insert iteratively, we need to track the parent node while searching. The \texttt{while} loop stops when it hits an empty node. There are three cases for the parent node: -\begin{enumerate} - \item When the parent node is \texttt{None}, the tree is empty, and we assign a new node with the target value to the root. 
-\item When the target's value is larger than the parent node's, we put a new node as the right child of the parent node. -\item When the target's value is smaller than the parent node's, we put a new node as the left child of the parent node. -\end{enumerate} -The iterative code is given as: -\begin{lstlisting}[language = Python] -def insertItr(root, t): - p = None - node = root #Keep the root node - while node: - # Node exists already - if node.val == t: - return root - if t > node.val: - p = node - node = node.right - else: - p = node - node = node.left - # Assign new node - if not p: - root = BiNode(t) - elif t > p.val: - p.right = BiNode(t) - else: - p.left = BiNode(t) - return root -\end{lstlisting} -\paragraph{BST Generation} -To generate our exemplary BST shown in Fig.~\ref{fig:bst}, we set \texttt{keys = [8, 3, 10, 1, 6, 14, 4, 7, 13]}, then call the \texttt{insert} function we implemented for each key to generate the same tree. The time complexity will be $O(n\log n)$ as long as the tree stays reasonably balanced. -\begin{lstlisting}[language=Python] -keys = [8, 3, 10, 1, 6, 14, 4, 7, 13] -root = None -for k in keys: - root = insert(root, k) -\end{lstlisting} -\paragraph{Find the Minimum and Maximum Key} Because the minimum key is the leftmost node within the tree, the search process will always traverse to the left subtree and return the last non-empty node, which is our minimum node. The time complexity is the same as that of searching for any key: $O(h)$, which is $O(\log n)$ when the tree is balanced. -\begin{lstlisting}[language=Python] -def minimum(root): - if not root: - return None - if not root.left: - return root - return minimum(root.left) -\end{lstlisting} -It can easily be converted to an iterative version: -\begin{lstlisting}[language=Python] -def minimumIter(root): - while root: - if not root.left: - return root - root = root.left - return None -\end{lstlisting} -To find the maximum node, replacing \texttt{left} with \texttt{right} will do. 
-%%%%%%%%%%%Predecessor and Successor -Also, sometimes we need to search two additional items related to a given node: successor and predecessor. The structure of a binary search tree allows us to determine the successor or the predecessor of a tree without ever comparing keys. - -\paragraph{Successor} A successor of node $x$ is the smallest node in BST that is strictly greater than $x$. It is also called \textbf{in-order successor}, which is the node next to node $x$ in the inorder traversal ordering--sorted ordering. Other than the maximum node in BST, all other nodes will have a successor. The simplest implementation is to return the next node within inorder traversal. This will have a linear time complexity, which is not great. The code is shown as: -\begin{lstlisting}[language=Python] -def successorInorder(root, node): - if not node: - return None - if node.right is not None: - return minimum(node.right) - # Inorder traversal - succ = None - while root: - if node.val > root.val: - root = root.right - elif node.val < root.val: - succ = root - root = root.left - else: - break - return succ -\end{lstlisting} - -Let us try something else. In the BST shown in Fig.~\ref{fig:bst_insert}, the node 3's successor will be node 4. For node 4, its successor will be node 6. For node 7, its successor is node 8. What are the cases here? -\begin{itemize} - \item An easy case is when a node has right subtree, its successor is the minimum node within its right subtree. - \item However, if a node does not have a right subtree, there are two more cases: - \begin{itemize} - \item If it is a left child of its parent, such as node 4 and 9, its direct parent is its successor. - \item However, if it is a right child of its parent, such as node 7 and 14, we traverse backwards to check its parents. If a parent node is the left chid of its parent, then that parent will be the successor. 
For example, for node 7, we traverse through 6, 3, and 3 is a left child of node 8, making node 8 the successor for node 7. - \end{itemize} - The above two rules can be merged as: starting from the target node, traverse backward through its parents and find the first pair of nodes that are in a left child--parent relation. The parent node in that relation will be our target successor. Because the left subtree is always smaller than a node, when we traverse backward, if a node is smaller than its parent, it tells us that the target node is smaller than that parent node too. -\end{itemize} -We write three functions to implement the successor: -\begin{itemize} - \item Function \texttt{findNodeAddParent} finds the target node and, along the search path, adds a parent pointer \texttt{p} to each visited node (the listing assumes the key \texttt{t} is present in the tree). The code is: -\begin{lstlisting}[language=Python] -def findNodeAddParent(root, t): - if not root: - return None - if t == root.val: - return root - elif t < root.val: - root.left.p = root - return findNodeAddParent(root.left, t) - else: - root.right.p = root - return findNodeAddParent(root.right, t) -\end{lstlisting} -\item Function \texttt{reverse} finds the first left child--parent relation when traversing backward from a node through its parents. -\begin{lstlisting}[language=Python] -def reverse(node): - if not node or not node.p: - return None - # node is a left child - if node.val < node.p.val: - return node.p - return reverse(node.p) -\end{lstlisting} -\item Function \texttt{successor} takes a node as input, and returns its successor. 
-\begin{lstlisting}[language=Python] -def successor(root): - if not root: - return None - if root.right: - return minimum(root.right) - else: - return reverse(root) -\end{lstlisting} -\end{itemize} -To find a successor for a given key, we use the following code: -\begin{lstlisting}[language=Python] -root.p = None -node = findNodeAddParent(root, 4) -suc = successor(node) -\end{lstlisting} -This approach will gives us $O(\log n)$ time complexity. -% \parabutbut if is only possible with parent nodes. For BST that has no parent nodes designed, we can add parent nodes along searching the target node. After the target node is found, we stop and check different cases. The code is given as: -% \begin{lstlisting}[language=Python] -% def successor(root, t): -% # Traverse backward and see if a node is a left child -% def reverse(node): -% if not node or not node.p: -% return None -% # node is a left child -% if node.val < node.p.val: -% return node.p -% return reverse(node.p) - -% # Find the target and set its parent while searching -% def helper(root, t): -% # t is not found -% if not root: -% return None -% if t == root.val: -% if root.right: -% return minimum(root.right) -% else: -% return reverse(root) -% elif t < root.val: -% root.left.p = root -% return helper(root.left, t) -% else: -% root.right.p = root -% return helper(root.right, t) - -% root.p = None -% return helper(root, t) -% \end{lstlisting} - -% Use parent node: the algorihtm has two cases on the basis of the right subtree of the input node. -% \begin{lstlisting}[numbers=none] -% For the right subtree of the node: -% 1) If it is not None, then the successor is the minimum node in the right subtree. e.g. for node 12, successor(12) = 13 = min(12.right) -% 2) If it is None, then the successor is one of its ancestors. We traverse up using the parent node until we find a node which is the left child of its parent. Then the parent node here is the successor. e.g. 
successor(2)=5 -% \end{lstlisting} -% The Python code is provided: -% \begin{lstlisting}[language = Python] -% def Successor(root, n): -% # Step 1 of the above algorithm -% if n.right is not None: -% return get_minimum(n.right) -% # Step 2 of the above algorithm -% p = n.parent -% while p is not None: -% if n == p.left :# if current node is the left child node, then we found the successor, p -% return p -% n = p -% p = p.parent -% return p -% \end{lstlisting} - - -\paragraph{Predecessor} A predecessor of node $x$ on the other side, is the largest item in BST that is strictly smaller than $x$. It is also called \textbf{in-order predecessor}, which denotes the previous node in Inorder traversal of BST. For example, for node 6, the predecessor is node 4, which is the maximum node within its left subtree. For node 4, its predecessor is node 3, which is the parent node in a right child--parent relation while tracing back through parents. Now, assume we find the targeting node with function \texttt{findNodeAddParent}, we first write \texttt{reverse} function as \texttt{reverse\_right}. -\begin{lstlisting}[language=Python] -def reverse_right(node): - if not node or not node.p: - return None - # node is a right child - if node.val > node.p.val: - return node.p - return reverse_right(node.p) -\end{lstlisting} -Next, we implement the above rules to find predecessor of a given node. -\begin{lstlisting}[language = Python] -def predecessor(root): - if not root: - return None - if root.left: - return maximum(root.left) - else: - return reverse_right(root) -\end{lstlisting} - The expected time complexity is $O(\log n)$. And the worst is when the tree line up and has no branch, which makes it $O(n)$. 
- Similarly, we can use inorder traversal: -\begin{lstlisting}[language=Python] -def predecessorInorder(root, node): - if not node: - return None - if node.left is not None: - return maximum(node.left) - # Inorder traversal - pred = None - while root: - if node.val > root.val: - pred = root - root = root.right - elif node.val < root.val: - root = root.left - else: - break - return pred -\end{lstlisting} -\paragraph{Delete} -When we delete a node, we need to restructure the subtree of that node to make sure the BST property is maintained. There are different cases: -\begin{enumerate} - \item Node to be deleted is a leaf: Simply remove it from the tree. For example, node 1, 4, 7, and 13. - \item Node to be deleted has only one child: Copy the child to the node and delete the child. For example, to delete node 14, we need to copy node 13 to node 14. - \item Node to be deleted has two children: for example, node 3 has both a left and a right subtree. We need to get a value, which can either be its predecessor--node 1 or successor--node 4, and copy that value to the position about to be deleted. -\end{enumerate} -To support the delete operation, we write a function \texttt{deleteMinimum} to obtain the minimum node in that subtree and return a subtree that has that node deleted. -\begin{lstlisting}[language=Python] -def deleteMinimum(root): - if not root: - return None, None - if root.left: - mini, left = deleteMinimum(root.left) - root.left = left - return mini, root - # the minimum node - if not root.left: - return root, None -\end{lstlisting} -Next, we implement the above three cases in function \texttt{\_delete}, which takes the node to be deleted and returns the processed subtree with that root node removed. 
-\begin{lstlisting}[language=Python] -def _delete(root): - if not root: - return None - # No chidren: Delete it - if not root.left and not root.right: - return None - # Two children: Copy the value of successor - elif all([root.left, root.right]): - succ, right = deleteMinimum(root.right) - root.val = succ.val - root.right = right - return root - # One Child: Copy the value - else: - if root.left: - root.val = root.left.val - root.left = None - else: - root.val = root.right.val - root.right = None - return root -\end{lstlisting} -Finally, we call the above two function to delete a node with a target key. -\begin{lstlisting}[language=Python] -def delete(root, t): - if not root: - return - if root.val == t: - root = _delete(root) - return root - elif t > root.val: - root.right = delete(root.right, t) - return root - else: - root.left = delete(root.left, t) - return root -\end{lstlisting} -% \paragraph{Summary} -% Now we put a table here to summarize the space and time complexity for each operation. -% \begin{table}[h] -% \begin{small} -% \centering -% \noindent\captionof{table}{ Time complexity of operations for BST in big O notation } -% \noindent \begin{tabular}{|p{0.33\columnwidth}|p{0.33\columnwidth}| p{0.33\columnwidth}|} -% \hline -% Algorithm & Average & Worst Case \\ \hline -% Space & $O(n)$& $O(n)$ \\ -% Search & $O(\log n)$ & $O(n)$ \\ \hline - -% Insert & $O(\log n)$ & $O(n)$ \\ -% Delete & $O(\log n)$ & $O(n)$ \\ \hline -% \end{tabular} -% \label{tab:msrc_precession} -% \end{small} -% \end{table} - -% \paragraph{Advanced Features} -% For a BST, the left subtree all have smaller values than the current node, and the right subtree are all bigger than the current node. This concept is useful in trimming BST, see example, $669$. Trim a Binary Search Tree. - - -% \section{Augmented Tree} -% According to \textit{Introduction to Algorithms}, augmenting data stuctures are defined as a textbook data structure augmented by storing additional information in it. 
In this Section, we introduce two types of augmented tree: Trie for pattern matching in static String and Segment Tree for Range Query. - -%https://www.mimuw.edu.pl/~szczurek/TSG2/04_suffix_arrays.pdf - -%%%%%%%%%%%%%%%%%%%%%%%% -\subsection{Binary Search Tree with Duplicates} -If we use either of the other two definitions we introduced that allow duplicates, things can be more complicated. For example, if we use the definition $x.left.key \leq x.key < x.right.key$, we will end up with a tree that looks like Fig.~\ref{fig:bst_duplicate}: -\begin{figure}[H] - \centering - \includegraphics[width=0.3\columnwidth]{fig/bst_duplicate.png} - \caption{A BST in which key 3 appears twice.} - \label{fig:bst_duplicate} -\end{figure} -Note that the duplicates are not in contiguous levels. This is a big issue when allowing duplicates in a BST representation, because duplicates may be separated by any number of levels, making the detection of duplicates difficult. - -An option to avoid this issue is to not represent duplicates structurally (as separate nodes) but instead use a \texttt{counter} that counts the number of occurrences of the key. The previous example will be represented as in Fig.~\ref{fig:bst_duplicate_counter}: -\begin{figure}[H] - \centering - \includegraphics[width=0.3\columnwidth]{fig/bst_duplicate_counter.png} - \caption{A BST with node 3 marked with two occurrences.} - \label{fig:bst_duplicate_counter} -\end{figure} - -This simplifies the related operations at the expense of some extra bytes and counter operations. Since a heap is a complete binary tree, it has the smallest possible height---a heap with $N$ nodes always has $O(\log N)$ height. - - - -%%%%%%%%%%%%%%Segment Tree%%%%%%%%%%%%%%% -\section{Segment Tree} -\label{sec_segment_tree} -Answering queries over ranges of an array is called a \textit{range query problem}, e.g. finding the sum of a consecutive subarray $a[l:r]$, or finding the minimum item in such a range.
A direct and linear solution is to compute the required query on the subarray on the fly each time. When the array is large, and the update is frequent, even this linear approach will be too slow. Let's try to solve this problem faster than linear. How about computing the query for every range in advance and saving it in a dictionary? If we can, the query time is constant. However, because there are $O(n^2)$ subarrays, the space cost is quadratic, which is definitely not good. Another problem is updates: if we need to change the value of an item, we have to update every entry in the dictionary whose range includes that item (as many as $O(n^2)$ of them). - -We can balance the search, update, and space from the dictionary approach to a logarithmic time with the technique of decrease and conquer. In the binary search, we keep dividing our search space into halves recursively until a search space can no longer be divided. We can apply the dividing process here, and construct a binary tree in which each node has \texttt{l} and \texttt{r} to indicate the range that node represents. For example, if our array has index range $[0, 5]$, its left subtree will be [0, mid], and right subtree will be [mid+1, 5]. A binary tree built in this binary-search manner is shown in Fig.~\ref{fig:segment_tree_range}. -\begin{figure}[H] - \centering - \includegraphics[width=0.6\columnwidth]{fig/segment_tree_range.png} - \caption{A Segment Tree } - \label{fig:segment_tree_range} -\end{figure} -To get the answer for range query $[0, 5]$, we just return the value at the root node. If the range is $[0, 1]$, which is on the left side of the tree, we go to the left branch, cutting the search space in half. For a range that happens to be between two nodes, such as $[1, 3]$, which needs node \texttt{[0, 1]} and \texttt{[2, 5]}, we search [0, 1] in the left subtree and [2, 3] in the right subtree and combine them together. Any search will take $O(\log n)$ time, which relates to the height of the tree. 
\textcolor{red}{needs better complexity analysis} - -\paragraph{Segment tree} The above binary tree is called a \textbf{segment tree}. From our analysis, we can see that a segment tree is a static full binary tree. ``Static'' here means that once the data structure is built, its structure cannot be modified or extended. However, we can still propagate an updated value from the original array into the segment tree. Segment trees are applied widely to efficiently answer numerous \textit{dynamic range queries} problems -(in logarithmic time), such as finding minimum, maximum, sum, greatest common divisor, and least common multiple in array. - - -Consider an array $A$ of size $n$ and a corresponding segment tree $T$: -\begin{enumerate} - \item The root of $T$ represents the whole array $A[0:n]$. - \item Each internal node in $T$ represents the interval of $A[i:j]$ where $0 \leq i < j \leq n$. - \item Each leaf in $T$ represents a single element $A[i]$, where $0 \leq i < n$. -\end{enumerate} - -For example, consider the following sum range queries and update: -\begin{lstlisting}[numbers=none] -Given nums = [2, 9, 4, 5, 8, 7] -sumRange(0, 2) -> 15 -update(1, 3) -sumRange(0, 2) -> 9 -\end{lstlisting} - -\paragraph{Tree Construction} -\begin{figure}[h] - \centering - \includegraphics[width=0.6\columnwidth]{fig/segment_tree_construction.png} - \caption{Illustration of Segment Tree for Sum Range Query. } - \label{fig:segment_tree_construction} -\end{figure} -The function \texttt{\_buildSegmentTree()} takes three arguments: \texttt{nums}, \texttt{s} as the start index, and \texttt{e} as the end index. Because there are $2n-1$ nodes in total, the time and space complexity are both $O(n)$. 
-\begin{lstlisting}[language=Python] -def _buildSegmentTree(nums, s, e): - ''' - s, e: start index and end index - ''' - if s > e: - return None - if s == e: - return TreeNode(nums[s], s, e) - - m = (s + e)//2 - # Divide: return a subtree - left = _buildSegmentTree(nums, s, m) - right = _buildSegmentTree(nums, m+1, e) - - # Conquer: merge two subtree - node = TreeNode(left.val + right.val, s, e) - node.left = left - node.right = right - return node -\end{lstlisting} -We build a segment tree for our example as: -\begin{lstlisting}[language=Python] -nums = [2, 9, 4, 5, 8, 7] -root = _buildSegmentTree(nums, 0, len(nums) - 1) -\end{lstlisting} -It will generate the tree shown in Fig.~\ref{fig:segment_tree_construction}. -\paragraph{Range Query} Each query within range $[i, j], i \leq j, i\geq s, j \leq e$, will be found on a node or by combining multiple nodes. In the query process, check the following cases: -\begin{itemize} - \item - If range $[i, j]$ matches the range $[s, e]$, return the value of the node; otherwise, proceed to the other cases. - \item Compute middle index $m = (s + e) // 2$. Check if range $[i, j]$ is within the left state space $[s, m]$ if $j\leq m$, or within the right state space $[m+1, e]$ if $i\geq m+1$, or crosses the two spaces otherwise. - \begin{itemize} - \item For the first two cases, a recursive call on that branch will return our result. - \item For the third case, where the range crosses the two spaces, two recursive calls on both children of our current node are needed: the left one handles range $[i, m]$, and the right one handles range $[m+1, j]$. The final result will be a combination of these two. 
- \end{itemize} -\end{itemize} -The code is as follows: -\begin{lstlisting}[language=Python] -def _rangeQuery(root, i, j, s, e): - if s == i and j == e: - return root.val if root else 0 - m = (s + e)//2 - if j <= m: - return _rangeQuery(root.left, i, j, s, m) - elif i > m: - return _rangeQuery(root.right, i, j, m+1, e) - else: - return _rangeQuery(root.left, i, m, s, m) + _rangeQuery(root.right, m+1, j, m+1, e) -\end{lstlisting} -% The complete code is given: -% \begin{lstlisting}[language=Python] -% class NumArray: -% class TreeNode: -% def __init__(self, val): -% self.val = val -% self.left = None -% self.right = None - -% def __init__(self, nums): -% self.n = 0 -% self.st = None -% if nums: -% self.n = len(nums) -% self.st = self._buildSegmentTree(nums, 0, self.n-1) - -% def update(self, i, val): -% self._updateNode(i, val, self.st, 0, self.n -1) - -% def sumRange(self, i, j): -% return self._rangeQuery(self.st, i, j, 0, self.n-1) -% \end{lstlisting} -\paragraph{Update} To update \texttt{nums[1]=3}, all nodes on the path from root to the leaf node will be affected and needed to be updated with to incorporate the change at the leaf node. We search through the tree with a range $[1, 1]$ just like we did within \texttt{\_rangeQuery} except that we no longer need the case of crossing two ranges. Once we reach to the leaf node, we update that node's value to the new value, and it backtracks to its parents where we recompute the parent node's value according to the result of its children. This operation takes $O(\log n)$ time complexity, and we can do it inplace since the structure of the tree is not changed. 
-\begin{lstlisting}[language=Python] -def _update(root, s, e, i, val): - if s == e == i: - root.val = val - return - m = (s + e) // 2 - if i <= m: - _update(root.left, s, m, i, val) - else: - _update(root.right, m + 1, e, i, val) - root.val = root.left.val + root.right.val - return -\end{lstlisting} - -\paragraph{Minimum and Maximum Range Query} To get the minimum or maximum value within a given range, we just need to modify how the value is computed. For example, in the update, we just need to change line 10 of the above code to \texttt{root.val = min(root.left.val, root.right.val)}. - -There are many more variants of the segment tree; if you want to know more, see \url{https://cp-algorithms.com/data_structures/segment_tree.html}. - -% \paragraph{Dynamic Programming for Static Array} - - - - -%%%%%%%%%%%%%%%%%%%%%%%%%Exercise%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\section{Exercises} - -\begin{enumerate} - \item 144. Binary Tree Preorder Traversal - \item 94. Binary Tree Inorder Traversal - \item 145. Binary Tree Postorder Traversal - \item 589. N-ary Tree Preorder Traversal - \item 590. N-ary Tree Postorder Traversal - \item 429. N-ary Tree Level Order Traversal - \item 103. Binary Tree Zigzag Level Order Traversal(medium) - \item 105. Construct Binary Tree from Preorder and Inorder Traversal -\end{enumerate} - -938. Range Sum of BST (Medium) - -Given the root node of a \textbf{binary search tree}, return the sum of values of all nodes with value between L and R (inclusive). - -The binary search tree is guaranteed to have unique values. -\begin{lstlisting} -Example 1: - -Input: root = [10,5,15,3,7,null,18], L = 7, R = 15 -Output: 32 - -Example 2: - -Input: root = [10,5,15,3,7,13,18,1,null,6], L = 6, R = 10 -Output: 23 -\end{lstlisting} -\textbf{Tree Traversal+Divide and Conquer}. We need at most $O(n)$ time complexity. For each node, there are three cases: 1) $L \leq val \leq R$, 2) $val < L$, 3) $val > R$. 
For the first case it needs to obtain results for both its subtrees and merge with its own val. For the others two, because of the property of BST, only the result of one subtree is needed. -\begin{lstlisting}[language=Python] -def rangeSumBST(self, root, L, R): - if not root: - return 0 - if L <= root.val <= R: - return self.rangeSumBST(root.left, L, R) + self.rangeSumBST(root.right, L, R) + root.val - elif root.val < L: #left is not needed - return self.rangeSumBST(root.right, L, R) - else: # right subtree is not needed - return self.rangeSumBST(root.left, L, R) -\end{lstlisting} - -\subsection{Exercises} -\begin{examples} -\item \textbf{35. Search Insert Position (easy).} Given a sorted array and a target value, return the index if the target is found. If not, return the index where it would be if it were inserted in order. - -You can assume that there are no duplicates in the array. -\begin{lstlisting}[numbers=none] -Example 1: - -Input: [1,3,5,6], 5 -Output: 2 - -Example 2: -Input: [1,3,5,6], 2 -Output: 1 - -Example 3: -Input: [1,3,5,6], 7 -Output: 4 - -Example 4: -Input: [1,3,5,6], 0 -Output: 0 -\end{lstlisting} - -\textbf{Solution: Standard Binary Search Implementation.} For this problem, we just standardize the Python code of binary search, which takes $O(logn)$ time complexity and O(1) space complexity without using recursion function. In the following code, we use exclusive right index with len(nums), therefore it stops if l == r; it can be as small as 0 or as large as n of the array length for numbers that are either smaller or equal to the nums[0] or larger or equal to nums[-1]. We can also make the right index inclusive. 
-\begin{lstlisting}[language = Python] -# exclusive version -def searchInsert(self, nums, target): - l, r = 0, len(nums) #start from 0, end to the len (exclusive) - while l < r: - mid = (l+r)//2 - if nums[mid] < target: #move to the right side - l = mid+1 - elif nums[mid] > target: #move to the left side, not mid-1 - r= mid - else: #found the traget - return mid - #where the position should go - return l -\end{lstlisting} - -\begin{lstlisting}[language = Python] -# inclusive version -def searchInsert(self, nums, target): - l = 0 - r = len(nums)-1 - while l <= r: - m = (l+r)//2 - if target > nums[m]: #search the right half - l = m+1 - elif target < nums[m]: # search for the left half - r = m-1 - else: - return m - return l -\end{lstlisting} -\end{examples} -Standard binary search -\begin{enumerate} - \item 611. Valid Triangle Number (medium) - \item 704. Binary Search (easy) - -\item 74. Search a 2D Matrix) Write an efficient algorithm that searches for a value in an m x n matrix. This matrix has the following properties: -\begin{enumerate} - \item Integers in each row are sorted from left to right. - \item The first integer of each row is greater than the last integer of the previous row. - \end{enumerate} -\begin{lstlisting}[numbers=none] -For example, -Consider the following matrix: - -[ - [1, 3, 5, 7], - [10, 11, 16, 20], - [23, 30, 34, 50] -] - -Given target = 3, return true. -\end{lstlisting} - -% Solution: 2D matrix search, time complexity from $O(n^2)$ to $O(lgm+lgn)$. 
-% \begin{lstlisting}[language = Python] -% def searchMatrix(self, matrix, target): -% """ -% :type matrix: List[List[int]] -% :type target: int -% :rtype: bool -% """ - -% if not matrix: -% return False -% row, col = len(matrix), len(matrix[0]) -% if row==0 or col==0: #for [[]] -% return False -% sr, er = 0, row-1 -% #fisrst search the mid row -% while sr<=er: -% mid = sr+(er-sr)//2 -% if target>matrix[mid][-1]: #go to the right side -% sr=mid+1 -% elif target < matrix[mid][0]: #go the the left side -% er = mid-1 -% else: #value might be in this row -% #search in this row -% lc, rc = 0, col-1 -% while lc<=rc: -% midc = lc+(rc-lc)//2 -% if matrix[mid][midc]==target: -% return True -% elif target prev: -% taken = 1 + dfs(idx + 1, nums[idx]) -% # choice two: skip current element -% not_taken = dfs(idx + 1, prev) -% return max(taken, not_taken) -% \end{lstlisting} -% However, the above solution makes it difficult to optimize with memoization, , as the solution it uses previous index and current index as keyes. because we have no sense of subproblems, it was simply doing search at the solution space. -\section{Direct Approach} -\subsection{Search in Graph} -In a subsequence, an item can only have two choice: either in or out of the resulting subsequence, this makes our total subsequence $O(2^n)$. As we know the searching process shall always be a search tree, we now start to generate this subsequence, which starts from \texttt{[]}. At the first level, we have item 10 that have two actions: not adding or adding, which makes it a two branches. At the second level, we consider item 9. This makes a search space of a binary tree, and we generate node implicitly since we only need to track the length of the path so far, and we need value of the last item to decide the children of current level. So far, we managed to model our problem as finding the longest path in the search tree, which is a binary tree and with height $n$. 
We can have the Python code: -\begin{lstlisting}[language=Python] -def lengthOfLIS(self, nums: List[int]) -> int: - def dfs(nums, idx, cur_len, last_num, ans): - if idx >= len(nums): - ans[0] = max(ans[0], cur_len) - return - if nums[idx] > last_num: - dfs(nums, idx+1, cur_len + 1, nums[idx], ans) - dfs(nums, idx+1, cur_len, last_num, ans) - ans = [0] - last_num = -sys.maxsize - dfs(nums, 0, 0, last_num, ans) - return ans[0] -\end{lstlisting} -\subsection{Self-Reduction} - Now, let us use an example smaller than before, say [2, 5, 3, 7], which has an LIS of length 3 with [2, 3, 7]. Let us consider each state not as atomic but as a subproblem. It is the same tree, but we interpret each node differently. We start to consider the problem top down: we have problem [2, 5, 3, 7], and our start index = 0, meaning we start from item 2; then our problem can be divided into different situations: -\begin{itemize} - \item not take 2: we find the LIS length of subproblem [5, 3, 7]. In this case, our subsequence can start from any of these 3 items, and we indicate this case by not changing the previous value. Using \texttt{idx} to indicate the subproblem/subarray, we call dfs with \texttt{idx+1}. - \item take 2: the answer is one plus the LIS length of subproblem [5, 3, 7], where every chosen item must be larger than 2. Thus, we set \texttt{last\_num} to 2 in the recursive call. -\end{itemize} -Therefore, our code becomes: -\begin{lstlisting}[language=Python] -def lengthOfLIS(self, nums: List[int]) -> int: - def dfs(nums, idx, last_num): - if not nums: - return 0 - if idx >= len(nums): - return 0 - len1 = 0 - if nums[idx] > last_num: - len1 = 1 + dfs(nums, idx+1, nums[idx]) - len2 = dfs(nums, idx+1, last_num) - return max(len1, len2) - - last_num = -sys.maxsize - return dfs(nums, 0, last_num) -\end{lstlisting} -In this solution, the time complexity has not improved yet, but from this approach, we can further increase the efficiency with dynamic programming. 
-% In the example of Fibonacci Sequence, we discussed four steps to come out with a bottom-up tabulation dynamic programming solution and the key words of each step is highlighted with italic -- state, initialization, recurrence function, and answer. With the example of longest increasing subsequence (LIS), we would further enhance the comparison of complete search with dynamic programming and the tabulation method with the step of formulating the recurrence relation (or state transfer function) by ourselves. - -% \textit{Note: (1) There may be more than one LIS combination, it is only necessary for you to return the length. (2) Your algorithm should run in $O(n^2)$ complexity. Follow up: Could you improve it to $O(n\log n)$ time complexity?} - - -\subsection{Dynamic Programming} -\paragraph{Memoization} We have seen that the recurrence relation is $LIS(i, prev) = \max(LIS(i+1, prev),\ 1 + LIS(i+1, nums[i]))$, where the second option is only available when $nums[i] > prev$. How many possible states are there for $LIS(i, prev)$? $i \in [0, n-1]$, and $prev$ can take only about $n$ distinct values too (the items of the array plus the initial sentinel), which makes the whole state space only $O(n^2)$. With the plain depth-first tree search, however, we revisit the same state multiple times, which eventually drives the time complexity up to $O(2^n)$. Now, let us modify the approach and use \texttt{memo}, which is a dictionary that takes a tuple \texttt{(i, prev)} as key. 
If we find that the state has not been computed, we compute it as we did in the previous implementation; if it already exists in the memo, however, we directly return the stored value and avoid recomputing it: -\begin{lstlisting}[language=Python] -def lengthOfLIS(self, nums: List[int]) -> int: - def dfs(nums, idx, last_num, memo): - if idx >= len(nums): - return 0 - if (idx, last_num) not in memo: - len1 = 0 - if nums[idx] > last_num: - len1 = 1 + dfs(nums, idx+1, nums[idx], memo) - len2 = dfs(nums, idx+1, last_num, memo) - memo[(idx, last_num)] = max(len1, len2) - return memo[(idx, last_num)] - - last_num = -sys.maxsize - memo = {} - return dfs(nums, 0, last_num, memo) -\end{lstlisting} - - - - -\section{A to B} Another approach is to use the concept of ``prefix'' or ``suffix''. The LIS must start from one of the items in the array. Finding the length of the LIS in the original array can be achieved by comparing $n$ subproblems, namely the length of the LIS of: -\begin{lstlisting} -[2, 5, 3, 7], LIS starts at 2, -[5, 3, 7], LIS starts at 5, -[3, 7], LIS starts at 3 -[7], LIS starts at 7 -\end{lstlisting} -\subsection{Self-Reduction} -We model the problem as in Fig.~\ref{fig:tree_lis}. -\begin{figure}[h] - \centering - \includegraphics[width=0.8\columnwidth]{fig/LIS_tree.png} - \caption{Graph Model for LIS, each path represents a possible solution. %Each arrow represents an move: find an element in the following elements %that's larger than the current node. - } - \label{fig:tree_lis} -\end{figure} -As before, our problem becomes finding the longest path in a tree, but now in an N-ary tree instead of a binary tree. Define $f(i)$ as the length of the LIS starting at index $i$ in the array. Then, its relation with the other states is $f(i) = 1 + \max_{j>i,\ a[j]>a[i]} f(j)$ (the maximum is taken as 0 when no such $j$ exists), and $f(n)=0$. Here, the base case is when there is no element left to start from, which gives an LIS of length 0. 
-\begin{lstlisting}[language=Python] -def lengthOfLIS(self, nums: List[int]) -> int: - def dfs(nums, idx, cur_num): - max_len = 0 - # Generate the next node - for i in range(idx+1, len(nums)): - if nums[i] > cur_num: - max_len = max(max_len, 1 + dfs(nums, i, nums[i])) - return max_len - return dfs(nums, -1, -sys.maxsize) -\end{lstlisting} -\subsection{Dynamic Programming} -\paragraph{Memoization} Similar to the last approach, we can write code: -\begin{lstlisting}[language=Python] -def lengthOfLIS(self, nums: List[int]) -> int: - def dfs(nums, idx, cur_num, memo): - max_len = 0 - # Generate the next node - if (idx, cur_num) not in memo: - for i in range(idx+1, len(nums)): - if nums[i] > cur_num: - max_len = max(max_len, 1 + dfs(nums, i, nums[i], memo)) - memo[(idx, cur_num)] = max_len - return memo[(idx, cur_num)] - memo = {} - return dfs(nums, -1, -sys.maxsize, memo) -\end{lstlisting} -\paragraph{Tabulation} With the bottom-up manner, we need to tweet our above recurrence function and definition of state. The subproblem $f(i)$ here will be defined as the LIS ending at index $i$. We shall pay attention that with $n$ elements there should exist $n+1$ states in total, that there is an empty state with empty array $[]$. The recurrence function will be shown in Eq.~\ref{LIS_equation}. It can be explained the LIS ending at index $i$ will be transitioned from LIS ending at any previous index by plusing one. The whole analysis process is illustrated in Fig~\ref{fig:lis}. 
-\begin{equation} -\label{LIS_equation} - f(i) = \begin{cases} - 1 + max(f(j)),& -1\leq j[4,10],->[4,10],[3,10],->[3,8]->[3,8,9] -\begin{lstlisting}[language = Python] -def lengthOfLIS(self, nums): - """ - :type nums: List[int] - :rtype: int - """ - def binarySearch(arr,l,r,num): - while larr[mid]: - l=mid+1 - elif numarr[mid]: - l=mid+1 - elif num>> 1 -1 -\end{lstlisting} -We use \texttt{type()} built-in function to see its underlying type--class, for example: -\begin{lstlisting}[language=Python] ->>> type([1,2,3,4]) - ->>> type(1) - ->>> type([1,2,3,4]) - ->>> type(range(10)) - ->>> type(1) - ->>> type('abc') - -\end{lstlisting} - -\paragraph{Operators} Operators are used to perform operations on variables and instance of objects. The example shows operator \texttt{+} performed on two instances of objects. -\begin{lstlisting}[language=Python] ->>> [1, 2, 3] + [4, 5, 6] -[1, 2, 3, 4, 5, 6] -\end{lstlisting} -\paragraph{Variables} When we are creating an instance of objects, a common practice is to have variables which are essentially pointers pointing to the instance of object's location in memory. -\begin{lstlisting}[language=Python] ->>> a = [1, 2, 3] ->>> b = [4, 5, 6] ->>> c = a + b ->>> c -[1, 2, 3, 4, 5, 6] -\end{lstlisting} - -\paragraph{Tools} To be able to help ourselves; knowing what attributes, built-in function that, we use built-in function \texttt{dir(object)}. - -To check object or function's information, we use built-in function \texttt{help}. And when we are done with viewing, type \texttt{q} to exit. - -\subsubsection{Properties} - -\paragraph{In-place VS Standard Operations} In-place operation is an operation that changes directly the content of a given linear algebra, vector, matrices(Tensor) without making a copy. The operators which helps to do the operation is called in-place operator. Eg: \texttt{a+= b} is equivalent to \texttt{a= operator.iadd(a, b)}. A standard operation, on the other hand, will return a new instance of object. 
- -\paragraph{Mutable VS Immutable Objects} All objects are either mutable or immutable. Simply put, for mutable data types/objects we can add, remove, or replace their content on the fly, whereas immutable objects cannot be changed and instead return new objects when we attempt to update them. Custom classes are generally mutable. The different behavior of mutable and immutable objects can be shown by using operations. An in-place operation can only be performed on mutable objects. - -\paragraph{Object, Type, Identity, Value} -Everything in Python is an object including different data types, modules, classes, and functions. Each object in Python has a \textbf{type}, a \textbf{value}, and an \textbf{identity}. When we are creating an instance of an object such as string with value `abc' it automatically comes with an ``identifier''. The identifier of the object acts as a pointer to the object's location in memory. The built-in function \texttt{id()} can be used to return the identity of an object as an integer which usually corresponds to the object's location in memory. The identity operator \texttt{is} can be used directly to compare the identity of two objects. The built-in function \texttt{type()} returns the type of an object, and the operator \texttt{==} can be used to see if two objects have the same value. - -\subsubsection{Examples} -\paragraph{Behavior of Mutable Objects} -Let us see an example: we create three variables/instances \texttt{a, b, c}, where \texttt{a, b} are assigned objects of the same value, and \texttt{c} is assigned the variable \texttt{a}. -\begin{lstlisting}[language=Python] ->>> a = [1, 2, 3] ->>> b = [1, 2, 3] ->>> c = a ->>> id(a), id(b), id(c) -(140222162413704, 140222017592328, 140222162413704) -\end{lstlisting} -We use the functions and operators introduced above to demonstrate their behavior. 
First, check \texttt{a} and \texttt{b}: -\begin{lstlisting}[language=Python] ->>> a == b, a is b, type(a) is type(b) -(True, False, True) -\end{lstlisting} -We see that \texttt{a} and \texttt{b} are having different identity, meaning the object of each points to different location in memory, they are indeed two independent objects. Now, let us compare \texttt{a} and \texttt{c} the same way: -\begin{lstlisting}[language=Python] ->>> a == c, a is c, type(a) is type(c) -(True, True, True) -\end{lstlisting} -Ta-daa! They have the same identity, meaning they point to the same piece of memory and \texttt{c} is more like an alias to \texttt{a}. Now, let's change a value in \texttt{a} use in-place operation and see its ids: -\begin{lstlisting}[language=Python] ->>> a[2] = 4 ->>> id(a), id(b), id(c) -(140222162413704, 140222017592328, 140222162413704) ->>> a += [5] ->>> id(a), id(b), id(c) -(140222162413704, 140222017592328, 140222162413704) -\end{lstlisting} -We do not see any change about identity but change of values. Now, let us use other standard operations and see the behavior: -\begin{lstlisting}[language=Python] ->>> a = a + [5] ->>> a -[1, 2, 4, 5, 5] ->>> id(a), id(b), id(c) -(140222017592392, 140222017592328, 140222162413704) -\end{lstlisting} -Now, we see \texttt{a} has a different \texttt{id} compared with \texttt{c}, meaning they are no longer the same instance of the same object any more. - -\paragraph{Behavior of Immutable Objects} For the mutable objects, we see the the reassignment of \texttt{c} to \texttt{a} results having same identity, however, this is not the case in the immutable objects, see an example: -\begin{lstlisting}[language=Python] ->>> a = 'abc' ->>> b = 'abc' ->>> c = a ->>> id(a), id(b), id(c) -(140222162341424, 140222162341424, 140222162341424) -\end{lstlisting} -These three variables \texttt{a, b, c} all share the same identity, meaning they all point to the same instance of object in the same piece of memory. 
This ends up with more efficient usage of memory. Now, let's try to change the value of the variable \texttt{a}. We call the \texttt{+=} operator, which is an in-place operator for mutable objects: -\begin{lstlisting}[language=Python] ->>> a += 'd' ->>> a -'abcd' ->>> id(a), id(b), id(c) -(140222017638952, 140222162341424, 140222162341424) -\end{lstlisting} -We see that a new instance of the string object is still created, with a new id \texttt{140222017638952}. - -%%%%%%%%%%%%%%%%%%%Components%%%%%%%%%% -\subsection{Python Components} -\label{python_subsec_components} -The plethora of built-in data types, built-in modules, third party modules or package/libraries, and frameworks contributes to the popularity and efficiency of coding in Python. - -\paragraph{Python Data Types} Python contains 12 built-in data types. These include four scalar data types( \textbf{int}, \textbf{float}, \textbf{complex} and \textbf{bool}), four sequence types(\textbf{string}, \textbf{list}, \textbf{tuple} and \textbf{range}), one mapping type(\textbf{dict}) and two set types(\textbf{set} and \textbf{frozenset}). All the four scalar data types together with string, tuple, range and frozenset are immutable, and the others are mutable. Each of these can be manipulated using: -\begin{itemize} - \item Operators - \item Functions - \item Data-type methods -\end{itemize} - -A \textbf{module} is a file which contains Python functions, global variables etc. It is nothing but a .py file containing executable Python code/statements. With the built-in modules, we do not need to install external packages or include these .py files explicitly in our Python project; all we need to do is import them directly and use their objects and corresponding methods. For example, we use the built-in module \texttt{array}: -\begin{lstlisting}[language=Python] -import array -# use it -\end{lstlisting} -We can also write a .py file ourselves and import it. 
We provide reference to some of the popular and useful built-in modules that is not covered in Part~\ref{part_data_structure} in Python in Section~\ref{python_sec_supplemental_tools} of this chapter, they are: -\begin{itemize} - \item Re -\end{itemize} - - -\paragraph{Package/Library} Package or library is namespace which contains multiple package/modules. It is a directory which contains a special file \_\_init\_\_.py - -Let’s create a directory user. Now this package contains multiple packages / modules to handle user related requests. -\begin{lstlisting}[numbers=none] - user/ # top level package - __init__.py - - get/ # first subpackage - __init__.py - info.py - points.py - transactions.py - - create/ # second subpackage - __init__.py - api.py - platform.py - -\end{lstlisting} - -Now you can import it in following way -\begin{lstlisting}[language=Python] -from user.get import info # imports info module from get package -from user.create import api #imports api module from create package -\end{lstlisting} - -When we import any package, python interpreter searches for sub directories / packages. - -Library is collection of various packages. There is no difference between package and python library conceptually. Have a look at requests/requests library. We use it as a package. - -\paragraph{Framework} It is a collection of various libraries which architects the code flow. Let’s take example of Django which has various in-built libraries like Auth, user, database connector etc. Also, in artifical intelligence filed, we have TensorFlow, PyTorch, SkLearn framework to use. - - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -% Mutable Vs Immutable -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -% \section{Mutable Vs Immutable} -% Not all python objects handle changes the same way. Some objects are mutable, meaning they can be altered. 
Others are immutable; they cannot be changed but rather return new objects when attempting to update. What does this mean when writing python code? - -% This post will talk about (a) the mutability of common data types and (b) instances where mutability matters. - -% \subsubsection{Mutability of Common Types} -% The following are some immutable objects: basic scalar data types include: int, float, decimal, complex, bool, bytes. Plus the following container types -% \begin{itemize} -% \item string -% \item tuple -% \item range, what is the type of range?? -% \item frozenset; which is an immutable version of set -% \end{itemize} -% The following are some mutable objects which are mostly container types: -% \begin{itemize} -% \item list -% \item dict -% \item set -% \item bytearray -% \item user-defined classes (unless specifically made immutable) -% \end{itemize} -% Because strings are immutable; what if you want to do some in-place modifications like character swapping? Use a bytearray. - - - -\subsubsection{When Mutability Matters} -Mutability might seem like an innocuous topic, but when writing an efficient program it is essential to understand. For instance, the following code is a straightforward solution to concatenate a string together: -\begin{lstlisting}[language = Python] -string_build = "" -for data in container: - string_build += str(data) -\end{lstlisting} -In reality, this is very \textit{inefficient}. Because strings are immutable, concatenating two strings together actually creates a third string which is the combination of the previous two. If you are iterating a lot and building a large string, you will waste a lot of memory creating and throwing away objects. Also, at the end of the iteration you will be allocating and throwing away very large string objects which is even more costly. 
- -The following is a more efficient and pythonic way: -\begin{lstlisting}[language = Python] -builder_list = [] -for data in container: - builder_list.append(str(data)) -"".join(builder_list) - -### Another way is to use a list comprehension -"".join([str(data) for data in container]) - -### or use the map function -"".join(map(str, container)) -\end{lstlisting} -This code takes advantage of the mutability of a single list object to gather your data together and then allocate a single result string to put your data in. That cuts down on the total number of objects allocated by almost half. - -Another pitfall related to mutability is the following scenario: -\begin{lstlisting}[language = Python] -def my_function(param=[]): - param.append("thing") - return param - -my_function() # returns ["thing"] -my_function() # returns ["thing", "thing"] -\end{lstlisting} - -What you might think would happen is that by giving an empty list as a default value to param, a new empty list is allocated each time the function is called and no list is passed in. But what actually happens is that every call that uses the default list will be using the same list. This is because Python (a) only evaluates functions definitions once, (b) evaluates default arguments as part of the function definition, and (c) allocates one mutable list for every call of that function. - -Do not put a mutable object as the default value of a function parameter. Immutable types are perfectly safe. If you want to get the intended effect, do this instead: -\begin{lstlisting}[language = Python] -def my_function2(param=None): - if param is None: - param = [] - param.append("thing") - return param -Conclusion -\end{lstlisting} - -Mutability matters. Learn it. Primitive-like types are probably immutable. Container-like types are probably mutable. -%%%%%%%%%%%%Operators%%%%%%%%%% -\section{Data Types and Operators} - -Operators are special symbols in Python that carry out arithmetic or logical computation. 
The value that the operator operates on is called the operand. Python offers Arithmetic operators, Assignment Operator, Comparison Operators, Logical Operators, Bitwise Operators (shown in Chapter~\ref{chapter_bit}), and two special operators like the identity operator or the membership operator. -\subsection{Arithmetic Operators} -Arithmetic operators are used to perform mathematical operations like addition, subtraction, multiplication etc. -\begin{table}[h] -\begin{small} -\centering -\noindent\captionof{table}{Arithmetic operators in Python} - \noindent \begin{tabular}{|p{0.25\columnwidth}|p{0.45\columnwidth}| p{0.20\columnwidth}| } - \hline -Operator& Description & Example \\ \hline - -+ & Add two operands or unary plus & x + y+2\\ \hline -- & Subtract right operand from the left or unary minus & x - y -2\\ \hline -* & Multiply two operands & x * y\\ \hline -/ & Divide left operand by the right one (always results into float) & x / y \\ \hline -% Modulus - remainder of the division of left operand by the right x % y (remainder of x/y) -// & Floor division - division that results into whole number adjusted to the left in the number line & x // y \\ \hline -** & Exponent - left operand raised to the power of right & x**y (x to the power y)\\ \hline -\% & Modulus - Divides left hand operand by right hand operand and returns remainder & x\% y -\end{tabular} - \label{tab:arithematic_operators} - \end{small} -\end{table} - -\subsection{Assignment Operators} -Assignment operators are used in Python to assign values to variables. - -a = 5 is a simple assignment operator that assigns the value 5 on the right to the variable a on the left. - -There are various compound operators that follows the order: variable\_name (arithemetic operator) = variable or data type. Such as a += 5 that adds to the variable and later assigns the same. It is equivalent to a = a + 5. - -\subsection{Comparison Operators} -Comparison operators are used to compare values. 
Each one returns either True or False according to the condition. -\begin{table}[h] -\begin{small} -\centering -\noindent\captionof{table}{Comparison operators in Python} - \noindent \begin{tabular}{|p{0.25\columnwidth}|p{0.45\columnwidth}| p{0.20\columnwidth}| } - \hline -Operator& Description & Example \\ \hline -> & Greater than - True if left operand is greater than the right & x > y\\ \hline -< & Less than - True if left operand is less than the right & x < y\\ \hline -== & Equal to - True if both operands are equal & x == y\\ \hline -!= & Not equal to - True if operands are not equal & x != y\\ \hline ->= & Greater than or equal to - True if left operand is greater than or equal to the right & x >= y\\ \hline -<= & Less than or equal to - True if left operand is less than or equal to the right & x <= y\\ \hline -\end{tabular} - \label{tab:comparison_operators} - \end{small} -\end{table} -\subsection{Logical Operators} -Logical operators are the $and$, $or$, $not$ operators. It is important for us to understand which values Python considers False and which it considers True. The following values are considered False, and all the other values are considered $True$. -\begin{itemize} - \item The $None$ type - \item Boolean False - \item An integer, float, or complex zero - \item An empty sequence or mapping data type - \item An instance of a user-defined class that defines a \_\_len\_\_() or \_\_bool\_\_() method that returns zero or False. 
-\end{itemize} -\begin{table}[h] -\begin{small} -\centering -\noindent\captionof{table}{Logical operators in Python} - \noindent \begin{tabular}{|p{0.25\columnwidth}|p{0.45\columnwidth}| p{0.20\columnwidth}| } - \hline -Operator& Description & Example \\ \hline -and & True if both the operands are true & x and y\\ \hline -or & True if either of the operands is true & x or y\\ \hline -not & True if operand is false (complements the operand) & not x \\ \hline -\end{tabular} - \label{tab:logical_operators} - \end{small} -\end{table} -\subsection{Special Operators} -Python language offers some special type of operators like the identity operator or the membership operator. - -\paragraph{Identity operators} Identity operators are used to check if two values (or variables) are located on the same part of the memory. Two variables that are equal does not imply that they are identical as we have shown in the last section. -\begin{table}[h] -\begin{small} -\centering -\noindent\captionof{table}{Identity operators in Python} - \noindent \begin{tabular}{|p{0.25\columnwidth}|p{0.45\columnwidth}| p{0.20\columnwidth}| } - \hline -Operator& Description & Example \\ \hline -is & True if the operands are identical (refer to the same object) & x is y\\ \hline -is not & True if the operands are not identical (do not refer to the same object) & x is not y \\ \hline -\end{tabular} - \label{tab:identity_operators} - \end{small} -\end{table} - -\paragraph{Membership Operators} -$in$ and $not in$ are the membership operators in Python. They are used to test whether a value or variable is found in a sequence (string, list, tuple, set and dictionary). 
-\begin{table}[h] -\begin{small} -\centering -\noindent\captionof{table}{Membership operators in Python} - \noindent \begin{tabular}{|p{0.25\columnwidth}|p{0.45\columnwidth}| p{0.20\columnwidth}| } - \hline -Operator& Description & Example \\ \hline -in & True if value/variable is found in the sequence & 5 in x\\ \hline -not in & True if value/variable is not found in the sequence & 5 not in x \\ \hline -\end{tabular} - \label{tab:membership_operators} - \end{small} -\end{table} - -% Identity operators in Python Operator Meaning Example - -% check out this link to rewrite this section. -% % https://docs.python.org/3/library/stdtypes.html -% This section covers the various built-in operators, which Python has to offer. Having a deeper understanding of the operators can help us choose the right operator. -% \begin{lstlisting}[numbers=none] -% Operator Description Example -% +, - Addition, Subtraction 10-3 -% *, % Multiplication, Modulo with reminder 27%7=6 -% / Division 10/3 -% // Truncation Division (also known as floordivision or floor division) -% ~x Bitwise negation ~3 - 4, Result: -8 -% ** Exponentiation 10 ** 3, Result: 1000 -% or, and, not Boolean Or, Boolean And, Boolean Not -% in "Element of" 1 in [3, 2, 1] -% <, <=, >, >=, !=, == The usual comparison operators -% |, &, ^ Bitwise Or, Bitwise And, Bitwise XOR -% <<, >> Shift Operators -% \end{lstlisting} -% \textbf{Division} This operator results in different results for Python 2.x (floor division) and Python 3.x (always float). For example: -% \begin{lstlisting}[language=Python] -% # Python 3: -% 10 / 3 -% # output -% # 3.3333333333333335 - -% #Python 2.x: -% # output -% # 3 -% \end{lstlisting} -% \textbf{Truncation Division} The result of this division is the integral part of the result, i.e. the fractional part is truncated, if there is any. 
It works both for integers and floating-point numbers, but there is a difference in the type of the results: If both the divident and the divisor are integers, the result will be also an integer. If either the divident or the divisor is a float, the result will be the truncated result as a float. For example: -% \begin{lstlisting}[language=Python] -% 10 // 3 -% 10.0 // 3 -% # output -% # 3 -% # 3.0 -% \end{lstlisting} -% A note about efficiency: The results of int(10 / 3) and 10 // 3 are equal. But the "//" division is more than two times as fast! -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -% Function -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\section{Function} -\subsection{Python Built-in Functions} -Check out here \url{https://docs.python.org/3/library/functions.html}. -\paragraph{Built-in Data Types} -We have functions like int(), float(), str(), tuple(), list(), set(), dict(), bool(), chr(), ord(). These functions can be used for intialization, and also used for type conversion between different data types. - -\subsection{Lambda Function} -In Python, \textit{lambda function} is anonymous function, which is a function that is defined without a name. While normal functions are defined using the $def$ keyword, in anonymous functions are defined using the $lambda$ keyword, this is where the name comes from. - -\paragraph{Syntax} The syntax of lambda function in Python is: -\begin{lstlisting}[language=Python] -lambda arguments: expression -\end{lstlisting} -Lambda function can has zero to multiple arguments but only one expression, which will be evaluated and returned. For example, we define a lambda function which takes one argument $x$ and return $x^2$. 
-\begin{lstlisting}[language=Python] -square1 = lambda x: x**2 -\end{lstlisting} - -The above lambda function is equal to a normal function defined as: -\begin{lstlisting}[language=Python] -def square(x): - return x**2 -\end{lstlisting} -Calling the following code has the same output: -\begin{lstlisting}[language=Python] -square1(5) == square(5) -\end{lstlisting} - -\paragraph{Applications} - -Hence, anonymous functions are also called lambda functions. The use of lambda creates an anonymous function (which is callable). In the case of sorted the callable only takes one parameters. Python's lambda is pretty simple. It can only do and return one thing really. - -The syntax of lambda is the word lambda followed by the list of parameter names then a single block of code. The parameter list and code block are delineated by colon. This is similar to other constructs in python as well such as while, for, if and so on. They are all statements that typically have a code block. Lambda is just another instance of a statement with a code block. - -We can compare the use of lambda with that of def to create a function. -\begin{lstlisting}[language = Python] -adder_lambda = lambda parameter1,parameter2: parameter1+parameter2 -\end{lstlisting} -The above code equals to the following: -\begin{lstlisting}[language = Python] -def adder_regular(parameter1, parameter2): - return parameter1+parameter2 -\end{lstlisting} - - - -\subsection{Map, Filter and Reduce} - -These are three functions which facilitate a functional approach to programming. We will discuss them one by one and understand their use cases. -\subsubsection{Map} -Map applies a function to all the items in an input\_list. Here is the blueprint: -\begin{lstlisting}[language=Python] -map(function_to_apply, list_of_inputs) -\end{lstlisting} - -Most of the times we want to pass all the list elements to a function one-by-one and then collect the output. 
For instance: -\begin{lstlisting}[language=Python] -items = [1, 2, 3, 4, 5] -squared = [] -for i in items: - squared.append(i**2) -\end{lstlisting} -Map allows us to implement this in a much simpler and nicer way. Here you go: -\begin{lstlisting}[language=Python] -items = [1, 2, 3, 4, 5] -squared = list(map(lambda x: x**2, items)) -\end{lstlisting} -Most of the times we use lambdas with map so I did the same. Instead of a list of inputs we can even have a list of functions! Here we use $x(i)$ to call the function, where $x$ is replaced with each function in funcs, and i is the input to the function. -\begin{lstlisting}[language=Python] -def multiply(x): - return (x*x) -def add(x): - return (x+x) - -funcs = [multiply, add] -for i in range(5): - value = list(map(lambda x: x(i), funcs)) - print(value) - -# Output: -# [0, 0] -# [1, 2] -# [4, 4] -# [9, 6] -# [16, 8] -\end{lstlisting} -\subsubsection{Filter} - -As the name suggests, filter creates a list of elements for which a function returns true. Here is a short and concise example: -\begin{lstlisting}[language=Python] -number_list = range(-5, 5) -less_than_zero = list(filter(lambda x: x < 0, number_list)) -print(less_than_zero) - -# Output: [-5, -4, -3, -2, -1] -\end{lstlisting} -The filter resembles a for loop but it is a builtin function and faster. - -Note: If map and filter do not appear beautiful to you then you can read about list/dict/tuple comprehensions. -\subsubsection{Reduce} - -Reduce is a really useful function for performing some computation on a list and returning the result. It applies a rolling computation to sequential pairs of values in a list. For example, if you wanted to compute the product of a list of integers. 
- -So the normal way you might go about doing this task in python is using a basic for loop: -\begin{lstlisting}[language=Python] -product = 1 -list = [1, 2, 3, 4] -for num in list: - product = product * num - -# product = 24 - -\end{lstlisting} -Now let’s try it with reduce: -\begin{lstlisting}[language=Python] -from functools import reduce -product = reduce((lambda x, y: x * y), [1, 2, 3, 4]) - -# Output: 24 -\end{lstlisting} -%%%%%%%%%%%%%%%%%%%%%%%%Class %%%%%%%%%%%%%%%%%%%%%% -\section{Class} -\subsection{Special Methods} -From ~\cite{beazley2009python}. \url{http://www.informit.com/articles/article.aspx?p=453682&seqNum=6} All the built-in data types implement a collection of special object methods. The names of special methods are always preceded and followed by double underscores (\_\_). These methods are automatically triggered by the interpreter as a program executes. For example, the operation x + y is mapped to an internal method, x.\_\_add\_\_(y), and an indexing operation, x[k], is mapped to x.\_\_getitem\_\_(k). The behavior of each data type depends entirely on the set of special methods that it implements. - -User-defined classes can define new objects that behave like the built-in types simply by supplying an appropriate subset of the special methods described in this section. In addition, built-in types such as lists and dictionaries can be specialized (via inheritance) by redefining some of the special methods. In this book, we only list the essential ones so that it speeds up our interview preparation. -\paragraph{Object Creation, Destruction, and Representation} We first list these special methods in Table~\ref{tab:special_methods_for_object_creation}. 
-\begin{table}[h] -\begin{small} -\centering -\noindent\captionof{table}{ Special Methods for Object Creation, Destruction, and Representation} - \noindent \begin{tabular}{|p{0.25\columnwidth}|p{0.75\columnwidth}|} - \hline -Method& Description \\ \hline -*\_\_init\_\_(self [,*args [,**kwargs]]) & Called to initialize a new instance\\ \hline -\_\_del\_\_(self) & Called to destroy an instance \\ \hline -*\_\_repr\_\_(self) & Creates a full string representation of an object \\ \hline -\_\_str\_\_(self) & Creates an informal string representation \\\hline -\_\_cmp\_\_(self,other) & Compares two objects and returns negative, zero, or positive \\\hline -\_\_hash\_\_(self) & Computes a 32-bit hash index \\\hline -\_\_nonzero\_\_(self) & Returns 0 or 1 for truth-value testing \\\hline -\_\_unicode\_\_(self) & Creates a Unicode string representation \\\hline -\end{tabular} - \label{tab:special_methods_for_object_creation} - \end{small} -\end{table} -A good and useful way to implement a class is through the \_\_repr\_\_() method: we call the built-in function repr() on a built-in object assembled from the instance's attributes, so that our self-defined class is represented in the same way as that built-in object. Doing so saves us from implementing a lot of other special methods for our class while still giving us most of the behavior needed.
For example, we define a Student class and represent it as a tuple: -\begin{lstlisting} -class Student: - def __init__(self, name, grade, age): - self.name = name - self.grade = grade - self.age = age - def __repr__(self): - return repr((self.name, self.grade, self.age)) -a =Student('John', 'A', 14) -print(hash(a)) -print(a) -\end{lstlisting} -Without \_\_repr\_\_(), the output of the test code above is: -\begin{lstlisting}[language=Python] -8766662474223 -<__main__.Student object at 0x7f925cd79ef0> -\end{lstlisting} -Doing so, we have \_\_hash\_\_(), -\paragraph{Comparison Operations} Table~\ref{tab:special_methods_for_comparison_operations} lists all the comparison methods that might need to be implemented in a class in order to apply comparison in applications such as sorting. - -\begin{table}[h] -\begin{small} -\centering -\noindent\captionof{table}{ Special Methods for Comparison Operations} - \noindent \begin{tabular}{|p{0.25\columnwidth}|p{0.75\columnwidth}|} - \hline -Method& Description \\ \hline -\_\_lt\_\_(self,other) & self < other \\ \hline -\_\_le\_\_(self,other) & self <= other \\ \hline -\_\_gt\_\_(self,other) & self > other \\ \hline -\_\_ge\_\_(self,other) & self >= other \\ \hline -\_\_eq\_\_(self,other) & self == other \\ \hline -\_\_ne\_\_(self,other) & self != other \\ \hline -\end{tabular} - \label{tab:special_methods_for_comparison_operations} - \end{small} -\end{table} - - -\subsection{Class Syntax} -\subsection{Nested Class} -When we solve problems on LeetCode, sometimes we need to wrap another class object inside of the solution class. We can do this with a nested class. When we create a new instance of the nested class, we use mainClassName.NestedClassName().
- -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -% Shallow Copy and the deep copy -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\section{Shallow Copy and the deep copy} -\label{shallow_deep_copy} -For list and string data structures, we constantly meet cases where we need to make a copy. However, in programming languages like C++ and Python, we need to know the difference between shallow copy and deep copy. Here we only introduce the Python version. - -Given the following two snippets of Python code: -\begin{lstlisting}[language = Python] -colours1 = ["red", "green"] -colours2 = colours1 -colours2 = ["rouge", "vert"] -print(colours1) ->>> ['red', 'green'] -\end{lstlisting} - -\begin{lstlisting}[language = Python] -colours1 = ["red", "green"] -colours2 = colours1 -colours2[1] = "blue" -print(colours1) -['red', 'blue'] -\end{lstlisting} - -From the above outputs, we can see that in the first case the colours1 list stays the same, but in the second case it is changed although we are assigning a value through colours2. The result can be either wanted or not wanted. In Python, assigning one list to another directly is similar to copying a pointer in C++: both names point to the same physical address. In the first case, colours2 is reassigned a new list, which has a new address, so now colours2 points to the address of this new list instead, which leaves the values of colours1 untouched. In the second case, colours2 still points to the same list as colours1, so modifying an element through colours2 also changes colours1. We can visualize this process as follows: -\begin{figure}[h] -\begin{subfigure}[b]{0.5\linewidth} -\centering -\includegraphics[width = 0.98\linewidth]{fig/deep_copy_1.png} -\caption{The copy process for code 1} -\end{subfigure} -\begin{subfigure}[b]{0.5\linewidth} -\centering -\includegraphics[width = 0.98\linewidth]{fig/deep_copy_2.png} -\caption{The copy process for code 2} -\end{subfigure} -\caption{Copy process} -\label{fig:copy_shallow} -\end{figure} -However, we often need to do copy and leave the original list or string unchanged.
Because there are a variety of list, from one dimensional, two-dimensional to multi-dimensional. -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -% Shallow Copy -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\subsection{Shallow Copy using Slice Operator} -It's possible to completely copy shallow list structures with the slice operator without having any of the side effects, which we have described above: -\begin{lstlisting}[language = Python] -list1 = ['a','b','c','d'] -list2 = list1[:] -list2[1] = 'x' -print(list2) -['a', 'x', 'c', 'd'] -print(list1) -['a', 'b', 'c', 'd'] -\end{lstlisting} -Also, for Python 3, we can use list.copy() method -\begin{lstlisting}[language = Python] -list2 = list1.copy() -\end{lstlisting} - -But as soon as a list contains sublists, we have the same difficulty, i.e. just pointers to the sublists. -\begin{lstlisting}[language = Python] -lst1 = ['a','b',['ab','ba']] -lst2 = lst1[:] -\end{lstlisting} - -This behaviour is depicted in the following diagram: -\begin{figure}[h] - \centering - \includegraphics{fig/deep_copy_3.png} - \caption{Caption} - \label{fig:copy_3} -\end{figure} - -If you assign a new value to the 0th Element of one of the two lists, there will be no side effect. Problems arise, if you change one of the elements of the sublist. -\begin{lstlisting}[language = Python] ->>> lst1 = ['a','b',['ab','ba']] ->>> lst2 = lst1[:] ->>> lst2[0] = 'c' ->>> lst2[2][1] = 'd' ->>> print(lst1) -['a', 'b', ['ab', 'd']] -\end{lstlisting} - - -The following diagram depicts what happens, if one of the elements of a sublist will be changed: Both the content of lst1 and lst2 are changed. -\begin{figure}[h] - \centering - \includegraphics{fig/deep_copy_4.png} - \caption{Caption} - \label{fig:copy_4} -\end{figure} -\subsection{Iterables, Generators, and Yield} -\url{https://pythontips.com/2013/09/29/the-python-yield-keyword-explained/}. Seems like it can not yeild a list. 
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -% Deep Copy -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\subsection{Deep Copy using copy Module} -A solution to the described problems is to use the module "copy". This module provides the method "deepcopy", which allows a complete copy of an arbitrary list, i.e. both shallow lists and lists that contain sublists. - -The following script uses our example above and this method: -\begin{lstlisting}[language = Python] -from copy import deepcopy - -lst1 = ['a','b',['ab','ba']] - -lst2 = deepcopy(lst1) - -lst2[2][1] = "d" -lst2[0] = "c"; - -print(lst2) -print(lst1) -\end{lstlisting} -If we save this script under the name of deep\_copy.py and if we call the script with ``python deep\_copy.py'', we will receive the following output: -\begin{lstlisting}[language = Python] -$ python deep_copy.py -['c', 'b', ['ab', 'd']] -['a', 'b', ['ab', 'ba']] -\end{lstlisting} -\begin{figure}[h] - \centering - \includegraphics{fig/deep_copy_5.png} - \caption{Caption} - \label{fig:copy_5} -\end{figure} - -This section is cited from %https://www.python-course.eu/deep_copy.php -Need to modify. -\section{Global Vs nonlocal} - -\section{Loops} -The for loop can often be needed in algorithms we have two choices: -\textit{for} and \textit{while}. So to learn the basic grammar to do for loop easily could help us be more efficient in programming. - -Usually for loop is used to iterate over a sequence or matrix data. For example, the following grammar works for either string or list.
-\begin{lstlisting}[language=python] -# for loop in a list to get the value directly -a = [5, 4,3, 2, 1] -for num in a: - print(num) -# for loop in a list use index -for idx in range(len(a)): - print(a[idx]) -# for loop in a list get both index and value directly -for idx, num in enumerate(a): - print(idx, num) -\end{lstlisting} -Sometimes, we want to iterate two lists jointly at the same time, which requires they both have the same length. We can use \textit{zip} to join them together, and the rest of the for loop works just as above. For example: -\begin{lstlisting}[language=Python] -a, b = [1, 2, 3, 4, 5], [5, 4, 3, 2, 1] -for idx, (num_a, num_b) in enumerate(zip(a, b)): - print(idx, num_a, num_b) -\end{lstlisting} -\section{Special Skills} -\begin{enumerate} - \item Swap the value of variable - \begin{lstlisting}[language=Python] - a, b = 7, 10 - print(a, b) - a, b = b, a - print(a, b) - \end{lstlisting} - \item Join all the string elements in a list to a whole string - \begin{lstlisting}[language=Python] - a = ["Cracking", "LeetCode", "Problems"] - print("".join(a)) - \end{lstlisting} - \item Find the most frequent element in a list - \begin{lstlisting}[language=Python] - a = [1, 3,5,6,9,9,4,10,9] - print(max(set(a), key = a.count)) - # or use Counter from the collections - from collections import Counter - cnt = Counter(a) - print(cnt.most_common(3)) - \end{lstlisting} - \item Check if two strings are comprised of the same letters. - \begin{lstlisting}[language=Python] - from collections import Counter - Counter(str1) == Counter(str2) - \end{lstlisting} - \item Reversing - \begin{lstlisting}[language=Python] - # 1. reversing strings or list - a = 'crackingleetcode' - b = [1,2,3,4,5] - print(a[::-1], b[::-1]) - # 2. iterate over each char of the string or list contents in reverse order efficiently, here we use zip to pair them up - for char, num in zip(reversed(a), reversed(b)): - print(char, num) - #3.
reverse each digit in an integer or float number - num = 123456789 - print(int(str(num)[::-1])) - \end{lstlisting} - \item Remove the duplicates from list or string. We can convert it to set at first, but this wont keep the original order of the elements. If we want to keep the order, we can use the OrderdDict method from collections. - \begin{lstlisting}[language=Python] - a = [5, 4, 4, 3, 3, 2, 1] - no_duplicate = list(set(a)) - - from collections import OrderedDict - print(list(OrderedDict.fromkeys(a).keys()) - \end{lstlisting} - \item Find the min or max element or the index. -\end{enumerate} -%%%%%%%%%%%%%%%Supplemental Tools%%%%%%%%% -\section{Supplemental Python Tools} -\label{python_sec_supplemental_tools} -\subsection{Re} -\subsection{Bitsect} -%https://docs.python.org/2/library/bisect.html -\begin{lstlisting}[language = Python] -def index(a, x): - 'Locate the leftmost value exactly equal to x' - i = bisect_left(a, x) - if i != len(a) and a[i] == x: - return i - raise ValueError - -def find_lt(a, x): - 'Find rightmost value less than x' - i = bisect_left(a, x) - if i: - return a[i-1] - raise ValueError - -def find_le(a, x): - 'Find rightmost value less than or equal to x' - i = bisect_right(a, x) - if i: - return a[i-1] - raise ValueError - -def find_gt(a, x): - 'Find leftmost value greater than x' - i = bisect_right(a, x) - if i != len(a): - return a[i] - raise ValueError - -def find_ge(a, x): - 'Find leftmost item greater than or equal to x' - i = bisect_left(a, x) - if i != len(a): - return a[i] - raise ValueError -\end{lstlisting} - -\subsection{collections} -\textbf{collections} is a module in Python that implements specialized container data types alternative to Python's general purpose built-in containers: dict, list, set, and tuple. The including container type is summarized in Table~\ref{tab:collections_container}. Most of them we have learned in Part \ref{part_data_structure}, therefore, in the table we simply put the reference in the table. 
Before we use them, we need to import each data type as: -\begin{lstlisting}[language=Python] -from collections import deque, Counter, OrderedDict, defaultdict, namedtuple -\end{lstlisting} - -\begin{table}[h] -\begin{small} -\centering -\noindent\captionof{table}{ Container Data types in \textbf{collections} module.} - \noindent \begin{tabular}{|p{0.25\columnwidth}|p{0.45\columnwidth}| p{0.20\columnwidth}| } - \hline -Container& Description & Refer \\ \hline -namedtuple & factory function for creating tuple subclasses with named fields & \\\hline -deque &list-like container with fast appends and pops on either end &\\ \hline -Counter &dict subclass for counting hashable objects &\\ \hline -defaultdict &dict subclass that calls a factory function to supply missing values & \\ \hline -OrderedDict &dict subclass that remembers the order entries were added&\\ \hline -\end{tabular} - \label{tab:collections_container} - \end{small} -\end{table} -% make a table here - -% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -% % Exercise -% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -% \section{Exercises} -% \begin{Exercise}\label{EX11} % this gives out 2.1 exercises -% \vspace{-\baselineskip}% <-- You don't need this line of code if there's some text here -% \Question In problem \ref{EX11-1-i}-\ref{EX11-1-iii}, determine whether the given differential equation is separable -% \begin{tasks}(2) -% \task\label{EX11-1-i} $\frac{dy}{dx}-\sin{(x+y)}=0$ -% \task $\frac{dy}{dx}=4y^2-3y+1$ -% \task\label{EX11-1-iii} $\frac{ds}{dt}=t\ln{(s^{2t})}+8t^2$ -% \end{tasks} -% \Question In problem \ref{EX11-2-iv}-\ref{EX11-2-viii}, solve the equation -% \begin{tasks}[resume=true](2) -% \task\label{EX11-2-iv} $\frac{dx}{dt}=3xt^2$ -% \task $y^{-1}dy+ye^{\cos{x}}\sin{x}dx=0$ -% \task $(x+xy^2)dx+ye^{\cos{x}}\sin{x}dx=0$ -% \task\label{EX11-2-viii} $\frac{dy}{dt} = \frac{y}{t+1} + 4t^2 + 4t$, $\quad$ $y(1) = 10$ -% 
\end{tasks} -% \end{Exercise} -% \setboolean{firstanswerofthechapter}{true} -% %\begin{multicols}{2} % multicol is to make it has two cols -% \begin{Answer}[ref={EX11}] -% \Question -% \begin{tasks} -% \task This is a solution of Ex 1 -% \task This is a solution of Ex 2 -% \task This is a solution of Ex 3 -% \end{tasks} -% \Question -% \begin{tasks}[resume=true] -% \task This is a solution of Ex 4 -% \task This is a solution of Ex 5 -% \task This is a solution of Ex 6 -% \task This is a solution of Ex 7 -% \end{tasks} -% \end{Answer} -% % \end{multicols} -% \setboolean{firstanswerofthechapter}{false} - - % \begin{Exercise}\label{EX12} - % Another exercise. - % \Question If you don't need a horizontal list, you can simply use \verb|\Question| - % \end{Exercise} - % \begin{multicols}{2} - % \begin{Answer}[ref={EX12}] - % \Question This is a solution of Ex 1 - % \end{Answer} - % \end{multicols} - - -\end{document} \ No newline at end of file diff --git a/Easy-Book/chapters/chapter_non_linear_iterative_search.tex b/Easy-Book/chapters/chapter_non_linear_iterative_search.tex deleted file mode 100644 index c4a6281..0000000 --- a/Easy-Book/chapters/chapter_non_linear_iterative_search.tex +++ /dev/null @@ -1,7 +0,0 @@ -\documentclass[../main.tex]{subfiles} -\begin{document} -Up till now, we have learned both DFS and BFS in graph search. From the last chapter, we have seen how the backtracking method utilized DFS to solve enumeration and CSP based problems. The type of problem we should solve with backtracking is the one that requires enumeration all possibilities. We have seen that most of the time, backtracking comes with high complexity. 
If the problem only requires us to find if -\section{Bidirectional BFS} -\url{http://theoryofprogramming.com/2018/01/21/bidirectional-search/} -\section{} -\end{document} \ No newline at end of file diff --git a/Easy-Book/chapters/chapter_reading_of_this_book.tex b/Easy-Book/chapters/chapter_reading_of_this_book.tex deleted file mode 100644 index 87e127c..0000000 --- a/Easy-Book/chapters/chapter_reading_of_this_book.tex +++ /dev/null @@ -1,82 +0,0 @@ -\documentclass[../main.tex]{subfiles} -\begin{document} -\chapter{Reading of This Book} -\section{Structures} -I summarize the characteristics that potentially set this book apart from other books seen in the market; starting from introducing technically what I think of the core principles of algorithm design are--the ``source'' of the wisdom I was after as mentioned in the preface, to illustrating the concise organization of the content, and to highlighting other unique features of this book. -\subsubsection{Core Principles} -Algorithm problem solving follows a few core principles: Search and Combinatorics, Reduce and Conquer, Optimization via Space-Time Trade--off or be Greedy. We specifically put these principles in one single part of the book--Part.~\ref{part_core_principles}. -\begin{enumerate} - \item In Chapter.~\ref{chapter_search_problem_combinatorics} (Search and Combinatorics), I teach how to formulate problems as searching problems via combinatorics in the field of math to enumerate its state space—solution space or all possibilities. Then we further optimize and improve the efficiency through ``backtracking'' techniques. -\item In Chapter.~\ref{chapter_divide_and_conquer}(Reduce and Conquer) we can either reduce problem A to problem B (solving problem B means we solved problem A) or Self-Reduction to reduce problem to a series of subproblems (Such as these algorithm design methodologies fall into this area: divide and conquer, some search algorithms, dynamic programming and greedy algorithms). 
\textbf{Mathematical induction} and \textbf{recurrence relations} play as an important role in problem solving, complexity analysis and even correctness proof. -\item When optimization is needed, we have potentially two methods: -when we see the subproblems/states overlap, space-time trade-off can be applied such as in dynamic programming; -Or we can make greedy choice based on current situation. -\end{enumerate} -\subsubsection{Concise and Clear Organization} -\begin{figure}[h] - \centering - \includegraphics[width=1\columnwidth]{fig/four_umbreallas.png} - \caption{Four umbrellas: each row indicates corresponding parts as outlined in this book.} - \label{fig:four_umbreallas} -\end{figure} - -In this book, we organize in the ordering of Part, Chapter, Section, Subsection, Subsubsection and Paragraph. The parts will be categorized under four umbrellas and each serves an essential purpose: -\begin{enumerate} - \item Preparation: Introduce the global picture of algorithmic problem solving and coding interviews, learn abstract data structures and highly related and useful math such as recurrence relation, and hands-on Python practice by relating the abstract data structures to Python data structures. -\item Principles: As we introduced in the core principle part, we organize the design and principle here so that readers can use them as guidance while not seeking for peculiar algorithm for solving a problem. -\item Classical algorithms: We enhance our algorithm database via learning how to apply the core principles to a variety of classical problems. A database that we can quickly relate to when seeing problems. -\item Coding interview problem patterns: We close our book with the analyzing and categorizing problems by patterns. We address classical and best solutions for each problem pattern. 
-\end{enumerate} -\subsubsection{Other Features and Uniqueness} -\begin{enumerate} - \item The exercise and answer setting: at the problem-pattern section, the first chapter will be named problem pool which list all problems with description. At each exercise section across chapters, only problem id is referred. Instead the answers to problems are organized by different patterns so that users can review problem solving skills quickly when preparing for an interview.This is also practical to problem solving skills. -\item Real coding interview problems referred from LeetCode, users can easily practice online and join discussions with other users. -\item Real Python Code included in the textbook and offered via Google Colab instead of using Pseudo-code. -\item The content is grain-scaled, great for users to skim when necessary to prepare for interviews. -\item Included practical algorithms that are extremely useful for solving coding interview problems and yet are almost never be included in other books, such as monotone stack, two-pointer techniques, and bit manipulation with Python. -\item Included highly related math methods to ease the learning of the topic, including recurrence relations, math formulas, math induction method. -\item Explanation of concepts are problem solving oriented, this makes it easier for users to grasp the concepts. We introduce the concepts along with examples, we strengthen and formalize the concepts in the summary section. -\end{enumerate} - -\subsubsection{Q \& A} -\paragraph{What do we not cover?} In the spectrum of coding interviews and the spectrum of the algorithms, we do not include: -\begin{itemize} - \item Although this book is a comprehensive combination of Algorithmic Problem Solving and Coding Interview Coaching, I decided not to provide preparation guideline to the topic of \textbf{System Design} to avoid deviation from our main topic. 
An additional reason is, personally, I have no experience yet about this topic and meanwhile it is not a topic that I am currently interested in either, so a better option is to look for that in another book. -\item On the algorithm side, we briefly explain what is \textbf{approximate algorithms}, \textbf{heuristic search}, and linear programming, which is mainly seen in Artificial Intelligence, such as machine learning algorithms and neural networks. We do mention it because I think it is important to know that the field of artificial intelligence are just simply a subfield of algorithms, it is powerful because of its high dimensional modeling and large amount of training data. - -\end{itemize} -\paragraph{How much we include about Python 3? } We use Python 3 as our programming language to demonstrate the algorithms for its high readability and popularity in both industry and academics. We mainly focus on Python built-in Data types, frequently used modules, and a single class, and leave out knowledge such as object-oriented programming that deals with class heritages and composition, exception handling, an so on. Our approach is to provide brief introduction to any prior Python 3 knowledge when it is first used in the book, and put slightly more details in the Appendix for further reading and reference. We follow PEP 8 Python programming style. If you want to the object-oriented programming in Python, Python 3 Object-oriented programming is a good book to use. - -\paragraph{Problem Setting} Compared with other books that talk about the problem solving (e.g. \textit{Problem Solving with Algorithms and Data Structures}, we do not talk about problems in complex setting. We want the audience to have a simple setting so that they can focus more on analyzing the algorithm or data structures' behaviors. 
This way, we keep our code clean and it also serves the purpose of coding interviews, in which interviewees are required to write simpler and less code compared with real engineering problems because of the time limit. - - -Therefore, the purpose of this book is three-fold: to answer your questions about the interview process, to prepare you fully for the ``coding interview'', and, most importantly, to help you master algorithm design and analysis principles, sense their beauty, and use them in your future work. - -\section{Reading Suggestions} -We divide the learning of this book in four stages, each stage building on the previous one. Evaluate which stage you are in; we kindly suggest you read in the following order: -\begin{itemize} - \item \textbf{First Stage} I recommend readers first start with Part Two, fundamental algorithm design and analysis, and Part Three, bit manipulation and data structures, to know the basics in both algorithm design and data structures. In this stage, for graph data structures, we learn BFS and DFS with their corresponding properties to help us understand more graph and tree based algorithms. Also, DFS is a good example of recursive programming. -\item \textbf{Second Stage} -In the second stage, we move further to Part Four, Complete Search and Part Five, Advanced Algorithm Design. The purpose of this stage is to move further to learn more advanced algorithm design methodologies: universal search, dynamic programming, and greedy algorithms. At the end, we will understand under what conditions we can improve the efficiency of our algorithms from searching-based algorithms to dynamic programming, and similarly from dynamic programming to greedy algorithms. -\item \textbf{Third Stage} -After we know and practiced the universal algorithm design and know their difference and handle their basic problems.
We can move to the third stage, where we push ourselves further in algorithms, we learn more advanced and special topics which can be very helpful in our career. The content is in Part Six, Advanced and Special Topics. -\begin{enumerate} - \item For example, we learn move advanced graph algorithms. They can be either BFS or DFS based. - \item Dynamic programming special, where we explore different types of dynamic programming problems to gain even better understanding to this topic. - \item String pattern Matching Special: -\end{enumerate} - -\item \textbf{Fourth Stage} -In this stage, I recommend audience to review the content by topics: -\begin{enumerate} - \item Graph: Chapter Graphs, Chapter Non-linear Recursive Backtracking, Chapter Advanced Graph Algorithms, Chapter Graph Questions. - \item Tree: Chapter Trees, Chapter Tree Questions - \item String matching: Chapter String Pattern Matching Special, Chapter String Questions - \item Other topics: Chapter Complete Search, Chapter Array Questions, Chapter Linked List, Stack, Queue and Heap. -\end{enumerate} -\end{itemize} - -\subsubsection{Wanna Finish the Book ASAP? Or just review it for interviews?} I organize the book in all of forty chapters, it is a lot but they are carefully put under different parts to highlight each individual's purpose in the book. We can skim difficult chapters marked by asterisk($*$) that will unlikely appear in a short-time interview. The grained categorization helps us to skim on the chapter levels, if you are confident enough with some chapters or you think they are too trivial, just skim, given that the book is designed to be self-contained of multiple fields(programming languages, algorithmic problem solving and the coding interview problem patterns). - -The content within the book is almost always partitioned into paragraphs with titles. This conveniently allows us to skip parts that are just for enhancement purpose, such as ``stores'' or . This helps us skim within each chapter. 
-\end{document} \ No newline at end of file diff --git a/Easy-Book/chapters/chapter_recurrence_relation.tex b/Easy-Book/chapters/chapter_recurrence_relation.tex deleted file mode 100644 index b7c4884..0000000 --- a/Easy-Book/chapters/chapter_recurrence_relation.tex +++ /dev/null @@ -1,288 +0,0 @@ -\documentclass[../main.tex]{subfiles} -\begin{document} -\chapter{Recurrence Relations} -As we mentioned briefly about the power of recursion is in the whole algorithm design and analysis, we dedicate this chapter to recurrence relation. To summarize, recurrence relation can help with: -\begin{itemize} - \item Recurrence relation naturally represent the relation of recursion. Examples will be shown in Chapter.~\ref{chapter_divide_conquer}. - \item Any iteration can be translated into recurrence relation. Some examples can be found in Chapter.~\ref{chapter_complexity_analysis}. - \item Recurrence relation together with mathematical induction is the most powerful tool to \textbf{design} and \textbf{prove} the correctness of algorithm(chapter.~\ref{chapter_divide_conquer} and Chapter.~\ref{chapter_complexity_analysis}). - \item Recurrence relation can be applied to algorithm complexity analysis( Chapter.~\ref{chapter_complexity_analysis}). -\end{itemize} - -In the following chapters of this part, we endow application meanings to these formulas and discuss how to realize the mentioned uses. - -\section{Introduction} -\paragraph{Definition and Concepts} A recurrence relation is function expressed with the same function. More precisely, as defined in mathematics, recurrence relation is an equation that recursively defines a sequence or multidimensional array of values; once one or more initial terms are given, each further term of the sequence or array is defined as a function of the preceding terms. Fibonacci sequence is one of the most famous recurrence relation which is defined as $f(n)=f(n-1)+f(n-2), f(0)=0, f(1)=1$. 
-\begin{equation} - a_n = \Psi(n, a_{n-1}) \text{ for $n \geq 1$,} -\end{equation} - -We use $a_n$ to denote the value at index $n$, and the recurrence function is marked as $\Psi(n, P)$, where $P$ denotes all preceding terms that are needed to build up this recurrence relation. Like the case of factorial, each factorial number only relies on the result of the previous number and its current index, so this recurrence relation can be written as $a_n = n \times a_{n-1}$. - -A recurrence relation needs to start from \textit{initial value(s)}. For the above relation, $a_0$ needs to be defined and it will be the first element of a recurrence relation. The above relation is only related to the very first preceding term, which is called a recurrence relation of \textit{first order}. If $P$ includes multiple preceding terms, a recurrence relation of order $k$ can be easily extended as: -\begin{equation} - a_n = \Psi(n, a_{n-1}, a_{n-2}, ..., a_{n-k}) \text{ for $n \geq k$,} -\end{equation} -In this case, $k$ initial values are needed for defining a sequence. Initial values can be given any values but then once initial values are decided, the recurrence determines the sequence uniquely. Thus, initial values are also called the \textit{degree of freedom} for solutions to the recurrence. - -Many natural functions are easily expressed as recurrence: -\begin{itemize} - \item Polynomial: $a_n = a_{n-1}+1, a_1=1 \xrightarrow{} a_n = n$. - \item Exponential: $a_n = 2 \times a_{n-1}, a_1=1 \xrightarrow{} a_n = 2^{n-1}$. - \item Factorial: $a_n = n\times a_{n-1}, a_1=1 \xrightarrow{}a_n = n!$ -\end{itemize} - -\paragraph{Solving Recurrence Relation} In real problems, we might care about the value of recursion at $n$, that is, to compute $a_n$ for any given $n$, and there are two ways to do it: -\begin{itemize} - \item Programming: we utilize the computational power of computer and code in either iteration or recursion to build up the value at any given $n$.
For example, $f(2)=f(1)+f(0)=1$, $f(3)=f(2)+f(1)=2$, and so on. With this iteration, we would need $n-1$ steps to compute $f(n)$. - \item Math: we solve the recurrence relation by obtaining an explicit or closed-form expression which is a non-recursive function of $n$. With the solution at hand, we can get $a_n$ right away. -\end{itemize} - Recurrence relations plays an important role in the analysis of algorithms. Usually, time recurrence relation $T(n)$ is defined to analyze the time complexity of solving a problem with input instance of size $n$. The field of complexity analysis studies the closed-form solution of $T(n)$; that is to say the functional relation between $T(n)$ with $n$ that it cares, not each exact value. - - -In this section, we focus on solving the recurrence relation using math to get a closed-form solution. Categorizing the recurrence relation can help us pinpoint each type's solving methods. - -\paragraph{Categorizes} Recurrence relation is essentially discreet function, which can be naturally categorized as \textbf{linear} (such as function $y=mx+b)$ and \textbf{non-linear}; quadratic, cubic and so on (such as $y=ax^2+bx+c, y=ax^3+bx^2+cx+d$). In the field of algorithmic problem solving, linear recurrence relation is commonly used and researched, thus we deliberately leave the non-linear recurrence relation and its method of solving out of the scope of this book. -% \begin{itemize} -% \item Linear Recurrence Relation: - \begin{itemize} - \item \textbf{Homogeneous linear recurrence relation:} When the recurrent relation is linear homogeneous of degree $k$ with constant coefficients, it is in the form, and is also called order-k homogeneous linear recurrence with constant coefficients. - \begin{equation} - a_n=c_1a_{n-1} + c_2a_{n-2} + ... + c_k a_{n-k}. - \label{eq_homogeneous_recurrence_relation} - \end{equation} - $a_0, a_1, ..., a_{k-1}$ will be initial values. 
- - \item \textbf{Non-homogeneous linear recurrence relation:} An order-k non-homogeneous linear recurrence with constant coefficients is defined in the form: - \begin{equation} - a_n=c_1a_{n-1} + c_2a_{n-2} + ... + c_k a_{n-k}+f(n). - \label{eq_non_homogeneous_recurrence_relation} - \end{equation} - $f(n)$ can be $1$ or $n$ or $n^2$ and so on. - \item \textbf{Divide-and-conquer recurrence relation}: When $n$ is not decreasing by a constant as it does in Eq.~\ref{eq_homogeneous_recurrence_relation} and Eq.~\ref{eq_non_homogeneous_recurrence_relation}, but instead by a constant factor, with the equality as shown below, it is called a divide and conquer recurrence relation. - \begin{equation} - a_n=a\cdot a_{n/b}+f(n) - \label{divide_conquer_eq1} -\end{equation} -where the coefficient $a\geq 1$, $b>1$, and $f(n)$ is a given function, which usually has the form $f(n)= cn^k$. -%The special method for solving divide and conquer which is named \textit{master method} will be introduced in Chapter.~\ref{chapter_divide_conquer} when we have enough understanding of the term--divide and conquer. - - \end{itemize} -% \item Non-linear Recurrence Relation -% \end{itemize} -We will introduce general methods to solve a linear recurrence relation but leave out the divide and conquer recurrence relation in this chapter, because a divide and conquer recurrence relation will most likely be solved only roughly, as shown in Chapter.~\ref{chapter_complexity_analysis}, to just estimate the time complexity resulting from the divide and conquer method. - -\section{General Methods to Solve Linear Recurrence Relation} - No general method for solving recurrence function is known yet, however, linear recurrence relation with finite initial values and previous states, constant coefficients can always be solved. Due to the fact that the recursion is essentially mathematical induction, the most general way of solving any recurrence relation is to use \textit{mathematical induction} and \textit{iterative method}. 
This also makes the the mathematical induction, in some form, the foundation of all correctness proofs for computer programs. We examine these two methods by solving two recurrence relation: $a_n = 2\times a_{n-1} + 1, a_0 = 0$ and $a_n=a_{n/2} + 1$. - -\subsection{Iterative Method} -The most straightforward method for solving recurrence relation no matter its linear or non-linear is the \textit{iterative method}. Iterative method is a technique or procedure in computational mathematics that it iteratively replace/substitute each $a_n$ with its recurrence relation $\Psi(n, a_{n-1}, a_{n-2}, ..., a_{n-k})$ till all items ``disappear'' other than the initial values. Iterative method is also called substitution method. - -We demonstrate iteration with a simple non-overlapping recursion. -\begin{align} -\label{complexity_eq_binary_search} - T(n)&=T(n/2)+O(1)\\ - &=T(n/2^2)+O(1)+O(1)\notag\\ - &=T(n/2^3)+3O(1)\notag\\ - &=...\notag\\ - &=T(1)+kO(1) -\end{align} -We have $\frac{n}{2^k}=1$, we solve this equation and will get $k=\log_2 n$. Most likely $T(1)=O(1)$ will be the initial condition, we replace this, and we get $T(n)=O(\log_2 n)$. - -However, when we try to apply iteration on the third recursion: $T(n)=3T(n/4)+O(n)$. It might be tempting to assume that $T(n)=O(n\log n)$ due to the fact that $T(n)=2T(n/2)+O(n)$ leads to this time complexity. -\begin{align} -\label{complexity_non_overlap_1} - T(n)&=3T(n/4)+O(n)\\ - &=3(3T(n/4^2)+n/4)+n=3^2T(n/4^2)+n(1+3/4)\notag\\ - &=3^2(3T(n/4^3)+n/4^2)+n(1+3/4)=3^3T(n/4^3)+n(1+3/4+3/4^2)\\ - &=...\\ - &=3^kT(n/4^k)+n\sum_{i=0}^{k-1}(\frac{3}{4})^{i} -\end{align} -\subsection{Recursion Tree} -Since the term of T(n) grows, the iteration can look messy. We can use recursion tree to better visualize the process of iteration. In a recursive tree, each node represents the value of a single subproblem, and a leaf would be a subproblem. 
As a start, we expand $T(n)$ as a node with value $n$ as root, and it would have three children, each representing a subproblem $T(n/4)$. We further do the same with each leaf node, until the subproblem is trivial and becomes a base case. In practice, we just need to draw a few layers to find the rule. The cost will be the sum of costs of all layers. The process can be seen in Fig.~\ref{fig:recursive_tree}. -\begin{figure}[!ht] - \centering - \includegraphics[width=0.98\columnwidth]{fig/recursion_tree_non_overlap.png} - \caption{The process to construct a recursive tree for $T(n) = 3T(\floor*{n/4}) + O(n)$. There are $k+1$ levels in total. Use a better figure. } - \label{fig:recursive_tree} -\end{figure} - In this case, it is the base case $T(1)$. Through the expansion with iteration and recursion tree, our time complexity function becomes: -\begin{align} -\label{complexity_non_overlap_2} - T(n)&=\sum_{i=1}^{k}L_i + L_{k+1}\\ - &=n\sum_{i=1}^{k}(3/4)^{i-1}+3^kT(n/4^k) -\end{align} - -In the process, we can see that Eq.~\ref{complexity_non_overlap_2} and Eq.~\ref{complexity_non_overlap_1} are the same. Because $T(n/4^k)=T(1)=1$, we have $k=\log_4 n$. -\begin{align} -\label{complexity_non_overlap_3} - T(n)&\leq n\sum_{i=1}^{\infty}(3/4)^{i-1}+3^kT(n/4^k)\\ - &\leq 1/(1-3/4)n+3^{\log_4 n} T(1)= 4n+n^{\log_4 3}\\ - &\leq 5n \\ - &=O(n) -\end{align} - - - -\subsection{Mathematical Induction} -Mathematical induction is a mathematical proof technique, and is essentially used to prove that a property $P(n)$ holds for every natural number $n$, i.e. for $n=0, 1, 2, 3$, and so on. Therefore, in order to use induction, we need to make a \textit{guess} of the closed-form solution for $a_n$. Induction requires two cases to be proved. -\begin{enumerate} - \item - \textit{Base case:} proves that the property holds for the number $0$. -\item \textit{Induction step:} proves that, if the property holds for one natural number $n$, then it holds for the next natural number $n+1$. 
-\end{enumerate} - -For $T(n)=2\times T(n-1) +1, T(0) = 0$, we can have the following result by expanding $T(i), i \in [0, 7]$. -\begin{lstlisting}[numbers=none] -n 0 1 2 3 4 5 6 7 -T_n 0 1 3 7 15 31 63 127 -\end{lstlisting} -It is not hard to find the rule and guess $T(n) = 2^n-1$. Now, we prove this equation by induction: -\begin{enumerate} - \item Show that the basis is true: $T(0) = 2^0 -1 = 0$. - \item Assume it holds true for $T(n-1)$. By induction, we get - \begin{align} - T(n)&=2T(n-1) + 1 \\ - &=2 (2^{n-1} - 1) + 1 \\ - &= 2^n -1 - \end{align} - Thus we have shown that the induction step holds true too. -\end{enumerate} - -\begin{bclogo}[couleur = blue!30, arrondi=0.1,logo=\bccrayon,ombre=true]{Solve $T(n)=T(n/2)+O(1)$ and $T(2n)\leq2T(n)+2n-1, T(2)=1$.} -\end{bclogo} - - -\paragraph{Briefing on Other Methods} -When the form of the linear recurrence is more complex, say a large order $k$ or a more complex $f(n)$, neither the iterative method nor the induction method is practical and manageable. For the iterative method, the expansion will be way too messy for us to handle. On the side of the induction method, it is quite challenging or sometimes impossible for us just to ``guess'' or ``generalize'' the exact closed-form of recurrence relation solution purely based on observing a range of expansion. - -The more general and approachable method for solving homogeneous linear recurrence relation derives from making a rough guess rather than an exact guess, and then solving it via the \textit{characteristic equation}. This general method is pinpointed in Section.~\ref{subsec_homogeneous_linear_recurrence} with examples. For non-homogeneous linear recurrence relation (Section.~\ref{subsec_non_homogeneous}), there are generally two ways -- \textit{symbolic differentiation} and \textit{method of undetermined coefficients} to solve non-homogeneous linear recurrence relation and both of them relate to solving homogeneous linear relation. 
The study of the remaining content is the most math saturated in the book, while we later on will find out its tremendous help in complexity analysis in Chapter.~\ref{chapter_complexity_analysis} and potentially in problem solving. - -% \paragraph{Examples} Maybe - -% \subsection{Solving Linear Recursion} -\section{Solve Homogeneous Linear Recurrence Relation} - -\label{subsec_homogeneous_linear_recurrence} -In this section, we offer a more general and more manageable method for solving recurrence relation that is homogeneous defined in Eq.~\ref{eq_homogeneous_recurrence_relation}. There are three broad methods: using characteristic equation which we will learn in this section, and the other two-- {linear algebra, and Z-transform}~\footnote{Visit \url{https://en.wikipedia.org/wiki/Recurrence_relation} for details.} will not be included. -\paragraph{Make a General ``Guess''} From our previous examples, we can figure out the closed-form solution for simplified homogeneous linear recurrence such as the fibonacci recurrence relation: -\begin{equation} -a_n = a_{n-1}+a_{n-2}, a_0=0, a_1=1 -\label{homogeneous_linear_recurrence_guess} -\end{equation} -A reasonable guess would be that $a_n$ is doubled every time; namely, it is approximately $2^n$. Let's guess $a_n=c2^n$ for some constant $c$. Now we substitute this guess into Eq.~\ref{homogeneous_linear_recurrence_guess}, and we get -\begin{equation} -c2^n > c2^{n-1} + c2^{n-2} = \frac{3}{4}c2^n -\label{homogeneous_linear_recurrence_guess_substituted} -\end{equation} -We can see that $c$ will be canceled and the left side is always greater than the right side for any $c>0$. Thus we learned that $c2^n$ is a too large guess, and the multiplicative constant $c$ plays no role in the induction step. - -Based on the above example, we introduce a parameter $\gamma$ as a base, $a_n = \gamma ^{n}$ for some $\gamma$. We then compute its value through solving \textit{Characteristic Equation} as introduced below. 
-\paragraph{Characteristic Equation} -Now, we substitute our guess into the Eq.\ref{eq_homogeneous_recurrence_relation}, then -\begin{align} - \gamma^n & = a_n \\ - &= c_1 \gamma^{n-1} + c_2 \gamma^{n-2} + ... + c_k \gamma^{n-k}. - \label{eq_characteristic_equation_1} -\end{align} -We rewrite Eq.~\ref{eq_characteristic_equation_1} as: -\begin{align} - \gamma^n - c_1 \gamma^{n-1} - c_2 \gamma^{n-2} - ... - c_k \gamma^{n-k} = 0. - \label{eq_characteristic_equation_2} -\end{align} -By dividing $\gamma^{n-k}$ from left and right side of the equation, we get the simplified equation, which is called the \textit{characteristic equation} of the recurrence relation in the form of Eq.~\ref{eq_homogeneous_recurrence_relation}. -\begin{align} - \gamma^k - c_1 \gamma^{k-1} - c_2 \gamma^{k-2} - ... - c_k = 0. - \label{eq_characteristic_equation_3} -\end{align} -The concept of characteristic equation is related to generating function\footnote{}. The solutons of characteristic equation are called \textit{characteristic roots}. - -\paragraph{Characteristic Roots and Solution} Now, we have a linear homogeneous recurrence relation and its characteristic equation, -% \begin{align} -% a_n&=c_1a_{n-1} + c_2a_{n-2} + ... + c_k a_{n-k}. \\ -% 0&= \gamma^k - c_1 \gamma^{k-1} - c_2 \gamma^{k-2} - ... - c_k. 
-% \end{align} -and assume that the equation has $k$ distinct roots, $\gamma_1, \gamma_2, ..., \gamma_k$, then we can build upon these characteristic roots, the general guess, and some other $k$ constants, $d_1, d_2, ..., d_k$ of $\{a_n\}$ as: -\begin{align} - a_n = d_1\gamma_1^n + d_2\gamma_2^n +...+d_k\gamma_k^n -\end{align} -The unknown constants, $d_1, d_2, ..., d_k$ of $\{a_n\}$ can be found using the initial values $a_0, a_1, ..., a_{k-1}$ by solving the following equations: -\begin{align} - a_0 &= d_1\gamma_1^0 + d_2\gamma_2^0 +...+d_k\gamma_k^0,\\ - a_1 &= d_1\gamma_1^1 + d_2\gamma_2^1 +...+d_k\gamma_k^1, \\ - &...,\\ - a_{k-1} &= d_1\gamma_1^{k-1} + d_2\gamma_2^{k-1} +...+d_k\gamma_k^{k-1}. -\end{align} -Within the context of computer science, the degree is mostly within 2. Here, we introduce the formula for solving the characteristic roots of a characteristic equation with the following form: -\begin{equation} - 0 = ax^2+bx+c -\end{equation} -% The root(s) of the function is the value(s) of $x$ which makes $f(x)=0$. -The root(s) can be computed from the following formula~\footnote{Visit {http://www.biology.arizona.edu/biomath/tutorials/Quadratic/Roots.html} for derivation} : -\begin{equation} - x = \frac{-b \pm \sqrt{b^2-4ac}}{2a} -\end{equation} -\paragraph{Hands-on Example} -For $a_n = 2a_{n-1} + 3a_{n-2}, a_0=3, a_1=5$, we can write the characteristic equation as $\gamma^2-2\gamma-3=0$. Because $\gamma^2-2\gamma-3 = (\gamma-3)(\gamma+1)$, the characteristic roots are $\gamma_1=3, \gamma_2=-1$. Now our solution has the form: -\begin{align} - a_n = d_1 \cdot 3^n+d_2{(-1)}^{n} -\end{align} -Now, we find the constants via listing the initial values we know: -\begin{align} - a_0 &= d_1 \cdot 3^0+d_2{(-1)}^{0} = d_1+d_2=3, \\ - a_1 &= d_1 \cdot 3^1+d_2{(-1)}^{1} = 3d_1-d_2=5. -\end{align} -We would get $d_1=2, d_2=1$. Finally, we have a solution $a_n = 2\times 3^n+(-1)^n$. 
-% \paragraph{Linear Algebra} -% \paragraph{Z-transform} -\begin{bclogo}[couleur = blue!30, arrondi=0.1,logo=\bccrayon,ombre=true]{Continue to solve $a_n=a_{n-1}+a_{n-2}$.} -\end{bclogo} - -\section{Solve Non-homogeneous Linear Recurrence Relation} -\label{subsec_non_homogeneous} - There are two methods for solving a non-homogeneous linear recurrence relation: the \textit{method of undetermined coefficients}, where the solution is comprised of the solution of the homogeneous part and the particular $f(n)$ part by summing up; and the method of \textit{symbolic differentiation}, which converts the equation into the same form as a homogeneous linear recurrence relation. - -The complexity analysis for most algorithms falls into the form of non-homogeneous linear recurrence relation. For example: in fibonacci sequence, if it is solved by using recursion shown in Chapter.~\ref{chapter_dynamic-programming} without caching mechanism, the time recurrence relation is $T(n)=T(n-1)+T(n-2)+1$; in the merge sort discussed in Chapter.~\ref{chapter_divide_conquer}, the recurrence relation is $T(n)=2T(n/2)+n$. Examples of recurrence relation $T(n)=T(n-1)+n$ can be easily found, such as the maximum subarray. -% \subsection{Solving None-linear Recursion} - -\paragraph{Method of Undetermined Coefficients} Suppose we have a recurrence relation in the form of Eq.~\ref{eq_non_homogeneous_recurrence_relation}. - -Suppose we ignore the non-homogeneous part $f(n)$ and just look at the homogeneous part: -\begin{equation} - h_n=c_1h_{n-1} + c_2h_{n-2} + ... + c_k h_{n-k}. - \label{eq_non_homogeneous_recurrence_relation_2} -\end{equation} - -\paragraph{Symbolic Differentiation} - - - -% \section{Hands-on Examples} -% \label{sec_iter_recur_examples} -\section{Useful Math Formulas} -Knowing these facts can be very important in practice; we can treat each as an element in problem solving. Sometimes, when it is hard to get the closed form of a recurrence relation or to find the recurrence relation, we decompose it into multiple parts with these elements. Put some examples. 
-\paragraph{binomial theorem} -\begin{align} - \sum_{k=0}^{n}C_{n}^{k}x^k = (1+x)^n -\end{align} -An example of using this the cost of generating a powerset, where $x=1$. -\section{Exercises} -\begin{enumerate} - \item Compute factorial sequence using \texttt{while} loop. - \item Greatest common divisor: The Euclidean algorithm, which computes the greatest common divisor of two integers, can be written recursively. - \begin{equation} - gcd(x, y)= - \begin{cases} - x & \text{if $y=0$,}\\ - gcd(y, x\% y) & \text{if $y>0$} -\end{cases} - \end{equation} - -Function definition: -\end{enumerate} - -\section{Summary} -If a cursive algorithm can be further optimized, the optimization method can either be divide and conquer or decrease and conquer. We have put much effort into solving recurrence relation of both: the linear recurrence relation for decrease and conquer, the divide and conquer recurrence relation for divide and conquer. Right now, do not struggle and eager to know what is divide or decrease and conquer, it will be explained in the next two chapters. - -Further, Akra-Bazzi Method~\footnote{} applies to recurrence such that $T(n)=T(n/3)+T(2n/3)+O(n)$. Please look into more details if interested. Generating function is used to solve the linear recurrence. -\end{document} \ No newline at end of file diff --git a/Easy-Book/chapters/chapter_reduce_and_conquer.tex b/Easy-Book/chapters/chapter_reduce_and_conquer.tex deleted file mode 100644 index 6142a49..0000000 --- a/Easy-Book/chapters/chapter_reduce_and_conquer.tex +++ /dev/null @@ -1,337 +0,0 @@ -\documentclass[../main.tex]{subfiles} -\begin{document} -\chapter{Reduce and Conquer} -\label{chapter_divide_conquer} -\begin{chapquote} -{Albert Einstein, } -``Everything should be made as simple as possible, but not simpler.'' -\end{chapquote} - -This chapter is the essence of the algorithmic problem solving--`Reduction'. 
- - -\begin{importantnote} -Reduction is the essence of problem solving, and self-reduction is the ``center'' of the essence. Recurrence relation is our tool from math. The correctness of self-reduction is proved with mathematical induction. And the complexity analysis relies on solving recurrence relation. -\end{importantnote} - - -\section{Introduction} -\paragraph{Story}Imagine that your mom asks you to get 10,000 pounds of corns, what would you do? First, you would think, where should I get the corns? I can go to Walmart or I can go to grow the corns in the farm. This is when one problem/task is reduced to some other problems/tasks. Solving the other ones means you solved your assignment from your mom. This is one example of the reduction; converting problem $A$ to problem $B$. - -Now, you are at Walmart and are ready to load the 10,000 pounds of bagged corns, but the trunk of your car can not fit all corns at once. You just decide that you want to do 10 rounds of loading and transporting to home. Now, your task becomes loading 1,000 pounds of corns. After you are done with this, you just solved a subtask--getting 1,000 pounds of corns. In the second round, you load another 1,000 pounds. You solved another subtask--getting 2,000 pounds of corns. After 10 rounds in total, you will solve the original task. This is the other side of reduction, reduce one problem to one or multiple smaller instances of itself. - - -\paragraph{Definition of Reduction} In computational theory and computational complexity theory, a reduction is an algorithm for transforming one problem $A$ into another problem or other problems. There are two types of reduction: -\begin{enumerate} - - \item \textbf{Self-Reduction}: it can also be a problem that are just a smaller instance or we say \textit{subproblems} of itself, say if the original problem is $a_n$, then the smaller problems can be $a_{n/2}$, $a_{n-1}$, $a_{n-2}$, and so on. 
Self-reduction is a recursive process; we reduce the problem into one or more subproblems of smaller size recursively until the subproblem is small enough to be a base case. We need to differentiate whether the subproblem is reduced by constant factor or just by constant size. -\begin{itemize} - \item If it is by constant size, say $a_{n-k}$, this will characterize \textit{searching}, \textit{dynamic programming} and \textit{greedy algorithm}. - \item If it is by constant factor, say in the form of $a_{n/b}$, b is integer $b\geq 2$, this can be used to characterize \textit{divide and conquer} which we detail on it further in Section.~\ref{sec_divide_conquer}. - -\end{itemize} - The \textbf{Recurrence relations} which we have put so much effort on in last chapter will conveniently represent and interpret the relation between problem and its subproblems in self-reduction. Optionally, we can also use \textit{recursion tree} to help with visualization. In the next two sections, we shall see how and discuss additional techniques for each type. - \item \textbf{$A$ to $B$}: The other problem can be a totally different problem, say $B$. Intuitively, if we know how to solve $B$, this induces a solution to $A$. On the other hand, this also means that if any of $A$ and $B$ is unsolvable, then it indicates or proves that the other is unsolvable. More details will be given in Section.~\ref{chapter_reduce_conquer_sec_a_b}. -\end{enumerate} - -\subsubsection{Reducing a Problem to Subproblem(s)} ``Reducing'' a problem into subproblem(s) as the first step of using self-reduction will result potentially two types of subproblems: \textit{non-overlapping subproblems} and \textit{overlapping subproblems}. - -\paragraph{Non-overlapping subproblems} Like cutting a rod into multiple pieces, the resulting subproblems each stand alone, disjoint with each other and become another rod which is just smaller. 
The most general way is to divide equally, thus conventionally $a_{n/b}$ means the problem is reduced into non-overlapping subproblems and each with size $n/b$. - -\paragraph{Overlapping subproblems} Different from the non-overlapping problems, the feature of overlapping problem is more abstract. Easily put, it means subproblems share subproblems. Say $a_n$ is reduced to $a_{n-1}$ and $a_{n-2}$, and according to this recursive rule, $a_{n-1}$ will be reduced to $a_{n-2}$ and $a_{n-3}$. Now we can see that problem $a_n$ and $a_{n-1}$ both share a common subproblem $a_{n-2}$, this is to say that these problems might overlap. Overlapping subproblems is one of the signals that further optimization might apply, which is detailed in Dynamic programming in Chapter.~\ref{chapter_dynamic-programming}. - - -\subsubsection{Self-Reduction and Mathematical Induction} -The word `self-Reduction' is not commonly or even put under the umbrella of `reduction'. In other materials, you might see that the content of self-reduction appears in the form of mathematical induction\footnote{Such as \textit{Introduction to Algorithms: A Creative Approach}.}. Self-Reduction and Mathematical Induction are inseparable, as self-reduction can be represented with recurrence relation, and the mathematical induction is the most straightforward and powerful tool to prove its correctness and their concentration aligns--``concentrating on reducing a problem and solving subproblems rather than solving it directly''. - -Mathematical induction can guide us to reduce the problem: we assume we know the solution from problems of size $a_{n/b}$, or $a_{n-k}$, we focus on how to construct a solution for $a_n$ with solutions to our subproblems such as $a_{n/b}$ and $a_{n-k}$. - -We will further see the distinction of these two characteristics of problems in our following examples. 
- - - -%%%%%%%%%%%%%%%%%%%%%%%%Divide and conquer%%%%%%%%%%%%%%%%%%%%%%% -\section{Divide and Conquer} -\subsection{Concepts} -\label{sec_divide_conquer} -\begin{figure}[h] - \centering - \includegraphics[width=0.8\columnwidth]{fig/divide_conquer.png} - \caption{Divide and Conquer Diagram} - \label{fig:divide_conquer} -\end{figure} -Divide and conquer is the most fundamental problem solving paradigm for computer programming; the strategy is to divide a problem into smaller problems recursively until the subproblem is trivial to solve. In more details, it consists of two process: -\begin{enumerate} - \item \textbf{Divide:} divide one problems into a series of non-overlapping subproblems that are smaller instances of the same problem recursively until reaching to the \textit{bases cases}, when the subproblem is trivial to solve. Usually, the problem is divided equally, and most likely breaking into half and half. We say a problem of size $n$, denote as $p_n$ is divided into $a$ subproblems and each with size $n/b$, denote as $a p_{n/b}$, $a, b$ are mostly integer and $a\geq 1, b\geq 2$. As we explained in Chapter.~\ref{chapter_iteration_recursion}, this process happens at the top-down pass. - \item \textbf{Conquer:} this step means that in the bottom-up pass, say we have the solutions of the $a$ subproblems each with size $n/b$ available, we need to \textit{combine} these solutions to our current problem of size $n$. -\end{enumerate} -We can interpret the divide and conquer with a recurrence relation as in Eq.~\ref{divide_conquer_relation} - \begin{equation} - p_n= \Psi(n,a p_{n/b}) - \label{divide_conquer_relation} - \end{equation} - $\Psi$ will no longer be a function any more, but instead it represents the operations needed to combine the the solutions to subproblems to solution of current problem, $n$ means the size of the combined solutions will be mostly $n$, which also means $n$ elements. 
- -\paragraph{Decrease and Conquer} -When $a=1$, each problem reduced to only one sub-problem, and this case is named as \textbf{Decrease and Conquer}. Decrease and conquer will reduce search space each step. If our time complexity is $T(n) = T(n/2)+O(1)$, we get $O(\log n)$. This decrease and conquer method cuts the search space into half of its original at each step until it reaches its target. Because logarithmic is way faster even compared with linear, this is a significant efficiency growth. We will discuss classical algorithms with this paradigm such as Binary Search, Binary Search Tree, Segment Tree in the next chapter. -\subsubsection{Common Applications of Divide and Conquer} - Divide-and-conquer is mostly used in some well-developed algorithms and some data structures. In this book, we covered the follows: -\begin{itemize} - \item Various sorting algorithms like Merge Sort, Quick Sort (Chapter~\ref{chapter_sorting}); - \item Binary Search (Section~\ref{sec_binary_search}); - \item Heap(Section~\ref{sec_heap}); - \item Binary Search Tree (Section~\ref{sec_binary_search_tree}); - \item Segment Tree(Section~\ref{sec_segment_tree}). -\end{itemize} - -% We can further represent its time complexity with recurrence relation: -% \begin{equation} \label{dp_equation} -% \begin{split} -% T(n) &= T(n-1) + T(n-2) +...+T(1) + f(n)\\ -% \end{split} -% \end{equation} - -\subsection{Hands-on Examples} -\subsubsection{Merge Sort} -The concept can be quite dry, let us look at a simple example of merge sort. Given an array, [2, 5,1,8,9], the task is to sort the array to [1, 2, 5, 8, 9]. To apply divide and conquer, we first divide it into two halves: [2, 5, 1], [8, 9], sort each half and with return result [1, 2, 5], [8, 9], and now we just need to merge the two parts. 
The process can be represented as the following: -\begin{lstlisting}[language=Python] -def divide_conquer(A, s, e): - # base case, can not be divided farther - if s == e: - return A[s] - # divide into n/2, n/2 from middle position - m = (s+e) // 2 - - # conquer - s1 = divide_conquer(A, s, m) - s2 = divide_conquer(A, m+1, e) - - # combine - return combine(s1, s2) -\end{lstlisting} -\begin{figure}[h!] - \centering - - \includegraphics[width=0.9\columnwidth]{fig/merge_sort.png} - \caption{Merge Sort with non-overlapping subproblems where subproblems form a tree} - \label{fig:merge_sort_tree} -\end{figure} -This process can be visualized in Fig.~\ref{fig:merge_sort_tree}. From the visualization, we can clearly see that all subproblems form a tree and they never interact or overlap with each other, and each subproblem will only be visited once. - -\subsubsection{Maximum Subarray (53. medium).} -Find the contiguous subarray within an array (containing at least one number) which has the largest sum. -\begin{lstlisting}[numbers=none] -For example, given the array [-2,1,-3,4,-1,2,1,-5,4], - the contiguous subarray [4,-1,2,1] has the largest sum = 6. -\end{lstlisting} -\textbf{Solution: divide and conquer.} $T(n) = max(T(left),T(right), T(cross))$, max is for merging and the T(cross) is for the case that the potential subarray across the mid point. For the complexity, $T(n)=2T(n/2)+n$, if we use the master method, it would give us $O(nlgn)$. 
We write the following Python code -\begin{lstlisting}[language = Python] -def maxSubArray(self, nums): - """ - :type nums: List[int] - :rtype: int - """ - def getCrossMax(low,mid,high): - left_sum,right_sum =0,0 - left_max, right_max = -maxint, -maxint - left_i,right_j=-1,-1 - for i in xrange(mid,low-1,-1): #[) - left_sum+=nums[i] - if left_sum>left_max: - left_max= left_sum - left_i = i - for j in xrange(mid+1,high+1): - right_sum+=nums[j] - if right_sum>right_max: - right_max= right_sum - right_j = j - return (left_i,right_j,left_max+right_max) - - def maxSubarray(low,high): - if low==high: - return (low,high, nums[low]) - mid = (low+high)//2 - rslt=[] - #left_low, left_high, left_sum = maxSubarray(low,mid) #[low,mid] - rslt.append(maxSubarray(low,mid)) #[low,mid] - #right_low,right_high,right_sum = maxSubarray(mid+1,high)#[mid+1,high] - rslt.append(maxSubarray(mid+1,high)) - #cross_low,cross_high,cross_sum = getCrossMax(low, mid, high) - rslt.append(getCrossMax(low, mid, high)) - return max(rslt, key=lambda x: x[2]) - return maxSubarray(0,len(nums)-1)[2] -\end{lstlisting} - - - - - -%%%%%%%%%%%%%%%%%%%%%%%%%%%Reduction by constant size%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\section{Constant Reduction} -\label{chapter_reduce_conquer_constant_size} -\subsection{Concepts} -In this category, problem instance of size $n$ is reduced to one or more instances of size $n-1$ or less recursively until the subproblem is small and trivial to solve. This process can be interpreted with Eq.~\ref{dp_relation_by_constant_size}. - \begin{equation} - p_n = \Psi(n, p_{n-1}, p_{n-2}, ..., p_{n-k}) \text{ for $n \leq k$,} - \label{dp_relation_by_constant_size} - \end{equation} -The number of subproblems that a current problem relies on should be as less as possible. The ideal option is when it only relates to $n-1$, this is the case of an exhaustive search, which can be implemented easily both with recursion and iteration. 
- -\subsubsection{Overlapping Subproblems} When the number of the subproblems appears in this relation is larger or equals to 2, the subproblems might overlap. This implies that a straightforward recursion based solution without optimization will be expensive because these overlapped problems are solved again and again; the optimization is possible with dynamic programming or greedy algorithm shown in Part.~\ref{part_dp_greedy} which optimize it using \textit{caching mechanism} by saving the solution of each subproblem and thus avoiding recomputation. However, to stick to just the reduction itself, we delay our examples' possible optimization to Part.~\ref{part_dp_greedy}. - -\subsubsection{Subproblem Space} To count all possible subproblems-- the subproblem space--is important for us to understand the complexity. For array, a subproblem can be a subarray that $[a_i,...,a_j], i=2. Return the value for any given n. -\end{lstlisting} -The above is the classical Fibonacci Sequence, to get the fibonacci number at position n, we first need to know the answer for subproblems f(n-1) and f(n-2), we can solve it easily using recursion function: -\begin{lstlisting}[language=Python] -def fib(n): - if n <= 1: - return n - return fib(n-1) + fib(n-2) -\end{lstlisting} -The above recursion function has recursion tree shown in Fig~\ref{fig:fibonacci_number}. And we also draw the recursion tree of recursion function call for merge sort and shown in Fig~\ref{fig:merge_sort_tree}. We notice that we call f(2) multiple times for fibonacci but in the merge sort, each call is unique and wont be called more than once. The recurrence function of merge sort is $T(n) = 2*T(n/2)+n$, and for fibonacci sequence it is $T(n) = T(n-1)+T(n-2)+1$. - -\paragraph{Maximum Subarray} Using reduction by constant size, the problem is to find a subarray $a_i,a_{i+1}, ...,a_{j}, i>=0, i\geq j=0$ in the array that has the maximum value. We simply only have $n$ candidates to compare. 
To constrcut this reduced problem B back to A, the maximum subarray of A is the maximum subarray of $n$ problems of B, say $[a_0]$, $[a_0, a_1]$, ..., $[a_0,...,a_{n-1}]$. The rule of reduction can happen into B, that is case $j=n-1$ in the original problem is enough to construct it. We can write $p_n=\max(p_{n-1}, p_{n-1}+a_n, a_n)$. - -In the array, a \textit{suffix} is defined as any subarry which includes its last item. Another way to put the induction hypothesis into the problem B: We know how to find the maximum suffix of size $k O(n), then we use prefix\_sum, the difference is we set prefix\_sum to 0 when it is smaller than 0, O(n) -\begin{lstlisting}[language = Python] -from sys import maxint -class Solution(object): - def maxSubArray(self, nums): - """ - :type nums: List[int] - :rtype: int - """ - max_so_far = -maxint - 1 - prefix_sum= 0 - for i in range(0, len(nums)): - prefix_sum+= nums[i] - if (max_so_far < prefix_sum): - max_so_far = prefix_sum - - if prefix_sum< 0: - prefix_sum= 0 - return max_so_far -\end{lstlisting} -% We know if the array is empty, then the empty array is our target with 0 as its maximum sum. For any non-empty array, what is the possible solution? It can be zero is all items in the array are negative, otherwise it must be a subarray that has a non-negative sum with at least one item. From another angel, the problem equals to a problem that find the maximum value between the maximum subarray for $n$ subsequences that ends at each index--suffixs. For example, in this case, it becomes find the maximum value of the following problems: - - - - - - - - - - - - - - - - - - - - - -\section{The Skyline Problem} -Define and solve it by two cases. - -Both skyline problem and maximum subarray problem has illustrated how we can use reduction to solve our problem, either self-reduction or the $A$ to $B$ is used. The real algorithm design is usually a composite of multiple design steps and methods. 
- -% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -% %%%%%%%%%%%%% Recursive Programming -% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -% \section{Recursive Programming} -% For recursive function, we can draw recursive tree to denote the state transfer graph. With recursive function, it can simplify the programming of certain programs, once we mastered recursive function, we would feel it is a lot simpler than iterative implementation. However, it does take extra space complexity compared with the iterative implementation. - -% \paragraph{Recursion} - -% \paragraph{Recurrence Function} For recusrive program, we need to figure out the recursive transfer function between $f(i)$ and the next level $f(i+1)$. For example, the fibonacci number we could have $f(i) = f(i-1) + f(i-2)$. For the merge sort, we cant use a math function to represent this operation, we have $T(n) = 2*T(n/2) + O(n)$. $f(i) = merge(f(left), f(right))$. Some recursive function would have redundancy which we can improve the efficiency and avoid to compute the same subproblem twice by memoization which saves the result, and the iterative peer of the recursive implementation is called \textit{dynamic programming}. We will discuss the dynamic programming in details in the following chapter ~\ref{dynamic-programming}. Some other recursive function, there is no overlaps between subproblems which are the \textit{divide and conquer} cases, which will be discussed in detain in chapter~\ref{divide-conquer}. Or we have the universal \textit{Depth-first-searching} which can be implemented with recursive function, we will include this in chapter~\ref{searching}. 
- -% % \textbf{Recursive Function and Tree Structure}: - - - -% % 递归函数关注以下几个因素 -% % ·退出条件 -% % ·参数有哪些 -% % ·返回值是什么 -% % ·局部变量有哪些 -% % ·全局变量有哪些 -% % ·何时输出 -% % ·会不会导致堆栈溢出 - - - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%%%%%%%%%%%%% Exercises -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\section{Exercises} -\begin{enumerate} - \item Binary Search. - \item Use Self-Reduction by constant size to solve maximum subarray problem. - \item Skyline problem. -\end{enumerate} - -\end{document} \ No newline at end of file diff --git a/Easy-Book/chapters/chapter_search_strategies_back_up.tex b/Easy-Book/chapters/chapter_search_strategies_back_up.tex deleted file mode 100644 index ddd070b..0000000 --- a/Easy-Book/chapters/chapter_search_strategies_back_up.tex +++ /dev/null @@ -1,1789 +0,0 @@ -\documentclass[main.tex]{subfiles} -\begin{document} -\chapter{Search Strategies} - -Searching~\footnote{\url{https://en.wikipedia.org/wiki/Category:Search_algorithms}} is one of the most effective tools in algorithms. We have seen them being widely applied in the field of artificial intelligence to offer either exact or approximate solutions for complex problems such as puzzles, games, routing, scheduling, motion planning, navigation, and so on. On the spectrum of discrete problems, nearly every single one can be modeled as a searching problem together with enumerative combinatorics and \textbf{optimizations}. The searching solutions serve as either naive baselines or even as the only existing solutions for some problems. Understanding common searching strategies as the main goal of this chapter along with the search space of the problem lays the foundation of problem analysis and solving, it is just indescribably \textbf{powerful} and \textbf{important}! 
- - - -% In this chapter, we focus oncover the searching strategies from every possible aspect to make sure we understand it well and move on further. We - -% This chapter, we assume the graph is connected graph. - -% The basic concepts will be explained in Section.~\ref{section_general_searching_strategies}. - -% After this, we head out to apply these strategies given explicit data structures for linear data structure in Sec.~\ref{section_linear_search}, Tree data structure in Sec.~\ref{section_tree_traversal} which are also called tree traversal algorithms, Graph data structures in Sec.~\ref{section_graph_search}. Along the applications, we will analyze them in terms of time and space complexity, completeness and optimality. - -% At the end, we will compare the difference of applying the searching strategies on the tree and graph data structures mainly how they would affect the completeness and optimaltities. - -\section{Introduction} -\label{section_linear_search} - -Linear, tree-like data structures, they are all subsets of graphs, making graph searching universal to all searching algorithms. There are many searching strategies, and we only focus on a few decided upon the completeness of an algorithm--being absolutely sure to find an answer if there is one. - -Searching algorithms can be categorized into the following two types depending on if the domain knowledge is used to guide selection of tbe best path while searching: -\begin{enumerate} - \item Uninformed Search: This set of searching strategies normally are handled with basic and obvious problem definition and are not guided by estimation of how optimistic a certain node is. The basic algorithms include: Depth-first-Search(DFS), Breadth-first Search(BFS), Bidirectional Search, Uniform-cost Search, Iterative deepening search, and so on. We choose to cover the first four. 
- \item Informed(Heuristic) Search: This set of searching strategies on the other hand, use additional domain-specific information to find a \textit{heuristic function} which estimates the cost of a solution from a node. Heuristics means ``serving to aid discovery''. Common algorithms seen here include: Best-first Search, Greedy Best-first Search, $A^{*}$ Search. And we only introduce Best-first Search. -\end{enumerate} - -% What types of searching algorithms are covered in this book? -% \begin{enumerate} -% \item Combinatorial Search. -% \item Backtracking. -% \item Breath-First Search. -% \item Depth-First Search -% \end{enumerate} - -Following this introductory chapter, in Chapter.~\ref{chapter_search_problem_combinatorics}, we introduce combinatorial problems and its search space, and how to prune the search space to search more efficiently. - -Because the search space of a problem can either be of linear or tree structure--an implicit free tree, which makes the graph search a ``big deal'' in practice of problem solving. Compared with reduce and conquer, searching algorithms treat states and actions atomically: they do not consider any internal/optimal structure they might posses. We recap the \textbf{linear search} given its easiness and that we have already learned how to search in multiple linear data structures. - -\paragraph{Linear Search} As the naive and baseline approach compared with other searching algorithms, linear search, a.k.a sequential search, simply traverse the linear data structures sequentially and checking items until a target is found. It consists of a \texttt{for/while} loop, which gives as $O(n)$ as time complexity, and no extra space needed. 
For example, we search on list $A$ to find a target $t$: -\begin{lstlisting}[language=Python] -def linearSearch(A, t): #A is the array, and t is the target - for i,v in enumerate(A): - if A[i] == t: - return i - return -1 -\end{lstlisting} - -Linear Search is rarely used practically due to its lack of efficiency compared with other searching methods such as hashmap and binary search that we will learn soon. -\paragraph{Searching in Un-linear Space} -For the un-linear data structure, or search space comes from combinatorics, they are generally be a graph and sometimes be a rooted tree. Because mostly the search space forms a search tree, we introduce searching strategies on a search tree first, and then we specifically explore searching in a tree, recursive tree traversal, and search in a graph. - -\subsubsection{Generatics of Search Strategies} -Assume we know our state space, searching or state-space search is the process of searching through a state space for a solution by making explicit a sufficient portion of an implicit state-space graph, in the form of a search tree, to include a goal node. - -\paragraph{Nodes in Searching Process} -\begin{figure}[!ht] - \centering - \includegraphics[width=0.9\columnwidth]{fig/searchsp.png} - \caption{Graph Searching} - \label{fig:search_sp} -\end{figure} -In the searching process, nodes in the targeting data structure can be categorized into three sets as shown in Fig.\ref{fig:search_sp} and we distinguish the state of a node--which set they are at with a color each. -\begin{itemize} - \item Unexplored set--WHITE: initially all nodes in the graph are in the unexplored set, and we assign WHITE color. Nodes in this set have not yet being visited yet. 
- \item Frontier set--GRAY: nodes which themselves have been just discovered/visited and they are put into the \textit{frontier} set, waiting to be expanded; that is to say their children or adjacent nodes (through outgoing edges) are about to be discovered and have not all been visited--not all being found in the frontier set yet. This is an intermediate state between WHITE and BLACK, which is ongoing, visiting but not yet completed. A gray vertex might have adjacent vertices of all three possible states. -%The set of nodes that are available for expansion at any given point is called the \textbf{frontier}. - \item Explored set--BLACK: nodes have been fully explored after being in the frontier set; that is to say none of their children remains in the unexplored set. For a black vertex, all vertices adjacent to it are nonwhite.% And the nodes that are expanded are distinguished as the \texttt{explored} set. - -\end{itemize} -All searching strategies follow the general tree search algorithm: -\begin{enumerate} - \item At first, put the start node in the frontier set. -\begin{lstlisting} -frontier = {S} -\end{lstlisting} -\item Loop through the frontier set, if it is empty then searching terminates. Otherwise, pick a node $n$ from frontier set: -\begin{enumerate} - \item If $n$ is a goal node, then return solution - \item Otherwise, generate all of $n$'s successor nodes and add them all to frontier set. - \item Remove $n$ from frontier set. -\end{enumerate} -\end{enumerate} -The search process constructs a \textit{search tree} where the root is the start state. Loops in a graph may cause the search tree to be infinite even if the state space is small. In this section, we only use either an acyclic graph or a tree for demonstrating the general search methods. In an acyclic graph, there might exist multiple paths from the source to a target. For example, the graph shown in Fig.~\ref{fig:ucs} has multiple paths from S to G. 
Further in graph search section, we discuss how to handle cycles and explain single-path graph search. Changing the ordering in the frontier set leads to different search strategies. - - -%%%%%%%%%%%%%%%Uninformed search strategies%%%%%%%%%%%%%%%%%%% -\section{Uninformed Search Strategies} -% in Search Tree - -\begin{figure}[!ht] - \centering - \includegraphics[width=0.3\columnwidth]{fig/ucs.png} - \caption{Exemplary Acyclic Graph. } - \label{fig:ucs} -\end{figure} -Through this section, we use Fig.~\ref{fig:ucs} as our exemplary graph to search on. The data structure to represent the graph is as: -\begin{lstlisting}[language=Python] -from collections import defaultdict -al = defaultdict(list) -al['S'] = [('A', 4), ('B', 5)] -al['A'] = [('G', 7)] -al['B'] = [('G', 3)] -\end{lstlisting} -%\subsection{Uninformed Search} - -With uninformed search, we only know the goal test and the adjacent nodes, but without knowing which non-goal states are better. For now, we assume the state space is a tree so that we do not need to worry about repeated states. - -There are generally three ways to order nodes in the frontier without domain-specific information: -\begin{itemize} - \item Queue that nodes are first in and first out (FIFO) from the frontier set. This is called breadth-first search. - \item Stack that nodes are last in but first out (LIFO) from the frontier set. This is called depth-first search. - \item Priority queue that nodes are sorted increasingly in the path cost from source to each node from the frontier set. This is called Uniform-Cost Search. -\end{itemize} -%%%%%%%%%%%%%%%%%%BFS%%%%%%%%%%%%%%%%%%%%%%% -\subsection{Breadth-first Search} -\begin{figure}[!ht] - \centering - \includegraphics[width=0.96\columnwidth]{fig/general_breath_first_search.png} - \caption{Breadth-first search on a simple search tree. At each stage, the node to be expanded next is indicated by a marker. 
} - \label{fig:breath_first_search_strategy} -\end{figure} -Breath-first search always expand the shallowest node in the frontier first, visiting nodes in the tree level by level as illustrated in Fig.~\ref{fig:breath_first_search_strategy}. Using $Q$ to denote the frontier set, the search process is explained: -\begin{lstlisting}[numbers=none] -Q=[A] -Expand A, add B and C into Q -Q=[B, C] -Expand B, add D and E into Q -Q=[C, D, E] -Expand C, add F and G into Q -Q=[D, E, F, G] -Finish expanding D -Q=[E, F, G] -Finish expanding E -Q=[F, G] -Finish expanding F -Q=[G] -Finish expanding G -Q=[] -\end{lstlisting} -The implementation can be done with a FIFO queue iteratively as: -\begin{lstlisting}[language=Python] -def bfs(g, s): - q = [s] - while q: - n = q.pop(0) - print(n, end = ' ') - for v, _ in g[n]: - q.append(v) -\end{lstlisting} -Call the function with parameters as \texttt{bfs(al, 'S')}, the output is as: -\begin{lstlisting}[numbers=none] -S A B G G -\end{lstlisting} - -%Instead of traversing the tree recursively deepening down each time, the alternative is to visit the nodes level by level, - -\paragraph{Properties} Breath-first search is \textbf{complete} because it can always find the goal node if it exists in the graph. It is also \textbf{optimal} given that all actions(arcs) have the same constant cost, or costs are positive and non-decreasing with depth. - -\paragraph{Time Complexity} We can clearly see that BFS scans each node in the tree exactly once. If our tree has $n$ nodes, it makes the time complexity $O(n)$. However, the search process can be terminated once the goal is found, which can be less than $n$. Thus we measure the time complexity by counting the number of nodes expanded while searching is running. 
Assuming the tree has a branching factor $b$ at each non-leaf node and the goal node locates at depth $d$, we sum up the number of nodes from depth 0 to depth $d$, the total number of nodes expanded are: -\begin{align} - n &= \sum_{i=0}^{d} b^{i} \\ - &= \frac{b^{d+1} -1}{b-1} -\end{align} -Therefore, we have a time complexity of $O(b^d)$. It is usually very slow to find solutions with a large number of steps because it must look at all shorter length possibilities first.%$in cases that we do not know the total nodes we estimate it with the branching factor, say $b$, as about how many children a node can have. In a binary tree, this would be 2, and with the depth $d$ of the tree, we get the time complexity as of $O(b^d)$. -\paragraph{Space Complexity} -The space is measured in terms of the maximum size of frontier set during the search. In BFS, the maximum size is the number of nodes at depth $d$, making the $O(b^d)$ as the space complexity. - -% However, because the different traversal ordering will result in different space usage. The BFS's implementation does not rely on stack but queue. The first implementation is more straightforward, the upper bound of space usage of \texttt{nodes\_same\_level} decides our memory space. Given a tree with branching factor $b$ and depth $d$, the lowest level will potentially have the most nodes, which has $O(b^d)$. - -% In the implementation that uses a queue, the analysis of the space usage is slight more obscure. We know it is decided by the maximum nodes storing in the \texttt{q} at one moment. Imagine the state change of the queue: -% \begin{lstlisting}[numbers=none] -% d = 0, [A] -% d = 1, process A, [B, C] -% d = 2, process B, [C, D, E], process C, [D, E, F, G] -% \end{lstlisting} -% This means the maximum space of our nodes still the same as our previous implementation, saving the nodes as large as the last level, which is $O(b^d)$. 
The implementation using the queue avoids the usage of the temporary list which is a slight improvement. -%%%%%%%%%%%%%%%%%DFS%%%%%%%%%%%%%%%%%%% -\subsection{Depth-first Search} -\begin{figure}[!ht] - \centering - \includegraphics[width=0.96\columnwidth]{fig/general_depth_first_search.png} - \caption{Depth-first search on a simple search tree. The unexplored region is shown in light gray. Explored nodes with no descendants in the frontier are removed from memory as node L disappears. Dark gray marks nodes that is being explored but not finished. } - \label{fig:depth_first_search_strategy} -\end{figure} -Depth-first search on the other hand always expand the deepest node from the frontier first. As shown in Fig.~\ref{fig:depth_first_search_strategy}, Depth-first search starts at the root node and continues branching down a particular path. Using $S$ to denote the frontier set which is indeed a stack, the search process is explained: -\begin{lstlisting}[numbers=none] -S=[A] -Expand A, add C and B into S -S=[C, B] -Expand B, add E and D into S -S=[C, E, D] -Expand D -S=[C, E] -Expand E -S=[C] -Expand C, add G and F into S -S=[C, G, F] -Expand F -S=[C, G] -Expand G -S=[C] -Expand C -S=[] -\end{lstlisting} -Depth-first can be implemented either recursively or iteratively. -\paragraph{Recursive Implementation}In the recursive version, the recursive function keeps calling the recursive function itself to expand its adjacent nodes. Starting from a source node, it always deepen down the path until a leaf node is met and then it backtrack to expand its other siblings (or say other adjacent nodes). 
The code is as: -\begin{lstlisting}[language=Python] -def dfs(g, vi): - print(vi, end=' ') - for v, _ in g[vi]: - dfs(g, v) -\end{lstlisting} -Call the function with parameters as \texttt{dfs(al, 'S')}, the output is as: -\begin{lstlisting}[numbers=none] -S A G B G -\end{lstlisting} -\paragraph{Iterative Implementation} According to the definition, we can implement DFS with LIFO \texttt{stack} data structure. The code is similar to that of BFS other than using different data structure from the frontier set. -\begin{lstlisting}[language=Python] -def dfs_iter(g, s): - stack = [s] - while stack: - n = stack.pop() - print(n, end = ' ') - for v, _ in g[n]: - stack.append(v) -\end{lstlisting} -Call the function with parameters as \texttt{dfs\_iter(al, 'S')}, the output is as: -\begin{lstlisting}[numbers=none] -S B G A G -\end{lstlisting} -We observe that the ordering is not exactly the same as of the recursive counterpart. To keep the ordering consistent, we simply need to add the adjacent nodes in reversed order. In practice, we replace \texttt{$g[n]$} with \texttt{$g[n][::-1]$}. - -\paragraph{Properties} DFS may not terminate without a fixed depth bound to limit the amount of nodes that it expand. DFS is \textbf{not complete} because it always deepens the search and in some cases the supply of nodes even within the cutting off fixed depth bound can be infinitely. DFS is \textbf{not optimal}, in our example, of our goal node is C, it goes through nodes A, B, D, E before it finds node C. While, in the BFS, it only goes through nodes A and C. However, when we are lucky, DFS can find long solutions quickly. - -\paragraph{Time and Space Complexity} -For DFS, it might need to explore all nodes within graph to find the target, thus its worst case time and space complexity is not decided upon by the depth of the goal, but the total depth of the graph, $m$ instead. The time complexity of DFS is $O(b^m)$. 
- -The stack will at most stores a single path from the root to a leaf node (goal node) along with the remaining unexpanded sibling nodes for each node on the path. Therefore, the space that needed for DFS is $O(bm)$. In most cases, the branching factor is a constant, which makes the space complexity be mainly influenced by the depth of the search tree. Obviously, DFS has great efficiency in space, which is why it is adopted as the basic technique in many areas of computer science, such as solving constraint satisfaction problems(CSPs). The backtracking technique we are about to introduce even further optimizes the space complexity on the basis of DFS. - -\subsection{Uniform-Cost Search(UCS)} -When a priority queue is used to order nodes measured by the path cost of each node to the root in the frontier, this is called uniform-cost search, aka Cheapest First Search. In UCS, frontier set is expanded only in the direction which requires the minimum cost to travel to from root node. UCS only terminates when a path has explored the goal node, and this path is the cheapest path among all paths that can reach to the goal node from the initial point. When UCS is applied to find shortest path in a graph, it is called Dijkstra's Algorithm. - -We demonstrate the process of UCS with the example shown in Fig.~\ref{fig:ucs}. - -Here, our source is `S', and the goal is `G'. We are set to find a path from source to goal with minimum cost. The process is shown as: -\begin{lstlisting}[numbers=none] -Q = [(0, S)] -Expand S, add A and B -Q = [(4, A), (5, B)] -Expand A, add G -Q = [(5, B), (11, G)] -Expand B, add G -Q = [(8, G), (11, G)] -Expand G, goal found, terminate. 
-\end{lstlisting} -And the Python source code is: -\begin{lstlisting}[language=Python] -import heapq -def ucs(graph, s, t): - q = [(0, s)] # initial path with cost 0 - while q: - cost, n = heapq.heappop(q) - # Test goal - if n == t: - return cost - else: - for v, c in graph[n]: - heapq.heappush(q, (c + cost, v)) - return None -\end{lstlisting} -\paragraph{Properties} Uniform-Cost Search is \textbf{complete}, as it is a similar search strategy to breadth-first search (using a priority queue instead of a FIFO queue). It is \textbf{optimal} only when all edge costs are non-negative; with negative edges, the first time the goal is popped from the priority queue is no longer guaranteed to be via its cheapest path. - -\paragraph{Time and Space Complexity} Similar to BFS, both the worst case time and space complexity is $O(b^d)$. When all edge costs are $c$, and $C^{*}$ is the best goal path cost, the time and space complexity can be more precisely represented as $O(b^{C^{*}/c})$. -\subsection{Iterative-Deepening Search} -Iterative-Deepening Search(IDS) is a modification on top of DFS, more specifically depth limited DFS(DLS); as the name suggests, IDS sets a maximum depth as a ``depth bound'', and it calls DLS as a subroutine looping from depth zero to maximum depth to expand nodes just as DFS will do and it only does goal test for nodes at the testing depth. - -Using the graph in Fig.~\ref{fig:ucs} as an example. The process is shown as: -\begin{lstlisting}[numbers=none] -maxDepth = 3 - -depth = 0: S = [S] -Test S, goal not found - -depth = 1: S =[S] -Expand S, S = [B, A] -Test A, goal not found -Test B, goal not found - -depth = 2: S=[S] -Expand S, S=[B, A] -Expand A, S=[B, G] -Test G, goal found, STOP -\end{lstlisting} -The implementation of the DLS goes easier with recursive DFS: we count down the variable \texttt{maxDepth} in the function, and only do goal testing when this variable reaches zero. 
The code is as: -\begin{lstlisting}[language=Python] -def dls(graph, cur, t, maxDepth): - # End Condition - if maxDepth == 0: - if cur == t: - return True - if maxDepth < 0: - return False - - # Recur for adjacent vertices - for n, _ in graph[cur]: - if dls(graph, n, t, maxDepth - 1): - return True - return False -\end{lstlisting} -With the help of function \texttt{dls}, the implementation of IDS is just an iterative call to the subroutine: -\begin{lstlisting}[language=Python] -def ids(graph, s, t, maxDepth): - for i in range(maxDepth): - if dls(graph, s, t, i): - return True - return False -\end{lstlisting} -\paragraph{Analysis} It appears to us that we are undermining the efficiency of the original DFS since the algorithm ends up visiting the nodes in the levels above the goal multiple times. However, it is not as expensive as it seems to be, since in a tree most of the nodes are in the bottom levels. If the goal node is located at the bottom level, IDS will not have an obvious efficiency decline. But if the goal is located on an upper level on the right side of the tree, IDS avoids having to visit all nodes across all depths on the left half before it is able to find this goal node. -\paragraph{Properties} Through the depth limited DFS, IDS has advantages of DFS: -\begin{itemize} - \item Limited space linear to the depth and branching factor, giving $O(bd)$ as space complexity. - \item In practice, even with redundant effort, it still finds longer path more quickly than BFS does. -\end{itemize} -By iterating through from lower to higher depth, IDS has advantages of BFS, which comes with \textbf{completeness} and \textbf{optimality} stated the same as of BFS. -\paragraph{Time and Space Complexity} -The space complexity is the same as of DFS, $O(bd)$. The time complexity is slightly worse than BFS or DFS due to the repetitive visiting nodes on top of the search tree but it still has the same worst case exponential time complexity, $O(b^d)$. 
-%%%%%%%%%%%%%%%%Bidirectional Search%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\subsection{Bidirectional Search**} -\begin{figure}[!ht] - \centering - \includegraphics[width=0.5\columnwidth]{fig/bidrectional_search.png} - \caption{Bidirectional search. } - \label{fig:bidirectional_search} -\end{figure} -Bidirectional search applies breadth-first search from both the start and the goal node, with one BFS from start moving forward and one BFS from the goal moving backward until their frontiers meet. This process is shown in Fig.~\ref{fig:bidirectional_search}. As we see, each BFS process only visit $O(b^{d/2})$ nodes comparing with one single BFS that visits $O(b^d)$ nodes. This will improve both the time and space efficiency by $b^{d/2}$ times compared with vanilla BFS. -\paragraph{Implementation} Because the BFS that starts from the goal needs to move backwards, the easy way to do this is to create another copy of the graph wherein each edge has opposite direction compared with the original. By creating a reversed graph, we can use a forward BFS from the goal. - -We apply level by level BFS instead of updating the queue one node by one node. For better efficiency of the intersection of the frontier set from both BFS, we use \texttt{set} data structure instead of simply a \texttt{list} or a FIFO queue. - -Use Fig.~\ref{fig:ucs} as an example, if our source and goal is `S' and `G' respectively, if we proceed both BFS simultaneously, the process looks like this: -\begin{lstlisting}[numbers=none] -qs = ['S'] -qt = ['G'] -Check intersection, and proceed -qs = ['A', 'B'] -qt = ['A', 'B'] -Check intersection, frontier meet, STOP -\end{lstlisting} -No process in this case, however, the above process will end up missing the goal node if we change our goal to be `A'. 
This process looks like: -\begin{lstlisting}[numbers=none] -qs = ['S'] -qt = ['A'] -Check intersection, and proceed -qs = ['A', 'B'] -qt = ['S'] -Check intersection, and proceed -qs = ['G'] -qt = [] -STOP -\end{lstlisting} -This is because, for source and goal nodes whose shortest path has odd length, if we proceed with both search processes simultaneously, the two frontiers step past each other and we will always end up missing the intersection. Therefore, we process each BFS iteratively--one at a time to avoid such troubles. - -The code for one level at a time BFS with \texttt{set} and for the intersection check is as: -\begin{lstlisting}[language=Python] -def bfs_level(graph, q, bStep): - if not bStep: - return q - nq = set() - for n in q: - for v, c in graph[n]: - nq.add(v) - return nq - -def intersect(qs, qt): - if qs & qt: # intersection - return True - return False -\end{lstlisting} -The main code for bidirectional search is as: -\begin{lstlisting}[language=Python] -def bis(graph, s, t): - # First build a graph with opposite edges - bgraph = defaultdict(list) - for key, value in graph.items(): - for n, c in value: - bgraph[n].append((key, c)) - # Start bidirectional search - qs = {s} - qt = {t} - step = 0 - while qs and qt: - if intersect(qs, qt): - return True - qs = bfs_level(graph, qs, step%2 == 0) - qt = bfs_level(bgraph, qt, step%2 == 1) - step = 1 - step - return False -\end{lstlisting} -\subsection{Summary} -\begin{table}[!ht] -\begin{small} -\centering -\noindent\captionof{table}{ Performance of Search Algorithms on Trees or Acyclic Graph} - \noindent \begin{tabular}{|p{0.2\columnwidth}|p{0.15\columnwidth}|p{0.15\columnwidth}|p{0.15\columnwidth}|p{0.15\columnwidth}| } - \hline -Method & Complete & Optimal & Time & Space \\ \hline -BFS & Y& Y, if & $O(b^d)$ & $O(b^d)$ \\\hline -UCS &Y & Y & $O(b^{C^{*}/c})$ & $O(b^{C^{*}/c})$\\ \hline -DFS & N & N & $O(b^m)$ & $O(bm)$\\ \hline -IDS & Y & Y, if & $O(b^d)$ & $O(bd)$\\ \hline -Bidirectional Search & Y& Y, if& $O(b^{d/2})$ & $O(b^{d/2})$\\ \hline 
-\end{tabular} - \label{tab:performance of searching strategy} - \end{small} -\end{table} -Using $b$ as branching factor, $d$ as the depth of the goal node, and $m$ is the maximum graph depth. The properties and complexity for the five uninformed search strategies are summarized in Table.~\ref{tab:performance of searching strategy}. - - - - - - %%%%%%%%%%%%%%%%%Graph Search%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\section{Graph Search} -\paragraph{Cycles} -This section is devoted to discuss more details about two search strategies--BFS and DFS in more general graph setting. In the last section, we just assumed our graph is either a tree or acyclic directional graph. In more general real-world setting, there can be cycles within a graph which will lead to infinite loops of our program. -\paragraph{Print Paths} Second, we talked about the paths, but we never discuss how to track all the paths. In this section, we would like to see how we can track paths first, and then with the tracked paths, we detect cycles to avoid getting into infinite loops. -\paragraph{More Efficient Graph Search} -Third, the last section is all about tree search, however, in a large graph, this is not efficient by visiting some nodes multiple times if they happen to be on the multiple paths between the source and any other node in the graph. Usually, depends on the application scenarios, graph search which remembers already-expanded nodes/states in the graph and avoids expanding again by checking any about to be expanded node to see if it exists in frontier set or the explored set. This section, we introduce graph search that suits for general purposed graph problems. -% \paragraph{Handle Cycles} In this section, we assumed our graph is either a tree or acyclic directional graph. When there are cycles, we have to track the path and avoid cycles, which you will see more details in Section Graph Search. 
-% \paragraph{Graph Search} -% This section is all about tree search, however, in a large graph, this is not efficient by visiting some nodes multiple times if they happen to be on the multiple paths between the source and any other node in the graph. Usually, depends on the application scenarios, graph search which remembers already-expanded nodes/states in the graph and avoids expanding again by checking any about to be expanded node to see if it exists in frontier set or the explored set. Check Section. Graph search for more details. -% \paragraph{Print Paths} We have known that the uniformed search were all doing tree based search, but we never try to track all the paths, which we would like to resolve in the next section. -% In this Chapter, we expand the BFS and DFS tree searching strategy on a graph which is more general. - -%For convenience, we use two sets: \textit{explored set} and \textit{frontier set} to distinguish vertices that have been finished exploring and vertices that are being explored. This make three different states between all vertices in the searching process: -\paragraph{Visiting States} -We have already explained that we can use three colors: WHITE, GREY, and BLACK to denote nodes within the unexpanded, frontier, and explored set, respectively. We are doing so to avoid the hassles of tracking three different sets, with visiting state, it is all simplified to a color check. We define a \texttt{STATE} class for convenience. -% Because in graph, it is reasonable to expect it contains cycles. In our example, we have a cycle \texttt{[0, 1, 2, 0]} and \texttt{[1, 2, 3, 4, 1]} as shown in Fig.~\ref{fig:cyclic_graph_search_1}. Therefore, in the graph search, it is a necessity to avoid traversing a cycle which will make the program running nonstop. The solution is during the search process, we mark states for each vertex. 
- - - -\begin{lstlisting}[language=Python, numbers=none] -class STATE: - white = 0 - gray = 1 - black = 2 -\end{lstlisting} -\begin{figure}[!ht] - \centering - \includegraphics[width=0.25\columnwidth]{fig/free_tree.png} - \includegraphics[width=0.25\columnwidth]{fig/directed_cyclic_graph.png} - \includegraphics[width=0.25\columnwidth]{fig/undirected_cyclic_graph.png} - \caption{Exemplary Graph: Free Tree, Directed Cyclic Graph, and Undirected Cyclic Graph.} - \label{fig:graph_search_example} -\end{figure} -In this section, we use Fig.~\ref{fig:graph_search_example} as our exemplary graphs. Each's data structure is defined as: -\begin{itemize} -\item Free Tree: -\begin{lstlisting}[language=Python] -ft = [[] for _ in range(6)] -ft[0] = [1] -ft[1] = [2] -ft[2] = [4] -ft[4] = [3, 5] -\end{lstlisting} - \item Directed Cyclic Graph: - \begin{lstlisting}[language=Python] -dcg = [[] for _ in range(6)] -dcg[0] = [1] -dcg[1] = [2] -dcg[2] = [0, 4] -dcg[3] = [1] -dcg[4] = [3, 5] -\end{lstlisting} -\item Undirected Cyclic Graph - \begin{lstlisting}[language=Python] -ucg = [[] for _ in range(6)] -ucg[0] = [1, 2] -ucg[1] = [0, 2, 3] -ucg[2] = [0, 1, 4] -ucg[3] = [1, 4] -ucg[4] = [2, 3, 5] -ucg[5] = [4] -\end{lstlisting} -\end{itemize} - -% Then we introduce more searching strategies such as priority-first searching and give out more categorization. - -\paragraph{Search Tree} It is important to realize the Searching ordering is always forming a tree, this is terminologized as \textbf{Search Tree}. In a tree structure, the search tree is itself. In a graph, we need to figure out the search tree and it decides our time and space complexity. - - - - %%%%%%%%%%%%%%%%%%%%%%%%%%%Graph Search%%%%%%%%%%%%%%%%%%%%% -\subsection{Depth-first Search in Graph} -In this section we will further the depth-first tree search and explore depth-first graph search to compare their properties and complexity. 
-\subsubsection{Depth-first Tree Search} - -\paragraph{Vanilla Depth-first Tree Search} Our previous code slightly modified to suit for the new graph data structure works fine with the free tree in Fig.~\ref{fig:graph_search_example}. The code is as: -\begin{lstlisting}[language=Python] -def dfs(g, vi): - print(vi, end=' ') - for nv in g[vi]: - dfs(g, nv) -\end{lstlisting} -However, if we call it on the cyclic graph, \texttt{dfs(dcg, 0)}, it runs into stack overflow. - -\paragraph{Cycle Avoiding Depth-first Tree Search} -So, how to avoid cycles? We know the definition of a cycle is a closed path that has at least one node that repeats itself; in our failed run, we were stuck with cycle [0, 1, 2, 0]. Therefore, let us add a \texttt{path} in the recursive function, and whenever we want to expand a node, we check if it forms a cycle or not by checking the membership of a candidate to nodes comprising the path. We save all paths and the visiting ordering of nodes in two list: \texttt{paths} and \texttt{orders}. The recursive version of code is: -\begin{lstlisting}[language=Python] -def dfs(g, vi, path): - paths.append(path) - orders.append(vi) - for nv in g[vi]: - if nv not in path: - dfs(g, nv, path+[nv]) - return -\end{lstlisting} -Now we call function \texttt{dfs} for \texttt{ft}, \texttt{dcg}, and \texttt{ucg}, the \texttt{paths} and \texttt{orders} for each example is listed: -\begin{itemize} - \item For the free tree and the directed cyclic graph, they have the same output. 
The \texttt{orders} are: -\begin{lstlisting}[numbers=none] -[0, 1, 2, 4, 3, 5] -\end{lstlisting} - And the \texttt{paths} are: -\begin{lstlisting}[numbers=none] -[[0], [0, 1], [0, 1, 2], [0, 1, 2, 4], [0, 1, 2, 4, 3], [0, 1, 2, 4, 5]] -\end{lstlisting} - \item For the undirected cyclic graph, \texttt{orders} are: -\begin{lstlisting}[numbers=none] -[0, 1, 2, 4, 3, 5, 3, 4, 2, 5, 2, 1, 3, 4, 5, 4, 3, 1, 5] -\end{lstlisting} - And the \texttt{paths} are: -\begin{lstlisting}[numbers=none] -[[0], -[0, 1], -[0, 1, 2], -[0, 1, 2, 4], -[0, 1, 2, 4, 3], -[0, 1, 2, 4, 5], -[0, 1, 3], -[0, 1, 3, 4], -[0, 1, 3, 4, 2], -[0, 1, 3, 4, 5], -[0, 2], -[0, 2, 1], -[0, 2, 1, 3], -[0, 2, 1, 3, 4], -[0, 2, 1, 3, 4, 5], -[0, 2, 4], -[0, 2, 4, 3], -[0, 2, 4, 3, 1], -[0, 2, 4, 5]] -\end{lstlisting} -\end{itemize} -\begin{figure}[!ht] - \centering - \includegraphics[width=0.2\columnwidth]{fig/free_tree.png} - % \includegraphics[width=0.15\columnwidth]{fig/free_tree.png} - \includegraphics[width=0.75\columnwidth]{fig/search_tree_dfs.png} - \caption{Search Tree for Exemplary Graph: Free Tree and Directed Cyclic Graph, and Undirected Cyclic Graph.} - \label{fig:graph_search_example_search_tree} -\end{figure} - -These paths mark the search tree, we visualize the search tree for each exemplary graph in Fig.~\ref{fig:graph_search_example_search_tree}. - -% \paragraph{Efficient Path Backtrace} Previously we save paths each as a list, the shared partial paths locating on top part of the search tree are repeating, such as partial path [0, 1, 2, 4] in our example, which wastes memory unnecessarily. We can save paths by saving all edges in the search tree - -\subsubsection{Depth-first Graph Search} - We see that from the above implementation, for a graph with only 6 nodes, we have been visiting nodes for a total of 19 times. A lot of nodes have been repeating. 1 appears 3 times, 3 appears 4 times, and so on. 
As we see the visiting order being represented with a \texttt{search tree} in Fig.~\ref{fig:graph_search_example_search_tree}, our complexity is getting close to $O(b^h)$, where $b$ is the branching factor and $h$ is the total vertices of the graph, marking the upper bound of the maximum depth that the search can traverse. If we simply want to search if a value or a state exists in the graph, this is insanely complicating the situation. What we do next is to avoid revisiting the same vertex again and again, we conquer this by tracking the visiting state of a node. - - In the implementation, we only track the longest path--from source vertex to vertex that has no more unvisited adjacent vertices. -\begin{lstlisting}[language=Python] -def dfgs(g, vi, visited, path): - orders.append(vi) - bEnd = True # node without unvisited adjacent nodes - for nv in g[vi]: - if nv not in visited: - if bEnd: - bEnd = False - visited.add(nv) - dfgs(g, nv, visited, path + [nv]) - if bEnd: - paths.append(path) -\end{lstlisting} -Now, we call this function with \texttt{ucg} as: -\begin{lstlisting}[language=Python] -paths, orders = [], [] -dfgs(ucg, 0, set([0]), [0]) -\end{lstlisting} -The output for \texttt{paths} and \texttt{orders} are: -\begin{lstlisting}[numbers=none] -([[0, 1, 2, 4, 3], [0, 1, 2, 4, 5]], [0, 1, 2, 4, 3, 5]) -\end{lstlisting} -Did you notice that the depth-first graph search on the undirected cyclic graph shown in Fig.~\ref{fig:graph_search_example} has the same visiting order of nodes and same search tree as the free tree and directed cyclic graph in Fig.~\ref{fig:graph_search_example}? - - -\paragraph{Properties} The completeness of DFS depends on the search space. If your search space is finite, then Depth-First Search is complete. However, if there are infinitely many alternatives, it might not find a solution. 
For example, suppose you were coding a path-search problem on city streets, and every time your partial path came to an intersection, you always searched the left-most street first. Then you might just keep going around the same block indefinitely. - -The depth-first graph search is \textbf{nonoptimal}, just like depth-first tree search. For example, suppose the task is to find the shortest path from source 0 to target 2. The shortest path is 0->2; however, depth-first graph search will return 0->1->2. Depth-first tree search can find the shortest path from source 0 to 2; however, it explores the whole left branch starting from 1 before it finds the goal node on the right side. - -\paragraph{Time and Space Complexity} For the depth-first graph search, we use aggregate analysis. The search process covers all vertices, $|V|$, and all edges, $|E|$, which makes the time complexity $O(|V|+|E|)$. For the space, it uses $O(|V|)$ in the worst case to -store the stack of vertices on the current search path as well as the set of -already-visited vertices. - -\subsubsection{Applications} Depth-first tree search is adopted as the basic workhorse of many areas of AI, such as solving CSP. Depth-first graph search is widely used to solve graph-related tasks, such as cycle detection, topological sort, and backtracking.
-% Let's call it as \texttt{recursive(al, 0, [0])}, and the output that indicates the visiting order of vertices will be: -% \begin{lstlisting}[numbers=none] -% 0 1 2 4 3 5 6 3 4 2 5 6 2 1 3 4 5 6 4 3 1 5 6 -% \end{lstlisting} -% \begin{figure}[!ht] -% \centering -% \includegraphics[width=0.9\columnwidth]{fig/depth_first_tree_search.png} -% \caption{The search tree using depth-first tree search } -% \label{fig:df_tree_search} -% \end{figure} -% \begin{figure}[!ht] -% \centering -% \includegraphics[width=0.5\columnwidth]{fig/free_tree_search.png} -% \caption{The search tree using depth-first graph search.} -% \label{fig:dfs_graph_search} -% \end{figure} -% And the following are all the paths comprise the search tree shown in Fig.~\ref{fig:df_tree_search}. -% \begin{lstlisting}[language=Python] -% [0] [0, 1] [0, 1, 2] [0, 1, 2, 4] [0, 1, 2, 4, 3] [0, 1, 2, 4, 5] [0, 1, 2, 4, 5, 6] [0, 1, 3] [0, 1, 3, 4] [0, 1, 3, 4, 2] [0, 1, 3, 4, 5] [0, 1, 3, 4, 5, 6] [0, 2] [0, 2, 1] [0, 2, 1, 3] [0, 2, 1, 3, 4] [0, 2, 1, 3, 4, 5] [0, 2, 1, 3, 4, 5, 6] [0, 2, 4] [0, 2, 4, 3] [0, 2, 4, 3, 1] [0, 2, 4, 5] [0, 2, 4, 5, 6] -% \end{lstlisting} - -\begin{bclogo}[couleur = blue!30, arrondi=0.1,logo=\bccrayon,ombre=true]{Questions to ponder: } -\begin{itemize} -\item Only track the longest paths. -\item How to trace the edges of the search tree? -\item Implement the iterative version of the recursive code. -\end{itemize} -\end{bclogo} - -%%%%%%%%%%%%%%BFS%%%%%%%%%%%%%%%%%%%%%%%% -%%%%%%%%%%%%%%%%%%%% -\subsection{Breath-first Search in Graph} -We further breath-first tree search and explore breath-first graph search in this section to grasp better understanding of one of the most general search strategies. Because that BFS is implemented iteratively, the implementation in this section of sheds light to the iterative counterparts of DFS's recursive implementations from last section. 
-\subsubsection{Breath-first Tree Search} -Similarly, out vanilla breath-first tree search shown in Section.~\ref{} will get stuck with the cyclic graph in Fig.~\ref{fig:graph_search_example}. -\paragraph{Cycle Avoiding Breath-first Tree Search} We avoid cycles with similar strategy to DFS tree search that traces paths and checks membership of node. In BFS, we track paths by explicitly adding paths to the \texttt{queue}. Each time we expand from the frontier (queue), the node we need is the last item in the path from the queue. In the implementation, we only track the longest paths from the search tree and the visiting orders of nodes. The Python code is: -\begin{lstlisting}[language=Python] -def bfs(g, s): - q = [[s]] - paths, orders = [], [] - while q: - path = q.pop(0) - n = path[-1] - orders.append(n) - bEnd = True - for v in g[n]: - if v not in path: - if bEnd: - bEnd = False - q.append(path + [v]) - if bEnd: - paths.append(path) - return paths, orders -\end{lstlisting} -Now we call function \texttt{bfs} for \texttt{ft}, \texttt{dcg}, and \texttt{ucg}, the \texttt{paths} and \texttt{orders} for each example is listed: -\begin{itemize} - \item For the free tree and the directed cyclic graph, they have the same output. The \texttt{orders} are: -\begin{lstlisting}[numbers=none] -[0, 1, 2, 4, 3, 5] -\end{lstlisting} - And the \texttt{paths} are: -\begin{lstlisting}[numbers=none] -[[0, 1, 2, 4, 3], [0, 1, 2, 4, 5]] -\end{lstlisting} - \item For the undirected cyclic graph, \texttt{orders} are: -\begin{lstlisting}[numbers=none] -[0, 1, 2, 2, 3, 1, 4, 4, 4, 3, 3, 5, 3, 5, 2, 5, 4, 1, 5] -\end{lstlisting} - And the \texttt{paths} are: -\begin{lstlisting}[numbers=none] -[[0, 2, 4, 5], [0, 1, 2, 4, 3], [0, 1, 2, 4, 5], [0, 1, 3, 4, 2], [0, 1, 3, 4, 5], [0, 2, 4, 3, 1], [0, 2, 1, 3, 4, 5]] -\end{lstlisting} -\end{itemize} -We can see the visiting orders of nodes are different from Depth-first tree search counterparts. 
However, the corresponding search tree for each graph in Fig.~\ref{fig:graph_search_example} is the same as that of its Depth-first Tree Search counterpart illustrated in Fig.~\ref{fig:graph_search_example_search_tree}. This highlights that different search strategies differ in the visiting order of nodes but not in the search tree, which depicts the search space. - - - - -\paragraph{Applications} However, Breadth-first Tree Search with path tracing is far more costly than its DFS counterpart. When our goal is to enumerate paths, go for DFS. When we are trying to find shortest paths, mostly use BFS. - -\subsubsection{Breadth-first Graph Search} -Similar to Depth-first Graph Search, we use a \texttt{visited} set to make sure each node is added to the frontier (queue) only once and thus expanded only once. - -\paragraph{Efficient Path Backtrace} In graph search, each node is added into the frontier and expanded only once, so the search tree of a graph with $|V|$ vertices has at most $|V|-1$ edges. Tracing paths by saving each path as a list in the frontier set is costly: a partial path in the search tree, such as \texttt{0->1->2->4}, is stored repeatedly if it happens to be part of multiple paths. We can bring the memory cost down to $O(|V|)$ if we only save edges by using a \texttt{parent} dict whose key is a node and whose value is that node's parent in the path. For example, edge \texttt{0->1} is saved as \texttt{parent[1] = 0}. Once we find the goal state, we can backtrace from it to get the path.
The backtrace code is: -\begin{lstlisting}[language=Python] -def backtrace(s, t, parent): - p = t - path = [] - while p != s: - path.append(p) - p = parent[p] - path.append(s) - return path[::-1] -\end{lstlisting} -\paragraph{BFGS Implementation} The implementation of Breath-first Graph Search with goal test is: -\begin{lstlisting}[language=Python] -def bfgs(g, s, t): - q = [s] - parent = {} - visited = {s} - while q: - n = q.pop(0) - if n == t: - return backtrace(s, t, parent) - for v in g[n]: - if v not in visited: - q.append(v) - visited.add(v) - parent[v] = n -\end{lstlisting} - -\paragraph{Time and Space Complexity} Same to DFGS, the time complexity as $O(|V|+|E|)$. For the space, it uses space $O(|V|)$ in the worst case to -store vertices on the current search path, the set of -already-visited vertices, as well as the dictionary used to store edge relations. The shortage that comes with costly memory usage of Breath-first Graph Search to Depth-first Graph Search is less obvious compared to Breath-first Tree Search to Depth-first Graph Search. - -%%%%%%%%%%%%%%%%DFS Graph Search%%%%%%%%%%%%%% -\subsection{Depth-first Graph Search} -Within this section and the next, we focus on explaining more characteristics of the graph search that avoids repeatedly visiting a vertex. We will make use of the three color visiting states. Seemingly these features and details are not that useful judging from current context, but we will see how it can be applied to solve problems more efficiently in Chapter Advanced Graph Algorithms, such as detecting cycles, topological sort, and so on. 
-\begin{figure}[!ht] - \centering - \includegraphics[width=0.2\columnwidth]{fig/depth_first_graph_search_process0.png} - \includegraphics[width=0.2\columnwidth]{fig/depth_first_graph_search_process1.png} - \includegraphics[width=0.2\columnwidth]{fig/depth_first_graph_search_process2.png} - \includegraphics[width=0.2\columnwidth]{fig/depth_first_graph_search_process3.png} - - \includegraphics[width=0.2\columnwidth]{fig/depth_first_graph_search_process4.png} - \includegraphics[width=0.2\columnwidth]{fig/depth_first_graph_search_process5.png} - \includegraphics[width=0.2\columnwidth]{fig/depth_first_graph_search_process6.png} - \includegraphics[width=0.2\columnwidth]{fig/depth_first_graph_search_process7.png} - - \includegraphics[width=0.2\columnwidth]{fig/depth_first_graph_search_process8.png} - \includegraphics[width=0.2\columnwidth]{fig/depth_first_graph_search_process9.png} - \includegraphics[width=0.2\columnwidth]{fig/depth_first_graph_search_process10.png} - \includegraphics[width=0.2\columnwidth]{fig/depth_first_graph_search_process11.png} - \caption{The process of Depth-first Graph Search. The black arrows denotes the the relation of $u$ and its not visited neighbors $v$. And the red arrow marks the backtrack edge. } - \label{fig:depth_first_graph_search_process} -\end{figure} - -As shown in Fig.~\ref{fig:depth_first_graph_search_process}, we -%Depth-first Search starts from a given source, and follows a single path in the graph to go as ``far'' as possible to visit unvisited nodes until (1) it meets a vertex that has no edge out; or (2) no unvisited adjacent vertices or say white vertices. Then it ``backtracks'' to its predecessor and start the above process again. DFS will discover all vertices that are reachable from the given source. -start from 0, mark it gray, and visit its first unvisited neighbor 1, mark 1 as gray, and visit 1's first unvisited neighbor 2, then 2's unvisited neighbor 4, 4's unvisited neighbor 3. 
For node 3, it doesn't have white neighbors, so we mark it as complete with black. Now we ``backtrack'' to its predecessor, which is 4. We then continue the process until 5 becomes gray. Because 5 has no white neighbors left, it becomes black. Then the search backtracks to 4, to 2, to 1, and eventually back to 0. We should notice that the order in which vertices become gray differs from the order in which they become black. From the figure, the gray ordering is \texttt{[0, 1, 2, 4, 3, 5]}, and the black ordering is \texttt{[3, 5, 4, 2, 1, 0]}. Therefore, it is necessary to distinguish at least these three states in the depth-first graph search. - - -\paragraph{Three States Recursive Implementation} -We add an additional \texttt{colors} list to track the color of each vertex, \texttt{orders} to track the order in which vertices turn gray, and \texttt{complete\_orders} to track the order in which vertices turn black--which happens once none of a node's neighbors is white, that is, right after the loop of recursive calls in the code. -\begin{lstlisting}[language = Python] -def dfs(g, s, colors, orders, complete_orders): - colors[s] = STATE.gray - orders.append(s) - for v in g[s]: - if colors[v] == STATE.white: - dfs(g, v, colors, orders, complete_orders) - colors[s] = STATE.black - complete_orders.append(s) - return -\end{lstlisting} -Now, we try to call the function with the undirected cyclic graph in Fig.~\ref{fig:graph_search_example}.
-\begin{lstlisting}[language=Python] -v = len(ucg) -orders, complete_orders = [], [] -colors = [STATE.white] * v -dfs(ucg,0, colors, orders, complete_orders) -\end{lstlisting} -Now, the \texttt{orders} and \texttt{complete\_orders} will end up differently: -\begin{lstlisting}[numbers=none] -[0, 1, 2, 4, 3, 5] [3, 5, 4, 2, 1, 0] -\end{lstlisting} - -\paragraph{Edges} -\begin{figure}[!ht] - \centering - \includegraphics[width=0.33\columnwidth]{fig/directed_cyclic_graph_2.png} - \includegraphics[width=0.28\columnwidth]{fig/depth_first_graph_search_edges(1).png} - \caption{Classification of Edges: black, red, yellow, and blue marks tree edges, back edges, forward edges, and cross edges, respectively.} - \label{fig:depth_first_graph_search_edges} -\end{figure} -Depth-first Graph Search on graph $G=(V, E)$ connects all reachable vertices from a given source in the graph in the form of a depth-first forest $G_\pi$. Edges within $G_\pi$ are called \textbf{tree edges}. Tree edges are edges marked with black arrows in Fig.~\ref{fig:depth_first_graph_search_edges}. Other edges in $G$ can be classified into three categories based on Depth-first forest $G_\pi$, they are: -\begin{enumerate} -\item Back edges point from a node to one of its ancestors in the depth-first forest $G_\pi$. Marked as red edges in Fig.~\ref{fig:depth_first_graph_search_edges}. - -\item Forward edges point from a node to one of its descendants in the depth-first forest $G_\pi$. Marked as yellow edges in Fig.~\ref{fig:depth_first_graph_search_edges}. - -\item Cross edges point from a node to a previously visited node that is neither an ancestor nor a descendant in the depth-first forest $G_\pi$. Marked as blue edges in Fig.~\ref{fig:depth_first_graph_search_edges}. -\end{enumerate} - -Classification of edges provide important information about the graph, e.g. to if we detect a back edge in directed graph, we find a cycle. 
- - - -\paragraph{Parenthesis Structure} In either undirected or directed graph, the discovered time when state goes from WHITE to GRAY and the finish time when state turns to BLACK from GRAY has the parenthesis structure. We modify \texttt{dfs} to track the time: a static variable \texttt{t} is used to track the time, \texttt{discover} and \texttt{finish} is used to record the first discovered and finished time. The implementation is shown: -\begin{lstlisting}[language=Python] -def dfs(g, s, colors): - dfs.t += 1 # static variable - colors[s] = STATE.gray - dfs.discover[s] = dfs.t - for v in g[s]: - if colors[v] == STATE.white: - dfs(g, v, colors) - # complete - dfs.t += 1 - dfs.finish[s] = dfs.t - return -\end{lstlisting} -Now, we call the above function with directed graph in Fig.~\ref{fig:depth_first_graph_search_edges}. -\begin{lstlisting}[language=Python] -v = len(dcg) -colors = [STATE.white] * v -dfs.t = -1 -dfs.discover, dfs.finish = [-1] * v, [-1] * v -dfs(dcg,0, colors) -\end{lstlisting} -The output for \texttt{dfs.discover} and \texttt{dfs.finish} are: -\begin{lstlisting}[numbers=none] -([0, 1, 2, 4, 3, 6], [11, 10, 9, 5, 8, 7]) -\end{lstlisting} -From \texttt{dfs.discover} and \texttt{dfs.finish} list, we can generate a new list of merged order, \texttt{merge\_orders} that arranges nodes in order of there discovered and finish time. 
The code is as: -\begin{lstlisting}[language=Python] -def parenthesis(dt, ft, n): - merge_orders = [-1] * 2 * n - for v, t in enumerate(dt): - merge_orders[t] = v - for v, t in enumerate(ft): - merge_orders[t] = v - - print(merge_orders) - nodes = set() - for i in merge_orders: - if i not in nodes: - print('(', i, end = ', ') - nodes.add(i) - else: - print(i, '),', end = ' ') -\end{lstlisting} -The output is: -\begin{lstlisting}[language=Python] -[0, 1, 2, 4, 3, 3, 5, 6, 6, 5, 4, 2, 1, 0] -( 0, ( 1, ( 2, ( 4, ( 3, 3 ), ( 5, ( 6, 6 ), 5 ), 4 ), 2 ), 1 ), 0 ), -\end{lstlisting} -We can easily see that the ordering of nodes according to the discovery and finishing time makes a well-defined expression in the sense that the parentheses are properly nested. -\begin{bclogo}[couleur = blue!30, arrondi=0.1,logo=\bccrayon,ombre=true]{Questions to ponder: } -\begin{itemize} -\item Implement the iterative version of the recursive code. -\end{itemize} -\end{bclogo} - -%%%%%%%%%%%%%%%%BFS Graph Search%%%%%%%%%%%%%% -\subsection{Breadth-first Graph Search} -We can observe that the visiting ordering of the nodes is exactly the same as in the free tree. Suppose we record an edge from the predecessor node to the current node each time a node is discovered. For example, at first, we start at 0 and add edges (0, 1) and (0, 2) in a tree structure. Next, we expand 1, which has unvisited neighbors 3 and 4, so we add edges (1, 3) and (1, 4). At the end, all of these tracked edges will form a tree, which is exactly the same as the free tree in Fig.~\ref{fig:cyclic_graph_search_bfs}. We call such a tree a \textbf{Breadth-first Search Tree}. The tree contains all vertices reachable from $s$. If we denote its nodes as $V_t$, then its edges, each going from a node's predecessor to that node, are denoted as $E_t = \{(\pi[v], v) : v \in V_t, v \neq s\}$. The subgraph $(V_t, E_t)$ is called the \textbf{Predecessor Subgraph}.
With the search tree, we can see that the paths between any two vertices are the shortest path that is defined by the length of a path, which clearly in the Depth-first Search Tree that is not the case. -\subsection{Introduction} - Given a graph $G = (V, E)$, and a \textit{source} vertex $s$, the aim of Breadth-first search is to explore the edges of $G$ to discover all vertices that are reachable from the source $s$ just as the depth-first search. However, BFS visits vertices that are reachable from the source in the order of distance from the source. More specifically, let us use $d$ to denote the distance, in the example, we are given vertex $0$ as the source. And first, we visit its neighbors $1, 2$ since they are the closest ones among all the other vertices in the graph with $d=1$. The edge $(0, 1), (0, 2)$ are added to the BFS tree. Next, move to 0's first neighbor 1, and visited 1's unvisited neighbors, $3, 4$ with $d=2$. The whole process is depicted in Fig.~\ref{fig:bfs_search}. - \begin{figure}[!ht] - \centering - \includegraphics[width=0.9\columnwidth]{fig/bfs_example_1.png} - \caption{The process of Breath-first-search. The black arrows denotes the the relation of $u$ and its not visited neighbors $v$. All these edges constructs a breath-first-tree. The visiting orders of BFS starting from vertex $0$ is [0, 1, 2, 3, 4, 5, 6].} - \label{fig:bfs_search} -\end{figure} -\paragraph{Shortest Paths, Completeness, and Optimality} The BFS strategy can produce the shortest-path from a source to any reachable vertex. This indicates that Breath-first graph search is \textbf{complete}: if there exists paths that can be reached from the source, we would find one of them--the shortest one. Thus, if our goal test is to find the shortest path to a target, we would find it at the distance $d_t$, which further makes it \textbf{optimal}. While in the depth-first graph search, it is complete that we can find a reachable path, but it is not optimal. 
To be able to find the shortest path, we have to use the depth-first tree search to enumerate all possible acycle paths between the source and the target, only after then, we can compare all the candidates and get the shortest one, which makes the complexity as large as $O(b^d)$ instead of in the case of bfs, which is $O(b^{d_t})$. - \begin{figure}[!ht] - \centering - \includegraphics[width=0.3\columnwidth]{fig/bfs_shortest_dis.png} - \caption{Visualize the BFS in level by level fashion.} - \label{fig:bfs_search_shortest_path} -\end{figure} - -\paragraph{Shortest Paths} To prove breath-first graph search generate a breath-first search where any path between the root and other node is the shortest path evaluated in length. We can prove the correctness using mathematical induction. At first the frontier has only one node, the source, it is a path with length 0, which will be a trivial case. Next, we assume our frontier set has $n-1$ vertices, and $m$ nodes are still in exploring state, and all are the shortest paths to the source, the length of the exploring set to the source is $l_m$. Next, we just need to prove that the $m_u$ unvisited neighboring vertices of the $m$ exploring nodes makes the shortest path to the source. We can argue, because $m_u$ is not visited yet, thus they do not belong to the explored set or the frontiner set, making them impossible to have a path length as short as $l_m$. Because they are neighboring of the exploring nodes, which makes their path length to the source $l_m+1$, which is the minimum among all options. Breath-first graph search is indeed a greedy algorithm in the matter of the path length. We visualize this mathematical induction in Fig.~\ref{fig:bfs_search_shortest_path}. - - -\subsubsection{Exploring} -Extend the tree traversal to the free tree. Before I give you all the definition and details about the graph search, let's explore together by extending the tree traversal to its equivalent free tree. 
-\begin{figure}[!ht] - \centering - \includegraphics[width=0.4\columnwidth]{fig/free_tree_search.png} - \includegraphics[width=0.4\columnwidth]{fig/cyclic_graph_searching.png} - \caption{Left: A free tree, Right: A cyclic Graph} - \label{fig:cyclic_graph_search} -\end{figure} -\paragraph{Free Tree Traversal} Assume the counterpart of recursive binary tree for the free tree shown in Fig.~\ref{fig:cyclic_graph_search} is a tree rooting at node 0, and if an internal node has only one child, say a left child. What is the output of preorder traversal? Represent the graph with adjacency list, and develop a recursive function that print out the same as the preorder traversal in the recursive tree. - -\paragraph{Analysis} First, we can just get the preorder traversal manually without coding, which will be [0, 1, 2, 4, 3, 5, 6]. Now, to develop a recursive traversal function in the free tree, we need to give our recursive function an argument to indicate which node to deal with in that function just as in the recursive tree traversal it takes a node. We set it as vertex number which starts at 0. Instead of call recursive function for the left and right child, in the free tree, it is replaced by checking its neighbors, and assume the neighbors are ordered by vertex number incrementally. The equivalent function will be: -\begin{lstlisting}[language=Python] -def recursive(g, vi): - ''' - g: graph as an adjacency list - vi: the vertex index - ''' - print(vi, end=' ') - for nv in g[vi]: - recursive(g, nv) -\end{lstlisting} -We call this function, it will have the exact output as the preorder traversal output. - -\paragraph{Cycle} However, if we directly apply the above recursive function on the cyclic graph on the right side of Fig.~\ref{fig:cyclic_graph_search}, we will end up getting stack overflow error because we would meet a cycle [0, 1, 2, 0]. In a graph, it is unavoidable to have cycles, that is why it is a graph and not a tree. So, how to avoid cycles? 
We know the definition of a cycle is a closed path that has has at least one node that repeats; in our failed run, it is 0. Therefore, let us add a \texttt{path} in the recursive function, and whenever we want to expand a node, we check if it forms a cycle or not by comparing the candidate with our path. -\begin{lstlisting}[language=Python] -def recursive(g, vi, path): - #print(path, end=' ') - print(vi, end=' ') - for nv in g[vi]: - if nv not in path: - recursive(g, nv, path+[nv]) -\end{lstlisting} -Let's call it as \texttt{recursive(al, 0, [0])}, and the output that indicates the visiting order of vertices will be: -\begin{lstlisting}[numbers=none] -0 1 2 4 3 5 6 3 4 2 5 6 2 1 3 4 5 6 4 3 1 5 6 -\end{lstlisting} - \begin{figure}[!ht] - \centering - \includegraphics[width=0.9\columnwidth]{fig/depth_first_tree_search.png} - \caption{The search tree using depth-first tree search } - \label{fig:df_tree_search} -\end{figure} -\begin{figure}[!ht] - \centering - \includegraphics[width=0.5\columnwidth]{fig/free_tree_search.png} - \caption{The search tree using depth-first graph search.} - \label{fig:dfs_graph_search} -\end{figure} -And the following are all the paths comprise the search tree shown in Fig.~\ref{fig:df_tree_search}. -\begin{lstlisting}[language=Python] -[0] [0, 1] [0, 1, 2] [0, 1, 2, 4] [0, 1, 2, 4, 3] [0, 1, 2, 4, 5] [0, 1, 2, 4, 5, 6] [0, 1, 3] [0, 1, 3, 4] [0, 1, 3, 4, 2] [0, 1, 3, 4, 5] [0, 1, 3, 4, 5, 6] [0, 2] [0, 2, 1] [0, 2, 1, 3] [0, 2, 1, 3, 4] [0, 2, 1, 3, 4, 5] [0, 2, 1, 3, 4, 5, 6] [0, 2, 4] [0, 2, 4, 3] [0, 2, 4, 3, 1] [0, 2, 4, 5] [0, 2, 4, 5, 6] -\end{lstlisting} - - -\paragraph{Further Avoid Revisiting a Vertex} We see that from the above implementation, for a graph with only 7 nodes, we have been visiting nodes for 23 times. A lot of nodes have been repeating. 1 appears 3 times, 3 appears 4 times, and so on. 
As we see the visiting order being represented with a \texttt{search tree} in Fig.~\ref{fig:df_tree_search}, our complexity is getting close to $O(b^h)$, where $b$ is the branching factor and $h$ is the total vertices of the graph. If we simply want to search if a value exist in the graph or not, this is insanely complicating the situation. What we do next is to avoid revisiting the same vertex again and again, we conquer this by tracking the visiting state of a node. -\begin{lstlisting}[language=Python] -def recursive(g, vi, visited): - print(vi, end=' ') - for nv in g[vi]: - if nv not in visited: - visited.add(nv) - recursive(g, nv, visited) -\end{lstlisting} -Now, call the function as \texttt{recursive(al, 0, set([0]))}, the output is: -\begin{lstlisting}[numbers=none] -0 1 2 4 3 5 6 -\end{lstlisting} -Did you notice that this visiting order is the same as the ordering in the free tree! -\subsubsection{Introduction} -The last section actually gives us a very nice comprehensive peep at the variants of \textbf{depth-first search} in graph and its properties. -\paragraph{Two Types of Depth-first Search in Graph} We have seen how to recursively search and avoids cycle or avoids revisiting nodes, these are two variants of DFS in graph: -\begin{enumerate} - \item Depth-first Tree Search: If we recursively search as if our graph is a tree structure, this is called depth-first tree search. However, it is not \textbf{complete} because once we meet a cycle, it will never finishing checking all the vertices and the program will be eventually terminate due to stack overflow problem. We can resolve this cyclic issue by checking a vertex or a new state that is about to be expanded against with those on the path from the root to the current vertex: if it is a membership relation, we skip visiting this vertex to avoid cycle, making the search \textbf{complete}. 
- \item Depth-first Graph Search: If we limit the search to only visit each vertex exactly once, which is complete and we would both avoid cycles and the redundant paths--for example, in the tree search version, we have path [0, 1, 3], [0, 2, 1, 3], [0, 2, 4, 3], and [0, 1, 2, 4, 3], there are multiple paths between the same two vertices 0 and 3. However, in the graph search, there is only one [0, 1, 2, 4, 3]. The graph search process, from one vertex to another forms an edge, we end up with a tree connecting all vertex of the graph, which is called \textbf{Depth-first Search Tree}. In our example, it is shown in Fig.~\ref{fig:dfs_graph_search} and is exactly the same as in Fig.\ref{fig:cyclic_graph_search}. -\end{enumerate} -\paragraph{Completeness and Optimality} Both version of searching in graph can be complete, meaning if there exists one vertex that is what we are looking for, or a path that we need to find between a pair of vertices, we are sure we will find it. - -However, both of them are \textit{nonoptimal}. For example, if the task is to find the shortest path from source 0 to target 2. The shortest path should be 0->2, however depth-first graph search will return 0->1->2. For the search tree using depth-first tree search, it can find the shortest path from source 0 to 2. However, it will explore the whole left branch starts from 1 before it finds its goal node on the right side. - -\paragraph{Time and Space Complexity} We have already discussed the depth-first tree search's complexity in the last chapter. For the depth-first graph search, we use aggregate analysis. The search process covers all edges and vertices, which makes the time complexity as $O(|V|+|E|)$. For the space, it uses space $O(|V|)$ in the worst case to -store the stack of vertices on the current search path as well as the set of -already-visited vertices. 
- -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%Depth- first graph search%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\subsection{Depth-First Graph Search} -Further in this section, we focus on Graph Search and discuss more advanced properties that might help us design graph algorithms. Let's see how the DFS process using BLACK, WHITE, and GREY state work. - -As shown in Fig.~\ref{fig:dfs_search_1}, we -%Depth-first Search starts from a given source, and follows a single path in the graph to go as ``far'' as possible to visit unvisited nodes until (1) it meets a vertex that has no edge out; or (2) no unvisited adjacent vertices or say white vertices. Then it ``backtracks'' to its predecessor and start the above process again. DFS will discover all vertices that are reachable from the given source. - \begin{figure}[!ht] - \centering - \includegraphics[width=0.9\columnwidth]{fig/dfs_procedure.png} - \caption{The process of Depth-first-search. The black arrows denotes the the relation of $u$ and its not visited neighbors $v$. And the red arrow marks the backtrack edge. } - \label{fig:dfs_search_1} -\end{figure} -start from 0, mark it gray, and visit its first unvisited neighbor 1, mark 1 as gray, and visit 1's first unvisited neighbor 2, then 2's unvisited neighbor 4, 4's unvisited neighbor 3. Then we are at the fifth and the sixth subgraph in the first row. Because for 3, it does'nt have white neighbors, we mark it to be complete with black. Now, here, we ``backtrack'' to its predecessor, which is 4. In this figure, red arrow marks the backtrack edges. And then we keep the process till 6 become gray. Because 6 has no edge out any more, the state will be complete. Then backtrack to 5, 5 become black, backtrack to 4, then to 2, to 1, and eventually back to 0. We should notice the ordering of vertices become gray or black is different. From the figure, the gray ordering is \texttt{[0, 1, 2, 4, 3, 5, 6]}, and for the black is \texttt{[3, 6, 5, 4, 2, 1, 0]}. 
Therefore, it is necessary to distinguish the three states in the depth-first graph search at least. - - -\paragraph{Three states Recursive Implementation} We have already known how to implement DFS with \texttt{visited} to track the state, in this version, we want to track the three states. -We add additional \texttt{colors} list to track the color of each vertices, \texttt{orders} to track the ordering of the gray, and \texttt{completed\_orders} for ordering vertices by their ordering of turning into black--when all of a node's neighbors become black which is after the recursive call in the code. -\begin{lstlisting}[language = Python] -def dfs(g, s, colors, orders, complete_orders): - colors[s] = STATE.gray - orders.append(s) - for v in g[s]: - if colors[v] == STATE.white: - dfs(g, v, colors, orders, complete_orders) - colors[s] = STATE.black - complete_orders.append(s) - return -\end{lstlisting} -Now, we try to call the function with the same source of 0: -\begin{lstlisting}[language=Python] -v = len(al) -orders, complete_orders = [], [] -colors = [STATE.white] * v -dfs(al,0, colors, orders, complete_orders) -\end{lstlisting} -Now, the \texttt{orders} and \texttt{complete\_orders} will end up differently: -\begin{lstlisting}[numbers=none] -[0, 1, 2, 4, 3, 5, 6] [3, 6, 5, 4, 2, 1, 0] -\end{lstlisting} - -\subsubsection{Properties of Depth-first Graph Search} - -\paragraph{Depth-first Search Tree} We have already defined the depth-first search tree which connects all reachable vertices from a given source in the graph in the form of a tree, where an edge is called \textit{tree edge}, which is the predecessor and successor, say it is $u$ and $v$ respectively. It can only be a tree edge if the color of $v$ is white when it is first explored in the search. A \textbf{back edge} $(u, v)$ is an edge that connects $v$ back to its predecessor $u$. 
\textcolor{red}{The condition is when the edge is explored, it will be an back edge if $u$ is gray--meaning it is being explored but not done with all of its children. The back edge will have opposite direction compared with the tree edge, and they are where }. - -\paragraph{Parenthesis Structure} In DFS, the discovered time and the finish time has the parenthesis structure. In our example, we use a static variable \texttt{t} of function \texttt{dfs} to track the time. \texttt{dt} and \texttt{ft} is used to record the first discovered and finished time. Now our dfs is defined as: -\begin{lstlisting}[language=Python] -def dfs(g, s, colors, dt, ft): - dfs.t += 1 # static variable - colors[s] = STATE.gray - dt[s] = dfs.t - for v in g[s]: - if colors[v] == STATE.white: - dfs(g, v, colors, dt, ft) - dfs.t += 1 - ft[s] = dfs.t - return -\end{lstlisting} -Now, we call the above function: -\begin{lstlisting}[language=Python] -v = len(al) -dt, ft = [-1] * v, [-1] * v -colors = [STATE.white] * v -dfs.t = -1 -dfs(al,0, colors, dt, ft) -\end{lstlisting} -From the discover time and finish time list, we can generate a new list of merged order \texttt{merge\_orders} that arrange the node along the time. And we print out the node the first time it appears as `(v,' and second time as `v)'. 
-\begin{lstlisting}[language=Python] -def parenthesis(dt, ft, n): - merge_orders = [-1] * 2 * n - for v, t in enumerate(dt): - merge_orders[t] = v - for v, t in enumerate(ft): - merge_orders[t] = v - - print(merge_orders) - nodes = set() - for i in merge_orders: - if i not in nodes: - print('(', i, end = ', ') - nodes.add(i) - else: - print(i, '),', end = ' ') -\end{lstlisting} -The output is: -\begin{lstlisting}[language=Python] -[0, 1, 2, 4, 3, 3, 5, 6, 6, 5, 4, 2, 1, 0] -( 0, ( 1, ( 2, ( 4, ( 3, 3 ), ( 5, ( 6, 6 ), 5 ), 4 ), 2 ), 1 ), 0 ), -\end{lstlisting} -We would easily find out that the ordering of nodes according to the discovery and finishing time makes a well-defined expression in the sense that the parentheses are properly nested. - -\subsubsection{Iterative Implementation} -In this book, we give two different versions of iterative implementations: (1) DFS, but not preserving the same discovering order (1) keep only the discovering order (2) keep both the discovering order and the finishing order. With these two iterative implementations, we have flexibility to pick one that work out the best. -\paragraph{Method 1: Not preserving the discovering ordering} - -The states that the node has first discovered last completed property, which means using a \texttt{stack} data structure we are able to implement iterative DFS. -For example, we first have stack $[0]$, then we put all of its unvisited vertices in $[1, 2]$, then we deal 2, and put all of 2's white neighbors in $[1, 2, 4]$, then 4, $[1, 2, 3, 5]$. Then 5, [1, 2, 3, 6]. Then it is 6, 3, 2, 1. Thereforce, the ordering of each vertex first in the stack is $[0, 1, 2, 4, 3, 5, 6]$. 
We have the following code: -\begin{lstlisting}[language=Python] -def dftIter(g, s): - '''not preserving the same discovery ordering''' - n = len(g) - orders = [] - colors = [STATE.white] * n - stack = [s] - - orders.append(s) # track gray order - colors[s] = STATE.gray - - while stack: - u = stack.pop() - - for v in g[u]: - if colors[v] == STATE.white: - colors[v] = STATE.gray - stack.append(v) - orders.append(v) # track gray order - - return orders -\end{lstlisting} -Run the above code with \texttt{dftIter(al, 1)}, we have ordering $[[1, 2, 3, 4, 5, 6, 0]$, which is different from the recursive DFS version $[[1, 2, 0, 4, 3, 5, 6]$. This is due to the different mechanism. To keep the same ordering of discovering order is not important. However, in our source code, we does provide a way to keep the same discovering ordering. - -\paragraph{Method 2: Preserving both Discover and Finish Ordering} - Because in DFS each time, we start from $u$, we find one unvisited node $u_1$ and move forward to find one unvisited node $u_2$ of $u_1$, until we met a node that has no unvisited adjacent nodes $v$. The visiting order will be $u, u_1, u_2,..., v$ , For this process, we use a \texttt{stack} to save these nodes, each time to append it at the end and this marks the state as gray, and each time visit the end node in the stack. In this process, there is no pop out from the stack. If we are at $v$, which can not move the path farther, this marks the state as black, and means the state is complete and this node is ready to be moved out of the stack. Therefore, in the implementation, a bool variable \texttt{bAdj} to check if we are able to find an unvisited node or not. If we can not find one, then we pop out, if we can, we break the loop because we just need one unvisited node. 
-\begin{lstlisting}[language = Python] -def dfsIter(g, s): - '''iterative dfs''' - v = len(g) - orders, complete_orders = [], [] - colors = [STATE.white] * v - stack = [s] - - orders.append(s) # track gray order - colors[s] = STATE.gray - - while stack: - u = stack[-1] - bAdj = False - for v in g[u]: - if colors[v] == STATE.white: - colors[v] = STATE.gray - stack.append(v) - orders.append(v) # track gray order - bAdj = True - break - - if not bAdj: # if no adjacent is found, pop out - # complete - colors[u] = STATE.black # this is not necessary in the code, just to help track the state - complete_orders.append(u) - stack.pop() - - return orders, complete_orders -\end{lstlisting} -Call \texttt{print(dfsIter(al, 0))}, the above code will have the same output as of the recursive implementation. -\begin{lstlisting}[numbers=none] -([0, 1, 2, 4, 3, 5, 6], [3, 6, 5, 4, 2, 1, 0]) -\end{lstlisting} - - - - - - - - - -% As we have mentioned in Chapter~\ref{chapter_introduction_to_search}, there are generally three different searching strategies according to the orders in which nodes are expanded: breath-first search, depth-first search and priority-first search. In this section, we focus on the concepts instead of the exact implementation which varies to different data structures and will be detailed in the remaining sections. - -% Also, it is important to make clear of the concept of the tree-search and graph search. Tree search can happen on either a tree or a graph data structure. - -% \paragraph{Tree Search on Tree Data Structure} On the tree data structure, because there will be one and only one path between any two nodes in the tree, so normally we do not need to check the repeated nodes. - -% \paragraph{Graph or Tree Search on Graph Data Structure} On the graph data structure, first, there might be more than one path between some two nodes in the graph. Second, there might have loop exist. 
Therefore, the common strategy in graph search is to use a set data structure to check repeated nodes, that is graph search will visit each node one and only once. The visiting order of the nodes can be connected as a search tree( that connects all $n$ vertexs with $n-1$ edges). This avoids the redundant paths and the loop. - -% We can also treat the graph as a tree, that the source node is a root, and any neighboring nodes will be children. In the graph, whenever to judge if a neighboring node is a child or not, we check if we have already visited this node from our path (it can not be our parent or grandparent node). So, this is a tree-search version on the graph data structure. - -\subsection{Breath-First Search in Graph} -\subsubsection{Exploring} -\begin{figure}[!ht] - \centering - \includegraphics[width=0.4\columnwidth]{fig/bfs_free_tree.png} - \includegraphics[width=0.4\columnwidth]{fig/cyclic_graph_searching.png} - \caption{Left: A free tree, Right: A cyclic Graph} - \label{fig:cyclic_graph_search_bfs} -\end{figure} -\paragraph{Free Tree Search} Similarly, we can smoothly translating the breath-first search in the recursive tree with queue into the free tree traversal. We need to choose a starting vertex in the free tree which is indeed a graph. The code is: -\begin{lstlisting}[language=Python] -def bfs(g, start): - if not g: - return - q = [start] - while q: - node = q.pop(0) # get node at the front of the queue - print(node, end=' ') - # Visist the neighbors - for neig in g[node]: - q.append(neig) -\end{lstlisting} -Now, calling the function with \texttt{start=0}, we have a visiting ordering of: -\begin{lstlisting}[numbers=none] -0 1 2 3 4 5 6 -\end{lstlisting} -\paragraph{Cycle} Move on to the same graph used in DFS. Same here, to avoid the \texttt{while} loop to hang and run forever, we need an approach to avoid cycle. 
If our purpose is to enumerate all acyclic paths in the graph, we won't try to use breadth-first search, because in order to do so, for each node that goes into the queue, we need to save the corresponding full path from the start to that node, which takes a lot of extra memory compared with using DFS. -\paragraph{Avoid Revisiting a Vertex} We follow the depth-first graph search where each vertex is visited only once. We use a \texttt{visited} set too. -\begin{lstlisting}[language=Python] -def bfs(g, start): - if not g: - return - q = [start] - visited = set([start]) - while q: - node = q.pop(0) # get node at the front of the queue - print(node, end=' ') - # Visist the neighbors - for neig in g[node]: - if neig not in visited: - visited.add(neig) - q.append(neig) -\end{lstlisting} -The printout from calling the function with \texttt{start=0} on the graph is: -\begin{lstlisting}[numbers=none] -0 1 2 3 4 5 6 -\end{lstlisting} -We can observe that the visiting order of the nodes is exactly the same as in the free tree. Suppose we record an edge from the predecessor node to the current node each time a vertex is discovered: for example, at first, we start at 0 and add edges (0, 1) and (0, 2) to a tree structure. Next, we expand 1, which has unvisited neighbors 3 and 4, so we add edges (1, 3) and (1, 4). At the end, all of these tracked edges will form a tree, which is exactly the same as the free tree in Fig.~\ref{fig:cyclic_graph_search_bfs}. We call such a tree a \textbf{Breadth-first Search Tree}. The tree contains all vertices reachable from the source $s$. If we denote its nodes as $V_t$ and its edges, each going from a node's predecessor to that node, as $E_t = \{(\pi[v], v) : v \in V_t, v \neq s\}$, then the subgraph $(V_t, E_t)$ is called the \textbf{Predecessor Subgraph}. With the search tree, we can see that the path from the source to any other vertex is a shortest path, measured by the length of the path, which is clearly not the case in the Depth-first Search Tree.
-\subsection{Introduction} - Given a graph $G = (V, E)$, and a \textit{source} vertex $s$, the aim of Breadth-first search is to explore the edges of $G$ to discover all vertices that are reachable from the source $s$ just as the depth-first search. However, BFS visits vertices that are reachable from the source in the order of distance from the source. More specifically, let us use $d$ to denote the distance; in the example, we are given vertex $0$ as the source. First, we visit its neighbors $1, 2$ since they are the closest ones among all the other vertices in the graph with $d=1$. The edges $(0, 1)$ and $(0, 2)$ are added to the BFS tree. Next, we move to 0's first neighbor 1, and visit 1's unvisited neighbors, $3, 4$ with $d=2$. The whole process is depicted in Fig.~\ref{fig:bfs_search}. - \begin{figure}[!ht] - \centering - \includegraphics[width=0.9\columnwidth]{fig/bfs_example_1.png} - \caption{The process of breadth-first search. The black arrows denote the relation of $u$ and its unvisited neighbors $v$. All these edges construct a breadth-first tree. The visiting order of BFS starting from vertex $0$ is [0, 1, 2, 3, 4, 5, 6].} - \label{fig:bfs_search} -\end{figure} -\paragraph{Shortest Paths, Completeness, and Optimality} The BFS strategy can produce the shortest path from a source to any reachable vertex. This indicates that breadth-first graph search is \textbf{complete}: if there exist paths from the source to a reachable vertex, we will find one of them--the shortest one. Thus, if our goal test is to find the shortest path to a target, we would find it at the distance $d_t$, which further makes it \textbf{optimal}. Depth-first graph search, in contrast, is complete, in that it can find a path to any reachable vertex, but it is not optimal.
To be able to find the shortest path, we have to use the depth-first tree search to enumerate all possible acyclic paths between the source and the target; only then can we compare all the candidates and get the shortest one, which makes the complexity as large as $O(b^d)$, instead of $O(b^{d_t})$ as in the case of BFS. - \begin{figure}[!ht] - \centering - \includegraphics[width=0.3\columnwidth]{fig/bfs_shortest_dis.png} - \caption{Visualize the BFS in level by level fashion.} - \label{fig:bfs_search_shortest_path} -\end{figure} - -\paragraph{Shortest Paths} We want to prove that breadth-first graph search generates a breadth-first search tree in which the path between the root and any other node is a shortest path measured in length. We can prove the correctness using mathematical induction. At first, the frontier has only one node, the source; its path has length 0, which is the trivial base case. Next, we assume our frontier set has $n-1$ vertices, of which $m$ nodes are still in the exploring state, and that all of them have shortest paths to the source; the path length from the exploring set to the source is $l_m$. We then just need to prove that the $m_u$ unvisited neighboring vertices of the $m$ exploring nodes also get shortest paths to the source. We can argue that, because the $m_u$ vertices are not visited yet, they do not belong to the explored set or the frontier set, making it impossible for them to have a path length as short as $l_m$. Because they are neighbors of the exploring nodes, their path length to the source is $l_m+1$, which is the minimum among all options. Breadth-first graph search is indeed a greedy algorithm with respect to path length. We visualize this mathematical induction in Fig.~\ref{fig:bfs_search_shortest_path}. - -So far, we know we can get the shortest path, but how do we print out the shortest paths between the source and the other vertices at the same time? In DFS, path recording is more straightforward.
In BFS, a similar way as of DFS is to track each node's all predecessors. Say we have \texttt{(n, [s, p\_1, p\_2, ..., p\_m]}. It is doable, programmingly speaking. But it is too costly, we are enlarging the space complexity by $d$ times, making it $O(|V|*d)$. A better way, that only adds $O(|V|)$ space is to only save all edges of the Breath-first search tree, not with edge list representation, but with a list where it is either indexed by predecessor or successor and valued by the other. So, which one to choose? Look at the BF tree, say if we want to get the shortest path between 0 and 5. If we start from 0, there are branches, that we need to search through all the nodes in order to find and reconstruct the path $(0\rightarrow 5)$. However, if we start at node 5, and search in opposite direction, always track back its predecessor, we will eventually get to the source, where in our case is 0. This is more efficient, thus the answer is we index our edge list by successor and value it by the predecessor. Let's name it as predecessor list \texttt{pl}, and we can remember this with \texttt{pl[s]=p}. We start the predecessor of the source vertex to be itself, which will has a length of 0. This predecessor can also be replaced by \texttt{dict} to enable random access. In addition, we add a distance list to track the distance of the shortest path from source to other nodes. Now, our code looks like: -\begin{lstlisting}[language=Python] -def bfs_path(g, start): - if not g: - return - v = len(g) - pl = [None] * v # Predecessor list - dl = [0] * v # Distance list - q = [start] - visited = set([start]) - while q: - node = q.pop(0) - for neig in g[node]: - if neig not in visited: - pl[neig] = node - dl[neig] = dl[node] + 1 - visited.add(neig) - q.append(neig) - - return pl, dl -\end{lstlisting} -\paragraph{Print Shortest Path} To be able print the path of $s$ and $t$ we can start with $t$ and traverse back to $s$ through the predecessor. 
That is we first check out $t$'s predecessor if it has one, and then the predecessor's predecessor, and so on. There are two ways to do it: recursive and iterative. - -In the recursive solution, we first call \texttt{(s, t)}, and then \texttt{(s, pl[t])} to find its predecessor, and up till the base case where the source and the target is the same. Because we are visiting the path from source to target in reversed order, remember in the recursive function, there are always two passes: before the recursive call and after the recursive call. If we update \texttt{path} before the recursive call, we need to reverse the list, which can be avoid if we update it afterwards. -\begin{lstlisting}[language=Python] -def get_path(s, t, pl, path): - if s == t: - pass - elif pl[t] is None: - print('no path from ', s, ' to ', t) - else: - get_path(s, pl[t], pl, path) - path.append(t) - return -\end{lstlisting} -Now, with the example of \texttt{s=0, t=5}, we have the following output: -\begin{lstlisting}[numbers=none] -\end{lstlisting} - -In the iterative solution, we use a \texttt{while} loop to visit the predecessor till we meet the source. The path will needed to reversed at the end. -\begin{lstlisting}[language=Python] -def get_path(s, t, pl): - p = t - path = [] - while p != s: - path.append(p) - p = pl[p] - path.append(s) - return path[::-1] -\end{lstlisting} - -\begin{bclogo}[couleur = blue!30, arrondi=0.1,logo=\bccrayon,ombre=true]{Please implement breath-first-search that multiple start vertices are given? } -\end{bclogo} - -\subsubsection{Complexity Analysis} Because in the process of BFS, each vertex is enqued and dequeued exactly one time, this process takes $O(|V|)$. Because at each iteration, it scans each edge exactly once too, this takes $O(|E|)$. Sums up we have the total time complexity of $O(|V|+|E|)$. 
- -\paragraph{Time Complexity} The time complexity is bounded by the size of the state space, which is essentially a graph, which makes our complexity bound $O(|V|+|E|)$. A tighter bound on the time complexity of breadth-first search is the number of nodes in its search tree. Starting from the root node, assume a uniform tree in which every node has the same branching factor $b$. The number of nodes at each level $i$ will be $b^i, i\in[0, d]$. Now, suppose that the solution is at depth $d$. In the worst case, it is the last node generated at the $d$-th level. Then the total number of nodes is: -\begin{equation} - \sum_{i=0}^{d} b^i = O(b^d) -\end{equation} - -\paragraph{Space Complexity} For breadth-first search, in some conditions, such as in a graph, we need to keep two sets: the \textit{explored set} and the \textit{frontier list}. The space complexity reaches its maximum at the last level, where there will be $O(b^{d-1})$ nodes in the explored set and $O(b^d)$ nodes in the frontier. If it is a tree, we do not need to keep the explored set to avoid loops; therefore, the space complexity comes from the space taken by the frontier list. An exponential complexity bound such as $O(b^d)$ is actually scary. - -(table time and memory requirements for breadth-first search) - -Two lessons can be learned from this table. First, the memory requirements are a bigger problem for breadth-first search than the execution time. The second lesson is that time is still a major factor. If your problem has a solution at depth 16, then it will take about 350 years for breadth-first search to find it. Knowing the time complexity of search strategies can guide us to find more efficient algorithms to solve real problems. - - \subsubsection{Applications} The common problems that can be solved by BFS are those that need only one solution: the best one, such as getting the shortest path.
As we will learn later that breath-first-search is commonly used as archetype to solve graph optimization problems, such as Prim's minimum-spanning-tree algorithm and Dijkstra's single-source-paths algorithm. - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%Tree Traversal%%%%%%%%%%%%%%%%%%%% -\section{Tree Traversal} -\subsection{Depth-First Tree Traversal} -%\paragraph{Tree traversal for the rooted tree} -\begin{figure}[H] - \centering - \includegraphics[width = .45\columnwidth]{fig/binary_tree_example.png} - \caption{Exemplary Binary Tree } - \label{fig:binary_tree_traversal_example} -\end{figure} -\subsubsection{Introduction} -% Let us see how we can iterate all nodes of the recursive tree we just constructed. -Depth-first search starts at the root node and continues branching down a particular path; it selects a child node that is at the deepest level of the tree from the frontier to expand next and defers the expansion of this node's siblings. Only when the search hits a dead end (a node that has no child) does the search ``backtrack'' to its parent node, and continue to branch down to other siblings that were deferred. A recursive tree can be traversed recursively. We print out the value of current node, then apply recursive call on the left and right node; by treating each node as a subtree, naturally a recursive call to a node can be thought of handling the traversal of that subtree. The code is quite straightforward: - -%The root node is like a queen, she sent out two assistants to traverse all provinces, and these two assistants further send out its sub finish the tasks and to combine the result is what the queen herself needs to do. 
Let us write the following recursive traversal function and observe its output first: -\begin{lstlisting}[language=Python] -def recursive(node): - if not node: - return - print(node.val, end=' ') - recursive(node.left) - recursive(node.right) -\end{lstlisting} -Now, we call this function with a tree as shown in Fig.~\ref{fig:binary_tree_traversal_example}, the output that indicates the traversal order is: -\begin{lstlisting}[language=Python] -1 2 4 5 3 6 -\end{lstlisting} - -% As we see, all three types of traversal, the search process where we say search tree deepened as much as possible on each child before going visiting the next sibling, this is also called \textbf{depth-first search} and - -% \paragraph{Backing Implementation of Depth First Search} -% We know that the recursion is implemented implicitly with call stack, - -\subsubsection{Three Types of Depth-first Tree Traversal} -\begin{figure}[!ht] - \centering - \includegraphics[width = .99\columnwidth]{fig/tree_traversal.png} - \caption{Left: PreOrder, Middle: InOrder, Right: PostOrder. The red arrows marks the traversal ordering of nodes.} - \label{fig:binary_tree_traversal} -\end{figure} -The visiting ordering between the current node, its left child, and its right child decides the following different types of recursive tree traversals: - -\begin{enumerate}[label=(\alph*)] -\item Preorder Traversal with ordering of \texttt{[current node, left child, right child]}: it visits the nodes in the tree with ordering [1, 2, 4, 5, 3, 6].In our example, the recursive function first prints the root node 1, then goes to its left child, which prints out 2. Then it goes to node 4. From node 4, it next moves to its left child which is empty and leads to the termination of the recursive call and then the recursion backward to node 4. Since node 4 has no right child, it further backwards to node 2, and then it check 2's right child 5. The same process of node 4 happens on node 5. 
It backwards to node 2, backwards to node 1, and keep visiting its right child 3, and the process goes on. We draw out this process in Fig.~\ref{fig:binary_tree_traversal}. -\item Inorder Traversal with ordering of \texttt{[left child, current node, right child]}: it traverses the nodes in ordering of [4, 2, 5, 1, 3, 6]. Three segments will appear with the inorder traversal for a root node: nodes in left subtree, root, and nodes in the right subtree. -\item Postorder Traversal with ordering of \texttt{[left child, right child, current node]}: it traverses the nodes in ordering of [4, 5, 2, 6, 3, 1]. -\end{enumerate} -We offer the code of Inorder Traversal: -\begin{lstlisting}[language=Python] -def inorder_traversal(node): - if not node: - return - inorder_traversal(node.left) - print(node.val, end=' ') - inorder_traversal(node.right) -\end{lstlisting} - - - - -\begin{bclogo}[couleur = blue!30, arrondi=0.1,logo=\bccrayon,ombre=true]{Try to check the other two orderings: \texttt{[left child, current node, right child]} and \texttt{[left child, right child, current node]} by hand first and then write the code to see if you get it right?} -\end{bclogo} - - - -\subsubsection{Return Values} -Here, we want to do the task in a different way: We do not want to just print out the visiting orders, but instead write the ordering in a list and return this list. How would we do it? The process is the same, other than we need to return something(not \texttt{None} which is default in Python). If we only have empty node, it shall return us an empty list \texttt{[]}, if there is only one node, returns \texttt{[1]} instead. - -Let us use PreOrder traversal as an example. To make it easier to understand, the same queen this time wants to do the same job in a different way, that she wants to gather all the data from these different states to her own hand. This time, she assumes the two generals A and B will return a \texttt{list} of the subtree, safely and sount. 
Her job is going to combine the list returned from the left subtree, her data, and the list returned from the right subtree. Therefore, the left general brings back $A=[2,4,5]$, and the right general brings back $B=[3, 6]$. Then the final result will be $queen + A + B = [1,2,4,5,3, 6]$. The Python code is given: -\begin{lstlisting} [language = Python] -def PreOrder(root): - if root is None: - return [] - ans = [] - left = PreOrder(root.left) - right = PreOrder(root.right) - ans = [root.val] + left + right - return ans -\end{lstlisting} -\paragraph{An Example of Divide and Conquer} Being able to understand the returned values and combine them is exactly the method of \texttt{divide and conquer}, one of the fundamental algorithm design principles. This is a seemingly trivial change, but it approaches the problem solving from a totally different angle: from atomic searching to divide and conquer that highlights the structure of the problem. The printing traversal and returning traversal represent two types of problem solving: the first is through searching--searching and treating each node more separately and the second is through reduce and conquer--reducing the problem to a series of smaller subproblems(subtrees where the smallest are empty subtrees) and constructing the result by using the information of the current problem and the solutions of the subproblems. - -\subsubsection{Complexity Analysis} -It is straightforward to see that it visits each node only twice, once in the forward pass and once in the backward pass of the recursive call, making the time complexity linear in the total number of nodes, $O(n)$. Alternatively, using the recurrence relation, we can write $T(n)=2\times T(n/2)+O(1)$, which also gives $O(n)$. 
-% Similarly, the recursive code for the InOrder Traversal and PostTraversal: -% \begin{lstlisting}[language = Python] -% def InOrder(root): -% if root is None: -% return [] -% res = [] -% left = InOrder(root.left) -% #print(root.val, end=',') -% right = InOrder(root.right) -% res = left + [root.val]+ right -% return res - -% def PostOrder(root): -% if root is None: -% return [] -% res = [] -% left = PostOrder(root.left) -% #print(root.val, end=',') -% right = PostOrder(root.right) -% res = left + right + [root.val] -% return res -% print(InOrder(root)) -% print(PostOrder(root)) -% # output -% #[4, 2, 5, 1, 3] -% #[4, 5, 2, 3, 1] -% \end{lstlisting} -\subsection{Iterative Tree Traversal} -In Chapter Iteration and Recursion, we would know that the recursive function might suffer from the stack overflow, and in Python the recursion depth is $1000$. This section, we explore iterative tree traversals corresponding to PreOrder, InOrder, and PostOrder tree traversal. We know that the recursion is implemented implicitly with call stack, therefore in our iterative counterparts, they all use an explicit stack data structure to mimic the recursive behavior. -\begin{figure}[!ht] - \centering - \includegraphics[width = .9\columnwidth]{fig/iterative_tree_traversal_preorder.png} - \caption{The process of iterative preorder tree traversal.} - \label{fig:iterative_tree_traveral_preorder} -\end{figure} - -\paragraph{Simple Iterative Preorder Traversal} If we know how to implement a DFS iteratively with stack in a graph, we know our iterative preorder traversal. In this version, the stack saves all our frontier nodes. -\begin{itemize} - \item At first, we start from the root, and put it into the stack, which is 1 in our example. - \item Our frontier set has only one node, thus we have to pop out node 1 and expand the frontiner set. When we are expanding node 1, we add its children into the frontier set by pushing them into the stack. 
In the preorder traversal, the left child should be expanded first from the frontier stack, indicating we should push the left child into the stack after the right child has been pushed. Therefore, we push node 3 and then node 2 into the stack. - \item We continue step 2. Each time, we expand the frontier stack by popping out the topmost node and then pushing its children into the stack. This way, we use the last-in, first-out ordering of the stack data structure to replace the recursion. -\end{itemize} -We illustrate this process in Fig.~\ref{fig:iterative_tree_traveral_preorder}. The code is shown as: -\begin{lstlisting} [language = Python] -def PreOrderIterative(root): - if root is None: - return [] - res = [] - stack = [root] - while stack: - tmp = stack.pop() - res.append(tmp.val) - if tmp.right: - stack.append(tmp.right) - if tmp.left: - stack.append(tmp.left) - return res -\end{lstlisting} -%Even we know what is our main auxiliary data structure, we are no where close to the conversion. In the recursion, there are always two passes of visiting each state, while this is not the case of the iteration. -\begin{figure}[!ht] - \centering - \includegraphics[width = .9\columnwidth]{fig/iterative_tree_traversal_postorder.png} - \caption{The process of iterative postorder tree traversal.} - \label{fig:iterative_tree_traveral_postorder} -\end{figure} - -\paragraph{Simple Iterative Postorder Traversal} Similar to the above preorder traversal, the postordering is the ordering of nodes finishing the expanding of both its left and right subtree, thus with the ordering of \texttt{left subtree}, \texttt{right subtree}, and \texttt{root}. In preorder traversal, we obtained the ordering of \texttt{root}, \texttt{left subtree}, and \texttt{right subtree}. If we reverse this ordering, it becomes \texttt{right subtree}, \texttt{left subtree}, and \texttt{root}. This ordering only differs from postorder by a single swap between the left and right subtree. 
So, we can use the same process as in the preorder traversal but push a node's children in the order of left and right child instead of right and left. Then the reversed ordering of items being popped out is the postorder traversal ordering. The process is shown in Fig.~\ref{fig:iterative_tree_traveral_postorder}. The Python implementation is shown as: -\begin{lstlisting}[language=Python] -def PostOrderIterative(root): - if root is None: - return [] - res = [] - stack = [root] - while stack: - tmp = stack.pop() - res.append(tmp.val) - if tmp.left: - stack.append(tmp.left) - if tmp.right: - stack.append(tmp.right) - return res[::-1] -\end{lstlisting} - -\paragraph{General Iterative Preorder and Inorder Traversal } In the depth-first-traversal, we always branch down via the left child of the node at the deepest level in the frontier. The branching only stops when it can no longer find a left child for the deepest node in the frontier. Only then does it turn to expanding the right child of this deepest node, and if no such right child exists, it backtracks to its parent node and continues to check its right child to continue the branching down process. - -\begin{figure}[!ht] - \centering - \includegraphics[width = .9\columnwidth]{fig/iterative_tree_traversal.png} - \caption{The process of iterative tree traversal.} - \label{fig:iterative_tree_traveral} -\end{figure} - -Inspired by this process, we use a pointer, say \texttt{cur} to point to the root node of the tree, and we prepare an empty \texttt{stack}. The iterative process is: -\begin{itemize} - \item The branching down process can be implemented with visiting \texttt{cur} node, and pushing it into the \texttt{stack}. And then we set \texttt{cur=cur.left}, so that it keeps deepening down. - \item When one branch down process terminates, we pop out a node from \texttt{stack}, and we set \texttt{cur=node.right}, so that we continue the branching process in its right subtree. 
-\end{itemize} -We illustrate this process in Fig.~\ref{fig:iterative_tree_traveral}. The ordering of items pushed into the stack is the preorder traversal ordering, which is [1, 2, 4, 5, 3, 6]. And the ordering of items being popped out of the stack is the inorder traversal ordering, which is [4, 2, 5, 1, 3, 6]. - -\paragraph{Implementation} We use two lists--\texttt{preorders} and \texttt{inorders}--to save the traversal orders. The Python code is: -\begin{lstlisting}[language=Python] -def iterative_traversal(root): - stack = [] - cur = root - preorders = [] - inorders = [] - while stack or cur: - while cur: - preorders.append(cur.val) - stack.append(cur) - cur = cur.left - node = stack.pop() - inorders.append(node.val) - cur = node.right - return preorders, inorders -\end{lstlisting} - - -% \paragraph{Iterative PreOrder Traversal} Here is a common mistake we would make: we think we start at 1, put 1 in a stack, [1], then move to 2, have stack [1, 2], then move to 4, have a stack [1, 2, 4]. Now, 4 has no left child and no right child, we pop it out, and moves back to 2, then 2 would still have the left tree, which we end up with infinite loop. -% \begin{lstlisting}[language=Python] -% def preorder_iter(root): -% if not root: -% return -% stack = [root] -% print(root.val, end=' ') -% i = 0 -% while stack: -% i += 1 -% if i==10: -% return -% node = stack[-1] -% while node.left: -% print(node.left.val, end=' ') -% stack.append(node.left) -% node = node.left -% node = stack[-1] -% if node.right: -% print(node.right.val, end=' ') -% stack.append(node.right) -% else: -% stack.pop() -% \end{lstlisting} -% We will end up with the print out: -% \begin{lstlisting}[numbers=none] -% 1 2 4 4 4 4 4 4 4 4 4 -% \end{lstlisting} -% This means when we are - - -% \paragraph{PostOrder Iterative Tree Traversal} Need to explain better!!! 
-% \begin{lstlisting}[language = Python] -% def postorderTraversal(self, root): -% if root is None: -% return [] -% res = [] -% stack = [root] -% while stack: -% tmp = stack.pop() -% res.append(tmp.val) -% if tmp.left: -% stack.append(tmp.left) -% if tmp.right: -% stack.append(tmp.right) -% return res[::-1] -% \end{lstlisting} -% \paragraph{InOrder Iterative}. In the inorder, we need to print out all the left subtree first, and then the root, followed by the right. The process is as follows: -% \begin{lstlisting} -% 1) Create an empty stack S. -% 2) Initialize current node as root -% 3) Push the current node to S and set current = current->left until current is NULL -% 4) If current is NULL and stack is not empty then -% a) Pop the top item from stack. -% b) Print the popped item, set current = popped_item->right -% c) Go to step 3. -% 5) If current is NULL and stack is empty then we are done. -% \end{lstlisting} -% \begin{lstlisting}[language=Python] -% def InOrderIterative(root): -% if root is None: -% return [] -% res = [] -% stack = [] -% current = root -% while current: -% stack.append(current) -% current = current.left - -% while stack: -% tmp = stack.pop() -% res.append(tmp.val) -% current = tmp.right -% while current: -% stack.append(current) -% current = current.left - -% return res -% \end{lstlisting} -% Another way to write this: -% \begin{lstlisting}[language=Python] -% def inorder(self, root): -% cur, stack = root, [] -% while cur or stack: -% while cur: -% stack.append(cur) -% cur = cur.left -% node = stack.pop() -% print(node.val) -% cur = node.right -% \end{lstlisting} -% \begin{lstlisting}[language=Python] -% def inorder_iter(root): -% if not root: -% return -% stack = [] -% node = root -% i = 0 -% while stack or node: -% print_stack(stack) -% if node: -% stack.append(node) -% node = node.left -% else: -% node = stack.pop() -% print(node.val, end = ' ') -% node = node.right -% \end{lstlisting} - -%%%%%%%%%%%BFS tree traversl%%%%%%%%%%%%%% 
-\subsection{Breadth-first Tree Traversal} -\label{bfs_tree_traversal} -% \begin{figure}[!ht] -% \centering -% \includegraphics[width=0.96\columnwidth]{fig/general_breath_first_search.png} -% \caption{Breath-first search on a simple search tree. At each stage, the node to be expanded next is indicated by a marker. } -% \label{fig:breath_first_search_strategy} -% \end{figure} -\begin{figure}[H] - \centering - \includegraphics[width = .45\columnwidth]{fig/binary_tree_example.png} - \caption{Draw the breadth-first traversal order } - \label{fig:binary_tree_traversal_example_bfs} -\end{figure} -Instead of traversing the tree recursively deepening down each time, the alternative is to visit nodes level by level, as illustrated in Fig.~\ref{fig:binary_tree_traversal_example_bfs} for our exemplary binary tree. We first visit the root node 1, and then its children 2 and 3. Next, we visit 2 and 3's children in order, going to nodes 4, 5, and 6. This type of Level Order Tree Traversal uses the \textbf{breadth-first search strategy} which differs from our covered depth-first search strategy. As we see in the example, the root node is expanded first, then all successors of the root node are expanded next, and so on, following a level by level ordering. We can also find the rule: the nodes that come first get expanded first. For example 2 is first visited and then 3, thus we expand 2's children first. Then we have 4 and 5. Next, we expand 3's children. This first-come, first-expanded rule tells us we can rely on a queue to implement BFS. - - - - -\paragraph{Simple Implementation} We start from the root, say it is our first level, put it in a list named \texttt{nodes\_same\_level}. Then we use a \texttt{while} loop, and each loop we visit all children nodes of \texttt{nodes\_same\_level} from the last level. 
We put all these children in a temporary list \texttt{temp}, before the loop ends, we assign \texttt{temp} to \texttt{nodes\_same\_level}, until the deepest level where no more children nodes will be found and leave our \texttt{temp} list to be empty and our while loop terminates. -\begin{lstlisting}[language = Python] -def LevelOrder(root): - if not root: - return - nodes_same_level = [root] - while nodes_same_level: - temp = [] - for n in nodes_same_level: - print(n.val, end=' ') - if n.left: - temp.append(n.left) - if n.right: - temp.append(n.right) - nodes_same_level = temp -\end{lstlisting} -The above will output follows with our exemplary binary tree: -\begin{lstlisting}[language=Python] -1 2 3 4 5 6 -\end{lstlisting} - -\paragraph{Implementation with Queue} As we discussed, we can use a FIFO queue to save the nodes waiting for expanding. In this case, at each \texttt{while} we only handle one node that are at the front of the queue. -\begin{lstlisting}[language=Python] -def bfs(root): - if not root: - return - q = [root] - while q: - node = q.pop(0) # get node at the front of the queue - print(node.val, end=' ') - if node.left: - q.append(node.left) - if node.right: - q.append(node.right) -\end{lstlisting} - - - - - - - - - %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%Hands on examples%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - \subsection{Hands-on Examples} - - \subsubsection{Get a more straightforward example} Add an example - - - -% If we model each element in the array as a node in the graph, and assume given node $\mu$ with index $i$, if the element $v$ with index $j$, $j > i$, is larger than $\mu$, there will be an edge $\mu \rightarrow v$. We draw the graph shown in Fig.~\ref{fig:tree_lis}. The problem is modeled as finding the longest path in the graph, which can be solved with either DFS or BFS. - -% We define \texttt{curlen} as the length of increasing sequence from start node $[]$ to the current node, which can have $-\infty$ as value. length up. 
For example, at the leftmost node $101$, $curlen=2$ and the lowest $101$ node will have $curlen=4$ which is our longest LIS. Therefore, we would need a global variable $maxlen$ to track the maximum LIS. - -% \paragraph{Depth-first Graph Search} The implementation of Python is provided: -% \begin{lstlisting}[language=Python] -% import sys - -% def dfs(curIdx, preV, curlen, a, maxlen): - -% for i in range(curIdx+1, len(a)): -% # if a condition is satisfied, move to that node instead -% if a[i] > preV: -% dfs(i, a[i], curlen+1, a, maxlen) -% maxlen[0] = max(maxlen[0], curlen+1) -% return -% \end{lstlisting} -% Now, we need to call the function with \texttt{curIdx=-1} and \texttt{preV=-sys.maxsize}, and \texttt{curlen=0} for the root node in the graph. - -% \paragraph{Breath-first Graph Search} The implementation of Python is provided: -% \begin{lstlisting}[language=Python] -% def bfs( nums): -% """ -% :type nums: List[int] -% :rtype: int -% """ -% maxlen = 0 -% q = [(-1, -sys.maxsize, 0)] # start pos can be any number in nums -% while q: -% new_q = [] -% for idx, prev, curlen in q: -% # search for number that is larger that current -% for j in range(idx+1, len(nums)): -% if nums[j] > prev: -% maxlen = max(maxlen, curlen + 1) -% new_q.append((j, nums[j], curlen + 1)) -% q = new_q -% return maxlen -% \end{lstlisting} -\subsubsection{Triangle (L120)} -Given a triangle, find the minimum path sum from top to bottom. Each step you may move to adjacent numbers on the row below. -\begin{lstlisting}[numbers=none] -Example: -Given the following triangle: - -[ -[2], -[3,4], -[6,5,7], -[4,1,8,3] -] -The minimum path sum from top to bottom is 11 (i.e., 2 + 3 + 5 + 1 = 11). -\end{lstlisting} - - -\paragraph{Analysis} -Solution: first we can use dfs traverse as required in the problem, and use a global variable to save the minimum value. The time complexity for this is $O(2^n)$. When we try to submit this code, we get LTE error. 
The code is as follows: -\begin{lstlisting}[language = Python] -import sys -def min_path_sum(t): - ''' - Purely Complete Search - ''' - min_sum = sys.maxsize - def dfs(i, j, cur_sum): - nonlocal min_sum - # edge case - if i == len(t) or j == len(t[i]): - # gather the sum - min_sum = min(min_sum, cur_sum) - return - # only two edges/ choices at this step - dfs(i+1, j, cur_sum + t[i][j]) - dfs(i+1, j+1, cur_sum + t[i][j]) - dfs(0, 0, 0) - return min_sum -\end{lstlisting} - - - - - -\subsection{Categorization} -So far we have covered the most important searching strategies, mainly two types: Uninformed and Informed (Heuristic) searches. BFS, DFS, and Bidirectional search belong to the uninformed search group. -\subsubsection{Explicit Search and Implicit Search} -\subsubsection{Complete Search and } -\subsubsection{exhaustive search and heuristic search} - -\subsubsection{Applications} - -An animation of DFS is available at \url{https://www.cs.usfca.edu/~galles/visualization/DFS.html} -% The output will be $1, 2, 4, 6, 3, 5$. The path of the DFS actually composes a tree, we call this a \textbf{DFS tree}. - -% In the code snippet, line 5 is to check if the current neighbor is visited or not. We can either use a SET or a list of Booleans, or if we know the total vertices are within 32 or 64, we can use bit as shown in Section~\ref{chapter_bit_section_bitwise}. - - - - -\subsection{Comparison of BFS and DFS} -BFS and DFS are the most basic complete search algorithms on graphs. They both visit every vertex and edge once, which gives them the same time complexity $O(|V|+|E|)$. We see that, in BFS, recording nodes when they enter the gray state or when they enter the black state yields the same visiting ordering. Breadth-first search usually serves to find shortest path distances (and the associated predecessor subgraph) from a given source. Depth-first search is often a subroutine in another algorithm, as we shall see later in this chapter. 
-\section{Discussion of Graph Search} -\label{graph_types} -As we will see in future chapters, basic BFS and DFS lay the foundations of all graph and tree-based search. Understanding the properties of graph search thoroughly in this chapter will ease our journey to explore more advanced graph algorithms. -There are some properties related to graphs that we need to learn before moving to the advanced algorithms. - -\paragraph{Completeness} -In the context of search, a complete algorithm is one that guarantees that if a path to the goal exists, the algorithm will reach the goal. Note that \textit{completeness} does not imply \textit{optimality} of the found path. - -For example, breadth-first search (BFS) is complete (and in fact optimal if step costs are identical at a given level), because it can find all paths starting from a given source vertex in the graph. (This might not be the case if step cost at a given level is not identical). In contrast, depth-first search (DFS) on trees is incomplete (consider infinite or repeated states). - -\begin{enumerate} - \item How to check if a graph is connected? A: We can check if a graph is connected by starting at an arbitrary node and finding out if we can reach all other nodes. (Both DFS and BFS work) - \item How to find a cycle in a graph? A: A graph contains a cycle if during a graph search, we find a node with a neighbor that has already been visited and is marked as gray. - \item How to check if a graph is bipartite? A: A graph is bipartite if its nodes can be colored using two colors so that there are -no adjacent nodes with the same color. It is surprisingly easy to check if a graph -is bipartite using graph traversal algorithms. -The idea is to color the starting node blue, all its neighbors red, all their -neighbors blue, and so on. If at some point of the search we notice that two -adjacent nodes have the same color, this means that the graph is not bipartite. -Otherwise the graph is bipartite and one coloring has been found. 
Note that in the general case, it is difficult to find out if the nodes in a graph -can be colored using $k$ colors so that no adjacent nodes have the same color. Even -when $k = 3$, no efficient algorithm is known, as the problem is NP-hard. -\end{enumerate} -\subsection{Coding Practice} -\paragraph{Property of Graph} -\begin{enumerate} - \item 785. Is Graph Bipartite? (medium) - \item 261. Graph Valid Tree (medium) - \item 797. All Paths From Source to Target(medium) -\end{enumerate} - -\section{Informed Search Strategies**} -%%%%%%%%%%%%%%%%BFS%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\subsection{Best-first Search} - Best-first search is a search algorithm which explores a graph by expanding the most promising node chosen according to a specified rule. How promising a node is is described by a \textbf{heuristic evaluation function $f(n)$} which, in general, may depend on the description of the node $n$, the description of the goal, and the information gathered by the search up to that point, and most important, on any extra knowledge about the problem domain. - - Breadth-first search fits as a special case in Best-first search if the objective of the problem is to find the shortest path from source to other nodes in the graph; it uses the estimated distance to source as a heuristic function. At the start, the only node in the frontier set is the source node; we expand this node and add all of its unexplored neighboring nodes to the frontier set, each with distance 1. Now, among all nodes in the frontier set, choose the node that is the most promising to expand. In this case, since they all have the same distance, expanding any of them is fine. Next, we would add nodes that have $f(n)=2$ to the frontier set, and again choose any one that has the smallest distance. - - A generic best-first search needs a priority queue for its implementation instead of the FIFO queue used in breadth-first search. 
-\end{document} \ No newline at end of file diff --git a/Easy-Book/chapters/chapter_searching_strategies.pdf b/Easy-Book/chapters/chapter_searching_strategies.pdf deleted file mode 100644 index a4a8100..0000000 Binary files a/Easy-Book/chapters/chapter_searching_strategies.pdf and /dev/null differ diff --git a/Easy-Book/chapters/chapter_searching_strategies.synctex.gz b/Easy-Book/chapters/chapter_searching_strategies.synctex.gz deleted file mode 100644 index 4dffabb..0000000 Binary files a/Easy-Book/chapters/chapter_searching_strategies.synctex.gz and /dev/null differ diff --git a/Easy-Book/chapters/chapter_searching_strategies.tex b/Easy-Book/chapters/chapter_searching_strategies.tex deleted file mode 100644 index 245fbd5..0000000 --- a/Easy-Book/chapters/chapter_searching_strategies.tex +++ /dev/null @@ -1,1461 +0,0 @@ -\documentclass[../main.tex]{subfiles} -\begin{document} -\chapter{Search Strategies} -Our standing at graph algorithms: -\begin{enumerate} - \item \underline{Search Strategies} (Current) - \item Combinatorial Search(Chapter) - \item Advanced Graph Algorithm(Current) - \item Graph Problem Patterns(Future Chapter) -\end{enumerate} - -Searching~\footnote{\url{https://en.wikipedia.org/wiki/Category:Search_algorithms}} is one of the most effective tools in algorithms. We have seen them being widely applied in the field of artificial intelligence to offer either exact or approximate solutions for complex problems such as puzzles, games, routing, scheduling, motion planning, navigation, and so on. -On the spectrum of discrete problems, nearly every single one can be modeled as a searching problem together with enumerative combinatorics and \textbf{optimizations}. -The searching solutions serve as either naive baselines or even as the only existing solutions for some problems. 
-Understanding common searching strategies as the main goal of this chapter along with the search space of the problem lays the foundation of problem analysis and solving, it is just indescribably \textbf{powerful} and \textbf{important}! - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%Introduction%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\section{Introduction} -\label{section_linear_search} - -Linear, tree-like data structures, they are all subsets of graphs, making graph searching universal to all searching algorithms. There are many searching strategies, and we only focus on a few decided upon the completeness of an algorithm--being absolutely sure to find an answer if there is one. - -Searching algorithms can be categorized into the following two types depending on if the domain knowledge is used to guide selection of tbe best path while searching: -\begin{enumerate} - \item Uninformed Search: This set of searching strategies normally are handled with basic and obvious problem definition and are not guided by estimation of how optimistic a certain node is. The basic algorithms include: Depth-first-Search(DFS), Breadth-first Search(BFS), Bidirectional Search, Uniform-cost Search, Iterative deepening search, and so on. We choose to cover the first four. - \item Informed(Heuristic) Search: This set of searching strategies on the other hand, use additional domain-specific information to find a \textit{heuristic function} which estimates the cost of a solution from a node. Heuristics means ``serving to aid discovery''. Common algorithms seen here include: Best-first Search, Greedy Best-first Search, $A^{*}$ Search. And we only introduce Best-first Search. -\end{enumerate} - - -Following this introductory chapter, in {Chapter Combinatorial Search}, we introduce combinatorial problems and its search space, and how to prune the search space to search more efficiently. 
- -Because the search space of a problem can either be of linear or tree structure--an implicit free tree, which makes the graph search a ``big deal'' in practice of problem solving. Compared with reduce and conquer, searching algorithms treat states and actions atomically: they do not consider any internal/optimal structure they might posses. We recap the \textbf{linear search} given its easiness and that we have already learned how to search in multiple linear data structures. - -\paragraph{Linear Search} As the naive and baseline approach compared with other searching algorithms, linear search, a.k.a sequential search, simply traverse the linear data structures sequentially and checking items until a target is found. It consists of a \texttt{for/while} loop, which gives as $O(n)$ as time complexity, and no extra space needed. For example, we search on list $A$ to find a target $t$: -\begin{lstlisting}[language=Python] -def linearSearch(A, t): #A is the array, and t is the target - for i,v in enumerate(A): - if A[i] == t: - return i - return -1 -\end{lstlisting} - -Linear Search is rarely used practically due to its lack of efficiency compared with other searching methods such as hashmap and binary search that we will learn soon. -\paragraph{Searching in Un-linear Space} -For the un-linear data structure, or search space comes from combinatorics, they are generally be a graph and sometimes be a rooted tree. Because mostly the search space forms a search tree, we introduce searching strategies on a search tree first, and then we specifically explore searching in a tree, recursive tree traversal, and search in a graph. - -\subsubsection{Generatics of Search Strategies} -Assume we know our state space, searching or state-space search is the process of searching through a state space for a solution by making explicit a sufficient portion of an implicit state-space graph, in the form of a search tree, to include a goal node. 
- -\paragraph{Nodes in Searching Process} -\begin{figure}[!ht] - \centering - \includegraphics[width=0.9\columnwidth]{fig/searchsp.png} - \caption{Graph Searching} - \label{fig:search_sp} -\end{figure} -In the searching process, nodes in the targeting data structure can be categorized into three sets as shown in Fig.~\ref{fig:search_sp} and we distinguish the state of a node--that is, which set it belongs to--with a color. -\begin{itemize} - \item Unexplored set--WHITE: initially all nodes in the graph are in the unexplored set, and we assign WHITE color. Nodes in this set have not been visited yet. - \item Frontier set--GRAY: nodes that have just been discovered/visited are put into the \textit{frontier} set, waiting to be expanded; that is to say, their children or adjacent nodes (through outgoing edges) are about to be discovered and have not all been visited--not all of them have been added to the frontier set yet. This is an intermediate state between WHITE and BLACK, which is ongoing, visiting but not yet completed. A gray vertex might have adjacent vertices of all three possible states. - \item Explored set--BLACK: nodes have been fully explored after being in the frontier set; that is to say, none of their children remains in the unexplored set. For a black vertex, all vertices adjacent to it are nonwhite.% And the nodes that are expanded are distinguished as the \texttt{explored} set. -\end{itemize} -All searching strategies follow the general tree search algorithm: -\begin{enumerate} - \item At first, put the start node in the frontier set. -\begin{lstlisting} -frontier = {S} -\end{lstlisting} -\item Loop through the frontier set, if it is empty then searching terminates. Otherwise, pick a node $n$ from frontier set: -\begin{enumerate} - \item If $n$ is a goal node, then return solution - \item Otherwise, generate all of $n$'s successor nodes and add them all to frontier set. - \item Remove $n$ from frontier set. 
-\end{enumerate} -\end{enumerate} -The search process constructs a \textit{search tree} where the root is the start state. Loops in a graph may cause the search tree to be infinite even if the state space is small. In this section, we only use either an acyclic graph or a tree for demonstrating the general search methods. In an acyclic graph, there might exist multiple paths from the source to a target. For example, the example shown in Fig.~\ref{fig:ucs} has multiple paths from S to G. Further in the graph search section, we discuss how to handle cycles and explain single-path graph search. Changing the ordering in the frontier set leads to different search strategies. - - -%%%%%%%%%%%%%%%Uninformed search strategies%%%%%%%%%%%%%%%%%%% -\section{Uninformed Search Strategies} -% in Search Tree - -\begin{figure}[!ht] - \centering - \includegraphics[width=0.3\columnwidth]{fig/ucs.png} - \caption{Exemplary Acyclic Graph. } - \label{fig:ucs} -\end{figure} -Through this section, we use Fig.~\ref{fig:ucs} as our exemplary graph to search on. The data structure to represent the graph is as: -\begin{lstlisting}[language=Python] -from collections import defaultdict -al = defaultdict(list) -al['S'] = [('A', 4), ('B', 5)] -al['A'] = [('G', 7)] -al['B'] = [('G', 3)] -\end{lstlisting} -%\subsection{Uninformed Search} - -With uninformed search, we only know the goal test and the adjacent nodes, but without knowing which non-goal states are better. For now, we assume and limit the state space to be a tree so that we won't worry about repeated states. - -There are generally three ways to order nodes in the frontier without domain-specific information: -\begin{itemize} - \item Queue that nodes are first in and first out (FIFO) from the frontier set. This is called breadth-first search. - \item Stack that nodes are last in but first out (LIFO) from the frontier set. This is called depth-first search. 
- \item Priority queue that nodes are sorted increasingly in the path cost from source to each node from the frontier set. This is called Uniform-Cost Search. -\end{itemize} -%%%%%%%%%%%%%%%%%%BFS%%%%%%%%%%%%%%%%%%%%%%% -\subsection{Breath-first Search} -\begin{figure}[!ht] - \centering - \includegraphics[width=0.96\columnwidth]{fig/general_breath_first_search.png} - \caption{Breath-first search on a simple search tree. At each stage, the node to be expanded next is indicated by a marker. } - \label{fig:breath_first_search_strategy} -\end{figure} -Breath-first search always expand the shallowest node in the frontier first, visiting nodes in the tree level by level as illustrated in Fig.~\ref{fig:breath_first_search_strategy}. Using $Q$ to denote the frontier set, the search process is explained: -\begin{lstlisting}[numbers=none] -Q=[A] -Expand A, add B and C into Q -Q=[B, C] -Expand B, add D and E into Q -Q=[C, D, E] -Expand C, add F and G into Q -Q=[D, E, F, G] -Finish expanding D -Q=[E, F, G] -Finish expanding E -Q=[F, G] -Finish expanding F -Q=[G] -Finish expanding G -Q=[] -\end{lstlisting} -The implementation can be done with a FIFO queue iteratively as: -\begin{lstlisting}[language=Python] -def bfs(g, s): - q = [s] - while q: - n = q.pop(0) - print(n, end = ' ') - for v, _ in g[n]: - q.append(v) -\end{lstlisting} -Call the function with parameters as \texttt{bfs(al, 'S')}, the output is as: -\begin{lstlisting}[numbers=none] -S A B G G -\end{lstlisting} - - -\paragraph{Properties} Breath-first search is \textbf{complete} because it can always find the goal node if it exists in the graph. It is also \textbf{optimal} given that all actions(arcs) have the same constant cost, or costs are positive and non-decreasing with depth. - -\paragraph{Time Complexity} We can clearly see that BFS scans each node in the tree exactly once. If our tree has $n$ nodes, it makes the time complexity $O(n)$. 
However, the search process can be terminated once the goal is found, which can be less than $n$. Thus we measure the time complexity by counting the number of nodes expanded while searching is running. Assume the tree has a branching factor $b$ at each non-leaf node and the goal node locates at depth $d$, we sum up the number of nodes from depth 0 to depth $d$, the total number of nodes expanded are: -\begin{align} - n &= \sum_{i=0}^{d} b^{i} \\ - &= \frac{b^{d+1} -1}{b-1} -\end{align} -Therefore, we have a time complexity of $O(b^d)$. It is usually very slow to find solutions with a large number of steps because it must look at all shorter length possibilities first.%$in cases that we do not know the total nodes we estimate it with the branching factor, say $b$, as about how many children a node can have. In a binary tree, this would be 2, and with the depth $d$ of the tree, we get the time complexity as of $O(b^d)$. -\paragraph{Space Complexity} -The space is measured in terms of the maximum size of frontier set during the search. In BFS, the maximum size is the number of nodes at depth $d$, resulting the total space cost to $O(b^d)$. - - -%%%%%%%%%%%%%%%%%DFS%%%%%%%%%%%%%%%%%%% -\subsection{Depth-first Search} -\begin{figure}[!ht] - \centering - \includegraphics[width=0.96\columnwidth]{fig/general_depth_first_search.png} - \caption{Depth-first search on a simple search tree. The unexplored region is shown in light gray. Explored nodes with no descendants in the frontier are removed from memory as node L disappears. Dark gray marks nodes that is being explored but not finished. } - \label{fig:depth_first_search_strategy} -\end{figure} -Depth-first search on the other hand always expand the deepest node from the frontier first. As shown in Fig.~\ref{fig:depth_first_search_strategy}, Depth-first search starts at the root node and continues branching down a particular path. 
Using $S$ to denote the frontier set which is indeed a stack, the search process is explained: -\begin{lstlisting}[numbers=none] -S=[A] -Expand A, add C and B into S -S=[C, B] -Expand B, add E and D into S -S=[C, E, D] -Expand D -S=[C, E] -Expand E -S=[C] -Expand C, add G and F into S -S=[C, G, F] -Expand F -S=[C, G] -Expand G -S=[C] -Expand C -S=[] -\end{lstlisting} -Depth-first can be implemented either recursively or iteratively. -\paragraph{Recursive Implementation}In the recursive version, the recursive function keeps calling the recursive function itself to expand its adjacent nodes. Starting from a source node, it always deepen down the path until a leaf node is met and then it backtrack to expand its other siblings (or say other adjacent nodes). The code is as: -\begin{lstlisting}[language=Python] -def dfs(g, vi): - print(vi, end=' ') - for v, _ in g[vi]: - dfs(g, v) -\end{lstlisting} -Call the function with parameters as \texttt{dfs(al, 'S')}, the output is as: -\begin{lstlisting}[numbers=none] -S A G B G -\end{lstlisting} -\paragraph{Iterative Implementation} According to the definition, we can implement DFS with LIFO \texttt{stack} data structure. The code is similar to that of BFS other than using different data structure from the frontier set. -\begin{lstlisting}[language=Python] -def dfs_iter(g, s): - stack = [s] - while stack: - n = stack.pop() - print(n, end = ' ') - for v, _ in g[n]: - stack.append(v) -\end{lstlisting} -Call the function with parameters as \texttt{dfs\_iter(al, 'S')}, the output is as: -\begin{lstlisting}[numbers=none] -S B G A G -\end{lstlisting} -We observe that the ordering is not exactly the same as of the recursive counterpart. To keep the ordering consistent, we simply need to add the adjacent nodes in reversed order. In practice, we replace \texttt{$g[n]$} with \texttt{$g[n][::-1]$}. - -\paragraph{Properties} DFS may not terminate without a fixed depth bound to limit the amount of nodes that it expand. 
DFS is \textbf{not complete} because it always deepens the search and in some cases the supply of nodes even within the cutting off fixed depth bound can be infinitely. DFS is \textbf{not optimal}, in our example, of our goal node is C, it goes through nodes A, B, D, E before it finds node C. While, in the BFS, it only goes through nodes A and C. However, when we are lucky, DFS can find long solutions quickly. - -\paragraph{Time Complexity} -For DFS, it might need to explore all nodes within graph to find the target, thus its worst-case time and space complexity is not decided upon by the depth of the goal, but the total depth of the graph, $d$ instead. DFS has the same time complexity as BFS, which is $O(b^d)$. - -\paragraph{Space Complexity} The stack will at most stores a single path from the root to a leaf node (goal node) along with the remaining unexpanded siblings so that when it has visited all children, it can backward to a parent node, and know which sibling to explore next. Therefore, the space that needed for DFS is $O(bd)$. In most cases, the branching factor is a constant, which makes the space complexity be mainly influenced by the depth of the search tree. Obviously, DFS has great efficiency in space, which is why it is adopted as the basic technique in many areas of computer science, such as solving constraint satisfaction problems(CSPs). The backtracking technique we are about to introduce even further optimizes the space complexity on the basis of DFS. - -\subsection{Uniform-Cost Search(UCS)} -When a priority queue is used to order nodes measured by the path cost of each node to the root in the frontier, this is called uniform-cost search, aka Cheapest First Search. In UCS, frontier set is expanded only in the direction which requires the minimum cost to travel to from root node. UCS only terminates when a path has explored the goal node, and this path is the cheapest path among all paths that can reach to the goal node from the initial point. 
When UCS is applied to find the shortest path in a graph, it is called Dijkstra's Algorithm. - -We demonstrate the process of UCS with the example shown in Fig.~\ref{fig:ucs}. - -Here, our source is `S', and the goal is `G'. We are set to find a path from source to goal with minimum cost. The process is shown as: -\begin{lstlisting}[numbers=none] -Q = [(0, S)] -Expand S, add A and B -Q = [(4, A), (5, B)] -Expand A, add G -Q = [(5, B), (11, G)] -Expand B, add G -Q = [(8, G), (11, G)] -Expand G, goal found, terminate. -\end{lstlisting} -And the Python source code is: -\begin{lstlisting}[language=Python] -import heapq -def ucs(graph, s, t): - q = [(0, s)] # initial path with cost 0 - while q: - cost, n = heapq.heappop(q) - # Test goal - if n == t: - return cost - else: - for v, c in graph[n]: - heapq.heappush(q, (c + cost, v)) - return None -\end{lstlisting} -\paragraph{Properties} Uniform-Cost Search is \textbf{complete}, as a search strategy similar to breadth-first search (using a queue). It is \textbf{optimal} only when all edge costs are non-negative; with negative edges, the first time the goal is popped from the priority queue is no longer guaranteed to be along the cheapest path. - -\paragraph{Time and Space Complexity} Similar to BFS, both the worst case time and space complexity are $O(b^d)$. When all edge costs are $c$, and $C^{*}$ is the best goal path cost, the time and space complexity can be more precisely represented as $O(b^{C^{*}/c})$. -\subsection{Iterative-Deepening Search} -Iterative-Deepening Search(IDS) is a modification on top of DFS, more specifically depth limited DFS(DLS); as the name suggests, IDS sets a maximum depth as a ``depth bound'', and it calls DLS as a subroutine looping from depth zero to maximum depth to expand nodes just as DFS will do and it only does goal test for nodes at the testing depth. - -Using the graph in Fig.~\ref{fig:ucs} as an example.
The process is shown as: -\begin{lstlisting}[numbers=none] -maxDepth = 3 - -depth = 0: S = [S] -Test S, goal not found - -depth = 1: S =[S] -Expand S, S = [B, A] -Test A, goal not found -Test B, goal not found - -depth = 2: S=[S] -Expand S, S=[B, A] -Expand A, S=[B, G] -Test G, goal found, STOP -\end{lstlisting} -The implementation of the DLS goes easier with recursive DFS: we count down the variable \texttt{maxDepth} in the function, and only do goal testing once this variable reaches zero. The code is as: -\begin{lstlisting}[language=Python] -def dls(graph, cur, t, maxDepth): - # End Condition - if maxDepth == 0: - if cur == t: - return True - if maxDepth < 0: - return False - - # Recur for adjacent vertices - for n, _ in graph[cur]: - if dls(graph, n, t, maxDepth - 1): - return True - return False -\end{lstlisting} -With the help of function \texttt{dls}, the implementation of IDS is just an iterative call to the subroutine: -\begin{lstlisting}[language=Python] -def ids(graph, s, t, maxDepth): - for i in range(maxDepth): - if dls(graph, s, t, i): - return True - return False -\end{lstlisting} -\paragraph{Analysis} It appears to us that we are undermining the efficiency of the original DFS since the algorithm ends up visiting the top level nodes above the goal multiple times. However, it is not as expensive as it seems to be, since in a tree most of the nodes are in the bottom levels. If the goal node locates at the bottom level, IDS will not have an obvious efficiency decline. But if the goal locates on upper levels on the right side of the tree, IDS avoids visiting all nodes across all depths on the left half before it is able to find this goal node. -\paragraph{Properties} Through the depth limited DFS, IDS has advantages of DFS: -\begin{itemize} - \item Limited space linear to the depth and branching factor, giving $O(bd)$ as space complexity. - \item In practice, even with redundant effort, it still finds longer path more quickly than BFS does.
-\end{itemize} -By iterating through from lower to higher depth, IDS has advantages of BFS, which comes with \textbf{completeness} and \textbf{optimality} stated the same as of BFS. -\paragraph{Time and Space Complexity} -The space complexity is the same as that of DFS, $O(bd)$. The time complexity is slightly worse than BFS or DFS due to the repeated visits to nodes near the top of the search tree but it still has the same worst case exponential time complexity, $O(b^d)$. -%%%%%%%%%%%%%%%%Bidirectional Search%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\subsection{Bidirectional Search**} -\begin{figure}[!ht] - \centering - \includegraphics[width=0.5\columnwidth]{fig/bidrectional_search.png} - \caption{Bidirectional search. } - \label{fig:bidirectional_search} -\end{figure} -Bidirectional search applies breadth-first search from both the start and the goal node, with one BFS from start moving forward and one BFS from the goal moving backward until their frontiers meet. This process is shown in Fig.~\ref{fig:bidirectional_search}. As we see, each BFS process only visits $O(b^{d/2})$ nodes compared with one single BFS that visits $O(b^d)$ nodes. This will improve both the time and space efficiency by $b^{d/2}$ times compared with vanilla BFS. -\paragraph{Implementation} Because the BFS that starts from the goal needs to move backwards, the easy way to do this is to create another copy of the graph wherein each edge has opposite direction compared with the original. By creating a reversed graph, we can use a forward BFS from the goal. - -We apply level by level BFS instead of updating the queue one node by one node. For better efficiency of the intersection of the frontier set from both BFS, we use \texttt{set} data structure instead of simply a \texttt{list} or a FIFO queue.
- -Use Fig.~\ref{fig:ucs} as an example, if our source and goal is `S' and `G' respectively, if we proceed both BFS simultaneously, the process looks like this: -\begin{lstlisting}[numbers=none] -qs = ['S'] -qt = ['G'] -Check intersection, and proceed -qs = ['A', 'B'] -qt = ['A', 'B'] -Check intersection, frontier meet, STOP -\end{lstlisting} -No problem in this case; however, the above process will end up missing the goal node if we change our goal to be `A'. This process looks like: -\begin{lstlisting}[numbers=none] -qs = ['S'] -qt = ['A'] -Check intersection, and proceed -qs = ['A', 'B'] -qt = ['S'] -Check intersection, and proceed -qs = ['G'] -qt = [] -STOP -\end{lstlisting} -This is because for source and goal nodes whose shortest path has an odd number of edges (that is, an even number of nodes, such as \texttt{S->A}), if we proceed both search processes simultaneously, the two frontiers step past each other and we will always end up missing the intersection. Therefore, we process each BFS iteratively--one at a time to avoid such troubles. - -The code for one level at a time BFS with \texttt{set} and for the intersection check is as: -\begin{lstlisting}[language=Python] -def bfs_level(graph, q, bStep): - if not bStep: - return q - nq = set() - for n in q: - for v, c in graph[n]: - nq.add(v) - return nq - -def intersect(qs, qt): - if qs & qt: # intersection - return True - return False -\end{lstlisting} -The main code for bidirectional search is as: -\begin{lstlisting}[language=Python] -def bis(graph, s, t): - # First build a graph with opposite edges - bgraph = defaultdict(list) - for key, value in graph.items(): - for n, c in value: - bgraph[n].append((key, c)) - # Start bidirectional search - qs = {s} - qt = {t} - step = 0 - while qs and qt: - if intersect(qs, qt): - return True - qs = bfs_level(graph, qs, step%2 == 0) - qt = bfs_level(bgraph, qt, step%2 == 1) - step = 1 - step - return False -\end{lstlisting} -\subsection{Summary} -\begin{table}[!ht] -\begin{small} -\centering -\noindent\captionof{table}{ Performance of Search Algorithms on Trees or Acyclic Graph} -
\noindent \begin{tabular}{|p{0.2\columnwidth}|p{0.15\columnwidth}|p{0.15\columnwidth}|p{0.15\columnwidth}|p{0.15\columnwidth}| } - \hline -Method & Complete & Optimal & Time & Space \\ \hline -BFS & Y& Y, if & $O(b^d)$ & $O(b^d)$ \\\hline -UCS &Y & Y & $O(b^{C^{*}/c})$ & $O(b^{C^{*}/c})$\\ \hline -DFS & N & N & $O(b^m)$ & $O(bm)$\\ \hline -IDS & Y & Y, if & $O(b^d)$ & $O(bd)$\\ \hline -Bidirectional Search & Y& Y, if& $O(b^{d/2})$ & $O(b^{d/2})$\\ \hline -\end{tabular} - \label{tab:performance of searching strategy} - \end{small} -\end{table} -Using $b$ as the branching factor, $d$ as the depth of the goal node, and $m$ as the maximum graph depth, the properties and complexity for the five uninformed search strategies are summarized in Table~\ref{tab:performance of searching strategy}. - - - - - - %%%%%%%%%%%%%%%%%Graph Search%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\section{Graph Search} -\paragraph{Cycles} -This section is devoted to discussing more details about two search strategies--BFS and DFS in more general graph setting. In the last section, we just assumed our graph is either a tree or acyclic directional graph. In more general real-world setting, there can be cycles within a graph which will lead to infinite loops of our program. -\paragraph{Print Paths} Second, we talked about the paths, but we never discussed how to track all the paths. In this section, we would like to see how we can track paths first, and then with the tracked paths, we detect cycles to avoid getting into infinite loops. -\paragraph{More Efficient Graph Search} -Third, the last section is all about tree search, however, in a large graph, this is not efficient by visiting some nodes multiple times if they happen to be on the multiple paths between the source and any other node in the graph.
Usually, depends on the application scenarios, graph search which remembers already-expanded nodes/states in the graph and avoids expanding again by checking any about to be expanded node to see if it exists in frontier set or the explored set. This section, we introduce graph search that suits for general purposed graph problems. -% \paragraph{Handle Cycles} In this section, we assumed our graph is either a tree or acyclic directional graph. When there are cycles, we have to track the path and avoid cycles, which you will see more details in Section Graph Search. -% \paragraph{Graph Search} -% This section is all about tree search, however, in a large graph, this is not efficient by visiting some nodes multiple times if they happen to be on the multiple paths between the source and any other node in the graph. Usually, depends on the application scenarios, graph search which remembers already-expanded nodes/states in the graph and avoids expanding again by checking any about to be expanded node to see if it exists in frontier set or the explored set. Check Section. Graph search for more details. -% \paragraph{Print Paths} We have known that the uniformed search were all doing tree based search, but we never try to track all the paths, which we would like to resolve in the next section. -% In this Chapter, we expand the BFS and DFS tree searching strategy on a graph which is more general. - -%For convenience, we use two sets: \textit{explored set} and \textit{frontier set} to distinguish vertices that have been finished exploring and vertices that are being explored. This make three different states between all vertices in the searching process: -\paragraph{Visiting States} -We have already explained that we can use three colors: WHITE, GREY, and BLACK to denote nodes within the unexpanded, frontier, and explored set, respectively. We are doing so to avoid the hassles of tracking three different sets, with visiting state, it is all simplified to a color check. 
We define a \texttt{STATE} class for convenience. -% Because in graph, it is reasonable to expect it contains cycles. In our example, we have a cycle \texttt{[0, 1, 2, 0]} and \texttt{[1, 2, 3, 4, 1]} as shown in Fig.~\ref{fig:cyclic_graph_search_1}. Therefore, in the graph search, it is a necessity to avoid traversing a cycle which will make the program running nonstop. The solution is during the search process, we mark states for each vertex. - - - -\begin{lstlisting}[language=Python, numbers=none] -class STATE: - white = 0 - gray = 1 - black = 2 -\end{lstlisting} -\begin{figure}[!ht] - \centering - \includegraphics[width=0.25\columnwidth]{fig/free_tree.png} - \includegraphics[width=0.25\columnwidth]{fig/directed_cyclic_graph.png} - \includegraphics[width=0.25\columnwidth]{fig/undirected_cyclic_graph.png} - \caption{Exemplary Graph: Free Tree, Directed Cyclic Graph, and Undirected Cyclic Graph.} - \label{fig:graph_search_example} -\end{figure} -In this section, we use Fig.~\ref{fig:graph_search_example} as our exemplary graphs. Each's data structure is defined as: -\begin{itemize} -\item Free Tree: -\begin{lstlisting}[language=Python] -ft = [[1], [2], [4], [], [3, 5], []] -\end{lstlisting} - \item Directed Cyclic Graph: - \begin{lstlisting}[language=Python] -dcg = [[1], [2],[0, 4], [1], [3, 5], [] ] -\end{lstlisting} -\item Undirected Cyclic Graph - \begin{lstlisting}[language=Python] -ucg = [[1, 2], [0, 2, 3], [0, 1, 4], [1, 4], [2, 3, 5], [4]] -\end{lstlisting} -\end{itemize} - -% Then we introduce more searching strategies such as priority-first searching and give out more categorization. - -\paragraph{Search Tree} It is important to realize the Searching ordering is always forming a tree, this is terminologized as \textbf{Search Tree}. In a tree structure, the search tree is itself. In a graph, we need to figure out the search tree and it decides our time and space complexity. 
- - - - %%%%%%%%%%%%%%%%%%%%%%%%%%%Graph Search%%%%%%%%%%%%%%%%%%%%% -\subsection{Depth-first Search in Graph} -In this section we will further the depth-first tree search and explore depth-first graph search to compare their properties and complexity. -\subsubsection{Depth-first Tree Search} - -\paragraph{Vanilla Depth-first Tree Search} Our previous code slightly modified to suit for the new graph data structure works fine with the free tree in Fig.~\ref{fig:graph_search_example}. The code is as: -\begin{lstlisting}[language=Python] -def dfs(g, vi): - print(vi, end=' ') - for nv in g[vi]: - dfs(g, nv) -\end{lstlisting} -However, if we call it on the cyclic graph, \texttt{dfs(dcg, 0)}, it runs into stack overflow. - -\paragraph{Cycle Avoiding Depth-first Tree Search} -So, how to avoid cycles? We know the definition of a cycle is a closed path that has at least one node that repeats itself; in our failed run, we were stuck with cycle [0, 1, 2, 0]. Therefore, let us add a \texttt{path} in the recursive function, and whenever we want to expand a node, we check if it forms a cycle or not by checking the membership of a candidate to nodes comprising the path. -We save all paths and the visiting ordering of nodes in two lists: \texttt{paths} and \texttt{orders}. The recursive version of code is: -\begin{lstlisting}[language=Python] -def dfs(g, vi, path): - paths.append(path) - orders.append(vi) - for nv in g[vi]: - if nv not in path: - dfs(g, nv, path+[nv]) - return -\end{lstlisting} -Now we call function \texttt{dfs} for \texttt{ft}, \texttt{dcg}, and \texttt{ucg}, the \texttt{paths} and \texttt{orders} for each example is listed: -\begin{itemize} - \item For the free tree and the directed cyclic graph, they have the same output. 
The \texttt{orders} are: -\begin{lstlisting}[numbers=none] -[0, 1, 2, 4, 3, 5] -\end{lstlisting} - And the \texttt{paths} are: -\begin{lstlisting}[numbers=none] -[[0], [0, 1], [0, 1, 2], [0, 1, 2, 4], [0, 1, 2, 4, 3], [0, 1, 2, 4, 5]] -\end{lstlisting} - \item For the undirected cyclic graph, \texttt{orders} are: -\begin{lstlisting}[numbers=none] -[0, 1, 2, 4, 3, 5, 3, 4, 2, 5, 2, 1, 3, 4, 5, 4, 3, 1, 5] -\end{lstlisting} - And the \texttt{paths} are: -\begin{lstlisting}[numbers=none] -[[0], -[0, 1], -[0, 1, 2], -[0, 1, 2, 4], -[0, 1, 2, 4, 3], -[0, 1, 2, 4, 5], -[0, 1, 3], -[0, 1, 3, 4], -[0, 1, 3, 4, 2], -[0, 1, 3, 4, 5], -[0, 2], -[0, 2, 1], -[0, 2, 1, 3], -[0, 2, 1, 3, 4], -[0, 2, 1, 3, 4, 5], -[0, 2, 4], -[0, 2, 4, 3], -[0, 2, 4, 3, 1], -[0, 2, 4, 5]] -\end{lstlisting} -\end{itemize} -\begin{figure}[!ht] - \centering - \includegraphics[width=0.2\columnwidth]{fig/free_tree.png} - % \includegraphics[width=0.15\columnwidth]{fig/free_tree.png} - \includegraphics[width=0.75\columnwidth]{fig/search_tree_dfs.png} - \caption{Search Tree for Exemplary Graph: Free Tree and Directed Cyclic Graph, and Undirected Cyclic Graph.} - \label{fig:graph_search_example_search_tree} -\end{figure} - -These paths mark the search tree, we visualize the search tree for each exemplary graph in Fig.~\ref{fig:graph_search_example_search_tree}. - -% \paragraph{Efficient Path Backtrace} Previously we save paths each as a list, the shared partial paths locating on top part of the search tree are repeating, such as partial path [0, 1, 2, 4] in our example, which wastes memory unnecessarily. We can save paths by saving all edges in the search tree - -\subsubsection{Depth-first Graph Search} - We see that from the above implementation, for a graph with only 6 nodes, we have been visiting nodes for a total of 19 times. - A lot of nodes have been repeating. 1 appears 3 times, 3 appears 4 times, and so on. 
As we see the visiting order being represented with a \texttt{search tree} in Fig.~\ref{fig:graph_search_example_search_tree}, our complexity is getting close to $O(b^h)$, where $b$ is the branching factor and $h$ is the total vertices of the graph, marking the upper bound of the maximum depth that the search can traverse. - If we simply want to search if a value or a state exists in the graph, this approach insanely complicates the situation. - What we do next is to avoid revisiting the same vertex again and again by tracking the visiting state of a node. - - In the implementation, we only track the longest path--from source vertex to vertex that has no more unvisited adjacent vertices. -\begin{lstlisting}[language=Python] -def dfgs(g, vi, visited, path): - visited.add(vi) - orders.append(vi) - bEnd = True # node without unvisited adjacent nodes - for nv in g[vi]: - if nv not in visited: - if bEnd: - bEnd = False - dfgs(g, nv, visited, path + [nv]) - if bEnd: - paths.append(path) -\end{lstlisting} -Now, we call this function with \texttt{ucg} as: -\begin{lstlisting}[language=Python] -paths, orders = [], [] -dfgs(ucg, 0, set(), [0]) -\end{lstlisting} -The output for \texttt{paths} and \texttt{orders} are: -\begin{lstlisting}[numbers=none] -([[0, 1, 2, 4, 3], [0, 1, 2, 4, 5]], [0, 1, 2, 4, 3, 5]) -\end{lstlisting} -Did you notice that the depth-first graph search on the undirected cyclic graph shown in Fig.~\ref{fig:graph_search_example} has the same visiting order of nodes and same search tree as the free tree and directed cyclic graph in Fig.~\ref{fig:graph_search_example}? - -\paragraph{Efficient Path Backtrace} In graph search, each node is added into the frontier and expanded only once, and the search tree of a $|V|$ graph will only have $|V|-1$ edges. 
Tracing paths by saving each path as a list in the frontier set is costly; for a partial path in the search tree, it is repeating itself multiple times if it happens to be part of multiple paths, such as partial path \texttt{0->1->2->4}. We can bring down the memory cost to $O(|V|)$ if we only save edges by using a \texttt{parent dict} with key and value referring to the node and its parent node in the path, respectively. For example, edge \texttt{0->1} is saved as \texttt{parent[1] = 0}. Once we find our goal state, we can backtrace from this goal state to get the path. The backtrace code is: -\begin{lstlisting}[language=Python] -def backtrace(s, t, parent): - p = t - path = [] - while p != s: - path.append(p) - p = parent[p] - path.append(s) - return path[::-1] -\end{lstlisting} -Now, we modify the dfs code as follows to find a given state (vertex) and obtain the path from source to target: -\begin{lstlisting}[language=Python] -def dfgs(g, vi, s, t, visited, parent): - visited.add(vi) - if vi == t: - return backtrace(s, t, parent) - - for nv in g[vi]: - if nv not in visited: - parent[nv] = vi - fpath = dfgs(g, nv, s, t, visited, parent) - if fpath: - return fpath - - return None -\end{lstlisting} - -The whole Depth-first graph search tree constructed from the \texttt{parent} dict is delineated in Fig.~\ref{fig:depth_first_graph_search_tree} on the given example. -\begin{figure}[!ht] - \centering - \includegraphics[width=0.3\columnwidth]{fig/undirected_cyclic_graph.png} - \includegraphics[width=0.3\columnwidth]{fig/depth_first_graph_search_tree.png} - - \caption{Depth-first Graph Search Tree.} - \label{fig:depth_first_graph_search_tree} -\end{figure} - - - - -\paragraph{Properties} The completeness of DFS depends on the search space. If your search space is finite, then Depth-First Search is complete. However, if there are infinitely many alternatives, it might not find a solution.
For example, suppose you were coding a path-search problem on city streets, and every time your partial path came to an intersection, you always searched the left-most street first. Then you might just keep going around the same block indefinitely. - -The depth-first graph search is \textbf{nonoptimal} just as Depth-first tree search. For example, if the task is to find the shortest path from source 0 to target 2. The shortest path should be 0->2, however depth-first graph search will return 0->1->2. For the search tree using depth-first tree search, it can find the shortest path from source 0 to 2. However, it will explore the whole left branch starts from 1 before it finds its goal node on the right side. - -\paragraph{Time and Space Complexity} For the depth-first graph search, we use aggregate analysis. The search process covers all edges, $|E|$ and vertices, $|V|$, which makes the time complexity as $O(|V|+|E|)$. For the space, it uses space $O(|V|)$ in the worst case to -store the stack of vertices on the current search path as well as the set of -already-visited vertices. - -\subsubsection{Applications} Depth-first tree search is adopted as the basic workhorse of many areas of AI, such as solving CSP, as it is a brute-force solution. -In {Chapter Combinatorial Search}, we will learn how ``backtracking'' technique along with others can be applied to speed things up. -Depth-first graph search is widely used to solve graph related tasks in non-exponential time, such as Cycle Check(linear time) and shortest path. - -\begin{bclogo}[couleur = blue!30, arrondi=0.1,logo=\bccrayon,ombre=true]{Questions to ponder: } -\begin{itemize} -\item Only track the longest paths. -\item How to trace the edges of the search tree? -\item Implement the iterative version of the recursive code. 
-\end{itemize} -\end{bclogo} - -%%%%%%%%%%%%%%BFS%%%%%%%%%%%%%%%%%%%%%%%% -%%%%%%%%%%%%%%%%%%%% -\subsection{Breadth-first Search in Graph} -We extend breadth-first tree search and explore breadth-first graph search in this section to gain a better understanding of one of the most general search strategies. Because BFS is implemented iteratively, the implementation in this section sheds light on the iterative counterparts of DFS's recursive implementations from last section. -\subsubsection{Breadth-first Tree Search} -Similarly, our vanilla breadth-first tree search shown in Section.~\ref{} will get stuck with the cyclic graph in Fig.~\ref{fig:graph_search_example}. -\paragraph{Cycle Avoiding Breadth-first Tree Search} We avoid cycles with a strategy similar to DFS tree search that traces paths and checks membership of node. In BFS, we track paths by explicitly adding paths to the \texttt{queue}. Each time we expand from the frontier (queue), the node we need is the last item in the path from the queue. In the implementation, we only track the longest paths from the search tree and the visiting orders of nodes. The Python code is: -\begin{lstlisting}[language=Python] -def bfs(g, s): - q = [[s]] - paths, orders = [], [] - while q: - path = q.pop(0) - n = path[-1] - orders.append(n) - bEnd = True - for v in g[n]: - if v not in path: - if bEnd: - bEnd = False - q.append(path + [v]) - if bEnd: - paths.append(path) - return paths, orders -\end{lstlisting} -Now we call function \texttt{bfs} for \texttt{ft}, \texttt{dcg}, and \texttt{ucg}, the \texttt{paths} and \texttt{orders} for each example is listed: -\begin{itemize} - \item For the free tree and the directed cyclic graph, they have the same output.
The \texttt{orders} are: -\begin{lstlisting}[numbers=none] -[0, 1, 2, 4, 3, 5] -\end{lstlisting} - And the \texttt{paths} are: -\begin{lstlisting}[numbers=none] -[[0, 1, 2, 4, 3], [0, 1, 2, 4, 5]] -\end{lstlisting} - \item For the undirected cyclic graph, \texttt{orders} are: -\begin{lstlisting}[numbers=none] -[0, 1, 2, 2, 3, 1, 4, 4, 4, 3, 3, 5, 3, 5, 2, 5, 4, 1, 5] -\end{lstlisting} - And the \texttt{paths} are: -\begin{lstlisting}[numbers=none] -[[0, 2, 4, 5], [0, 1, 2, 4, 3], [0, 1, 2, 4, 5], [0, 1, 3, 4, 2], [0, 1, 3, 4, 5], [0, 2, 4, 3, 1], [0, 2, 1, 3, 4, 5]] -\end{lstlisting} -\end{itemize} -\paragraph{Properties} We can see the visiting orders of nodes are different from Depth-first tree search counterparts. However, the corresponding search tree for each graph in Fig.~\ref{fig:graph_search_example} is the same as its counterpart--Depth-first Tree Search illustrated in Fig.~\ref{fig:graph_search_example_search_tree}. This highlights how different searching strategies differ by visiting ordering of nodes but not differ at the search-tree which depicts the search space--all possible paths. - - - - - - -\paragraph{Applications} However, the Breath-first Tree Search and path tracing is extremely more costly compared with DFS counterpart. When our goal is to enumerate paths, go for the DFS. When we are trying to find shortest-paths, mostly use BFS. - -\subsubsection{Breath-first Graph Search} -Similar to Depth-first Graph Search, we use a \texttt{visited} set to make sure each node is only added to the frontier(queue) once and thus expanded only once. 
- - -\paragraph{BFGS Implementation} The implementation of Breath-first Graph Search with goal test is: -\begin{lstlisting}[language=Python] -def bfgs(g, s, t): - q = [s] - parent = {} - visited = {s} - while q: - n = q.pop(0) - if n == t: - return backtrace(s, t, parent) - for v in g[n]: - if v not in visited: - q.append(v) - visited.add(v) - parent[v] = n - return parent -\end{lstlisting} -Now, use the undirected cyclic graph as example to find the path from source 0 to target 5: -\begin{lstlisting}[language=Python] -bfgs(ucg, 0, 5) -\end{lstlisting} -With the found path as: -\begin{lstlisting}[numbers=none] -[0, 2, 4, 5] -\end{lstlisting} -While this found path is the shortest path between the two vertices measured by the length. The whole Breath-first graph search tree constructed from the \texttt{parent} dict is delineated in Fig.~\ref{fig:breath_first_graph_search_tree} on the given example. -\begin{figure}[!ht] - \centering - \includegraphics[width=0.3\columnwidth]{fig/undirected_cyclic_graph.png} - \includegraphics[width=0.3\columnwidth]{fig/breath_first_graph_search_tree.png} - - \caption{Breath-first Graph Search Tree.} - \label{fig:breath_first_graph_search_tree} -\end{figure} - - -\paragraph{Time and Space Complexity} Same to DFGS, the time complexity as $O(|V|+|E|)$. For the space, it uses space $O(|V|)$ in the worst case to -store vertices on the current search path, the set of -already-visited vertices, as well as the dictionary used to store edge relations. The shortage that comes with costly memory usage of Breath-first Graph Search to Depth-first Graph Search is less obvious compared to Breath-first Tree Search to Depth-first Graph Search. 
- -\subsubsection{Tree Search VS Graph Search} -There are two important characteristics about tree search and graph search: -\begin{itemize} - \item Within a graph $G=(V, E)$, whether it is undirected or directed, acyclic or cyclic, both the breath-first and depth-first tree search result in the same search tree: They both enumerate all possible states (paths) of the search space. - \item The conclusion is different for breath-first and depth-first graph search. For an acyclic and directed graph (tree), both search strategies result in the same search tree. However, whenever there exist cycles, the depth-first graph search tree might differ from the breath-first graph search tree. -\end{itemize} - -%%%%%%%%%%%%%%%%DFS Graph Search%%%%%%%%%%%%%% -\subsection{Depth-first Graph Search} -Within this section and the next, we focus on explaining more characteristics of the graph search that avoids repeatedly visiting a vertex. -% We will make use of the three color visiting states, which is especially useful for directed graph. -% For undirected graph, simply visited or not visited is enough. -Seemingly these features and details are not that useful judging from the current context, but we will see how they can be applied to solve problems more efficiently in Chapter Advanced Graph Algorithms, such as detecting cycles, topological sort, and so on. 
-\begin{figure}[!ht] - \centering - \includegraphics[width=0.2\columnwidth]{fig/depth_first_graph_search_process0.png} - \includegraphics[width=0.2\columnwidth]{fig/depth_first_graph_search_process1.png} - \includegraphics[width=0.2\columnwidth]{fig/depth_first_graph_search_process2.png} - \includegraphics[width=0.2\columnwidth]{fig/depth_first_graph_search_process3.png} - - \includegraphics[width=0.2\columnwidth]{fig/depth_first_graph_search_process4.png} - \includegraphics[width=0.2\columnwidth]{fig/depth_first_graph_search_process5.png} - \includegraphics[width=0.2\columnwidth]{fig/depth_first_graph_search_process6.png} - \includegraphics[width=0.2\columnwidth]{fig/depth_first_graph_search_process7.png} - - \includegraphics[width=0.2\columnwidth]{fig/depth_first_graph_search_process8.png} - \includegraphics[width=0.2\columnwidth]{fig/depth_first_graph_search_process9.png} - \includegraphics[width=0.2\columnwidth]{fig/depth_first_graph_search_process10.png} - \includegraphics[width=0.2\columnwidth]{fig/depth_first_graph_search_process11.png} - \caption{The process of Depth-first Graph Search in Directed Graph. The black arrows denotes the the relation of $u$ and its not visited neighbors $v$. And the red arrow marks the backtrack edge. } - \label{fig:depth_first_graph_search_process} -\end{figure} - -As shown in Fig.~\ref{fig:depth_first_graph_search_process} (a directed graph), we -%Depth-first Search starts from a given source, and follows a single path in the graph to go as ``far'' as possible to visit unvisited nodes until (1) it meets a vertex that has no edge out; or (2) no unvisited adjacent vertices or say white vertices. Then it ``backtracks'' to its predecessor and start the above process again. DFS will discover all vertices that are reachable from the given source. -start from 0, mark it gray, and visit its first unvisited neighbor 1, mark 1 as gray, and visit 1's first unvisited neighbor 2, then 2's unvisited neighbor 4, 4's unvisited neighbor 3. 
-Node 3 doesn't have any white neighbors, so we mark it as complete with black. -Now, here, we ``backtrack'' to its predecessor, which is 4. And then we keep the process until 5 becomes gray. -Because 5 has no edge out any more, it becomes black. -Then the search backtracks to 4, to 2, to 1, and eventually back to 0. -We should notice that the order in which vertices become gray differs from the order in which they become black. -From the figure, the gray ordering is \texttt{[0, 1, 2, 4, 3, 5]}, and the black ordering is \texttt{[3, 5, 4, 2, 1, 0]}. -Therefore, it is necessary to distinguish at least the three states in the depth-first graph search. - - -\paragraph{Three States Recursive Implementation} -We add an additional \texttt{colors} list to track the color of each vertex, \texttt{orders} to track the order in which vertices turn gray, and \texttt{complete\_orders} to track the order in which vertices turn black--a vertex turns black once none of its neighbors is white, which is after the recursive calls in the code. -\begin{lstlisting}[language = Python] -def dfs(g, s, colors, orders, complete_orders): - colors[s] = STATE.gray - orders.append(s) - for v in g[s]: - if colors[v] == STATE.white: - dfs(g, v, colors, orders, complete_orders) - colors[s] = STATE.black - complete_orders.append(s) - return -\end{lstlisting} -Now, we try to call the function with the undirected cyclic graph in Fig.~\ref{fig:graph_search_example}. 
-\begin{lstlisting}[language=Python] -v = len(ucg) -orders, complete_orders = [], [] -colors = [STATE.white] * v -dfs(ucg,0, colors, orders, complete_orders) -\end{lstlisting} -Now, the \texttt{orders} and \texttt{complete\_orders} will end up differently: -\begin{lstlisting}[numbers=none] -[0, 1, 2, 4, 3, 5] [3, 5, 4, 2, 1, 0] -\end{lstlisting} - -\paragraph{Three States and Edges} -\begin{figure}[!ht] - \centering - \includegraphics[width=0.33\columnwidth]{fig/directed_cyclic_graph_2.png} - \includegraphics[width=0.28\columnwidth]{fig/depth_first_graph_search_edges(1).png} - \caption{Classification of Edges: black marks tree edge, red marks back edge, yellow marks forward edge, and blue marks cross edge.} - \label{fig:depth_first_graph_search_edges} -\end{figure} -%%%resource: https://courses.csail.mit.edu/6.006/fall11/rec/rec14.pdf -Depth-first Graph Search on graph $G=(V, E)$ connects all reachable vertices from a given source in the graph in the form of a depth-first forest $G_\pi$. -Edges within $G_\pi$ are called \textbf{tree edges}. -Tree edges are edges marked with black arrows in Fig.~\ref{fig:depth_first_graph_search_edges}. -Other edges in $G$ can be classified into three categories based on Depth-first forest $G_\pi$, they are: -\begin{enumerate} -\item Back edges which connect a node back to one of its ancestors in the depth-first forest $G_\pi$. -Marked as red edges in Fig.~\ref{fig:depth_first_graph_search_edges}. -\item Forward edges point from a node to one of its descendants in the depth-first forest $G_\pi$. Marked as yellow edges in Fig.~\ref{fig:depth_first_graph_search_edges}. -\item Cross edges point from a node to a previously visited node that is neither an ancestor nor a descendant in the depth-first forest $G_\pi$. Marked as blue edges in Fig.~\ref{fig:depth_first_graph_search_edges}. 
-\end{enumerate} -We can decide the type of an edge using the DFS execution with the states: for an edge $(u, v)$, -the type depends on whether we have visited $v$ before in the DFS and if so, the relationship between $u$ and $v$. -\begin{enumerate} - \item If $v$ is WHITE, then the edge is a tree edge. - \item If $v$ is GRAY--both $u$ and $v$ are being visited--then the edge is a back edge. - In a directed graph, this indicates that we meet a cycle. - \item If $v$ is BLACK, meaning that $v$ is finished, and $start\_time[u] < start\_time[v]$, then the edge is a forward edge. - \item If $v$ is BLACK, but $start\_time[u] > start\_time[v]$, then the edge is a cross edge. -\end{enumerate} -\textit{In an undirected graph, there is no forward edge or cross edge. Therefore, it does not really need three colors. -Usually, we can simply mark a vertex as visited or not visited.} - -Classification of edges provides important information about the graph, e.g., if we detect a back edge in a directed graph, we find a cycle. - - - -\paragraph{Parenthesis Structure} In either an undirected or a directed graph, the discovery time when the state goes from WHITE to GRAY and the finish time when the state turns from GRAY to BLACK have the parenthesis structure. We modify \texttt{dfs} to track the time: a static variable \texttt{t} is used to track the time, and \texttt{discover} and \texttt{finish} are used to record the discovery and finish time of each vertex. The implementation is shown: -\begin{lstlisting}[language=Python] -def dfs(g, s, colors): - dfs.t += 1 # static variable - colors[s] = STATE.gray - dfs.discover[s] = dfs.t - for v in g[s]: - if colors[v] == STATE.white: - dfs(g, v, colors) - # complete - dfs.t += 1 - dfs.finish[s] = dfs.t - return -\end{lstlisting} -Now, we call the above function with the directed graph in Fig.~\ref{fig:depth_first_graph_search_edges}. 
-\begin{lstlisting}[language=Python] -v = len(dcg) -colors = [STATE.white] * v -dfs.t = -1 -dfs.discover, dfs.finish = [-1] * v, [-1] * v -dfs(dcg,0, colors) -\end{lstlisting} -The output for \texttt{dfs.discover} and \texttt{dfs.finish} are: -\begin{lstlisting}[numbers=none] -([0, 1, 2, 4, 3, 6], [11, 10, 9, 5, 8, 7]) -\end{lstlisting} -From \texttt{dfs.discover} and \texttt{dfs.finish} list, we can generate a new list of merged order, \texttt{merge\_orders} that arranges nodes in order of there discovered and finish time. The code is as: -\begin{lstlisting}[language=Python] -def parenthesis(dt, ft, n): - merge_orders = [-1] * 2 * n - for v, t in enumerate(dt): - merge_orders[t] = v - for v, t in enumerate(ft): - merge_orders[t] = v - - print(merge_orders) - nodes = set() - for i in merge_orders: - if i not in nodes: - print('(', i, end = ', ') - nodes.add(i) - else: - print(i, '),', end = ' ') -\end{lstlisting} -The output is: -\begin{lstlisting}[language=Python] -[0, 1, 2, 4, 3, 3, 5, 6, 6, 5, 4, 2, 1, 0] -( 0, ( 1, ( 2, ( 4, ( 3, 3 ), ( 5, ( 6, 6 ), 5 ), 4 ), 2 ), 1 ), 0 ), -\end{lstlisting} -We would easily find out that the ordering of nodes according to the discovery and finishing time makes a well-defined expression in the sense that the parentheses are properly nested. -\begin{bclogo}[couleur = blue!30, arrondi=0.1,logo=\bccrayon,ombre=true]{Questions to ponder: } -\begin{itemize} -\item Implement the iterative version of the recursive code. -\end{itemize} -\end{bclogo} - -%%%%%%%%%%%%%%%%BFS Graph Search%%%%%%%%%%%%%% -\subsection{Breadth-first Graph Search} -We have already known how to implement BFS of both the tree and graph search versions. In this section, we want to first exemplify the state change process of BFGS with example shown in Fig.~\ref{fig:depth_first_graph_search_process}. Second, we focus on proving that within the breath-first graph search tree, a path between root and any other node is the shortest path. 
- -\begin{figure}[!ht] - \centering - \includegraphics[width=0.2\columnwidth]{fig/breath_first_graph_search_process0.png} - \includegraphics[width=0.2\columnwidth]{fig/breath_first_graph_search_process1.png} - \includegraphics[width=0.2\columnwidth]{fig/breath_first_graph_search_process2.png} - \includegraphics[width=0.2\columnwidth]{fig/breath_first_graph_search_process3.png} - - \includegraphics[width=0.2\columnwidth]{fig/breath_first_graph_search_process4.png} - \includegraphics[width=0.2\columnwidth]{fig/breath_first_graph_search_process5.png} - \includegraphics[width=0.2\columnwidth]{fig/breath_first_graph_search_process6.png} - \includegraphics[width=0.2\columnwidth]{fig/breath_first_graph_search_process7.png} - - \includegraphics[width=0.2\columnwidth]{fig/breath_first_graph_search_process8.png} - \includegraphics[width=0.2\columnwidth]{fig/breath_first_graph_search_process9.png} - \includegraphics[width=0.2\columnwidth]{fig/breath_first_graph_search_process10.png} - \includegraphics[width=0.2\columnwidth]{fig/breath_first_graph_search_process11.png} - \caption{The process of Breath-first Graph Search. The black arrows denotes the the relation of $u$ and its not visited neighbors $v$. And the red arrow marks the backtrack edge. } - \label{fig:breath_first_graph_search_process} -\end{figure} - -\paragraph{Three States Iterative Implementation} When a node is first put into the frontier set, it is marked with gray color. A node is complete only if all its adjacent nodes turn into gray or black. With the visiting ordering of the breath-first graph search, the state change of nodes in the search process is shown in Fig.~\ref{fig:breath_first_graph_search_process}. 
The Python code is: -\begin{lstlisting}[language=Python] -def bfgs_state(g, s): - v = len(g) - colors = [STATE.white] * v - - q, orders = [s], [s] - complete_orders = [] - colors[s] = STATE.gray # make the state of the visiting node - while q: - u = q.pop(0) - for v in g[u]: - if colors[v] == STATE.white: - colors[v] = STATE.gray - q.append(v) - orders.append(v) - - # complete - colors[u] = STATE.black - complete_orders.append(u) - return orders, complete_orders -\end{lstlisting} -The printout of \texttt{orders} and \texttt{complete\_orders} are: -\begin{lstlisting}[numbers=none] -([0, 1, 2, 4, 3, 5], [0, 1, 2, 4, 3, 5]) -\end{lstlisting} -\paragraph{Properties} In breath-first graph search, the first discovery and finishing time are different for each node, but the discovery ordering and the finishing ordering of nodes are the same ordering. - -\paragraph{Shortest Path} - - - - - - - - - - - - \subsubsection{Applications} The common problems that can be solved by BFS are those only need one solution: the best one such like getting the shortest path. As we will learn later that breath-first-search is commonly used as archetype to solve graph optimization problems, such as Prim's minimum-spanning-tree algorithm and Dijkstra's single-source-paths algorithm. - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%Tree Traversal%%%%%%%%%%%%%%%%%%%% -\section{Tree Traversal} -\subsection{Depth-First Tree Traversal} -%\paragraph{Tree traversal for the rooted tree} -\begin{figure}[H] - \centering - \includegraphics[width = .45\columnwidth]{fig/binary_tree_example.png} - \caption{Exemplary Binary Tree } - \label{fig:binary_tree_traversal_example} -\end{figure} -\subsubsection{Introduction} -% Let us see how we can iterate all nodes of the recursive tree we just constructed. 
-Depth-first search starts at the root node and continues branching down a particular path; it selects a child node that is at the deepest level of the tree from the frontier to expand next and defers the expansion of this node's siblings. Only when the search hits a dead end (a node that has no child) does the search ``backtrack'' to its parent node, and continue to branch down to other siblings that were deferred. A recursive tree can be traversed recursively. We print out the value of current node, then apply recursive call on the left and right node; by treating each node as a subtree, naturally a recursive call to a node can be thought of handling the traversal of that subtree. The code is quite straightforward: - -%The root node is like a queen, she sent out two assistants to traverse all provinces, and these two assistants further send out its sub finish the tasks and to combine the result is what the queen herself needs to do. Let us write the following recursive traversal function and observe its output first: -\begin{lstlisting}[language=Python] -def recursive(node): - if not node: - return - print(node.val, end=' ') - recursive(node.left) - recursive(node.right) -\end{lstlisting} -Now, we call this function with a tree as shown in Fig.~\ref{fig:binary_tree_traversal_example}, the output that indicates the traversal order is: -\begin{lstlisting}[language=Python] -1 2 4 5 3 6 -\end{lstlisting} - -% As we see, all three types of traversal, the search process where we say search tree deepened as much as possible on each child before going visiting the next sibling, this is also called \textbf{depth-first search} and - -% \paragraph{Backing Implementation of Depth First Search} -% We know that the recursion is implemented implicitly with call stack, - -\subsubsection{Three Types of Depth-first Tree Traversal} -\begin{figure}[!ht] - \centering - \includegraphics[width = .99\columnwidth]{fig/tree_traversal.png} - \caption{Left: PreOrder, Middle: InOrder, Right: 
PostOrder. The red arrows marks the traversal ordering of nodes.} - \label{fig:binary_tree_traversal} -\end{figure} -The visiting ordering between the current node, its left child, and its right child decides the following different types of recursive tree traversals: - -\begin{enumerate}[label=(\alph*)] -\item Preorder Traversal with ordering of \texttt{[current node, left child, right child]}: it visits the nodes in the tree with ordering [1, 2, 4, 5, 3, 6].In our example, the recursive function first prints the root node 1, then goes to its left child, which prints out 2. Then it goes to node 4. From node 4, it next moves to its left child which is empty and leads to the termination of the recursive call and then the recursion backward to node 4. Since node 4 has no right child, it further backwards to node 2, and then it check 2's right child 5. The same process of node 4 happens on node 5. It backwards to node 2, backwards to node 1, and keep visiting its right child 3, and the process goes on. We draw out this process in Fig.~\ref{fig:binary_tree_traversal}. -\item Inorder Traversal with ordering of \texttt{[left child, current node, right child]}: it traverses the nodes in ordering of [4, 2, 5, 1, 3, 6]. Three segments will appear with the inorder traversal for a root node: nodes in left subtree, root, and nodes in the right subtree. -\item Postorder Traversal with ordering of \texttt{[left child, right child, current node]}: it traverses the nodes in ordering of [4, 5, 2, 6, 3, 1]. 
-\end{enumerate} -We offer the code of Inorder Traversal: -\begin{lstlisting}[language=Python] -def inorder_traversal(node): - if not node: - return - inorder_traversal(node.left) - print(node.val, end=' ') - inorder_traversal(node.right) -\end{lstlisting} - - - - -\begin{bclogo}[couleur = blue!30, arrondi=0.1,logo=\bccrayon,ombre=true]{Try to check the other two orderings: \texttt{[left child, current node, right child]} and \texttt{[left child, right child, current node]} by hand first and then write the code to see if you get it right?} -\end{bclogo} - - - -\subsubsection{Return Values} -Here, we want to do the task in a different way: We do not want to just print out the visiting orders, but instead write the ordering in a list and return this list. How would we do it? The process is the same, except that we need to return something (not \texttt{None}, which is the default in Python). If the tree is empty, it shall return an empty list \texttt{[]}; if there is only one node, it returns \texttt{[1]} instead. - -Let us use PreOrder traversal as an example. To make it easier to understand, the same queen this time wants to do the same job in a different way: she wants to gather all the data from these different states into her own hands. This time, she assumes the two generals A and B will each return a \texttt{list} of the subtree, safe and sound. Her job is to combine her own data, the list returned from the left subtree, and the list returned from the right subtree. Therefore, the left general brings back $A=[2,4,5]$, and the right general brings back $B=[3, 6]$. Then the final result will be $queen + A + B = [1,2,4,5,3, 6]$. 
The Python code is given: -\begin{lstlisting} [language = Python] -def PreOrder(root): - if root is None: - return [] - ans = [] - left = PreOrder(root.left) - right = PreOrder(root.right) - ans = [root.val] + left + right - return ans -\end{lstlisting} -\paragraph{An Example of Divide and Conquer} Be able to understand the returned value and combine them is exactly the method of \texttt{divide and conquer}, one of the fundamental algorithm design principles. This is a seemingly trivial change, but it approaches the problem solving from a totally different angle: atomic searching to divide and conquer that highlights the structure of the problem. The printing traversal and returning traversal represents two types of problem solving: the first is through searching--searching and treating each node more separately and the second is through reduce and conquer--reducing the problem to a series of smaller subproblems(subtrees where the smallest are empty subtrees) and construct the result by using the information of current problem and the solutions of the subproblems. - -\subsubsection{Complexity Analysis} -It is straightforward to see that it only visit all nodes twice, one in the forward pass and the other in the backward pass of the recursive call, making the time complexity linear to total number of nodes, $O(n)$. The other way is through the recurrence relation, we would write $T(n)=2\times T(n/2)+O(1)$, which gives out $O(n)$ too. 
-% Similarly, the recursive code for the InOrder Traversal and PostTraversal: -% \begin{lstlisting}[language = Python] -% def InOrder(root): -% if root is None: -% return [] -% res = [] -% left = InOrder(root.left) -% #print(root.val, end=',') -% right = InOrder(root.right) -% res = left + [root.val]+ right -% return res - -% def PostOrder(root): -% if root is None: -% return [] -% res = [] -% left = PostOrder(root.left) -% #print(root.val, end=',') -% right = PostOrder(root.right) -% res = left + right + [root.val] -% return res -% print(InOrder(root)) -% print(PostOrder(root)) -% # output -% #[4, 2, 5, 1, 3] -% #[4, 5, 2, 3, 1] -% \end{lstlisting} -\subsection{Iterative Tree Traversal} -In Chapter Iteration and Recursion, we would know that the recursive function might suffer from the stack overflow, and in Python the recursion depth is $1000$. This section, we explore iterative tree traversals corresponding to PreOrder, InOrder, and PostOrder tree traversal. We know that the recursion is implemented implicitly with call stack, therefore in our iterative counterparts, they all use an explicit stack data structure to mimic the recursive behavior. -\begin{figure}[!ht] - \centering - \includegraphics[width = .9\columnwidth]{fig/iterative_tree_traversal_preorder.png} - \caption{The process of iterative preorder tree traversal.} - \label{fig:iterative_tree_traveral_preorder} -\end{figure} - -\paragraph{Simple Iterative Preorder Traversal} If we know how to implement a DFS iteratively with stack in a graph, we know our iterative preorder traversal. In this version, the stack saves all our frontier nodes. -\begin{itemize} - \item At first, we start from the root, and put it into the stack, which is 1 in our example. - \item Our frontier set has only one node, thus we have to pop out node 1 and expand the frontiner set. When we are expanding node 1, we add its children into the frontier set by pushing them into the stack. 
In the preorder traversal, the left child should be first expanded from the frontier stack, indicating we should push the left child into the stack afterward the right child is pushed into. Therefore, we add node 3 and 2 into the stack. - \item We continue step 2. Each time, we expand the frontier stack by pushing the toppest node's children into the stack and after popping out this node. This way, we use the first come last ordering of the stack data structure to replace the recursion. -\end{itemize} -We illustrate this process in Fig. ~\ref{fig:iterative_tree_traveral_preorder}. The code is shown as: -\begin{lstlisting} [language = Python] -def PreOrderIterative(root): - if root is None: - return [] - res = [] - stack = [root] - while stack: - tmp = stack.pop() - res.append(tmp.val) - if tmp.right: - stack.append(tmp.right) - if tmp.left: - stack.append(tmp.left) - return res -\end{lstlisting} -%Even we know what is our main auxiliary data structure, we are no where close to the conversion. In the recursion, there are always two passes of visiting each state, while this is not the case of the iteration. -\begin{figure}[!ht] - \centering - \includegraphics[width = .9\columnwidth]{fig/iterative_tree_traversal_postorder.png} - \caption{The process of iterative postorder tree traversal.} - \label{fig:iterative_tree_traveral_postorder} -\end{figure} - -\paragraph{Simple Iterative Postorder Traversal} Similar to the above preorder traversal, the postordering is the ordering of nodes finishing the expanding of both its left and right subtree, thus with the ordering of \texttt{left subtree}, \texttt{right subtree}, and \texttt{root}. In preorder traversal,we obtained the ordering of \texttt{root}, \texttt{left subtree}, and \texttt{right subtree}. We try to reverse the ordering, it becomes \texttt{right subtree}, \texttt{left subtree}, and \texttt{root}. This ordering only differs with postorder by a single a swap between the left and right subtree. 
So, we can use the same process as in the preorder traversal but expanding a node's children in the order of left and right child instead of right and left. And then the reversed ordering of items being popped out is the postoder traversal ordering. The process is shown in Fig.~\ref{fig:iterative_tree_traveral_postorder}. The Python implementation is shown as: -\begin{lstlisting}[language=Python] -def PostOrderIterative(root): - if root is None: - return [] - res = [] - stack = [root] - while stack: - tmp = stack.pop() - res.append(tmp.val) - if tmp.left: - stack.append(tmp.left) - if tmp.right: - stack.append(tmp.right) - return res[::-1] -\end{lstlisting} - -\paragraph{General Iterative Preorder and Inorder Traversal } In the depth-first-traversal, we always branch down via the left child of the node at the deepest level in the frontier. The branching only stops when it can no longer find a left child for the deepest node in the frontier. Only till then, it will look around at expanding the right child of this deepest node, and if no such right child exists, it backtracks to its parents node and continues to check its right child to continue the branching down process. - -\begin{figure}[!ht] - \centering - \includegraphics[width = .9\columnwidth]{fig/iterative_tree_traversal.png} - \caption{The process of iterative tree traversal.} - \label{fig:iterative_tree_traveral} -\end{figure} - -Inspired by this process, we use a pointer, say \texttt{cur} to point to the root node of the tree, and we prepare an empty \texttt{stack}. The iterative process is: -\begin{itemize} - \item The branching down process can be implemented with visiting \texttt{cur} node, and pushing it into the \texttt{stack}. And then we set \texttt{cur=cur.left}, so that it keeps deepening down. - \item When one branch down process terminates, we pop out a node from \texttt{stack}, and we set \texttt{cur=node.right}, so that we expand the branching process to its right sibling. 
-\end{itemize} -We illustrate this process in Fig.~\ref{fig:iterative_tree_traveral}. The ordering of items pushed into the stack is the preorder traversal ordering, which is [1, 2, 4, 5, 3, 6]. And the ordering of items being popped out of the stack is the inorder traversal ordering, which is [4, 2, 5, 1, 3, 6]. - -\paragraph{Implementation} We use two lists--\texttt{preorders} and \texttt{inorders}--to save the traversal orders. The Python code is: -\begin{lstlisting}[language=Python] -def iterative_traversal(root): - stack = [] - cur = root - preorders = [] - inorders = [] - while stack or cur: - while cur: - preorders.append(cur.val) - stack.append(cur) - cur = cur.left - node = stack.pop() - inorders.append(node.val) - cur = node.right - return preorders, inorders -\end{lstlisting} - - -% \paragraph{Iterative PreOrder Traversal} Here is a common mistake we would make: we think we start at 1, put 1 in a stack, [1], then move to 2, have stack [1, 2], then move to 4, have a stack [1, 2, 4]. Now, 4 has no left child and no right child, we pop it out, and moves back to 2, then 2 would still have the left tree, which we end up with infinite loop. -% \begin{lstlisting}[language=Python] -% def preorder_iter(root): -% if not root: -% return -% stack = [root] -% print(root.val, end=' ') -% i = 0 -% while stack: -% i += 1 -% if i==10: -% return -% node = stack[-1] -% while node.left: -% print(node.left.val, end=' ') -% stack.append(node.left) -% node = node.left -% node = stack[-1] -% if node.right: -% print(node.right.val, end=' ') -% stack.append(node.right) -% else: -% stack.pop() -% \end{lstlisting} -% We will end up with the print out: -% \begin{lstlisting}[numbers=none] -% 1 2 4 4 4 4 4 4 4 4 4 -% \end{lstlisting} -% This means when we are - - -% \paragraph{PostOrder Iterative Tree Traversal} Need to explain better!!! 
-% \begin{lstlisting}[language = Python] -% def postorderTraversal(self, root): -% if root is None: -% return [] -% res = [] -% stack = [root] -% while stack: -% tmp = stack.pop() -% res.append(tmp.val) -% if tmp.left: -% stack.append(tmp.left) -% if tmp.right: -% stack.append(tmp.right) -% return res[::-1] -% \end{lstlisting} -% \paragraph{InOrder Iterative}. In the inorder, we need to print out all the left subtree first, and then the root, followed by the right. The process is as follows: -% \begin{lstlisting} -% 1) Create an empty stack S. -% 2) Initialize current node as root -% 3) Push the current node to S and set current = current->left until current is NULL -% 4) If current is NULL and stack is not empty then -% a) Pop the top item from stack. -% b) Print the popped item, set current = popped_item->right -% c) Go to step 3. -% 5) If current is NULL and stack is empty then we are done. -% \end{lstlisting} -% \begin{lstlisting}[language=Python] -% def InOrderIterative(root): -% if root is None: -% return [] -% res = [] -% stack = [] -% current = root -% while current: -% stack.append(current) -% current = current.left - -% while stack: -% tmp = stack.pop() -% res.append(tmp.val) -% current = tmp.right -% while current: -% stack.append(current) -% current = current.left - -% return res -% \end{lstlisting} -% Another way to write this: -% \begin{lstlisting}[language=Python] -% def inorder(self, root): -% cur, stack = root, [] -% while cur or stack: -% while cur: -% stack.append(cur) -% cur = cur.left -% node = stack.pop() -% print(node.val) -% cur = node.right -% \end{lstlisting} -% \begin{lstlisting}[language=Python] -% def inorder_iter(root): -% if not root: -% return -% stack = [] -% node = root -% i = 0 -% while stack or node: -% print_stack(stack) -% if node: -% stack.append(node) -% node = node.left -% else: -% node = stack.pop() -% print(node.val, end = ' ') -% node = node.right -% \end{lstlisting} - -%%%%%%%%%%%BFS tree traversl%%%%%%%%%%%%%% 
-\subsection{Breath-first Tree Traversal} -\label{bfs_tree_traversal} -% \begin{figure}[!ht] -% \centering -% \includegraphics[width=0.96\columnwidth]{fig/general_breath_first_search.png} -% \caption{Breath-first search on a simple search tree. At each stage, the node to be expanded next is indicated by a marker. } -% \label{fig:breath_first_search_strategy} -% \end{figure} -\begin{figure}[H] - \centering - \includegraphics[width = .45\columnwidth]{fig/binary_tree_example.png} - \caption{Draw the breath-first traversal order } - \label{fig:binary_tree_traversal_example_bfs} -\end{figure} -Instead of traversing the tree recursively deepening down each time, the alternative is to visit nodes level by level, as illustrated in Fig.~\ref{fig:binary_tree_traversal_example_bfs} for our exemplary binary tree. We first visit the root node 1, and then its children 2 and 3. Next, we visit 2 and 3's children in order, going to nodes 4, 5, and 6. This type of Level Order Tree Traversal uses the \textbf{breath-first search strategy} which differs from our covered depth-first search strategy. As we see in the example, the root node is expanded first, then all successors of the root node are expanded next, and so on, following a level by level ordering. We can also observe a rule: the nodes that come first get expanded first. For example, 2 is visited first and then 3, thus we expand 2's children first. Then we have 4 and 5. Next, we expand 3's children. This first-come, first-expanded rule tells us we can rely on a queue to implement BFS. - - - - -\paragraph{Simple Implementation} We start from the root, say it is our first level, put it in a list named \texttt{nodes\_same\_level}. Then we use a \texttt{while} loop, and each loop we visit all children nodes of \texttt{nodes\_same\_level} from the last level. 
We put all these children in a temporary list \texttt{temp}; before each iteration ends, we assign \texttt{temp} to \texttt{nodes\_same\_level}. At the deepest level no more children nodes are found, which leaves our \texttt{temp} list empty, and our while loop terminates. -\begin{lstlisting}[language = Python] -def LevelOrder(root): - if not root: - return - nodes_same_level = [root] - while nodes_same_level: - temp = [] - for n in nodes_same_level: - print(n.val, end=' ') - if n.left: - temp.append(n.left) - if n.right: - temp.append(n.right) - nodes_same_level = temp -\end{lstlisting} -With our exemplary binary tree, the code above outputs the following: -\begin{lstlisting}[language=Python] -1 2 3 4 5 6 -\end{lstlisting} - -\paragraph{Implementation with Queue} As we discussed, we can use a FIFO queue to save the nodes waiting to be expanded. In this case, at each iteration of the \texttt{while} loop we only handle the one node that is at the front of the queue. -\begin{lstlisting}[language=Python] -def bfs(root): - if not root: - return - q = [root] - while q: - node = q.pop(0) # get node at the front of the queue - print(node.val, end=' ') - if node.left: - q.append(node.left) - if node.right: - q.append(node.right) -\end{lstlisting} - - - -\section{Informed Search Strategies**} -%%%%%%%%%%%%%%%%BFS%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\subsection{Best-first Search} - Best-first search is a search algorithm which explores a graph by expanding the most promising node chosen according to a specified rule. How promising a node is, is described by a \textbf{heuristic evaluation function $f(n)$} which, in general, may depend on the description of the node $n$, the description of the goal, the information gathered by the search up to that point, and, most importantly, on any extra knowledge about the problem domain.
- - Breath-first search fits as a special case in Best-first search if the objective of the problem is to find the shortest path from source to other nodes in the graph; it uses the estimated distance to source as a heuristic function. At the start, the only node in the frontier set is the source node, expand this node and add all of its unexplored neighboring nodes in the frontier set and each comes with distance 1. Now, among all nodes in the frontier set, choose the node that is the most promising to expand. In this case, since they all have the same distance, expand any of them is good. Next, we would add nodes that have $f(n)=2$ in the frontier set, choose any one that has smaller distance. - - A Generic best-first search will need a priority queue to implement instead of a FIFO queue used in the breath-first search. - - - - - %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%Hands on examples%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - \subsection{Hands-on Examples} - - \subsubsection{Get a more straightforward example} Add an example - - - -% If we model each element in the array as a node in the graph, and assume given node $\mu$ with index $i$, if the element $v$ with index $j$, $j > i$, is larger than $\mu$, there will be an edge $\mu \rightarrow v$. We draw the graph shown in Fig.~\ref{fig:tree_lis}. The problem is modeled as finding the longest path in the graph, which can be solved with either DFS or BFS. - -% We define \texttt{curlen} as the length of increasing sequence from start node $[]$ to the current node, which can have $-\infty$ as value. length up. For example, at the leftmost node $101$, $curlen=2$ and the lowest $101$ node will have $curlen=4$ which is our longest LIS. Therefore, we would need a global variable $maxlen$ to track the maximum LIS. 
- -% \paragraph{Depth-first Graph Search} The implementation of Python is provided: -% \begin{lstlisting}[language=Python] -% import sys - -% def dfs(curIdx, preV, curlen, a, maxlen): - -% for i in range(curIdx+1, len(a)): -% # if a condition is satisfied, move to that node instead -% if a[i] > preV: -% dfs(i, a[i], curlen+1, a, maxlen) -% maxlen[0] = max(maxlen[0], curlen+1) -% return -% \end{lstlisting} -% Now, we need to call the function with \texttt{curIdx=-1} and \texttt{preV=-sys.maxsize}, and \texttt{curlen=0} for the root node in the graph. - -% \paragraph{Breath-first Graph Search} The implementation of Python is provided: -% \begin{lstlisting}[language=Python] -% def bfs( nums): -% """ -% :type nums: List[int] -% :rtype: int -% """ -% maxlen = 0 -% q = [(-1, -sys.maxsize, 0)] # start pos can be any number in nums -% while q: -% new_q = [] -% for idx, prev, curlen in q: -% # search for number that is larger that current -% for j in range(idx+1, len(nums)): -% if nums[j] > prev: -% maxlen = max(maxlen, curlen + 1) -% new_q.append((j, nums[j], curlen + 1)) -% q = new_q -% return maxlen -% \end{lstlisting} -\subsubsection{Triangle (L120)} -Given a triangle, find the minimum path sum from top to bottom. Each step you may move to adjacent numbers on the row below. -\begin{lstlisting}[numbers=none] -Example: -Given the following triangle: - -[ -[2], -[3,4], -[6,5,7], -[4,1,8,3] -] -The minimum path sum from top to bottom is 11 (i.e., 2 + 3 + 5 + 1 = 11). -\end{lstlisting} - - -\paragraph{Analysis} -Solution: first we can use dfs traverse as required in the problem, and use a global variable to save the minimum value. The time complexity for this is $O(2^n)$. When we try to submit this code, we get LTE error. 
The code is as follows: -\begin{lstlisting}[language = Python] -import sys -def min_path_sum(t): - ''' - Purely Complete Search - ''' - min_sum = sys.maxsize - def dfs(i, j, cur_sum): - nonlocal min_sum - # edge case - if i == len(t) or j == len(t[i]): - # gather the sum - min_sum = min(min_sum, cur_sum) - return - # only two edges/ choices at this step - dfs(i+1, j, cur_sum + t[i][j]) - dfs(i+1, j+1, cur_sum + t[i][j]) - dfs(0, 0, 0) - return min_sum -\end{lstlisting} - - - - - -% \subsection{Categorization} -% So far we have covered the most important searching strategies, mainly two types: Uninformed and Informed (Heuristic) searches. DFS, DFS, Bidirectional search in the uninformed search group. -% \subsubsection{Explicit Search and Implicit Search} -% \subsubsection{Complete Search and } -% \subsubsection{exhaustive search and heuristic search} - -% \subsubsection{Applications} - -% An animation of DFS is available \url{https://www.cs.usfca.edu/~galles/visualization/DFS.html} -% The output will be $1, 2, 4, 6, 3, 5$. The path of the DFS actually composes a tree, we can this a \textbf{DFS tree}. - -% In the code snipet, line 5 is to check if the current neighbor is visited or not. We can either use a SET or a list of Booleans, or if we know the total vertices are within 32 or 64, we can use bit as shown in Section~\ref{chapter_bit_section_bitwise}. - - - - -% \subsection{Comparison of BFS and DFS} -% BFS and DFS is the most basic complete search in graph. They both search all vertices and edges by once, which made them share the same time complexity $O(|V|+|E|)$. We see, in the BFS, saving nodes of the gray state or black state has the same visiting ordering. Breadth-first search usually serves to find shortest path distances (and the associated predecessor subgraph) from a given source. Depth-first search is often a subroutine in another algorithm, as we shall see later in this chapter. 
-% \section{Discussion of Graph Search} -% \label{graph_types} -% As we will in the future chapters, basic BFS and DFS lays the fundations of all graph and tree-based search. Understanding the properties of graph search throughly in this chapter will ease our journey to explore more advanced graph algorithms. -% There are some properties related to graph that we need to learn before moving to the advanced algorithms. - -% \paragraph{Completeness} -% In the context of search, a complete algorithm is one that guarantees that if a path to the goal exists, the algorithm will reach the goal. Note that \textit{completeness} does not imply \textit{optimality} of the found path. - -% For example, breadth-first search (BFS) is complete (and in fact optimal if step costs are identical at a given level), because it can find all paths starting from a given source vertex in the graph. (This might not be the case if step cost at a given level is not identical). while depth-first search (DFS) on trees is incomplete (consider infinite or repeated states). - -\section{Exercises} -\subsection{Coding Practice} -\paragraph{Property of Graph} -\begin{enumerate} - \item 785. Is Graph Bipartite? (medium) - \item 261. Graph Valid Tree (medium) - \item 797. All Paths From Source to Target(medium) -\end{enumerate} - - -\end{document} \ No newline at end of file diff --git a/Easy-Book/chapters/delete/algorithms.tex.prec b/Easy-Book/chapters/delete/algorithms.tex.prec deleted file mode 100644 index 73d1554..0000000 --- a/Easy-Book/chapters/delete/algorithms.tex.prec +++ /dev/null @@ -1,20 +0,0 @@ -\documentclass[../main.tex]{subfiles} -\begin{document} -% \chapter{Introduction} -% To introduce the algorithms, start with the brute force, then go to the efficient algorithms, including the analysis of complexity. In this chapter, we start with the basic algorithms, sorting, and basic algorithm theory, divide and conquer, dynamic programming, greedy algirhtm. 
Then in another part we introduce more specific popular algorithms. -% \subfile{chapters/part3/introduction} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%%%%%%%%%%%%% Divide and Conqure and Trees -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -% \chapter{Divide and Conquer} -% \label{divide-conquer} -% \subfile{chapters/part3/divide-conquer ***} - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%%%%%%%%%%%%% Dynamic Programming -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - - - - -\end{document} \ No newline at end of file diff --git a/Easy-Book/chapters/delete/basic_algorithm.tex.prec b/Easy-Book/chapters/delete/basic_algorithm.tex.prec deleted file mode 100644 index 50c38cf..0000000 --- a/Easy-Book/chapters/delete/basic_algorithm.tex.prec +++ /dev/null @@ -1,10 +0,0 @@ -\documentclass[../main-book.tex]{subfiles} -\begin{document} -% \chapter{Introduction} -% To introduce the algorithms, start with the brute force, then go to the efficient algorithms, including the analysis of complexity. In this chapter, we start with the basic algorithms, sorting, and basic algorithm theory, divide and conquer, dynamic programming, greedy algirhtm. Then in another part we introduce more specific popular algorithms. -% \subfile{chapters/part3/introduction} - - - - -\end{document} \ No newline at end of file diff --git a/Easy-Book/chapters/delete/data-structure.tex.prec b/Easy-Book/chapters/delete/data-structure.tex.prec deleted file mode 100644 index fe14155..0000000 --- a/Easy-Book/chapters/delete/data-structure.tex.prec +++ /dev/null @@ -1,40 +0,0 @@ -\documentclass[../main.tex]{subfiles} -\begin{document} -%%%%%%%%%%%%%%%% -% CHAPTER Two! 
% -%%%%%%%%%%%%%%%% -In programming, data structures are used to store data and perform operations on them so that we can run different algorithms on them in order to solve real-world problems and meet certain efficiency requirements. The comparison between various data structures is highly dependent on the context of the problem we are facing. Being familiar with data structures is a must for us to understand and implement the algorithms following this part. The concepts of data structures and the real data types and/or built-in modules in Python go hand in hand for a real understanding. Thus, in our book, we insist on learning the concepts together with real implementations using basic built-in data types: list/ dict/ string. On this base, we learn built-in modules which implement these data structures for us directly and with good efficiency. - -At a high level, data structures can be categorized into two main types: \textit{Linear} (Chapter~\ref{chapter_linear_data_structure}) and \textit{Non-linear} (including Heap and Graph in Chapter~\ref{chapter_non_linear_data_structure} and Chapter~\ref{chapter_tree}). Before we move ahead to learn these data structures, it is essential for us to understand how data structures are normally categorized based on specific characteristics: -\begin{itemize} - \item \textbf{Mutable vs Immutable} Depending on whether modification of the items of the data structures is allowed, there are \textit{mutable} and \textit{immutable} data structures. - \item \textbf{Static vs Dynamic} Moreover, we can categorize the data structures as \textit{static data structures} and \textit{dynamic data structures} according to whether we can change the size of the created data structures. In a static data structure the size of the structure is fixed since its creation. In a dynamic data structure, by contrast, the size of the structure is not fixed and can be modified through operations such as Insertion and Append.
Dynamic data structures are designed to facilitate changes to data structures at run time. -\end{itemize} - -The implementation of different data structures can vary across programming languages. To make the contents more compact and make the reference more convenient, in this part, we combine data structures from the programming literature with the corresponding data structures (either built-in or external modules) that come from Python. With this in mind, for each data structure, the contents are organized as: -\begin{itemize} - \item firstly we will introduce the concept of the data structures including definition, pros, and cons; - \item secondly, the common basic operations with concepts and time complexity: Access, Search, Insertion, Deletion. \item lastly, to complete the picture, we introduce Python's data structures (either built-in or external) with their methods and corresponding operations. -\end{itemize} - -% The contents of this part is organized as: Array (Section~\ref{}), Linked List (singly or doubly linked lists), Hashmap(Section~\ref{}), Queue and Stack (Section~\ref{}). - -Divide and Conquer serves as the fundamental problem solving methodology in software programming; data structures, on the other hand, play the role of laying the foundation for any problem-solving paradigm (that is, algorithms) to run on. Therefore, the content of this chapter will serve as the cornerstone for the purpose of the whole book -- ``cracking'' the LeetCode problems. The purpose of this part is to give beginners a chance to learn different data structures and their Python implementations systematically and practically in the sense of problem solving. For medium or higher level audiences, the organization of this part can help them review their knowledge base efficiently. - -% to typical application of each data structure.
The goal of this part is, when we are handling practical real-world problem, we can first come up with the possibly the optimal data structure to model the problem and apply algorithms on to get our answers. Also, this part gives beginners a chance to learn common to high-end data structures in Python before we head off to solving LeetCode problems. - -% All the data structures are containers of information, and each is structured differently for the others to fit in a certain need. In Chapter~\ref{chapter_basic_data_structure}, we will include the most basic and fundamental linear data structures in software programming: Array and String, Linked List, Hashmap, Queue and Stack. In Chapter~\ref{chapter_advanced_data_structure} will include the non-linear data Structure, including Heap, Tree, and Graph. - -% Each data structure is for its unique purpose, for example, we introduce Monotone Queue in Section~\ref{section_mono_stack} which is rarely covered in most of algorithms books. We will show how using this type of data structure can help us achieve liner time performance for challenging LeetCode problems. - -% However, we will included the categorized type of problems in Part~\ref{part_specific_algorithms} Specific Algorithms so that in this part, we can focus on the concepts and not the specific algorithms. Also, this structure can benefit the beginners, to start from the basic. And becaues in Part~\ref{part_algorithms} we will include comprehensive explanation of each algorithm methodology. And in Part~\ref{part_specific_algorithms} we complete the picture, combing the data structure and the algorithms to solve real problems. 
-% chapter 6 -\chapter{Linear Data Structure} -\label{chapter_linear_data_structure} -\subfile{chapters/chapter_6_linear_data-structure} - -% chapter 7 -\chapter{Graphs and Trees} -\label{chapter_non_linear_data_structure} -\subfile{chapters/chapter_7_advanced_data_structure} -\end{document} \ No newline at end of file diff --git a/Easy-Book/chapters/delete/divide-conquer_deprecated.tex b/Easy-Book/chapters/delete/divide-conquer_deprecated.tex deleted file mode 100644 index c31b1d6..0000000 --- a/Easy-Book/chapters/delete/divide-conquer_deprecated.tex +++ /dev/null @@ -1,351 +0,0 @@ -\documentclass[../algorithms.tex]{subfiles} -\begin{document} -In this chapter, we talk about recursive function and how to write a recursive function that work properly. Then we give an algorithm theory: divide and conquer, which is a recursive function and divide problems, and solve problems separely, the final result is merged from the result of each subproblem. Also we need to understand the space and time complexity. -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%%%%%%%%%%%%% Recursive Programming -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\section{Recursive Programming} -For recursive function, we can draw recursive tree to denote the state transfer graph. With recursive function, it can simplify the programming of certain programs, once we mastered recursive function, we would feel it is a lot simpler than iterative implementation. However, it does take extra space complexity compared with the iterative implementation. - -\textbf{Recursive Function}: for recusrive program, we need to figure out the recursive transfer function between $f(i)$ and the next level $f(i+1)$. For example, the fibonacci number we could have $f(i) = f(i-1) + f(i-2)$. For the merge sort, we cant use a math function to represent this operation, we have $T(n) = 2*T(n/2) + O(n)$. $f(i) = merge(f(left), f(right))$. 
Some recursive function would have redundancy which we can improve the efficiency and avoid to compute the same subproblem twice by memoization which saves the result, and the iterative peer of the recursive implementation is called \textit{dynamic programming}. We will discuss the dynamic programming in details in the following chapter ~\ref{dynamic-programming}. Some other recursive function, there is no overlaps between subproblems which are the \textit{divide and conquer} cases, which will be discussed in detain in chapter~\ref{divide-conquer}. Or we have the universal \textit{Depth-first-searching} which can be implemented with recursive function, we will include this in chapter~\ref{searching}. - -\textbf{Recursive Function and Tree Structure}: - -\textbf{Stack Overflow and Iterative Implementation}: According to Wikepedia, in software, a stack overflow occurs if the call stack pointer exceeds the stack bound. The call stack may consist of a limited amount of address space, often determined at the start of the program depending on many factors, including the programming language, machine architecture, multi-threading, and amount of available memory. When a program attemps to use more space than is available on the call stack, the stack is said to \textit{overflow}, typically resulting in a program crash. The very deep recursive function is faced with the threat of stack overflow. And the only way we can fix it is by transforming the recursion into a loop and storing the function arguments in an explicit stack data structure, this is often called the iterative implementation which corresponds to the recursive implementation. - -We need to follow these points: -\begin{enumerate} - \item End condition, Base Cases and Return Values: either return an answer for base cases or None, and used to end the recursive calls. - \item Parameters: parameters include: data needed to implement the function, current paths, the global answers and so on. 
- \item Variables: What the \textbf{local} and {global} variables. In Python any pointer type of data can be used as global variable global result putting in the parameters. - \item Construct current result: when to collect the results from subtree and combine to get the result for current node. - \item Check the depth: if the program will lead to the heap stack overflow. -\end{enumerate} - - -% 递归函数关注以下几个因素 -% ·退出条件 -% ·参数有哪些 -% ·返回值是什么 -% ·局部变量有哪些 -% ·全局变量有哪些 -% ·何时输出 -% ·会不会导致堆栈溢出 -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%%%%%%%%%%%%% Divide and Conquer -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\section{Divide and Conquer} -Divide and conquer partitions the problems into smaller subproblems and solves the problems recursively, and then combine the solutions to solve the original problem. It includes three steps at each recursive call: -\begin{enumerate} - \item Divide: divide one problems into a series of subproblems that are smaller instances of the same problem; - \item Conquer: recursively solve each subproblem. When the subproblem is small enough, it will be solved directly, which is the end condition in the recursive function; - \item Combine: combine the result from each subproblem into the solution to the current problem. -\end{enumerate} - -For divide and conquer, we have two cases, 1) our subproblems are disjoint with each other; 2) our smaller subproblem can include another smaller subproblem. We can use the the following two recurrence equation to generalize. -\begin{equation} \label{bt_time} -\begin{split} -T(n) & = aT(n/b) + f(n)\\ -\end{split} -\end{equation} -where $f(n)$ denotes the time and operation need to divide the problem into disjoint and independent subproblems and combine the solutions of the subproblems to solve the current problem. 
-\begin{equation} \label{dp_equation} -\begin{split} -T(n) &= T(n-1) + T(n-2) + \cdots + T(1) + f(n)\\ -\end{split} -\end{equation} - We can have any combination of terms $T(k), k \in [1, n-1]$ on the right side of this recurrence equation. Here the problem is divided into subproblems, however these subproblems are not disjoint and depend on each other. The subproblems overlap, which is to say, subproblems share smaller subproblems. - - Because of the different relations between the subproblems in these two situations, for the first case in Eq.~\ref{bt_time}, when we use recursive programming to solve the problem directly, we get the best time complexity since there is no overlap between subproblems. However, for the second case in Eq.~\ref{dp_equation}, programming them recursively would end up with redundancy in time complexity because their subproblems share subproblems. This also means they can be further optimized, and this type of algorithm falls in the category of dynamic programming, which we will discuss in detail in the next chapter. So, commonly divide and conquer refers to methods in the first category. - - Now, enough of the concepts, let us look at an example: - - Example 1: Maximum Subarray (53. medium) -\begin{lstlisting} - -Find the contiguous subarray within an array (containing at least one number) which has the largest sum. - -For example, given the array [-2,1,-3,4,-1,2,1,-5,4], - the contiguous subarray [4,-1,2,1] has the largest sum = 6. -\end{lstlisting} -Divide and conquer solution: $T(n) = max(T(left),T(right), T(cross))$, where max is for merging and T(cross) is for the case in which the potential subarray crosses the midpoint. For the complexity, $T(n)=2T(n/2)+n$; if we use the master method, it gives us $O(n \lg n)$.
We write the following Python code -\begin{lstlisting}[language = Python] -def maxSubArray(self, nums): - """ - :type nums: List[int] - :rtype: int - """ - def getCrossMax(low,mid,high): - left_sum,right_sum =0,0 - left_max, right_max = -maxint, -maxint - left_i,right_j=-1,-1 - for i in xrange(mid,low-1,-1): #[) - left_sum+=nums[i] - if left_sum>left_max: - left_max= left_sum - left_i = i - for j in xrange(mid+1,high+1): - right_sum+=nums[j] - if right_sum>right_max: - right_max= right_sum - right_j = j - return (left_i,right_j,left_max+right_max) - - def maxSubarray(low,high): - if low==high: - return (low,high, nums[low]) - mid = (low+high)//2 - rslt=[] - #left_low, left_high, left_sum = maxSubarray(low,mid) #[low,mid] - rslt.append(maxSubarray(low,mid)) #[low,mid] - #right_low,right_high,right_sum = maxSubarray(mid+1,high)#[mid+1,high] - rslt.append(maxSubarray(mid+1,high)) - #cross_low,cross_high,cross_sum = getCrossMax(low, mid, high) - rslt.append(getCrossMax(low, mid, high)) - return max(rslt, key=lambda x: x[2]) - return maxSubarray(0,len(nums)-1)[2] -\end{lstlisting} -Also, we does not necessarily to use divide and conquer, we can be more creative and try harder to make the time complexity goes to $O(n)$. 
We can convert this problem to best time to buy and sell stock problem.[0, -2, -1, -4, 0, -1, 1, 2, -3, 1], => O(n), then we use prefix\_sum, the difference is we set prefix\_sum to 0 when it is smaller than 0, O(n) -\begin{lstlisting}[language = Python] -from sys import maxint -class Solution(object): - def maxSubArray(self, nums): - """ - :type nums: List[int] - :rtype: int - """ - max_so_far = -maxint - 1 - prefix_sum= 0 - for i in range(0, len(nums)): - prefix_sum+= nums[i] - if (max_so_far < prefix_sum): - max_so_far = prefix_sum - - if prefix_sum< 0: - prefix_sum= 0 - return max_so_far -\end{lstlisting} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%%%%%%%%%%%%% Complexity Analysis -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\section{Complexity Analysis} -The complexity analysis incldudes the time and space complexity. While for the divide and conquer methods, because of the usage of the recursion function, it could be slightly tricky and we need to learn how to attack this complexity. Also it is usually required and asked by interviewers in real interview. - -\subsection{Time Complexity} -The core to analyze the time complexity of the divide and conquer methodology is by characterizing the recurrence relation shown in Eq.~\ref{bt_time}. In general we have three ways to do this, 1) substitution method; 2) recursion-tree method; 3) master method. However, in real situation, it depends on XX to choose which one to use. Each one has its own limitation. In this book, we prove enough theory about how to compute the time complexity for the recurrence equation for practical coding or interview situation. If you want to learn more, it is a good choice to refer the book (Introduction to Algorithms). Here, we wont detail on the substitution method, because in real interviews, we need some quick and more straightforward ways to get the computational cost. 
And the substitution method is mostly used to prove the cost in a very rigorous way. The recursion tree and the master theorem are the main ways we rely on to answer the time complexity for a divide and conquer method shown in Eq.~\ref{bt_time}. - -\subsubsection{Recursion Tree Method} -Drawing out a recursion tree serves as a straightforward way to come up with a good guess. Normally we can tolerate a small amount of ``sloppiness'', because later on, we can prove the complexity with the substitution method mentioned earlier. However, when we are drawing the recursion tree, if we are careful enough in summing up the costs from each level and each node, we can use it as a direct proof of the solution to the recurrence. - -In the corresponding recursion tree for a recurrence equation in divide and conquer, each node represents the cost of a single subproblem somewhere in the set of recursive function invocations. We sum the costs within each level of the tree to obtain a set of per-level costs, and then we sum all the per-level costs to determine the total cost of all levels of the recursion. Let's look at one example for the given recurrence $T(n) = 3T(\floor*{n/4}) + \Theta(n^2)$. We replace $\Theta(n^2)$ with $cn^2$, where $c>0$. $cn^2$ is the cost we pay to divide a problem with input size $n$ into three subproblems each with input size $n/4$ and combine the solutions of the subproblems to solve the current problem. We first expand $T(n)$, and put the cost $cn^2$ at the root, with three children each denoted by $T(n/4)$. Then we recursively replace $T(n/4)$ with its cost and its subproblems until the size of each subproblem is 1, which means we get to the leaves. The computational complexity for this recursion would be the sum of all layers' costs. And we assume $T(1)=1$.
-\begin{figure}[h] - \centering - \includegraphics[width=0.8\columnwidth]{fig/recursive_tree_1.png} - \includegraphics[width=0.8\columnwidth]{fig/recursive_tree_2.png} - \caption{The process to construct a recursive tree for $T(n) = 3T(\floor*{n/4}) + cn^2$} - \label{fig:recursive_tree} -\end{figure} -\begin{equation} \label{eg_recurrence_5} -\begin{split} -T(n) & = cn^2+\frac{3}{16}cn^2+\left(\frac{3}{16}\right)^2cn^2+\cdots+\left(\frac{3}{16}\right)^{\log_4 n - 1}cn^2+\Theta(n^{\log_4 3})\\ -& = \sum_{i=0}^{\log_4 n - 1}\left(\frac{3}{16}\right)^{i}cn^2+\Theta(n^{\log_4 3})\\ -&< \sum_{i=0}^{\infty}\left(\frac{3}{16}\right)^{i}cn^2+\Theta(n^{\log_4 3})\\ -& = \frac{1}{1-(3/16)} cn^2+\Theta(n^{\log_4 3})\\ -& = O(n^2). -\end{split} -\end{equation} -\subsubsection{Master Method} -The master method is probably the easiest way to come up with the computational complexity analysis. It is a theorem that has been proved by researchers, and we just need to learn how to use it. The master theorem goes: - -For Eq.~\ref{bt_time}, let $a\geq1, b>1$, we first compute $n^{\log_b a}$, -\begin{enumerate} - \item If $f(n) = O(n^{\log_b a - \epsilon})$ for constant $\epsilon>0$, then we get $T(n) = \Theta(n^{\log_b a})$. - \item If $f(n) = \Theta(n^{\log_b a})$, then we get $T(n) = \Theta(n^{\log_b a} \log n)$. - \item If $f(n) = \Omega(n^{\log_b a + \epsilon})$ for constant $\epsilon>0$, and if $a f(n/b)\leq cf(n)$ for constant $c<1$ and all sufficiently large $n$, then we get $T(n) = \Theta(f(n))$. -\end{enumerate} -\subsection{Space Complexity} -The space the recursive function occupies is proportional to the depth of the recursive calls, $O(h)$, where $h$ is the height of the recursive tree. - -\subsection{Summary} -For your convenience, we provide a table that shows the time complexity of frequently used recurrence equations.
-\begin{figure}[h] - \centering - \includegraphics[width=1\columnwidth] {fig/complexity_cheatsheet.png} - \caption{The cheat sheet for time and space complexity with recurrence function.} - \label{fig:cheat_sheet} -\end{figure} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%%%%%%%%%%%%% Exercises -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\section{Exercises} -\begin{enumerate} - \item Pow(x, n) (50) - - Solution: T(n)= T(n/2)+O(1), the complexity is the same as the binary search, $O(logn)$. - \begin{lstlisting}[language=Python] - def myPow(self, x, n): - """ - :type x: float - :type n: int - :rtype: float - """ - if n==0: - return 1 - if n<0: - n=-n - x=1.0/x - def helper(n): - if n==1: - return x - - h = n//2 - r = n-h - value = helper(h) #T(n/2), then we have O(1) - if r==h: - return value*value - else: #r is going to be 1 bigger - return value*value*x - return helper(n) - \end{lstlisting} - - \item House Robber (198) - - Solution: If we use brute force is $O(2^n)$. Use divide and conquer, here because we use half and half. Which we need to get rid of. -\begin{lstlisting}[language = Python] -def rob(self, nums): - """ - :type nums: List[int] - :rtype: int - """ - memo=[[-1 for _ in range(len(nums))] for _ in range(len(nums))] - - def dp(l,r): - nonlocal memo - if l==r: - return nums[l] - if l>r: - return 0 - if l=k''' - n1=len(nums1) - n2=len(nums2) - memo=[[[None for k in range(k+1)] for col in range(n2+1) ] for row in range(n1+1)] - def dp(i,j,k): - if k==0: - return 0 - if memo[i][j][k] is None: - max1,max2,max3=-1,-1,-1 - if i[0+6, 6–6=0, 0+4=4, 4–2=2, ] Set the first to 0+6, nums[i-1]+nums[i]. - -r = max(left\_subarray, right\_subarry, max(right\_subarry)-min(left\_subarray)), Thus, the real operation is max(right\_subarry)-min(left\_subarray). The time complexity would be decreased to $O(nlgn)$ from the brute force $O(n^2)$. 
So this example shows the divide and conquer. However, it might not be the best solution. Try the BCR with $O(n)$. -\begin{lstlisting}[language=Python] -class Solution(object): - def maxProfit(self, prices): - """ - :type prices: List[int] - :rtype: int - """ - if len(prices)<=1: - return 0 - - r = -maxint - min_price = maxint - for price in prices: - if price ``````<-r -% \end{lstlisting} - -% \begin{enumerate} -% \item -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%%%%% Others -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\subsection{Others} -For example, the following question would be used as follow up for question \textit{Longest Continuous Increasing Subsequence} - -300. Longest Increasing Subsequence - - -673. Number of Longest Increasing Subsequence - -Given an unsorted array of integers, find the number of longest increasing subsequence. -\begin{lstlisting} -Example 1: - -Input: [1,3,5,4,7] -Output: 2 -Explanation: The two longest increasing subsequence are [1, 3, 4, 7] and [1, 3, 5, 7]. - -Example 2: -Input: [2,2,2,2,2] -Output: 5 -Explanation: The length of longest continuous increasing subsequence is 1, and there are 5 subsequences' length is 1, so output 5. -\textit{Note: Length of the given array will be not exceed 2000 and the answer is guaranteed to be fit in 32-bit signed int.} -\end{lstlisting} - -Solution: Another different problem, to count the number of the max subsequence. 
Typical dp: - -state: f[i] -\begin{lstlisting}[language = Python] -from sys import maxsize -class Solution: - def findNumberOfLIS(self, nums): - """ - :type nums: List[int] - :rtype: int - """ - max_count = 0 - if not nums: - return 0 - memo =[None for _ in range(len(nums))] - rlst=[] - def recursive(idx,tail,res): - if idx==len(nums): - rlst.append(res) - return 0 - if memo[idx]==None: - length = 0 - if nums[idx]>tail: - addLen = 1+recursive(idx+1, nums[idx],res+[nums[idx]]) - notAddLen = recursive(idx+1, tail,res) - return max(addLen,notAddLen) - else: - return recursive(idx+1, tail,res) - - - ans=recursive(0,-maxsize,[]) - count=0 - for lst in rlst: - if len(lst)==ans: - count+=1 - - return count -\end{lstlisting} - -Using dynamic programming, the difference is we add a count array. -\begin{lstlisting}[language = Python] -from sys import maxsize -class Solution: - def findNumberOfLIS(self, nums): - N = len(nums) - if N <= 1: return N - lengths = [0] * N #lengths[i] = longest ending in nums[i] - counts = [1] * N #count[i] = number of longest ending in nums[i] - - for idx, num in enumerate(nums): #i - for i in range(idx): #j - if nums[i] < nums[idx]: #bigger - if lengths[i] >= lengths[idx]: - lengths[idx] = 1 + lengths[i] #set the biggest length - counts[idx] = counts[i] #change the count - elif lengths[i] + 1 == lengths[idx]: #if it is a tie - counts[idx] += counts[i] #increase the current count by count[i] - -longest = max(lengths) - print(counts) - print(lengths) - return sum(c for i, c in enumerate(counts) if lengths[i] == longest) -\end{lstlisting} - -128. Longest Consecutive Sequence -\begin{lstlisting} -Given an unsorted array of integers, find the length of the longest consecutive elements sequence. - -For example, - Given [100, 4, 200, 1, 3, 2], - The longest consecutive elements sequence is [1, 2, 3, 4]. Return its length: 4. - - Your algorithm should run in O(n) complexity. 
- \end{lstlisting} - -Solution: Setting the $O(n)$ requirement aside, we can sort the array to get [1,2,3,4,100,200], and then use two pointers to get [1,2,3,4]. - -How about $O(n)$? We put the numbers in a set and pop out an arbitrary number, for example 4. Then we use one while loop that keeps checking first-1 to collect the consecutive smaller numbers, here 3, 2, 1, and another while loop that keeps checking last+1 to collect the consecutive larger numbers, removing every visited number from the set. -\begin{lstlisting}[language =Python] -def longestConsecutive(self, nums): - nums = set(nums) - maxlen = 0 - while nums: - first = last = nums.pop() - while first - 1 in nums: #keep finding the smaller one - first -= 1 - nums.remove(first) - while last + 1 in nums: #keep finding the larger one - last += 1 - nums.remove(last) - maxlen = max(maxlen, last - first + 1) - return maxlen -\end{lstlisting} -\end{document} \ No newline at end of file diff --git a/Easy-Book/chapters/mastering/array/subset.tex b/Easy-Book/chapters/mastering/array/subset.tex deleted file mode 100644 index b8edbb4..0000000 --- a/Easy-Book/chapters/mastering/array/subset.tex +++ /dev/null @@ -1,646 +0,0 @@ -\documentclass[../../question_3_array_question.tex]{subfiles} -\begin{document} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%%%%% Subset -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\section{Subset(Combination and Permutation)} -\label{part4_array_subset} -A subset B of a set A is a set all of whose elements belong to A. In other words, the subset B is contained inside the set A, $B \subseteq A$. There are two kinds of subset problems: if the order of the elements in the subset does not matter, it is a combination problem; otherwise, it is a permutation problem. To solve the problems in this section, we need to refer to the backtracking in Sec~\ref{sec_combination}. When the subset has a fixed constant length, a hashmap can be used to lower the complexity by one power of n. - -\textbf{Subset VS Subsequence}. 
In the subsequence, the elements keep the original order from the original sequence. While, in the set concept, there is no ordering, only a set of elements. - -In this type of questions, we are asked to return subsets of a list. For this type of questions, backtracking~\ref{sec:backtrack} can be applied. -\subsection{Combination} -\label{part4_array_combine} -The solution of this section is heavily correlated to Section~\ref{sec_combination}. -78. Subsets -\begin{lstlisting} -Given a set of distinct integers, nums, return all possible subsets (the power set). - -Note: The solution set must not contain duplicate subsets. - -Example: - -Input: nums = [1,2,3] -Output: -[ - [3], - [1], - [2], - [1,2,3], - [1,3], - [2,3], - [1,2], - [] -] -\end{lstlisting} -\textbf{Backtracking}. This is a combination problem, which we have explained in backtrack section. We just directly gave the code here. -\begin{lstlisting}[language = Python] -def subsets(self, nums): - res, n = [], len(nums) - res = self.combine(nums, n, n) - return res - -def combine(self, nums, n, k): - """ - :type n: int - :type k: int - :rtype: List[List[int]] - """ - def C_n_k(d, k, s, curr, ans): #d controls the degree (depth), k is controls the return level, curr saves the current result, ans is all the result - ans.append(curr) - if d == k: #the length is satisfied - - return - for i in range(s, n): - curr.append(nums[i]) - C_n_k(d+1, k, i+1, curr[:], ans) # i+1 because no repeat, make sure use deep copy curr[:] - curr.pop() - - ans = [] - C_n_k(0, k, 0, [], ans) - return ans -\end{lstlisting} -\textbf{Incremental}. Backtracking is not the only way for the above problem. There is another way to do it iterative, observe the following process. We can just keep append elements to the end of of previous results. 
-\begin{lstlisting} -[1, 2, 3, 4] -l = 0, [] -l = 1, for 1, []+[1], -> [1], get powerset of [1] -l = 2, for 2, []+[2], [1]+[2], -> [2], [1, 2], get powerset of [1, 2] -l = 3, for 3, []+[3], [1]+[3], [2]+[3], [1, 2]+[3], -> [3], [1, 3], [2, 3], [1, 2, 3], get powerset of [1, 2, 3] -l = 4, for 4, []+ [4]; [1]+[4]; [2]+[4], [1, 2] +[4]; [3]+[4], [1,3]+[4],[2,3]+[4], [1,2,3]+[4], get powerset of [1, 2, 3, 4] -\end{lstlisting} -\begin{lstlisting}[language=Python] -def subsets(self, nums): - result = [[]] #use two dimensional, which already have [] one element - for num in nums: - new_results = [] - for r in result: - new_results.append(r + [num]) - result += new_results - - return result -\end{lstlisting} -90. Subsets II -\begin{lstlisting} -Given a collection of integers that might contain duplicates, nums, return all possible subsets (the power set). - -Note: The solution set must not contain duplicate subsets. - -Example: - -Input: [1,2,2] -Output: -[ - [2], - [1], - [1,2,2], - [2,2], - [1,2], - [] -] -\end{lstlisting} -Analysis: Because of the duplicates, the previous superset algorithm would give repetitive subset. For the above example, we would have [1, 2] twice, and [2] twice. If we try to modify on the previous code. We first need to sort the nums, which makes the way we check repeat easiler. Then the code goes like this: -\begin{lstlisting}[language = Python] - def subsetsWithDup(self, nums): - """ - :type nums: List[int] - :rtype: List[List[int]] - """ - nums.sort() - result = [[]] #use two dimensional, which already have [] one element - for num in nums: - new_results = [] - for r in result: - print(r) - new_results.append(r + [num]) - for rst in new_results: - if rst not in result: # check the repetitive - result.append(rst) - - return result -\end{lstlisting} -However, the above code is extremely inefficient because of the checking process. 
A better way to do this: -\begin{lstlisting} -[1, 2, 2] -l = 0, [] -l = 1, for 1, []+[1] -l = 2, for 2, []+[2], [1]+[2]; []+[2, 2], [1]+[2, 2] -\end{lstlisting} -So it would be more efficient if we first save all the numbers in the array in a dictionary. For the above case, the dic = {1:1, 2:2}. Each time we try to generate the result, we use 2 up to 2 times. Same way, we can use dictionary on the backtracking too. -\begin{lstlisting}[language=Python] -class Solution(object): - def subsetsWithDup(self, nums): - """ - :type nums: List[int] - :rtype: List[List[int]] - """ - if not nums: - return [[]] - res = [[]] - dic = collections.Counter(nums) - for key, val in dic.items(): - tmp = [] - for lst in res: - for i in range(1, val+1): - tmp.append(lst+[key]*i) - res += tmp - return res -\end{lstlisting} - -77. Combinations -\begin{lstlisting} -Given two integers n and k, return all possible combinations of k numbers out of 1 ... n. - -Example: - -Input: n = 4, k = 2 -Output: -[ - [2,4], - [3,4], - [2,3], - [1,2], - [1,3], - [1,4], -] -\end{lstlisting} -Analysis: In this problem, it is difficult for us to generate the results iteratively, the only way we can use the second solution is by filtering and get only the results with the length we want. However, the backtrack can solve the problem easily as we mentioned in Section~\ref{sec_combination}. -\begin{lstlisting}[language=Python] -def combine(self, n, k): - """ - :type n: int - :type k: int - :rtype: List[List[int]] - """ - ans = [] - def C_n_k(d,k,s,curr): - if d==k: - ans.append(curr) - return - for i in range(s, n): - #curr.append(i+1) - #C_n_k(d+1, k, i+1, curr[:]) - #curr.pop() - C_n_k(d+1, k, i+1, curr+[i+1]) - C_n_k(0,k,0,[]) - - return ans -\end{lstlisting} -%%%%%%%%%%%%%%%%%%%combination sum%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\subsection{Combination Sum} -39. 
Combination Sum - -Given a set of candidate numbers (candidates) \textbf{(without duplicates)} and a target number (target), find all unique combinations in candidates where the candidate numbers sums to target. - -The same repeated number may be chosen from candidates \textbf{unlimited number} of times. -\begin{lstlisting} -Note: - - All numbers (including target) will be positive integers. - The solution set must not contain duplicate combinations. - -Example 1: - -Input: candidates = [2,3,6,7], target = 7, -A solution set is: -[ - [7], - [2,2,3] -] - -Example 2: - -Input: candidates = [2,3,5], target = 8, -A solution set is: -[ - [2,2,2,2], - [2,3,3], - [3,5] -] -\end{lstlisting} -\textbf{DFS Backtracking}. Analysis: This is still a typical combination problem, the only thing is the return level is when the sum of the path we gained is larger than the target, and we only collect the answer when it is equal. And Because a number can be used unlimited times, so that each time after we used one number, we do not increase the next start position. -\begin{lstlisting}[language=Python] -def combinationSum(self, candidates, target): - """ - :type candidates: List[int] - :type target: int - :rtype: List[List[int]] - """ - ans = [] - candidates.sort() - self.combine(candidates, target, 0, [], ans) - return ans - -def combine(self, nums, target, s, curr, ans): - if target < 0: - return # backtracking - if target == 0: - ans.append(curr) - return - for i in range(s, len(nums)): - # if nums[i] > target: - # return - self.combine(nums, target-nums[i], i, curr+[nums[i]], ans) # use i, instead of i+1 because we can reuse -\end{lstlisting} -40. Combination Sum II - -Given a collection of candidate numbers \textbf{(candidates with duplicates)} and a target number (target), find all unique combinations in candidates where the candidate numbers sums to target. - -Each number in candidates may only \textbf{be used once} in the combination. 
-\begin{lstlisting} -Note: - - All numbers (including target) will be positive integers. - The solution set must not contain duplicate combinations. - -Example 1: - -Input: candidates = [10,1,2,7,6,1,5], target = 8, -A solution set is: -[ - [1, 7], - [1, 2, 5], - [2, 6], - [1, 1, 6] -] - -Example 2: - -Input: candidates = [2,5,2,1,2], target = 5, -A solution set is: -[ - [1,2,2], - [5] -] -\end{lstlisting} -\textbf{Backtracking+Counter}. Because for the first example, if we reuse the code from the previous problem, we will get extra combinations: [7, 1], [2, 1, 5]. To avoid this, we need a dictionary to save all the unique candidates with its corresponding appearing times. For a certain number, it will be used at most its counter times. -\begin{lstlisting}[language=Python] -def combinationSum2(self, candidates, target): - """ - :type candidates: List[int] - :type target: int - :rtype: List[List[int]] - """ - - candidates = collections.Counter(candidates) - ans = [] - self.combine(list(candidates.items()), target, 0, [], ans) # convert the Counter to a list of (key, item) tuple - return ans - -def combine(self, nums, target, s, curr, ans): - if target < 0: - return - if target == 0: - ans.append(curr) - return - for idx in range(s, len(nums)): - num, count = nums[idx] - for c in range(count): - self.combine(nums, target-num*(c+1), idx+1, curr+[num]*(c+1), ans ) -\end{lstlisting} -377. Combination Sum IV (medium) -\begin{lstlisting} - Given an integer array with all positive numbers and no duplicates, find the number of possible combinations that add up to a positive integer target. - -Example: - -nums = [1, 2, 3] -target = 4 - -The possible combination ways are: -(1, 1, 1, 1) -(1, 1, 2) -(1, 2, 1) -(1, 3) -(2, 1, 1) -(2, 2) -(3, 1) - -Note that different sequences are counted as different combinations. - -Therefore the output is 7. - -Follow up: -What if negative numbers are allowed in the given array? -How does it change the problem? 
-What limitation we need to add to the question to allow negative numbers? -\end{lstlisting} -\textbf{DFS + MEMO}. This problem is similar to 39. Combination Sum. For [2, 3, 5], target = 8, comparison: -\begin{lstlisting} -[2, 3, 5], target = 8 -39. Combination Sum. # there is ordering (each time the start index is same or larger than before) -[ - [2,2,2,2], - [2,3,3], - [3,5] -] -377. Combination Sum IV, here we have no ordering( each time the start index is the same as before). Try all element. -[ - [2,2,2,2], - [2,3,3], -* [3,3,2] -* [3,2,3] - [3,5], -* [5,3] -] -\end{lstlisting} -\begin{lstlisting}[language=Python] -def combinationSum4(self, nums, target): - """ - :type nums: List[int] - :type target: int - :rtype: int - """ - nums.sort() - n = len(nums) - def DFS(idx, memo, t): - if t < 0: - return 0 - if t == 0: - return 1 - count = 0 - if t not in memo: - for i in range(idx, n): - count += DFS(idx, memo, t-nums[i]) - memo[t] = count - return memo[t] - return(DFS(0, {}, target)) -\end{lstlisting} -Because, here we does not need to numerate all the possible solutions, we can use dynamic programming, which will be shown in Section~\ref{}. - -\subsection{K Sum} -In this subsection, we still trying to get subset that sum up to a target. But the length here is fixed. We would have 2, 3, 4 sums normally. Because it is still a combination problem, we can use the \textbf{backtracking} to do. Second, because the fixed length, we can use \textbf{multiple pointers} to build up the potential same lengthed subset. But in some cases, because the length is fixed, we can use \textbf{hashmap} to simplify the complexity. - -1. Two Sum -Given an array of integers, return \textbf{indices} of the two numbers such that they add up to a specific target. - -You may assume that each input would have \textbf{exactly} one solution, and you may not use the same element twice. 
-\begin{lstlisting} -Example: - -Given nums = [2, 7, 11, 15], target = 9, - -Because nums[0] + nums[1] = 2 + 7 = 9, -return [0, 1]. -\end{lstlisting} -\textbf{Hashmap}. Using backtracking or brute force will get us $O(n^2)$ time complexity. We can use hashmap to save the nums in a dictionary. Then we just check target-num in the dictionary. We would get $O(n)$ time complexity. We have two-pass hashmap and one-pass hashmap. -\begin{lstlisting}[language=Python] -# two-pass hashmap -def twoSum(self, nums, target): - """ - :type nums: List[int] - :type target: int - :rtype: List[int] - """ - dict = collections.defaultdict(int) - for i, t in enumerate(nums): - dict[t] = i - for i, t in enumerate(nums): - if target - t in dict and i != dict[target-t]: - return [i, dict[target-t]] -# one-pass hashmap -def twoSum(self, nums, target): - """ - :type nums: List[int] - :type target: int - :rtype: List[int] - """ - dict = collections.defaultdict(int) - for i, t in enumerate(nums): - if target - t in dict: - return [dict[target-t], i] - dict[t] = i -\end{lstlisting} - -15. 3Sum - -Given an array S of n integers, are there elements a, b, c in S such that a + b + c = 0? Find all unique triplets in the array which gives the sum of zero. - -Note: The solution set must not contain duplicate triplets. - -For example, given array S = [-1, 0, 1, 2, -1, -4], -\begin{lstlisting} -A solution set is: -[ - [-1, 0, 1], - [-1, -1, 2] -] -\end{lstlisting} - -Solution: Should use three pointers, no extra space. i is the start point from [0,len-2], l,r is the other two pointers. l=i+1, r=len-1 at the beignning. The saving of time complexity is totally from the sorting algorithm. -\begin{lstlisting} -[-4,-1,-1,0,1,2] -i, l-> ``````<-r -\end{lstlisting} -How to delete repeat? 
-\begin{lstlisting}[language = Python] -def threeSum(self, nums): - res = [] - nums.sort() - for i in xrange(len(nums)-2): - if i > 0 and nums[i] == nums[i-1]: #make sure pointer not repeat - continue - l, r = i+1, len(nums)-1 - while l < r: - s = nums[i] + nums[l] + nums[r] - if s < 0: - l +=1 - elif s > 0: - r -= 1 - else: - res.append((nums[i], nums[l], nums[r])) - l+=1 - r-=1 - - #after the first run, then check duplicate example. - while l < r and nums[l] == nums[l-1]: - l += 1 - while l < r and nums[r] == nums[r+1]: - r -= 1 - return res -\end{lstlisting} -Use hashmap: -\begin{lstlisting}[language = Python] -def threeSum(self, nums): - """ - :type nums: List[int] - :rtype: List[List[int]] - """ - res =[] - nums=sorted(nums) - if not nums: - return [] - if nums[-1]<0 or nums[0]>0: - return [] - end_position = len(nums)-2 - dic_nums={} - for i in xrange(1,len(nums)): - dic_nums[nums[i]]=i# same result save the last index - - for i in xrange(end_position): - target = 0-nums[i] - if i>0 and nums[i] == nums[i-1]: #this is to avoid repeat - continue - if targeti+1 and nums[j]==nums[j-1]: - continue - complement =target - nums[j] - if complementj: #need to make sure the complement is bigger than nums[j] - res.append([nums[i],nums[j],complement]) - return res -\end{lstlisting} -The following code uses more time -\begin{lstlisting}[language = Python] -for i in xrange(len(nums)-2): - if i > 0 and nums[i] == nums[i-1]: - continue - l, r = i+1, len(nums)-1 - while l < r: - if l-1>=i+1 and nums[l] == nums[l-1]: #check the front - l += 1 - continue - if r+1 0: - r -= 1 - else: - res.append((nums[i], nums[l], nums[r])) - l += 1; r -= 1 - return res -\end{lstlisting} -18. 
4Sum -\begin{lstlisting}[language = Python] -def fourSum(self, nums, target): - def findNsum(nums, target, N, result, results): - if len(nums) < N or N < 2 or target < nums[0]*N or target > nums[-1]*N: # early termination - return - if N == 2: # two pointers solve sorted 2-sum problem - l,r = 0,len(nums)-1 - while l < r: - s = nums[l] + nums[r] - if s == target: - results.append(result + [nums[l], nums[r]]) - l += 1 - r-=1 - while l < r and nums[l] == nums[l-1]: - l += 1 - while l < r and nums[r] == nums[r+1]: - r -= 1 - elif s < target: - l += 1 - else: - r -= 1 - else: # recursively reduce N - for i in range(len(nums)-N+1): - if i == 0 or (i > 0 and nums[i-1] != nums[i]): - findNsum(nums[i+1:], target-nums[i], N-1, result+[nums[i]], results) #reduce nums size, reduce target, save result - -results = [] - findNsum(sorted(nums), target, 4, [], results) - return results -\end{lstlisting} - -454. 4Sum II - -Given four lists A, B, C, D of integer values, compute how many tuples (i, j, k, l) there are such that A[i] + B[j] + C[k] + D[l] is zero. - -To make the problem a bit easier, all of A, B, C, D have the same length N where $0 \leq N \leq 500$. All integers are in the range of $-2^{28}$ to $2^{28}-1$ and the result is guaranteed to be at most $2^{31}-1$. - -Example: -\begin{lstlisting} -Input: -A = [ 1, 2] -B = [-2,-1] -C = [-1, 2] -D = [ 0, 2] - -Output: -2 -\end{lstlisting} - -Explanation: - -\begin{lstlisting} -The two tuples are: -1. (0, 0, 0, 1) -> A[0] + B[0] + C[0] + D[1] = 1 + (-2) + (-1) + 2 = 0 -2. (1, 1, 0, 0) -> A[1] + B[1] + C[0] + D[0] = 2 + (-1) + (-1) + 0 = 0 -\end{lstlisting} -Solution: brute force with four nested for loops is $O(N^4)$. If we split the four lists into two halves, store all pairwise sums of the first half (A and B) in a dictionary (counter), and look up the negated pairwise sums of the second half (C and D), the time complexity is $O(2N^2)$. The same idea reduces a 6-sum to $O(2N^3)$; what about an 8-sum? 
- -\begin{lstlisting}[language = Python] -def fourSumCount(self, A, B, C, D): - AB = collections.Counter(a+b for a in A for b in B) - return sum(AB[-c-d] for c in C for d in D) -\end{lstlisting} - - -\subsubsection{Summary} -As we have seen from the shown examples in this section, to solve the combination problem, backtrack shown in Section~\ref{sec_combination} offers a universal solution. Also, there is another iterative solution which suits the power set purpose. And I would include its code here again: -\begin{lstlisting}[language = Python] -def subsets(self, nums): - result = [[]] #use two dimensional, which already have [] one element - for num in nums: - new_results = [] - for r in result: - new_results.append(r + [num]) - result += new_results - - return result -\end{lstlisting} -If we have duplicates, how to handle in the backtrack?? In the iterative solution, we can replace the array with a dictionary saves the counts. - -\subsection{Permutation} -46. Permutations -\begin{lstlisting} -Given a collection of distinct numbers, return all possible permutations. - -For example, - [1,2,3] have the following permutations: - -[ - [1,2,3], - [1,3,2], - [2,1,3], - [2,3,1], - [3,1,2], - [3,2,1] -] -\end{lstlisting} - -47. Permutations II - -Given a collection of numbers that might contain duplicates, return all possible unique permutations. - -For example, -\begin{lstlisting} - [1,1,2] have the following unique permutations: - -[ - [1,1,2], - [1,2,1], - [2,1,1] -] -\end{lstlisting} - -301. Remove Invalid Parentheses - -Remove the minimum number of invalid parentheses in order to make the input string valid. Return all possible results. - -Note: The input string may contain letters other than the parentheses ( and ). 
- -Examples: -\begin{lstlisting} -"()())()" -> ["()()()", "(())()"] -"(a)())()" -> ["(a)()()", "(a())()"] -")(" -> [""] -\end{lstlisting} -\end{document} \ No newline at end of file diff --git a/Easy-Book/chapters/mastering/array/sweep_line.tex b/Easy-Book/chapters/mastering/array/sweep_line.tex deleted file mode 100644 index 7eca818..0000000 --- a/Easy-Book/chapters/mastering/array/sweep_line.tex +++ /dev/null @@ -1,158 +0,0 @@ -\documentclass[../../main.tex]{subfiles} -\begin{document} -Sweep Line is a type of algorithm that is mainly used to solve problems involving one-dimensional intervals. Let us look at one example: -1. 253. Meeting Rooms II - -Given an array of meeting time intervals consisting of start and end times [[s1,e1],[s2,e2],...] (si < ei), find the minimum number of conference rooms required. -\begin{lstlisting} -Example 1: - -Input: [[0, 30],[5, 10],[15, 20]] -Output: 2 - -Example 2: - -Input: [[7,10],[2,4]] -Output: 1 -\end{lstlisting} -It helps a lot to first draw one example on a coordinate axis. -\begin{figure}[h] - \centering - \includegraphics[width = 0.6\columnwidth]{fig/sweep_line_253.png} - \caption{Interval questions} - \label{fig:interval} -\end{figure} -First, the simplest situation is that we need only one meeting room when there is no intersection between these time intervals. If we add one interval that intersects with only one of the previous intervals, we need two conference rooms. So to find the minimum number of conference rooms, we need to find the maximum number of intervals that intersect at the same time. The most naive solution is to scan all the time slots in one for loop, and in an inner loop go through all the intervals; if the time slot falls in an interval, we increase the room counter for that slot. This gives us a time complexity of $O(n*m)$, where $n$ is the number of intervals and $m$ is the total number of time slots. 
The Python code is as follows; unfortunately, this solution gets a TLE (Time Limit Exceeded) error. -\begin{lstlisting}[language = Python] -# Definition for an interval. -# class Interval(object): -# def __init__(self, s=0, e=0): -# self.start = s -# self.end = e - -from collections import defaultdict -from heapq import heappush, heappop -from sys import maxint -class Solution(object): - def minMeetingRooms(self, intervals): - """ - :type intervals: List[Interval] - :rtype: int - """ - if not intervals: - return 0 - #solution 1, voting, time complexity is O(e1-s1), 71/77 test, TLE - votes = defaultdict(int) - num_rooms = 0 - for interval in intervals: - s=interval.start - e=interval.end - for i in range(s+1,e+1): - votes[i]+=1 - num_rooms = max(num_rooms, votes[i]) - return num_rooms -\end{lstlisting} -\subsection{Speedup with Sweep Line} -Now, let us see how to speed up this process. We can use the Sweep Line method. For the sweep line, we have three basic implementations: one-dimensional, min-heap, and map-based. -\subsubsection{One-dimensional Implementation} - To get the maximum number of intersections among all the intervals, it is not necessary to scan every time slot; we can scan only the key slots: the starts and ends. Thus, we can create an array, put every start and end slot into it, and use $1$ to mark a start and $0$ to mark an end. Then we sort this array. At this point, how do we get the maximum intersection? We go through the sorted array: if we encounter a start slot, the current number of rooms needed increases by one; otherwise, if we encounter an end slot, one meeting room is freed, so we decrease the current number of rooms in use by one. We use another variable to track the maximum number of rooms needed during this whole process. Now the time complexity is determined by sorting the $2n$ slots, which makes the overall time complexity $O(n\log n)$ and the space complexity $O(n)$. 
This speeded up algorithm is called Sweep Line algorithm. Before we write our code, we better check the \textit{special cases}, what if there is one slot that is marked as start in one interval but is the end of another interval. This means we can not increase the counting at first, but we need to decrease, so that the sorting should be based on the first element of the tuple, and followed by the second element of the tuple. For example, the simple case $[[13,15],[1,13]]$, we only need maximum of one meeting room. Thus it can be implemented as: -\begin{figure}[h] - \centering - \includegraphics[width=0.6\columnwidth]{fig/sweep_line_one_dimension.png} - \caption{One-dimensional Sweep Line} - \label{fig:one_dim_sl} -\end{figure} -\begin{lstlisting}[language=Python] - def minMeetingRooms(self, intervals): - if not intervals: - return 0 - #solution 2 - slots = [] - # put slots into one-dimensional axis - for i in intervals: - slots.append((i.start, 1)) - slots.append((i.end, 0)) - # sort these slots on this dimension - #slots.sort(key = lambda x: (x[0], x[1])) - slots.sort() - - # now execute the counting - crt_room, max_room = 0, 0 - for s in slots: - if s[1]==0: # if it ends, decrease - crt_room-=1 - else: - crt_room+=1 - max_room = max(max_room, crt_room) - return max_room -\end{lstlisting} -\subsubsection{Min-heap Implementation} -\begin{figure}[h] - \centering - \includegraphics[width=0.6\columnwidth]{fig/sweep_line_min_heap.png} - \caption{Min-heap for Sweep Line} - \label{fig:min_heap_sl} -\end{figure} -Instead of opening an array to save all the time slots, we can directly sort the intervals in the order of the start time. 
We can see Fig.~\ref{fig:min_heap_sl}, we go through the intervals and visit their end time, the first one we encounter is $30$, we put it in a min-heap, and then we visit the next interval $[5, 10]$, $5$ is smaller than the previous end time $30$, it means this interval intersected with a previous interval, so the number of maximum rooms increase $1$, we get $2$ rooms now. We put $10$ into the min-heap. Next, we visit $[15, 20]$, $15$ is larger than the first element in the min-heap $10$, it means that these two intervals can be merged into one $[5, 20]$, so we need to update the end time $10$ to $20$. - -This way, the time complexity is still the same which is decided by the sorting algorithm. While the space complexity is decided by real situation, it varies from $O(1)$ (no intersection) to $O(n)$ (all the meetings are intersected at at least one time slot). -\begin{lstlisting}[language=Python] -def minMeetingRooms(self, intervals): - if not intervals: - return 0 - #solution 2 - intervals.sort(key=lambda x:x.start) - h = [intervals[0].end] - rooms = 1 - for i in intervals[1:]: - s,e=i.start, i.end - e_before = h[0] - if s& intervals) { - map mp; - for (auto val : intervals) { - ++mp[val.start]; - --mp[val.end]; - } - int max_room = 0, crt_room = 0; - for (auto val : mp) { - crt_room += val.second; - max_room = max(max_room, crt_room); - } - return max_room; - } -}; -\end{lstlisting} - -\subsection{LeetCode Problems} -\begin{enumerate} - \item \textbf{986. Interval List Intersections} Given two lists of closed intervals, each list of intervals is pairwise disjoint and in sorted order. Return the intersection of these two interval lists. -\begin{lstlisting}[numbers=none] -Input: A = [[0,2],[5,10],[13,23],[24,25]], B = [[1,5],[8,12],[15,24],[25,26]] -Output: [[1,2],[5,5],[8,10],[15,23],[24,24],[25,25]] -Reminder: The inputs and the desired output are lists of Interval objects, and not arrays or lists. 
-\end{lstlisting} -\end{enumerate} - -\end{document} \ No newline at end of file diff --git a/Easy-Book/chapters/mastering/array_string.tex b/Easy-Book/chapters/mastering/array_string.tex deleted file mode 100644 index d06468b..0000000 --- a/Easy-Book/chapters/mastering/array_string.tex +++ /dev/null @@ -1,1832 +0,0 @@ -\documentclass[../specific-algorithms.tex]{subfiles} -\begin{document} -In this chapter, we mainly discuss about the array based questions. We first categorize these problems into different type, and then each type can usually be solved and optimized with nearly the best efficiency. - -Here array means one dimension list. For array problems, math will play an important role here. The rules are as follows: -\begin{itemize} - \item Subarray: using dynamic programming based algorithm to make brute force $O(n^3)$ to $O(n)$. Two pointers for the increasing subarray. Prefix sum, or kadane's algorithm plus sometimes with the hashmap, or two pointers (three pointers) for the maximum subarray. - \item Subsequence: using dynamic programming based algorithm to make brute force $O(2^n)$ to $O(n^2)$, which corresponds to the seqence type of dynamic programming. - \item Duplicates: 217, 26, 27, 219, 287, 442; - \item Intersections of Two Arrays: -\end{itemize} - -Before we get into solving each type of problems, we first introduce the algorithms we will needed in this Chapter, including two pointers (three pointers or sliding window), prefix sum, kadane's algorithm. Kadane's algorithm can be explained with sequence type of dynamic programming. - - % Easy problems: Duplicates: Intersection: 349. Intersection of Two Arrays; Consecutive: 485. Max Consecutive Ones - % Maximum/Minimum subarray: 718, 53. Maximum Subarray, 325. Maximum Size Subarray Sum Equals k. 209. 
Minimum Size Subarray Sum Solutions: divide and conquer, special sum and hashtable, two pointers (sliding window) for minimum - % Sum of K numbers of elements: Target, return either the index or the elements(might need to avoid repetition). (2/3/4 sums) - % Partition a list into K equal part: DP - -After this chapter, we need to learn the step to solve these problems: -\begin{enumerate} - \item Analyze the problem and categorize it. To know the naive solution's time complexity can help us identify it. - \item If we can not find what type it is, let us see if we can \textit{convert}. If not, we can try to identify a simple version of this problem, and then upgrade the simple solution to the more complex one. - \item Solve the problem with the algorithms we taught in this chapter. - \item Try to see if there is any more solutions. - - - % \textit{Note: If the problem is complex, trying to see the simple version, and then upgrade the simple version to a complex one. e.g. (487. Max Consecutive Ones II, 485. Max Consecutive Ones)} - \item Check the special case. (Usually very important for this type of problems) -\end{enumerate} -% Including two pointers both from the start, or two pointers one is from the beginning and the other is from the end. Also, the sliding window, and the flexible sliding windows, also find the cycle algorithm. -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%%%%% Algorithms -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\section{Algorithms} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%%%%% Two Pointers -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\subsection{Pointers and Sliding Window Algorithm} -T1: If you see in the problem that you can do comparison and it is always one type of satisfactory element is in ahead of the other, this could be resolved by two pointers (slower and faster). 
Note: when the while loop stops, is there operations you need? - -Two pointers or three pointers are the most possible. \textit{Two pointers or three pointers is a superset of the sliding window algorithm, prefix sum too.} It can lower the complexity by one power level of n. - -\subsubsection{Two Pointers Sliding Window for Array} -674. Longest Continuous Increasing Subsequence -\begin{lstlisting} -Given an unsorted array of integers, find the length of longest continuous increasing subsequence (subarray). - -Example 1: -Input: [1,3,5,4,7] -Output: 3 -Explanation: The longest continuous increasing subsequence is [1,3,5], its length is 3. -Even though [1,3,5,7] is also an increasing subsequence, it's not a continuous one where 5 and 7 are separated by 4. - -Example 2: -Input: [2,2,2,2,2] -Output: 1 -Explanation: The longest continuous increasing subsequence is [2], its length is 1. -\textit{Note: Length of the array will not exceed 10,000.} -\end{lstlisting} -Solution: The description of this problem should use ''subarray" instead of the ''subsequence". The brute force solution is like any subarray problem $O(n^3)$. For embedded for loops to enumerate the subarray, and another $O(n)$ to check if it is strictly increasing. Using two pointers, we can get $O(n)$ time complexity. We put two pointers: one $i$ located at the first element of the nums, second $j$ at the second element. We specifically restrict the subarray from $i$ to $j$ to be increasing, if this is violated, we reset the starting point of the subarray from the violated place. 
-\begin{lstlisting}[language = Python] -class Solution: - def findLengthOfLCIS(self, nums): - """ - :type nums: List[int] - :rtype: int - """ - if not nums: - return 0 - if len(nums)==1: - return 1 - i,j = 0,0 - max_length = 0 - while j < len(nums): - j += 1 #slide the window - max_length = max(max_length, j-i) - # when condition violated, reset the window - if j=nums[j]: - i = j - - return max_length -\end{lstlisting} - -\subsubsection{Three Pointers Sliding Window for Array} -Sometimes, by manipulating two pointers are not enough for us to get the final solution. - -930. Binary Subarrays With Sum - \begin{lstlisting} - In an array A of 0s and 1s, how many non-empty subarrays have sum S? -Example 1: - -Input: A = [1,0,1,0,1], S = 2 -Output: 4 -Explanation: -The 4 subarrays are bolded below: -[1,0,1,0,1] -[1,0,1,0,1] -[1,0,1,0,1] -[1,0,1,0,1] -Note: - - A.length <= 30000 - 0 <= S <= A.length - A[i] is either 0 or 1. -\end{lstlisting} -For example in the following problem, if we want to use two pointers to solve the problem, we would find we miss the case; like in the example $1, 0, 1, 0, 1$, when $j = 5$, $i = 1$, the sum is $2$, but the algorithm would miss the case of $i = 2$, which has the same sum value. - -To solve this problem, we keep another index $i_hi$, in addition to the moving rule of $i$, it also moves if the sum is satisfied and that value is $0$. This is actually a Three pointer algorithm, it is also a mutant sliding window algorithm. 
-\begin{lstlisting}[language=Python] -class Solution: - def numSubarraysWithSum(self, A, S): - i_lo, i_hi, j = 0, 0, 0 #i_lo <= j - sum_window = 0 - ans = 0 - while j < len(A): - - sum_window += A[j] - - while i_lo < j and sum_window > S: - sum_window -= A[i_lo] - i_lo += 1 - # up till here, it is standard sliding window - - # now set the extra pointer at the same location of the i_lo - i_hi = i_lo - while i_hi < j and sum_window == S and not A[i_hi]: - i_hi += 1 - if sum_window == S: - ans += i_hi - i_lo + 1 - - j += 1 #increase the pointer at last so that we do not need to check if ji, j\in[0,n-1])$, which is equivalent to $max(y_j - min(y_i)(i= s$. - -Because of the vague of the condition, a hashmap$+$prefix sum solution will on longer give us $O(n)$ liner time. The best we can do if the array is all positive number we can gain $O(nlgn)$ if it is combined with binary search. However, a carefully designed sliding window can still help us achieve linear time. - -\paragraph{All Positive Array} - -If it is all positive array, it can still be easily solved with sliding window. For example: - -209. Minimum Size Subarray Sum -\begin{lstlisting} -Given an array of n positive integers and a positive integer s, find the minimal length of a contiguous subarray of which the sum >= s. If there isn't one, return 0 instead. -Example: - -Input: s = 7, nums = [2,3,1,2,4,3] -Output: 2 -Explanation: the subarray [4,3] has the minimal length under the problem constraint. - -Follow up: -If you have figured out the O(n) solution, try coding another solution of which the time complexity is O(n log n). -\end{lstlisting} -For this problem, if we use prefix sum, we need to save the last index of the prefix sum compared with the maximum length problem. However, with this problem the condition is $sum >= s$, if we use a hashmap, we need to search through the hashmap with $key <= prefix_sum - s$. The time complexity would rise up to $O(n^2)$. We would receive LTE error. 
-\begin{lstlisting}[language = Python] - def minSubArrayLen(self, s, nums): - """ - :type s: int - :type nums: List[int] - :rtype: int - """ - if not nums: - return 0 - dict = collections.defaultdict(int) - dict[0] = -1 # pre_sum 0 with index -1 - prefixSum = 0 - minLen = sys.maxsize - for idx, n in enumerate(nums): - prefixSum += n - for key, value in dict.items(): - if key <= prefixSum - s: - minLen = min(minLen, idx-value) - dict[prefixSum] = idx #save the last index - return minLen if 1<=minLen<=len(nums) else 0 -\end{lstlisting} -Because if we use prefix sum and use brute force to enumerate the subarray we gain $O(n^2)$. In this problem because its all positive number, so the prefix sum array is increasing which means we can use binary search to find the largest value that is smaller than or equal to prefix sum - s. -\begin{lstlisting}[language = Python] - def minSubArrayLen(self, s, nums): - """ - :type s: int - :type nums: List[int] - :rtype: int - """ - def bSearch(nums, i, j, target): - while i < j: - mid = (i+j) / 2 - if nums[mid] == target: - return mid - elif nums[mid] < target: - i = mid + 1 - else: - j = mid - 1 - return i - - if not nums: - return 0 - rec = [0] * len(nums) - rec[0] = nums[0] - if rec[0] >= s: - return 1 - minlen = len(nums)+1 - for i in range(1, len(nums)): - rec[i] = rec[i-1] + nums[i] - if rec[i] >= s: - index = bSearch(rec, 0, i, rec[i] - s) - if rec[index] > rec[i] - s: - index -= 1 - minlen = min(minlen, i - index) - return minlen if minlen != len(nums)+1 else 0 -\end{lstlisting} -While, using the sliding window, we are still capable of getting the complexity with $O(n)$. 
-\begin{lstlisting}[language = Python] - def minSubArrayLen(self, s, nums): - """ - :type s: int - :type nums: List[int] - :rtype: int - """ - i,j = 0,0 - preSum =0 - min_length = len(nums)+1 - while j < len(nums): - preSum += nums[j] - j+=1 - #shrink the sliding window size - while i < j and preSum >= s: - min_length = min(min_length, j-i) - preSum -= nums[i] #shrink - i += 1 - return min_length if min_length< len(nums)+1 else 0 -\end{lstlisting} - -713. Subarray Product Less Than K - -\begin{lstlisting} -Your are given an array of positive integers nums. -Count and print the number of (contiguous) subarrays where the product of all the elements in the subarray is less than k. - -Example 1: -Input: nums = [10, 5, 2, 6], k = 100 -Output: 8 -Explanation: The 8 subarrays that have product less than 100 are: [10], [5], [2], [6], [10, 5], [5, 2], [2, 6], [5, 2, 6]. - -Note that [10, 5, 2] is not included as the product of 100 is not strictly less than k. -Note: -0 < nums.length <= 50000. -0 < nums[i] < 1000. -0 <= k < 10^6. -\end{lstlisting} - -Answer: Because we need the subarray less than k, so it is difficult to use prefix sum. If we use sliding window, -\begin{lstlisting} -i=0, j=0, 10 10<100, ans+= j-i+1 (1) -> [10] -i=0, j=1, 50 50<100, ans+= j-i+1 (3), -> [10],[10,5] -i=0, j=2, 100 shrink the window, i=1, product = 10, ans+=2, ->[5,2][2] -i=1, j=3, 60, ans+=3->[2,6],[2],[6] -\end{lstlisting} -The python code: -\begin{lstlisting}[language = Python] -class Solution: - def numSubarrayProductLessThanK(self, nums, k): - """ - :type nums: List[int] - :type k: int - :rtype: int - """ - if not nums: - return 0 - i, j = 0, 0 - window_product = 1 - ans = 0 - while j < len(nums): - window_product *= nums[j] - - while i= k: - window_product /= nums[i] - i+=1 - if window_product < k: - ans += j-i+1 - j += 1 - return ans -\end{lstlisting} - -\paragraph{Array with Negative Element} - -862. 
Shortest Subarray with Sum at Least K -\begin{lstlisting} -Return the length of the shortest, non-empty, contiguous subarray of A with sum at least K. - -If there is no non-empty subarray with sum at least K, return -1. - -Example 1: -Input: A = [1], K = 1 -Output: 1 - -Example 2: -Input: A = [1,2], K = 4 -Output: -1 - -Example 3: -Input: A = [2,-1,2], K = 3 -Output: 3 - -Note: - 1 <= A.length <= 50000 - -10 ^ 5 <= A[i] <= 10 ^ 5 - 1 <= K <= 10 ^ 9 -\end{lstlisting} -The only difference of this problem compared with the last is with negative value. Because of the negative, the shrinking method no longer works: for instance, [84,-37,32,40,95], K=167, the right answer is [32, 40, 95]. In this program, i=0, j=4, so how to handle the negative value? - - - -% \item 674. Longest Continuous Increasing Subsequence - -% Given an unsorted array of integers, find the length of longest continuous increasing subsequence (subarray). - -% Example 1: -% \begin{lstlisting} -% Input: [1,3,5,4,7] -% Output: 3 -% Explanation: The longest continuous increasing subsequence is [1,3,5], its length is 3. -% Even though [1,3,5,7] is also an increasing subsequence, it's not a continuous one where 5 and 7 are separated by 4. -% \end{lstlisting} -% Example 2: -% \begin{lstlisting} -% Input: [2,2,2,2,2] -% Output: 1 -% Explanation: The longest continuous increasing subsequence is [2], its length is 1. -% \end{lstlisting} -% \textit{Note: Length of the array will not exceed 10,000.} - -% Solution: The brute force solution is use two for loops with $O(n^2)$. The first loop is the start number, the second loop is the $nums[j]>nums[j-1]$ or else stop. Or we can use two pointers. i,j start from 0,1 respectively. 
-% \begin{lstlisting}[language = Python] -% class Solution: -% def findLengthOfLCIS(self, nums): -% """ -% :type nums: List[int] -% :rtype: int -% """ -% if not nums: -% return 0 -% if len(nums)==1: -% return 1 -% i,j=0,1 -% max_length = 0 -% while jmax_length: -% max_length = j-i -% i=j -% j+=1 -% if j-i>max_length: -% max_length = j-i - -% return max_length -% \end{lstlisting} - - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%%%%% sub sequence -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\subsection{Subsequence (Medium or Hard)} -The difference of the subsequence type of questions with the subarray is that we do not need the elements to be consecutive. Because of this relaxation, the brute force solution of this type of question is exponential$O(2^n)$, because for each element, we have two options: chosen or not chosen. This type of questions would usually be used as a follow-up question to the subarray due to its further difficulty because of nonconsecutive. This type of problems are a typical dynamic programming. Here we should a list of all related subsequence problems shown on LeetCode in Fig.~\ref{fig:subsequence_problems} -\begin{figure}[h] - \centering - \includegraphics[width=0.8\columnwidth]{fig/subsequence_1.png} - \includegraphics[width=0.8\columnwidth]{fig/subsequence_2.png} - \caption{Subsequence Problems Listed on LeetCode} - \label{fig:subsequence_problems} -\end{figure} - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%%%%% Sum -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\subsubsection{Sum} -In this section, to get sum we can choose to use hashmap to save the original list so that for the last element, we only check the hashmap, we can lower the complexity by one power of n. However, a better solution is to use two pointers or three pointers. for three pointers, the first one is to make sure the starting point. 
Also, we can think about divide and conquer. -\begin{lstlisting} -[-4,-1,-1,0,1,2] -i, l-> ``````<-r -\end{lstlisting} - -\begin{enumerate} - \item 15. 3Sum - -Given an array S of n integers, are there elements a, b, c in S such that a + b + c = 0? Find all unique triplets in the array which gives the sum of zero. - -Note: The solution set must not contain duplicate triplets. - -For example, given array S = [-1, 0, 1, 2, -1, -4], -\begin{lstlisting} -A solution set is: -[ - [-1, 0, 1], - [-1, -1, 2] -] -\end{lstlisting} - -Solution: Should use three pointers, no extra space. i is the start point from [0,len-2], l,r is the other two pointers. l=i+1, r=len-1 at the beignning. The saving of time complexity is totally from the sorting algorithm. -\begin{lstlisting} -[-4,-1,-1,0,1,2] -i, l-> ``````<-r -\end{lstlisting} -How to delete repeat? -\begin{lstlisting}[language = Python] -def threeSum(self, nums): - res = [] - nums.sort() - for i in xrange(len(nums)-2): - if i > 0 and nums[i] == nums[i-1]: #make sure pointer not repeat - continue - l, r = i+1, len(nums)-1 - while l < r: - s = nums[i] + nums[l] + nums[r] - if s < 0: - l +=1 - elif s > 0: - r -= 1 - else: - res.append((nums[i], nums[l], nums[r])) - l+=1 - r-=1 - - #after the first run, then check duplicate example. 
- while l < r and nums[l] == nums[l-1]: - l += 1 - while l < r and nums[r] == nums[r+1]: - r -= 1 - return res -\end{lstlisting} -Use hashmap: -\begin{lstlisting}[language = Python] -def threeSum(self, nums): - """ - :type nums: List[int] - :rtype: List[List[int]] - """ - res =[] - nums=sorted(nums) - if not nums: - return [] - if nums[-1]<0 or nums[0]>0: - return [] - end_position = len(nums)-2 - dic_nums={} - for i in xrange(1,len(nums)): - dic_nums[nums[i]]=i# same result save the last index - - for i in xrange(end_position): - target = 0-nums[i] - if i>0 and nums[i] == nums[i-1]: #this is to avoid repeat - continue - if targeti+1 and nums[j]==nums[j-1]: - continue - complement =target - nums[j] - if complementj: #need to make sure the complement is bigger than nums[j] - res.append([nums[i],nums[j],complement]) - return res -\end{lstlisting} -The following code uses more time -\begin{lstlisting}[language = Python] -for i in xrange(len(nums)-2): - if i > 0 and nums[i] == nums[i-1]: - continue - l, r = i+1, len(nums)-1 - while l < r: - if l-1>=i+1 and nums[l] == nums[l-1]: #check the front - l += 1 - continue - if r+1 0: - r -= 1 - else: - res.append((nums[i], nums[l], nums[r])) - l += 1; r -= 1 - return res -\end{lstlisting} - -\item 18. 
4Sum -\begin{lstlisting}[language = Python] -def fourSum(self, nums, target): - def findNsum(nums, target, N, result, results): - if len(nums) < N or N < 2 or target < nums[0]*N or target > nums[-1]*N: # early termination - return - if N == 2: # two pointers solve sorted 2-sum problem - l,r = 0,len(nums)-1 - while l < r: - s = nums[l] + nums[r] - if s == target: - results.append(result + [nums[l], nums[r]]) - l += 1 - r-=1 - while l < r and nums[l] == nums[l-1]: - l += 1 - while l < r and nums[r] == nums[r+1]: - r -= 1 - elif s < target: - l += 1 - else: - r -= 1 - else: # recursively reduce N - for i in range(len(nums)-N+1): - if i == 0 or (i > 0 and nums[i-1] != nums[i]): - findNsum(nums[i+1:], target-nums[i], N-1, result+[nums[i]], results) #reduce nums size, reduce target, save result - -results = [] - findNsum(sorted(nums), target, 4, [], results) - return results -\end{lstlisting} - -\item 454. 4Sum II - -Given four lists A, B, C, D of integer values, compute how many tuples (i, j, k, l) there are such that A[i] + B[j] + C[k] + D[l] is zero. - -To make problem a bit easier, all A, B, C, D have same length of N where $0 \leq N \leq 500$. All integers are in the range of -228 to 228–1 and the result is guaranteed to be at most 231–1. - -Example: -\begin{lstlisting} -Input: -A = [ 1, 2] -B = [-2,-1] -C = [-1, 2] -D = [ 0, 2] - -Output: -2 -\end{lstlisting} - -Explanation: - -\begin{lstlisting} -The two tuples are: -1. (0, 0, 0, 1) -> A[0] + B[0] + C[0] + D[1] = 1 + (-2) + (-1) + 2 = 0 -2. (1, 1, 0, 0) -> A[1] + B[1] + C[0] + D[0] = 2 + (-1) + (-1) + 0 = 0 -\end{lstlisting} -Solution: if we use brute force, use 4 for loop, then it is $O(N^4)$. If we use divide and conquer, sum the first half, and save a dictionary (counter), time complexity is $O(2N^2)$. What if we have 6 sum, we can reduce it to $O(2N^3)$, what if 8 sum. 
- -\begin{lstlisting}[language = Python] -def fourSumCount(self, A, B, C, D): - AB = collections.Counter(a+b for a in A for b in B) - return sum(AB[-c-d] for c in C for d in D) -\end{lstlisting} -\end{enumerate} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%%%%% Others -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\subsubsection{Others} -For example, the following question would be used as follow up for question \textit{Longest Continuous Increasing Subsequence} - -300. Longest Increasing Subsequence -\begin{lstlisting} -Given an unsorted array of integers, find the length of longest increasing subsequence. - -For example, - - Given [10, 9, 2, 5, 3, 7, 101, 18], - The longest increasing subsequence is [2, 3, 7, 101], therefore the length is 4. Note that there may be more than one LIS combination, it is only necessary for you to return the length. - - -Your algorithm should run in $O(n^2)$ complexity. - -Follow up: Could you improve it to $O(nlogn)$ time complexity? -\begin{lstlisting} - -Solution: Compared with the last question, this one loose the restriction that need to be continuous. For this problem, we need to understand it is not going to work with two pointers. It is not a brute-force $O(n^2)$ problem. It is a typical combination problem in recursive functions. So, at first, put the standard combination algorithm code here: -\begin{lstlisting}[language = Python] -def dfs(temp, idx): - rslt.append(temp[:]) #pass temp[:] with shallow copy so that we wont change the result of rslt when temp is changed - for i in range(idx, len(nums)): - temp.append(nums[i]) - #backtrack - dfs(temp, i+1) - temp.pop() - - - dfs([],0) - return rslt -\end{lstlisting} - -So, we use the backtracking-combination to enumerate all possible subsequence. The difference is here we do not unconditionally use this nums[i] in our result, only if nums[i]>tail, and the final length is the maximum of them all. 
$T(n) = max(T(n-1)+1, T(n-k)+1, …)$. So, the time complexity is $O(2^n)$. It passed 21/15 test cases with TLE. In this process, we transfer from the combination problem to dynamic programming. -\begin{lstlisting}[language = Python] -def lengthOfLIS(self, nums): - """ - :type nums: List[int] - :rtype: int - """ - max_count = 0 - if not nums: - return 0 - def backtrackingDFS(idx,tail): - if idx==len(nums): - - return 0 - length = 0 - for i in range(idx,len(nums)): - if nums[i]>tail: - length = max(1+backtrackingDFS(i+1, nums[i]), length) - return length - - return backtrackingDFS(0,-maxsize) -\end{lstlisting} - -Now, we know we are doing dynamic programming, if we already know the ans(idx), meaning the max length from somewhere, we do not need to do it again. With memoization: The time complexity is n subproblem, top-down recursive+memo. -\begin{lstlisting}[language = Python] -def lengthOfLIS(self, nums): - """ - :type nums: List[int] - :rtype: int - """ - max_count = 0 - if not nums: - return 0 - memo =[None for _ in range(len(nums))] - def backtrackingDFS(idx,tail): - if idx==len(nums): - - return 0 - if memo[idx]==None: - length = 0 - for i in range(idx,len(nums)): - if nums[i]>tail: - length = max(1+backtrackingDFS(i+1, nums[i]), length) - memo[idx]=length - return memo[idx] - - return backtrackingDFS(0,-maxsize) -\end{lstlisting} - -Now, we use dynamic programming which its solution can be found in Section~\ref{part2_sequence_dp}. And bottom-up iterative. For [10,9,2,5,3], the length array is [1,1,1,2,2], for [4,10,4,3,8,9], we have [1, 2, 1, 1, 2, 3]. To find the rule, T(0)=1, idx, max(memo[i]),$0\leq inums[i]$. Now the time complexity is $O(n^2)$. - -state: f[i] record the maximum length of increasing subsequence from 0-i. 
- -function: f[i]: choose or not to choose - -initialize: f[0]=1 -\begin{lstlisting}[language = Python] -def lengthOfLIS(self, nums): - """ - :type nums: List[int] - :rtype: int - """ - max_count = 0 - if not nums: - return 0 - dp=[0 for _ in range(len(nums))] - dp[0]=1 - maxans =1 - for idx in range(1,len(nums)): #current combine this to this subsequence, 10 to [], 9 to [10] - pre_max=0 - for i in range(0,idx): - if nums[idx]>nums[i]: - pre_max=max(pre_max, dp[i]) - dp[idx]=pre_max+1 - maxans=max(maxans,dp[idx]) - - print(dp) - return maxans -\end{lstlisting} - -We can even speedup further by using binary search, the second loop we can use a binary search to make the time complexity $O(logn)$, and the dp array used to save the maximum ans. Each time we use binary search to find an insertion point, if it is at the end, then the length grow. -[4]->[4,10],->[4,10],[3,10],->[3,8]->[3,8,9] -\begin{lstlisting}[language = Python] -def lengthOfLIS(self, nums): - """ - :type nums: List[int] - :rtype: int - """ - def binarySearch(arr,l,r,num): - while larr[mid]: - l=mid+1 - elif numtail: - addLen = 1+recursive(idx+1, nums[idx],res+[nums[idx]]) - notAddLen = recursive(idx+1, tail,res) - return max(addLen,notAddLen) - else: - return recursive(idx+1, tail,res) - - - ans=recursive(0,-maxsize,[]) - count=0 - for lst in rlst: - if len(lst)==ans: - count+=1 - - return count -\end{lstlisting} - -Using dynamic programming, the difference is we add a count array. 
-\begin{lstlisting}[language = Python] -from sys import maxsize -class Solution: - def findNumberOfLIS(self, nums): - N = len(nums) - if N <= 1: return N - lengths = [0] * N #lengths[i] = longest ending in nums[i] - counts = [1] * N #count[i] = number of longest ending in nums[i] - - for idx, num in enumerate(nums): #i - for i in range(idx): #j - if nums[i] < nums[idx]: #bigger - if lengths[i] >= lengths[idx]: - lengths[idx] = 1 + lengths[i] #set the biggest length - counts[idx] = counts[i] #change the count - elif lengths[i] + 1 == lengths[idx]: #if it is a tie - counts[idx] += counts[i] #increase the current count by count[i] - -longest = max(lengths) - print(counts) - print(lengths) - return sum(c for i, c in enumerate(counts) if lengths[i] == longest) -\end{lstlisting} - -128. Longest Consecutive Sequence -\begin{lstlisting} -Given an unsorted array of integers, find the length of the longest consecutive elements sequence. - -For example, - Given [100, 4, 200, 1, 3, 2], - The longest consecutive elements sequence is [1, 2, 3, 4]. Return its length: 4. - - Your algorithm should run in O(n) complexity. - \end{lstlisting} - -Solution: Not thinking about the O(n) complexity, we can use sorting to get [1,2,3,4,100,200], and then use two pointers to get [1,2,3,4]. - -How about O(n)? We can pop out a number in the list, example, 4 , then we use while first-1 to get any number that is on the left side of 4, here it is 3, 2, 1, and use another to find all the bigger one and remove these numbers from the nums array. 
-\begin{lstlisting}[language =Python] -def longestConsecutive(self, nums): - nums = set(nums) - maxlen = 0 - while nums: - first = last = nums.pop() - while first - 1 in nums: #keep finding the smaller one - first -= 1 - nums.remove(first) - while last + 1 in nums: #keep finding the larger one - last += 1 - nums.remove(last) - maxlen = max(maxlen, last - first + 1) - return maxlen -\end{lstlisting} - - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%%%%% Merge List -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\subsection{Merge and Partition} -\subsubsection{Merge Lists} -We can use divide and conquer (see the merge sort) and the priority queue. -\subsubsection{Partition Lists} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%%%%% Intersection -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\subsection{Intersection} -For problems to get intersections of lists, we can use hashmap, which takes $O(m+n)$ time complexity. Also, we can use sorting at first and use two pointers one start from the start of each array. Examples are shown as below; -\begin{enumerate} - \item 349. Intersection of Two Arrays (Easy) - - Given two arrays, write a function to compute their intersection. - -Example: -\begin{lstlisting} -Given nums1 = [1, 2, 2, 1], nums2 = [2, 2], return [2]. -\end{lstlisting} - -Note: -\begin{itemize} - \item Each element in the result must be unique. - \item The result can be in any order. -\end{itemize} -Solution 1: Using hashmap, here we use set to convert, this takes 43ms. 
-\begin{lstlisting}[language = Python] -def intersection(self, nums1, nums2): - """ - :type nums1: List[int] - :type nums2: List[int] - :rtype: List[int] - """ - if not nums1 or not nums2: - return [] - if len(nums1) > len(nums2): - nums1, nums2 = nums2, nums1 - ans = set() - nums1 = set(nums1) - for e in nums2: - if e in nums1: - ans.add(e) - return list(ans) -\end{lstlisting} -Solution2: sorting at first, and then use pointers. Take 46 ms. -\begin{lstlisting}[language = Python] -def intersection(self, nums1, nums2): - """ - :type nums1: List[int] - :type nums2: List[int] - :rtype: List[int] - """ - nums1.sort() - nums2.sort() - r = set() - i, j = 0, 0 - while i < len(nums1) and j < len(nums2): - if nums1[i] < nums2[j]: - i += 1 - elif nums1[i] > nums2[j]: - j += 1 - else: - r.add(nums1[i]) - i += 1 - j += 1 - return list(r) -\end{lstlisting} -\item 350. Intersection of Two Arrays II(Easy) - - Given two arrays, write a function to compute their intersection. - -Example: -\begin{lstlisting} -Given nums1 = [1, 2, 2, 1], nums2 = [2, 2], return [2, 2]. -\end{lstlisting} - -Note: -\begin{itemize} - \item Each element in the result should appear as many times as it shows in both arrays. - \item The result can be in any order. -\end{itemize} - -Follow up: -\begin{enumerate} - \item What if the given array is already sorted? How would you optimize your algorithm? - \item What if nums1's size is small compared to nums2's size? Which algorithm is better? - \item What if elements of nums2 are stored on disk, and the memory is limited such that you cannot load all elements into the memory at once? 
-\end{enumerate} - -\end{enumerate} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%%%%% Exercises -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\section{Exercises} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%%%%% Subarray -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\subsection{Subarray} -\subsubsection{Absolute-conditioned Subarray} -\begin{enumerate} - \item 930. Binary Subarrays With Sum - \begin{lstlisting} - In an array A of 0s and 1s, how many non-empty subarrays have sum S? -Example 1: - -Input: A = [1,0,1,0,1], S = 2 -Output: 4 -Explanation: -The 4 subarrays are bolded below: -[1,0,1,0,1] -[1,0,1,0,1] -[1,0,1,0,1] -[1,0,1,0,1] -Note: - - A.length <= 30000 - 0 <= S <= A.length - A[i] is either 0 or 1. -\end{lstlisting} -Answer: this is exactly the third time of maximum subarray, the maximum length of subarry with a certain value. We solve it using prefix sum and a hashmap to save the count of each value. 
-\begin{lstlisting}[language=Python] -import collections -class Solution: - def numSubarraysWithSum(self, A, S): - """ - :type A: List[int] - :type S: int - :rtype: int - """ - dict = collections.defaultdict(int) #the value is the number of the sum occurs - dict[0]=1 #prefix sum starts from 0 and the number is 1 - prefix_sum, count=0, 0 - for v in A: - prefix_sum += v - count += dict[prefix_sum-S] # increase the counter of the appearing value k, default is 0 - dict[prefix_sum] += 1 # update the count of prefix sum, if it is first time, the default value is 0 - return count -\end{lstlisting} -We can write it as: -\begin{lstlisting}[language=Python] - def numSubarraysWithSum(self, A, S): - """ - :type A: List[int] - :type S: int - :rtype: int - """ - P = [0] - for x in A: P.append(P[-1] + x) - count = collections.Counter() - - ans = 0 - for x in P: - ans += count[x] - count[x + S] += 1 - - return ans -\end{lstlisting} -Also, it can be solved used a modified sliding window algorithm. For sliding window, we have $i,j$ starts from 0, which represents the window. Each iteration j will move one position. For a normal sliding window, only if the sum is larger than the value, then we shrink the window size by one. However, in this case, like in the example $1, 0, 1, 0, 1$, when $j = 5$, $i = 1$, the sum is $2$, but the algorithm would miss the case of $i = 2$, which has the same sum value. To solve this problem, we keep another index $i_hi$, in addition to the moving rule of $i$, it also moves if the sum is satisfied and that value is $0$. This is actually a Three pointer algorithm. 
-\begin{lstlisting}[language=Python] - def numSubarraysWithSum(self, A, S): - i_lo, i_hi, j = 0, 0, 0 #i_lo <= j - sum_lo = sum_hi = 0 - ans = 0 - while j < len(A): - # Maintain i_lo, sum_lo: - # While the sum is too big, i_lo += 1 - sum_lo += A[j] - while i_lo < j and sum_lo > S: - sum_lo -= A[i_lo] - i_lo += 1 - - # Maintain i_hi, sum_hi: - # While the sum is too big, or equal and we can move, i_hi += 1 - sum_hi += A[j] - while i_hi < j and ( - sum_hi > S or sum_hi == S and not A[i_hi]): - sum_hi -= A[i_hi] - i_hi += 1 - - if sum_lo == S: - ans += i_hi - i_lo + 1 - j += 1 - - return ans -\end{lstlisting} -\item 523. Continuous Subarray Sum -\begin{lstlisting} -Given a list of non-negative numbers and a target integer k, write a function to check if the array has a continuous subarray of size at least 2 that sums up to the multiple of k, that is, sums up to n*k where n is also an integer. - -Example 1: -Input: [23, 2, 4, 6, 7], k=6 -Output: True -Explanation: Because [2, 4] is a continuous subarray of size 2 and sums up to 6. - -Example 2: -Input: [23, 2, 6, 4, 7], k=6 -Output: True -Explanation: Because [23, 2, 6, 4, 7] is an continuous subarray of size 5 and sums up to 42. - -Note: -The length of the array won't exceed 10,000. -You may assume the sum of all the numbers is in the range of a signed 32-bit integer. -\end{lstlisting} -Answer: This is a mutant of the subarray with value k. The difference here, we save the prefix sum as the reminder of k. if $(a+b)\%k=0$, then $(a\%k+b\%k)/k=1$. 
-\begin{lstlisting}[language=Python] -class Solution: - def checkSubarraySum(self, nums, k): - """ - :type nums: List[int] - :type k: int - :rtype: bool - """ - - if not nums: - return False - k = abs(k) - prefixSum = 0 - dict = collections.defaultdict(int) - dict[0]=-1 - for i, v in enumerate(nums): - prefixSum += v - if k!=0: - prefixSum %= k - if prefixSum in dict and (i-dict[prefixSum])>=2: - return True - if prefixSum not in dict: - dict[prefixSum] = i - return False -\end{lstlisting} -\end{enumerate} -\subsubsection{Vague-conditioned Subarray} - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%%%%% Subsequence -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\subsection{Subsequence} -\begin{enumerate} - \item 594. Longest Harmonious Subsequence - -We define a harmonious array is an array where the difference between its maximum value and its minimum value is exactly 1. - -Now, given an integer array, you need to find the length of its longest harmonious subsequence among all its possible subsequences. - -Example 1: -\begin{lstlisting} -Input: [1,3,2,2,5,2,3,7] -Output: 5 -Explanation: The longest harmonious subsequence is [3,2,2,2,3]. -\end{lstlisting} - -\textit{Note: The length of the input array will not exceed 20,000.} - -Solution: at first, use a Counter to save the whole set. Then visit the counter dictionary, to check key+1 and key-1, only when the item is not zero, we can count it as validate, or else it is 0. 
-\begin{lstlisting}[language = Python] -from collections import Counter -class Solution: - def findLHS(self, nums): - """ - :type nums: List[int] - :rtype: int - """ - if not nums or len(nums)<2: - return 0 - count=Counter(nums) #the list is sorted by the key value - maxLen = 0 - for key,item in count.items(): #to visit the key: item in the counter - if count[key+1]: #because the list is sorted, so we only need to check key+1 - maxLen = max(maxLen,item+count[key+1]) - - # if count[key-1]: - # maxLen=max(maxLen, item+count[key-1]) - return maxLen -\end{lstlisting} - -\item 521. Longest Uncommon Subsequence I - -Given a group of two strings, you need to find the longest uncommon subsequence of this group of two strings. The longest uncommon subsequence is defined as the longest subsequence of one of these strings and this subsequence should not be any subsequence of the other strings. - -A subsequence is a sequence that can be derived from one sequence by deleting some characters without changing the order of the remaining elements. Trivially, any string is a subsequence of itself and an empty string is a subsequence of any string. - -The input will be two strings, and the output needs to be the length of the longest uncommon subsequence. If the longest uncommon subsequence doesn’t exist, return -1. - -Example 1: -\begin{lstlisting} -Input: "aba", "cdc" -Output: 3 -Explanation: The longest uncommon subsequence is "aba" (or "cdc"), -because "aba" is a subsequence of "aba", -but not a subsequence of any other strings in the group of two strings. 
-\end{lstlisting} - -\textit{Note:} - - \textit{Both strings’ lengths will not exceed 100.} - - \textit{Only letters from a ~ z will appear in input strings.} - -Solution: if we get more examples, we could found the following rules, “aba”,”aba” return -1, -\begin{lstlisting}[language = Python] -def findLUSlength(self, a, b): - """ - :type a: str - :type b: str - :rtype: int - """ - if len(b)!=len(a): - return max(len(a),len(b)) - #length is the same - return len(a) if a!=b else -1 -\end{lstlisting} -\item 424. Longest Repeating Character Replacement - -Given a string that consists of only uppercase English letters, you can replace any letter in the string with another letter at most k times. Find the length of a longest substring containing all repeating letters you can get after performing the above operations. - -\textit{Note:} - - \textit{Both the string’s length and k will not exceed 104.} - -Example 1: -\begin{lstlisting} -Input: -s = "ABAB", k = 2 - -Output: -4 -\end{lstlisting} - -Explanation: -Replace the two 'A's with two 'B's or vice versa. - -Example 2: -\begin{lstlisting} -Input: -s = "AABABBA", k = 1 - -Output: -4 -\end{lstlisting} - -Explanation: -Replace the one 'A' in the middle with 'B' and form "AABBBBA". -The substring "BBBB" has the longest repeating letters, which is 4. - -Solution: the brute-force recursive solution for this, is try to replace any char into another when it is not equal or choose not too. 
LTE -\begin{lstlisting}[language = Python] -#brute force, use recursive function to write brute force solution - def replace(news, idx, re_char, k): - nonlocal maxLen - if k==0 or idx==len(s): - maxLen = max(maxLen, getLen(news)) - return - -if s[idx]!=re_char: #replace - news_copy=news[:idx]+re_char+news[idx+1:] - replace(news_copy, idx+1, re_char, k-1) - replace(news[:], idx+1, re_char,k) - - #what if we only have one char - # for char1 in chars.keys(): - # replace(s[:],0,char1, k) -\end{lstlisting} -To get the BCR, think about the sliding window. The longest repeating string we can by number of replacement = `length of string max(numer of occurence of letter i), i=’A’ to ‘Z’. With the constraint, which means the equation needs to be $\leq k$. So we can use sliding window to record the max occurence, and when the constraint is violated, we shrink the window. Given an example, strs= “BBCABBBAB”, k=2, when i=0, and j=7, 8–5=3>2, which is at A, we need to shrink it, the maxCharCount changed to 4, i=1, so that 8–1–4=3, i=2, 8–2–3=3, 8–3–3=2, so i=3, current length is 5. -\begin{lstlisting}[language = Python] -def characterReplacement(self, s, k): - """ - :type s: str - :type k: int - :rtype: int - """ - i,j = 0,0 #sliding window - counter=[0]*26 - ans = 0 - maxCharCount = 0 - while jk: #now shrink the window - counter[ord(s[i])-ord('A')]-=1 - i+=1 - #updata max - maxCharCount=max(counter) - ans=max(ans, j-i+1) - j+=1 - - return ans -\end{lstlisting} - -\item 395. Longest Substring with At Least K Repeating Characters - -Find the length of the longest substring T of a given string (consists of lowercase letters only) such that every character in T appears no less than k times. - -Example 1: -\begin{lstlisting} -Input: -s = "aaabb", k = 3 - -Output: -3 -\end{lstlisting} - -The longest substring is "aaa", as 'a' is repeated 3 times. 
- -Example 2: -\begin{lstlisting} -Input: -s = "ababbc", k = 2 - -Output: -5 -\end{lstlisting} - -The longest substring is "ababb", as 'a' is repeated 2 times and 'b' is repeated 3 times. - -Solution: use dynamic programming with memo: Cons: it takes too much space, and with LTE. -\begin{lstlisting}[language = Python] -from collections import Counter, defaultdict -class Solution: - def longestSubstring(self, s, k): - """ - :type s: str - :type k: int - :rtype: int - """ - if not s: - return 0 - if len(s)end: - return 0 - if memo[start][end]==None: - if any(0=k: - mid+=1 - if mid==len(s): return len(s) - left = self.longestSubstring(s[:mid],k) #"ababb" - #from pre_mid - cur_mid, get rid of those cant satisfy the condition - while mid a2 - \ - c1 -> c2 -> c3 - / -B: b1 -> b2 -> b3 -\end{lstlisting} - -begin to intersect at node c1. - -Notes: -\begin{itemize} - \item If the two linked lists have no intersection at all, return null. - \item The linked lists must retain their original structure after the function returns. - \item You may assume there are no cycles anywhere in the entire linked structure. - \item Your code should preferably run in O(n) time and use only O(1) memory. -\end{itemize} - - -\end{enumerate} -\end{document} \ No newline at end of file diff --git a/Easy-Book/chapters/mastering/divide_conquer_questions.tex b/Easy-Book/chapters/mastering/divide_conquer_questions.tex deleted file mode 100644 index 792aa5a..0000000 --- a/Easy-Book/chapters/mastering/divide_conquer_questions.tex +++ /dev/null @@ -1,146 +0,0 @@ -\documentclass[../specific-algorithms.tex]{subfiles} -\begin{document} -\begin{examples} - - - \item House Robber (198) - - Solution: If we use brute force is $O(2^n)$. Use divide and conquer, here because we use half and half. Which we need to get rid of. 
-\begin{lstlisting}[language = Python] -def rob(self, nums): - """ - :type nums: List[int] - :rtype: int - """ - memo=[[-1 for _ in range(len(nums))] for _ in range(len(nums))] - - def dp(l,r): - nonlocal memo - if l==r: - return nums[l] - if l>r: - return 0 - if l=k''' - n1=len(nums1) - n2=len(nums2) - memo=[[[None for k in range(k+1)] for col in range(n2+1) ] for row in range(n1+1)] - def dp(i,j,k): - if k==0: - return 0 - if memo[i][j][k] is None: - max1,max2,max3=-1,-1,-1 - if i[0+6, 6–6=0, 0+4=4, 4–2=2, ] Set the first to 0+6, nums[i-1]+nums[i]. - -r = max(left\_subarray, right\_subarry, max(right\_subarry)-min(left\_subarray)), Thus, the real operation is max(right\_subarry)-min(left\_subarray). The time complexity would be decreased to $O(nlgn)$ from the brute force $O(n^2)$. So this example shows the divide and conquer. However, it might not be the best solution. Try the BCR with $O(n)$. -\begin{lstlisting}[language=Python] -class Solution(object): - def maxProfit(self, prices): - """ - :type prices: List[int] - :rtype: int - """ - if len(prices)<=1: - return 0 - - r = -maxint - min_price = maxint - for price in prices: - if price= target, find the last position that value<= target.(this is called lower\_bound, and upper\_bound. -\end{enumerate} - -\subsection{Standard Binary Search and Python Module bisect} -Binary search is usually carried out on a Static sorted array or 2D matrix. There are three basic cases: (1) find the exact target that value = target; If there are duplicates, we are more likely to be asked to (2) find the first position that has value >= target; (3) find the first position that has value <= target. Here, we use two example array: one without duplicates and the other has duplicates. -\begin{lstlisting}[language=Python] -a = [2, 4, 5, 9] -b = [0, 1, 1, 1, 1, 1] -\end{lstlisting} - -\paragraph{Find the Exact Target} This is the most basic application of binary search. We can set two pointers, l and r. 
Each time we compute the middle position, and check if it is equal to the target. If it is, return the position; if the middle value is smaller than the target, move to the right half; otherwise, move to the left half. The Python code is given: -\begin{lstlisting}[language=Python] -def standard_binary_search(lst, target): - l, r = 0, len(lst) - 1 - while l <= r: - mid = l + (r - l) // 2 - if lst[mid] == target: - return mid - elif lst[mid] < target: - l = mid + 1 - else: - r = mid - 1 - return -1 # target is not found -\end{lstlisting} -Now, run the example: -\begin{lstlisting}[language=Python] -print("standard_binary_search: ", standard_binary_search(a,3), standard_binary_search(a,4), standard_binary_search(b, 1)) -\end{lstlisting} -The print out is: -\begin{lstlisting} -standard_binary_search: -1 1 2 -\end{lstlisting} -From the example, we can see that when multiple \textbf{duplicates} of the target exist, it can possibly return any one of them. And for the case when the target does not exist, it simply returns -1. In reality, we might need to find a position where we can potentially insert the target to keep the sorted array sorted. There are two cases: (1) the first position that we can insert, which is the first position that has value>= target (2) and the last position we can insert, which is the first position that has value > target. For example, if we try to insert 3 in a, and 1 in b, the first position should be 1 and 1 in each array, and the last position is 1 and 6 instead. For these two cases, we have a Python built-in Module \textbf{bisect} which offers two methods: bisect\_left() and bisect\_right() for these two cases respectively. - -\paragraph{Find the First Position that value >= target} This way the target position separates the array into two halves: value < target, target\_position, value>= target. In order to meet the purpose, we make sure that if value < target, we move to the right side, else, move to the left side.
-\begin{lstlisting}[language=Python] -# bisect_left, no longer need to check the mid element, -# it separate the list in to two halfs: value < target, mid, value >= target -def bisect_left_raw(lst, target): - l, r = 0, len(lst)-1 - while l <= r: - mid = l + (r-l)//2 - if lst[mid] < target: # move to the right half if the value < target, till - l = mid + 1 #[mid+1, right] - else:# move to the left half is value >= target - r = mid - 1 #[left, mid-1] - return l # the final position is where -\end{lstlisting} -% Now insert the value with: -% \begin{lstlisting}[language=Python] -% lst.insert(l+1, target) -% \end{lstlisting} - -\paragraph{Find the First Position that value > target} This way the target position separates the array into two halves: value <= target, target\_position, value> target. Therefore, we simply change the condition of if value < target to if value <= target, then we move to the right side. -\begin{lstlisting}[language=Python] -#bisect_right: separate the list into two halfs: value<= target, mid, value > target -def bisect_right_raw(lst, target): - l, r = 0, len(lst)-1 - while l <= r: - mid = l + (r-l)//2 - if lst[mid] <= target: - l = mid + 1 - else: - r = mid -1 - return l -\end{lstlisting} -Now, run an example: -\begin{lstlisting}[language=Python] -print("bisect left raw: find 3 in a :", bisect_left_raw(a,3), 'find 1 in b: ', bisect_left_raw(b, 1)) -print("bisect right raw: find 3 in a :", bisect_right_raw(a, 3), 'find 1 in b: ', bisect_right_raw(b, 1)) -\end{lstlisting} -The print out is: -\begin{lstlisting} -bisect left raw: find 3 in a : 1 find 1 in b: 1 -bisect right raw: find 3 in a : 1 find 1 in b: 6 -\end{lstlisting} - -\paragraph{Bonus} For the last two cases, if we return the position as l-1, then we get the last position that value < target, and the last position value <= target. 
- -\paragraph{Python Built-in Module bisect} This module provides support for maintaining a list in sorted order without having to sort the list after each insertion. It offers six methods as shown in Table~\ref{tab:method_bisect}. However, only two are most commonly used: bisect\_left and bisect\_right. -\begin{table}[h] -\begin{small} -\centering -\noindent\captionof{table}{ Methods of \textbf{bisect}} - \noindent \begin{tabular}{|p{0.25\columnwidth}|p{0.75\columnwidth}| } - \hline -Method & Description \\ \hline -bisect\_left(a, x, lo=0, hi=len(a)) & The parameters lo and hi may be used to specify a subset of the list; the function is the same as bisect\_left\_raw \\\hline -bisect\_right(a, x, lo=0, hi=len(a)) & The parameters lo and hi may be used to specify a subset of the list; the function is the same as bisect\_right\_raw \\\hline -bisect(a, x, lo=0, hi=len(a)) &Similar to bisect\_left(), but returns an insertion point which comes after (to the right of) any existing entries of x in a.\\ \hline -insort\_left(a, x, lo=0, hi=len(a)) &This is equivalent to a.insert(bisect.bisect\_left(a, x, lo, hi), x).\\ \hline -insort\_right(a, x, lo=0, hi=len(a)) & This is equivalent to a.insert(bisect.bisect\_right(a, x, lo, hi), x).\\ \hline -insort(a, x, lo=0, hi=len(a)) & Similar to insort\_left(), but inserting x in a after any existing entries of x.\\ \hline -\end{tabular} - \label{tab:method_bisect} - \end{small} -\end{table} -Let's see some example code: -\begin{lstlisting}[language=Python] -from bisect import bisect_left,bisect_right, bisect -print("bisect left: find 3 in a :", bisect_left(a,3), 'find 1 in b: ', bisect_left(b, 1)) # lower_bound, the first position that value>= target -print("bisect right: find 3 in a :", bisect_right(a, 3), 'find 1 in b: ', bisect_right(b, 1)) # upper_bound, the first position that value > target -\end{lstlisting} -The print out is: -\begin{lstlisting} -bisect left: find 3 in a : 1 find 1 in b: 1 -bisect right: find 3 in a : 1 find
1 in b: 6 -\end{lstlisting} -\subsection{Binary Search in Rotated Sorted Array} -\label{concept_binary_search_in_array} -The extension of the standard binary search is on array that the array is ordered in its own way like rotated array. - -\paragraph{Binary Search in Rotated Sorted Array } (See LeetCode problem, 33. Search in Rotated Sorted Array (medium). Suppose an array (without duplicates) sorted in ascending order is rotated at some pivot unknown to you beforehand. (i.e., 0 1 2 4 5 6 7 might become 4 5 6 7 0 1 2). You are given a target value to search. If found in the array return its index, otherwise return -1. You may assume no duplicate exists in the array. -\begin{lstlisting}[numbers=none] -Example 1: - -Input: nums = [3, 4,5,6,7,0,1,2], target = 0 -Output: 5 - -Example 2: - -Input: nums = [4,5,6,7,0,1,2], target = 3 -Output: -1 -\end{lstlisting} - -In the rotated sorted array, the array is not purely monotonic. Instead, there is one drop in the array because of the rotation, where it cuts the array into two parts. Suppose we are starting with a standard binary search with example 1, at first, we will check index 3, then we need to move to the right side? Assuming we compare our middle item with the left item, -\begin{lstlisting}[numbers=none] -if nums[mid] > nums[l]: # the left half is sorted -elif nums[mid] < nums[l]: # the right half is sorted -else: # for case like [1,3], move to the right half -\end{lstlisting} -For a standard binary search, we simply need to compare the target with the middle item to decide which way to go. In this case, we can use objection. Check which side is sorted, because no matter where the left, right and the middle index is, there is always one side that is sorted. So if the left side is sorted, and the value is in the range of the [left, mid], then we move to the left part, else we object the left side, and move to the right side instead. 
-\begin{figure}[h] - \centering - \includegraphics[width=0.7\columnwidth]{fig/rotated_array.png} - \caption{Example of Rotated Sorted Array} - \label{fig:rotated_sorted_array} -\end{figure} - -The code is shown: -\begin{lstlisting}[language=Python] -'''implemente the rotated binary search''' -def RotatedBinarySearch(nums, target): - if not nums: - return -1 - - l,r = 0,len(nums)-1 - while l<=r: - mid = l+ (r-l)//2 - if nums[mid] == target: - return mid - if nums[l] < nums[mid]: # if the left part is sorted - if nums[l] <= target <= nums[mid]: - r = mid-1 - else: - l = mid+1 - elif nums[l] > nums[mid]: # if the right side is sorted - if nums[mid] <= target <= nums[r]: - l = mid+1 - else: - r = mid-1 - else: - l = mid + 1 - return -1 -\end{lstlisting} -\begin{bclogo}[couleur = blue!30, arrondi=0.1,logo=\bccrayon,ombre=true]{What happens if there is duplicates in the rotated sorted array? } In fact, similar comparing rule applies: -\begin{lstlisting}[numbers=none] -if nums[mid] > nums[l]: # the left half is sorted -elif nums[mid] < nums[l]: # the right half is sorted -else: # for case like [1,3], or [1, 3, 1, 1, 1] or [3, 1, 2, 3, 3, 3] - only l++ -\end{lstlisting} -\end{bclogo} - - - - -%%%%%%%%%%%%%%binary search on result space%%%%%%% -\subsection{Binary Search on Result Space} -If the question gives us the context: the target is in the range [left, right], we need to search the first or last position that satisfy a condition function. We can apply the concept of standard binary search and bisect\_left and bisect\_right and its mutant. Where we use the condition function to replace the value comparison between target and element at middle position. The steps we need: -\begin{enumerate} - \item get the result search range [l, r] which is the initial value for l and r pointers. - \item decide the valid function to replace such as if lst[mid] < target - \item decide which binary search we use: standard, bisect\_left/ bisect\_right or its mutant. 
-\end{enumerate} - -For example: -\begin{examples}[resume] -\item \textbf{441. Arranging Coins (easy)}. You have a total of n coins that you want to form in a staircase shape, where every k-th row must have exactly k coins. Given n, find the total number of full staircase rows that can be formed. n is a non-negative integer and fits within the range of a 32-bit signed integer. -\begin{lstlisting}[numbers=none] -Example 1: - -n = 5 - -The coins can form the following rows: -* -* * -* * - -Because the 3rd row is incomplete, we return 2. -\end{lstlisting} - -\textbf{Analysis: } Given a number n>=1, the minimum row is 1, and the maximum is n. Therefore, our possible result range is [1, n]. These can be treated as indexes of the sorted array. For a given row, we write a function to check if it is possible. We need a function $r* (r+1) // 2 <= n$. For this problem, we need to search in the range of [1, n] to find the last position that is valid. This is bisect\_left or bisect\_right, where we use the function replace the condition check: -\begin{lstlisting}[language=Python] -def arrangeCoins(self, n): - def isValid(row): - return (row*(row+1))//2 <= n - # we need to find the last position that is valid (<=) - def bisect_right(): - l, r = 1, n - while l <= r: - mid = l + (r-l) // 2 - if isValid(mid): # replaced compared with the standard binary search - l = mid + 1 - else: - r = mid - 1 - return l-1 - return bisect_right() -\end{lstlisting} -\item \textbf{278. First Bad Version.} You are a product manager and currently leading a team to develop a new product. Unfortunately, the latest version of your product fails the quality check. Since each version is developed based on the previous version, all the versions after a bad version are also bad. - -Suppose you have n versions [1, 2, ..., n] and you want to find out the first bad one, which causes all the following ones to be bad. - -You are given an API bool isBadVersion(version) which will return whether version is bad. 
Implement a function to find the first bad version. You should minimize the number of calls to the API. - -Solution: we keep doing binary search until we have searched all possible areas. -\begin{lstlisting}[language = Python] -class Solution(object): - def firstBadVersion(self, n): - """ - :type n: int - :rtype: int - """ - l,r=0,n-1 - last = -1 - while l<=r: - mid = l+(r-l)//2 - if isBadVersion(mid+1): #move to the left; mid is a 0-based index, so the version is mid+1 - r=mid-1 - last = mid+1 #to track the earliest bad version seen so far - else: - l=mid+1 - return last -\end{lstlisting} -\end{examples} -% \subsection{Bisection Method} (second edition) -% The binary search principle can be used to find the root of a function that may be difficult to compute mathematically. We have not seen any problems that require this method on LeetCode yet. Thus we define the problem as: - -% Find the monthly payment for a loan: You want to buy a car using loan and want to pay in monthly installment of d d -% \subsection{Python Library} -% Python has \textbf{bisect} module for binary search. -% \begin{lstlisting}[numbers=none] -% bisect.bisect_left(a, x): Return the leftmost index where we can insert x into a to maintain sorted order! Leftmost rl that satisfy: x<=a[rl] - -% bisect.bisect_right(a, x): Return the rightmost index where we can insert x into a to maintain sorted order! Right most rr that satisfy: x>=a[rr] -% \end{lstlisting} -% For example: -% \begin{lstlisting}[language=Python] -% from bisect import bisect_left,bisect_right -% a = [1, 2, 3, 3, 3, 4, 5] -% p1, p2= bisect_left(a,3), bisect_right(a, 3) -% print(p1, p2) -% # output -% # 2, 5 -% \end{lstlisting} - -\subsection{LeetCode Problems} -\begin{examples} -\item \textbf{35. Search Insert Position (easy).} Given a sorted array and a target value, return the index if the target is found. If not, return the index where it would be if it were inserted in order. - -You can assume that there are no duplicates in the array.
-\begin{lstlisting}[numbers=none] -Example 1: - -Input: [1,3,5,6], 5 -Output: 2 - -Example 2: -Input: [1,3,5,6], 2 -Output: 1 - -Example 3: -Input: [1,3,5,6], 7 -Output: 4 - -Example 4: -Input: [1,3,5,6], 0 -Output: 0 -\end{lstlisting} - -\textbf{Solution: Standard Binary Search Implementation.} For this problem, we just standardize the Python code of binary search, which takes $O(logn)$ time complexity and O(1) space complexity without using recursion function. In the following code, we use exclusive right index with len(nums), therefore it stops if l == r; it can be as small as 0 or as large as n of the array length for numbers that are either smaller or equal to the nums[0] or larger or equal to nums[-1]. We can also make the right index inclusive. -\begin{lstlisting}[language = Python] -# exclusive version -def searchInsert(self, nums, target): - l, r = 0, len(nums) #start from 0, end to the len (exclusive) - while l < r: - mid = (l+r)//2 - if nums[mid] < target: #move to the right side - l = mid+1 - elif nums[mid] > target: #move to the left side, not mid-1 - r= mid - else: #found the traget - return mid - #where the position should go - return l -\end{lstlisting} - -\begin{lstlisting}[language = Python] -# inclusive version -def searchInsert(self, nums, target): - l = 0 - r = len(nums)-1 - while l <= r: - m = (l+r)//2 - if target > nums[m]: #search the right half - l = m+1 - elif target < nums[m]: # search for the left half - r = m-1 - else: - return m - return l -\end{lstlisting} -\end{examples} -Standard binary search -\begin{enumerate} - \item 611. Valid Triangle Number (medium) - \item 704. Binary Search (easy) - -\item 74. Search a 2D Matrix) Write an efficient algorithm that searches for a value in an m x n matrix. This matrix has the following properties: -\begin{enumerate} - \item Integers in each row are sorted from left to right. - \item The first integer of each row is greater than the last integer of the previous row. 
- \end{enumerate} -\begin{lstlisting}[numbers=none] -For example, -Consider the following matrix: - -[ - [1, 3, 5, 7], - [10, 11, 16, 20], - [23, 30, 34, 50] -] - -Given target = 3, return true. -\end{lstlisting} - -Solution: 2D matrix search, time complexity from $O(n^2)$ to $O(lgm+lgn)$. -\begin{lstlisting}[language = Python] -def searchMatrix(self, matrix, target): - """ - :type matrix: List[List[int]] - :type target: int - :rtype: bool - """ - - if not matrix: - return False - row, col = len(matrix), len(matrix[0]) - if row==0 or col==0: #for [[]] - return False - sr, er = 0, row-1 - #fisrst search the mid row - while sr<=er: - mid = sr+(er-sr)//2 - if target>matrix[mid][-1]: #go to the right side - sr=mid+1 - elif target < matrix[mid][0]: #go the the left side - er = mid-1 - else: #value might be in this row - #search in this row - lc, rc = 0, col-1 - while lc<=rc: - midc = lc+(rc-lc)//2 - if matrix[mid][midc]==target: - return True - elif target <-). -\end{enumerate} -In order to use two pointers, most times the data structure needs to be ordered in some way, and decrease the time complexity from $O(n^2)$ or $O(n^3)$ of two/three nested for/while loops to $O(n)$ of just one loop with two pointers and search each item just one time. In some cases, the time complexity is highly dependable on the data and the criteria we set. - -As shown in Fig.~\ref{fig:two pointer}, the pointer $i$ and $j$ can decide: a pair or a subarray (with all elements starts from i and end at j). We can either do search related with a pair or a subarray. For the case of subarray, the algorithm is called sliding window algorithm. As we can see, two pointers and sliding window algorithm can be used to solve K sum (Section~\ref{}), most of the subarray (Section~\ref{}), and string pattern match problems (Section~\ref{}). -\begin{figure}[h!] 
- \centering - \includegraphics[width=0.9\columnwidth]{fig/two_pointers.png} - \caption{Two pointer Example} - \label{fig:two pointer} -\end{figure} - -Two pointer algorithm is less of a talk and more of problem attached. We will explain this type of algorithm in virtue of both the leetcode problems and definition of algorihtms. -% \subsection{Two Pointers Techniques} -To understand two pointers techniques, better to use examples, here we use two examples: use slow-faster pointer to find the median and Floyd's fast-slow pointer algorithm for loop detection in an array/linked list and two pointers to get two sum. - -\subsection{Slow-fast Pointer} -\paragraph{Find middle node of linked list} The simpest example of slow-fast pointer application is to get the middle node of a given linked list. (LeetCode problem: 876. Middle of the Linked List) -\begin{lstlisting}[numbers=none] -Example 1 (odd length): - -Input: [1,2,3,4,5] -Output: Node 3 from this list (Serialization: [3,4,5]) - -Example 2 (even length): - -Input: [1,2,3,4,5,6] -Output: Node 4 from this list (Serialization: [4,5,6]) -\end{lstlisting} -\begin{figure}[h] - \centering - \includegraphics[width = 0.8\columnwidth]{fig/middle node of a given linked list.png} - \caption{Slow-fast pointer to find middle} - \label{fig:slow-faster} -\end{figure} -We place two pointers simultaneously at the head node, each one moves at different paces, the slow pointer moves one step and the fast moves two steps instead. When the fast pointer reached the end, the slow pointer will stop at the middle. For the loop, we only need to check on the faster pointer, make sure fast pointer and fast.next is not None, so that we can successfuly visit the fast.next.next. When the length is odd, fast pointer will point at the end node, because fast.next is None, when its even, fast pointer will point at None node, it terimates because fast is None. 
-\begin{lstlisting}[language=Python] -def middleNode(self, head): - slow, fast = head, head - while fast and fast.next: - fast = fast.next.next - slow = slow.next - return slow -\end{lstlisting} - -\paragraph{Floyd's Cycle Detection (Floyd's Tortoise and Hare)} Given a linked list which has a cycle, as shown in Fig.~\ref{fig:floyd_cycle}. To check the existence of the cycle is quite simple. We do exactly the same as traveling by the slow and fast pointer above, each at one and two steps. (LeetCode Problem: 141. Linked List Cycle). The code is pretty much the same with the only difference being that after we change the fast and slow pointer, we check if they are the same node. If true, a cycle is detected, else not. -\begin{lstlisting}[language=Python] -def hasCycle(self, head): - slow = fast = head - while fast and fast.next: - slow = slow.next - fast = fast.next.next - if slow == fast: - return True - return False -\end{lstlisting} - -To find the starting node of the cycle, let $x$ be the distance from the head to the starting node of the cycle, $y$ the distance from the starting node to the node where the slow and fast pointers meet, and $z$ the remaining distance from the meeting point back to the starting node. -\begin{figure}[h!] - \centering - \includegraphics[width=0.7\columnwidth]{fig/TQoyH.png} - \caption{Floyd's Cycle finding Algorithm} - \label{fig:floyd_cycle} -\end{figure} - -Now, let's try to devise the algorithm. Both slow and fast pointer starts at position 0, the node index they travel each step is: [0,1,2,3,...,k] and [0,2,4,6,...,2k] for slow and fast pointer respectively. Therefore, the total distance traveled by the slow pointer is half of the distance travelled by the fast pointer. From the above figure, we have the distance travelled by slow pointer to be $d_s = x+y$, and for the fast pointer $d_f = x+y+z+y = x+2y+z$. Substituting these into the relation $2*d_s = d_f$ gives $2(x+y) = x+2y+z$, and therefore $x = z$.
Therefore, by moving slow pointer to the start of the linked list after the meeting point, and making both slow and fast pointer to move one node at a time, they will meet at the starting node of the cycle. (LeetCode problem: 142. Linked List Cycle II (medium)). -\begin{lstlisting}[language=Python] -def detectCycle(self, head): - slow = fast = head - bCycle = False - while fast and fast.next: - slow = slow.next - fast = fast.next.next - if slow == fast: # a cycle is found - bCycle = True - break - - if not bCycle: - return None - # reset the slow pointer to find the starting node - slow = head - while fast and slow != fast: - slow = slow.next - fast = fast.next - return slow -\end{lstlisting} -\begin{figure}[h!] - \centering - \includegraphics[width=0.6\columnwidth]{fig/circularlinkedlist.png} - \caption{One example to remove cycle} - \label{fig:cycle_remove} -\end{figure} -In order to remove the cycle as shown in Fig.~\ref{fig:cycle_remove}, the starting node is when slow and fast intersect, the last fast node before they meet. For the example, we need to set -4 node's next node to None. Therefore, we modify the above code to stop at the last fast node instead: -\begin{lstlisting}[language=Python] - # reset the slow pointer to find the starting node - slow = head - while fast and slow.next != fast.next: - slow = slow.next - fast = fast.next - fast.next = None -\end{lstlisting} - - -\subsection{Opposite-directional Two pointer} -Two pointer is usually used for searching a pair in the array. There are cases the data is organized in a way that we can search all the result space by placing two pointers each at the start and rear of the array and move them to each other and eventually meet and terminate the search process. The search target should help us decide which pointer to move at that step. This way, each item in the array is guaranteed to be visited at most one time by one of the two pointers, thus making the time complexity to be $O(n)$. 
Binary search uses the technique of two pointers too: the left and right pointers together decide the current search space, but it eliminates half of the search space at each step instead. - -\paragraph{Two Sum - Input array is sorted} Given an array of integers that is already sorted in ascending order, find two numbers such that they add up to a specific target number. The function twoSum should return indices of the two numbers such that they add up to the target, where index1 must be less than index2. (LeetCode problem: 167. Two Sum II - Input array is sorted (easy).) -\begin{lstlisting}[numbers=none] -Input: numbers = [2,7,11,15], target = 9 -Output: [1,2] -Explanation: The sum of 2 and 7 is 9. Therefore index1 = 1, index2 = 2. -\end{lstlisting} - -Because the array is sorted, in the array [s,s1 ..., e1, e] the sum of any two integers is in the range [s+s1, e1+e]. By placing the two pointers at s and e respectively, we start the search from the middle of the possible range: [s+s1, s+e, e1+e]. Compare the target $t$ with the sum of the values at the two pointers, $v_1$ and $v_2$: -\begin{enumerate} - \item $t == v_1 + v_2$: found - \item $v_1+v_2 < t$: we need to move to the right side of the space, so we increase $v_1$ to get a larger value. - \item $v_1+v_2 > t$: we need to move to the left side of the space, so we decrease $v_2$ to get a smaller value. -\end{enumerate} -\begin{lstlisting}[language=Python] -def twoSum(self, numbers, target): - #use two pointers - n = len(numbers) - i, j = 0, n-1 - while i < j: - temp = numbers[i] + numbers[j] - if temp == target: - return [i+1, j+1] - elif temp < target: - i += 1 - else: - j -= 1 - return [] -\end{lstlisting} -% T1: If you see in the problem that you can do comparison and it is always one type of satisfactory element is in ahead of the other, this could be resolved by two pointers (slower and faster). Note: when the while loop stops, is there operations you need?
- -% Two pointers or three pointers are the most possible. \textit{Two pointers or three pointers is a superset of the sliding window algorithm, prefix sum too.} It can lower the complexity by one power level of n. - -%%%%%%%%%%%%%%%%%%%%%%Sliding Window Algorithm%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\subsection{Sliding Window Algorithm} -\begin{figure}[h!] - \centering - \includegraphics[width=0.7\columnwidth]{fig/sliding1.png} - \caption{Sliding Window Algorithm} - \label{fig:slide_window} -\end{figure} -Given an array, imagine that we have a fixed size window as shown in Fig.~\ref{fig:slide_window}, and we can slide it forward each time. If we are asked to compute the sum of each window, the brute-force solution would be $O(kn)$, where k is the window size and n is the array size, by using two nested for loops: one to set the starting point, and the other to compute the sum. The sliding window algorithm applied here uses the property that the sum of the current window ($S_c$) can be computed from the sum of the last window ($S_l$), knowing the items that just slid out and moved in, denoted $a_o$ and $a_i$: $S_c = S_l-a_o+a_i$. The state does not have to be a sum; in general, if we can compute $S_c$ from $S_l$, $a_o$ and $a_i$ in $O(1)$ with a function $S_c = f(S_l, a_o, a_i)$, then we call this the \textbf{sliding window property}. With this property the time complexity decreases to $O(n)$. -\begin{lstlisting}[language=Python] -def fixedSlideWindow(A, k): - n = len(A) - if k >= n: - return sum(A) - # compute the first window - acc = sum(A[:k]) - ans = acc - # slide the window - for i in range(n-k): # i is the start point of the window - j = i + k # j is the end point of the window - acc = acc - A[i] + A[j] - ans = max(ans, acc) - return ans -\end{lstlisting} - -\paragraph{When to use sliding window} It is important to know when we can use the sliding window algorithm; we summarize three important criteria: -\begin{enumerate} - \item It is a subarray/substring problem.
- \item \textbf{Sliding window property:} The requirement of the problem satisfies the sliding window property. - \item \textbf{Completeness:} By moving the left and right pointers of the sliding window, we must be able to cover the whole search space. The sliding window algorithm targets optimization problems, and by moving the left and right pointers we search the whole search space. \textbf{Therefore, verifying that the sliding window can cover the whole search space and guarantee completeness decides whether the method works.} -\end{enumerate} - -For example, 644. Maximum Average Subarray II (hard) does not satisfy completeness, because the average of a subarray does not follow a certain order that lets us decide how to move the window. - -\paragraph{Flexible Sliding Window Algorithm} Another form of sliding window is one where the window size is flexible, and it can be used to solve a lot of real problems related to a subarray or substring that is conditioned on some pattern. Compared with the fixed size window, we can first fix the left pointer, and push the right pointer to enlarge the window in order to find a subarray that satisfies a condition. Once the condition is met, we save the optimal result and shrink the window by moving the left pointer, which sets up a new starting point for the window. At any point in time only one of these pointers moves and the other one remains fixed. - - -\paragraph{Sliding Window Algorithm with Sum} In this part, we list two examples in which we use flexible sliding window algorithms to solve subarray problems with a sum condition. - -Given an array of n positive integers and a positive integer s, find the minimal length of a contiguous subarray of which the sum $\geq s$. If there isn't one, return 0 instead. (LeetCode Problem: 209. Minimum Size Subarray Sum (medium)).
-\begin{lstlisting}[numbers=none] -Example: - -Input: s = 7, nums = [2,3,1,2,4,3] -Output: 2 -Explanation: the subarray [4,3] has the minimal length under the problem constraint. -\end{lstlisting} -\begin{figure}[h!] - \centering - \includegraphics[width=0.5\columnwidth]{fig/prefixsum.png} - \caption{The array and the prefix sum} - \label{fig:prefix_sum_array} -\end{figure} -As we have shown in Fig.~\ref{fig:prefix_sum_array}, the prefix sum is the subarray starts with the first item in the array, we know that the sum of the subarray is monotonically increasing as the size of the subarray increase. Therefore, we place a 'window' with left and right as i and j at the first item first. The steps are as follows: -\begin{enumerate} - \item Get the optimal subarray starts from current i, 0: Then we first move the j pointer to include enough items that sum[0:j+1]>=s, this is the process of getting the optimial subarray that starts with 0. And assume j stops at $e_0$ - \item Get the optimal subarray ends with current j, $e_0$: we shrink the window size by moving the i pointer forward so that we can get the optimal subarray that ends with current j and the optimal subarray starts from $s_0$. - \item Now, we find the optimal solution for subproblem [0:i,0:j](the start point in range [0, i] and end point in range [0,j]. Starts from next i and j, and repeat step 1 and 2. -\end{enumerate} - -The above process is a standard flexible window size algorithm, and it is a complete search which searched all the possible result space. Both j and i pointer moves at most n, it makes the total operations to be at most 2n, which we get time complexity as $O(n)$. 
-\begin{lstlisting}[language=Python] -def minSubArrayLen(self, s, nums): - ans = float('inf') - n = len(nums) - i = j = 0 - acc = 0 # acc is the state - while j < n: - acc += nums[j]# increase the window size - while acc >= s:# shrink the window to get the optimal result - ans = min(ans, j-i+1) - acc -= nums[i] - i += 1 - j +=1 - return ans if ans != float('inf') else 0 -\end{lstlisting} -\begin{bclogo}[couleur = blue!30, arrondi=0.1,logo=\bccrayon,ombre=true]{What happens if there exists negative number in the array? } Sliding window algorithm will not work any more, because the sum of the subarray is no longer monotonically increase as the size increase. Instead (1) we can use prefix sum and organize them in order, and use binary search to find all posible start index. (2) use monotone stack (see LeetCode probelm: 325. Maximum Size Subarray Sum Equals k, 325. Maximum Size Subarray Sum Equals k (hard))) -\end{bclogo} - -More similar problems: -\begin{enumerate} - \item 674. Longest Continuous Increasing Subsequence (easy) -\end{enumerate} - -\paragraph{Sliding Window Algorithm with Substring} For substring problems, to be able to use sldiing window, s[i,j] should be gained from s[i,j-1] and s[i-1,j-1] should be gained from s[i,j-1]. Given a string, find the length of the longest substring without repeating characters. (LeetCode Problem: 3. Longest Substring Without Repeating Characters (medium)) -\begin{lstlisting}[numbers=none] -Example 1: - -Input: "abcabcbb" -Output: 3 -Explanation: The answer is "abc", with the length of 3. - -Example 2: - -Input: "bbbbb" -Output: 1 -Explanation: The answer is "b", with the length of 1. -\end{lstlisting} - -First, we know it is a substring problem. Second, it askes to find substring that only has unique chars, we can use hashmap to record the chars in current window, and this satisfy the sliding window property. 
When the current window violates the condition ( a repeating char), we shrink the window in a way to get rid of this char in the current window by moving the i pointer one step after this char. -\begin{lstlisting}[language=Python] -def lengthOfLongestSubstring(self, s): - if not s: - return 0 - n = len(s) - state = set() - i = j = 0 - ans = -float('inf') - while j < n: - if s[j] not in state: - state.add(s[j]) - ans = max(ans, j-i) - else: - # shrink the window: get this char out of the window - while s[i] != s[j]: # find the char - state.remove(s[i]) - i += 1 - # skip this char - i += 1 - j += 1 - return ans if ans != -float('inf') else 0 -\end{lstlisting} - -Now, let us see another example with string ang given a pattern to match. Given a string S and a string T, find the minimum window in S which will contain all the characters in T in complexity O(n). (LeetCode Problem: 76. Minimum Window Substring (hard)) -\begin{lstlisting}[numbers=none] -Example: - -Input: S = "ADOBECODEBANC", T = "ABC" -Output: "BANC" -\end{lstlisting} - -In this problem, the desirable window is one that has all characters from T. The solution is pretty intuitive. We keep expanding the window by moving the right pointer. When the window has all the desired characters, we contract (if possible) and save the smallest window till now. The only difference compared with the above problem is the definition of desirable: we need to compare the state of current window with the required state in T. They can be handled as a hashmap with character as key and frequency of characters as value. 
-\begin{lstlisting}[language=Python] -def minWindow(self, s, t): - dict_t = Counter(t) - state = Counter() - required = len(dict_t) - - # left and right pointer - i, j = 0, 0 - - formed = 0 - ans = float("inf"), None # min len, and start pos - - while j < len(s): - char = s[j] - # record current state - if char in dict_t: - state[char] += 1 - if state[char] == dict_t[char]: - formed += 1 - - # Try and contract the window till the point where it ceases to be 'desirable'. - # bPrint = False - while i<=j and formed == required: - # if not bPrint: - # print('found:', s[i:j+1], i, j) - # bPrint = True - char = s[i] - if j-i+1 < ans[0]: - ans = j - i + 1, i - # change the state - if char in dict_t: - state[char] -= 1 - if state[char] == dict_t[char]-1: - formed -= 1 - - # Move the left pointer ahead, - i += 1 - - # Keep expanding the window - j += 1 - # if bPrint: - # print('move to:', s[i:j+1], i, j) - return "" if ans[0] == float("inf") else s[ans[1] : ans[1] + ans[0]] -\end{lstlisting} - -The process would be: -\begin{lstlisting}[numbers=none] -found: ADOBEC 0 5 -move to: DOBECO 1 6 -found: DOBECODEBA 1 10 -move to: ODEBAN 6 11 -found: ODEBANC 6 12 -move to: ANC 10 13 -\end{lstlisting} -\paragraph{Three Pointers and Sliding Window Algorithm} -Sometimes, by manipulating two pointers are not enough for us to get the final solution. -\begin{examples} -\item \textbf{930. Binary Subarrays With Sum.} In an array A of 0s and 1s, how many non-empty subarrays have sum S? 
-\begin{lstlisting}[numbers=none] -Example 1: - -Input: A = [1,0,1,0,1], S = 2 -Output: 4 -Explanation: -The 4 subarrays are bolded below: -[1,0,1,0,1] -[1,0,1,0,1] -[1,0,1,0,1] -[1,0,1,0,1] -\end{lstlisting} -\textit{Note: A.length <= 30000, 0 <= S <= A.length, A[i] is either 0 or 1.} - -For example in the following problem, if we want to use two pointers to solve the problem, we would find we miss the case; like in the example $1, 0, 1, 0, 1$, when $j = 5$, $i = 1$, the sum is $2$, but the algorithm would miss the case of $i = 2$, which has the same sum value. - -To solve this problem, we keep another index $i_hi$, in addition to the moving rule of $i$, it also moves if the sum is satisfied and that value is $0$. This is actually a Three pointer algorithm, it is also a mutant sliding window algorithm. -\begin{lstlisting}[language=Python] -class Solution: - def numSubarraysWithSum(self, A, S): - i_lo, i_hi, j = 0, 0, 0 #i_lo <= j - sum_window = 0 - ans = 0 - while j < len(A): - - sum_window += A[j] - - while i_lo < j and sum_window > S: - sum_window -= A[i_lo] - i_lo += 1 - # up till here, it is standard sliding window - - # now set the extra pointer at the same location of the i_lo - i_hi = i_lo - while i_hi < j and sum_window == S and not A[i_hi]: - i_hi += 1 - if sum_window == S: - ans += i_hi - i_lo + 1 - - j += 1 #increase the pointer at last so that we do not need to check if jLeaf Path: the starting and ending node of the path is the root and leaf node respectively; - \item Root->Any Path: the starting and ending node of the path is the root and any node (Root, inner, leaf node) respectively; - \item Any->Any Path: the starting and ending node of the path is both any node (Root, inner, leaf node) respectively. 
-\end{enumerate} - -\subsubsection{Properties} -\begin{figure}[h] - \centering - \includegraphics[width=0.6\columnwidth]{fig/tree_property.png} - \caption{Example of a Tree with height and depth denoted} - \label{fig:tree_property} -\end{figure} -The property of a tree starts from the property of a \textit{node} (shown in Fig~\ref{fig:tree_property}). -\begin{enumerate} - \item The \textbf{depth} (or level) of a node is the number of edges from the node to the tree's root node. And it can be obtained from up-down level-by-level traversal. - \item The \textbf{height} of a node is the number of edges on the \textit{longest path} from the node to a leaf. A leaf node will have a height of 0. - \item The \textbf{descendant} of a node is any node that is reachable by repeated proceeding from parent to child starting from this node. They are also known as subchild. - \item The \textbf{ancestor} of a node is any node that is reachable by repeated proceeding from child to parent starting from this node. - \item The \textbf{degree} of a node is the number of its children. A leaf is necessarily degreed zero. -\end{enumerate} - -Properties of a \textit{tree}: -\begin{enumerate} - \item The \textbf{height}(or \textbf{depth}) of a tree would be the height of its root node, or equivalently, the depth of its deepest node. - \item The \textbf{diameter} (or \textbf{width}) of a tree is the number of nodes (or edges) on the longest path between any two leaf nodes. -\end{enumerate} - -\textbf{Forest} is a set of $n>=0$ disjoint treees. - - -\paragraph{Types of Binary Tree} -There are four common types of Binary Tree: 1) Full Binary Tree, 2) Complete Binary Tree, 3) Perfect Binary Tree, 4) Balanced Binary Tree. - -\textbf{Full Binary Tree} A binary tree is full if every node has 0 or 2 children. We can also say that a full binary tree is a binary tree in which all nodes except leaves have two children. 
In a full binary tree, the number of leaves (L) and the number of non-leaf nodes (Non-L) satisfy the relation: L = Non-L + 1. - -\textbf{Complete Binary Tree} A binary tree is a complete binary tree if all levels are completely filled except possibly the last level, and the last level has all its nodes as far left as possible. - -\textbf{Perfect Binary Tree} A perfect binary tree is a binary tree in which all internal nodes have two children and all leaves are at the same level. - -\textbf{Balanced Binary Tree} A binary tree is balanced if the height of the tree is $O(\log n)$ where n is the number of nodes. For example, an AVL tree maintains $O(\log n)$ height by making sure that the difference between the heights of the left and right subtrees of every node is at most 1. - -\textbf{A degenerate (or pathological) tree} A tree where every internal node has one child. Such trees are performance-wise the same as a linked list. -%%%%%%%%%%%%%%%%%%%%%binary search tree%%%%%%%%%%%%%%%%%%%%%% -\section{Binary Search Tree} -\label{sec_binary_search_tree} - In computer science, a \textbf{search tree} is a tree data structure used for locating specific keys from within a set. In order for a tree to function as a search tree, the key for each node must be greater than any keys in subtrees on the left and less than any keys in subtrees on the right. - -The advantage of search trees is their efficient search time ($O(\log n)$) given the tree is reasonably balanced, which is to say the leaves at either end are of comparable depths, as we introduced with the \textbf{balanced binary tree}. - -The search tree data structure supports many dynamic-set operations, including SEARCH, MINIMUM, MAXIMUM, PREDECESSOR, SUCCESSOR, INSERT, and DELETE. Thus, a search tree can be used both as a dictionary and as a priority queue. - - -% Search trees are often used to implement an associative array. The search tree algorithm uses the key from the key-value pair to find a location, and then the application stores the entire key–value pair at that location.
- -In this section, we will introduce the two most commonly used types of search trees: the binary search tree (BST) and the Trie, where the keys are usually numbers and strings respectively. - -\subsection{Binary Searching Tree} -\label{concept_binary_search_tree} -A binary search tree (BST) is, as the name suggests, a search tree organized as a binary tree. Each internal node of a binary search tree stores a key (and optionally, an associated value) and has two distinguished sub-trees (if a node has only one sub-tree, the other is None). - -BSTs keep their keys in sorted order, so that lookup and other operations can use the \textit{principle of the binary search tree}: - -\indent Let $x$ be a node in a binary search tree. If $y$ is a node in the left subtree of $x$, then $y.key \leq x.key$. If $y$ is a node in the right subtree of $x$, then $y.key \geq x.key$. - -There are three possible ways to properly define a BST, and we use $l$ and $r$ to represent the left and right child of node $x$: 1) $l.key \leq x.key < r.key$, 2) $l.key < x.key \leq r.key$, 3) $l.key < x.key < r.key$. With the first and second definitions, the resulting BST allows duplicates, while the third definition does not. One example of a BST without duplicates is shown in Fig~\ref{fig:bst}. -\begin{figure}[h] - \centering - \includegraphics[width = 0.6\columnwidth]{fig/Binary_search_tree.png} - \caption{Example of Binary search tree of depth 3 and 8 nodes.} - \label{fig:bst} -\end{figure} - -\textbf{Solve Duplicate Problem} When there are duplicates, things can be more complicated, and the college algorithm book did not really tell us what to do when there are duplicates.
If you use the definition "left <= root < right" and you have a tree like: -\begin{lstlisting}[numbers=none] - 3 - / \ - 2 4 -\end{lstlisting} - -then adding a ``3'' duplicate key to this tree will result in: -\begin{lstlisting} [numbers=none] - 3 - / \ - 2 4 - \ - 3 -\end{lstlisting} -Note that the duplicates are not in contiguous levels. - -This is a big issue when allowing duplicates in a BST representation as the one above: duplicates may be separated by any number of levels, so checking for duplicate's existence is not that simple as just checking for immediate children of a node. - -An option to avoid this issue is to not represent duplicates structurally (as separate nodes) but instead use a counter that counts the number of occurrences of the key. The previous example would then have a tree like: -\begin{lstlisting} - 3(1) - / \ - 2(1) 4(1) - \end{lstlisting} - -and after insertion of the duplicate "3" key it will become: -\begin{lstlisting} - 3(2) - / \ - 2(1) 4(1) - \end{lstlisting} - -This simplifies SEARCH, DELETE and INSERT operations, at the expense of some extra bytes and counter operations. In the following content, we assume using definition three so that our BST will have no duplicates. - -\subsubsection{Operations} -When looking for a key in a tree (or a place to insert a new key), we traverse the tree from root to leaf, making comparisons to keys stored in the nodes of the tree and deciding, on the basis of the comparison, to continue searching in the left or right subtrees. On average, this means that each comparison allows the operations to skip about half of the tree, so that each SEARCH, INSERT or DELETE takes time proportional to the logarithm of the number of items stored in the tree. This is much better than the linear time required to find items by key in an (unsorted) array, but slower than the corresponding operations on hash tables. 
- -% \textbf{Definition} A binary search tree is a rooted binary tree, whose internal nodes each store a key (and optionally, an associated value) and each have two distinguished sub-trees, commonly denoted left and right. The tree additionally satisfies the binary search property, which states that the key in each node must be greater than or equal to any key stored in the left sub-tree, and less than or equal to any key stored in the right sub-tree.[1]:287 The leaves (final nodes) of the tree contain no key and have no structure to distinguish them from one another. - -In order to build a BST, we need to INSERT a series of elements in the tree organized by the searching tree property, and in order to INSERT, we need to SEARCH the position to INSERT this element. Thus, we introduce these operations in the order of SEARCH, INSERT and GENERATE. -\begin{figure}[h] - \centering - \includegraphics[width=0.6\columnwidth]{fig/bst_insertion.png} - \caption{The lightly shaded nodes indicate the simple path from the root down to the position where the item is inserted. The dashed line indicates the link in the tree that is added to insert the item. } - \label{fig:bst_operation} -\end{figure} -\paragraph{SEARCH} - -There are two different implementations for SEARCH: recursive and iterative. 
-\begin{lstlisting}[language = Python] -# recursive searching -def search(root,key): - # Base Cases: root is null or key is present at root - if root is None or root.val == key: - return root - - # Key is greater than root's key - if root.val < key: - return search(root.right,key) - - # Key is smaller than root's key - return search(root.left,key) -\end{lstlisting} -Also, we can write it in an iterative way, which helps us save the heap space: -\begin{lstlisting}[language = Python] -# iterative searching -def iterative_search(root,key): - while root is not None and root.val != key: - if root.val < key: - root = root.right - else: - root = root.left - return root -\end{lstlisting} -\paragraph{INSERT} -Assuming we are inserting a node $13$ into the tree shown in Fig~\ref{fig:bst_operation}. A new key is always inserted at leaf (there are other ways to insert but here we only discuss this one way). We start searching a key from root till we hit an empty node. Then we new a TreeNode and insert this new node either as the left or the child node according to the searching property. Here we still shows both the recursive and iterative solutions. -\begin{lstlisting}[language = Python] -# Recursive insertion -def insertion(root, key): - if root is None: - root = TreeNode(key) - return root - if root.val < key: - root.right = insertion(root.right, key) - else: - root.left = insertion(root.left, key) - return root -\end{lstlisting} -The above code needs return value and reassign the value for the right and left every time, we can use the following code which might looks more complex with the if condition but works faster and only assign element at the end. 
-\begin{lstlisting}[language=Python] -# recursive insertion -def insertion(root, val): - if root is None: - root = TreeNode(val) - return - if val > root.val: - if root.right is None: - root.right = TreeNode(val) - else: - insertion(root.right, val) - else: - if root.left is None: - root.left = TreeNode(val) - else: - insertion(root.left, val) -\end{lstlisting} -We can search the node iteratively and save the previous node. The while loop would stop when hit at an empty node. There will be three cases in the case of the previous node. -\begin{lstlisting}[numbers=none] -1. The previous node is None, which means the tree is empty, so we assign a root node with the value -2. The previous node has a value larger than the key, means we need to put key as left child. -3. The previous node has a value smaller than the key, means we need to put key as right child. -\end{lstlisting} -\begin{lstlisting}[language = Python] -# iterative insertion -def iterativeInsertion(root, key): - pre_node = None - node = root - while node is not None: - pre_node = node - if key < node.val: - node = node.left - else: - node = node.right - # we reached to the leaf node which is pre_node - if pre_node is None: - root = TreeNode(key) - elif pre_node.val > key: - pre_node.left = TreeNode(key) - else: - pre_node.right = TreeNode(key) - return root -\end{lstlisting} -\paragraph{BST Generation} -First, let us declare a node as BST which is the root node. Given a list, we just need to call INSERT for each element. The time complexity can be $O(n\log_n)$. -\begin{lstlisting}[language=Python] -datas = [8, 3, 10, 1, 6, 14, 4, 7, 13] -BST = None -for key in datas: - BST = iterativeInsertion(BST, key) -print(LevelOrder(BST)) -# output -# [8, 3, 10, 1, 6, 14, 4, 7, 13] -\end{lstlisting} -\paragraph{DELETE} -Before we start to check the implementation of DELETE, I would suggest the readers to read the next subsection--the Features of BST at first, and then come back here to finish this paragraph. 
- -When we delete a node, three possibilities arise. -\begin{lstlisting}[numbers=none] -1) Node to be deleted is leaf: Simply remove from the tree. - - 50 50 - / \ delete(20) / \ - 30 70 ---------> 30 70 - / \ / \ \ / \ - 20 40 60 80 40 60 80 - -2) Node to be deleted has only one child: Copy the child to the node and delete the child - - 50 50 - / \ delete(30) / \ - 30 70 ---------> 40 70 - \ / \ / \ - 40 60 80 60 80 - -3) Node to be deleted has two children: Find inorder successor of the node. Copy contents of the inorder successor to the node and delete the inorder successor. Note that inorder predecessor can also be used. - - 50 60 - / \ delete(50) / \ - 40 70 ---------> 40 70 - / \ \ - 60 80 80 - -The important thing to note is, inorder successor is needed only when right child is not empty. In this particular case, inorder successor can be obtained by finding the minimum value in right child of the node. -\end{lstlisting} -\subsubsection{Features of BST} -\label{concept_features_bst} -\paragraph{Minimum and Maximum} The operation is similar to search, to find the minimum, we always traverse on the left subtree. For the maximum, we just need to replace the ``left'' with ``right'' in the key word. Here the time complexity is the same $O(lgn)$. -\begin{lstlisting}[language=Python] -# recursive -def get_minimum(root): - if root is None: - return None - if root.left is None: # a leaf or node has no left subtree - return root - if root.left: - return get_minimum(root.left) - -# iterative -def iterative_get_minimum(root): - while root.left is not None: - root = root.left - return root -\end{lstlisting} - -Also, sometimes we need to search two additional items related to a given node: successor and predecessor. The structure of a binary search tree allows us to determine the successor or the predecessor of a tree without ever comparing keys. - -\paragraph{Successor of a Node} A successor of node $x$ is the smallest item in the BST that is strictly greater than $x$. 
It is also called the in-order successor, which is the next node in the inorder traversal of the binary tree. The inorder successor is None for the last node in the inorder traversal. How we find it depends on whether our TreeNode data structure has a parent pointer. - -Use parent node: the algorithm has two cases on the basis of the right subtree of the input node. -\begin{lstlisting}[numbers=none] -For the right subtree of the node: -1) If it is not None, then the successor is the minimum node in the right subtree. e.g. for node 12, successor(12) = 13 = min(12.right) -2) If it is None, then the successor is one of its ancestors. We traverse up using the parent node until we find a node which is the left child of its parent. Then the parent node here is the successor. e.g. successor(2)=5 -\end{lstlisting} - The Python code is provided: -\begin{lstlisting}[language = Python] -def Successor(root, n): -# Step 1 of the above algorithm - if n.right is not None: - return get_minimum(n.right) -# Step 2 of the above algorithm -p = n.parent -while p is not None: - if n == p.left :# if current node is the left child node, then we found the successor, p - return p - n = p - p = p.parent -return p -\end{lstlisting} -However, if your tree node has no parent pointer defined, you cannot traverse back through its ancestors. In that case we have only one option: use the inorder tree traversal, and find the element right after the node. \begin{lstlisting}[numbers=none] -For the right subtree of the node: -1) If it is not None, then the successor is the minimum node in the right subtree. e.g. for node 12, successor(12) = 13 = min(12.right) -2) If it is None, then the successor is one of its ancestors. We traverse down from the root till we find current node, the node in advance of current node is the successor. e.g. 
successor(2)=5 -\end{lstlisting} -\begin{lstlisting}[language=Python] -def SuccessorInorder(root, n): - # Step 1 of the above algorithm - if n.right is not None: - return get_minimum(n.right) - # Step 2 of the above algorithm - succ = None - while root is not None: - - if n.val > root.val: - root = root.right - elif n.val < root.val: - succ = root - root = root.left - else: # we found the node, no need to traverse - break - return succ -\end{lstlisting} - -\paragraph{Predecessor of A Node} A predecessor of node $x$ on the other side, is the largest item in BST that is strictly smaller than $x$. It is also called in-order predecessor, which denotes the previous node in Inorder traversal of BST. e.g. for node 14, predecessor(14)=12= max(14.left). The same searching rule applies, if node $x$'s left subtree exists, we return the maximum value of the left subtree. Otherwise we traverse back its parents, and make sure it is the right subtree, then we return the value of its parent, otherwise the reversal traverse keeps going. -\begin{lstlisting}[language = Python] -def Predecessor(root, n): -# Step 1 of the above algorithm - if n.left is not None: - return get_maximum(n.left) -# Step 2 of the above algorithm -p = n.parent -while p is not None: - if n == p.right :# if current node is the right node, parent is smaller - return p - n = p - p = p.parent -return p -\end{lstlisting} - The worst case to find the successor or the predecessor of a BST is to search the height of the tree: include the one of the subtrees of the current node, and go back to all the parents and greatparents of this code, which makes it the height of the tree. The expected time complexity is $O(lgn)$. And the worst is when the tree line up and has no branch, which makes it $O(n)$. 
- - \paragraph{Lowest Common Ancestor(LCA)} The lowest common ancestor is defined between two nodes v and w as the lowest node in T that has both v and w as descendants (where we allow a node to be a descendant of itself).” e.g., if u=5,w=19, then we first node when we recursively visiting the tree that is within [u,w], then the LCA is 14. Compared with LCA for binary tree, because of the searching property of searching tree, it is even simipler: - \begin{lstlisting} - treverse the tree: - if node.val is in [s, b], return node is LCA - if node.val > b, traverse node.left - if node.val < s, traverse node.right - \end{lstlisting} - - 235. Lowest Common Ancestor of a Binary Search Tree - - Given a binary search tree (BST), find the lowest common ancestor (LCA) of two given nodes in the BST. -\begin{lstlisting} -Given binary search tree: root = [6,2,8,0,4,7,9,null,null,3,5] - - _______6______ - / \ - ___2__ ___8__ - / \ / \ - 0 _4 7 9 - / \ - 3 5 - -Example 1: - -Input: root = [6,2,8,0,4,7,9,null,null,3,5], p = 2, q = 8 -Output: 6 -Explanation: The LCA of nodes 2 and 8 is 6. - -Example 2: - -Input: root = [6,2,8,0,4,7,9,null,null,3,5], p = 2, q = 4 -Output: 2 -Explanation: The LCA of nodes 2 and 4 is 2, since a node can be a descendant of itself - according to the LCA definition. -\end{lstlisting} -\begin{lstlisting}[language=Python] -def lowestCommonAncestor(self, root, p, q): - """ - :type root: TreeNode - :type p: TreeNode - :type q: TreeNode - :rtype: TreeNode - """ - s = min(p.val,q.val) - b = max(p.val,q.val) - def LCA(node): - if not node: - return None - if node.val>b: - return LCA(node.left) - if node.val 9 -update(1, 2) -sumRange(0, 2) -> 8 -\end{lstlisting} -Note: -\begin{enumerate} - \item - The array is only modifiable by the update function. - \item You may assume the number of calls to update and sumRange function is distributed evenly. -\end{enumerate} -\paragraph{Solution: Brute-Force.} There are several ways to solve the RSQ. 
The \textbf{brute-force solution} is to simply iterate the array from index i to j to sum up the elements and return the sum. This gives $O(n)$ per query; such an algorithm may be infeasible if queries are constantly required. Because the update and query actions are distributed evenly, it still gives $O(n)$ time complexity and $O(n)$ in space, which will get a TLE (Time Limit Exceeded) error. - -\paragraph{Solution: Segment Tree.} With Segment Tree, we can store the TreeNode's val as the sum of elements in its corresponding interval. We can define a TreeNode as follows: -\begin{lstlisting}[language=Python] -class TreeNode: - def __init__(self, val, start, end): - self.val = val - self.start = start - self.end = end - self.left = None - self.right = None -\end{lstlisting} -As we will see in the process, storing the start and end index in each node is actually not necessary: if we save the size of the array, we can decide the start and end index of each node on the fly and save space. -\begin{figure}[h] - \centering - \includegraphics[width=0.8\columnwidth]{fig/307_RSQ_SegmentTree.png} - \caption{Illustration of Segment Tree. } - \label{fig:segment_tree} -\end{figure} -\paragraph{Build Segment Tree.} Because each leaf of the tree is a single element, we can use divide and conquer to build the tree recursively. For a given node, we first build and return its left and right children (including calculating their sums) in advance in the `divide' step, and in the `conquer' step, we calculate this node's sum using its left and right children's sums, and set its left and right children. Because there are $2n-1$ nodes in total, the time and space complexity are $O(n)$. 
-\begin{lstlisting}[language=Python] -def _buildSegmentTree(self, nums, s, e): #start index and end index - if s > e: - return None - if s == e: - return self.TreeNode(nums[s]) - - m = (s + e)//2 - # divide - left = self._buildSegmentTree(nums, s, m) - right = self._buildSegmentTree(nums, m+1, e) - - # conquer - node = self.TreeNode(left.val + right.val) - node.left = left - node.right = right - return node -\end{lstlisting} -\paragraph{Update Segment Tree.} Updating the value at index i is like searching the tree for leaf node with range [i, i]. We just need to recalculate the value of the node in the path of the searching. This operation takes $O(\log n)$ time complexity. -\begin{lstlisting}[language=Python] -def _updateNode(self, i, val, root, s, e): - if s == e: - root.val = val - return - m = (s + e)//2 - if i <= m: - self._updateNode(i, val, root.left, s, m) - else: - self._updateNode(i, val, root.right, m+1, e) - root.val = root.left.val + root.right.val - return -\end{lstlisting} -\paragraph{Range Sum Query.} Each query range [i, j], will be a combination of ranges of one or multiple ranges. For instance, as in the segment tree shown in Fig~\ref{fig:segment_tree}, for range [2, 4], it will be combination of [2, 3] and [4, 4]. The process is similar to the updating, we starts from the root, and get its middle index m: 1) if [i, j] is the same as [s, e] that i == s and j == e, then return the value, 2) if the interval [i, j] is within range [s, m] that j <=m , then we just search it in the left branch. 3) if [i, j] in within range [m+1, e] that i>m, then we search for the right branch. 4) else, we search both branch and the left branch has target [i, m], and the right side has target [m+1, j], the return value should be the sum of both sides. The time complexity is still $O(\log n)$. 
-\begin{lstlisting}[language=Python] -def _rangeQuery(self, root, i, j, s, e): - if s > e or i > j: - return 0 - if s == i and j == e: - return root.val if root is not None else 0 - - m = (s + e)//2 - - if j <= m: - return self._rangeQuery(root.left, i, j, s, m) - elif i > m: - return self._rangeQuery(root.right, i, j, m+1, e) - else: - return self._rangeQuery(root.left, i, m, s, m) + self._rangeQuery(root.right, m+1, j, m+1, e) -\end{lstlisting} -The complete code is given: -\begin{lstlisting}[language=Python] -class NumArray: - class TreeNode: - def __init__(self, val): - self.val = val - self.left = None - self.right = None - - def __init__(self, nums): - self.n = 0 - self.st = None - if nums: - self.n = len(nums) - self.st = self._buildSegmentTree(nums, 0, self.n-1) - - def update(self, i, val): - self._updateNode(i, val, self.st, 0, self.n -1) - - def sumRange(self, i, j): - return self._rangeQuery(self.st, i, j, 0, self.n-1) -\end{lstlisting} -\end{examples} - - - -Segment tree can be used here to lower the complexity of each query to $O(log n)$. - -%%%%%%%%%%%%%%%%%%%Trie%%%%%%%%%%%%%%%%% -\section{Trie for String} -\label{concept_trie} -\paragraph{Definition} Trie comes from the word re\textbf{Trie}val. In computer science, a trie, also called digital tree, radix tree or prefix tree which like BST is also a kind of search tree for finding substring in a text. We can solve string matching in $O(|T|)$ time, where |T| is the size of our text. This purely algorithmic approach has been studied extensively in the algorithms: Knuth-Morris-Pratt, Boyer-Moore, and Rabin-Karp. However, we entertain the possibility that multiple queries will be made to the same text. This motivates the development of data structures that preprocess the text to allow for more efficient queries. Such efficient data structure is Trie, which can do each query in $O(P)$, where P is the length of the pattern string. 
Trie is an ordered tree structure, which is used mostly for storing strings (like words in dictionary) in a compact way. -\begin{enumerate} - \item In a Trie, each child branch is labeled with letters in the alphabet $\sum$. Actually, it is not necessary to store the letter as the key, because if we order the child branches of every node alphabetically from left to right, the position in the tree defines the key which it is associated to. - \item The root node in a Trie represents an empty string. -\end{enumerate} -% An ordered tree data structure used to store a dynamic set or associative array where the keys are usually strings. Unlike a binary search tree, no node in the tree stores the key associated with that node; instead, its position in the tree defines the key with which it is associated. - -Now, we define a trie Node: first it would have a bool variable to denote if it is the end of the word and a children which is a list of of 26 children TrieNodes. -\begin{lstlisting}[language= Python] -class TrieNode: - # Trie node class - def __init__(self): - self.children = [None]*26 - # isEndOfWord is True if node represent the end of the word - self.isEndOfWord = False -\end{lstlisting} -\begin{figure}[h] - \centering - \includegraphics[width=0.6\columnwidth]{fig/trie_compact_trie.jpg} - \caption{Trie VS Compact Trie} - \label{fig:trie_compact_trie} -\end{figure} - -\paragraph{Compact Trie} If we assign only one letter per edge, we are not taking full advantage of the trie’s tree structure. It is more useful to consider compact or compressed tries, tries where we remove the one letter per edge constraint, and contract non-branching paths by concatenating the letters on these paths. -In this way, every node branches out, and every node traversed represents a choice between two different words. The compressed trie that corresponds to our example trie is also shown in Figure -~\ref{fig:trie_compact_trie}. 
- -\paragraph{Operations: INSERT, SEARCH} -% Now, let us solve an LeetCode problem together which requires us to implement a complete Trie that with the operations INSERT, SEARCH, STARTWITH. All of these operations are actually quickly similar and they all require us to simultaneously iterate each character in the input string (or word) and each level of the Trie on the location of that character. So, it would not be hard to get the worst time complexity when we searched the whole tree or finished iterating the characters in the input. -Both for INSERT and SEARCH, it takes $O(m)$, where m is the length of the word/string we wand to insert or search in the trie. Here, we use an LeetCode problem as an example showing how to implement INSERT and SEARCH. Because constructing a trie is a series of INSERT operations which will take $O(n*m)$, n is the total numbers of words/strings, and m is the average length of each item. The space complexity fof the non-compact Trie would be $O(N*|\sum|)$, where $|\sum|$ is the alphlbetical size, and N is the total number of nodes in the trie structure. The upper bound of N is $n*m$. -\begin{figure}[h] - \centering - \includegraphics[width=0.6\columnwidth]{fig/Trie.png} - \caption{Trie Structure} - \label{fig:trie} -\end{figure} -\begin{examples} -\item \textbf{208. Implement Trie (Prefix Tree) (medium).} Implement a trie with insert, search, and startsWith methods. -\begin{lstlisting} -Example: -Trie trie = new Trie(); -trie.insert("apple"); -trie.search("apple"); // returns true -trie.search("app"); // returns false -trie.startsWith("app"); // returns true -trie.insert("app"); -trie.search("app"); // returns true -\end{lstlisting} -\textit{Note: You may assume that all inputs are consist of lowercase letters a-z. 
All inputs are guaranteed to be non-empty strings.} - -\paragraph{INSERT} with INSERT operation, we woould be able to insert a given word in the trie, when traversing the trie from the root node which is a TrieNode, with each letter in world, if its corresponding node is None, we need to put a node, and continue. At the end, we need to set that node's endofWord variable to True. thereafter, we would have a new branch starts from that node constructured. For example, when we first insert ``app`` as shown in Fig~\ref{fig:trie_compact_trie}, we would end up building branch ``app``, and with ape, we would add nodes ``e`` as demonstrated with red arrows. -\begin{lstlisting}[language=Python] -def insert(self, word): - """ - Inserts a word into the trie. - :type word: str - :rtype: void - """ - node = self.root #start from the root node - for c in word: - loc = ord(c)-ord('a') - if node.children[loc] is None: # char does not exist, new one - node.children[loc] = self.TrieNode() - # move to the next node - node = node.children[loc] - # set the flag to true - node.is_word = True -\end{lstlisting} - -\paragraph{SEARCH} For SEARCH, like INSERT, we traverse the trie using the letters as pointers to the next branch. There are three cases: 1) for word P, if it doesnt exist, but its prefix does exist, then we return False. 2) If we found a matching for all the letters of P, at the last node, we need to check if it is a leaf node where is\_word is True. STARTWITH is just slightly different from SEARCH, it does not need to check that and return True after all letters matched. 
-\begin{lstlisting}[language=Python] -def search(self, word): - node = self.root - for c in word: - loc = ord(c)-ord('a') - # case 1: not all letters matched - if node.children[loc] is None: - return False - node = node.children[loc] - # case 2 - return True if node.is_word else False -\end{lstlisting} -\begin{lstlisting}[language=Python] -def startWith(self, word): - node = self.root - for c in word: - loc = ord(c)-ord('a') - # case 1: not all letters matched - if node.children[loc] is None: - return False - node = node.children[loc] - # case 2 - return True -\end{lstlisting} -Now complete the given Trie class with TrieNode and \_\_init\_\_ function. -\begin{lstlisting}[language=Python] -class Trie: - class TrieNode: - def __init__(self): - self.is_word = False - self.children = [None] * 26 #the order of the node represents a char - - def __init__(self): - """ - Initialize your data structure here. - """ - self.root = self.TrieNode() # root has value None -\end{lstlisting} -\end{examples} - -\begin{examples} -\item \textbf{336. Palindrome Pairs (hard).} Given a list of unique words, find all pairs of distinct indices (i, j) in the given list, so that the concatenation of the two words, i.e. words[i] + words[j] is a palindrome. -\begin{lstlisting} -Example 1: - -Input: ["abcd","dcba","lls","s","sssll"] -Output: [[0,1],[1,0],[3,2],[2,4]] -Explanation: The palindromes are ["dcbaabcd","abcddcba","slls","llssssll"] - -Example 2: - -Input: ["bat","tab","cat"] -Output: [[0,1],[1,0]] -Explanation: The palindromes are ["battab","tabbat"] -\end{lstlisting} -\textbf{Solution: One Forward Trie and Another Backward Trie.} We start from the naive solution, which means for each element, we check if it is palindrome with all the other strings. 
And from the example 1, [3,3] can be a pair, but it is not one of the outputs, which means this is a combination problem, the time complexity is ${C_n}{C_{n-1}}$, and multiply it with the average length of all the strings, we make it $m$, which makes the complexity to be $O(mn^2)$. However, we can use Trie Structure, -\begin{lstlisting}[language = Python] -from collections import defaultdict - - -class Trie: - def __init__(self): - self.links = defaultdict(self.__class__) - self.index = None - # holds indices which contain this prefix and whose remainder is a palindrome - self.pali_indices = set() - - def insert(self, word, i): - trie = self - for j, ch in enumerate(word): - trie = trie.links[ch] - if word[j+1:] and is_palindrome(word[j+1:]): - trie.pali_indices.add(i) - trie.index = i - - -def is_palindrome(word): - i, j = 0, len(word) - 1 - while i <= j: - if word[i] != word[j]: - return False - i += 1 - j -= 1 - return True - - -class Solution: - def palindromePairs(self, words): - '''Find pairs of palindromes in O(n*k^2) time and O(n*k) space.''' - root = Trie() - res = [] - for i, word in enumerate(words): - if not word: - continue - root.insert(word[::-1], i) - for i, word in enumerate(words): - if not word: - continue - trie = root - for j, ch in enumerate(word): - if ch not in trie.links: - break - trie = trie.links[ch] - if is_palindrome(word[j+1:]) and trie.index is not None and trie.index != i: - # if this word completes to a palindrome and the prefix is a word, complete it - res.append([i, trie.index]) - else: - # this word is a reverse suffix of other words, combine with those that complete to a palindrome - for pali_index in trie.pali_indices: - if i != pali_index: - res.append([i, pali_index]) - if '' in words: - j = words.index('') - for i, word in enumerate(words): - if i != j and is_palindrome(word): - res.append([i, j]) - res.append([j, i]) - return res -\end{lstlisting} -\textbf{Solution2: .}Moreover, there are always more clever ways to solve 
these problems. Let us look at a clever way: - abcd, the prefix is ''. 'a', 'ab', 'abc', 'abcd', if the prefix is a palindrome, so the reverse[abcd], reverse[dc], to find them in the words, the words stored in the words with index is fastest to find. $O(n)$. Note that when considering suffixes, we explicitly leave out the empty string to avoid counting duplicates. That is, if a palindrome can be created by appending an entire other word to the current word, then we will already consider such a palindrome when considering the empty string as prefix for the other word. - \begin{lstlisting}[language = Python] - class Solution(object): - def palindromePairs(self, words): - # 0 means the word is not reversed, 1 means the word is reversed - words, length, result = sorted([(w, 0, i, len(w)) for i, w in enumerate(words)] + - [(w[::-1], 1, i, len(w)) for i, w in enumerate(words)]), len(words) * 2, [] - - #after the sorting,the same string were nearby, one is 0 and one is 1 - for i, (word1, rev1, ind1, len1) in enumerate(words): - for j in xrange(i + 1, length): - word2, rev2, ind2, _ = words[j] - #print word1, word2 - if word2.startswith(word1): # word2 might be longer - if ind1 != ind2 and rev1 ^ rev2: # one is reversed one is not - rest = word2[len1:] - if rest == rest[::-1]: result += ([ind1, ind2],) if rev2 else ([ind2, ind1],) # if rev2 is reversed, the from ind1 to ind2 - else: - break # from the point of view, break is powerful, this way, we only deal with possible reversed, - return result - \end{lstlisting} - \end{examples} - - %https://fizzbuzzed.com/top-interview-questions-5/ -% \paragraph{Searching} -% \paragraph{Insertion} -% \paragraph{Deletion} - -% Let us see the complete code of a Trie Class: -% \begin{lstlisting}[language = Python] - -% class Trie: - -% # Trie data structure class -% def __init__(self): -% self.root = self.getNode() - -% def getNode(self): - -% # Returns new trie node (initialized to NULLs) -% return TrieNode() - -% def 
_charToIndex(self,ch): - -% # private helper function -% # Converts key current character into index -% # use only 'a' through 'z' and lower case - -% return ord(ch)-ord('a') - - -% def insert(self,key): - -% # If not present, inserts key into trie -% # If the key is prefix of trie node, -% # just marks leaf node -% pCrawl = self.root -% length = len(key) -% for level in range(length): -% index = self._charToIndex(key[level]) - -% # if current character is not present -% if not pCrawl.children[index]: -% pCrawl.children[index] = self.getNode() -% pCrawl = pCrawl.children[index] - -% # mark last node as leaf -% pCrawl.isEndOfWord = True - -% def search(self, key): - -% # Search key in the trie -% # Returns true if key presents -% # in trie, else false -% pCrawl = self.root -% length = len(key) -% for level in range(length): -% index = self._charToIndex(key[level]) -% if not pCrawl.children[index]: -% return False -% pCrawl = pCrawl.children[index] - -% return pCrawl != None and pCrawl.isEndOfWord - -% # driver function -% def main(): - -% # Input keys (use only 'a' through 'z' and lower case) -% keys = ["the","a","there","anaswe","any", -% "by","their"] -% output = ["Not present in trie", -% "Present in tire"] - -% # Trie object -% t = Trie() - -% # Construct trie -% for key in keys: -% t.insert(key) - -% # Search for different keys -% print("{} ---- {}".format("the",output[t.search("the")])) -% print("{} ---- {}".format("these",output[t.search("these")])) -% print("{} ---- {}".format("their",output[t.search("their")])) -% print("{} ---- {}".format("thaw",output[t.search("thaw")])) - -% if __name__ == '__main__': -% main() -% \end{lstlisting} -There are several other data structures, like balanced trees and hash tables, which give us the possibility to search for a word in a dataset of strings. Then why do we need trie? 
Although a hash table has $O(1)$ time complexity for looking up a key, it is not efficient for the following operations: -\begin{itemize} - \item Finding all keys with a common prefix. - \item Enumerating a dataset of strings in lexicographical order. -\end{itemize} - -\paragraph{Sorting} -Lexicographic sorting of a set of keys can be accomplished by building a trie from them, and traversing it in pre-order, printing only the leaves' values. This algorithm is a form of radix sort. This is why it is also called a radix tree. - -% \paragraph{Dynamic Programming for Static Array} - - - -\end{document} \ No newline at end of file diff --git a/Easy-Book/chapters/mastering/two-pointer.tex.prec b/Easy-Book/chapters/mastering/two-pointer.tex.prec deleted file mode 100644 index afc1036..0000000 --- a/Easy-Book/chapters/mastering/two-pointer.tex.prec +++ /dev/null @@ -1,354 +0,0 @@ -\documentclass[../specific-algorithms.tex]{subfiles} -\begin{document} -This chapter complements the above Algorithm Section. Before we dive into solving problems on LeetCode, learning these algorithms would make us feel less stressed when we are trying to solve problems on LeetCode. The benefits of this chapter include: 1) saving us time when actually solving the problems, and 2) diminishing the stress level. - -These algorithms are more specific compared with the algorithm section~\ref{part_algorithms}. These algorithms include: Pointers and Sliding window algorithm, Prefix sum and Kadane's Algorithm, Floyd's Cycle-Finding Algorithm, and some pattern matching algorithms. These algorithms can be used to solve both array and string problems. We will work through these algorithms and demonstrate how they can serve to solve LeetCode problems. -% Including two pointers both from the start, or two pointers one is from the beginning and the other is from the end. Also, the sliding window, and the flexible sliding windows, also find the cycle algorithm. 
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%%%%% Algorithms -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%%%%% Two Pointers -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -% \section{Pointers and Sliding Window Algorithm} -% T1: If you see in the problem that you can do comparison and it is always one type of satisfactory element is in ahead of the other, this could be resolved by two pointers (slower and faster). Note: when the while loop stops, is there operations you need? - -% Two pointers or three pointers are the most possible. \textit{Two pointers or three pointers is a superset of the sliding window algorithm, prefix sum too.} It can lower the complexity by one power level of n. -% \subsection{Two Pointers and Sliding Window Algorithm} -% 674. Longest Continuous Increasing Subsequence -% \begin{lstlisting} -% Given an unsorted array of integers, find the length of longest continuous increasing subsequence (subarray). - -% Example 1: -% Input: [1,3,5,4,7] -% Output: 3 -% Explanation: The longest continuous increasing subsequence is [1,3,5], its length is 3. -% Even though [1,3,5,7] is also an increasing subsequence, it's not a continuous one where 5 and 7 are separated by 4. - -% Example 2: -% Input: [2,2,2,2,2] -% Output: 1 -% Explanation: The longest continuous increasing subsequence is [2], its length is 1. -% \textit{Note: Length of the array will not exceed 10,000.} -% \end{lstlisting} -% Solution: The description of this problem should use ''subarray" instead of the ''subsequence". The brute force solution is like any subarray problem $O(n^3)$. For embedded for loops to enumerate the subarray, and another $O(n)$ to check if it is strictly increasing. Using two pointers, we can get $O(n)$ time complexity. 
We put two pointers: one $i$ located at the first element of the nums, second $j$ at the second element. We specifically restrict the subarray from $i$ to $j$ to be increasing, if this is violated, we reset the starting point of the subarray from the violated place. -% \begin{lstlisting}[language = Python] -% class Solution: -% def findLengthOfLCIS(self, nums): -% """ -% :type nums: List[int] -% :rtype: int -% """ -% if not nums: -% return 0 -% if len(nums)==1: -% return 1 -% i,j = 0,0 -% max_length = 0 -% while j < len(nums): -% j += 1 #slide the window -% max_length = max(max_length, j-i) -% # when condition violated, reset the window -% if j=nums[j]: -% i = j - -% return max_length -% \end{lstlisting} - -% \subsection{Three Pointers and Sliding Window Algorithm} -% Sometimes, by manipulating two pointers are not enough for us to get the final solution. - -% 930. Binary Subarrays With Sum -% \begin{lstlisting} -% In an array A of 0s and 1s, how many non-empty subarrays have sum S? -% Example 1: - -% Input: A = [1,0,1,0,1], S = 2 -% Output: 4 -% Explanation: -% The 4 subarrays are bolded below: -% [1,0,1,0,1] -% [1,0,1,0,1] -% [1,0,1,0,1] -% [1,0,1,0,1] -% Note: - -% A.length <= 30000 -% 0 <= S <= A.length -% A[i] is either 0 or 1. -% \end{lstlisting} -% For example in the following problem, if we want to use two pointers to solve the problem, we would find we miss the case; like in the example $1, 0, 1, 0, 1$, when $j = 5$, $i = 1$, the sum is $2$, but the algorithm would miss the case of $i = 2$, which has the same sum value. - -% To solve this problem, we keep another index $i_hi$, in addition to the moving rule of $i$, it also moves if the sum is satisfied and that value is $0$. This is actually a Three pointer algorithm, it is also a mutant sliding window algorithm. 
-% \begin{lstlisting}[language=Python] -% class Solution: -% def numSubarraysWithSum(self, A, S): -% i_lo, i_hi, j = 0, 0, 0 #i_lo <= j -% sum_window = 0 -% ans = 0 -% while j < len(A): - -% sum_window += A[j] - -% while i_lo < j and sum_window > S: -% sum_window -= A[i_lo] -% i_lo += 1 -% # up till here, it is standard sliding window - -% # now set the extra pointer at the same location of the i_lo -% i_hi = i_lo -% while i_hi < j and sum_window == S and not A[i_hi]: -% i_hi += 1 -% if sum_window == S: -% ans += i_hi - i_lo + 1 - -% j += 1 #increase the pointer at last so that we do not need to check if ji, j\in[0,n-1])$, which is equivalent to $max(y_j - min(y_i)(i= rods[i]: - dp[j-rods[i]] = max(dp[j-rods[i]], old_dp[j]+min(rods[i], j)) - else: - dp[rods[i]-j] = max(dp[rods[i]-j], old_dp[j]+min(rods[i], j)) - # blue arrow - dp[j+rods[i]] = max(dp[j+rods[i]], old_dp[j]) - # the red line is saved in dp indirectly - return dp[0] -\end{lstlisting} -\end{examples} -\end{document} \ No newline at end of file diff --git a/Easy-Book/chapters/part3_deprc/introduction.tex b/Easy-Book/chapters/part3_deprc/introduction.tex deleted file mode 100644 index 8e90ace..0000000 --- a/Easy-Book/chapters/part3_deprc/introduction.tex +++ /dev/null @@ -1,81 +0,0 @@ -\documentclass[../algorithms.tex]{subfiles} -\begin{document} -% \chapter{Introduction} -In this chapter, I will mainly discuss some problems so that we can see how different approaches and algorithms can make difference on the time and space complexity. -\section{Maximum Subarray} -LeetCode 53. Maximum Subarray - -Find the contiguous subarray within an array (containing at least one number) which has the largest sum. For example, given the array $[-2,1,-3,4,-1,2,1,-5,4]$, the contiguous subarray $[4,-1,2,1]$ has the largest sum = 6. 
-\subsection{Brute Force} -The brute force solution of this problem is to use two for loops, one pointer at the start position of the subarray, the other pointer at the end position of the subarray. Then we get the maximum sum of these subarrays. The time complexity is $O(n^3)$, where we spend $O(n)$ to compute the sum of each subarray. The code is written as: -\begin{lstlisting}[language=Python] -for i in range(n): - for j in range(i+1,n): -\end{lstlisting} -However, if we can get the sum of each subarray in $O(1)$, then we can lower the complexity to $O(n^2)$. Here one solution is to trade space for efficiency. The sum of subarray from index $i$ to $j$ is $sum(i,j)=sum(0,j)-sum(0,i)$. We can precompute the accumulated sum to each index and save it in an array of the same size, which gives us $O(n^2)$ time complexity and $O(n)$ space complexity. -\subsection{Divide and Conquer} -To further improve the efficiency, we use divide and conquer, where we divide one array into two halves: the maximum subarray might be located on the left side, or the right side, or some in the left side and some in the right side, crossing the boundary. $T(n) = max(T(left),T(right), T(cross))$, where max is for merging and T(cross) is for the case that the potential subarray crosses the mid point. For the complexity, $T(n)=2T(n/2)+n$; if we use the master method, it would give us $O(n\lg n)$. With this solution, we use $O(\lg n)$ space for the recursive function stack space. 
-\begin{lstlisting}[language=Python] -def maxSubArray(self, nums): - """ - :type nums: List[int] - :rtype: int - """ - def getCrossMax(low,mid,high): - left_sum,right_sum =0,0 - left_max, right_max = -maxint, -maxint - left_i,right_j=-1,-1 - for i in xrange(mid,low-1,-1): #[) - left_sum+=nums[i] - if left_sum>left_max: - left_max= left_sum - left_i = i - for j in xrange(mid+1,high+1): - right_sum+=nums[j] - if right_sum>right_max: - right_max= right_sum - right_j = j - return (left_i,right_j,left_max+right_max) - - def maxSubarray(low,high): - if low==high: - return (low,high, nums[low]) - mid = (low+high)//2 - rslt=[] - #left_low, left_high, left_sum = maxSubarray(low,mid) #[low,mid] - rslt.append(maxSubarray(low,mid)) #[low,mid] - #right_low,right_high,right_sum = maxSubarray(mid+1,high)#[mid+1,high] - rslt.append(maxSubarray(mid+1,high)) - #cross_low,cross_high,cross_sum = getCrossMax(low, mid, high) - rslt.append(getCrossMax(low, mid, high)) - return max(rslt, key=lambda x: x[2]) - return maxSubarray(0,len(nums)-1)[2] -\end{lstlisting} -\subsection{Dynamic Programming} -Using dynamic programming: the $f$ memorize the maximum subarray value till $j$, $f[ j ] = max f [ j- 1] + S [ j ] , S [ j ]$. This would gave us $O(n)$ time complexity and $O(n)$ space complexity. -\subsection{Greedy Algorithm} -Because $sum(i,j)=sum(0,j)-sum(0,i)$, to till index $j$, we use $f(j)$ represents the maximum subarray value. which gives us relation $f(j) = sum(0,j)-min(Sum(0,i))_{i in [0,j] }, j>=1$ -\subsection{Prefix Sum} -convert this problem to best time to buy and sell stock problem. $[0, -2, -1, -4, 0, -1, 1, 2, -3, 1]$, which is to find the maximum benefit, => O(n), use prefix$\_$sum, the difference is we set prefix$\_$sum to 0 when it is smaller than 0, $O(n)$. Or we can try two pointers. 
-\begin{lstlisting} -from sys import maxint -class Solution(object): - def maxSubArray(self, nums): - """ - :type nums: List[int] - :rtype: int - """ - max_so_far = -maxint - 1 - prefix_sum= 0 - for i in range(0, len(nums)): - prefix_sum+= nums[i] - if (max_so_far < prefix_sum): - max_so_far = prefix_sum - - if prefix_sum< 0: - prefix_sum= 0 - return max_so_far -\end{lstlisting} -From this problem, we get a peek at how using different methods can gradually improve the algorithms' performance. - -\end{document} \ No newline at end of file diff --git a/Easy-Book/chapters/part_complete_search_introduction.tex b/Easy-Book/chapters/part_complete_search_introduction.tex deleted file mode 100644 index 880763d..0000000 --- a/Easy-Book/chapters/part_complete_search_introduction.tex +++ /dev/null @@ -1,62 +0,0 @@ -\documentclass[../main.tex]{subfiles} -\begin{document} -Finding a solution to a problem in -Computer Science and Artificial Intelligence is often thought of -as a process of search through the space of possible solutions (state space), either carried out on some data structures, or calculated in the search space of a problem domain. - -\paragraph{Searching Strategies} In this part, we will first learn the basic searching algorithms carried out on explicit data structures in Chapter~\ref{chapter_basic_searching}. This will include: -\begin{enumerate} - \item General Searching Strategies - \item Linear Search - \item Tree Search - \item Graph Search -\end{enumerate} -In this chapter, we only explain different strategies on explicitly defined data structures to keep it simple and clean. The main purpose is to learn the fundamental concepts and properties to lay the groundwork for the more advanced algorithms. - -\paragraph{Uninformed search vs informed search} Searching can be categorized as \textbf{uninformed search} (also called \textbf{blind search}) and \textbf{informed search} (also called \textbf{heuristic search}) strategies. 
The uninformed search means that the strategies have no additional information about states beyond that provided in the problem definition. All they can do is to generate successors and distinguish a goal state from a non-goal state. And we categorize their strategies by the \textit{order} in which nodes are expanded. On the other hand, strategies that know whether one non-goal state is ``more promising'' than another are the informed search. In this book, we only cover common uninformed search strategies. - - -\paragraph{Advanced uninformed searching} After learning the basics and knowing the properties, we can move on to more advanced uninformed searching techniques in Chapter~\ref{} that have better efficiency. The content will include: -\begin{enumerate} - \item Advanced Linear Searching such as Binary Search and Two-pointer technique; - \item Recursive Backtracking; - \item Bidirectional BFS. -\end{enumerate} - - - - - - - - -\textbf{ Complete Search} and \textbf{partial search} are the two main branches in the Searching paradigm. - -Complete search is one that guarantees that if a path/solution to the goal/requirement exists, the algorithm will reach the goal given enough time; this is denoted as \textit{completeness}. Complete search is thought of as a \textit{universal solution} to problem solving. On the other hand, Partial Search a.k.a Local Search will not always find the correct or optimal solution if one exists because it usually sacrifices completeness for greater efficiency. - -In this part of this book, we will learn the Complete Search instead of the partial search due to its practicality in solving the LeetCode Problems. The name ``Complete Search'' does not necessarily mean that these algorithms are inefficient brute-force solutions all the time. For instance, \textbf{backtracking} and \textbf{Bi-directional Search} are more efficient than a brute-force exhaustive search.
- -Complete Search algorithms can be categorized into: -\begin{itemize} - \item Explicit VS Virtual Search Space: Explicit complete search is carried out on data structures, linear structures or non-linear data structures like graph/ trees. In Explicit Search, the search space size is the size of the target data structure. We will need to find a sub-structure of the given data structure. Virtual space based search is to find a set of value assignments to certain variables that satisfy specific mathematical equations/inequalities, or sometimes to maximize or minimize a certain function of these variables. This type of problem is known as a \textbf{constraint satisfaction problem}. Backtracking, for example, is an optimized search algorithm for virtual search spaces. - \item Linear VS Non-linear Complete Search: Linear search checks every record in a linear fashion, such as sliding window algorithm, binary search, sweep line. On the other hand, Non-linear Search is applied on non-linear data structures and follows graph fashion. - \item Iterative VS Recursive Search: For example, most linear search is iterative. Breadth-first-search for graph and level-by-level search for trees are iterative too. Recursive Search are algorithms implemented with recursion, such as Depth-first-search for graph, or DFS based tree traversal, or backtracking. -\end{itemize} - -\paragraph{How to Learn Complete Search?} Up till now, we have already learned the explicit complete search carried out on different data structures in Part~\ref{part_data_structure}. In this part, we will complete the complete search topic with more advanced and efficient searching algorithms applied either on real data structures or result space. Also, this part will give us more examples of how to apply algorithm design methodology, such as divide and conquer, and how to use the basic data structures we learned before to design \textbf{complete search} algorithms with efficiency and elegance.
- -\paragraph{Organization of Complete Search} This part follows the same organization as Part~\ref{part_data_structure}: it is composed of linear search and non-linear search. For each type of algorithm, we will explain how to use it on cases like: explicit or virtual search space. - -\begin{itemize} - \item Linear Search (Chapter~\ref{chapter_linear_searching}) describes the common algorithms that are carried out on linear data structures: Array, Linked List and String. - \item Non-linear Search (Chapter~\ref{chapter_non_linear_searching}) encompasses the most common and basic search techniques for graph and tree data structures. The two most basic search techniques, Breadth-first-search and Depth-first-search, serve as the basis for the more advanced graph algorithms in the next chapter. - \item Advanced Non-linear Search (Chapter~\ref{chapter_advanced_non_linear_search}) includes more advanced concepts of graph and more advanced graph search algorithms that solve common problems defined in graph. Specifically, the problems we include are: Connected Components, topological sort, cycle detection, minimum spanning trees and shortest path related problems. -\end{itemize} - -In this part, for graph algorithms, we will only cover the medium-level ones, and leave the more advanced ones to Part~\ref{part_advanced_topics}.
- -\paragraph{Searching Methodology} -Before we head to more specific searching algorithms, we shall define the searching algorithm design methodology more clearer: - - -\end{document} \ No newline at end of file diff --git a/Easy-Book/chapters/preface.pdf b/Easy-Book/chapters/preface.pdf deleted file mode 100644 index 5bebe0a..0000000 Binary files a/Easy-Book/chapters/preface.pdf and /dev/null differ diff --git a/Easy-Book/chapters/preface.synctex.gz b/Easy-Book/chapters/preface.synctex.gz deleted file mode 100644 index 723e9f3..0000000 Binary files a/Easy-Book/chapters/preface.synctex.gz and /dev/null differ diff --git a/Easy-Book/chapters/preface.tex b/Easy-Book/chapters/preface.tex deleted file mode 100644 index afe2fa2..0000000 --- a/Easy-Book/chapters/preface.tex +++ /dev/null @@ -1,63 +0,0 @@ -\documentclass[../main.tex]{subfiles} -\begin{document} - -%%%%%%%%%%% -% Preface % -%%%%%%%%%%% -\chapter*{Preface} -Graduating with a Computer science or engineering degree? Converting from physics, or math, or any unrelated field to computer science? Dreaming of getting a job as a software engineer in game-playing companies such as Google, Facebook, Amazon, Microsoft, Oracle, LinkedIn, and so on? Unfortunately, there are the most challenging ``coding interview'' guarding the door to these top-notch tech companies. The interview process can be intimidating, with the interviewer scrutinizing every punch of your typing or scribbling on the whiteboard. Meanwhile, you are required to express whatever is on your mind to walk your interviewer through the design and analysis process and end the interview with clean and supposedly functional code. %I was one of you two years ago. - -What kind of weapons or martial arts do we need to toughen ourselves up so that we can knock down the ``watchdog'' and kick it in? By weapons and martial arts, I mean books and resources. 
Naturally, you pull out your first or second year college textbook \textit{Introduction to Algorithms} from bookshelf, dust it off, and are determined to read this $1000$-plus-pages massive book to refresh your brain with data structures, divide and conquer, dynamic programming, greedy algorithm and so on. If you are bit more knowledgeable, you would be able to find another widely used book--\textit{Cracking the Coding Interviews} and online coding websites--LeetCode and LintCode--to prepare. How much time do you think you need to put in? A month? Two months? Or three months? You would think after this, you are done with the interview, but for software engineers, it is not uncommon to switch companies frequently. Then you need to start the whole process again until you gain a free pass to ``coding interviews'' via becoming an experienced senior engineer or manager. %I was there - -I was in the exact same shoes. My first war started in the fall of 2015, continued for two months and ended without a single victory. I gave up the whole interview thing until two years ago when my life (I mean finances) situation demanded me to get an internship. This time, I got to know LeetCode and started to be more problem and practice driven from the beginning. `Cause God knows how much I did not want to redo this process, I naturally started to dig, summarize or create, and document problem-patterns, from sources such as both English and Chinese blogs, class slides, competitive programming guideline and so on. - -I found I was not content with just passing interviews. I wanted to seek the \textit{source} of the wisdom of algorithmic problem solving--the principles. I wanted to reorganize my continuously growing knowledge in algorithms in a way that is as clear and concise as possible. I wanted to attach math that closely relates to the topic of algorithmic problem solving, which would ease my nerves when reading related books. 
But meanwhile I tried to avoid getting too deep and theoretical which may potentially deviate me from the topic and adds more stress. All in all, we are not majoring in math, which is not ought to be easy; we use it as a practical tool, a powerful one! When it comes to data structures, I wanted to connect the \textit{abstract} structures to real Python objects and modules, so that when I'm using data structures in Python, I know the underlying data structures and their responding behaviors and efficiency. I felt more at ease seeing each particular algorithm explained with the source principle of algorithm design--\textit{why} it is so, instead of treating each as a standalone case and telling me ``what'' it is. %In all, I do not want to worship each algorithm like it's only genius-capable of invention. - -Three or four months in midst of the journey of searching for answers to the above ``wantes'', the idea of writing a book on this topic appeared in my mind. I did not do any market research, and did not know anything about writing a book. I just embarked on the boat, drifted along, and as I was farther and deeper in the ocean of writing the book, I realized how much work it can be. If you are reading this sometime in the future, then I landed. The long process is more of an \textit{agile} development in software engineering; knowledge, findings, and guidelines are added piece by piece, constantly going through revision. -Yet, when I started to do research, I found that there are plenty of books out there focusing on either teaching algorithmic knowledge (\textit{Introduction to Algorithms}, \textit{Algorithmic Problem Solving}, etc) or introducing interview processes and solving interview problems(\textit{Cracking the Coding Interview}, \textit{Coding Interview Questions}, etc), but barely any that combines the two. 
This book naturally makes up this role in the categorization; learning the algorithmic problem solving by analyzing and practicing interview problems creates a reciprocal relationship--creating passion and confidence to make 1+1=4. - -What's my expectation? First, your feeling of enjoyment when reading and practicing along with the book is of the upmost importance to me. Second, I really wish that you would be able to sleep well right the night before the interview which proves that your investment both financially and timewise was worthwhile. -% There is no way back, with the thought and the real action of writing a book with the all the features I wanted. It was onlyIt was only when kept reading, practicing, thinking, searching -% Another thing, I I preferred to use real code and that I can run and play with conveniently instead of using pseudo-code. that I am inclined to grasp a well understanding of the algorithmic problem solving skills, categorizing problem-patterns, connecting conceptual data structures to Python modules, and documenting all these so that I won't need to repeat this tedious process ever again. I wanted to over-qualify so that I would be able to sleep tight before the interviews. - -% I started to document what I learned from either in English community or Chinese community in the blogs, until one day the idea of writing a book on this topic appears in my mind. I mean why not? There are plenty of books out there focusing on either teaching algorithmic knowledge (\textit{Introduction to Algorithms}, \textit{Algorithmic Problem Solving}, etc) or introducing interview process and solving interview problems(\textit{Cracking the Coding Interview}, \textit{Coding Interview Questions}, etc), but barely any that combines the two. 
This book naturally make up this role in the categorization; learning the algorithmic problem solving by analyzing and practicing interview problems -- a reciprocal relationship that makes 1+1=4: Algorithms, Interviews, Passion, and Confidence. - -In all, this is a book that unites the algorithmic problem solving, Coding Interviews, and Python objects and modules. I tried hard to do a good job. This book differs from books focusing on extracting the exact formulation of problems from the fuzzy and obscure world. We focus on learning the principle of algorithm design and analysis and practicing it using well-defined classical problems. This knowledge will also help you define a problem more easily in your job. - -% As one of you, these are the features that I want to see from a book to ease our nerves, and I wish it is what you want and what works for you too. -% \begin{itemize} -% \item -% I want the book to tell me the fundamental principles of problem modeling and problem solving. That it starts with a global picture of algorithmic problem solving. In the search of the answer, I found it is hard, not many books try to seek the source of principles. -% I want these principles to be well-explained but not too theoretical and comes with practice guideline. -% \item I want to read real code, not pseudo-code. -% \item I want to learn some math that can serves as a tool to learn the algorithm design and analysis, but not get too deep and theoretical that would deviate me from the main purpose. -% \item -% I want to learn data structures, and know how it is related to the programming languages. Connecting the dots between theoretical data structures and python modules. So when I'm really using a data structure, I have a clue of its underlying data structure, expected behavior and efficiency. -% \item I want each particular algorithm explained with the principle algorithm design, not treating each as a standalone case. 
That would be a pain to treat them alone and learn one by one -% \item I want a whole problem-pattern section, where problems are categorized and how each type can be solved is summarized and padded with real examples. -% \item I do not want traditional book that comes with after class exercises (never really did it out of my will), practicing online and be competitive can be fun. -% \end{itemize} - - -% . - -% It is fair to say that most technical books are written by expert-already, that is kind of traditional software engineering, they first start to plot or design the requirement, write the book, and polishing. While, this book is different, it is a process from newbie to someone great (not saying experts, because once you learn more, you figure they are way more that are out of the scope) - -Li Yin - -\mbox{}\\ -%\mbox{}\\ -\noindent Li Yin \\ -\noindent \url{http://liyinscience.com} - -8/30/2019 - - - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -% Give credit where credit is due. % -% Say thanks! % -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\section*{Acknowledgements} - -\end{document} diff --git a/Easy-Book/chapters/purged.tex b/Easy-Book/chapters/purged.tex deleted file mode 100644 index e69de29..0000000 diff --git a/Easy-Book/chapters/question_1_dynamic_programming.tex b/Easy-Book/chapters/question_1_dynamic_programming.tex deleted file mode 100644 index 667172b..0000000 --- a/Easy-Book/chapters/question_1_dynamic_programming.tex +++ /dev/null @@ -1,2819 +0,0 @@ -\documentclass[../main.tex]{subfiles} -\begin{document} -%%%%%%%%%%%%%%%%%%%Seven types %%%%%%%%%%%%%%%%%%%%%%%%%%% -% \chapter{Dynamic Programming Questions} -In this Chapter, we categorize dynamic programming into three according to the input data types, including Single Sequence (Section~\ref{sec_single_n} and Section~\ref{sec_single_n2}), Coordinate (Section~\ref{sec_coordinate}), and Double Sequence(Section~\ref{sec_double_sequence}). 
Each type has its own identifiable characters and can be solved in a certain similar way. In this process, we found the \textbf{Forward Induction Method} is the most effective way to identify the recurrence state transfer function. In Forward Induction Method, we start from the base cases (corresponds to the base cases in the DFS solution), and incrementally move to the larger subproblem, and try to induce the state transfer function between current problem and its previous subproblems. If can be induced from only constant subproblems, we have $O(n)$, if relates to all smaller subproblems, we have $O(n^2)$. Using forward inductio method, is intuitive and effective. The only thing we need to note is to try a variety of examples, make sure the recurrence function we found is comprehensive and right. At the end of the section, we would summarize a template for this type of problems solved using dynamic programming. These types include: -\begin{enumerate} - \item Single Sequence (50\%): This is an easy type too. The states represents if the sequence ends here and include the current element. This way of divide the problem we can obtain the state transfer function easily to find a pattern. - \item Coordinate (15\%): 1D or 2D coordinate. This is the easiest type of DP because the state transfer function can be directly obtained through the problem (how to make moves to the next position). - \item Double Sequence (30\%): Because double sequence make its state a matrix and subproblem size $O(mn)$, this type of dynamic programming is similar to coordinate type, within which we just need to figure out the transfer function (moves) ourselves. - % \item Splitting Type (10\%): The splitting type is quite different in the ways of dividing the problem. 
- % \item Backpack Type (10\%): - % \item Range Type (10\%): -\end{enumerate} - % 坐标型15%: jump game: 棋盘,格子 f[i]代表从起点走到i坐标 - % 序列型30%:f[i]代表前i个元素总和,i=0表示不取任何元素 - % 双序列型30% - % 划分型10% - % 背包型10% - % 区间型5% -%%%%%%%%%%%%%%%%%%%single sequence %%%%%%%%%%%%%%%%%%%%%%%%%%% -% \section{Single Sequence} -% \label{part2_sequence_dp} -The single sequence type dynamic programming is usually applied on the string and array. -\begin{table}[h] -\begin{small} -\centering -\noindent\captionof{table}{ Different Type of Single Sequence Dynamic Programming} - \noindent \begin{tabular}{|p{0.14\columnwidth}|p{0.14\columnwidth}| p{0.14\columnwidth}|p{0.14\columnwidth}|p{0.14\columnwidth}|p{0.14\columnwidth}|} - \hline - Case & Input & Subproblems & f(n) & Time & Space \\ \hline -Section~\ref{sec_single_n} & $O(n)$& $O(n)$ & $O(1)$ & $O(n)$ & $O(n)->O(1)$ \\\hline -Section~\ref{sec_single_n2} & $O(n)$& $O(n)$ & $O(n)$ & $O(n^2)$ & $O(n)$\\ \hline -Section~\ref{sec_single_n3} & $O(n)$& $O(n^2)$ & $O(n)$ & $O(n^3)$ & $O(n^2)$\\ \hline -Hard & $O(n)$& $O(n^3)$ & $O(n)$ & $O(n^4)$ & $O(n^3)$\\ \hline -\end{tabular} - \label{tab:single_sequence} - \end{small} -\end{table} - -\begin{table}[h] -\begin{small} -\centering -\noindent\captionof{table}{ Different Type of Coordinate Dynamic Programming} - \noindent \begin{tabular}{|p{0.14\columnwidth}|p{0.14\columnwidth}| p{0.14\columnwidth}|p{0.14\columnwidth}|p{0.14\columnwidth}|p{0.14\columnwidth}|} - \hline - Case & Input & Subproblems & f(n) & Time & Space \\ \hline -Easy & $O(mn)$& $O(mn)$ & $O(1)$ & $O(mn)$ & $O(mn)->O(m)$ \\\hline -Medium & $O(mn)$& $O(kmn)$ & $O(1)$ & $O(kmn)$ & $O(kmn)->O(mn)$\\ \hline -\end{tabular} - \label{tab:2d_coordinate} - \end{small} -\end{table} - -% \begin{enumerate} -% \item state: $f[i]$ denotes the state for number or character for the first $i$ position, or the $i$th. 
Normally need $n+1$ space, because we need to consider the empty string; -% \item function: $f[i] = f[j]$ … $j$ is the position previous to $i$; -% \item initialize: $f[0]$ -% \item answer: $f[n]$, while not $n-1$; -% \end{enumerate} - - -Now, let us look at some examples: -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%Single Sequence%%%%%%%%%%%%%%%%%%% -\section{Single Sequence $O(n)$} -\label{sec_single_n} -In this section, we will see how to solve the easy type of dynamic programming shown in Table~\ref{tab:single_sequence}, where each subproblem is only dependent on the state of constant number of smaller subproblems. \textbf{Subarray} and \textbf{Substring} are two types of them. Here, we will see how to using \textit{deduction} method which starts from base case, and gradually get the result of all the cases after it. The examples include probelems with one or multiple choice. - -Moreover, for this type, because for each subproblem, we only need to look back constant smaller subproblems, we do not even need $O(n)$ space to save all the result, unless you are asked to get the best solution for all subproblems too. Thus, this section generally achieve $O(n)$ and $O(1)$ for time complexity and space complexity, respectively. -\begin{enumerate} - \item 276. Paint Fence - \item 256. Paint House - \item 198. House Robber - \item 337. House Robber III (medium) - \item 53. Maximum Subarray (Easy) - \item 152. Maximum Product Subarray - \item 32. Longest Valid Parentheses(hard) -\end{enumerate} -\subsection{Easy Type} -\begin{examples} -\item \textbf{Paint Fence (L276, *).} There is a fence with n posts, each post can be painted with one of the k colors. You have to paint all the posts such that no more than two adjacent fence posts have the same color.Return the total number of ways you can paint the fence. 
\textit{Note: n and k are non-negative integers.} -\begin{lstlisting}[numbers=none] -Example: - -Input: n = 3, k = 2 -Output: 6 -Explanation: Take c1 as color 1, c2 as color 2. All possible ways are: - - post1 post2 post3 - ----- ----- ----- ----- - 1 c1 c1 c2 - 2 c1 c2 c1 - 3 c1 c2 c2 - 4 c2 c1 c1 - 5 c2 c1 c2 - 6 c2 c2 c1 -\end{lstlisting} -\textbf{Solution: Induction and Multi-choiced State}. suppose n=1, dp[1] = k; when n=2, we have two cases: same color with k ways to paint and different color with k*(k-1) ways. -\begin{lstlisting}[numbers=none] -dp[1] = k -dp[2] = same + diff; same = k, diff = k*(k-1) -dp[3]: for dp[2].same, we can only have diff colors, diff = dp[2].same*(k-1) - for dp[2].diff, we can have either diff color or small color, same = dp[2].diff, diff+=dp[2].diff*(k-1) -\end{lstlisting} -Thus, using deduction, which is the dynamic programming, the code is: -\begin{lstlisting}[language=Python] -def numWays(self, n, k): - if n==0 or k==0: - return 0 - if n==1: - return k - - same = k - diff = k*(k-1) - for i in range(3,n+1): - pre_diff = diff - diff = (same+diff)*(k-1) - same = pre_diff - return (same+diff) -\end{lstlisting} -\item \textbf{Paint House (L256, *).} There are a row of n houses, each house can be painted with one of the three colors: red, blue or green. The cost of painting each house with a certain color is different. You have to paint all the houses such that no two adjacent houses have the same color. - -The cost of painting each house with a certain color is represented by a n x 3 cost matrix. For example, costs[0][0] is the cost of painting house 0 with color red; costs[1][2] is the cost of painting house 1 with color green, and so on... Find the minimum cost to paint all houses. \textit{Note: All costs are positive integers.} -\begin{lstlisting}[numbers=none] -Example: - -Input: [[17,2,17],[16,16,5],[14,3,19]] -Output: 10 -Explanation: Paint house 0 into blue, paint house 1 into green, paint house 2 into blue. 
- Minimum cost: 2 + 5 + 3 = 10. - -\end{lstlisting} -\textbf{Solution: Induction and Multi-choiced State.} For this problem, each house has three choices, so we need to track the optimal solution for taking each color. dp[0] = 0, for one house, return min(c1, c2, c3). -\begin{lstlisting}[numbers=none] -for 1 house: for three choice - (c1, c2, c3), the result is min(c1, c2, c3) -for 2 houses: cost of taking c1 = costs[2][c1]+min(dp[1].c2, dp[1].c3) - cost of taking c2 = costs[2][c2]+min(dp[1].c1, dp[1].c3) - cost of taking c3 = costs[2][c3]+min(dp[1].c1, dp[1].c2) -\end{lstlisting} -\begin{lstlisting}[language=Python] -def minCost(self, costs): - if not costs: - return 0 - c1, c2, c3 = costs[0] - n = len(costs) - for i in range(1, n): - nc1 = costs[i][0] + min(c2, c3) - nc2 = costs[i][1] + min(c1, c3) - nc3 = costs[i][2] + min(c1, c2) - c1, c2, c3 = nc1, nc2, nc3 - return min(c1, c2, c3) -\end{lstlisting} -\item \textbf{House Robber (L198,*).} You are a professional robber planning to rob houses along a street. Each house has a certain amount of money stashed, the only constraint stopping you from robbing each of them is that adjacent houses have security system connected and it will automatically contact the police if two adjacent houses were broken into on the same night. - -Given a list of non-negative integers representing the amount of money of each house, determine the maximum amount of money you can rob tonight without alerting the police. - -\textbf{Solution: Induction and Multi-choiced State.} Each house has two choices: rob or not rob. Thus the profit for each house can be deduced as follows: -\begin{lstlisting}[numbers=none] -1 house: dp[1].rob = p[1], dp[1].not_rob = 0, return max(dp[1]) -2 house: if rob house 2, means we definitely can not rob house 1. dp[2].rob = dp[1].not_rob + p[2]. - if not rob house 2, means we can choose rob house 1 or not rob house 1.
dp[2].not_rob = max(dp[1].rob, dp[1].not_rob) -\end{lstlisting} -\begin{lstlisting}[language = Python] -def rob(self, nums): - if not nums: - return 0 - if len(nums)==1: - return nums[0] - rob = nums[0] - not_rob = 0 - for i in range(1, len(nums)): - new_rob = not_rob + nums[i] - new_not_rob = max(rob, not_rob) - rob, not_rob = new_rob, new_not_rob - return max(rob, not_rob) -\end{lstlisting} -\item \textbf{House Robber III (L337, medium).} The thief has found himself a new place for his thievery again. There is only one entrance to this area, called the "root." Besides the root, each house has one and only one parent house. After a tour, the smart thief realized that "all houses in this place forms a binary tree". It will automatically contact the police if two directly-linked houses were broken into on the same night. - -Determine the maximum amount of money the thief can rob tonight without alerting the police. -\begin{lstlisting}[numbers=none] -Example 1: - -Input: [3,2,3,null,3,null,1] - - 3 - / \ - 2 3 - \ \ - 3 1 - -Output: 7 -Explanation: Maximum amount of money the thief can rob = 3 + 3 + 1 = 7. - -Example 2: - -Input: [3,4,5,1,3,null,1] - - 3 - / \ - 4 5 - / \ \ - 1 3 1 - -Output: 9 -Explanation: Maximum amount of money the thief can rob = 4 + 5 = 9. -\end{lstlisting} -\textbf{Solution: Induction + Tree Traversal + Multi-choiced State.} This is a dynamic programming applied on tree structure. The brute force still takes $O(2^n)$, where $n$ is the total nodes of the tree. Also, for the tree structure, naturally, the result of a node dependent on the result of its both left and right subtree. When the subtree is empty, then we return (0, 0) for rob and not rob. After we gained the result of left and right subtree each for robbing or not robbing, we merge the result with the current node. 
Say if we want the result for robbing state for current node: then the left tree and right subtree will only use not robbing, it will be left\_not\_rob + right\_not\_rob + current node val. If the current is not robbing, then for the left and right subtree, it both can take rob or not rob state, so we pick the maximum combination of them. Walking through a carefully designed sophisticated enough example is necessary to figure out the process. -\begin{lstlisting}[language=Python] -# class TreeNode(object): -# def __init__(self, x): -# self.val = x -# self.left = None -# self.right = None -def rob(self, root): - def TreeTraversal(root): - if not root: - return (0, 0) - - l_rob, l_not_rob = TreeTraversal(root.left) - r_rob, r_not_rob = TreeTraversal(root.right) - - rob = root.val+(l_not_rob+r_not_rob) - not_rob = max(l_rob+r_rob, l_rob+r_not_rob, l_not_rob+r_not_rob, l_not_rob+r_rob) - # not_rob = (max(l_rob, l_not_rob)+max(r_rob, r_not_rob) - return (rob, not_rob) - return max(TreeTraversal(root)) -\end{lstlisting} - -\end{examples} - -% p[i] : take i or not to take i, we can get the maximum money - -% take i = nums[i]+dp[i-2] -% not take i = dp[i-1] - -% initiazation n+1 -% dp[0]=0 -% dp[1] = nums[0] -% \begin{lstlisting}[language = Python] -% def rob(self, nums): -% """ -% :type nums: List[int] -% :rtype: int -% """ -% if not nums: -% return 0 -% if len(nums)==1: -% return nums[0] -% dp=[0]*(len(nums)+1) -% dp[0] =0 -% dp[1] =nums[0] #if len is 1 -% for i in range(2,len(nums)+1): #if leng is 2...., index is i-1 -% dp[i]=max(dp[i-2]+nums[i-1], dp[i-1]) -% return dp[-1] -% \end{lstlisting} - -% Now, to save space. We use rolling array to optimize the space complexity. %这就是滚动数组, 或者叫做滚动指针的空间优化. 
-% \begin{lstlisting}[language = Python] -% def rob(self, nums): -% """ -% :type nums: List[int] -% :rtype: int -% """ -% if not nums: -% return 0 -% if len(nums)==1: -% return nums[0] -% dp=[0]*(2) -% dp[0] =0 -% dp[1] =nums[0] #if len is 1 -% for i in range(2,len(nums)+1): #if leng is 2...., index is i-1 -% dp[i%2]=max(dp[(i-2)%2]+nums[i-1], dp[(i-1)%2]) -% return dp[len(nums)%2] -% \end{lstlisting} - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%%%%% Prefex Sum -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\subsection{Subarray Sum: Prefix Sum and Kadane's Algorithm} -\label{dp_single_n_prefix} -This subsection is a continuation of the last section. The purpose of seperating from the last section is due to the importance of the algorithms--Prefix Sum and Kadane's Algorithms in the problems related to the sum or product of the subarray. - -Both Prefix Sum and Kadane's algorithm has used the dynamic programming methodology, and they are highly correlated to each others. They each holds a different perspective to solve a similar problem: one best example is the maximum subarray problem. -% \subsubsection{Introduction to Prefix Sum} - In the following two sections (Sec~\ref{prefix_sum_sloton} and Sec~\ref{kadane algorithm}) we will demonstrate how prefix sum is used to solve the maximum subarray problem and how kadane's algorithm which applied dynamic programming directly on this problem. -And we show Python code in the next paragraph. After we obtained the prefix sum of the array, using formula $S_{(i,j)} = y_j-y_{i-1}$ can get us the sum of any subarray in the array. 
-\begin{lstlisting}[language=Python] -P = [0]*(len(A)+1) -for i, v in enumerate(A): - P[i+1] = P[i] + v -\end{lstlisting} -\subsubsection{Prefix Sum and Kadane's Algorithm Application} -\label{prefix_sum_sloton} -\begin{examples}[resume] -\item \textbf{Maximum Subarray (L53, *).} Given an integer array $nums$, find the contiguous subarray (containing at least one number) which has the largest sum and return its sum. -\begin{lstlisting}[numbers=none] -Example: - -Input: [-2,1,-3,4,-1,2,1,-5,4], -Output: 6 -Explanation: [4,-1,2,1] has the largest sum = 6. -\end{lstlisting} -Follow up: If you have figured out the O(n) solution, try coding another solution using the divide and conquer approach, which is more subtle. - -\textbf{Solution 1: Prefix Sum}. For the maximum subarray problem, we have our answer to be $\max(y_j - y_i)\ (j>i,\ j\in[0,n-1])$, which is equivalent to $\max_{j}\bigl(y_j - \min_{i<j} y_i\bigr),\ j \geq 1$. - -{Prefix Sum} to get BCR -convert this problem to best time to buy and sell stock problem. $[0, -2, -1, -4, 0, -1, 1, 2, -3, 1]$, which is to find the maximum benefit, => O(n), use prefix$\_$sum, the difference is we set prefix$\_$sum to 0 when it is smaller than 0, $O(n)$. Or we can try two pointers.
-\begin{lstlisting}[language = Python] -from sys import maxint - def maxSubArray(self, nums): - """ - :type nums: List[int] - :rtype: int - """ - max_so_far = -maxint - 1 - prefix_sum= 0 - for i in range(0, len(nums)): - prefix_sum+= nums[i] - if (max_so_far < prefix_sum): - max_so_far = prefix_sum - - if prefix_sum< 0: - prefix_sum= 0 - return max_so_far -\end{lstlisting} - - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%%%%% Kadane Algorithm -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\subsubsection{Generalize Kadane's Algorithm} -Because we can still do space optimization to the above solution, we use one variable to replace the dp array, and we track the maximum dp in the for loop instead of obtaining the maximum value at the end. Also, if we rename the dp to max\_ending\_here and the max(dp) to max\_so\_far, the code is as follows: -\begin{lstlisting}[language=Python] -def maximumSumSubarray(arr, n): - if not arr: - return 0 - max_ending_here = 0 - max_so_far = -sys.maxsize - for i in range(len(arr)): - max_ending_here = max(max_ending_here+arr[i], arr[i]) - max_so_far = max(max_so_far, max_ending_here) - return max_so_far -\end{lstlisting} -This space-wise optimized dynamic programming solution to the maximum subarray problem is exactly the Kadane's algorithm. Kadane's algorithm begins with a simple inductive question: if we know the maximum subarray sum ending at position $i$, what is the maximum subarray sum ending at position $i+1$? The answer turns out to be relatively straightforward: either the maximum subarray sum ending at position $i+1$ includes the maximum subarray sum ending at position $i$ as a prefix, or it doesn't. Thus, we can compute the maximum subarray sum ending at position $i$ for all positions $i$ by iterating once over the array. As we go, we simply keep track of the maximum sum we've ever seen. 
Thus, the problem can be solved with the following code, expressed here in Python: -\begin{lstlisting}[language = Python] -def max_subarray(A): - max_ending_here = max_so_far = A[0] - for x in A[1:]: - max_ending_here = max(x, max_ending_here + x) - max_so_far = max(max_so_far, max_ending_here) - return max_so_far -\end{lstlisting} - -The algorithm can also be easily modified to keep track of the starting and ending indices of the maximum subarray (when max\_so\_far changes) as well as the case where we want to allow zero-length subarrays (with implicit sum 0) if all elements are negative. For example: - -Now, let us see how we solve the maximum subarray problem with the product operation instead of the sum. - -\begin{examples}[resume] - \item \textbf{Maximum Product Subarray (L152, **).} Given an integer array nums, find the contiguous subarray within an array (containing at least one number) which has the largest product. -\begin{lstlisting}[numbers=none] -Example 1: - -Input: [2,3,-2,4] -Output: 6 -Explanation: [2,3] has the largest product 6. - -Example 2: - -Input: [-2,0,-1] -Output: 0 -Explanation: The result cannot be 2, because [-2,-1] is not a subarray. -\end{lstlisting} - -\textbf{Solution: Kadane's Algorithm with product.} For the product, the difference compared with the sum is that max\_ending\_here is not necessarily computed from the previous max\_ending\_here and the current element: if the current element is negative, the previous maximum multiplied by it might even become the smallest value, while the previous minimum multiplied by it might become the largest. Therefore, we need to track another variable, min\_ending\_here. Let us see the Python code, which is a straightforward implementation of the product-modified Kadane's algorithm.
-\begin{lstlisting}[language=Python] -from sys import maxsize -class Solution(object): - def maxProduct(self, nums): - """ - :type nums: List[int] - :rtype: int - """ - if not nums: - return 0 - n = len(nums) - max_so_far = nums[0] - min_local, max_local = nums[0], nums[0] - for i in range(1, n): - a = min_local*nums[i] - b = max_local*nums[i] - max_local = max(nums[i], a, b) - min_local = min(nums[i], a, b) - max_so_far = max(max_so_far, max_local) - return max_so_far -\end{lstlisting} -\end{examples} -\subsection{Subarray or Substring} -It will lower the complexity from $O(n^2)$ or $O(n^3)$ to $O(n)$. -\begin{examples}[resume] -\item \textbf{Longest Valid Parentheses (L32, hard).} Given a string containing just the characters '(' and ')', find the length of the longest valid (well-formed) parentheses substring. -\begin{lstlisting}[numbers=none] -Example 1: - -Input: "(()" -Output: 2 -Explanation: The longest valid parentheses substring is "()" - -Example 2: - -Input: ")()())" -Output: 4 -Explanation: The longest valid parentheses substring is "()()" -\end{lstlisting} -\textbf{Solution 1: Dynamic programming.} We define the state to be the longest length ends at this position. We would know only ')' can possibly has value larger than 0. At all position of '(' it is 0. As our define, for the following case: -\begin{lstlisting} - ") ( ) ( ) )" - dp 0 0 2 0 4 0 - ") ( ) ( ( ( ) ) ) (" - dp 0 0 2 0 0 0 2 4 8 0 -\end{lstlisting} -Thus, when we are at position ')', we look for i-1, there are two cases: -\begin{lstlisting} -1) if s[i-1] == '(', it is an closure, dp[i]+=2, then we check dp[i-2] to connect with previous longest length. for example in case 1, ")()()", where dp[i] = 4. -2) if s[i-1] == ')', then we check at position i-1-dp[i-1], in case , at dp[i] = 8, if at its corresponding position we check if it is '('. If it is we increase the count by 2, and connect it with previous position. 
-\end{lstlisting} -\begin{lstlisting}[language=Python] -def longestValidParentheses(self, s): - """ - :type s: str - :rtype: int - """ - if not s: - return 0 - dp = [0]*len(s) - for i in range(1, len(s)): - c = s[i] - if c == ')':#check previous position - if s[i-1] == '(':#this is the closure - dp[i] +=2 - if i-2>=0: #connect with previous length - dp[i]+=dp[i-2] - if s[i-1] == ')': #look at i-1-dp[i-1] for '(' - if i-1-dp[i-1]>=0 and s[i-1-dp[i-1]] == '(': - dp[i] = dp[i-1]+2 - if i-1-dp[i-1]-1 >=0: # connect with previous length - dp[i-1]+=dp[i-1-dp[i-1]-1] - print(dp) - return max(dp) -# input "(()))())(" -# output [0, 0, 2, 4, 0, 0, 2, 0, 0] -\end{lstlisting} -\textbf{Solution 2: Using Stack}. -\begin{lstlisting}[language=Python] -def longestValidParentheses(self, s): - if not s: - return 0 - stack=[-1] - ans = 0 - for i, c in enumerate(s): - if c == '(': - stack.append(i) - else: - if stack: - stack.pop() - if not stack: - stack.append(i) - else: - ans = max(ans, i-stack[-1]) - return ans -\end{lstlisting} - -\end{examples} - -\subsection{Exercise} -\begin{enumerate} - \item 639. Decode Ways II (hard) -\end{enumerate} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%%%%% Medium -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -\section{Single Sequence $O(n^2)$} -\label{sec_single_n2} -In this section, we will analysis the second type in Table~\ref{tab:single_sequence} where we have $O(n)$ subproblems, and each subproblem is dependent on all the previous smaller subproblems, thus gave us $O(n^2)$ time complexity. The problems here further can be categorized as \textbf{Subsequence} and \textbf{Splitting}. -\begin{enumerate} - \item 300. Longest Increasing Subsequence (medium) - \item 139. Word Break (Medium) - \item 132. Palindrome Partitioning II (hard) - \item 123. Best Time to Buy and Sell Stock III (hard) - \item 818. 
Race Car (hard) -\end{enumerate} -\subsection{Subsequence} -\begin{examples}[resume] - \item \textbf{Longest Increasing Subsequence (L300, medium).} Given an unsorted array of integers, find the length of longest increasing subsequence. -\begin{lstlisting}[numbers=none] -Example: - -Input: [10,9,2,5,3,7,101,18] -Output: 4 -Explanation: The longest increasing subsequence is [2,3,7,101], therefore the length is 4. -\end{lstlisting} -\textit{Note: (1) There may be more than one LIS combination, it is only necessary for you to return the length. (2) Your algorithm should run in O(n2) complexity.} - -Follow up: Could you improve it to O(n log n) time complexity? - -\begin{figure}[h] - \centering - \includegraphics[width=\columnwidth]{fig/LIS_tree.png} - \caption{State Transfer Tree Structure for LIS, each path represents a possible solution. Each arrow represents an move: find an element in the following elements that's larger than the current node.} - \label{fig:tree_lis} -\end{figure} -\textbf{Solution 1: Induction}. For each subproblem, we show the result as follows. Each state dp[i] we represents the longest increasing subsequence ends with nums[i]. The reconstruction depends on all the previous i-1 subproblems, as shown in Eq.~\ref{LIS_equation_2}. -\begin{lstlisting} -subproblem: [], [10], [10,9], [10,9,2],[10,9,2,5],[10,9,2,5,3], [10,9,2,5,3, 7]... -Choice: -ans: 0, 1, 1, 1, 2, 2, 3, -\end{lstlisting} -\begin{equation} -\label{LIS_equation_2} - f(i) = \begin{cases} - 1 + max(f(j)),& 0 nums[j]: - max_before = max(max_before, LIS[j+1]) - LIS[i+1] = max_before+1 - return max(LIS) -\end{lstlisting} -\end{examples} - -\subsection{Splitting} -Need to figure out how to fill out the two-dimensional dp matrix for splitting. -\begin{examples}[resume] -\item \textbf{Word Break (L139, **).} Given a non-empty string s and a dictionary wordDict containing a list of non-empty words, determine if s can be segmented into a space-separated sequence of one or more dictionary words. 
\textit{Note: (1) The same word in the dictionary may be reused multiple times in the segmentation. (2) You may assume the dictionary does not contain duplicate words.} -\begin{lstlisting}[numbers=none] -Example 1: - -Input: s = "leetcode", wordDict = ["leet", "code"] -Output: true -Explanation: Return true because "leetcode" can be segmented as "leet code". - -Example 2: - -Input: s = "applepenapple", wordDict = ["apple", "pen"] -Output: true -Explanation: Return true because "applepenapple" can be segmented as "apple pen apple". - Note that you are allowed to reuse a dictionary word. - -Example 3: - -Input: s = "catsandog", wordDict = ["cats", "dog", "sand", "and", "cat"] -Output: false -\end{lstlisting} -\textbf{Solution: Induction + Splitting}. Like most single sequence problems, we have n overlapping subproblems; take ``leetcode'' as an example. -\begin{lstlisting}[numbers=none] -subproblem: '', 'l', 'le', 'lee', 'leet', 'leetc', 'leetco', 'leetcod', 'leetcode'. -ans: 1, 0, 0, 0, 1, 0, 0, 0, 1 -\end{lstlisting} -Thus, deduction still works here. We manually write down the result of each subproblem. Suppose we are trying to obtain the answer for 'leet'; how does it work? -If the answer for 'lee' is true and 't' is in the dictionary, then we have true. Or, if the answer for 'le' is true and 'et' is in the dictionary, we have true. Unlike the problems before, the answer for 'leet' can only be constructed by checking all the previous smaller subproblems. -\begin{lstlisting}[language=Python] -def wordBreak(self, s, wordDict): - wordDict = set(wordDict) - n = len(s) - dp = [False]*(n+1) - dp[0] = True #set 1 for empty str '' - for i in range(1, n+1): - for j in range(i): - if dp[j] and s[j:i] in wordDict: # check previous result, and new word s[j:i] - dp[i] = True - - return dp[-1] -\end{lstlisting} -\begin{figure}[h] - \centering - \includegraphics[width=0.8\columnwidth]{fig/word_break_139.png} - \includegraphics[width=0.8\columnwidth]{fig/tree_word_break.png} - \caption{Word Break with DFS.
For the tree, each arrow means checking the word = parent-child and then recursively checking the result of the child. } - \label{fig:word_break_dfs} -\end{figure} -\textbf{DFS+Memo}. To understand why each subproblem depends on $O(n)$ even smaller subproblems, we can look at the process of solving the problem with DFS shown in Fig.~\ref{fig:word_break_dfs} (we can also draw a tree structure, which will be more obvious). ``leetcode'' and ``leet'' both compute the subproblems '', 'l', 'le', 'lee'. Thus we can use a memo to save the solved subproblems. From the tree structure, each root node has $O(n)$ subbranches, which is why each subproblem depends on $O(n)$ smaller ones. To complete this, we give the code for the DFS version. -\begin{lstlisting}[language=Python] -def wordBreak(self, s, wordDict): - wordDict = set(wordDict) - #backtracking - def DFS(start, end, memo): - if start >= end: - return True - if start not in memo: - if s[start:end] in wordDict: - memo[start] = True - return memo[start] - - for i in range(start, end+1): - word = s[start:i] #i is the splitting point - if word in wordDict: - if i not in memo: - memo[i] = DFS(i, end, memo) - if memo[i]: - return True - memo[start] = False - - return memo[start] - - return DFS(0, n, {}) -\end{lstlisting} - \item \textbf{ Palindrome Partitioning II (L132, ***)} Given a string s, partition s such that every substring of the partition is a palindrome. Return the minimum cuts needed for a palindrome partitioning of s. -\begin{lstlisting}[numbers=none] -Example: - -Input: "aab" -Output: 1 -Explanation: The palindrome partitioning ["aa","b"] could be produced using 1 cut. -\end{lstlisting} -\textbf{Solution: use two dp.} One tracks whether a substring is a palindrome, and the other computes the minimum number of cuts.
-\begin{lstlisting}[language = Python] - def minCut(self, s): - """ - :type s: str - :rtype: int - """ - pal = [[False for _ in range(len(s))] for _ in range(len(s))] - cuts = [len(s)-i-1 for i in range(len(s))] - for start in range(len(s)-1,-1,-1): - for end in range(start, len(s)): - if s[start] == s[end] and (end-start < 2 or pal[start+1][end-1]): - pal[start][end] = True - if end == len(s)-1: - cuts[start] = 0 - else: - cuts[start] = min(cuts[start], 1+cuts[end+1]) - return cuts[0] -\end{lstlisting} -\item \textbf{Best Time to Buy and Sell Stock III (L123, hard).} Say you have an array for which the ith element is the price of a given stock on day i. Design an algorithm to find the maximum profit. You may complete at most two transactions. \textit{Note: You may not engage in multiple transactions at the same time (i.e., you must sell the stock before you buy again).} -\begin{lstlisting}[numbers=none] -Example 1: - -Input: [3,3,5,0,0,3,1,4] -Output: 6 -Explanation: Buy on day 4 (price = 0) and sell on day 6 (price = 3), profit = 3-0 = 3. - Then buy on day 7 (price = 1) and sell on day 8 (price = 4), profit = 4-1 = 3. - -Example 2: -\begin{lstlisting} -Input: [1,2,3,4,5] -Output: 4 -Explanation: Buy on day 1 (price = 1) and sell on day 5 (price = 5), profit = 5-1 = 4. - Note that you cannot buy on day 1, buy on day 2 and sell them later, as you are - engaging multiple transactions at the same time. You must sell before buying again. - -Example 3: -Input: [7,6,4,3,1] -Output: 0 -Explanation: In this case, no transaction is done, i.e. max profit = 0. -\end{lstlisting} -\textbf{Solution:} the difference compared with I is that we need at most two times of transaction. We split the array into two parts from i, the max profit we can get till i and the max profit we can get from i to n. To get the maximum profit of each part is the same as the problem I. At last, the answer is max{preProfit[i] + postProfit[i]},$(0\leq i\leq n-1)$. 
However, we would get $O(n^2)$ time complexity if we use the following code, which has a lot of redundancy. -\begin{lstlisting}[language=Python] -from sys import maxsize -class Solution: - def maxProfit(self, prices): - """ - :type prices: List[int] - :rtype: int - """ - def maxProfitI(start, end): - - if start == end: - return 0 - max_global_profit = 0 - min_local = prices[start] - for i in range(start+1, end+1): - max_global_profit= max(max_global_profit, prices[i]-min_local) - min_local = min(min_local, prices[i]) - return max_global_profit - - if not prices: - return 0 - n = len(prices) - min_local = prices[0] - preProfit, postProfit = [0]*n, [0]*n - - for i in range(n): - preProfit[i] = maxProfitI(0,i) - postProfit[i] = maxProfitI(i,n-1) - maxProfit = max([pre+post for pre, post in zip(preProfit, postProfit)]) - return maxProfit -\end{lstlisting} -To avoid repeated work, we can use one for loop to get all the values of preProfit, and another to get the values of postProfit. For postProfit, we need to traverse the array in reverse, from the end to the start; this way we track the local\_max, the profit at each position is local\_max - prices[i], and both passes keep a global max profit.
The code is as follows: -\begin{lstlisting}[language = Python] -def maxProfit(self, prices): - """ - :type prices: List[int] - :rtype: int - """ - if not prices: - return 0 - n = len(prices) - - preProfit, postProfit = [0]*n, [0]*n - #get preProfit, from 0-n, track the mini_local, global_max - min_local = prices[0] - max_global_profit = 0 - for i in range(1,n): - max_global_profit= max(max_global_profit, prices[i]-min_local) - min_local = min(min_local, prices[i]) - preProfit[i] = max_global_profit - #get postProfit, from n-1 to 0, track the max_local, global_min - max_local = prices[-1] - max_global_profit = 0 - for i in range(n-1, -1, -1): - max_global_profit= max(max_global_profit, max_local-prices[i]) - max_local = max(max_local, prices[i]) - postProfit[i] = max_global_profit - # iterate preProfit and postProfit to get the maximum profit - maxProfit = max([pre+post for pre, post in zip(preProfit, postProfit)]) - return maxProfit -\end{lstlisting} - -818. Race Car (hard) -\end{examples} - -%%%%%%%%%%%%%%%%%%%range type %%%%%%%%%%%%%%%%%%%%%%%%%%% -\section{Single Sequence $O(n^3)$} -The difference of this type of singe sequence is that there are not only n subproblems for a sequence of size n, each subarray is A[0:i], i=[0, n]. There will be $n^2$ subproblems, each states as subarray $A[i:j], i \leq j$. Usually for this type, it shows such optimal substructure $dp[i][j] = f(dp[i][k], dp[k][j]), k \in [i, j]$. This would give us the $O(n^3)$ time complexity and $O(n^2)$ space complexity. The classical examples of this type of problem is matrix-multiplcation as explained in \textit{Introduction to Algorithms} and stone game. -\label{sec_single_n3} -\subsection{Interval} -Problems include Stone Game, Burst Ballons, and Scramble String. The features of this type of dynamic programming is we try to get the min/max/count of a range of array; and the state transfer function updates through the range by from the big range to small rang. 
- -\begin{examples}[resume] -\item \textbf{486. Predict the Winner (medium)} -Given an array of scores that are non-negative integers. Player 1 picks one of the numbers from either end of the array followed by the player 2 and then player 1 and so on. Each time a player picks a number, that number will not be available for the next player. This continues until all the scores have been chosen. The player with the maximum score wins. - -Given an array of scores, predict whether player 1 is the winner. You can assume each player plays to maximize his score. -\begin{lstlisting}[numbers=none] -Example 1: Input: [1, 5, 2]. Output: False - -Explanation: Initially, player 1 can choose between 1 and 2. -If he chooses 2 (or 1), then player 2 can choose from 1 (or 2) and 5. If player 2 chooses 5, then player 1 will be left with 1 (or 2). So, final score of player 1 is 1 + 2 = 3, and player 2 is 5. Hence, player 1 will never be the winner and you need to return False. - -Example 2: Input: [1, 5, 233, 7]. Output: True - -Explanation: Player 1 first chooses 1. Then player 2 have to choose between 5 and 7. No matter which number player 2 choose, player 1 can choose 233. Finally, player 1 has more score (234) than player 2 (12), so you need to return True representing player1 can win. -\end{lstlisting} -Note: -\begin{enumerate} - \item 1 <= length of the array <= 20. - \item Any scores in the given array are non-negative integers and will not exceed 10,000,000. - \item If the scores of both players are equal, then player 1 is still the winner. - \end{enumerate} -Solution: At first, we can not use $f[i]$ to denote the state, because we can choose element from both the left and the right side, we use $f[i][j]$ instead, which represents the maximum value we can get from $i$ to $j$ range. Second, when we deal with problem with potential accumulate value, we can use $sum[i][j]$ to represent the sum in the range $i-j$. 
Each player acts to maximize their own total points. For the state f[i][j], the current player has two choices, taking the left end or the right end, which leaves f[i+1][j] or f[i][j-1] respectively for the other player. To gain the maximum score in range [i,j], the current player picks the move that leaves the opponent the minimum of f[i+1][j] and f[i][j-1]. Therefore, we have the state transfer function: $f[i][j] = sum[i][j] - min(f[i+1][j], f[i][j-1])$. Each subproblem relies on only two subproblems, which makes the total time complexity $O(n^2)$. This is actually a game theory type of problem. According to the function: if the range has size $1$, i.e., $i==j$, the value is $nums[i]$, which is the initialization. For the loops, the outer for loop iterates over the range size $l$ from $2$ to $n$, the inner for loop picks the start index $i$ in range $[0, n-l]$, and the end index is then $j = i+l-1$. The answer to this problem is whether $f[0][n-1] \geq sum/2$; if it holds, we return true. - -The for loops first initialize the diagonal elements and then fill in the elements to the upper right of the diagonal, i.e., the upper triangle.
-\begin{figure}[h] - \centering - \includegraphics[width = 0.6\columnwidth]{fig/guesswinner.png} - \caption{Caption} - \label{fig:my_label} -\end{figure} -\begin{lstlisting}[language = Python] -def PredictTheWinner(nums): - """ - :type nums: List[int] - :rtype: bool - """ - if not nums: - return False - if len(nums)==1: - return True - #sum[i,j] = sum[j+1]-sum[i] - sums = nums[:] - for i in range(1,len(nums)): - sums[i]+=sums[i-1] - sums.insert(0,0) - - dp=[[0 for col in range(len(nums))] for row in range(len(nums))] - for i in range(len(nums)): - dp[i][i] = nums[i] - - for l in range(2, len(nums)+1): - for i in range(0,len(nums)-l+1): #start 0, end len -l+1 - j =i+l-1 - dp[i][j] = (sums[j+1]-sums[i])-min(dp[i+1][j],dp[i][j-1]) - n =len(nums) - return dp[0][n-1]>=sums[-1]/2 -\end{lstlisting} -Else, we use $f[i][j] = max(nums[i]-f[i+1][j], nums[j]-f[i][j-1])$ to represent the difference of the points gained by player one compared with player two. When $f[i][j]$ is the state of player one, then $f[i][j-1]$ and $f[i+1][j]$ are the potential states of player two. -\begin{lstlisting}[language = Python] -class Solution: - def PredictTheWinner(self, nums): - """ - :type nums: List[int] - :rtype: bool - """ - n = len(nums) - if n == 1 or n%2==0 : return True - dp = [[0] * n for _ in range(n)] - for l in range(2, len(nums)+1): - for i in range(0,len(nums)-l+1): #start 0, end len -l+1 - j =i+l-1 - dp[i][j] = max(nums[j] - dp[i][j-1],nums[i] - dp[i+1][j]) - return dp[0][-1]>=0 -\end{lstlisting} -Actually the for loop we can use a simpler one. However, it is harder to understand to code compared with the standard version. -\begin{lstlisting}[language = Python] -for i in range(n-1,-1,-1): - dp[i][i] = nums[i] #initialization - for j in range(i+1,n): - dp[i][j] = max(nums[j] - dp[i][j-1],nums[i] - dp[i+1][j]) -\end{lstlisting} - - -\item \textbf{Stone Game} - -There is a stone game. At the beginning of the game the player picks n piles of stones in a line. 
The goal is to merge the stones into one pile observing the following rules: - - At each step of the game, the player can merge two adjacent piles into a new pile. The score is the number of stones in the new pile. You are to determine the minimum of the total score. - Example - For [4, 1, 1, 4], in the best solution, the total score is 18: - - Merge second and third piles [4, 2, 4], score $+2$ - Merge the first two piles [6, 4], score $+6$ - Merge the last two piles [10], score $+10$ - -Other two examples: - [1, 1, 1, 1] return 8 - [4, 4, 5, 9] return 43 - -\item \textbf{312. Burst Balloons} - -Given n balloons, indexed from 0 to n-1. Each balloon is painted with a number on it represented by array nums. You are asked to burst all the balloons. If you burst balloon i you will get nums[left] * nums[i] * nums[right] coins. Here left and right are adjacent indices of i. After the burst, the left and right then become adjacent. - -Find the maximum coins you can collect by bursting the balloons wisely. - -Note: - (1) You may imagine nums[-1] = nums[n] = 1. They are not real, therefore you cannot burst them.
- (2) 0 $\leq$ nums[i] $\leq$ 500, 0 $\leq$ nums[i] $\leq$ 100 -\begin{lstlisting}[numbers=none] -Example: - -Given [3, 1, 5, 8] - -Return 167 - -nums = [3,1,5,8] --> [3,5,8] --> [3,8] --> [8] --> [] - coins = 3*1*5 + 3*5*8 + 1*3*8 + 1*8*1 = 167 -\end{lstlisting} - -at first burst c[i][k-1] then burst c[k+1][j], then burst k, -\begin{lstlisting}[language = Python] -class Solution: - def maxCoins(self, nums): - """ - :type nums: List[int] - :rtype: int - """ - n = len(nums) - nums.insert(0,1) - nums.append(1) - - c = [[0 for _ in range(n+2)] for _ in range(n+2)] - for l in range(1, n+1): #length [1,n] - for i in range(1,n-l+2): #start [1, n-l+1] - j = i+l-1 #end =i+l-1 - - #function is a k for loop - for k in range(i,j+1): - c[i][j] = max(c[i][j], c[i][k-1]+nums[i-1]*nums[k]*nums[j+1]+c[k+1][j]) - #return from 1 to n - return c[1][n] -\end{lstlisting} - -\item \textbf{516. Longest Palindromic Subsequence} - -Given a string s, find the longest palindromic subsequence’s length in s. You may assume that the maximum length of s is 1000. -\begin{lstlisting}[numbers=none] -Example 1: - - Input: -"bbbab" -Output: -4 -One possible longest palindromic subsequence is "bbbb". - -Example 2: - Input: -"cbbd" -Output: -2 -One possible longest palindromic subsequence is "bb". -\end{lstlisting} - -Solution: for this problem, we have state dp[i][j] means from i to j, the length of the longest palindromic subsequence. dp[i][i] = 1. Then we use this range to fill in the dp matrix (upper triangle.) 
-\begin{lstlisting}[language = Python] -def longestPalindromeSubseq(self, s): - """ - :type s: str - :rtype: int - """ - nums=s - if not nums: - return 0 - if len(nums)==1: - return 1 - - def isPanlidrome(s): - l,r= 0, len(s)-1 - while l<=r: - if s[l]!=s[r]: - return False - else: - l+=1 - r-=1 - return True - - if isPanlidrome(s): #to speed up - return len(s) - - rows=len(nums) - dp=[[0 for col in range(rows)] for row in range(rows)] - for i in range(0,rows): - dp[i][i] = 1 - - for l in range(2, rows+1): #use a length - for i in range(0,rows-l+1): #start 0, end len -l+1 - j =i+l-1 - if j>rows: - continue - if s[i]==s[j]: - dp[i][j] = dp[i+1][j-1]+2 - else: - left_size,right_size = dp[i][j-1],dp[i+1][j] - dp[i][j]= max(dp[i][j-1], right_size) - print(dp) - return dp[0][rows-1] -\end{lstlisting} - -Or else, we can say, i need to be from i+1 to i, from big to small, j need to from j-1 or j to j, from small to big. -\begin{lstlisting}[language = Python] -for (int i = n - 1; i >= 0; --i) { - dp[i][i] = 1; - for (int j = i + 1; j < n; ++j) { - if (s[i] == s[j]) { - dp[i][j] = dp[i + 1][j - 1] + 2; - } else { - dp[i][j] = max(dp[i + 1][j], dp[i][j - 1]); - } - } - } -\end{lstlisting} - -Now to do the space optimization: -\begin{lstlisting}[language = Python] -class Solution { -public: - int longestPalindromeSubseq(string s) { - int n = s.size(), res = 0; - vector dp(n, 1); - for (int i = n - 1; i >= 0; --i) { - int len = 0; - for (int j = i + 1; j < n; ++j) { - int t = dp[j]; - if (s[i] == s[j]) { - dp[j] = len + 2; - } - len = max(len, t); - } - } - for (int num : dp) res = max(res, num); - return res; - } -}; -\end{lstlisting} -\end{examples} - %%%%%%%%%%%%%%%%%%%Coordinate: BFS and DP %%%%%%%%%%%%%%%%%%%%%%%%%%% -\section{Coordinate: BFS and DP} -\label{sec_coordinate} -In this type of problems, we are give an array or a matrix with $1$D or $2$D axis. 
We either do 'optimization' to find the minimum path sum, or do the 'counting' to get the total number of paths, or check if we can start from A and end at B. %The four key elements for type of dynamic programming: - -\textbf{Two-dimensional}. For an $O(mn)$-sized coordinate, Tab.~\ref{tab:2d_coordinate} shows two different types: one with only $O(mn)$ subproblems, and the other with $O(kmn)$ subproblems, where $k$ normally represents the number of steps. Because a 2D coordinate is inherently a graph, this type is closely related to the graph traversal algorithms: BFS for counting and DFS for the optimization problems. -\begin{table}[h] -\begin{small} -\centering -\noindent\captionof{table}{ Different Types of Coordinate Dynamic Programming} - \noindent \begin{tabular}{|p{0.14\columnwidth}|p{0.14\columnwidth}| p{0.14\columnwidth}|p{0.14\columnwidth}|p{0.14\columnwidth}|p{0.14\columnwidth}|} - \hline - Case & Input & Subproblems & f(n) & Time & Space \\ \hline -Easy & $O(mn)$& $O(mn)$ & $O(1)$ & $O(mn)$ & $O(mn)\rightarrow O(m)$ \\\hline -Medium & $O(mn)$& $O(kmn)$ & $O(1)$ & $O(kmn)$ & $O(kmn)\rightarrow O(mn)$\\ \hline -\end{tabular} - \label{tab:2d_coordinate} - \end{small} -\end{table} -% \begin{enumerate} -For this type of problem, understanding the BFS-related solution is more important than just memorizing the template of the dynamic programming solution. Therefore, we will use two parts, Counting (BFS and DP) and Optimization, both in Sec.~\ref{sec_coordinate_counting}, with LeetCode examples to learn how to solve this type of dynamic programming problem. -% \item state $f[x]$ denotes state from the starting point to axis $x$, for $2$D, we use $f[x][y]$ denotes the state from $x$ to $y$; Will have $n$ or $m\times n$ space. -% \item function: we need to find the relation between $f[x]$ or $f[x][y]$ with its previous state (from top-down) or afterwards state. -% \item initialization: initialize the first column and first row, and sometimes is the first column and the last column.
Initialize the edge condition that cannot complete the function. -% \item answer: $f[n-1]$ or $max(f)$ or $f[-1][-1]$ -% \end{enumerate} - -%%%%%%%%%%%%%%%%%%%One Time Traversal %%%%%%%%%%%%%%%%%%%%%%%%%%% -\subsection{One Time Traversal} -\label{sec_coordinate_counting} -In this section, we want to explore how we can modify our solution from BFS to dynamic programming. Inherently, dynamic programming solutions for this type of problem are optimized breadth-first search. - -\subsubsection{Counting} In this type, any location in the coordinate will be visited only once. Thus, it gives $O(mn)$ time complexity. - -62. Unique Paths -\begin{lstlisting} -A robot is located at the top-left corner of a m x n grid (marked 'Start' in the diagram below). - -The robot can only move either down or right at any point in time. The robot is trying to reach the bottom-right corner of the grid (marked 'Finish' in the diagram below). - -How many possible unique paths are there? - -Above is a 3 x 7 grid. How many possible unique paths are there? - -Note: m and n will be at most 100. - -Example 1: - -Input: m = 3, n = 2 -Output: 3 -Explanation: -From the top-left corner, there are a total of 3 ways to reach the bottom-right corner: -1. Right -> Right -> Down -2. Right -> Down -> Right -3. Down -> Right -> Right - -Example 2: - -Input: m = 7, n = 3 -Output: 28 -\end{lstlisting} -\begin{figure}[h] - \centering - \includegraphics[width=0.6\columnwidth]{fig/unique_path.png} - \caption{One Time Graph Traversal. Different colors mean different levels of traversal.} - \label{fig:unique_path} -\end{figure} -\textbf{BFS}. Fig.~\ref{fig:unique_path} shows the BFS traversal process in the matrix. We can clearly see that each node and edge is only visited once. The BFS solution is straightforward and is the best solution. We use bfs to track the nodes in the queue at each level, and dp to record the number of unique paths to location $(i,j)$.
Because each location is only visted once, thus, at each level, using the same dp will have no conflict. -\begin{lstlisting}[language = Python] -# BFS -def uniquePaths(self, m, n): - dp = [[0 for _ in range(n)] for _ in range(m)] - dp[0][0] = 1 - bfs = set([(0,0)]) - dirs = [(1, 0), (0,1)] - while bfs: - new_bfs = set() - for x, y in bfs: - for dx, dy in dirs: - nx, ny = x+dx, y+dy - if 0<=nx= 0: - dp[t] += dp[t-n] - else: - break - return dp[-1] -\end{lstlisting} -\subsubsection{Optimization} -64. Minimum Path Sum (medium) -\begin{lstlisting} - -Given a m x n grid filled with non-negative numbers, find a path from top left to bottom right which minimizes the sum of all numbers along its path. - -Note: You can only move either down or right at any point in time. - -Example 1: - -[[1,3,1], - [1,5,1], - [4,2,1]] -\end{lstlisting} - -Given the above grid map, return 7. Because the path $1\rightarrow3\rightarrow1\rightarrow1\rightarrow1$ minimizes the sum. -\begin{figure}[h] - \centering - \includegraphics[width = 0.5\columnwidth] {fig/min_path.png} - \caption{Caption} - \label{fig:my_label} -\end{figure} -\textbf{Dynamic Programming}. For this problem, it is exactly the same as all the previous problems, the only difference is the state transfer function. $f(i,j) = g(i,j) + min(f(i-1, j), f(i, j-1))$. -\begin{lstlisting}[language = Python] -# dynamic programming -def minPathSum(self, grid): - if not grid: - return 0 - rows, cols = len(grid), len(grid[0]) - dp = [[0 for _ in range(cols)] for _ in range(rows)] - dp[0][0] = grid[0][0] - - # initialize row - for c in range(1, cols): - dp[0][c] = dp[0][c-1] + grid[0][c] - - # initialize col - for r in range(1, rows): - dp[r][0] = dp[r-1][0] + grid[r][0] - - for r in range(1, rows): - for c in range(1, cols): - dp[r][c] = grid[r][c] + min(dp[r-1][c], dp[r][c-1]) - return dp[-1][-1] -\end{lstlisting} -\textbf{Dynamic Programming with Space Optimization}. 
As can be seen, each time when we update $sum[i][j]$, we only need $sum[i - 1][j]$ (at the current column) and $sum[i][j - 1]$ (at the left column). So we need not maintain the full $m*n$ matrix. Maintaining two columns is enough and now we have the following code. -\begin{lstlisting}[language = Python] -rows, cols= len(grid),len(grid[0]) - #O(rows) - pre, cur =[0]*rows, [0]*rows - #intialize the the first col, walk from the (0,0)->(1,0)->(row,0) - pre[0]=grid[0][0] - - for row in range(1, rows): - pre[row]=pre[row-1]+grid[row][0] #this is equal to cost[0][row] - for col in range(1, cols): - cur[0] = pre[0]+grid[0][col] #initialize the first row, current [0][0] - for row in range(1, rows): - cur[row]= min(cur[row-1], pre[row])+grid[row][col] - pre,cur = cur, pre - return pre[rows-1] -\end{lstlisting} - -Further inspecting the above code, it can be seen that maintaining pre is for recovering $pre[i]$, which is simply $cur[i]$ before its update. So it is enough to use only one vector. Now the space is further optimized and the code also gets shorter. -\begin{lstlisting}[language = Python] -rows, cols= len(grid),len(grid[0]) - #O(rows) - cur = [0]*rows - #intialize the the first col, walk from the (0,0)->(1,0)->(row,0) - cur[0]=grid[0][0] - for row in range(1, rows): - cur[row]=cur[row-1]+grid[row][0] - for col in range(1, cols): - cur[0] = cur[0]+grid[0][col] #initialize the first row - for row in range(1, rows): - cur[row]= min(cur[row-1], cur[row])+grid[row][col] - return cur[rows-1] -\end{lstlisting} - -Now, we use O(1) space by reusing the original grid. 
-\begin{lstlisting}[language = Python] -rows, cols= len(grid),len(grid[0]) - #O(1) space by reusing the space here - for i in range(0, rows): - for j in range(0, cols): - if i==0 and j ==0: - continue - elif i==0 : - grid[i][j]+=grid[i][j-1] - elif j==0: - grid[i][j]+=grid[i-1][j] - else: - grid[i][j]+= min(grid[i-1][j], grid[i][j-1]) - return grid[rows-1][cols-1] -\end{lstlisting} -%%%%%%%%%%%%%%%%%%%Multiple-time Traversal %%%%%%%%%%%%%%%%%%%%%%%%%%% -\subsection{Multiple-time Traversal} -In this type, we need to traverse each location for K times, making K steps of moves thus we can get the final solution. This will have $O(kmn)$ time complexity. -\subsubsection{Two-dimensional Coordinate} -935. Knight Dialer (Medium) -\begin{figure} - \centering - \includegraphics{fig/knight_dialer.png} - \caption{Caption} - \label{fig:my_label} -\end{figure} -\begin{lstlisting} -A chess knight can move as indicated in the chess diagram below: -This time, we place our chess knight on any numbered key of a phone pad (indicated above), and the knight makes N-1 hops. Each hop must be from one key to another numbered key. - -Each time it lands on a key (including the initial placement of the knight), it presses the number of that key, pressing N digits total. - -How many distinct numbers can you dial in this manner? - -Since the answer may be large, output the answer modulo 10^9 + 7. - -Example 1: - -Input: 1 -Output: 10 - -Example 2: - -Input: 2 -Output: 20 - -Example 3: - -Input: 3 -Output: 46 - -Note: - - 1 <= N <= 5000 -\end{lstlisting} -\textbf{Most Naive BFS}. Analysis: First, we need to figure out from each number, where is the possible next moves. We would have get this dictionary: $moves = \{0:[4, 6], 1:[6, 8], 2: [7, 9], 3: [4,8], 4: [0, 3, 9], 5:[], 6:[0,1,7], 7:[2,6], 8:[1,3],9:[2,4]\}$. This is not exactly a coordinate, however, because we can make endless move, we would have a graph. 
The brute force is we put [0,1,2,3,4,5,6,7,8,9] as the start positions, and we use BFS to control the steps, the total number of paths is the sum over of all the leaves. At each step, we would do two things 1) generate a list to save all the possible next numbers; 2) if it reaches to the leaves, sum up all the nodes. -\begin{lstlisting}[language = Python] -# naive BFS solution -def knightDialer(self, N): - """ - :type N: int - :rtype: int - """ - if N == 1: - return 10 - moves = {0:[4, 6], 1:[6, 8], 2: [7, 9], 3: [4,8], 4: [0, 3, 9], 5:[], 6:[0,1,7], 7:[2,6], 8:[1,3],9:[2,4]} #4, 6 has three - - bfs = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] # all starting points - step = 1 - while bfs: - new = [] - for i in bfs: - new += moves[i] - step += 1 - bfs = new - if step == N: - return len(bfs)%(10**9+7) -\end{lstlisting} -\textbf{Optimized BFS}. However, the brute force BFS only passed 18/120 test cases. To improve it further, we know that we only need a counter to record the counter of each number in that level. This way, bfs is replaced with a counter. Now, the new code is: -\begin{lstlisting}[language = Python] -#optimized BFS exactly a DP -def knightDialer(self, N): - MOD = 10**9+7 - if N == 1: - return 10 - moves = {0:[4, 6], 1:[6, 8], 2: [7, 9], 3: [4,8], 4: [0, 3, 9], 5:[], 6:[0,1,7], 7:[2,6], 8:[1,3],9:[2,4]} #4, 6 has three - - bfs = [1]*10 - step = 1 - - while bfs: - size = 0 - new = [0]*10 - for idx, count in enumerate(bfs): - for m in moves[idx]: - new[m] += count - new[m] %= MOD - step += 1 - bfs = new - if step == N: - return sum(bfs)%(MOD) -\end{lstlisting} -\textbf{Optimized Dynamic Programming}. This is exactly a dynamic programming algorithm: $new[m] += bfs[i]$, for example, from 1 we can move to 6,8,so that we have $f(1,n) = f(6, n-1) + f(8, n-1)$. So here a state is represented by $bfs[num]$ and $step$, and it saves the count at each state. 
Now, we write it in the way of dp template: -\begin{lstlisting}[language=Python] -# optimized dynamic programming template -def knightDialer(self, N): - MOD = 10**9+7 - moves = {0:[4, 6], 1:[6, 8], 2: [7, 9], 3: [4,8], 4: [0, 3, 9], 5:[], 6:[0,1,7], 7:[2,6], 8:[1,3],9:[2,4]} #4, 6 has three - - dp = [1]*10 - - for step in range(N-1): - size = 0 - new_dp = [0]*10 - for idx, count in enumerate(dp): - for m in moves[idx]: - new_dp[m] += count - new_dp[m] %= MOD - dp = new_dp - - return sum(dp)%(MOD) -\end{lstlisting} -% \begin{lstlisting}[language = Python] -% # optimized BFS -% def knightDialer(self, N): -% """ -% :type N: int -% :rtype: int -% """ -% if N == 1: -% return 10 -% moves = {0:[4, 6], 1:[6, 8], 2: [7, 9], 3: [4,8], 4: [0, 3, 9], 5:[], 6:[0,1,7], 7:[2,6], 8:[1,3],9:[2,4]} #4, 6 has three - -% bfs = collections.Counter([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]) -% step = 1 - -% while bfs: -% size = 0 -% new = collections.Counter() -% for i in bfs: -% for m in moves[i]: -% new[m] += bfs[i] -% step += 1 -% bfs = new -% if step == N: -% return sum(bfs.values())%(10**9+7) -% \end{lstlisting} -%Now, this updated algorithm passed 93/120 test cases. - - - -688. Knight Probability in Chessboard (Medium) -\begin{lstlisting} -On an NxN chessboard, a knight starts at the r-th row and c-th column and attempts to make exactly K moves. The rows and columns are 0 indexed, so the top-left square is (0, 0), and the bottom-right square is (N-1, N-1). - -A chess knight has 8 possible moves it can make, as illustrated below. Each move is two squares in a cardinal direction, then one square in an orthogonal direction. - -Each time the knight is to move, it chooses one of eight possible moves uniformly at random (even if the piece would go off the chessboard) and moves there. - -The knight continues moving until it has made exactly K moves or has moved off the chessboard. Return the probability that the knight remains on the board after it has stopped moving. 
- -Example: - -Input: 3, 2, 0, 0 -Output: 0.0625 -Explanation: There are two moves (to (1,2), (2,1)) that will keep the knight on the board. -From each of those positions, there are also two moves that will keep the knight on the board. -The total probability the knight stays on the board is 0.0625. -\end{lstlisting} -\textbf{Optimized BFS}. Analysis: Each time we can make 8 moves, thus after K steps, we can have $8^K$ total unique paths. Thus, we just need to get the total number of paths that it ends within the board (valid paths). The first step is to write down the possible moves or directions. And, then we initialize a two-dimensional array $dp$ to record the number of paths end at $(i, j)$ after $k$ steps. Using a BFS solution, each time we just need to save all the unique positions can be reached at that step. -\begin{lstlisting}[language=Python] -# Optimized BFS solution -def knightProbability(self, N, K, r, c): - dirs = [[-2, -1], [-2, 1], [-1, -2], [-1, 2], [1, -2], [1, 2], [2,-1],[2, 1]] - dp = [[0 for _ in range(N)] for _ in range(N) ] - total = 8**K - last_pos = set([(r, c)]) - dp[r][c]=1 - - for step in range(K): - new_pos = set() - new_dp = [[0 for _ in range(N)] for _ in range(N) ] - for x, y in last_pos: - for dx, dy in dirs: - nx = x+dx - ny = y+dy - if 0<=nxi][0->j] - -#template -dp[n+1][m+1] -for i in range(n): - for j in range(m): - dp[i][j] = f(dp[pre_i][pre_j]) -return f(dp) -\end{lstlisting} - -\subsubsection{Multiple-Dimensional Traversal} -\begin{lstlisting} -dp[k][i][j] := answer of A[0->i][0->j] after k steps - -#template -dp[k][n+1][m+1] -for _ in range(k): - for i in range(n): - for j in range(m): - dp[k][i][j] = f(dp[k-1][pre_i][pre_j]) -return f(dp) -\end{lstlisting} - - - - -%%%%%%%%%%%%%%%%%%%double sequence %%%%%%%%%%%%%%%%%%%%%%%%%%% -\section{Double Sequence: Pattern Matching DP} -\label{sec_double_sequence} -\begin{figure}[h!] 
- \centering - \includegraphics[width=0.7\columnwidth]{fig/lcs_problem.png} - \caption{Longest Common Subsequence} - \label{fig:lcs} -\end{figure} -In this section, we focus on double sequences P and S with input size $O(m)+O(n)$. A double sequence can naturally be arranged as a matrix of size $(m+1)\times(n+1)$. The extra row and extra column appear because we put an empty char '' at the beginning of each string, which simplifies initialization and gives a result even for an empty string. One example is shown in Fig.~\ref{fig:lcs}. This mostly makes the time complexity for this section $O(mn)$. \textit{This type of dynamic programming can be generalized to coordinate problems. The difference is that the \textit{moves} are not given as in the coordinate section (\ref{sec_coordinate_counting})}. - -We need to find the deduction rules, or say the recurrence relation, ourselves. Most of the time, the moves are around the neighbors: for (i, j), we have potential positions of (i-1, j-1), (i-1, j), (i, j-1). For example, in the case of Longest Common Subsequence in Fig.~\ref{fig:lcs}, if the current P[i] and S[j] match, then it only depends on dp[i-1][j-1]. If not, it depends on the relation between (P(0, i), S(0,j-1)) and (P(0, i-1), S(0,j)). \textit{Filling out an exemplary table manually can guide us to find the rules. } If we do so, we would find out that even problems marked as hard on LeetCode are solvable. - -\textbf{Brute Force.} For the brute force solution: we need t - -Problems shown in this section include: - -\begin{enumerate} - \item 72. Edit Distance - \item 712. Minimum ASCII Delete Sum for Two Strings - \item 115. Distinct Subsequences (hard) - \item 44. Wildcard Matching (hard) -\end{enumerate} -\subsection{Longest Common Subsequence} - -Problem Definition: Given two strings A and B, for example A is "ABCD", and B is "ABD", the longest common subsequence is "ABD", so the length of the longest common subsequence is $3$. - - - -\textbf{Coordinate+Moves}.
Because each has $m$ and $n$ subproblems, two sequences make it a matrix problem. The result of the above example is shown in Fig.~\ref{fig:lcs}. We can try to observe the problem and generalize the moves or state transfer function. For the red marked positions, the chars in string A and B are the same. So, the length would be the result of its previous substrings plus one. Otherwise, as for the black marked positions, it is the maximum of the left and above positions. The math equation is shown in Eq.~\ref{eq_lcs}. To initialize, we need to initialize the first row and the first column, which is $f[i][0] = 0, f[0][j] = 0$. -\begin{equation} -\label{eq_lcs} - f[i][j] = \begin{cases} - 1 + f[i-1][j-1],& a[i-1] == b[j-1];\\ - max(f[i-1][j], f[i][j-1])& \text{otherwise} - \end{cases} -\end{equation} -The Python code is shown as follows: -\begin{lstlisting}[language = Python] -def LCSLen(S1, S2): - if not S1 or not S2: - return 0 - n, m = len(S1), len(S2) - f = [[0]*(m+1) for _ in range(n+1)] - #init f[0][0] = 0 - for i in range(n): - for j in range(m): - f[i+1][j+1] = f[i][j]+1 if S1[i]==S2[j] else max(f[i][j+1], f[i+1][j]) - print(f) - return f[-1][-1] -S1 = "ABCD" -S2 = "ABD" -LCSLen(S1, S2) -# output -# [[0, 0, 0, 0], [0, 1, 1, 1], [0, 1, 2, 2], [0, 1, 2, 2], [0, 1, 2, 3]] -# 3 -\end{lstlisting} - -\subsection{Other Problems} -There are more pattern-matching-related dynamic programming problems; we give them in this section. -\begin{examples}[resume] -\item \textbf{72. Edit Distance (hard).} -Given two words word1 and word2, find the minimum number of operations required to convert word1 to word2. You have the following 3 operations permitted on a word: Insert a character, Delete a character, Replace a character.
-\begin{lstlisting}[numbers=none] -Example 1: - -Input: word1 = "horse", word2 = "ros" -Output: 3 -Explanation: -horse -> rorse (replace 'h' with 'r') -rorse -> rose (remove 'r') -rose -> ros (remove 'e') - -Example 2: - -Input: word1 = "intention", word2 = "execution" -Output: 5 -Explanation: -intention -> inention (remove 't') -inention -> enention (replace 'i' with 'e') -enention -> exention (replace 'n' with 'x') -exention -> exection (replace 'n' with 'c') -exection -> execution (insert 'u') -\end{lstlisting} - -\textbf{Coordinate+Deduction}. This is similar to the LCS length. We use f[i][j] to denote the minimum number of operations needed to make the first i chars in S1 the same as the first j chars in S2. The upper bound of the minimum edit distance is max(m,n), achieved by replacing chars and then inserting or deleting the remainder. The most important step is to decide the transfer function: how to get the result of the current state f[i][j]. If directly filling in the matrix is obscure, then we can try the recursive view: -\begin{lstlisting}[numbers=none] -DFS("horse","rose") -= DFS("hors", "ros") # no edit at e -= DFS("hor", "ro") # no edit at s -= 1+ min(DFS("ho", "ro"), # delete "r" from longer one - DFS("hor", "r"), # insert "o" at the longer one, left "hor" and "r" to match - DFS("ho", "r")), # replace "r" in the longer one with "o" in the shorter one, left "ho" and "r" to match -\end{lstlisting} -This can be written as Equation~\ref{eq_edit_distance}. Thus, it can be solved by dynamic programming.
-\begin{equation} -\label{eq_edit_distance} - f[i][j] = \begin{cases} - min(f[i][j-1], f[i-1][j], f[i-1][j-1])+1,& S1[i-1] != S2[j-1];\\ - f[i-1][j-1]& \text{otherwise} - \end{cases} -\end{equation} -The Python code is as follows: -\begin{lstlisting}[language = Python] -def minDistance(word1, word2): - if not word1: - if not word2: - return 0 - else: - return len(word2) - if not word2: - return len(word1) - dp = [[0 for col in range(len(word2)+1)] for row in range(len(word1)+1)] - rows=len(word1) - cols=len(word2) - for row in range(1, rows+1): - dp[row][0] = row - for col in range(1, cols+1): - dp[0][col] = col - - for i in range(1, rows+1): - for j in range(1,cols+1): - if word1[i-1]==word2[j-1]: - dp[i][j]=dp[i-1][j-1] - else: - dp[i][j]=min(dp[i-1][j]+1, dp[i][j-1]+1, dp[i-1][j-1]+1) # add , delete, replace - return dp[rows][cols] -\end{lstlisting} -\item \textbf{115. Distinct Subsequences (hard).} - -Given a string S and a string T, count the number of distinct subsequences of S which equals T. - -A subsequence of a string is a new string which is formed from the original string by deleting some (can be none) of the characters without disturbing the relative positions of the remaining characters. (ie, "ACE" is a subsequence of "ABCDE" while "AEC" is not). -\begin{lstlisting}[numbers=none] -Example 1: - -Input: S = "rabbbit", T = "rabbit" -Output: 3 -Explanation: - -As shown below, there are 3 ways you can generate "rabbit" from S. -(The caret symbol ^ means the chosen letters) - -rabbbit -^^^^ ^^ -rabbbit -^^ ^^^^ -rabbbit -^^^ ^^^ - -Example 2: - -Input: S = "babgbag", T = "bag" -Output: 5 -Explanation: - -As shown below, there are 5 ways you can generate "bag" from S. -(The caret symbol ^ means the chosen letters) - -babgbag -^^ ^ -babgbag -^^ ^ -babgbag -^ ^^ -babgbag - ^ ^^ -babgbag - ^^^ -\end{lstlisting} - -\textbf{Coordinate}. Here still we need to fill out a matrix. We would see if the length of s is smaller than the length of t: then it is 0.
If the length is equal, which is the diagonal in the matrix, then it only depends on position (i-1, j-1) and s(i), s(j). For the lower part of the matrix it has different rule: for example, s = 'ab', t = 'a', because s[i]!=t[j], then we need to find s[0, i-1] with t[0, j]. if it equals, we can check the dp[i-1][j-1]. -\begin{lstlisting}[numbers=none] - '' a b a - '' 1 0 0 0 - a 1 1 0 0 - b 1 1 1 0 - b 1 1 2 0 - a 1 2 2 2 -\end{lstlisting} -\begin{lstlisting}[language=Python] -def numDistinct(self, s, t): - if not s or not t: - if not s and t: - return 0 - else: - return 1 - - rows, cols = len(s), len(t) - if cols > rows: - return 0 - if cols == rows: - return 1 if s==t else 0 - - # initalize - dp = [[0 for c in range(cols+1)] for r in range(rows+1)] - for r in range(rows): - dp[r+1][0] = 1 - dp[0][0] = 1 - - # fill out the lower part - for i in range(rows): - for j in range(min(i+1,cols)): - if i==j: # diagnoal - if s[i] == t[j]: - dp[i+1][j+1] = dp[i][j] - else: # lower half of the matrix - if s[i] == t[j]: - dp[i+1][j+1] = dp[i][j+1]+dp[i][j] # dp[i][j] is because they equal, so check previous i,j, - else: - dp[i+1][j+1] = dp[i][j+1] # check the subsequence before this char in S is the same as t - return dp[-1][-1] -\end{lstlisting} -\item \textbf{44. Wildcard Matching (hard).} Given an input string (s) and a pattern (p), implement wildcard pattern matching with support for '?' and '*'. - -'?' Matches any single character. -'*' Matches any sequence of characters (including the empty sequence). - -The matching should cover the entire input string (not partial). - -Note: - - s could be empty and contains only lowercase letters a-z. - p could be empty and contains only lowercase letters a-z, and characters like ? or *. -\begin{lstlisting}[numbers=none] -Example 1: - -Input: -s = "aa" -p = "a" -Output: false -Explanation: "a" does not match the entire string "aa". - -Example 2: - -Input: -s = "aa" -p = "*" -Output: true -Explanation: '*' matches any sequence. 
- -Example 3: - -Input: -s = "cb" -p = "?a" -Output: false -Explanation: '?' matches 'c', but the second letter is 'a', which does not match 'b'. - -Example 4: - -Input: -s = "adceb" -p = "*a*b" -Output: true -Explanation: The first '*' matches the empty sequence, while the second '*' matches the substring "dce". -\end{lstlisting} - -\textbf{Solution 1: Complete Search: DFS.} We start from the first element in s and p with index i, j. If it is a '?', or s[i]=p[j], we match dfs(i+1, j+1). The more complex one is for '*', it can go from empty to full length of s. Therefore, we call $dfs(k, j+1), k \in [i, n]$. Check if any of these recursive calls return True. It receives a TLE (Time Limit Exceeded) error. -\begin{lstlisting}[language=Python] -def isMatch(self, s, p): - """ - :type s: str - :type p: str - :rtype: bool - """ - ns, np = len(s), len(p) - def helper(si, pi): - if si == ns and pi == np: - return True - elif si == ns or pi == np: - if si == ns: # if pattern left, make sure its all '*' - for i in range(pi, np): - if p[i] != '*': - return False - return True - else: # if string left, return False - return False - - if p[pi] in ['?', '*']: - if p[pi] == '?': - return helper(si+1, pi+1) - else: - for i in range (si, ns+1): # we can match all till the end - #print(i) - if helper(i, pi+1): - return True - return False - else: - if p[pi] != s[si]: - return False - return helper(si+1, pi+1) - return helper(0, 0) -\end{lstlisting} - -\textbf{Solution 2: Dynamic programming.} Same as all the above problems, we try to fill out the dp table ourselves. If it is a '?', check dp[i-1][j-1]; if p[i]==s[j], also check dp[i-1][j-1]. For '*', if it is treated as '', check dp[i-1][j] (above); because it can match a string of any length, we also check the left, dp[i][j-1].
-\begin{lstlisting}[numbers=none] - '' a d c e b - '' 1 0 0 0 0 0 - * 1 1 1 1 1 1 - a 0 1 0 0 0 0 - * 0 1 1 1 1 1 - b 0 0 0 0 0 1 - * 0 0 0 0 0 1 -\end{lstlisting} -\begin{lstlisting}[language=Python] -def isMatch(self, s, p): - ns, np = len(s), len(p) - dp = [[False for c in range(ns+1)] for r in range(np+1)] - - # initialize - dp[0][0] = True - for r in range(1, np+1): - if p[r-1] == '*' and dp[r-1][0]: - dp[r][0] = True - - # dp main - for r in range(1, np+1): - for c in range(1, ns+1): - if p[r-1] == '?': - dp[r][c] = dp[r-1][c-1] - elif p[r-1] == '*': - dp[r][c] = dp[r-1][c] or dp[r][c-1] # above or left - if dp[r][c]: - for nc in range(c+1, ns+1): - dp[r][nc] = True - break - else: - if dp[r-1][c-1] and p[r-1] == s[c-1]: - dp[r][c] = True - - - return dp[-1][-1] -\end{lstlisting} -\end{examples} - - -\subsection{Summary} -The four elements include: -\begin{enumerate} - \item state: f[i][j]: i denotes the previous $i$ number of numbers or characters in the first string, j is the previous j elements for the second string; We need to assign $n+1$ and $m+1$ for each dimension; - \item function: f[i][j] research how to match the ith element in the first string with the jth element in the second string; - \item initialize: f[i][0] for the first column and f[0][j] for the first row; - \item answer: f[n][m] -\end{enumerate} -%%%%%%%%%%%%%%%%%%%splitting %%%%%%%%%%%%%%%%%%%%%%%%%%% -% \section{Splitting Type DP} -% In this splitting type dynamic programming, we will be given a sequence, either an array or a string. In order to obtain the result, the sequence needed to be split into non-overlapping ranges. This means the total number of subproblems are decided by the total number of positions to \textit{split}. For this type, the time complexity is usually $O(n^2)$. - -% Examples include: -% \begin{enumerate} -% \item 926. Flip string to monotone increasing -% \item 121. Best Time to Buy and Sell Stock -% \item 123. Best Time to Buy and Sell Stock III -% \item 132. 
Palindrome Partitioning II (hard) -% \end{enumerate} -% Normally for this type of problems, we are given a sequence, either array or a string, we need to split the array into different parts, and to acquire some max or min values. -% Four elements include: -% \begin{enumerate} -% \item state: global[i] to denotes the max/min value gained from the previous i elements, and local[i] denotes the min/max value gained by selecting the ith element; -% \item Function: local[i] = min/max(local[i-1]+nums[i], nums[i]); global[i] = max/min(global[i-1], local[i]; -% \item Initialize: local[0], global[0]; -% \item Answer:global[n-1] -% \end{enumerate} -% \textbf{Example 1} Maximum Subarray - -% Solution: use the standard steps from this section: -% \begin{lstlisting}[language = Python] -% from sys import maxsize -% def maximumSubarray(nums): -% if not nums: -% return 0 -% local = [0]*len(nums) -% globalA = [-maxsize]*len(nums) -% for i in range(1,len(nums)): -% local[i] = max(local[i-1]+nums[i], nums[i]) #use f[i+1] because we have n+1 space -% globalA[i] = max(globalA[i-1], local[i]) -% return globalA[-1] -% \end{lstlisting} -% However, here since we only need to track $f[i]$ and $f[i+1]$, and keep current maximum value, so that we do not need to use any space. -% \begin{lstlisting}[language = Python] -% from sys import maxsize -% def maximumSubarray(nums): -% if not nums: -% return 0 -% local = 0 -% globalA = -maxsize -% for i in range(1,len(nums)): -% local = max(local+nums[i], nums[i]) #use f[i+1] because we have n+1 space -% globalA = max(globalA, local) -% return globalA -% \end{lstlisting} -% Here, the above is actually the concept of prefix sum. - -% \textbf{Example 2:} 121. Best Time to Buy and Sell Stock (easy) - -% Say you have an array for which the ith element is the price of a given stock on day i. - -% If you were only permitted to complete at most one transaction (i.e., buy one and sell one share of the stock), design an algorithm to find the maximum profit. 
- -% Note that you cannot sell a stock before you buy one. - -% Example 1: -% \begin{lstlisting} -% Input: [7,1,5,3,6,4] -% Output: 5 -% Explanation: Buy on day 2 (price = 1) and sell on day 5 (price = 6), profit = 6-1 = 5. -% Not 7-1 = 6, as selling price needs to be larger than buying price. -% \end{lstlisting} - -% Example 2: -% \begin{lstlisting} -% Input: [7,6,4,3,1] -% Output: 0 -% Explanation: In this case, no transaction is done, i.e. max profit = 0. -% \end{lstlisting} - -% Solution: using brute force, it would take two for loops and with $O(n^2)$ time complexity. However, use the dynamic programming, we need to track the minimum value till i as min\_local[i] , and then use a global\_max[i] to track the maximum profit which is global\_max[i] =nums[i] - min\_local[i], min\_local[i] = min(min\_local[i-1], prices[i]). -% \begin{lstlisting}[language = Python] -% def maxProfit(self, prices): -% if not prices: -% return 0 -% min_local =[0]*len(prices) -% max_global_profit = [0]*len(prices) - -% min_local[0] = prices[0] -% for i in range(1, len(prices)): -% max_global_profit[i] = max(max_global_profit[i-1], prices[i]-min_local[i-1]) -% min_local[i] = min(min_local[i-1], prices[i]) -% return max_global_profit[-1] -% \end{lstlisting} - -% \textbf{Example 2: } 123. Best Time to Buy and Sell Stock III (hard) - -% Say you have an array for which the ith element is the price of a given stock on day i. - -% Design an algorithm to find the maximum profit. You may complete at most two transactions. - -% Note: You may not engage in multiple transactions at the same time (i.e., you must sell the stock before you buy again). - -% Example 1: -% \begin{lstlisting} -% Input: [3,3,5,0,0,3,1,4] -% Output: 6 -% Explanation: Buy on day 4 (price = 0) and sell on day 6 (price = 3), profit = 3-0 = 3. -% Then buy on day 7 (price = 1) and sell on day 8 (price = 4), profit = 4-1 = 3. 
-% \end{lstlisting} - -% Example 2: -% \begin{lstlisting} -% Input: [1,2,3,4,5] -% Output: 4 -% Explanation: Buy on day 1 (price = 1) and sell on day 5 (price = 5), profit = 5-1 = 4. -% Note that you cannot buy on day 1, buy on day 2 and sell them later, as you are -% engaging multiple transactions at the same time. You must sell before buying again. -% \end{lstlisting} -% Example 3: -% \begin{lstlisting} -% Input: [7,6,4,3,1] -% Output: 0 -% Explanation: In this case, no transaction is done, i.e. max profit = 0. -% \end{lstlisting} -% Solution: the difference compared with I is that we need at most two times of transaction. We split the array into two parts from i, the max profit we can get till i and the max profit we can get from i to n. To get the maximum profit of each part is the same as the problem I. At last, the answer is max{preProfit[i] + postProfit[i]},$(0\leq i\leq n-1)$. However, we would get $O(n^2)$ time complexity if we use the following code, it has a lot of redundency. -% \begin{lstlisting}[language=Python] -% from sys import maxsize -% class Solution: -% def maxProfit(self, prices): -% """ -% :type prices: List[int] -% :rtype: int -% """ -% def maxProfitI(start, end): - -% if start == end: -% return 0 -% max_global_profit = 0 -% min_local = prices[start] -% for i in range(start+1, end+1): -% max_global_profit= max(max_global_profit, prices[i]-min_local) -% min_local = min(min_local, prices[i]) -% return max_global_profit - -% if not prices: -% return 0 -% n = len(prices) -% min_local = prices[0] -% preProfit, postProfit = [0]*n, [0]*n - -% for i in range(n): -% preProfit[i] = maxProfitI(0,i) -% postProfit[i] = maxProfitI(i,n-1) -% maxProfit = max([pre+post for pre, post in zip(preProfit, postProfit)]) -% return maxProfit -% \end{lstlisting} -% To avoid repeat work, we can use a for loop to get all the value of preProfit, and use another to get values for postProfit. 
For the postProfit, we need to traverse from the end to the start of the array in reverse direction, this way we track the local\_max and the profit is going to be local\_max - prices[i], and both keep a global max profit. The code is as follows: -% \begin{lstlisting}[language = Python] -% def maxProfit(self, prices): -% """ -% :type prices: List[int] -% :rtype: int -% """ -% if not prices: -% return 0 -% n = len(prices) - -% preProfit, postProfit = [0]*n, [0]*n -% #get preProfit, from 0-n, track the mini_local, global_max -% min_local = prices[0] -% max_global_profit = 0 -% for i in range(1,n): -% max_global_profit= max(max_global_profit, prices[i]-min_local) -% min_local = min(min_local, prices[i]) -% preProfit[i] = max_global_profit -% #get postProfit, from n-1 to 0, track the max_local, global_min -% max_local = prices[-1] -% max_global_profit = 0 -% for i in range(n-1, -1, -1): -% max_global_profit= max(max_global_profit, max_local-prices[i]) -% max_local = max(max_local, prices[i]) -% postProfit[i] = max_global_profit -% # iterate preProfit and postProfit to get the maximum profit -% maxProfit = max([pre+post for pre, post in zip(preProfit, postProfit)]) -% return maxProfit -% \end{lstlisting} - - -% Example 2: -% % 题目:给一个序列,求一次划分区间,求区间中的最大值 - -% % state: f[i] 表⽰示前 i个元素的最⼤大值 -% % function: f[i] = 前 i 个元素里⾯选一个区间的最⼤值 -% % initialize: f[0].. -% % answer: f[n-1].. - -% % 优化 - -% % state: - -% % global[i] 表示前 i 个元素的最大值 -% % local[i] 表⽰示包含第 i 个元素前 i 个元素的最大值 - -% % 2. function: - -% % global[i] = 通过 local[i] 更新 -% % local[i] = 通过原序列或者 global[i] 更新 - -% % 3. initialize: global[0].. Local[i] - -% % 4. answer: global[n-1].. - -% %这里面需要一个isPalindrome()的函数,这个函数用两个指针做. - -% Example $2$: Word Break - -% Given a string s and a dictionary of words dict, determine if s can be break into a space-separated sequence of one or more dictionary words. - -% Example: -% Given s = “lintcode”, dict = [“lint”, “code”]. - -% Return true because “lintcode” can be break as “lint code”. 
- -% hash查长度为L的单词的复杂度为O(L), 所以对于长度为N的string的几乎每个字母,搜索之前的L个位置,每个位置花费时间L,有:Return true because”leetcode”can be segmented as”leet code”. - -% 思路:首先我们要存储的历史信息res[i]是表示到字符串s的第i个元素为止能不能用字典中的词来表示,我们需要一个长度为n的布尔数组来存储信息。然后假设我们现在拥有res[0,…,i-1]的结果,我们来获得res[i]的表达式。思路是对于每个以i为结尾的子串,看看他是不是在字典里面以及他之前的元素对应的res[j]是不是true,(search a previous position j)如果都成立,那么res[i]为true,写成式子是 - -% 假设总共有n个字符串,并且字典是用HashSet来维护,那么总共需要n次迭代,每次迭代需要一个取子串的O(i)操作,然后检测i个子串,而检测是constant操作。所以总的时间复杂度是O(n²)(i的累加仍然是n²量级),而空间复杂度则是字符串的数量,即O(n)。 - -% 总时间复杂度:O(N*L*L)即 -%%%%%%%%%%%%%%%%%%%%%%%%%%%%Sackpack%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\section{Knapsack} -The problems in this section are defined as: Given $n$ items with Cost $C_i$ and value $V_i$, we can choose $i$ items that either 1) equals to an amount $S$ or 2) is bounded by an amount $S$. We would be required to obtain either 1) maximum values or 2) minimum items. Depends on if we can use one item multiple times, we have three categorizes: -\begin{enumerate} - \item 0-1 Knapsack (Section~\ref{01knapsack}): each item is only allowed to use 0 or 1 time. - \item Unbounded Knapsack(Section~\ref{unbound_knapsack}): each item is allowed to use unlimited times. - \item Bounded Knapsack(Section~\ref{bound_knapsack}): each item is allowed to use a fixed number of times. -\end{enumerate} - -How to solve the above three types of questions will be explained and the Python example will be given in the next three subsections (Section~\ref{01knapsack}, ~\ref{unbound_knapsack}, and ~\ref{bound_knapsack}) with the second type of restriction that the total cost is bounded by an amount $S$. - -The problems itself is a combination problem with restriction, therefore we can definitely use DFS as the naive solution. Moreover, the problems are not about to simply enumerate all the combinations, its an optimization problems, this is the difference of with memoization to solve these problems. Thus, dynamic programming is not our only choice. 
We can refer to Section~\ref{sec:backtrack} and Section~\ref{sec_combination} for the DFS based solution and reasoning. - -LeetCode problems: -\begin{enumerate} - \item 322. Coin Change (**) unbounded, fixed amount. -\end{enumerate} - - -% 有的题目要求“恰好装满背包”时的最优解,有的题目则并没有要求必须把背包装满。一种区别这两种问法的实现方法是在初始化的时候有所不同。 - -% 如果是第一种问法,要求恰好装满背包,那么在初始化时除了f[0]为0其它f[1..V]均设为-∞,这样就可以保证最终得到的f[N]是一种恰好装满背包的最优解。 - -% 如果并没有要求必须把背包装满,而是只希望价格尽量大,初始化时应该将f[0..V]全部设为0。 - -% 为什么呢?可以这样理解:初始化的f数组事实上就是在没有任何物品可以放入背包时的合法状态。如果要求背包恰好装满,那么此时只有容量为0的背包可能被价值为0的nothing“恰好装满”,其它容量的背包均没有合法的解,属于未定义的状态,它们的值就都应该是-∞了。如果背包并非必须被装满,那么任何容量的背包都有一个合法解“什么都不装”,这个解的价值为0,所以初始时状态的值也就全部为0了。 - -% 这个小技巧完全可以推广到其它类型的背包问题,后面也就不再对进行状态转移之前的初始化进行讲解。 -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -% zero one backpack % -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\subsection{0-1 Knapsack} -\label{01knapsack} -In this subsection, each item is only allowed to be used at most one time. This is a combination problem with restriction (total cost be bounded by a given cost or say the total weights of items need to be <= the capacity of the knapsack). - -Given the following example: we can get the maximum value to be 9 by choosing item 3 and 4 each with cost 2. -\begin{lstlisting}[numbers=none] -c = [1,1,2,2] -v = [1,2,4,5] -C = 4 -\end{lstlisting} -\textbf{Solution 1: Combination with DFS.} Clearly this is a combination problem, here we give the naive DFS solution. The time complexity if $O(2^n)$. 
-\begin{lstlisting}[language=Python] -def knapsack01DFS(c, v, C): - def dfs(s, cur_c, cur_v, ans): - ans[0] = max(ans[0], cur_v) - if s == n: return - for i in range(s, n): - if cur_c + c[i] <= C: # restriction - dfs(i + 1, cur_c + c[i], cur_v + v[i], ans) - ans = [0] - n = len(c) - dfs(0, 0, 0, ans) - return ans[0] - -c = [1,1,2,2] -v = [1,2,4,5] -C = 4 -print(knapsack01DFS(c, v, C)) -# output -# 9 -\end{lstlisting} -\textbf{Solution 2: DFS+MEMO.} However, because this is an optimization problem, the DFS above solves the same subproblem (the same item index with the same remaining capacity) many times; caching the best value of each such state with memoization avoids the repeated work. -\textbf{Solution 3: Dynamic Programming.} Here, we can try to make it iterative with dynamic programming. Here, because we have two variables to track (need modification), we use dp[i][c] to denote maximum value we can gain with subproblems (0,i) and a cost of c. Thus, the size of the dp matrix is $(n+1)\times (C+1)$, where row 0 stands for using no item. This makes the time complexity $O(n\times C)$. Like any coordinate type of dynamic programming problems, we definitely need to iterate through two for loops, one for i and the other for c; which one is inside or outside does not matter here. The state transfer function will be: the maximum value of 1) not choosing this item, 2) choosing this item (only possible when $c \geq c[i]$), which will add v[i] to the value of the first i-1 items with cost of c-c[i]. $dp[i][c] = \max(dp[i-1][c] , dp[i-1][c-c[i]]+v[i])$. -\begin{lstlisting}[language=Python] -def knapsack01DP(c, v, C): - # dp[i][w]: maximum value using the first i items with total cost <= w - dp = [[0 for _ in range(C+1)] for r in range(len(c)+1)] - for i in range(len(c)): - for w in range(C+1): - dp[i+1][w] = dp[i][w] # not choosing item i: carry the previous row over - if w >= c[i]: # choosing item i is only possible if it fits - dp[i+1][w] = max(dp[i+1][w], dp[i][w-c[i]]+v[i]) - return dp[-1][-1] -\end{lstlisting} -\textbf{Optimize Space.} Because when we are updating dp, we only use the previous row (the entries at the same or a smaller cost) to update the current row, we can reduce the space to $O(C)$. 
If we keep the same code as above just with a one-dimensional dp, then the later part of the update uses results already updated at the same level, which results in using each item multiple times; this is actually the most efficient solution to the unbounded knapsack problem in the next section. To avoid this we have two choices: 1) use a temporary one-dimensional new dp for each i, initialized as a copy of dp so that capacities smaller than c[i] keep their previous value; 2) update the cost in reverse order, which makes sure that we never use a newly updated result. -\begin{lstlisting}[language=Python] -def knapsack01OptimizedDP1(c, v, C): - dp = [0 for _ in range(C+1)] - for i in range(len(c)): - new_dp = dp[:] # copy: capacities below c[i] keep the value without item i - for w in range(c[i], C+1): - new_dp[w] = max(dp[w], dp[w-c[i]]+v[i]) - dp = new_dp - return dp[-1] - -def knapsack01OptimizedDP2(c, v, C): - dp = [0 for _ in range(C+1)] - for i in range(len(c)): - for w in range(C, c[i]-1, -1): - dp[w] = max(dp[w], dp[w-c[i]]+v[i]) - return dp[-1] -\end{lstlisting} -For the convenience of the later sections, we modularize the final code as: -\begin{lstlisting}[language=Python] -def knapsack01(cost, val, C, dp): - for j in range(C, cost-1, -1): - dp[j] = max(dp[j], dp[j-cost]+val) - return dp -def knapsack01Final(c, v, C): - n = len(c) - dp = [0 for _ in range(C+1)] - for i in range(n): - knapsack01(c[i], v[i], C, dp) - return dp[-1] -\end{lstlisting} - -% To fill in the matrix f: we have the following pseudocode: -% \begin{lstlisting} -% for i=1,...,N -% for v=0,...,V -% f[i][v]=max{f[i-1]f[v], f[i-1][v-c[i]] + w[i] }; -% \end{lstlisting} -% The time complexity is $O(n*s)$, and the space complexity is $O(n*s)$. The first for loop we gradually try each item, to check if choose this item we can increase the total value we gain, the final result is at the last row of this f matrix. We can optimize the space, by only use $1*s$ vector instead, with optimization we can get $O(s)$ space complexity. The function become f[s]=max(f[s], f[s-a[i]]+b[i]). 
Because each iteration of i, to get f[s] we need f[s-a[i]], so we need to make sure every time we compute s, the smaller size s-a[i] is not rewritten, so that we need to loop the volume from in reverse direction from large to small. If we still use the same order, then we the transfer state is equivalent to f[i][s] = f[i][v-a[i]], which is different than the original problem. %如果将v的循环顺序从上面的逆序改成顺序的话,那么则成了f[i][v]由f[i][v-c[i]]推知,与本题意不符,但它却是另一个重要的背包问题P02最简捷的解决方案,故学习只用一维数组解01背包问题是十分必要的。 -% \begin{lstlisting} -% for i=1..N -% for v=V..0 -% f[v]=max{f[v], f[v-c[i]] + w[i] }; -% \end{lstlisting} -% Now to distinguish different types of backpack problems, we modularize the above code as follows. -% \begin{lstlisting} -% procedure ZeroOnePack(cost,weight) -% for v=V..cost -% f[v]=max{f[v], f[v-cost] + weight } -% \end{lstlisting} -% Here stop at cost of current item is because in the original we require $S-a[i] \geq 0$. This is a slight optimization. -% \begin{lstlisting} -% for i=1..N -% ZeroOnePack(c[i],w[i]); -% \end{lstlisting} - - - -% 特点: - -% 用值作为DP维度 -% DP过程就是填写矩阵 -% 可以滚动数组优化 - -% Backpack - -% State: - -% f[i][S] “前i”个物品,取出一些能否组成和为S (True or False) - -% 2. Function: - -% f[i][S] = f[i-1][S-a[i]] or f[i-1][S] - -% 3. Initialize: - -% f[i][0] = true; f[0][1..target] = false - -% 4. Answer: - -% 检查所有的f[n][j] - -% 时间复杂度 O(n*S) , 滚动数组优化 -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -% complete backpack % -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\subsection{Unbounded Knapsack} -\label{unbound_knapsack} -Unbounded knapsack problems where one item can be used for unlimited times only if the total cost is limited. So each item can be used at most $C/c[i]$ times. - -%strategy with each item is not taking zero or taking one time, we can have 2, 3 and so on. So the state transfer function is f[i][v]=max{ f[i-1][v-k*c[i]] + k*w[i] |$0\leq k*c[i] \leq v$ } instead. 
- -%For the zero one backpack we have $O(n*s)$ states to find solution to, however, here for each state we can get the solution in $O(1)$ time, we need $O(v/c[i])$ for state f[i][v]. - -\textbf{Solution 1: Combination with DFS.} Here, because one item can be used only if the cost is within restriction of the knapsack's capacity, thus when we recursively call DFS function, we do not increase the index i like we did in the 0-1 knapsack problem. -\begin{lstlisting}[language=Python] -def knapsackUnboundDFS(c, v, C): - def combinationUnbound(s, cur_c, cur_v, ans): - ans[0] = max(ans[0], cur_v) - if s == n: return - for i in range(s, n): - if cur_c + c[i] <= C: # restriction - combinationUnbound(i, cur_c + c[i], cur_v + v[i], ans) - ans = [0] - n = len(c) - combinationUnbound(0, 0, 0, ans) - return ans[0] -print(knapsackUnboundDFS(c, v, C)) -# output -# 10 -\end{lstlisting} -\textbf{Solution 2: Use 0-1 knapsack's dynamic programming. } We can simply copy each item up to C/c[i] times. Or we can do it better, because any positive integer can be composed by using 1, 2, 4, ..., $2^{k}$. For instance, 3=1+2, 5=1+4,6=2+4. Thus we can shrink the $C/c[i]$ to $\log_2(C/c[i]))+1$ items, each with value c[i], v[i]; 2*c[i], 2*v[i], to $2^k$ times the cost and value. -\begin{lstlisting}[language=Python] -import math -def knapsackUnboundNaiveDP2(c, v, C): - n = len(c) - dp = [0 for _ in range(C+1)] - for i in range(n): - for j in range(int(math.log(C/c[i], 2))+1): # call it multiple times - # log(3, 2) = 1.4, 3= 1+2, so we need 2, 4 = 4. - knapsack01(c[i]<= w[j],则将物品j去掉,不用考虑。这个优化的正确性显然:任何情况下都可将价值小费用高得j换成物美价廉的i,得到至少不会更差的方案。对于随机生成的数据,这个方法往往会大大减少物品的件数,从而加快速度。然而这个并不能改善最坏情况的复杂度,因为有可能特别设计的数据可以一件物品也去不掉。 - -% 这个优化可以简单的O(N^2)地实现,一般都可以承受。另外,针对背包问题而言,比较不错的一种方法是:[显然]首先将费用大于V的物品去掉,然后使用类似计数排序的做法,计算出费用相同的物品中价值最高的是哪个,可以O(V+N)地完成这个优化。这个不太重要的过程就不给出伪代码了,希望你能独立思考写出伪代码或程序。 - -% Now, let us see the relation to zero one backpack. 
The simplest thought is for the ith item, we can choose at most V/c[i] times, so that we can convert the ith item into V/c[i] items with the same cost and weight. So here n become $\sum_i(V_i/c[i])$. A more efficient way is: - -%更高效的转化方法是:把第i种物品拆成费用为c[i]*2^k、价值为w[i]*2^k的若干件物品,其中k满足c[i]*2^k<=V。这是二进制的思想,因为不管最优策略选几件第i种物品,总可以表示成若干个2^k件物品的和。这样比把每种物品拆成O(log(V/c[i]))件物品,是一个很大的改进。 - -% However, there is one way to do it in $O(n*s)$ time complexity, the pseudo code is given as follows: -% \begin{lstlisting} -% for i=1..N -% for v=0..V -% f[v]=max{f[v], f[v-cost] + weight } -% \end{lstlisting} -% We can find this code is only different from zero one backpack at the second line with the second v loop. This time, it is iterated from small to big. For zero one backpack to guarantee that when we selecting the ith item, we can only choose one time, so that f[i][v] should come from the result of f[i-1][v-c[i]]. However, for the complete problem, f[i][v] can still come from f[i][v-c[i]]. %你会发现,这个伪代码与P01的伪代码只有v的循环次序不同而已。为什么这样一改就可行呢?首先想想为什么P01中要按照v=V..0的逆序来循环。这是因为要保证第i次循环中的状态f[i][v]是由状态f[i-1][v-c[i]]递推而来。换句话说,这正是为了保证每件物品只选一次,保证在考虑“选入第i件物品”这件策略时,依据的是一个绝无已经选入第i件物品的子结果f[i-1][v-c[i]]。而现在完全背包的特点恰是每种物品可选无限件,所以在考虑“加选一件第i种物品”这种策略时,却正需要一个可能已选入第i种物品的子结果f[i][v-c[i]],所以就可以并且必须采用v=0..V的顺序循环。这就是这个简单的程序为何成立的道理。 - -% From another point of view to explain this, the state transfer function can be written as f[i][v]=max{ f[i-1][v], f[i][v-c[i]] + w[i]}. -% 这个算法也可以以另外的思路得出。例如,基本思路中的状态转移方程可以等价地变形成这种形式: - -% f[i][v]=max{ f[i-1][v], f[i][v-c[i]] + w[i]} - -% 将这个方程用一维数组实现,便得到了上面的伪代码。 - -% So that we moduarlize the code of complete backpack as follows: -% \begin{lstlisting} -% procedure CompletePack(cost,weight) -% for v=cost..V -% f[v]=max{f[v], f[v-c[i]] + w[i] } -% \end{lstlisting} - -%%%%%%%%%%%%%%%%Bounded Knapsack%%%%%%%%%%%% -\subsection{Bounded Knapsack} -\label{bound_knapsack} -In this type of problems, each item can be used at most n[i] times. 
- -\textbf{Reduce to 0-1 Knapsack problem.} Like in the Unbounded Knapsack, it can be reduced to 0-1 knapsack and each can appear at most n[i] times. Thus, we can use $min(\log_2(n[i]), \log_2(C/c[i])$. -\begin{lstlisting}[language=Python] -def knapsackboundDP(c, v, Num, C): - n = len(c) - dp = [0 for _ in range(C+1)] - for i in range(n): - for j in range(min(int(math.log(C/c[i], 2))+1, int(math.log(Num[i], 2))+1)): # call it multiple times - knapsack01(c[i]<= C/c[i], \forall i$, then the Bounded Knapsack can be reduced to Unbounded Knapsack. -%https://blog.csdn.net/carol123456/article/details/52155142 - -%%%%%%%%%%%%%%%%%%%%%%%%%%%Generalization%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\subsection{Generalization} -The four elements of the backpack problems include: -\begin{enumerate} -\item State: $dp[i][c]$ denotes the optimized value (maximum value, minimum items, total number) with subproblem (0,i) with cost c. -\item State transfer Function: $dp[i][c] = f( dp[i-1][c-c[i]], dp[i-1][c]) $. For example, if we want: -\begin{itemize} - \item maximum/min value: f = max/min, dp[i-1][c-c[i]] -> dp[i-1][c-c[i]]+v[i]; - \item total possible solutions: dp[i][c] += dp[i-1][c-c[i]] - \item the maximum cost (how full we can fill the snapsack): dp[i][j] = max(dp[i-1][j], dp[i-1][j-c[i]]+c[i]) -\end{itemize} -\item Initialize: $f[i][0] = True; f[0][1, ..., size] = False$, which is explained that if we have i items, we choose 0, so we can always get size 0, if we only have 0 items, we cant fill backpack with size in range (1, size). -\item Answer: dp[n-1][C-1]. -\end{enumerate} -\textbf{Restriction Requires to Reach to Exact Amount of Capacity} - -In the above sections, we answered different type of knapsacks with the second restriction, while how about for the first restriction which requires the total cost to be exact equal to an amount $S$. 
Think about that if we are given an amount that no combination from the cost array can be added up to this amount, then it should be set to invalid, with value \textit{float(``-inf")} for max function and \textit{float(``inf")} for min state function in Python. For the amount of 0, the value will be valid with 0. Thus, the only difference for the first restriction lies in the initialization. Here, we give an example of Eaxt for the unbounded type: -\begin{lstlisting}[language=Python] -def knapsackUnboundExactNaiveDP2(c, v, C): - n = len(c) - dp = [float("-inf") for _ in range(C+1)] - dp[0] = 0 - for i in range(n): - for j in range(int(math.log(C/c[i], 2))+1): # call it multiple times - knapsack01(c[i]< Right -> Down -> Down -2. Down -> Down -> Right -> Right -\end{lstlisting} -\textbf{Coordinate}. -\begin{lstlisting}[language=Python] -def uniquePathsWithObstacles(self, obstacleGrid): - """ - :type obstacleGrid: List[List[int]] - :rtype: int - """ - if not obstacleGrid or obstacleGrid[0][0] == 1: - return 0 - m, n = len(obstacleGrid), len(obstacleGrid[0]) - dp = [[0 for c in range(n)] for r in range(m)] - dp[0][0] = 1 if obstacleGrid[0][0] == 0 else 0 # starting point - - # init col - for r in range(1, m): - dp[r][0] = dp[r-1][0] if obstacleGrid[r][0] == 0 else 0 - - for c in range(1, n): - dp[0][c] = dp[0][c-1] if obstacleGrid[0][c] == 0 else 0 - - for r in range(1, m): - for c in range(1, n): - dp[r][c] = dp[r-1][c] + dp[r][c-1] if obstacleGrid[r][c] == 0 else 0 - print(dp) - return dp[-1][-1] -\end{lstlisting} - - -\subsection{Double Sequence} -712. Minimum ASCII Delete Sum for Two Strings -\begin{lstlisting} -Given two strings s1, s2, find the lowest ASCII sum of deleted characters to make two strings equal. - -Example 1: - -Input: s1 = "sea", s2 = "eat" -Output: 231 -Explanation: Deleting "s" from "sea" adds the ASCII value of "s" (115) to the sum. -Deleting "t" from "eat" adds 116 to the sum. 
-At the end, both strings are equal, and 115 + 116 = 231 is the minimum sum possible to achieve this. - -Example 2: - -Input: s1 = "delete", s2 = "leet" -Output: 403 -Explanation: Deleting "dee" from "delete" to turn the string into "let", -adds 100[d]+101[e]+101[e] to the sum. Deleting "e" from "leet" adds 101[e] to the sum. -At the end, both strings are equal to "let", and the answer is 100+101+101+101 = 403. -If instead we turned both strings into "lee" or "eet", we would get answers of 433 or 417, which are higher. - -Note: -0 < s1.length, s2.length <= 1000. -All elements of each string will have an ASCII value in [97, 122]. -\end{lstlisting} - -\begin{lstlisting}[language=Python] -def minimumDeleteSum(self, s1, s2): - word1, word2=s1,s2 - if not word1: - if not word2: - return 0 - else: - return sum([ord(c) for c in word2]) - if not word2: - return sum([ord(c) for c in word1]) - - rows, cols=len(word1),len(word2) - - dp = [[0 for col in range(cols+1)] for row in range(rows+1)] - for i in range(1,rows+1): - dp[i][0] = dp[i-1][0] + ord(word1[i-1]) #delete in word1 - for j in range(1,cols+1): - dp[0][j] = dp[0][j-1] + ord(word2[j-1]) #delete in word2 - - for i in range(1,rows+1): - for j in range(1,cols+1): - if word1[i-1] == word2[j-1]: - dp[i][j] = dp[i-1][j-1] - else: - dp[i][j] = min(dp[i][j-1] + ord(word2[j-1]), dp[i-1][j] + ord(word1[i-1])) #delete in word2, delete in word1 - return dp[rows][cols] -\end{lstlisting} - -\end{document} \ No newline at end of file diff --git a/Easy-Book/chapters/question_2_string_matching.tex b/Easy-Book/chapters/question_2_string_matching.tex deleted file mode 100644 index 9ca6c10..0000000 --- a/Easy-Book/chapters/question_2_string_matching.tex +++ /dev/null @@ -1,1032 +0,0 @@ - -\documentclass[../main.tex]{subfiles} - -\begin{document} - - -Pattern matching is a fundamental string processing problem. 
Pattern matching algorithms are also called string searching algorithms, and they are defined as a class of string algorithms that try to find a place where one or several strings (also called patterns) are found within a larger string or text. Based on whether some mismatches are allowed or not, we have \textbf{Exact or Approximate} Pattern Matching. In this section, we start from exact single-pattern matching algorithms where we only need to find one pattern in a given string or text. -Based on how many patterns we might have, we have \textbf{one-time or multiple-times} string pattern matching problems. For multiple-times matching, preprocessing the text using suffix array/trie/tree can improve the total efficiency. This chapter is organized as: -\begin{enumerate} - \item Exact Pattern Matching: includes one-pattern and multiple patterns. - \item Approximate Pattern Matching: -\end{enumerate} - - -\section{Exact Single-Pattern Matching} - -\paragraph{Exact Single-pattern Matching Problem} Given two strings or two arrays, one is pattern \textbf{P} which has size $m$, and the other is the target string or text \textbf{T} which has size $n$, the exact single-pattern matching problem is defined as finding the first one or all occurrences of pattern P in the T as substring, and returning the starting indexes of all the occurrences. - -\begin{figure}[h] - \centering - \includegraphics[width = 0.7\columnwidth]{fig/brute_force_matching.png} - \caption{The process of the brute force exact pattern matching} - \label{fig:brute_force_string_matching} -\end{figure} - -\paragraph{Brute Force Solution} The naive search is straightforward: we slide the pattern P through the text T one item at a time, like a sliding window. At each position $i$, we compare P with T[i:i+m]. In this process, we need to do $n-m+1$ comparisons, and each comparison takes at most $m$ character comparisons. This brute force solution gives $O(mn)$ time complexity. 
-\begin{lstlisting}[language=Python] -def bruteForcePatternMatching(p, s): - if len(p) > len(s): - return [] # no occurrence is possible; the result is always a list of start indexes - m, n = len(p), len(s) - ans = [] - for i in range(n-m+1): - if s[i:i+m] == p: - ans.append(i) - return ans - -p = "AABA" -s = "AABAACAADAABAABA" -print(bruteForcePatternMatching(p,s)) -# output -# [0, 9, 12] -\end{lstlisting} -We write it in another way that uses fewer built-in Python functions: -\begin{lstlisting}[language=Python] -def bruteForcePatternMatchingAll(p, s): - if not s or not p: - return [] - m, n = len(p), len(s) - i, j = 0, 0 - ans = [] - while i < n: - # do the pattern matching - if s[i] == p[j]: - i += 1 - j += 1 - if j == m: #collect position - ans.append(i - j) - i = i-j+1 - j = 0 - else: - i = i -j + 1 - j = 0 - return ans -\end{lstlisting} - -For LeetCode problems, most of the time the brute force solution will not be accepted and will receive TLE (Time Limit Exceeded). In real applications, such as human genome matching, the text can have an approximate size of $3*10^9$ and the pattern can be very long too, such as $10^8$. Therefore, other faster algorithms are needed to improve the efficiency. - -The other algorithms require us to preprocess either/both the pattern and text. In this book, we mainly discuss the following algorithms: -\begin{enumerate} - \item Knuth Morris Pratt (KMP) Algorithm (Section~\ref{pattern_matching_subsec_kmp}). KMP is a linear algorithm, and it should mostly be enough to solve interview related string matching, and also once we understand the algorithm, the implementation is quite trivial, which makes it a very good algorithm during interviews. Its time and space complexity are $O(m+n)$ and $O(m)$ respectively. - \item Suffix Trie/Tree/Array Matching (Section~\ref{pattern_matching_subsec_suffix_array}). 
-\end{enumerate} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%%%%% Knuth Morris Pratt -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\subsection{Prefix Function and Knuth Morris Pratt (KMP)} -\label{pattern_matching_subsec_kmp} -In the above brute force solution, we compare our pattern with each item as starting window in the text. Each matching attempt is treated independently of the others, so a lot of information that could improve the efficiency is lost. - -\paragraph{Skipping Positions} As shown in Fig.~\ref{fig:brute_force_string_matching}, we find a match at step 1. Is it necessary for us to do step 2 and step 3? The pattern itself tells us it is impossible to get a match at step 2 and step 3 because 'b' will mismatch 'a' and 'r' will mismatch 'a' too. However, at the original step 4, by analyzing the pattern itself we know 'a' will match 'a', and beyond that we do not have enough information; therefore, step 4 is necessary to compare 'c' with 'b' in the pattern. In this example, steps 4, 5, 6, 7 are all needed, but steps 4, 5, 6 each end up doing only one or two comparisons. - -\begin{figure}[h] - \centering - \includegraphics[width = 0.7\columnwidth]{fig/skipping_rule_kmp.png} - \caption{The Skipping Rule} - \label{fig:skipping_rule_kmp} -\end{figure} - - The reason why step 2 and 3 can be skipped can be shown from Fig.~\ref{fig:skipping_rule_kmp}. If we analyze our pattern first, we will know that at step 2 and step 3, ``bra'' is not equal to ``abr'' and ``ra'' is not equal to ``ab'', while at step 4 we do have ``a'' equal to ``a''. If we look further at the relation within each of these pairs, we see that they are a suffix and a prefix of the same length of the pattern. Inspired by this, we define a \textbf{border} of string S as a prefix of S which is equal to a suffix of S of the same length, but not equal to the whole S. 
For example: - \begin{lstlisting}[numbers=none] - 'a' is a border of 'arba' - 'ab' is a border of 'abcdab' - 'ab' is not a border of 'ab' - \end{lstlisting} - -\begin{figure}[h] - \centering - \includegraphics[width = 0.7\columnwidth]{fig/shifing_pattern.png} - \caption{The Sliding Rule} - \label{fig:sliding_rule_kmp} -\end{figure} - -%\url{https://cp-algorithms.com/string/prefix-function.html} -\paragraph{Prefix Function} A prefix function for a string P generates an array $l$ (often named lps, short for ``longest proper prefix which is also a suffix'', and also known as the failure lookup table) of the same length as the string, where $l[i]$ is the length of the longest border of the prefix substring P[0...i]. Mathematically the definition of prefix function can be written as follows: -\begin{equation} - l[i] = \max_{k=0,...,i}\{k: P[0...k-1] = P[i-(k-1)...i]\} -\end{equation} -The naive implementation of prefix-function takes $O(n^3)$: -\begin{lstlisting}[language=Python] -def naiveLps(p:str): - dp = [0] * len(p) - for i in range(1, len(p)): - for l in range(i, 0, -1): # from maxmim length to length 1 - prefix = p[0: l] - suffix = p[i - l+1: i+1] - #print(prefix, suffix) - if prefix == suffix: - dp[i] = l - break - return dp -\end{lstlisting} -\begin{figure}[h] - \centering - \includegraphics[width = 0.7\columnwidth]{fig/kmp_lemma_proof.png} - \caption{Proof of Lemma} - \label{fig:proof_border_property} -\end{figure} - -For example, the prefix function of string ``abcabcd'' is [0,0,0,1,2,3,0]. The trivial algorithm to implement this has $O(n^3)$ time complexity (one for loop for i, a second nested for loop for k, and another factor of n for comparing the corresponding substrings), which exactly follows the definition of the prefix function. The efficient algorithm, which is demonstrated to run in $O(n)$, was proposed by Knuth and Pratt and independently from them by Morris in 1977. It was used as the main function of a substring search algorithm. This is the core of the Knuth Morris Pratt (KMP) algorithm. 
In order to implement the prefix function in linear time, we first need to utilize two properties (facts) that enable two further optimizations (below, $\pi$ and $l$ both denote the prefix function): -\begin{enumerate} - \item \label{observation_1} Observation: $\pi[i+1] \leq \pi[i] + 1$, which states that the value of the prefix function can either increase by one, stay the same, or decrease by some amount. - \item \label{Lemma_1} Lemma: \textbf{If $l[i] > 0$, then all borders of P[0...i] but for the longest one are also borders of $P[0...l(i)-1]$.} The proof is: As shown in Fig.~\ref{fig:proof_border_property}, $l(i)$ is the length of the longest border of P[0...i]. Let $\mu$ be another, shorter border of P[0...i], so that $|\mu| < l(i)$. Because the prefix of length $l(i)$ and the suffix of length $l(i)$ of P[0...i] are the same string, and $\mu$ is a suffix of that suffix, $\mu$ is also a suffix of the prefix P[0...l(i)-1]. Since $\mu$ is a prefix of P as well, $\mu$ is both a prefix and a suffix of P[0...l(i)-1], i.e. a border of it. - -\end{enumerate} - -Now, with such knowledge we can do the following two further optimizations: -\begin{enumerate} - \item With \ref{observation_1}, the complexity can be reduced to $O(n^2)$ by getting rid of the for loop on $k$, because at each step the prefix function can grow by at most one. Over all iterations of $i$, it can grow at most $n$ steps in total, and therefore can also only decrease a total of $n$ steps. - \item With \ref{Lemma_1}, we can further get rid of the $O(n)$ string comparison each step. To accomplish this, we have to use all the information computed in the previous steps: all borders of P[0...i] (assuming it has k in total) can be enumerated from the longest to shortest as: $b_0 = \pi(i)$, $b_1 = \pi(b_0-1)$, ..., $b_{k-1} = \pi(b_{k-2}-1)$ ($b_{k-1} = 0$). Therefore, at the step for position $i+1$, instead of comparing string $s[0...\pi(i)]$ with $s[i+1-\pi(i)...i+1]$, only a comparison of the characters $s[\pi(i)]$ and $s[i+1]$ is needed. 
-\end{enumerate} - - - -\paragraph{Implementation of Prefix Function for a Given String S} Let's recap the above optimization to get the final algorithm which computes prefix function in $O(n)$. This step is of key importance to the success of KMP algorithm. Let's understand this together with the algorithm statement and the code. -\begin{enumerate} - \item Initialization: assign $n$ space to $l$ array and set $l_0 = 0$. - \item A for loop in range of [1, n-1] to compute $l(i)$. Set a variable $j = l(i-1)$, and run a while loop as long as $j > 0$ and s[j] != s[i], reassigning $j = l(j-1)$ in order to check the next smaller border. After the loop, if s[j] == s[i], then $l(i)=j+1$; otherwise $l(i)=0$. -\end{enumerate} -\begin{lstlisting}[language=Python] -def prefix_function(s): - n = len(s) - pi = [0] * n - for i in range(1, n): - # compute l(i) - j = pi[i-1] - while j > 0 and s[i] != s[j]: # try all borders of s[0...i-1], from the longest to the shortest - j = pi[j-1] - # check the character - if s[i] == s[j]: - pi[i] = j + 1 - - return pi -\end{lstlisting} - -Run an example: -\begin{lstlisting}[language=Python] -S = 'abcabcd' -print('The prefix function of: ', S, " is ", prefix_function(S)) - -The prefix function of: abcabcd is [0, 0, 0, 1, 2, 3, 0] -\end{lstlisting} - -\paragraph{Knuth Morris Pratt (KMP)} Back to the problem of exact pattern matching, we first build a new string as $s = P+'\$'+T$, which is a concatenation of pattern P, '\$', and text T. Let us calculate the prefix function of string s. Now, let us think about the meaning of the prefix function, except for the first $m+1$ items (which belong to the string P and the separator '\$'): -\begin{enumerate} - \item For all $i$, $\pi[i] \leq m$, because the separator '\$' between the pattern and the text (a character assumed to appear in neither of them) stops any border from growing past the pattern. - \item If $\pi[i] = m$, i.e. $s[0:m] = s[i-m+1:i+1] = P$. This means that the pattern P appears completely in the new string s and ends at position $i$. 
Now, we convert $i$ to the starting position of pattern in T with $i-2m$. - \item If $f[i] < m$, no full occurrence of pattern ends with position i. -\end{enumerate} - -Thus the Knuth-Morris-Pratt algorithm solves the problem in $O(n+m)$ time and $O(n+m)$ memory. And can be simply implemented with prefix function as follows: -\begin{lstlisting}[language=Python] -def KMP_coarse(p, t): - m = len(p) - s = p + '$' + t - n = len(s) - pi = prefix_function(s) - ans = [] - for i in range(2*m, n): - if pi[i] == m: - ans.append(i - 2*m) - return ans -\end{lstlisting} - -Because for all $\pi[i] \leq m$: for i in [0, m-1], we save the border in $\pi$; for i in [m, n+m-1], we set up a global variable $j$ to track the last border. We can decrease the space complexity in $O(m)$. -The Python implementation is given as: -\begin{lstlisting}[language=Python] -def KMP(p, t): - m = len(p) - s = p + '$' + t - n = len(s) - pi = [0] * m - j = pi[0] - ans = [] - for i in range(1, n): - # compute l(i) - while j > 0 and s[i] != s[j]: # try all borders of s[0...i-1], from the longest to the shortest - j = pi[j-1] - # check the character - if s[i] == s[j]: - j += 1 - # record the result - if j == m: - ans.append(i-2*m) - # save the result if i in [0, m-1] - if i < m: - pi[i] = j - return ans -\end{lstlisting} - -Run an example: -\begin{lstlisting}[language=Python] -t = 'textbooktext' -p = 'text' -print(KMP(p, t)) -# output -# [0, 8] -\end{lstlisting} - -\paragraph{Sliding Rule with Border Information} Now, assuming we know how to compute the border information, how do we slide instead compared with the brute force solution? There are three steps, with Fig.~\ref{fig:sliding_rule_kmp} as demonstration: - -\begin{enumerate} - \item Find longest common prefix $\mu$. - \item Find $w$ -- the longest border of $\mu$. - \item Move P such that prefix $w$ in P aligns with suffix $w$ of $\mu$ in T. -\end{enumerate} -% There exists redundant comparison between pattern and the text at different location. 
For example in the following case, we found a match at pos 0, if we know p[0:3]==p[1:4], then we know p[0:3]==s[1:4], in the next sliding window, s[1:5], we only need to compare if p[3]==s[4]. -% \begin{lstlisting}[numbers=none] -% txt = "AAAAABAAABA" -% pat = "AAAA" [Initial position] -% \end{lstlisting} - -% Knuth Morris Pratt is an exact pattern matching algorithm, which preprocesses the pattern string at first to recognize those patterns having same sub-patterns appearing more than once to skip characters while matching. - -% \textbf{Pattern Lookup Table.} Therefore, for the pattern we preprocess it and construct an auxiliary lookup table of size $m$ which saves the longest length of prefix before current index i which is the same as suffix. We define the table as f, we have that f[0] = 0, if f[i] = t, it means $P[:t+1] = P[i-t:i]$. The following figure(Fig~\ref{fig:lookup}) shows the example of a lookup table. -% \begin{figure}[h] - -% \centering -% \includegraphics[width = 0.98\columnwidth]{fig/lookup.jpg} -% \caption{The example of lookup table of KMP, which can be generated with dynamic programming.} -% \label{fig:lookup} - -% \includegraphics[width = 0.98\columnwidth]{fig/lookup_table.jpg} -% \caption{The example of lookup table of KMP} -% \end{figure} - -% \textbf{Generate Lookup Table with Dynamic Programming.} Now, we have not learn dynamic programming yet, you can come to digest this section more later. We initiate f[0]=0. At the 6th row with 'ABCDAB', we know before at 'ABCDA', the prefix 'A' matches suffix 'A', now we compare current char 'B' with the one next to prefix 'A' at position f[i-1], if it matches, then f[i] = f[i-1]+1. If it does'nt match, at 7th row, 'D' != 'C', thus we cant have 3 as the answer, we retreat to check position f[i-2], where we have 'D'!='B', we retreat to f[i-3], and where 'A'!='D', then we have i-4 < 0, we stop, and put 0 as the result. 
The Python code is given as follows: -% \begin{lstlisting}[language=Python] -% def LPS(p): -% m = len(p) -% f = [0] * m -% for i in range(1, m): -% # chek the previous position -% check_pos = f[i-1] -% if p[i] == p[check_pos]: -% f[i] = check_pos + 1 -% break -% return f -% print(LPS("ABAB")) -% print(LPS("AAABAAA")) -% # output -% # [0, 0, 1, 2] -% # [0, 1, 2, 0, 1, 2, 3] -% \end{lstlisting} - -% \begin{lstlisting}[language = Python] -% # Generating lookup table for string S using Python -% f=[0]*n -% for i in xrange(1,n): -% t = f[i-1] -% while t > 0 and S[i] != S[t]: -% t = f[t-1] -% if S[i] == S[t]: -% t +=1 -% f[i] = t -% \end{lstlisting} -\paragraph{Knuth Morris Pratt $O(m+n)$} Now, to complete the picture of KMP, when we have the lookup table at hand, when we failed to match i and j, we set j = lps[j-1], and i doest not need to backtrack. -\begin{lstlisting}[language = Python] -def KMP(p, ps): - f = LPS(p) - n =m, n = len(p), len(ts) - - i = 0 # index in s - j = 0 # index in p - pos = [] - while i < n: - if p[j] == s[i]: - i += 1 - posj += 1 - dp[i] = pos - if dp[i] == m:if j == m: # i at i+1, j at f[j-1] - print("Found pattern at index ", i-j) - ans.append(i-2*mj) - i += 1 - else: - if pos > 0: j = f[j-1] - else: # mismatch at i and j - if j != 0: # if j can retreat with lps, then i keep the same - pos = dp[posj = f[j-1] - else: - i += 1 #the value is 0 - return ans # if j needs to start over, i moves too - i += 1 - return ans -print(KMP(p,s)) -# [0, 9, 12] -\end{lstlisting} - -%%%%%%%%%%%%%%%%%%%%%%Application of Prefix Function -\subsection{More Applications of Prefix Functions} - - -\paragraph{Counting the number of occurrences of each prefix} - -\paragraph{Counting the number of occurrences of different substring in a string} - -\paragraph{Compressing a string} - -% \paragraph{} - - -%%%%%%%%%%%%%%%%%%%%%%%Z-function%%%%%%%%%%% -\subsection{Z-function} -\subsubsection{Definition and Implementation} -Z-function for a string $s$ of length $n$ is defined 
as an array $z[i] = k, i \in [1, n-1]$. At item $z[i]=k$ stores the longest substring starting at index $i$ which is also a prefix of string $s$. To notice, the length of the substring has to be smaller than the whole length, therefore, $z[0]=0$. In other words, it means the the length of the longest common prefix between $s$ and substring $s[i:n]$. %Thus, $z[i]=k$ tells us that $s[0...k-1] = s[i...i+k−1]$. -For example: -\begin{lstlisting}[numbers=none] -"aaaaa" - [0,4,3,2,1] -a -a substring 'aaaa' = prefix 'aaaa' -a substring 'aaa' = prefix 'aaa' -a substring 'aa' = prefix 'aa' -a substring 'a' = prefix 'a' -\end{lstlisting} -Another Example. -\begin{lstlisting}[numbers=none] -"aaabaab" - [0,2,1,0,2,1,0] -a 0 -a substring 'aa' = prefix 'aa' -a substring 'a' = prefix 'a' -b 0 -a substring 'aa' = prefix 'aa' -a substring 'a' = prefix 'a' -b -\end{lstlisting} -z-function can be represented with a formula: -\begin{equation} - l[i] = \max_{k=0,...,i}\{k+1: P[0...k] = P[i...i+k]\} -\end{equation} -The naive implementation of z-function takes $O(n^2)$ time complexity just as the prefix function. -\begin{lstlisting}[language=Python] -def naiveZF(s): - n = len(s) - z = [0] * n - for i in range(1, n): # starting point - k = 0 - while i + k < n and s[i + k] == s[k]: - k += 1 - z[i] = k - return z -\end{lstlisting} -\paragraph{Z-function Property} -Here, we show how we can implement it in $O(n)$. To compute $z[i]$, do we have to start at $i$, then follows the order of $i+1$, $i+2$, ..., $i+k$? The answer is No. -\begin{figure}[h] - \centering - \includegraphics[width = 0.9\columnwidth]{fig/z_function.png} - \caption{Z function property} - \label{fig:z_function_property} -\end{figure} -First, As shown in Fig.~\ref{fig:z_function_property}, for a given position $i$, $[l, r]$ is one of its preceding non-zero $z[p], p< i$, which has the furthest right boundary $r$. We can think it as a rightmost window, wherein $s[l, r] = s[0, r-l+1]$. $s[0, i-l]$ is marked as yellow. 
We divide the area in range $[0, r-l+1]$ into yellow $[0, l-i]$ and a green parts $[l-i+1, r-l+1]$. Therefore, to compare range $[i, r]$ with prefix is the same as of comparing range $[l-i+1, r-l+1]$ with the prefix, which already has a result $z[i-l]$. So instead, our $k$ can start from position $z[i-l]$. However, there are two more restrictions: -\begin{enumerate} - \item Enable to utilize z-function property, $r \geq i$ because the index $r$ can be seen as ``boundary'' to which our string $s$ has been scanned by the algorithm. - \item The initial approximation for $z[i]$ is bounded by the length between $r$ and $i$, which is $r-i+1$. Therefore, we modify our initial approximation to $z[i]$ to $z[i] = \min(r-i+1, z[i-l])$ instead. -\end{enumerate} - -Now, the $O(n)$ implementation is given as follows: -\begin{lstlisting}[language=Python] -def linearZF(s): - n = len(s) - z = [0] * n - l = r = 0 - for i in range(1, n): - k = 0 - if i <= r: # r is the right bound has been scanned - k = min(r-i+1, z[i - l]) - while i + k < n and s[i+k] == s[k]: - k += 1 - # update the boundary - if i + k - 1 > r: - l = i - r = i + k - 1 - z[i] = k - return z -\end{lstlisting} - -\subsubsection{Applications} -The applications of Z-function are largely similar to those of prefix function. Therefore, the applications will be explained briefly compared with the applications of prefix functions. If you have problems to understand this section, please read the prefix function first. - -\paragraph{Exact Single-Pattern Matching} In this problem set, we are asked to find all occurrences of the pattern $p$ inside the text $t$. We can do the same as of in the KMP, we create a new string $s=p+\$+t$. Then, we compute the z-function for $s$. With the z array, for $z[i]=k$, if $k=|p|$, then we know there is one occurrence of p starting in the i-th position in $s$, which is $i-(|p|+1)$ in the t. 
-\begin{lstlisting}[language=Python] -def findPattern(p, t): - s = p + '$' + t - m = len(p) - z = (linearZF(s)) - ans = [] - for i, v in enumerate(z): - if v == m: - ans.append(i-m-1) - return ans -\end{lstlisting} - -\paragraph{Number of distinct substrings in a string} Given a string s of length n, count the number of distinct substrings of s. - -To solve this problem we need to use dynamic programming and the subproblems are $s[0...0]$, $s[0...1]$, ..., $s[1...i]$,...$s[0...n-1]$. For example, given ``abc'', -\begin{lstlisting}[numbers=none] -subproblem 1: 'a', dp[0] = 1 -subproblem 2: 'ab', dp[1] = 2, with new substrings 'b', 'ab' -subproblem 3, 'abc', dp[2] = 3, new substrs 'c', 'bc', 'abc' -\end{lstlisting} - -We know the maximum for dp[i] is $i+1$, however for cases like ``aaa''', the situation is different: -\begin{lstlisting}[numbers=none] -subproblem 1: 'a', dp[0] = 1 -subproblem 2: 'aa', dp[1] = 1, 'aa', because 'a'_1 == 'a_0' -subproblem 3, 'aaa', dp[2] = 1, new substrs 'aaa', because 'a_0a_1'='a_1a_2', 'a_2' = 'a_0'. -\end{lstlisting} - -If for each subproblem i, we take the string s[0...i] and reverse it $i...0$. If using z-function on this substring, we can find the number of prefixes of the reversed string are found somewhere else in it, which is the maximum value of its z-function. This is because if we know z[j] = max(k), then s[i...i-max-1] = s[i-j...i-j+max], which is to say s[i-max-1...i] = s[i-j-max...i-j] -With the max value, all of the shorter prefixes also occur too. Therefore, $dp[i] = i+1 - max(z[i])$. 
The time complexity is $O(n^2)$ -\begin{lstlisting}[language=Python] -def distinctSubstrs(s): - n = len(s) - if n < 1: - return 0 - ans = 1 # for dp[0] - #last_str = s[0:1] - for i in range(1, n): - reverse_str = s[0:i+1][::-1] - z = linearZF(reverse_str) - ans += (i + 1 - max(z)) - return ans -\end{lstlisting} - -Run an example: -\begin{lstlisting}[language=Python] -s = 'abab' -print(distinctSubstrs(s)) -# output -# 7 -\end{lstlisting} - -\section{Exact Multi-Patterns Matching} - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%%%%% Rabin-Karp algorithm -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\subsection{Suffix Trie/Tree/Array Introduction} -\label{pattern_matching_subsec_suffix_trie} -Up till now, prefix function and the KMP algorithms seems impeccable with its liner time and space complexity. However, there are two problems that KMP can not resolve: -\begin{enumerate} - \item Approximate matching, which we will detail more in the next section. - \item If frequent queries will be made on the same text with a given pattern, and if the $m<= to the string's length. - -\begin{lstlisting}[language=Python] -def cyclic_shifts_sort(s): - s = s + '$' - n = len(s) - order = getCharOrder(s) - cls = getCharClass(s, order) - print(order, cls) - L = 1 - while L < n: - order = sortDoubled(s, 1, order, cls) - cls = updateClass(order, cls, L) - print(order, cls) - L *= 2 - - return order -\end{lstlisting} - -\subsubsection{Applications} -\paragraph{Number of Distinct Substrings of a string} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%%%%% Rabin-Karp algorithm -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\subsection{Rabin-Karp Algorithm (Exact or anagram Pattern Matching) } -Used to find the exact pattern, because different anagram of string would have different hash value. 
- -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%%%%% Bonus -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\section{Bonus} - - -\begin{figure}[h] - \centering - \includegraphics[width = 0.7\columnwidth]{fig/trie_pattern.png} - \caption{Building a Trie from Patterns} - \label{fig:trie_pattern} -\end{figure} -\paragraph{Multiple-Patterns Matching} Previously, we mainly talked about exact/approximate one pattern matching. When there are multiple patterns the time complexity became to $O(\sum_i{m_i*n})$ if brute force solution is used. We can construct a trie of all patterns as shown in Section~\ref{concept_trie}. For example, in Fig.~\ref{fig:trie_pattern} shows a trie built with all patterns. - -Now, let us do \textbf{Trie Matching} exactly the same way as the brute force pattern matching algorithm by sliding the pattern trie along the text at each position of text. Each comparison: walk down the trie by spelling symbols of text and a pattern from the pattern list matches text each time we reach a leaf. Try text = ``panamabananas''. We will first walk down branch of p->a->n and stop at the leaf, thus we find pattern `pan`. With Trie Matching, the runtime is decreased to $O(\max_i{m_i*n})$. Plus the trie construction time $O(\sum_i{m_i})$. - -However, merging all patterns into a trie makes it impossible for using advanced single-pattern matching algorithms such as KMP. - -\paragraph{More Pattern Matching Tasks} There are more types of matching, instead of finding the exact occurrence of one string in another. -\begin{enumerate} - \item Longest Common Substring (LCS): LCS asks us to return the longest substring between these two strings. - \item Anagram Matching: this asks us to find a substring in T that has all letters in P, and does not care about the order of these letters in P. - \item Palindrome Matching. 
-\end{enumerate} - -%%%%%%%%%%%%%%%%%%%Trie%%%%%%%%%%%%%%%%% -\section{Trie for String} -\label{concept_trie} -\paragraph{Definition} Trie comes from the word re\textbf{Trie}val. In computer science, a trie, also called digital tree, radix tree or prefix tree which like BST is also a kind of search tree for finding substring in a text. We can solve string matching in $O(|T|)$ time, where |T| is the size of our text. This purely algorithmic approach has been studied extensively in the algorithms: Knuth-Morris-Pratt, Boyer-Moore, and Rabin-Karp. However, we entertain the possibility that multiple queries will be made to the same text. This motivates the development of data structures that preprocess the text to allow for more efficient queries. Such efficient data structure is Trie, which can do each query in $O(P)$, where P is the length of the pattern string. Trie is an ordered tree structure, which is used mostly for storing strings (like words in dictionary) in a compact way. -\begin{enumerate} - \item In a Trie, each child branch is labeled with letters in the alphabet $\sum$. Actually, it is not necessary to store the letter as the key, because if we order the child branches of every node alphabetically from left to right, the position in the tree defines the key which it is associated to. - \item The root node in a Trie represents an empty string. -\end{enumerate} -% An ordered tree data structure used to store a dynamic set or associative array where the keys are usually strings. Unlike a binary search tree, no node in the tree stores the key associated with that node; instead, its position in the tree defines the key with which it is associated. - -Now, we define a trie Node: first it would have a bool variable to denote if it is the end of the word and a children which is a list of of 26 children TrieNodes. 
-\begin{lstlisting}[language= Python] -class TrieNode: - # Trie node class - def __init__(self): - self.children = [None]*26 - # isEndOfWord is True if node represent the end of the word - self.isEndOfWord = False -\end{lstlisting} -\begin{figure}[h] - \centering - \includegraphics[width=0.6\columnwidth]{fig/trie_compact_trie.jpg} - \caption{Trie VS Compact Trie} - \label{fig:trie_compact_trie} -\end{figure} - -\paragraph{Compact Trie} If we assign only one letter per edge, we are not taking full advantage of the trie’s tree structure. It is more useful to consider compact or compressed tries, tries where we remove the one letter per edge constraint, and contract non-branching paths by concatenating the letters on these paths. -In this way, every node branches out, and every node traversed represents a choice between two different words. The compressed trie that corresponds to our example trie is also shown in Figure -~\ref{fig:trie_compact_trie}. - -\paragraph{Operations: INSERT, SEARCH} -% Now, let us solve an LeetCode problem together which requires us to implement a complete Trie that with the operations INSERT, SEARCH, STARTWITH. All of these operations are actually quickly similar and they all require us to simultaneously iterate each character in the input string (or word) and each level of the Trie on the location of that character. So, it would not be hard to get the worst time complexity when we searched the whole tree or finished iterating the characters in the input. -Both for INSERT and SEARCH, it takes $O(m)$, where m is the length of the word/string we wand to insert or search in the trie. Here, we use an LeetCode problem as an example showing how to implement INSERT and SEARCH. Because constructing a trie is a series of INSERT operations which will take $O(n*m)$, n is the total numbers of words/strings, and m is the average length of each item. 
The space complexity fof the non-compact Trie would be $O(N*|\sum|)$, where $|\sum|$ is the alphlbetical size, and N is the total number of nodes in the trie structure. The upper bound of N is $n*m$. -\begin{figure}[h] - \centering - \includegraphics[width=0.6\columnwidth]{fig/Trie.png} - \caption{Trie Structure} - \label{fig:trie} -\end{figure} -\begin{examples} -\item \textbf{208. Implement Trie (Prefix Tree) (medium).} Implement a trie with insert, search, and startsWith methods. -\begin{lstlisting} -Example: -Trie trie = new Trie(); -trie.insert("apple"); -trie.search("apple"); // returns true -trie.search("app"); // returns false -trie.startsWith("app"); // returns true -trie.insert("app"); -trie.search("app"); // returns true -\end{lstlisting} -\textit{Note: You may assume that all inputs are consist of lowercase letters a-z. All inputs are guaranteed to be non-empty strings.} - -\paragraph{INSERT} with INSERT operation, we woould be able to insert a given word in the trie, when traversing the trie from the root node which is a TrieNode, with each letter in world, if its corresponding node is None, we need to put a node, and continue. At the end, we need to set that node's endofWord variable to True. thereafter, we would have a new branch starts from that node constructured. For example, when we first insert ``app`` as shown in Fig~\ref{fig:trie_compact_trie}, we would end up building branch ``app``, and with ape, we would add nodes ``e`` as demonstrated with red arrows. -\begin{lstlisting}[language=Python] -def insert(self, word): - """ - Inserts a word into the trie. 
- :type word: str - :rtype: void - """ - node = self.root #start from the root node - for c in word: - loc = ord(c)-ord('a') - if node.children[loc] is None: # char does not exist, new one - node.children[loc] = self.TrieNode() - # move to the next node - node = node.children[loc] - # set the flag to true - node.is_word = True -\end{lstlisting} - -\paragraph{SEARCH} For SEARCH, like INSERT, we traverse the trie using the letters as pointers to the next branch. There are three cases: 1) for word P, if it doesnt exist, but its prefix does exist, then we return False. 2) If we found a matching for all the letters of P, at the last node, we need to check if it is a leaf node where is\_word is True. STARTWITH is just slightly different from SEARCH, it does not need to check that and return True after all letters matched. -\begin{lstlisting}[language=Python] -def search(self, word): - node = self.root - for c in word: - loc = ord(c)-ord('a') - # case 1: not all letters matched - if node.children[loc] is None: - return False - node = node.children[loc] - # case 2 - return True if node.is_word else False -\end{lstlisting} -\begin{lstlisting}[language=Python] -def startWith(self, word): - node = self.root - for c in word: - loc = ord(c)-ord('a') - # case 1: not all letters matched - if node.children[loc] is None: - return False - node = node.children[loc] - # case 2 - return True -\end{lstlisting} -Now complete the given Trie class with TrieNode and \_\_init\_\_ function. -\begin{lstlisting}[language=Python] -class Trie: - class TrieNode: - def __init__(self): - self.is_word = False - self.children = [None] * 26 #the order of the node represents a char - - def __init__(self): - """ - Initialize your data structure here. - """ - self.root = self.TrieNode() # root has value None -\end{lstlisting} -\end{examples} - -\begin{examples} -\item \textbf{336. 
Palindrome Pairs (hard).} Given a list of unique words, find all pairs of distinct indices (i, j) in the given list, so that the concatenation of the two words, i.e. words[i] + words[j] is a palindrome. -\begin{lstlisting} -Example 1: - -Input: ["abcd","dcba","lls","s","sssll"] -Output: [[0,1],[1,0],[3,2],[2,4]] -Explanation: The palindromes are ["dcbaabcd","abcddcba","slls","llssssll"] - -Example 2: - -Input: ["bat","tab","cat"] -Output: [[0,1],[1,0]] -Explanation: The palindromes are ["battab","tabbat"] -\end{lstlisting} -\textbf{Solution: One Forward Trie and Another Backward Trie.} We start from the naive solution, which means for each element, we check if it is palindrome with all the other strings. And from the example 1, [3,3] can be a pair, but it is not one of the outputs, which means this is a combination problem, the time complexity is ${C_n}{C_{n-1}}$, and multiply it with the average length of all the strings, we make it $m$, which makes the complexity to be $O(mn^2)$. However, we can use Trie Structure, -\begin{lstlisting}[language = Python] -from collections import defaultdict - - -class Trie: - def __init__(self): - self.links = defaultdict(self.__class__) - self.index = None - # holds indices which contain this prefix and whose remainder is a palindrome - self.pali_indices = set() - - def insert(self, word, i): - trie = self - for j, ch in enumerate(word): - trie = trie.links[ch] - if word[j+1:] and is_palindrome(word[j+1:]): - trie.pali_indices.add(i) - trie.index = i - - -def is_palindrome(word): - i, j = 0, len(word) - 1 - while i <= j: - if word[i] != word[j]: - return False - i += 1 - j -= 1 - return True - - -class Solution: - def palindromePairs(self, words): - '''Find pairs of palindromes in O(n*k^2) time and O(n*k) space.''' - root = Trie() - res = [] - for i, word in enumerate(words): - if not word: - continue - root.insert(word[::-1], i) - for i, word in enumerate(words): - if not word: - continue - trie = root - for j, ch in 
enumerate(word): - if ch not in trie.links: - break - trie = trie.links[ch] - if is_palindrome(word[j+1:]) and trie.index is not None and trie.index != i: - # if this word completes to a palindrome and the prefix is a word, complete it - res.append([i, trie.index]) - else: - # this word is a reverse suffix of other words, combine with those that complete to a palindrome - for pali_index in trie.pali_indices: - if i != pali_index: - res.append([i, pali_index]) - if '' in words: - j = words.index('') - for i, word in enumerate(words): - if i != j and is_palindrome(word): - res.append([i, j]) - res.append([j, i]) - return res -\end{lstlisting} -\textbf{Solution2: .}Moreover, there are always more clever ways to solve these problems. Let us look at a clever way: - abcd, the prefix is ''. 'a', 'ab', 'abc', 'abcd', if the prefix is a palindrome, so the reverse[abcd], reverse[dc], to find them in the words, the words stored in the words with index is fastest to find. $O(n)$. Note that when considering suffixes, we explicitly leave out the empty string to avoid counting duplicates. That is, if a palindrome can be created by appending an entire other word to the current word, then we will already consider such a palindrome when considering the empty string as prefix for the other word. 
- \begin{lstlisting}[language = Python] - class Solution(object): - def palindromePairs(self, words): - # 0 means the word is not reversed, 1 means the word is reversed - words, length, result = sorted([(w, 0, i, len(w)) for i, w in enumerate(words)] + - [(w[::-1], 1, i, len(w)) for i, w in enumerate(words)]), len(words) * 2, [] - - #after the sorting,the same string were nearby, one is 0 and one is 1 - for i, (word1, rev1, ind1, len1) in enumerate(words): - for j in xrange(i + 1, length): - word2, rev2, ind2, _ = words[j] - #print word1, word2 - if word2.startswith(word1): # word2 might be longer - if ind1 != ind2 and rev1 ^ rev2: # one is reversed one is not - rest = word2[len1:] - if rest == rest[::-1]: result += ([ind1, ind2],) if rev2 else ([ind2, ind1],) # if rev2 is reversed, the from ind1 to ind2 - else: - break # from the point of view, break is powerful, this way, we only deal with possible reversed, - return result - \end{lstlisting} - \end{examples} - - %https://fizzbuzzed.com/top-interview-questions-5/ -% \paragraph{Searching} -% \paragraph{Insertion} -% \paragraph{Deletion} - -% Let us see the complete code of a Trie Class: -% \begin{lstlisting}[language = Python] - -% class Trie: - -% # Trie data structure class -% def __init__(self): -% self.root = self.getNode() - -% def getNode(self): - -% # Returns new trie node (initialized to NULLs) -% return TrieNode() - -% def _charToIndex(self,ch): - -% # private helper function -% # Converts key current character into index -% # use only 'a' through 'z' and lower case - -% return ord(ch)-ord('a') - - -% def insert(self,key): - -% # If not present, inserts key into trie -% # If the key is prefix of trie node, -% # just marks leaf node -% pCrawl = self.root -% length = len(key) -% for level in range(length): -% index = self._charToIndex(key[level]) - -% # if current character is not present -% if not pCrawl.children[index]: -% pCrawl.children[index] = self.getNode() -% pCrawl = pCrawl.children[index] - -% # 
mark last node as leaf -% pCrawl.isEndOfWord = True - -% def search(self, key): - -% # Search key in the trie -% # Returns true if key presents -% # in trie, else false -% pCrawl = self.root -% length = len(key) -% for level in range(length): -% index = self._charToIndex(key[level]) -% if not pCrawl.children[index]: -% return False -% pCrawl = pCrawl.children[index] - -% return pCrawl != None and pCrawl.isEndOfWord - -% # driver function -% def main(): - -% # Input keys (use only 'a' through 'z' and lower case) -% keys = ["the","a","there","anaswe","any", -% "by","their"] -% output = ["Not present in trie", -% "Present in tire"] - -% # Trie object -% t = Trie() - -% # Construct trie -% for key in keys: -% t.insert(key) - -% # Search for different keys -% print("{} ---- {}".format("the",output[t.search("the")])) -% print("{} ---- {}".format("these",output[t.search("these")])) -% print("{} ---- {}".format("their",output[t.search("their")])) -% print("{} ---- {}".format("thaw",output[t.search("thaw")])) - -% if __name__ == '__main__': -% main() -% \end{lstlisting} -There are several other data structures, like balanced trees and hash tables, which give us the possibility to search for a word in a dataset of strings. Then why do we need trie? Although hash table has $O(1)$ time complexity for looking for a key, it is not efficient in the following operations : -\begin{itemize} - \item Finding all keys with a common prefix. - \item Enumerating a dataset of strings in lexicographical order. -\end{itemize} - -\paragraph{Sorting} -Lexicographic sorting of a set of keys can be accomplished by building a trie from them, and traversing it in pre-order, printing only the leaves' values. This algorithm is a form of radix sort. This is why it is also called radix tree. 
-\end{document} \ No newline at end of file diff --git a/Easy-Book/chapters/question_3_array_question.tex b/Easy-Book/chapters/question_3_array_question.tex deleted file mode 100644 index 81bc86a..0000000 --- a/Easy-Book/chapters/question_3_array_question.tex +++ /dev/null @@ -1,2328 +0,0 @@ -\documentclass[../main.tex]{subfiles} -\begin{document} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%%%%% Examples -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -In this chapter, we mainly discuss about the array based questions. We first categorize these problems into different type, and then each type can usually be solved and optimized with nearly the best efficiency. - -Given an array, a subsequence is composed of elements whose subscripts are increasing in the original array. A subarray is a subset of subsequence, which is contiguous subsequence. Subset contain any possible combinations of the original array. For example, for array [1, 2, 3, 4]: -\begin{lstlisting}[numbers=none] -Subsequence -[1, 3] -[1, 4] -[1, 2, 4] -Subarray -[1, 2] -[2, 3] -[2, 3, 4] -Subset includes different length of subset, either -length 0: [] -length 1: [1], [2], [3], [4] -length 2: [1, 2], [1, 3], [1, 4], [2, 3], [2, 4], [3, 4] -\end{lstlisting} - -Here array means one dimension list. For array problems, math will play an important role here. The rules are as follows: -\begin{itemize} - \item Subarray: using dynamic programming based algorithm to make brute force $O(n^3)$ to $O(n)$. Two pointers for the increasing subarray. Prefix sum, or kadane's algorithm plus sometimes with the hashmap, or two pointers (three pointers) for the maximum subarray. - \item Subsequence: using dynamic programming based algorithm to make brute force $O(2^n)$ to $O(n^2)$, which corresponds to the seqence type of dynamic programming. 
- \item Duplicates: 217, 26, 27, 219, 287, 442; - \item Intersections of Two Arrays: -\end{itemize} - -Before we get into solving each type of problems, we first introduce the algorithms we will needed in this Chapter, including two pointers (three pointers or sliding window), prefix sum, kadane's algorithm. Kadane's algorithm can be explained with sequence type of dynamic programming. - - % Easy problems: Duplicates: Intersection: 349. Intersection of Two Arrays; Consecutive: 485. Max Consecutive Ones - % Maximum/Minimum subarray: 718, 53. Maximum Subarray, 325. Maximum Size Subarray Sum Equals k. 209. Minimum Size Subarray Sum Solutions: divide and conquer, special sum and hashtable, two pointers (sliding window) for minimum - % Sum of K numbers of elements: Target, return either the index or the elements(might need to avoid repetition). (2/3/4 sums) - % Partition a list into K equal part: DP - -After this chapter, we need to learn the step to solve these problems: -\begin{enumerate} - \item Analyze the problem and categorize it. To know the naive solution's time complexity can help us identify it. - \item If we can not find what type it is, let us see if we can \textit{convert}. If not, we can try to identify a simple version of this problem, and then upgrade the simple solution to the more complex one. - \item Solve the problem with the algorithms we taught in this chapter. - \item Try to see if there is any more solutions. - - - % \textit{Note: If the problem is complex, trying to see the simple version, and then upgrade the simple version to a complex one. e.g. (487. Max Consecutive Ones II, 485. Max Consecutive Ones)} - \item Check the special case. (Usually very important for this type of problems) -\end{enumerate} -% Including two pointers both from the start, or two pointers one is from the beginning and the other is from the end. Also, the sliding window, and the flexible sliding windows, also find the cycle algorithm. 
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%%%%% Subarray -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\section{Subarray } -\textit{Note: For subarray the most important feature is contiguous. Here, we definitely will not use sorting.} Given an array with size $n$, the total number of subarrays we have is $\sum_{i=1}^{i=n} i = n*(n+1)/2$, which makes the time complexity of naive solution that use two nested for/while loop $O(n^2)$ or $O(n^3)$. - -There are two types of problems related to subarry: \textbf{Range Query} and \textbf{optimization-based subarray}. The Range query problems include querying the minimum/maximum or sum of all elements in a given range [i,j] in an array. Range Query has a more standard way to solve, either by searching or with the segment tree: -\paragraph{Range Query} -\begin{enumerate} - \item 303. Range Sum Query - Immutable - \item 307. Range Sum Query - Mutable - \item 304. Range Sum Query 2D - Immutable -\end{enumerate} - -\paragraph{Optimization-based subarray} -Given a single array, we would normally be asked to return either the maximum/minimum value, the maximum/minimum length, or the number of subarrays that has sum/product that \textit{satisfy a certain condition}. The condition here decide the difficulty of these problems. - -The questions can are classified into two categories: -\begin{enumerate} - \item \textit{Absolute-conditioned Subarray} that $sum/product=K$ or - \item \textit{Vague-conditioned subarray} that has these symbols that is not equal. -\end{enumerate} - -% \begin{enumerate} -% \item Maximum/minimum subarray with Sum or Product or a pattern; we use \textbf{math and prefix\_sum} or sometimes together with hashmap method to tackle. Also, sliding window can be used. -% \item Minimum Subarray with Sum or Product or a pattern; \textbf{sliding window} can be used to get the minimum length of subarray. 
-% \item Find subarray that is increasing or decreasing ; \textbf{Two pointers or sliding window} can be used. -% \end{enumerate} -With the proposed algorithms, the time complexity of subarray problems can be decreased from the brute force $O(n^3)$ to $O(n)$. The brute force is universal: two nested for loops marked the start and end of the subarray to enumerate all the possible subarrays, and another $O(n)$ spent to compute the result needed (sum or product or check the pattern like increasing or decreasing). - -% Using prefix sum or kadane's algorithm or hashmap sometimes if we have problems to solve it, a panacea is the Sliding Window Algorithm either with two or three pointers. - -As we have discussed in the algorithm section, -\begin{enumerate} - \item \textbf{stack/queue/monotone stack} can be used to solve subarray problems that is related to its smaller/larger item to one item's left/right side - \item \textbf{sliding window} can be used to find subarray that either the sum or product inside of the sliding window is ordered (either monotone increasing/decreasing). This normally requires that the array are all positive or all negative. We can use the sliding window to cover its all search space. Or else we cant use sliding window. - \item For all problems related with subarray sum/product, for both vague or absolute conditioned algorithm, we have a universal algorithm: save the prefix sum (sometimes together with index) in a sorted array, and use binary search to find all possible starting point of the window. - \item Prefix Sum or Kadane's algorithm can be used when we need to get the sum of the subarry. -\end{enumerate} - -\begin{enumerate} - \item 53. Maximum Subarray (medium) - \item 325. Maximum Size Subarray Sum Equals k - \item 525. Contiguous Array - \item 560. Subarray Sum Equals K - \item 209. Minimum Size Subarray Sum (medium) -\end{enumerate} -Monotone stack and vague conditioned subarray -\begin{enumerate} - \item 713. 
Subarray Product Less Than K (all positive) - \item 862. Shortest Subarray with Sum at Least K (with negative) - \item 907. Sum of Subarray Minimums (all positive, but minimum in all subarray and sum) -\end{enumerate} - - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%%%%% Maximum Subarray -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\subsection{Absolute-conditioned Subarray} -For the maximum array, you are either asked to return: -\begin{enumerate} -\item the maximum sum or product; \textit{solved using prefix sum or kadane's algorithm} -\item the maximum length of subarray with sum or product S equals to K; \textit{solved using prefix sum together with a hashmap saves previous prefix sum and its indices} -\item the maximum number of subarray with sum or product S (the total number of) equals to K; \textit{solved using prefix sum together with a hashmap saves previous prefix sum and its count} -\end{enumerate} - -\paragraph{Maximum/Minimum sum or product} -\begin{examples}[resume] -\item \textbf{53. Maximum Subarray (medium).} -Find the contiguous subarray within an array (containing at least one number) which has the largest sum. -\begin{lstlisting}[numbers=none] -For example, given the array [-2,1,-3,4,-1,2,1,-5,4], - the contiguous subarray [4,-1,2,1] has the largest sum = 6. -\end{lstlisting} -Solution: Brute force is to use two for loops, first is the starting, second is the end, then we can get the maximum value. To optimize, we can use divide and conquer, $O(nlgn)$ vs brute force is $O(n^3)$ (two embedded for loops and n for computing the sum). The divide and conquer method was shown in that chapter. A more efficient algorithm is using pre\_sum. Please check Section~\ref{part4_prefix_sum} for the answer. - -Now what is the slinding window solution? The key step in sliding window is when to move the first pointer of the window (shrinking the window). The window must include current element j. 
For the maximum subarray, to increase the sum of the window, we need to abandon any previous elements if they have negative sum. -\begin{lstlisting}[language = Python] -from sys import maxsize -class Solution: - def maxSubArray(self, nums): - """ - :type nums: List[int] - :rtype: int - """ - if not nums: - return 0 - i, j = 0, 0 #i<=j - maxValue = -maxsize - window_sum = 0 - while j < len(nums): - window_sum += nums[j] - j += 1 - maxValue = max(maxValue, window_sum) - while i=nums[j]: - i = j - - return max_length -\end{lstlisting} -\item \textbf{209. Minimum Size Subarray Sum (medium)} Given an array of n positive integers and a positive integer s, find the minimal length of a contiguous subarray of which the sum >= s. If there isn't one, return 0 instead. -\begin{lstlisting}[numbers=none] -Example: - -Input: s = 7, nums = [2,3,1,2,4,3] -Output: 2 -Explanation: the subarray [4,3] has the minimal length under the problem constraint. -\end{lstlisting} - -\textbf{Solution 1: Sliding Window, $O(n)$.} -\begin{lstlisting}[language=Python] -def minSubArrayLen(self, s, nums): - ans = float('inf') - n = len(nums) - i = j = 0 - acc = 0 - while j < n: - acc += nums[j] # increase the window size - while acc >= s: # shrink the window to get the optimal result - ans = min(ans, j-i+1) - acc -= nums[i] - i += 1 - j +=1 - return ans if ans != float('inf') else 0 -\end{lstlisting} - -\textbf{Solution 2: prefix sum and binary search. $O(n\log_n)$.} Assuming current prefix sum is $p_i$, We need to find the $\max p_j \leq (p_i-s)$, this is the right most value in the prefix sum array (sorted) that is <= $p_i -s$. 
-\begin{lstlisting}[language=Python] -from bisect import bisect_right -class Solution(object): - def minSubArrayLen(self, s, nums): - ans = float('inf') - n = len(nums) - i = j = 0 - ps = [0] - while j < n: - ps.append(nums[j]+ps[-1]) - # find a posible left i - if ps[-1]-s >= 0: - index = bisect_right(ps, ps[-1]-s) - if index > 0: - index -= 1 - ans = min(ans, j-index+1) - j+=1 - return ans if ans != float('inf') else 0 -\end{lstlisting} -\end{examples} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%%%%% Maximum Subarray -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\paragraph{The maximum number of subarray with sum or product S} -\begin{examples}[resume] -\item \textbf{560. Subarray Sum Equals K} Given an array of integers and an integer k, you need to find the total number of continuous subarrays whose sum equals to k. -\begin{lstlisting}[numbers=none] -Example 1: -Input:nums = [1,1,1], k = 2 -Output: 2 -\end{lstlisting} - -Answer: The naive solution is we enumerate all possible subarray which is $n^2$, and then we compute and check its sum which is $O(n)$. So the total time complexity is $O(n^3)$ time complexity. However, we can decrease it to $O(n^2)$ if we compute the sum of array in a different way: we first compute the sum till current index for each position, with equation $sum(i,j) = sum(0,j)-sum(0,i)$. However the OJ gave us LTE error. 
-\begin{lstlisting}[language = Python] -def subarraySum(self, nums, k): - """ - :type nums: List[int] - :type k: int - :rtype: int - """ - ''' return the number of subarrays that equal to k''' - count = 0 - sums = [0]*(len(nums)+1) # sum till current index - for idx, v in enumerate(nums): - sums[idx+1] = sums[idx]+v - for i in range(len(nums)): - for j in range(i, len(nums)): - value = sums[j+1]-sums[i] - count = count+1 if value==k else count - return count -\end{lstlisting} - -Solution 3: using prefix\_sum and hashmap, to just need to reformulate dict[sum\_i]. For this question, we need to get the total number of subsubarray, so $dict[i] = count$, which means every time we just set the dict[i]+=1. dict[0]=1 -\begin{lstlisting}[language = Python] -import collections -class Solution(object): - def subarraySum(self, nums, k): - """ - :type nums: List[int] - :type k: int - :rtype: int - """ - ''' return the number of subarrays that equal to k''' - dict = collections.defaultdict(int) #the value is the number of the sum occurs - dict[0]=1 - prefix_sum, count=0, 0 - for v in nums: - prefix_sum += v - count += dict[prefix_sum-k] # increase the counter of the appearing value k, default is 0 - dict[prefix_sum] += 1 # update the count of prefix sum, if it is first time, the default value is 0 - return count -\end{lstlisting} - -\item \textbf{974. Subarray Sums Divisible by K.} Given an array A of integers, return the number of (contiguous, non-empty) subarrays that have a sum divisible by K. -\begin{lstlisting}[numbers=none] -Example 1: - -Input: A = [4,5,0,-2,-3,1], K = 5 -Output: 7 -Explanation: There are 7 subarrays with a sum divisible by K = 5: -[4, 5, 0, -2, -3, 1], [5], [5, 0], [5, 0, -2, -3], [0], [0, -2, -3], [-2, -3] -\end{lstlisting} - -\textbf{Analysis:} for the above array, we can compute the prefix sum as [0,4,9, 9, 7,4,5]. Let P[i+1] = A[0] + A[1] + ... + A[i]. Then, each subarray can be written as P[j] - P[i] (for j > i). 
We need to find every i for the current index j such that (P[j]-P[i])\% K == 0. This holds exactly when P[j]\%K=P[i]\%K; therefore, unlike the sum == K case, we do not look up P[j]-K in the hashmap but P[j]\%K instead. Therefore, we need to save the prefix sum as the modulo of K. For the example, we have dict: \{0: 2, 4: 4, 2: 1\}. -\begin{lstlisting}[language=Python] -from collections import defaultdict -class Solution: - def subarraysDivByK(self, A, K): - """ - :type A: List[int] - :type K: int - :rtype: int - """ - a_sum = 0 - p_dict = defaultdict(int) - p_dict[0] = 1 # when it is empty we still has one 0:1 - ans = 0 - for i, v in enumerate(A): - a_sum += v - a_sum %= K - if a_sum in p_dict: - ans += p_dict[a_sum] - p_dict[a_sum] += 1 # save the remodule instead - return ans -\end{lstlisting} -\textbf{Solution 2: use Combination} Then P = [0,4,9,9,7,4,5], and $C_0 = 2, C_2 = 1, C_4 = 4$. With $C_0=2$, (at P[0] and P[6]), it indicates $C_2^2=1$ subarray with sum divisible by K, namely A[0:6]=[4, 5, 0, -2,-3,1]. With $C_4 = 4$ (at P[1], P[2], P[3], P[5]), it indicates $C_4^2=6$ subarrays with sum divisible by K, namely A[1:2], A[1:3], A[1:5], A[2:3], A[2:5], A[3:5]. -\begin{lstlisting}[language=Python] -def subarraysDivByK(self, A, K): - P = [0] - for x in A: - P.append((P[-1] + x) % K) - - count = collections.Counter(P) - return sum(v*(v-1)/2 for v in count.values()) -\end{lstlisting} - -\end{examples} - - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%%%%% Vague-conditioned subarray -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\subsection{Vague-conditioned subarray} -In this section, we are asked the same types of questions as in the last section. The only difference is the condition. For example, in the following question, we are asked for a subarray with $sum >= s$. - -Because the condition is vague, a hashmap$+$prefix sum solution will no longer give us $O(n)$ linear time. 
With prefix sums, the best we can do when the array contains only positive numbers is $O(n\lg n)$, by combining them with binary search. However, a carefully designed sliding window can still help us achieve linear time $O(n)$. For an array with negative numbers, we can utilize the monotonic queue mentioned in Section~\ref{section_mono_stack}, which will achieve $O(n)$ both in time and space complexity. - -\paragraph{All Positive Array (Sliding Window)} - -If the array is all positive, it can still be easily solved with sliding window. For example: -\begin{examples}[resume] -\item \textbf{209. Minimum Size Subarray Sum (medium)} Given an array of n positive integers and a positive integer s, find the minimal length of a contiguous subarray of which the sum >= s. If there isn't one, return 0 instead. -\begin{lstlisting}[numbers=none] -Example: -Input: s = 7, nums = [2,3,1,2,4,3] -Output: 2 -Explanation: the subarray [4,3] has the minimal length under the problem constraint. -\end{lstlisting} -Follow up: If you have figured out the O(n) solution, try coding another solution of which the time complexity is O(n log n). - -\textbf{Analysis.} For this problem, we can still use prefix sum saved in hashmap. However, since the condition is $sum >= s$, if we use a hashmap, we need to search through the hashmap with $key <= prefix\_sum - s$. The time complexity would rise to $O(n^2)$ if we use linear search. We would receive a TLE (Time Limit Exceeded) error. 
-\begin{lstlisting}[language = Python] - def minSubArrayLen(self, s, nums): - """ - :type s: int - :type nums: List[int] - :rtype: int - """ - if not nums: - return 0 - dict = collections.defaultdict(int) - dict[0] = -1 # pre_sum 0 with index -1 - prefixSum = 0 - minLen = sys.maxsize - for idx, n in enumerate(nums): - prefixSum += n - for key, value in dict.items(): - if key <= prefixSum - s: - minLen = min(minLen, idx-value) - dict[prefixSum] = idx #save the last index - return minLen if 1<=minLen<=len(nums) else 0 -\end{lstlisting} - -\textbf{Solution 1: Prefix Sum and Binary Search.} Because the items in the array are all positive number, so the prefix sum array is increasing, this means if we save the prefix sum in an array, it is ordered, we can use binary search to find the index of largest value <= (prefix sum - s). If we use bisect module, we can use bisect\_right function which finds the right most position that we insert current value to keep the array ordered. The index will be rr-1. 
-\begin{lstlisting}[language=Python] -import bisect -def minSubArrayLen(self, s, nums): - ps = [0] - ans = len(nums)+1 - for i, v in enumerate(nums): - ps.append (ps[-1] + v) - #find the right most position that <= - rr = bisect.bisect_right(ps, ps[i+1] - s) - if rr: - ans = min(ans, i+1 - (rr-1)) - return ans if ans <= len(nums) else 0 -\end{lstlisting} -\begin{lstlisting}[language = Python] - def minSubArrayLen(self, s, nums): - """ - :type s: int - :type nums: List[int] - :rtype: int - """ - def bSearch(nums, i, j, target): - while i < j: - mid = (i+j) / 2 - if nums[mid] == target: - return mid - elif nums[mid] < target: - i = mid + 1 - else: - j = mid - 1 - return i - - if not nums: - return 0 - rec = [0] * len(nums) - rec[0] = nums[0] - if rec[0] >= s: - return 1 - minlen = len(nums)+1 - for i in range(1, len(nums)): - rec[i] = rec[i-1] + nums[i] - if rec[i] >= s: - index = bSearch(rec, 0, i, rec[i] - s) - if rec[index] > rec[i] - s: - index -= 1 - minlen = min(minlen, i - index) - return minlen if minlen != len(nums)+1 else 0 -\end{lstlisting} - -\textbf{Solution 2: Sliding window in $O(n)$.} While, using the sliding window, Once the sum in the window satisfy the condition, we keep shrinking the window size (moving the left pointer rightward) untill the condition is no longer hold. This way, we are capable of getting the complexity with $O(n)$. -\begin{lstlisting}[language = Python] -def minSubArrayLen(self, s, nums): - i, j = 0, 0 - sum_in_window = 0 - ans = len(nums) + 1 - while j < len(nums): - sum_in_window += nums[j] - j += 1 - # shrink the window if the condition satisfied - while i = s: - ans = min(ans, j-i) - sum_in_window -= nums[i] - i += 1 - return ans if ans <= len(nums) else 0 -\end{lstlisting} - -\item \textbf{713. Subarray Product Less Than K} Your are given an array of positive integers nums. -Count and print the number of (contiguous) subarrays where the product of all the elements in the subarray is less than k. 
-\begin{lstlisting}[numbers=none] -Example 1: -Input: nums = [10, 5, 2, 6], k = 100 -Output: 8 -Explanation: The 8 subarrays that have product less than 100 are: [10], [5], [2], [6], [10, 5], [5, 2], [2, 6], [5, 2, 6]. - -Note that [10, 5, 2] is not included as the product of 100 is not strictly less than k. -Note: -0 < nums.length <= 50000. -0 < nums[i] < 1000. -0 <= k < 10^6. -\end{lstlisting} - -Answer: Because we need the subarray less than k, so it is difficult to use prefix sum. If we use sliding window, -\begin{lstlisting} -i=0, j=0, 10 10<100, ans+= j-i+1 (1) -> [10] -i=0, j=1, 50 50<100, ans+= j-i+1 (3), -> [10],[10,5] -i=0, j=2, 100 shrink the window, i=1, product = 10, ans+=2, ->[5,2][2] -i=1, j=3, 60, ans+=3->[2,6],[2],[6] -\end{lstlisting} -The python code: -\begin{lstlisting}[language = Python] -class Solution: - def numSubarrayProductLessThanK(self, nums, k): - """ - :type nums: List[int] - :type k: int - :rtype: int - """ - if not nums: - return 0 - i, j = 0, 0 - window_product = 1 - ans = 0 - while j < len(nums): - window_product *= nums[j] - - while i= k: - window_product /= nums[i] - i+=1 - if window_product < k: - ans += j-i+1 - j += 1 - return ans -\end{lstlisting} -\end{examples} -\paragraph{Array with Negative Element (Monotonic Queue)} - -In this section, we will work through how to handle the array with negative element and is Vague-conditioned. We found using monotonic Queue or stack (Section~\ref{section_mono_stack} will fit the scenairo and gave $O(n)$ time complexity and $O(N)$ space complexity. - -\begin{examples}[resume] -\item \textbf{862. Shortest Subarray with Sum at Least K} -Return the length of the shortest, non-empty, contiguous subarray of A with sum at least K. - -If there is no non-empty subarray with sum at least K, return -1. 
-\begin{lstlisting}[numbers=none] -Example 1: -Input: A = [1], K = 1 -Output: 1 - -Example 2: -Input: A = [1,2], K = 4 -Output: -1 - -Example 3: -Input: A = [2,-1,2], K = 3 -Output: 3 -\end{lstlisting} -Note: $1 <= A.length <= 50000$, $-10 ^ 5 <= A[i] <= 10 ^ 5$, $1 <= K <= 10 ^ 9$. - -\textbf{Analysis:} The only difference of this problem compared with the last is with negative value. Because of the negative, the shrinking method no longer works because when we shrink the window, the sum in the smaller window might even grow if we just cut out a negative value. For instance, [84,-37,32,40,95], K=167, the right answer is [32, 40, 95]. In this program, i=0, j=4, so how to handle the negative value? - -\textbf{Solution 1: prefix sum and binary search in prefix sum. LTE} - -\begin{lstlisting}[language=Python] -def shortestSubarray(self, A, K): - def bisect_right(lst, target): - l, r = 0, len(lst)-1 - while l <= r: - mid = l + (r-l)//2 - if lst[mid][0] <= target: - l = mid + 1 - else: - r = mid -1 - return l - acc = 0 - ans = float('inf') - prefixSum=[(0, -1)] #value and index - for i, n in enumerate(A): - acc += n - index = bisect_right(prefixSum, acc-K) - for j in range(index): - ans = min(ans, i-prefixSum[j][1]) - index = bisect_right(prefixSum, acc) - prefixSum.insert(index, (acc, i)) - #print(index, prefixSum) - return ans if ans != float('inf') else -1 -\end{lstlisting} - -% For an all positive array, the prefix is perfectly increasing, if we want $S_{(i,j)} = y[j]-y[i-1] >= k$, if we want to get the smallest subarray, then $max(i)$. For each $j$, if we found $i$ that is satisfied, that i is no longer needed to be considered again (just like the previous sliding window, where when the condition satisfied, we move the window i+1 till the condition is violated again or we can not move i any further). - -Now, let us analyze a simple example which includes both 0 and negative number. 
[2, -1, 2, 0, 1], K=3, with prefix sum [0, 2, 1, 3, 3, 4], the subarrays whose sum is at least three include [2,-1,2], [2,-1,2, 0] and [2, 0, 1]. First, let us draw the prefix sum on an x-y axis. When we encounter a negative number, the prefix sum decreases; if the number is zero, the prefix sum stays flat. For the zero case: at p[2] = p[3], if subarray ends with index 2 is considered, then 3 is not needed. For the negative case: p[0]=2>p[1]=1 due to A[1]<0. Because p[1] can always be a better choice to be $i$ than p[0] (it is smaller, so the condition is more likely to be met, and it is closer to $j$, so the subarray is shorter). Therefore, we can still keep the valid prefix sums monotonically increasing, like the array with all positive numbers, by maintaining a monotonic queue. -\begin{lstlisting}[language = Python] -class Solution: - def shortestSubarray(self, A, K): - - P = [0]*(len(A)+1) - for idx, x in enumerate(A): - P[idx+1] = P[idx]+x - - - ans = len(A)+1 # N+1 is impossible - monoq = collections.deque() - for y, Py in enumerate(P): - while monoq and Py <= P[monoq[-1]]: #both negative and zero leads to kick out any prevous larger or equal value - print('pop', P[monoq[-1]]) - monoq.pop() - - while monoq and Py - P[monoq[0]] >= K: # if one x is considered, no need to consider again (similar to sliding window where we move the first index forward) - print('pop', P[monoq[0]]) - ans = min(ans, y - monoq.popleft()) - print('append', P[y]) - monoq.append(y) - - - return ans if ans < len(A)+1 else -1 -\end{lstlisting} -\end{examples} - -\subsection{LeetCode Problems and Misc} -\paragraph{Absolute-conditioned Subarray} -\begin{enumerate} - \item 930. Binary Subarrays With Sum - \begin{lstlisting} - In an array A of 0s and 1s, how many non-empty subarrays have sum S? -Example 1: - -Input: A = [1,0,1,0,1], S = 2 -Output: 4 -Explanation: -The 4 subarrays are bolded below: -[1,0,1,0,1] -[1,0,1,0,1] -[1,0,1,0,1] -[1,0,1,0,1] -Note: - - A.length <= 30000 - 0 <= S <= A.length - A[i] is either 0 or 1. 
-\end{lstlisting} -Answer: this is exactly the third time of maximum subarray, the maximum length of subarry with a certain value. We solve it using prefix sum and a hashmap to save the count of each value. -\begin{lstlisting}[language=Python] -import collections -class Solution: - def numSubarraysWithSum(self, A, S): - """ - :type A: List[int] - :type S: int - :rtype: int - """ - dict = collections.defaultdict(int) #the value is the number of the sum occurs - dict[0]=1 #prefix sum starts from 0 and the number is 1 - prefix_sum, count=0, 0 - for v in A: - prefix_sum += v - count += dict[prefix_sum-S] # increase the counter of the appearing value k, default is 0 - dict[prefix_sum] += 1 # update the count of prefix sum, if it is first time, the default value is 0 - return count -\end{lstlisting} -We can write it as: -\begin{lstlisting}[language=Python] - def numSubarraysWithSum(self, A, S): - """ - :type A: List[int] - :type S: int - :rtype: int - """ - P = [0] - for x in A: P.append(P[-1] + x) - count = collections.Counter() - - ans = 0 - for x in P: - ans += count[x] - count[x + S] += 1 - - return ans -\end{lstlisting} -Also, it can be solved used a modified sliding window algorithm. For sliding window, we have $i,j$ starts from 0, which represents the window. Each iteration j will move one position. For a normal sliding window, only if the sum is larger than the value, then we shrink the window size by one. However, in this case, like in the example $1, 0, 1, 0, 1$, when $j = 5$, $i = 1$, the sum is $2$, but the algorithm would miss the case of $i = 2$, which has the same sum value. To solve this problem, we keep another index $i_hi$, in addition to the moving rule of $i$, it also moves if the sum is satisfied and that value is $0$. This is actually a Three pointer algorithm. 
-\begin{lstlisting}[language=Python] - def numSubarraysWithSum(self, A, S): - i_lo, i_hi, j = 0, 0, 0 #i_lo <= j - sum_lo = sum_hi = 0 - ans = 0 - while j < len(A): - # Maintain i_lo, sum_lo: - # While the sum is too big, i_lo += 1 - sum_lo += A[j] - while i_lo < j and sum_lo > S: - sum_lo -= A[i_lo] - i_lo += 1 - - # Maintain i_hi, sum_hi: - # While the sum is too big, or equal and we can move, i_hi += 1 - sum_hi += A[j] - while i_hi < j and ( - sum_hi > S or sum_hi == S and not A[i_hi]): - sum_hi -= A[i_hi] - i_hi += 1 - - if sum_lo == S: - ans += i_hi - i_lo + 1 - j += 1 - - return ans -\end{lstlisting} -\item 523. Continuous Subarray Sum -\begin{lstlisting} -Given a list of non-negative numbers and a target integer k, write a function to check if the array has a continuous subarray of size at least 2 that sums up to the multiple of k, that is, sums up to n*k where n is also an integer. - -Example 1: -Input: [23, 2, 4, 6, 7], k=6 -Output: True -Explanation: Because [2, 4] is a continuous subarray of size 2 and sums up to 6. - -Example 2: -Input: [23, 2, 6, 4, 7], k=6 -Output: True -Explanation: Because [23, 2, 6, 4, 7] is an continuous subarray of size 5 and sums up to 42. - -Note: -The length of the array won't exceed 10,000. -You may assume the sum of all the numbers is in the range of a signed 32-bit integer. -\end{lstlisting} -Answer: This is a mutant of the subarray with value k. The difference here, we save the prefix sum as the reminder of k. if $(a+b)\%k=0$, then $(a\%k+b\%k)/k=1$. 
-\begin{lstlisting}[language=Python] -class Solution: - def checkSubarraySum(self, nums, k): - """ - :type nums: List[int] - :type k: int - :rtype: bool - """ - - if not nums: - return False - k = abs(k) - prefixSum = 0 - dict = collections.defaultdict(int) - dict[0]=-1 - for i, v in enumerate(nums): - prefixSum += v - if k!=0: - prefixSum %= k - if prefixSum in dict and (i-dict[prefixSum])>=2: - return True - if prefixSum not in dict: - dict[prefixSum] = i - return False -\end{lstlisting} -\end{enumerate} - - -For problems like bounded, or average, minimum in a subarray, -\begin{examples}[resume] -\item 795.Number of Subarrays with Bounded Maximum (medium) -\item 907. Sum of Subarray Minimums (monotone stack) -\end{examples} - -% \item 674. Longest Continuous Increasing Subsequence - -% Given an unsorted array of integers, find the length of longest continuous increasing subsequence (subarray). - -% Example 1: -% \begin{lstlisting} -% Input: [1,3,5,4,7] -% Output: 3 -% Explanation: The longest continuous increasing subsequence is [1,3,5], its length is 3. -% Even though [1,3,5,7] is also an increasing subsequence, it's not a continuous one where 5 and 7 are separated by 4. -% \end{lstlisting} -% Example 2: -% \begin{lstlisting} -% Input: [2,2,2,2,2] -% Output: 1 -% Explanation: The longest continuous increasing subsequence is [2], its length is 1. -% \end{lstlisting} -% \textit{Note: Length of the array will not exceed 10,000.} - -% Solution: The brute force solution is use two for loops with $O(n^2)$. The first loop is the start number, the second loop is the $nums[j]>nums[j-1]$ or else stop. Or we can use two pointers. i,j start from 0,1 respectively. 
-% \begin{lstlisting}[language = Python] -% class Solution: -% def findLengthOfLCIS(self, nums): -% """ -% :type nums: List[int] -% :rtype: int -% """ -% if not nums: -% return 0 -% if len(nums)==1: -% return 1 -% i,j=0,1 -% max_length = 0 -% while jmax_length: -% max_length = j-i -% i=j -% j+=1 -% if j-i>max_length: -% max_length = j-i - -% return max_length -% \end{lstlisting} - - -% section subsequence -%\subfile{chapters/mastering/array/subsequence} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%%%%% sub sequence -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\section{Subsequence (Medium or Hard)} -The difference between subsequence questions and subarray questions is that the elements do not need to be consecutive. Because of this relaxation, the brute force solution of this type of question is exponential, $O(2^n)$, because for each element, we have two options: chosen or not chosen. This type of question is usually used as a follow-up to a subarray question, since dropping the consecutiveness requirement makes it harder. These problems are typically solved with dynamic programming. Here we show a list of all related subsequence problems on LeetCode in Fig.~\ref{fig:subsequence_problems}. - -A subsequence of a string is a new string which is formed from the original string by deleting some (can be none) of the characters without disturbing the relative positions of the remaining characters. (i.e., "ACE" is a subsequence of "ABCDE" while "AEC" is not). For the subsequence problems, commonly we will see increasing subsequences and counting of distinct subsequences. They are usually solved with the single-sequence type of dynamic programming. 
-\begin{figure}[h] - \centering - \includegraphics[width=0.8\columnwidth]{fig/subsequence_1.png} - \includegraphics[width=0.8\columnwidth]{fig/subsequence_2.png} - \caption{Subsequence Problems Listed on LeetCode} - \label{fig:subsequence_problems} -\end{figure} -940. Distinct Subsequences II (hard) - -Given a string S, count the number of distinct, non-empty subsequences of S . Since the result may be large, return the answer modulo $10^9 + 7$. -\begin{lstlisting} -Example 1: - -Input: "abc" -Output: 7 -Explanation: The 7 distinct subsequences are "a", "b", "c", "ab", "ac", "bc", and "abc". - -Example 2: - -Input: "aba" -Output: 6 -Explanation: The 6 distinct subsequences are "a", "b", "ab", "ba", "aa" and "aba". - -Example 3: - -Input: "aaa" -Output: 3 -Explanation: The 3 distinct subsequences are "a", "aa" and "aaa". -\end{lstlisting} -\textbf{Sequence type dynamic programming}. The naive solution for subsequence is using DFS to generate all of the subsequence recursively and we also need to check the repetition. The possible number of subsequence is $2^n-1$. Let's try forward induction method. -\begin{lstlisting} -# define the result for each state: number of subsequence ends with each state -state: a b c -ans : 1 2 4 -a: a; dp[0] = 1 -b: b, ab; = dp[0]+1 if this is 'a', length 1 is the same as dp[0], only length 2 is possible -c: c, ac, bc, abc; = dp[0]+dp[1]+1, if it is 'a', aa, ba, aba, = dp[1]+1 -d: d, ad, bd, abd, cd, acd, bcd, abcd = dp[0]+dp[1]+dp[2]+1 -\end{lstlisting} -Thus the recurrence function can be Eq.~\ref{eq:distinct_subsequence}. 
-\begin{equation} -\label{eq:distinct_subsequence} - dp[i] = \sum_{j ``````<-r -% \end{lstlisting} - -% \begin{enumerate} -% \item -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%%%%% Others -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\subsection{Others} -For example, the following question would be used as follow up for question \textit{Longest Continuous Increasing Subsequence} - -300. Longest Increasing Subsequence - - -673. Number of Longest Increasing Subsequence - -Given an unsorted array of integers, find the number of longest increasing subsequence. -\begin{lstlisting} -Example 1: - -Input: [1,3,5,4,7] -Output: 2 -Explanation: The two longest increasing subsequence are [1, 3, 4, 7] and [1, 3, 5, 7]. - -Example 2: -Input: [2,2,2,2,2] -Output: 5 -Explanation: The length of longest continuous increasing subsequence is 1, and there are 5 subsequences' length is 1, so output 5. -\textit{Note: Length of the given array will be not exceed 2000 and the answer is guaranteed to be fit in 32-bit signed int.} -\end{lstlisting} - -Solution: Another different problem, to count the number of the max subsequence. Typical dp: - -state: f[i] -\begin{lstlisting}[language = Python] -from sys import maxsize -class Solution: - def findNumberOfLIS(self, nums): - """ - :type nums: List[int] - :rtype: int - """ - max_count = 0 - if not nums: - return 0 - memo =[None for _ in range(len(nums))] - rlst=[] - def recursive(idx,tail,res): - if idx==len(nums): - rlst.append(res) - return 0 - if memo[idx]==None: - length = 0 - if nums[idx]>tail: - addLen = 1+recursive(idx+1, nums[idx],res+[nums[idx]]) - notAddLen = recursive(idx+1, tail,res) - return max(addLen,notAddLen) - else: - return recursive(idx+1, tail,res) - - - ans=recursive(0,-maxsize,[]) - count=0 - for lst in rlst: - if len(lst)==ans: - count+=1 - - return count -\end{lstlisting} - -Using dynamic programming, the difference is we add a count array. 
-\begin{lstlisting}[language = Python] -from sys import maxsize -class Solution: - def findNumberOfLIS(self, nums): - N = len(nums) - if N <= 1: return N - lengths = [0] * N #lengths[i] = longest ending in nums[i] - counts = [1] * N #count[i] = number of longest ending in nums[i] - - for idx, num in enumerate(nums): #i - for i in range(idx): #j - if nums[i] < nums[idx]: #bigger - if lengths[i] >= lengths[idx]: - lengths[idx] = 1 + lengths[i] #set the biggest length - counts[idx] = counts[i] #change the count - elif lengths[i] + 1 == lengths[idx]: #if it is a tie - counts[idx] += counts[i] #increase the current count by count[i] - -longest = max(lengths) - print(counts) - print(lengths) - return sum(c for i, c in enumerate(counts) if lengths[i] == longest) -\end{lstlisting} - -128. Longest Consecutive Sequence -\begin{lstlisting} -Given an unsorted array of integers, find the length of the longest consecutive elements sequence. - -For example, - Given [100, 4, 200, 1, 3, 2], - The longest consecutive elements sequence is [1, 2, 3, 4]. Return its length: 4. - - Your algorithm should run in O(n) complexity. - \end{lstlisting} - -Solution: Not thinking about the O(n) complexity, we can use sorting to get [1,2,3,4,100,200], and then use two pointers to get [1,2,3,4]. - -How about O(n)? We can pop out a number in the list, example, 4 , then we use while first-1 to get any number that is on the left side of 4, here it is 3, 2, 1, and use another to find all the bigger one and remove these numbers from the nums array. 
-\begin{lstlisting}[language =Python] -def longestConsecutive(self, nums): - nums = set(nums) - maxlen = 0 - while nums: - first = last = nums.pop() - while first - 1 in nums: #keep finding the smaller one - first -= 1 - nums.remove(first) - while last + 1 in nums: #keep finding the larger one - last += 1 - nums.remove(last) - maxlen = max(maxlen, last - first + 1) - return maxlen -\end{lstlisting} - - -% subset -%\subfile{chapters/mastering/array/subset.tex} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%%%%% Subset -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\section{Subset(Combination and Permutation)} -\label{part4_array_subset} -A subset B of a set A is a set in which every element also belongs to A. In other words, the subset B is contained inside the set A, $B \subseteq A$. There are two kinds of subsets: if the order of the subset does not matter, it is a combination problem, otherwise, it is a permutation problem. To solve the problems in this section, we need to refer to the backtracking in Sec~\ref{sec_combination}. When the subset has a fixed constant length, then hashmap can be used to lower the complexity by one power of n. - -\textbf{Subset VS Subsequence}. In the subsequence, the elements keep the original order from the original sequence. In the set concept, by contrast, there is no ordering, only a set of elements. - -In this type of questions, we are asked to return subsets of a list. For this type of questions, backtracking (Section~\ref{sec:backtrack}) can be applied. -\subsection{Combination} -\label{part4_array_combine} -The solution of this section is heavily correlated to Section~\ref{sec_combination}. -78. Subsets -\begin{lstlisting} -Given a set of distinct integers, nums, return all possible subsets (the power set). - -Note: The solution set must not contain duplicate subsets. 
- -Example: - -Input: nums = [1,2,3] -Output: -[ - [3], - [1], - [2], - [1,2,3], - [1,3], - [2,3], - [1,2], - [] -] -\end{lstlisting} -\textbf{Backtracking}. This is a combination problem, which we have explained in backtrack section. We just directly gave the code here. -\begin{lstlisting}[language = Python] -def subsets(self, nums): - res, n = [], len(nums) - res = self.combine(nums, n, n) - return res - -def combine(self, nums, n, k): - """ - :type n: int - :type k: int - :rtype: List[List[int]] - """ - def C_n_k(d, k, s, curr, ans): #d controls the degree (depth), k is controls the return level, curr saves the current result, ans is all the result - ans.append(curr) - if d == k: #the length is satisfied - - return - for i in range(s, n): - curr.append(nums[i]) - C_n_k(d+1, k, i+1, curr[:], ans) # i+1 because no repeat, make sure use deep copy curr[:] - curr.pop() - - ans = [] - C_n_k(0, k, 0, [], ans) - return ans -\end{lstlisting} -\textbf{Incremental}. Backtracking is not the only way for the above problem. There is another way to do it iterative, observe the following process. We can just keep append elements to the end of of previous results. -\begin{lstlisting} -[1, 2, 3, 4] -l = 0, [] -l = 1, for 1, []+[1], -> [1], get powerset of [1] -l = 2, for 2, []+[2], [1]+[2], -> [2], [1, 2], get powerset of [1, 2] -l = 3, for 3, []+[3], [1]+[3], [2]+[3], [1, 2]+[3], -> [3], [1, 3], [2, 3], [1, 2, 3], get powerset of [1, 2, 3] -l = 4, for 4, []+ [4]; [1]+[4]; [2]+[4], [1, 2] +[4]; [3]+[4], [1,3]+[4],[2,3]+[4], [1,2,3]+[4], get powerset of [1, 2, 3, 4] -\end{lstlisting} -\begin{lstlisting}[language=Python] -def subsets(self, nums): - result = [[]] #use two dimensional, which already have [] one element - for num in nums: - new_results = [] - for r in result: - new_results.append(r + [num]) - result += new_results - - return result -\end{lstlisting} -90. 
Subsets II -\begin{lstlisting} -Given a collection of integers that might contain duplicates, nums, return all possible subsets (the power set). - -Note: The solution set must not contain duplicate subsets. - -Example: - -Input: [1,2,2] -Output: -[ - [2], - [1], - [1,2,2], - [2,2], - [1,2], - [] -] -\end{lstlisting} -Analysis: Because of the duplicates, the previous superset algorithm would give repetitive subset. For the above example, we would have [1, 2] twice, and [2] twice. If we try to modify on the previous code. We first need to sort the nums, which makes the way we check repeat easiler. Then the code goes like this: -\begin{lstlisting}[language = Python] - def subsetsWithDup(self, nums): - """ - :type nums: List[int] - :rtype: List[List[int]] - """ - nums.sort() - result = [[]] #use two dimensional, which already have [] one element - for num in nums: - new_results = [] - for r in result: - print(r) - new_results.append(r + [num]) - for rst in new_results: - if rst not in result: # check the repetitive - result.append(rst) - - return result -\end{lstlisting} -However, the above code is extremely inefficient because of the checking process. A better way to do this: -\begin{lstlisting} -[1, 2, 2] -l = 0, [] -l = 1, for 1, []+[1] -l = 2, for 2, []+[2], [1]+[2]; []+[2, 2], [1]+[2, 2] -\end{lstlisting} -So it would be more efficient if we first save all the numbers in the array in a dictionary. For the above case, the dic = {1:1, 2:2}. Each time we try to generate the result, we use 2 up to 2 times. Same way, we can use dictionary on the backtracking too. -\begin{lstlisting}[language=Python] -class Solution(object): - def subsetsWithDup(self, nums): - """ - :type nums: List[int] - :rtype: List[List[int]] - """ - if not nums: - return [[]] - res = [[]] - dic = collections.Counter(nums) - for key, val in dic.items(): - tmp = [] - for lst in res: - for i in range(1, val+1): - tmp.append(lst+[key]*i) - res += tmp - return res -\end{lstlisting} - -77. 
Combinations -\begin{lstlisting} -Given two integers n and k, return all possible combinations of k numbers out of 1 ... n. - -Example: - -Input: n = 4, k = 2 -Output: -[ - [2,4], - [3,4], - [2,3], - [1,2], - [1,3], - [1,4], -] -\end{lstlisting} -Analysis: In this problem, it is difficult for us to generate the results iteratively, the only way we can use the second solution is by filtering and get only the results with the length we want. However, the backtrack can solve the problem easily as we mentioned in Section~\ref{sec_combination}. -\begin{lstlisting}[language=Python] -def combine(self, n, k): - """ - :type n: int - :type k: int - :rtype: List[List[int]] - """ - ans = [] - def C_n_k(d,k,s,curr): - if d==k: - ans.append(curr) - return - for i in range(s, n): - #curr.append(i+1) - #C_n_k(d+1, k, i+1, curr[:]) - #curr.pop() - C_n_k(d+1, k, i+1, curr+[i+1]) - C_n_k(0,k,0,[]) - - return ans -\end{lstlisting} -%%%%%%%%%%%%%%%%%%%combination sum%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\subsection{Combination Sum} -39. Combination Sum - -Given a set of candidate numbers (candidates) \textbf{(without duplicates)} and a target number (target), find all unique combinations in candidates where the candidate numbers sums to target. - -The same repeated number may be chosen from candidates \textbf{unlimited number} of times. -\begin{lstlisting} -Note: - - All numbers (including target) will be positive integers. - The solution set must not contain duplicate combinations. - -Example 1: - -Input: candidates = [2,3,6,7], target = 7, -A solution set is: -[ - [7], - [2,2,3] -] - -Example 2: - -Input: candidates = [2,3,5], target = 8, -A solution set is: -[ - [2,2,2,2], - [2,3,3], - [3,5] -] -\end{lstlisting} -\textbf{DFS Backtracking}. Analysis: This is still a typical combination problem, the only thing is the return level is when the sum of the path we gained is larger than the target, and we only collect the answer when it is equal. 
And Because a number can be used unlimited times, so that each time after we used one number, we do not increase the next start position. -\begin{lstlisting}[language=Python] -def combinationSum(self, candidates, target): - """ - :type candidates: List[int] - :type target: int - :rtype: List[List[int]] - """ - ans = [] - candidates.sort() - self.combine(candidates, target, 0, [], ans) - return ans - -def combine(self, nums, target, s, curr, ans): - if target < 0: - return # backtracking - if target == 0: - ans.append(curr) - return - for i in range(s, len(nums)): - # if nums[i] > target: - # return - self.combine(nums, target-nums[i], i, curr+[nums[i]], ans) # use i, instead of i+1 because we can reuse -\end{lstlisting} -40. Combination Sum II - -Given a collection of candidate numbers \textbf{(candidates with duplicates)} and a target number (target), find all unique combinations in candidates where the candidate numbers sums to target. - -Each number in candidates may only \textbf{be used once} in the combination. -\begin{lstlisting} -Note: - - All numbers (including target) will be positive integers. - The solution set must not contain duplicate combinations. - -Example 1: - -Input: candidates = [10,1,2,7,6,1,5], target = 8, -A solution set is: -[ - [1, 7], - [1, 2, 5], - [2, 6], - [1, 1, 6] -] - -Example 2: - -Input: candidates = [2,5,2,1,2], target = 5, -A solution set is: -[ - [1,2,2], - [5] -] -\end{lstlisting} -\textbf{Backtracking+Counter}. Because for the first example, if we reuse the code from the previous problem, we will get extra combinations: [7, 1], [2, 1, 5]. To avoid this, we need a dictionary to save all the unique candidates with its corresponding appearing times. For a certain number, it will be used at most its counter times. 
-\begin{lstlisting}[language=Python] -def combinationSum2(self, candidates, target): - """ - :type candidates: List[int] - :type target: int - :rtype: List[List[int]] - """ - - candidates = collections.Counter(candidates) - ans = [] - self.combine(list(candidates.items()), target, 0, [], ans) # convert the Counter to a list of (key, item) tuple - return ans - -def combine(self, nums, target, s, curr, ans): - if target < 0: - return - if target == 0: - ans.append(curr) - return - for idx in range(s, len(nums)): - num, count = nums[idx] - for c in range(count): - self.combine(nums, target-num*(c+1), idx+1, curr+[num]*(c+1), ans ) -\end{lstlisting} -377. Combination Sum IV (medium) -\begin{lstlisting} - Given an integer array with all positive numbers and no duplicates, find the number of possible combinations that add up to a positive integer target. - -Example: - -nums = [1, 2, 3] -target = 4 - -The possible combination ways are: -(1, 1, 1, 1) -(1, 1, 2) -(1, 2, 1) -(1, 3) -(2, 1, 1) -(2, 2) -(3, 1) - -Note that different sequences are counted as different combinations. - -Therefore the output is 7. - -Follow up: -What if negative numbers are allowed in the given array? -How does it change the problem? -What limitation we need to add to the question to allow negative numbers? -\end{lstlisting} -\textbf{DFS + MEMO}. This problem is similar to 39. Combination Sum. For [2, 3, 5], target = 8, comparison: -\begin{lstlisting} -[2, 3, 5], target = 8 -39. Combination Sum. # there is ordering (each time the start index is same or larger than before) -[ - [2,2,2,2], - [2,3,3], - [3,5] -] -377. Combination Sum IV, here we have no ordering( each time the start index is the same as before). Try all element. 
-[ - [2,2,2,2], - [2,3,3], -* [3,3,2] -* [3,2,3] - [3,5], -* [5,3] -] -\end{lstlisting} -\begin{lstlisting}[language=Python] -def combinationSum4(self, nums, target): - """ - :type nums: List[int] - :type target: int - :rtype: int - """ - nums.sort() - n = len(nums) - def DFS(idx, memo, t): - if t < 0: - return 0 - if t == 0: - return 1 - count = 0 - if t not in memo: - for i in range(idx, n): - count += DFS(idx, memo, t-nums[i]) - memo[t] = count - return memo[t] - return(DFS(0, {}, target)) -\end{lstlisting} -Because, here we does not need to numerate all the possible solutions, we can use dynamic programming, which will be shown in Section~\ref{}. - -\subsection{K Sum} -In this subsection, we still trying to get subset that sum up to a target. But the length here is fixed. We would have 2, 3, 4 sums normally. Because it is still a combination problem, we can use the \textbf{backtracking} to do. Second, because the fixed length, we can use \textbf{multiple pointers} to build up the potential same lengthed subset. But in some cases, because the length is fixed, we can use \textbf{hashmap} to simplify the complexity. - -1. Two Sum -Given an array of integers, return \textbf{indices} of the two numbers such that they add up to a specific target. - -You may assume that each input would have \textbf{exactly} one solution, and you may not use the same element twice. -\begin{lstlisting} -Example: - -Given nums = [2, 7, 11, 15], target = 9, - -Because nums[0] + nums[1] = 2 + 7 = 9, -return [0, 1]. -\end{lstlisting} -\textbf{Hashmap}. Using backtracking or brute force will get us $O(n^2)$ time complexity. We can use hashmap to save the nums in a dictionary. Then we just check target-num in the dictionary. We would get $O(n)$ time complexity. We have two-pass hashmap and one-pass hashmap. 
-\begin{lstlisting}[language=Python] -# two-pass hashmap -def twoSum(self, nums, target): - """ - :type nums: List[int] - :type target: int - :rtype: List[int] - """ - dict = collections.defaultdict(int) - for i, t in enumerate(nums): - dict[t] = i - for i, t in enumerate(nums): - if target - t in dict and i != dict[target-t]: - return [i, dict[target-t]] -# one-pass hashmap -def twoSum(self, nums, target): - """ - :type nums: List[int] - :type target: int - :rtype: List[int] - """ - dict = collections.defaultdict(int) - for i, t in enumerate(nums): - if target - t in dict: - return [dict[target-t], i] - dict[t] = i -\end{lstlisting} - -15. 3Sum - -Given an array S of n integers, are there elements a, b, c in S such that a + b + c = 0? Find all unique triplets in the array which gives the sum of zero. - -Note: The solution set must not contain duplicate triplets. - -For example, given array S = [-1, 0, 1, 2, -1, -4], -\begin{lstlisting} -A solution set is: -[ - [-1, 0, 1], - [-1, -1, 2] -] -\end{lstlisting} - -Solution: Should use three pointers, no extra space. i is the start point from [0,len-2], l,r is the other two pointers. l=i+1, r=len-1 at the beignning. The saving of time complexity is totally from the sorting algorithm. -\begin{lstlisting} -[-4,-1,-1,0,1,2] -i, l-> ``````<-r -\end{lstlisting} -How to delete repeat? -\begin{lstlisting}[language = Python] -def threeSum(self, nums): - res = [] - nums.sort() - for i in xrange(len(nums)-2): - if i > 0 and nums[i] == nums[i-1]: #make sure pointer not repeat - continue - l, r = i+1, len(nums)-1 - while l < r: - s = nums[i] + nums[l] + nums[r] - if s < 0: - l +=1 - elif s > 0: - r -= 1 - else: - res.append((nums[i], nums[l], nums[r])) - l+=1 - r-=1 - - #after the first run, then check duplicate example. 
- while l < r and nums[l] == nums[l-1]: - l += 1 - while l < r and nums[r] == nums[r+1]: - r -= 1 - return res -\end{lstlisting} -Use hashmap: -\begin{lstlisting}[language = Python] -def threeSum(self, nums): - """ - :type nums: List[int] - :rtype: List[List[int]] - """ - res =[] - nums=sorted(nums) - if not nums: - return [] - if nums[-1]<0 or nums[0]>0: - return [] - end_position = len(nums)-2 - dic_nums={} - for i in xrange(1,len(nums)): - dic_nums[nums[i]]=i# same result save the last index - - for i in xrange(end_position): - target = 0-nums[i] - if i>0 and nums[i] == nums[i-1]: #this is to avoid repeat - continue - if targeti+1 and nums[j]==nums[j-1]: - continue - complement =target - nums[j] - if complementj: #need to make sure the complement is bigger than nums[j] - res.append([nums[i],nums[j],complement]) - return res -\end{lstlisting} -The following code uses more time -\begin{lstlisting}[language = Python] -for i in xrange(len(nums)-2): - if i > 0 and nums[i] == nums[i-1]: - continue - l, r = i+1, len(nums)-1 - while l < r: - if l-1>=i+1 and nums[l] == nums[l-1]: #check the front - l += 1 - continue - if r+1 0: - r -= 1 - else: - res.append((nums[i], nums[l], nums[r])) - l += 1; r -= 1 - return res -\end{lstlisting} -18. 
4Sum -\begin{lstlisting}[language = Python] -def fourSum(self, nums, target): - def findNsum(nums, target, N, result, results): - if len(nums) < N or N < 2 or target < nums[0]*N or target > nums[-1]*N: # early termination - return - if N == 2: # two pointers solve sorted 2-sum problem - l,r = 0,len(nums)-1 - while l < r: - s = nums[l] + nums[r] - if s == target: - results.append(result + [nums[l], nums[r]]) - l += 1 - r-=1 - while l < r and nums[l] == nums[l-1]: - l += 1 - while l < r and nums[r] == nums[r+1]: - r -= 1 - elif s < target: - l += 1 - else: - r -= 1 - else: # recursively reduce N - for i in range(len(nums)-N+1): - if i == 0 or (i > 0 and nums[i-1] != nums[i]): - findNsum(nums[i+1:], target-nums[i], N-1, result+[nums[i]], results) #reduce nums size, reduce target, save result - -results = [] - findNsum(sorted(nums), target, 4, [], results) - return results -\end{lstlisting} - -454. 4Sum II - -Given four lists A, B, C, D of integer values, compute how many tuples (i, j, k, l) there are such that A[i] + B[j] + C[k] + D[l] is zero. - -To make the problem a bit easier, all of A, B, C, D have the same length $N$, where $0 \leq N \leq 500$. All integers are in the range of $-2^{28}$ to $2^{28}-1$ and the result is guaranteed to be at most $2^{31}-1$. - -Example: -\begin{lstlisting} -Input: -A = [ 1, 2] -B = [-2,-1] -C = [-1, 2] -D = [ 0, 2] - -Output: -2 -\end{lstlisting} - -Explanation: - -\begin{lstlisting} -The two tuples are: -1. (0, 0, 0, 1) -> A[0] + B[0] + C[0] + D[1] = 1 + (-2) + (-1) + 2 = 0 -2. (1, 1, 0, 0) -> A[1] + B[1] + C[0] + D[0] = 2 + (-1) + (-1) + 0 = 0 -\end{lstlisting} -Solution: if we use brute force with four nested loops, the time complexity is $O(N^4)$. If we use divide and conquer instead, we sum the first half (every pair from A and B), save these sums in a dictionary (counter), and then look up the negated sum of every pair from the second half (C and D); the time complexity is $O(2N^2)$, i.e. $O(N^2)$. In the same way, a 6-sum can be reduced to $O(2N^3)$; what about an 8-sum? 
- -\begin{lstlisting}[language = Python] -def fourSumCount(self, A, B, C, D): - AB = collections.Counter(a+b for a in A for b in B) - return sum(AB[-c-d] for c in C for d in D) -\end{lstlisting} - - -\subsubsection{Summary} -As we have seen from the shown examples in this section, to solve the combination problem, backtrack shown in Section~\ref{sec_combination} offers a universal solution. Also, there is another iterative solution which suits the power set purpose. And I would include its code here again: -\begin{lstlisting}[language = Python] -def subsets(self, nums): - result = [[]] #use two dimensional, which already have [] one element - for num in nums: - new_results = [] - for r in result: - new_results.append(r + [num]) - result += new_results - - return result -\end{lstlisting} -If we have duplicates, how to handle in the backtrack?? In the iterative solution, we can replace the array with a dictionary saves the counts. - -\subsection{Permutation} -46. Permutations -\begin{lstlisting} -Given a collection of distinct numbers, return all possible permutations. - -For example, - [1,2,3] have the following permutations: - -[ - [1,2,3], - [1,3,2], - [2,1,3], - [2,3,1], - [3,1,2], - [3,2,1] -] -\end{lstlisting} - -47. Permutations II - -Given a collection of numbers that might contain duplicates, return all possible unique permutations. - -For example, -\begin{lstlisting} - [1,1,2] have the following unique permutations: - -[ - [1,1,2], - [1,2,1], - [2,1,1] -] -\end{lstlisting} - -301. Remove Invalid Parentheses - -Remove the minimum number of invalid parentheses in order to make the input string valid. Return all possible results. - -Note: The input string may contain letters other than the parentheses ( and ). 
- -Examples: -\begin{lstlisting} -"()())()" -> ["()()()", "(())()"] -"(a)())()" -> ["(a)()()", "(a())()"] -")(" -> [""] -\end{lstlisting} - - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%%%%% Merge List -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\section{Merge and Partition} -\subsection{Merge Lists} -We can use divide and conquer (see the merge sort) and the priority queue. -\subsection{Partition Lists} -Partition of lists can be converted to subarray, combination, subsequence problems. For example, -\begin{enumerate} - \item 416. Partition Equal Subset Sum (combination) - \item 698. Partition to K Equal Sum Subsets -\end{enumerate} - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%%%%%%%%%%%%% Sweep Line -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\section{Intervals} -\label{sec_sweep_line} -% \documentclass[../../main.tex]{subfiles} -Sweep Line is a type of algorithm that mainly used to solve problems with intervals of one-dimensional. Let us look at one example: -1. 253. Meeting Rooms II - -Given an array of meeting time intervals consisting of start and end times [[s1,e1],[s2,e2],...] (si < ei), find the minimum number of conference rooms required. -\begin{lstlisting} -Example 1: - -Input: [[0, 30],[5, 10],[15, 20]] -Output: 2 - -Example 2: - -Input: [[7,10],[2,4]] -Output: 1 -\end{lstlisting} -It would help a lot if at first we can draw one example with cooridinates. -\begin{figure}[h] - \centering - \includegraphics[width = 0.6\columnwidth]{fig/sweep_line_253.png} - \caption{Interval questions} - \label{fig:interval} -\end{figure} -First, the simplest situation is when we only need one meeting room is there is no intersection between these time intervals. If we add one interval that only intersect with one of the previous intervals, this means we need two conference rooms. 
So to find the minimum number of conference rooms, we need to find the maximum number of time intervals that overlap at the same time. The most naive solution is to scan all the time slots in an outer loop and, in an inner loop, go through all the intervals; if the time slot falls within an interval, we increase the counter of meeting rooms needed at that time slot. This gives a time complexity of $O(n*m)$, where $n$ is the number of intervals and $m$ is the total number of time slots. The Python code is as follows; unfortunately, this solution gets a TLE (Time Limit Exceeded) error. -\begin{lstlisting}[language = Python] -# Definition for an interval. -# class Interval(object): -# def __init__(self, s=0, e=0): -# self.start = s -# self.end = e - -from collections import defaultdict -from heapq import heappush, heappop -from sys import maxint -class Solution(object): - def minMeetingRooms(self, intervals): - """ - :type intervals: List[Interval] - :rtype: int - """ - if not intervals: - return 0 - #solution 1, voting, time complexity is O(e1-s1), 71/77 test, TLE - votes = defaultdict(int) - num_rooms = 0 - for interval in intervals: - s=interval.start - e=interval.end - for i in range(s+1,e+1): - votes[i]+=1 - num_rooms = max(num_rooms, votes[i]) - return num_rooms -\end{lstlisting} -\subsection{Speedup with Sweep Line} -Now, let us see how to speed up this process using the Sweep Line method. For the sweep line, we have three basic implementations: one-dimensional, min-heap, or map based. -\subsubsection{One-dimensional Implementation} - To get the maximum number of overlapping intervals, it is not necessary to scan all the time slots; it is enough to scan only the key slots: the starts and the ends. Thus, we can create an array and put every start and end slot into it, using $1$ to mark a start and $0$ to mark an end. Then we sort this array. At this point, how do we get the maximum number of overlapping intervals? 
We go through this sorted array: when we encounter a start slot, the number of rooms currently needed increases by one; when we encounter an end slot, one meeting room is freed, so we decrease the number of rooms currently in use by one. We use another variable to track the maximum number of rooms needed during the whole process. The time complexity is now decided by the number of slots, $2n$, and is dominated by the sorting step, which makes the overall time complexity $O(n\log n)$ and the space complexity $O(n)$. This sped-up algorithm is called the Sweep Line algorithm. Before we write the code, we should check the \textit{special case}: what if a slot is the start of one interval and also the end of another? In that case we must not increase the count first; we need to process the end (decrease) before the start (increase). Therefore the sorting should be based on the first element of the tuple (the time) and then on the second element (the mark), so that at the same time an end, marked $0$, comes before a start, marked $1$. For example, for the simple case $[[13,15],[1,13]]$ we need at most one meeting room. 
Thus it can be implemented as: -\begin{figure}[h] - \centering - \includegraphics[width=0.6\columnwidth]{fig/sweep_line_one_dimension.png} - \caption{One-dimensional Sweep Line} - \label{fig:one_dim_sl} -\end{figure} -\begin{lstlisting}[language=Python] - def minMeetingRooms(self, intervals): - if not intervals: - return 0 - #solution 2 - slots = [] - # put slots into one-dimensional axis - for i in intervals: - slots.append((i.start, 1)) - slots.append((i.end, 0)) - # sort these slots on this dimension - #slots.sort(key = lambda x: (x[0], x[1])) - slots.sort() - - # now execute the counting - crt_room, max_room = 0, 0 - for s in slots: - if s[1]==0: # if it ends, decrease - crt_room-=1 - else: - crt_room+=1 - max_room = max(max_room, crt_room) - return max_room -\end{lstlisting} -\subsubsection{Min-heap Implementation} -\begin{figure}[h] - \centering - \includegraphics[width=0.6\columnwidth]{fig/sweep_line_min_heap.png} - \caption{Min-heap for Sweep Line} - \label{fig:min_heap_sl} -\end{figure} -Instead of opening an array to save all the time slots, we can directly sort the intervals in the order of the start time. We can see Fig.~\ref{fig:min_heap_sl}, we go through the intervals and visit their end time, the first one we encounter is $30$, we put it in a min-heap, and then we visit the next interval $[5, 10]$, $5$ is smaller than the previous end time $30$, it means this interval intersected with a previous interval, so the number of maximum rooms increase $1$, we get $2$ rooms now. We put $10$ into the min-heap. Next, we visit $[15, 20]$, $15$ is larger than the first element in the min-heap $10$, it means that these two intervals can be merged into one $[5, 20]$, so we need to update the end time $10$ to $20$. - -This way, the time complexity is still the same which is decided by the sorting algorithm. 
While the space complexity is decided by real situation, it varies from $O(1)$ (no intersection) to $O(n)$ (all the meetings are intersected at at least one time slot). -\begin{lstlisting}[language=Python] -def minMeetingRooms(self, intervals): - if not intervals: - return 0 - #solution 2 - intervals.sort(key=lambda x:x.start) - h = [intervals[0].end] - rooms = 1 - for i in intervals[1:]: - s,e=i.start, i.end - e_before = h[0] - if s& intervals) { - map mp; - for (auto val : intervals) { - ++mp[val.start]; - --mp[val.end]; - } - int max_room = 0, crt_room = 0; - for (auto val : mp) { - crt_room += val.second; - max_room = max(max_room, crt_room); - } - return max_room; - } -}; -\end{lstlisting} - -\subsection{LeetCode Problems} -\begin{enumerate} - \item \textbf{986. Interval List Intersections} Given two lists of closed intervals, each list of intervals is pairwise disjoint and in sorted order. Return the intersection of these two interval lists. -\begin{lstlisting}[numbers=none] -Input: A = [[0,2],[5,10],[13,23],[24,25]], B = [[1,5],[8,12],[15,24],[25,26]] -Output: [[1,2],[5,5],[8,10],[15,23],[24,24],[25,25]] -Reminder: The inputs and the desired output are lists of Interval objects, and not arrays or lists. -\end{lstlisting} -\end{enumerate} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%%%%% Intersection -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\section{Intersection} -For problems to get intersections of lists, we can use hashmap, which takes $O(m+n)$ time complexity. Also, we can use sorting at first and use two pointers one start from the start of each array. Examples are shown as below; -\begin{enumerate} - \item 349. Intersection of Two Arrays (Easy) - - Given two arrays, write a function to compute their intersection. - -Example: -\begin{lstlisting} -Given nums1 = [1, 2, 2, 1], nums2 = [2, 2], return [2]. 
-\end{lstlisting} - -Note: -\begin{itemize} - \item Each element in the result must be unique. - \item The result can be in any order. -\end{itemize} -Solution 1: Using hashmap, here we use set to convert, this takes 43ms. -\begin{lstlisting}[language = Python] -def intersection(self, nums1, nums2): - """ - :type nums1: List[int] - :type nums2: List[int] - :rtype: List[int] - """ - if not nums1 or not nums2: - return [] - if len(nums1) > len(nums2): - nums1, nums2 = nums2, nums1 - ans = set() - nums1 = set(nums1) - for e in nums2: - if e in nums1: - ans.add(e) - return list(ans) -\end{lstlisting} -Solution2: sorting at first, and then use pointers. Take 46 ms. -\begin{lstlisting}[language = Python] -def intersection(self, nums1, nums2): - """ - :type nums1: List[int] - :type nums2: List[int] - :rtype: List[int] - """ - nums1.sort() - nums2.sort() - r = set() - i, j = 0, 0 - while i < len(nums1) and j < len(nums2): - if nums1[i] < nums2[j]: - i += 1 - elif nums1[i] > nums2[j]: - j += 1 - else: - r.add(nums1[i]) - i += 1 - j += 1 - return list(r) -\end{lstlisting} -\item 350. Intersection of Two Arrays II(Easy) - - Given two arrays, write a function to compute their intersection. - -Example: -\begin{lstlisting} -Given nums1 = [1, 2, 2, 1], nums2 = [2, 2], return [2, 2]. -\end{lstlisting} - -Note: -\begin{itemize} - \item Each element in the result should appear as many times as it shows in both arrays. - \item The result can be in any order. -\end{itemize} - -Follow up: -\begin{enumerate} - \item What if the given array is already sorted? How would you optimize your algorithm? - \item What if nums1's size is small compared to nums2's size? Which algorithm is better? - \item What if elements of nums2 are stored on disk, and the memory is limited such that you cannot load all elements into the memory at once? -\end{enumerate} - -\end{enumerate} - -\section{Miscellanous Questions} -\begin{examples}[resume] -\item \textbf{283. Move Zeroes. 
(Easy)} -Given an array nums, write a function to move all 0's to the end of it while maintaining the relative order of the non-zero elements. - -Note: -\begin{enumerate} - \item You must do this in-place without making a copy of the array. - \item Minimize the total number of operations. -\end{enumerate} -\begin{lstlisting}[language=Python] -Example: - -Input: [0,1,0,3,12] -Output: [1,3,12,0,0] -\end{lstlisting} -\textbf{Solution 1: Find All Zeros Subarray.} If we found the first all zeros subarray [0, ..., 0] + [x], and we can swap this subarray with the first non-zero element as swap last 0 with x, swap second last element with x, ..., and so on. Therefore, if 0 is at first index, one zero, then it takes O(n), if another 0, at index 1, it takes n-1+n-2 = 2n. It is bit tricky to compute the complexity analysis. The upper bound is $O(n^2)$. -\end{examples} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%%%%% Exercises -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\section{Exercises} -% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -% %%%%% Subsequence -% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\subsection{Subsequence with (DP)} - -\begin{enumerate} - \item 594. Longest Harmonious Subsequence - -We define a harmonious array is an array where the difference between its maximum value and its minimum value is exactly 1. - -Now, given an integer array, you need to find the length of its longest harmonious subsequence among all its possible subsequences. - -Example 1: -\begin{lstlisting} -Input: [1,3,2,2,5,2,3,7] -Output: 5 -Explanation: The longest harmonious subsequence is [3,2,2,2,3]. -\end{lstlisting} - -\textit{Note: The length of the input array will not exceed 20,000.} - -Solution: at first, use a Counter to save the whole set. 
Then visit the counter dictionary, to check key+1 and key-1, only when the item is not zero, we can count it as validate, or else it is 0. -\begin{lstlisting}[language = Python] -from collections import Counter -class Solution: - def findLHS(self, nums): - """ - :type nums: List[int] - :rtype: int - """ - if not nums or len(nums)<2: - return 0 - count=Counter(nums) #the list is sorted by the key value - maxLen = 0 - for key,item in count.items(): #to visit the key: item in the counter - if count[key+1]: #because the list is sorted, so we only need to check key+1 - maxLen = max(maxLen,item+count[key+1]) - - # if count[key-1]: - # maxLen=max(maxLen, item+count[key-1]) - return maxLen -\end{lstlisting} - -\item 521. Longest Uncommon Subsequence I - -Given a group of two strings, you need to find the longest uncommon subsequence of this group of two strings. The longest uncommon subsequence is defined as the longest subsequence of one of these strings and this subsequence should not be any subsequence of the other strings. - -A subsequence is a sequence that can be derived from one sequence by deleting some characters without changing the order of the remaining elements. Trivially, any string is a subsequence of itself and an empty string is a subsequence of any string. - -The input will be two strings, and the output needs to be the length of the longest uncommon subsequence. If the longest uncommon subsequence doesn’t exist, return -1. - -Example 1: -\begin{lstlisting} -Input: "aba", "cdc" -Output: 3 -Explanation: The longest uncommon subsequence is "aba" (or "cdc"), -because "aba" is a subsequence of "aba", -but not a subsequence of any other strings in the group of two strings. 
-\end{lstlisting} - -\textit{Note:} - - \textit{Both strings’ lengths will not exceed 100.} - - \textit{Only letters from a ~ z will appear in input strings.} - -Solution: if we get more examples, we could found the following rules, “aba”,”aba” return -1, -\begin{lstlisting}[language = Python] -def findLUSlength(self, a, b): - """ - :type a: str - :type b: str - :rtype: int - """ - if len(b)!=len(a): - return max(len(a),len(b)) - #length is the same - return len(a) if a!=b else -1 -\end{lstlisting} -\item 424. Longest Repeating Character Replacement - -Given a string that consists of only uppercase English letters, you can replace any letter in the string with another letter at most k times. Find the length of a longest substring containing all repeating letters you can get after performing the above operations. - -\textit{Note:} - - \textit{Both the string’s length and k will not exceed 104.} - -Example 1: -\begin{lstlisting} -Input: -s = "ABAB", k = 2 - -Output: -4 -\end{lstlisting} - -Explanation: -Replace the two 'A's with two 'B's or vice versa. - -Example 2: -\begin{lstlisting} -Input: -s = "AABABBA", k = 1 - -Output: -4 -\end{lstlisting} - -Explanation: -Replace the one 'A' in the middle with 'B' and form "AABBBBA". -The substring "BBBB" has the longest repeating letters, which is 4. - -Solution: the brute-force recursive solution for this, is try to replace any char into another when it is not equal or choose not too. 
LTE -\begin{lstlisting}[language = Python] -#brute force, use recursive function to write brute force solution - def replace(news, idx, re_char, k): - nonlocal maxLen - if k==0 or idx==len(s): - maxLen = max(maxLen, getLen(news)) - return - -if s[idx]!=re_char: #replace - news_copy=news[:idx]+re_char+news[idx+1:] - replace(news_copy, idx+1, re_char, k-1) - replace(news[:], idx+1, re_char,k) - - #what if we only have one char - # for char1 in chars.keys(): - # replace(s[:],0,char1, k) -\end{lstlisting} -To get the BCR, think about the sliding window. The longest repeating string we can by number of replacement = `length of string max(numer of occurence of letter i), i=’A’ to ‘Z’. With the constraint, which means the equation needs to be $\leq k$. So we can use sliding window to record the max occurence, and when the constraint is violated, we shrink the window. Given an example, strs= “BBCABBBAB”, k=2, when i=0, and j=7, 8–5=3>2, which is at A, we need to shrink it, the maxCharCount changed to 4, i=1, so that 8–1–4=3, i=2, 8–2–3=3, 8–3–3=2, so i=3, current length is 5. -\begin{lstlisting}[language = Python] -def characterReplacement(self, s, k): - """ - :type s: str - :type k: int - :rtype: int - """ - i,j = 0,0 #sliding window - counter=[0]*26 - ans = 0 - maxCharCount = 0 - while jk: #now shrink the window - counter[ord(s[i])-ord('A')]-=1 - i+=1 - #updata max - maxCharCount=max(counter) - ans=max(ans, j-i+1) - j+=1 - - return ans -\end{lstlisting} - -\item 395. Longest Substring with At Least K Repeating Characters - -Find the length of the longest substring T of a given string (consists of lowercase letters only) such that every character in T appears no less than k times. - -Example 1: -\begin{lstlisting} -Input: -s = "aaabb", k = 3 - -Output: -3 -\end{lstlisting} - -The longest substring is "aaa", as 'a' is repeated 3 times. 
- -Example 2: -\begin{lstlisting} -Input: -s = "ababbc", k = 2 - -Output: -5 -\end{lstlisting} - -The longest substring is "ababb", as 'a' is repeated 2 times and 'b' is repeated 3 times. - -Solution: use dynamic programming with memo: Cons: it takes too much space, and with LTE. -\begin{lstlisting}[language = Python] -from collections import Counter, defaultdict -class Solution: - def longestSubstring(self, s, k): - """ - :type s: str - :type k: int - :rtype: int - """ - if not s: - return 0 - if len(s)end: - return 0 - if memo[start][end]==None: - if any(0=k: - mid+=1 - if mid==len(s): return len(s) - left = self.longestSubstring(s[:mid],k) #"ababb" - #from pre_mid - cur_mid, get rid of those cant satisfy the condition - while mid a2 - \ - c1 -> c2 -> c3 - / -B: b1 -> b2 -> b3 -\end{lstlisting} - -begin to intersect at node c1. - -Notes: -\begin{itemize} - \item If the two linked lists have no intersection at all, return null. - \item The linked lists must retain their original structure after the function returns. - \item You may assume there are no cycles anywhere in the entire linked structure. - \item Your code should preferably run in O(n) time and use only O(1) memory. -\end{itemize} - - - - -\end{document} \ No newline at end of file diff --git a/Easy-Book/chapters/question_4_linked_list_question.tex b/Easy-Book/chapters/question_4_linked_list_question.tex deleted file mode 100644 index 0153fc8..0000000 --- a/Easy-Book/chapters/question_4_linked_list_question.tex +++ /dev/null @@ -1,526 +0,0 @@ -\documentclass[../main.tex]{subfiles} - -\begin{document} -In this chapter, we focusing on solving problems that carried on or the solution is related using non-linear data structures that are not array/string, such as linked list, heap, queue, and stack. -%%%%%%%%%%%%%Linked List%%%%%%%%%%%%%%%%%%%%%%% -\section{Linked List} -Problems with linked list can be basic operations to add or remove node, or merge two different linked list. 
-\paragraph{Circular Linked List} For a circular linked list, the most important thing when traversing the list is to know how to set up the end condition for the while loop. -\begin{examples}[resume] -\item \textbf{708. Insert into a Cyclic Sorted List (medium)} Given a node from a cyclic linked list which is sorted in ascending order, write a function to insert a value into the list such that it remains a cyclic sorted list. The given node can be a reference to any single node in the list, and may not necessarily be the smallest value in the cyclic list. For example, -\begin{figure}[h!] - \centering - \includegraphics[width=0.5\columnwidth]{fig/insertcyclicbefore.png} - \includegraphics[width=0.5\columnwidth]{fig/insertcyclicafter.png} - \caption{Example of insertion in circular list} - \label{fig:circular list} -\end{figure} - -\textbf{Analysis:} We traverse the list for at most one round. The position at which we insert depends on the value being inserted. Suppose the values in the linked list lie in the range $[s, e]$ with $s \leq e$, and the value to insert is $m$: -\begin{enumerate} - \item $m\in [s, e]$: we insert in the middle of the list, between two adjacent nodes whose values enclose $m$. - \item $ m \geq e$ or $ m\leq s$: we insert at the end of the list. We detect the end as the node whose value is larger than its successor's value. - \item After one loop, if we cannot find a place, then we insert at the end, i.e., right before the node we started from. For example, 2->2->2 and insert 3 or 2->3->4->2 and insert 2. 
-\end{enumerate} -\begin{lstlisting}[language=Python] -def insert(self, head, insertVal): - if not head: # 0 node - head = Node(insertVal,None) - head.next = head - return head - - cur = head - while cur.next != head: - if cur.val <= insertVal <= cur.next.val: # insert - break - elif cur.val > cur.next.val: # end and start - if insertVal >= cur.val or insertVal <= cur.next.val: - break - cur = cur.next - else: - cur = cur.next - # insert - node = Node(insertVal,None) - node.next, cur.next = cur.next, node - return head -\end{lstlisting} -\end{examples} -%%%%%%%%%%%%%queue and stack%%%%%%%%%%%%%%%%%%%%%%% -\section{Queue and Stack} -Because Queue and Stack is used to implement BFS and DFS search respectively, therefore, that type of implementation is covered in Chapter~\ref{graph_problem}. The other problems include: Buffering problem with Queue(circular queue), -%%%%%%%%%%%%%%%%%%Implementation%%%%%%%%%%%%%%%%%%%%%%%%%% -\subsection{Implementing Queue and Stack} -\begin{examples}[resume] -\item \textbf{622. Design Circular Queue (medium).} Design your implementation of the circular queue. The circular queue is a linear data structure in which the operations are performed based on FIFO (First In First Out) principle and the last position is connected back to the first position to make a circle. It is also called \textbf{"Ring Buffer"}. - -Your implementation should support following operations: -\begin{itemize} - \item MyCircularQueue(k): Constructor, set the size of the queue to be k. - \item Front: Get the front item from the queue. If the queue is empty, return -1. - \item Rear: Get the last item from the queue. If the queue is empty, return -1. - \item enQueue(value): Insert an element into the circular queue. Return true if the operation is successful. - \item deQueue(): Delete an element from the circular queue. Return true if the operation is successful. - \item isEmpty(): Checks whether the circular queue is empty or not. 
- \item isFull(): Checks whether the circular queue is full or not. -\end{itemize} - -\textbf{Solution 1: Singly Linked List with Predefined Size.} This is a typical queue data structure and because it is a buffering, therefore, we need to limit its size. As shown in previous theory chapter of the book, queue can be implemented with singly linked list with two pointers, one at the head and the other at the rear. The additional controlling we need is to limit the size of the queue. -\begin{lstlisting}[language=Python] -class MyCircularQueue: - class Node: - def __init__(self, val): - self.val = val - self.next = None - def __init__(self, k): - self.size = k - self.head = None - self.tail = None - self.cur_size = 0 - - def enQueue(self, value): - if self.cur_size >= self.size: - return False - new_node = MyCircularQueue.Node(value) - if self.cur_size == 0: - self.tail = self.head = new_node - else: - self.tail.next = new_node - new_node.next = self.head - self.tail = new_node - self.cur_size += 1 - return True - - def deQueue(self): - - if self.cur_size == 0: - return False - # delete head node - val = self.head.val - if self.cur_size == 1: - self.head = self.tail = None - else: - self.head = self.head.next - self.cur_size -= 1 - return True - - def Front(self): - return self.head.val if self.head else -1 - - def Rear(self): - return self.tail.val if self.tail else -1 - - def isEmpty(self): - return True if self.cur_size == 0 else False - - def isFull(self): - return True if self.cur_size == self.size else False -\end{lstlisting} -\item \textbf{641. Design Circular Deque (medium)}. - -\textbf{Solution: Doubly linked List with Predefined size} -\end{examples} -%%%%%%%%%%%%%%%%%%%%%%Application%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\subsection{Solving Problems Using Queue} - -\paragraph{Use as a Buffer} -\begin{examples}[resume] -\item \textbf{346. Moving Average from Data Stream (easy)}. 
Given a stream of integers and a window size, calculate the moving average of all integers in the sliding window. -\begin{lstlisting}[numbers = none] -Example: - -MovingAverage m = new MovingAverage(3); -m.next(1) = 1 -m.next(10) = (1 + 10) / 2 -m.next(3) = (1 + 10 + 3) / 3 -m.next(5) = (10 + 3 + 5) / 3 -\end{lstlisting} - -\textbf{Solution: module deque with maxlen.} When we have a fixed window size, this is like a buffer, it has a maximum of capacity. When the n+1 th element come, we need delete the leftmost element first. This is directly implemented in deque module if we set the maxlen to the size we want. Also, it is easy to use function like sum() and len() to compute the average value. -\begin{lstlisting}[language=Python] -from collections import deque -class MovingAverage: - def __init__(self, size): - self.q = deque(maxlen = size) - def next(self, val): - self.q.append(val) - return sum(self.q)/len(self.q) -\end{lstlisting} - -\subsection{Solving Problems with Stack and Monotone Stack} -84. Largest Rectangle in Histogram - -Given n non-negative integers representing the histogram’s bar height where the width of each bar is 1, find the area of largest rectangle in the histogram. -\begin{figure} - \centering - \includegraphics[width = 0.4\columnwidth]{fig/histogram.png} - \caption{Histogram} - \label{fig:histogram} -\end{figure} -Above is a histogram where width of each bar is 1, given height = $[2,1,5,6,2,3]$. The largest rectangle is shown in the shaded area, which has area = 10 unit. - -Solution: brute force. Start from $2$ which will be included, then we go to the right side to find the minimum height, we could have possible area $(1\times 2, 1\times 3, 1\times 4, 1\times 5, ...)$, which gave us $O(n^2)$ to track the min height and width. 
-\begin{lstlisting}[language = Python] -class Solution: - def largestRectangleArea(self, heights): - """ - :type heights: List[int] - :rtype: int - """ - if not heights: - return 0 - maxsize = max(heights) - - for i in range(len(heights)): - minheight = heights[i] - width = 1 - for j in range(i+1, len(heights)): - width+=1 - minheight = min(minheight, heights[j]) - maxsize = max(maxsize,minheight*width) - return maxsize -\end{lstlisting} -Now, try the BCR, which is $O(n)$. The maximum area is amony areas that use each height as the rectangle height multiplied by the width that works. For the above example,we would choose the maximum among $2\times 1, 1\times 6, 5\times 2, 6\times 1, 2\times 4, 3\times 1$. So, the important step here is to find the possible width, for element $2$, if the following height is increasing, then the width grows, however, since the following height $1$ is smaller, so $2$ will be popped out, we can get $2\times 1$, which satisfies the condition of the monotonic increasing stack, when one element is popped out, which means we found the next element that is smaller than the kicked out element, so the width span ended here. How to deal if current number equals to previous, 6,6,6,6,6, we need to pop previous, and append current. The structure we use here is called Monotonic Stack, which will only allow the increasing elements to get in the stack, and once smaller or equal ones get in, it kicks out the previous smaller elements. 
-\begin{lstlisting}[language = Python] -def largestRectangleArea(self, heights): - """ - :type heights: List[int] - :rtype: int - """ - if not heights: - return 0 - maxsize = max(heights) - - stack = [-1] - - #the stack will only grow - for i, h in enumerate(heights): - if stack[-1]!=-1: - if h>heights[stack[-1]]: - stack.append(i) - else: - #start to kick to pop and compute the area - while stack[-1]!=-1 and h<=heights[stack[-1]]: #same or equal needs to be pop out - idx = stack.pop() - v = heights[idx] - maxsize=max(maxsize, (i-stack[-1]-1)*v) - stack.append(i) - - else: - stack.append(i) - #handle the left stack - while stack[-1]!=-1: - idx = stack.pop() - v = heights[idx] - maxsize=max(maxsize, (len(heights)-stack[-1]-1)*v) - return maxsize -\end{lstlisting} -85. Maximal Rectangle -Solution: 64/66 with LTE -\begin{lstlisting}[language = Python] -def maximalRectangle(self, matrix): - """ - :type matrix: List[List[str]] - :rtype: int - """ - if not matrix: - return 0 - if len(matrix[0])==0: - return 0 - row,col = len(matrix),len(matrix[0]) - - def check(x,y,w,h): - #check the last col - for i in range(x, x+h): #change row - if matrix[i][y+w-1]=='0': - return 0 - for j in range(y, y+w): #change col - if matrix[x+h-1][j]=='0': - return 0 - return w*h - maxsize = 0 - for i in range(row): - for j in range(col): #start point i,j - if matrix[i][j]=='0': - continue - for h in range(1, row-i+1): #decide the size of the window - for w in range(1,col-j+1): - rslt = check(i,j,w,h) - if rslt==0: #we definitely need to break it. 
or else we get wrong result - break - maxsize = max(maxsize, check(i,j,w,h)) - return maxsize -\end{lstlisting} -Now, the same as before, use the sums -\begin{lstlisting}[language = Python] -def maximalRectangle(self, matrix): - """ - :type matrix: List[List[str]] - :rtype: int - """ - if not matrix: - return 0 - if len(matrix[0])==0: - return 0 - row,col = len(matrix),len(matrix[0]) - sums = [[0 for _ in range(col+1)] for _ in range(row+1)] - #no need to initialize row 0 and col 0, because we just need it to be 0 - for i in range(1, row+1): - for j in range(1, col+1): - sums[i][j]=sums[i-1][j]+sums[i][j-1]-sums[i-1][j-1]+[0,1][matrix[i-1][j-1]=='1'] - - def check(x,y,w,h): - count = sums[x+h-1][y+w-1]-sums[x+h-1][y-1]-sums[x-1][y+w-1]+sums[x-1][y-1] - return count if count==w*h else 0 - -maxsize = 0 - for i in range(row): - for j in range(col): #start point i,j - if matrix[i][j]=='0': - continue - for h in range(1, row-i+1): #decide the size of the window - for w in range(1,col-j+1): - rslt = check(i+1,j+1,w,h) - if rslt==0: #we definitely need to break it. or else we get wrong result - break - maxsize = max(maxsize, rslt) - return maxsize -\end{lstlisting} -Still can not be AC. So we need another solution. Now use the largest rectangle in histogram. 
-\begin{lstlisting}[language = Python] -def maximalRectangle(self, matrix): - """ - :type matrix: List[List[str]] - :rtype: int - """ - if not matrix: - return 0 - if len(matrix[0])==0: - return 0 - def getMaxAreaHist(heights): - if not heights: - return 0 - maxsize = max(heights) - -stack = [-1] - -#the stack will only grow - for i, h in enumerate(heights): - if stack[-1]!=-1: - if h>heights[stack[-1]]: - stack.append(i) - else: - #start to kick to pop and compute the area - while stack[-1]!=-1 and h<=heights[stack[-1]]: #same or equal needs to be pop out - idx = stack.pop() - v = heights[idx] - maxsize=max(maxsize, (i-stack[-1]-1)*v) - stack.append(i) - -else: - stack.append(i) - #handle the left stack - while stack[-1]!=-1: - idx = stack.pop() - v = heights[idx] - maxsize=max(maxsize, (len(heights)-stack[-1]-1)*v) - return maxsize - row,col = len(matrix),len(matrix[0]) - heights =[0]*col #save the maximum heights till here - maxsize = 0 - for r in range(row): - for c in range(col): - if matrix[r][c]=='1': - heights[c]+=1 - else: - heights[c]=0 - #print(heights) - maxsize = max(maxsize, getMaxAreaHist(heights)) - return maxsize -\end{lstlisting} - -\textbf{Monotonic Stack} - -122. Best Time to Buy and Sell Stock II - -Say you have an array for which the ith element is the price of a given stock on day i. - -Design an algorithm to find the maximum profit. You may complete as many transactions as you like (i.e., buy one and sell one share of the stock multiple times). - -Note: You may not engage in multiple transactions at the same time (i.e., you must sell the stock before you buy again). - -Example 1: -\begin{lstlisting} -Input: [7,1,5,3,6,4] -Output: 7 -Explanation: Buy on day 2 (price = 1) and sell on day 3 (price = 5), profit = 5-1 = 4. - Then buy on day 4 (price = 3) and sell on day 5 (price = 6), profit = 6-3 = 3. 
-\end{lstlisting} -Example 2: -\begin{lstlisting} -Input: [1,2,3,4,5] -Output: 4 -Explanation: Buy on day 1 (price = 1) and sell on day 5 (price = 5), profit = 5-1 = 4. - Note that you cannot buy on day 1, buy on day 2 and sell them later, as you are - engaging multiple transactions at the same time. You must sell before buying again. -\end{lstlisting} -Example 3: -\begin{lstlisting} -Input: [7,6,4,3,1] -Output: 0 -Explanation: In this case, no transaction is done, i.e. max profit = 0. -\end{lstlisting} -Solution: the difference compared with the first problem is that we can have multiple transaction, so whenever we can make profit we can have an transaction. We can notice that if we have [1,2,3,5], we only need one transaction to buy at 1 and sell at 5, which makes profit 4. This problem can be resolved with decreasing monotonic stack. whenever the stack is increasing, we kick out that number, which is the smallest number so far before i and this is the transaction that make the biggest profit = current price - previous element. Or else, we keep push smaller price inside the stack. -\begin{lstlisting}[language = Python] -def maxProfit(self, prices): - """ - :type prices: List[int] - :rtype: int - """ - mono_stack = [] - profit = 0 - for p in prices: - if not mono_stack: - mono_stack.append(p) - else: - if p= prices[i + 1]) - i++; - valley = prices[i]; - while (i < prices.length - 1 && prices[i] <= prices[i + 1]) - i++; - peak = prices[i]; - maxprofit += peak - valley; - } - return maxprofit; - } -} -\end{lstlisting} -This solution follows the logic used in Approach 2 itself, but with only a slight variation. In this case, instead of looking for every peak following a valley, we can simply go on crawling over the slope and keep on adding the profit obtained from every consecutive transaction. 
In the end, we will be using the peaks and valleys effectively, but we need not track the costs corresponding to the peaks and valleys along with the maximum profit. Instead, we can directly keep on adding the difference between consecutive numbers of the array whenever the second number is larger than the first one, and the total sum we obtain will be the maximum profit. This approach will simplify the solution. This can be made clearer by taking this example: [1, 7, 2, 3, 6, 7, 6, 7] - -The graph corresponding to this array is: -\begin{figure}[h] - \centering - \includegraphics[width = 0.9\columnwidth]{fig/maxprofit_consecutive.png} - \caption{profit graph} - \label{fig:profit_graph} -\end{figure} - -From the above graph, we can observe that the sum $A+B+C$ is equal to the difference $D$ between the heights of the consecutive peak and valley. -\begin{lstlisting}[language = Java] -class Solution { - public int maxProfit(int[] prices) { - int maxprofit = 0; - for (int i = 1; i < prices.length; i++) { - if (prices[i] > prices[i - 1]) - maxprofit += prices[i] - prices[i - 1]; - } - return maxprofit; - } -} -\end{lstlisting} -\end{examples} -%%%%%%%%%%%%%heap and priority queue%%%%%%%%%%%%%%%%%%%%%%% -\section{Heap and Priority Queue} -\begin{examples}[resume] -\item \textbf{621. Task Scheduler (medium).} Given a char array representing the tasks a CPU needs to do. It contains capital letters A to Z where different letters represent different tasks. Tasks can be done in any order. Each task can be done in one interval. For each interval, the CPU can finish one task or just be idle. - -However, there is a non-negative cooling interval n, which means that between two \textbf{same tasks} there must be at least n intervals during which the CPU is doing different tasks or is idle. You need to return the \textbf{least} number of intervals the CPU will take to finish all the given tasks. 
-\begin{lstlisting}[numbers=none] -Example: - -Input: tasks = ["A","A","A","B","B","B"], n = 2 -Output: 8 -Explanation: A -> B -> idle -> A -> B -> idle -> A -> B. -\end{lstlisting} - -Analysis: we can approach the problem by asking when we get the least idle time. Placing identical tasks next to each other incurs the largest idle time. Therefore, rule number 1: put different tasks next to each other whenever possible. However, consider the case \{"A":6, "B":1, "C":1, "D":1, "E":1\}: if we simply do a round using all of the available tasks in decreasing order of their frequency, we get 'A, B, C, D, E, A , ?, A, ?, A, ?, A, ?', where we end up with four '?', each of which represents an idle interval. However, this is not the best solution. A better way than this is to schedule the most frequent task again as soon as its cooling time is finished. The new order is 'A, B, C, A, D, E, A, ?, A, ?, A, ?, A'. We end up with one less idle session. We can implement it with heapq due to the fact that it is more efficient compared with PriorityQueue(). - -\textbf{Solution 1: heapq and idle cycle. } We can use a map to get the frequency of each task, then we put the frequencies into a heap by using the heapify function. While the heap is not empty, we process one cycle of length $n+1$ at a time: in each slot we pop a task from the heap, decrease its remaining frequency, and add one unit of time (idle slots are counted as well, unless all tasks are finished). (Actually, using PriorityQueue() here we will receive a Time Limit Exceeded (TLE) error.) We need $O(n)$ to iterate through the tasks list to get the frequencies. Then heapify takes $O(26)$, and each heappush takes $O(\log 26)$. This still makes the time complexity $O(n)$. 
-\begin{lstlisting}[language=Python] -from collections import Counter -from queue import PriorityQueue -import heapq -def leastInterval(self, tasks, n): - c = Counter(tasks) - h = [-count for _, count in c.items()] - heapq.heapify(h) - - ans = 0 - - while h: - temp = [] - i = 0 - while i <= n: # a cycle is n+1 - - if h: - c = heapq.heappop(h) - if c < -1: - temp.append(c+1) - ans += 1 - # if the queue is empty, we reached the end, need to break, no idle - if not h and not temp: - break - i += 1 - for c in temp: - heapq.heappush(h, c) - return ans -\end{lstlisting} - -\begin{figure}[h!] - \centering - \includegraphics[width = 0.9\columnwidth]{fig/621_Task_Scheduler_new.png} - \caption{Task Scheduler, Left is the first step, the right is the one we end up with.} - \label{fig:task_scheduler} -\end{figure} -\textbf{Solution 2: Use Sorting}. Obversing Fig.~\ref{fig:task_scheduler}, the actually time = idle time + total number of tasks. So, all we need to do is getting the idle time. And we start with the initial idle time which is (biggest frequency - 1)*(n). Then we travese the sorted list from the second item, and decrase the initial idle time. This gives us $O(n)$ time too. But the concept and coding is easier. 
-\begin{lstlisting}[language=Python] -from collections import Counter -def leastInterval(self, tasks, n): - c = Counter(tasks) - f = [count for _, count in c.items()] - f.sort(reverse =True) - idle_time = (f[0] - 1) * n - - for i in range(1, len(f)): - c = f[i] - idle_time -= min(c, f[0]-1) - return idle_time + len(tasks) if idle_time > 0 else len(tasks) -\end{lstlisting} -\end{examples} -\end{document} \ No newline at end of file diff --git a/Easy-Book/chapters/question_5_pattern-matching.tex b/Easy-Book/chapters/question_5_pattern-matching.tex deleted file mode 100644 index 54d3b0a..0000000 --- a/Easy-Book/chapters/question_5_pattern-matching.tex +++ /dev/null @@ -1,672 +0,0 @@ -\documentclass[../main.tex]{subfiles} -\begin{document} -For the string problems, it can be divided into two categories: \textbf{one string} and \textbf{two strings pattern matching}. - -For the one string problem, the first type is to do operations that meet certain requirements on a single string. (1). For the ad hoc easy string processing problems, we only need to read the requirement carefully and use basic programming skills, data structures, and sometimes requires us to be farmilar with some string libraries like Re other than the basic built-in string functions. We list some LeetCode Problems of this type in Section~\ref{string_ad_hoc}. (2) There are also more challenging problems: including find the longest/shortest/ count substring and subsequence that satisfy certain requirements. Usually the subsequence is more difficult than the substring. In this chapter we would list the following types in Section~\ref{string_advanced_single} -\begin{itemize} - \item Palindrome: A sequence of characters read the same forward and backward. - \item Anagram: A word or phrase formed by rearranging the letters of a different word or phrase. - \item Parentheses and others. 
-\end{itemize} - - -\textbf{Application of Pattern Matching to two strings:} Given two strings or two arrays, a text S and a pattern P, these problems can be generalized as finding the pattern P in the string S. (1) If we do not care about the order of the letters in the pattern (anagram), it is best to use a sliding window; this is detailed in Section~\ref{string_anagram}. (2) If the order matters (an exact match of the pattern), we use KMP. Problems of this type are listed in Section~\ref{string_exact_matching}. - -% \textbf{Brute Force Exact Pattern Matching} Before we proceed to the efficient pattern matching algorithms, let us take a look at how we do the pattern matching with brute force. Assume we are given a string S and a pattern P, now we need to search the position of P in S, if we do not use the built-in functions in Python, what would we do to solve this problem. - -% Solution: it is straightforward that we need to compare S and P char by char. Suppose now we are matching the ith char of S to the jth char in the P, all the previous j-1 chars in P have already been matched in S. Now we would have two situations: if S[i] == P[j], then we move forward i and j to the next position, otherwise, the match failed, for example S="ABCDF" and P="ABCE", when the i=3, and j = 3, 'D'!='E', so that j need to move to position j=0, which has backtracked j positions, and for the S, we need to move to char 'B', which only backtrack j-1 positions, so that $i - i_{new} = j-1$, which makes $i_{new} = i-j+1$. 
The Python code of the brute force solution is: -% \begin{lstlisting}[language = Python] -% def bruteForcePatternMatching(S, P): -% sLen, pLen = len(S), len(P) -% i, j = 0, 0 -% while i < sLen and j < pLen: -% if S[i] == P[j]: -% i += 1 -% j += 1 -% else: -% i = i -j + 1 -% j = 0 - -% if j == pLen: -% return i - j -% else: -% return -1 -% \end{lstlisting} -% This can only return the first match, to modify it that we can find all patterns in the string, we can do as follows: -% \begin{lstlisting}[language = Python] -% def bruteForcePatternMatchingAll(S, P): -% if not S or not P: -% return [] -% sLen, pLen = len(S), len(P) -% i, j = 0, 0 -% ans = [] -% while i < sLen: -% if j == pLen: #collect position -% ans.append(i - j) -% j = 0 -% i = i -j + 1 -% continue -% # do the pattern matching -% if S[i] == P[j]: -% i += 1 -% j += 1 -% else: -% i = i -j + 1 -% j = 0 -% return ans -% \end{lstlisting} -% The time complexity for brute force pattern matching is $O(S*P)$. It is not hard to see that when one char does not match, with the backtracking, we backtrack j-1 positions in S, and j positions in P. For example, S = "ABCDABD ABCDABCDABDE", P = "ABCDABD", when P match "ABCDABC" at the second 'C', then if we know .... With KMP, we can make the worst case time complexity to be $O(S)$. -% \section{Word Subsets} -\section{Ad Hoc Single String Problems} -\label{string_ad_hoc} -\begin{enumerate} - \item 125. Valid Palindrome - \item 65. Valid Number - \item 20. Valid Parentheses (use a stack to save left parenthe) - \item 214. Shortest Palindrome (KMP lookup table) - \item 5. Longest Palindromic Substring - \item 214 Shortest Palindrome , KMP lookup table, for example s=abba, constructed S = abba\#abba), - \item 58. Length of Last Word(easy) -\end{enumerate} - -\section{String Expression} -\begin{enumerate} - \item 8. 
String to Integer (atoi) (medium) -\end{enumerate} -%%%%%%%%%%%Advanced single string%%%%%%%%%%%%%%%%%%%%%%%%%% -\section{Advanced Single String} -\label{string_advanced_single} - For a hard problem, try to reduce it to another problem that can be solved by an algorithm you already know. -%%%%%%%%%%%%%%%%% palindrome %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\subsection{Palindrome} -A palindrome is a sequence of characters that reads the same forward and backward. To identify if a sequence, say ``abba'', is a palindrome, we just need to check if s == s[::-1]. Palindromes have a recursive structure: if we know ``bb'' is a palindrome, then ``abba'' is a palindrome if s[0] == s[3]. Due to this structure, in problems that ask for palindromic substrings, we can apply dynamic programming and other algorithms to improve on the naive solution. - -To validate a palindrome we can use two pointers, one at the start and the other at the end, and move them toward the middle. -\begin{enumerate} - - \item 409. Longest Palindrome (*) - \item 9. Palindrome Number (*) - \item Palindrome Linked List (234, *) - \item Valid Palindrome (125, *) - \item Valid Palindrome II (680, *) - \item Largest Palindrome Product (479, *) - \item 647. Palindromic Substrings (medium, check) - \item Longest Palindromic Substring (5, **, check) - \item Longest Palindromic Subsequence (516, **) - \item Shortest Palindrome (214, ***) - \item Find the Closest Palindrome (564, ***) - \item Count Different Palindromic Subsequences (730, ***) - \item Palindrome Partitioning (131, **) - \item Palindrome Partitioning II (132, ***) - - \item 266. Palindrome Permutation (Easy) - \item Palindrome Permutation II (267, **) - \item Prime Palindrome (866, **) - \item Super Palindromes (906, ***) - \item Palindrome Pairs (336, ***) - \item -\end{enumerate} -\begin{examples}[resume] -\item \textbf{Valid Palindrome II (L680, *).} Given a non-empty string s, you may delete \textbf{at most} one character. 
Judge whether you can make it a palindrome. -\begin{lstlisting}[numbers=none] -Example 1: - -Input: "aba" -Output: True - -Example 2: - -Input: "abca" -Output: True -Explanation: You could delete the character 'c'. -\end{lstlisting} -\textbf{Solution: Two Pointers.} If we allow zero deletion, then it is a normal two pointers algorithm to check if the start i and the end j position has the same char. If we allow another time deletion is when the start and end char is not equal, we check if deleting s[i] or s[j], left s(i+1, j) or s(i, j-1) if they are palindrome. -\begin{lstlisting}[language=Python] -def validPalindrome(self, s): - if not s: - return True - - i, j = 0, len(s)-1 - while i <= j: - if s[i] == s[j]: - i += 1 - j -= 1 - else: - left = s[i+1: j+1] - right = s[i:j] - return left == left[::-1] or right == right[::-1] - return True -\end{lstlisting} -\item \label{l647} \textbf{Palindromic Substrings(L647, **).} Given a string, your task is to count how many palindromic substrings in this string. The substrings with different start indexes or end indexes are counted as different substrings even they consist of same characters. -\begin{lstlisting}[numbers=none] -Example 1: - -Input: "abc" -Output: 3 -Explanation: Three palindromic strings: "a", "b", "c". - -Example 2: - -Input: "aaa" -Output: 6 -Explanation: Six palindromic strings: "a", "a", "a", "aa", "aa", "aaa". -\end{lstlisting} -\textbf{Solution 1: Dynamic Programming.} First, we use dp[i][j] to denotes if the substring s[i:j] is a palindrome or not. Thus, we have a matrix of size $n\times n$. We an apply a simple example ``aaa". -\begin{lstlisting}[numbers=none] -``aaa" - 0 1 2 -0 1 1 1 -1 0 1 1 -2 0 0 1 -\end{lstlisting} -From the example, first, we know this matrix would only have valid value at the upper part due to i<=j. Because if j-i>=3 which means the length is larger or equals to 3, dp[i][j] = 1 if s[i]==s[j] and dp[i+1][j-1]==1. Compare i:i+1, j:j-1. 
This means we need to iterate i reversely and j incrementally. -\begin{lstlisting}[language=Python] -def countSubstrings(self, s): - """ - :type s: str - :rtype: int - """ - n =len(s) - dp = [[0 for _ in range(n)] for _ in range(n)] # if from i to j is a palindrome - res = 0 - for i in range(n-1,-1,-1): - for j in range(i,n): - if j-i>2: #length >=3 - dp[i][j] = (s[i]==s[j] and dp[i+1][j-1]) - else: - dp[i][j] = (s[i]==s[j]) #length 1 and 2 - if dp[i][j]: - res += 1 - return res -\end{lstlisting} -\textbf{Range Type Dynamic Programming.}A sligtly different way to fill out the matrix is: -\begin{lstlisting} -def countSubstrings(self, s): - if not s: - return 0 - - rows = len(s) - dp = [[0 for col in range(rows)] for row in range(rows)] - ans = 0 - for i in range(0,rows): - dp[i][i] = 1 - ans += 1 - - for l in range(2, rows+1): #length of substring - for i in range(0,rows-l+1): #start 0, end len -l+1 - j = i+l-1 - if j > rows: - continue - if s[i] == s[j]: - if j-i > 2: - dp[i][j] = dp[i+1][j-1] - else: - dp[i][j] = 1 - ans += dp[i][j] - - return ans -\end{lstlisting} -\textbf{Solution 2: Center Expansion.} For s[0]='a', it is center at 0, s[0:2]='aa', is center between 0 and 1, s[1]='a', s[0:3] ='aaa', center at 1. s[1:3]='aa' is center between 1 and 2, for s[3]='a', is center at 2. There for our centers goes from: The time complexity if $O(n^2)$. 
-\begin{lstlisting}[numbers =none] -left = 0, right = 0, i = 0, i/2 = 0, i%2 = 0 -left = 0, right = 1, i = 1, i/2 = 0, i%2 = 1 -left = 1, right = 1, i = 2, i/2 = 1, i%2 = 0 -left = 1, right = 2, i = 3, i/2 = 1, i%2 = 1 -left = 2, right = 2, i = 4, i/2 = 2, i%2 = 0 -\end{lstlisting} -\begin{lstlisting}[language=Python] -def countSubstrings(self, S): - n = len(S) - ans = 0 - for i in range(2*n-1): - l = int(i/2) - r = l + i%2 - while l >= 0 and r < n and S[l] == S[r]: - ans += 1 - l -= 1 - r += 1 - return ans - -\end{lstlisting} -\textbf{Solution 3: Manacher’s Algorithm.} In the center expansion, we can save the result according to the position i. We can see from postion 6, the LPS table is symmetric, what Manacher's Algorithm do is to identify around the center of a palindrome, when it will be symmetric and when it wont (in case at position 3, for immediate left and right (2, 4) is symmetric, but not (0, 5). This is distinguished by the LPS length at position 3. only (i-d, i, i+d) will be symmetric. -\begin{figure} - \centering - \includegraphics[width=0.7\columnwidth]{fig/ltlp1.png} - \caption{LPS length at each position for palindrome. } - \label{fig:ltlp} -\end{figure} -The code for Python 2 is given: and try to understand later??? -\begin{lstlisting}[language = Python] -def manachers(S): - A = '@#' + '#'.join(S) + '#$' - Z = [0] * len(A) - center = right = 0 - for i in xrange(1, len(A) - 1): - if i < right: - Z[i] = min(right - i, Z[2 * center - i]) - while A[i + Z[i] + 1] == A[i - Z[i] - 1]: - Z[i] += 1 - if i + Z[i] > right: - center, right = i, i + Z[i] - return Z - -return sum((v+1)/2 for v in manachers(S)) -\end{lstlisting} -\item \textbf{Longest Palindromic Subsequence (L516, **).} Given a string s, find the longest palindromic subsequence's length in s. You may assume that the maximum length of s is 1000. -\begin{lstlisting}[numbers=none] -Example 1: -Input: -"bbbab" -Output: -4 -One possible longest palindromic subsequence is "bbbb". 
- -Example 2: -Input: -"cbbd" -Output: -2 -One possible longest palindromic subsequence is "bb". -\end{lstlisting} -\textbf{Solution: Range Type Dynamic Programming.} We use dp[i][j] to denote the maximum palindromic subsequence of s(i,j). Like the substring palindrome, we only need to fill out the upper bound of the matrix. Let us dismentle the problems into different length of substring: -\begin{lstlisting}[numbers=none] -L=2: bb bb ba ab i=0..n-L+1, j=i+L-1 -L=3: bbb bba bab, if s[i] == s[j], Yes: dp[i][j] = dp[i+1][j-1]+2, which we obtined from last L2, No: dp[i][j] = max(dp[i+1][j], dp[i][j-1]) -L=4, bbba, bbab -L=5, bbbab -\end{lstlisting} -The process will be controled by the range of the length of substring. And we fill out the matrix in the following way: this is a ranging type of dynamic programming. -\begin{lstlisting}[numbers=none] -[[1, 2, 0, 0, 0], [0, 1, 2, 0, 0], [0, 0, 1, 1, 0], [0, 0, 0, 1, 1], [0, 0, 0, 0, 1]] -[[1, 2, 3, 0, 0], [0, 1, 2, 2, 0], [0, 0, 1, 1, 3], [0, 0, 0, 1, 1], [0, 0, 0, 0, 1]] -[[1, 2, 3, 3, 0], [0, 1, 2, 2, 3], [0, 0, 1, 1, 3], [0, 0, 0, 1, 1], [0, 0, 0, 0, 1]] -[[1, 2, 3, 3, 4], [0, 1, 2, 2, 3], [0, 0, 1, 1, 3], [0, 0, 0, 1, 1], [0, 0, 0, 0, 1]] -\end{lstlisting} -\begin{lstlisting}[language=Python] -def longestPalindromeSubseq(self, s): - if not s: - return 0 - if s == s[::-1]: - return len(s) - - rows = len(s) - dp = [[0 for col in range(rows)] for row in range(rows)] - for i in range(0,rows): - dp[i][i] = 1 - - for l in range(2, rows+1): #use a space - for i in range(0,rows-l+1): #start 0, end len -l+1 - j = i+l-1 - if j > rows: - continue - if s[i] == s[j]: - dp[i][j] = dp[i+1][j-1]+2 - else: - dp[i][j] = max(dp[i][j-1], dp[i+1][j]) - return dp[0][rows-1] -\end{lstlisting} -\end{examples} -\subsection{Calculator} -In this section, for basic calculator, we have operators '+', '-', '*', '/', and parentheses. Because of ('+', '-') and ('*', '/') has different priority, and the parentheses change the priority too. 
The basic step is to obtain the integers digit by digit from the string. And, if the previous sign is '-', we make sure we get a negative number. Given a string expression: (a+b/c)*(d-e)+((f-g)-(h-i))+(j-k). The rule here is to deduct this to: -\begin{equation} - \underline{\underline{(a+\underline{b/c}_b)}_a*\underline{(d-e)}_d}_a+\underline{(\underline{(f-g)}_f-\underline{(h-i)}_h)}_f+\underline{(j-k)}_j -\end{equation} -The rules are: 1) Reduce the '*' and '/': And we handle it when we encounter the following operator or at the end of the string. Because, when we encounter a sign(operator), we check the previous sign, if the previous sign is '/' or '*', we compute the previous number with current number to reduce it into one. 2) Reduce the parentheses into one: (d-e) is reduced to d, and because the previous sign is '*', it is further combined with a and become a. Thus, if we save the reduced result into a stack, there will be [a,, f, j], we just need to sum over. thus to avoid the boundary condition, we can add '+' at the end. In the later part, we will explain more about how to deal with the above two kinds of reduce. There are different levels of calculators: -\begin{enumerate} - \item \label{case_1}'+', '-', w/o parentheses: e.g., a+b+c, or a-b-c. - \begin{lstlisting}[numbers=none] - presign = '+', num=0, for the digits, stack for saving either negative or positive integer - 1. iterate through the char: - if a digit: obtain the integer - else if c in ['+, '-'] or c is the last char: - if presign == '-': - num = -num - stack.append(num) - num = 0 - presign = c - 2. sum over the positive and negative values in the stack - \end{lstlisting} - \item '+', '-', with parentheses: e.g. a-b-c vs a-(b-c-d). To handle the parentheses, we need to think of (b-c-d) as a single integer. When we encounter the left parenthesis we save its state: the previous sign and '('; when encountering ')', we do a sum over in the stack till we pop out the previous '('. 
And we recover its state: the previous sign and the num. - \begin{lstlisting}[numbers=none] - if c == '(': - stack.append(presign) - stack.append('(') - presign = '+' - num = 0 - else if c in ['+, '-', ')']: # if its operator or ')' - if presign == '-': - num = -num - if c == ')': - sum over in the stack till top is '(', - restore the state - else: - stack.append(num) - num=0, presign = c - \end{lstlisting} - \item '+', '-', '*', '/', w/o parentheses: This is similar to Case~\ref{case_1}, other than the '*', '-'. For example, a-b/c/d*e. When we are at c, we compute the pop the top element in the stack and compute (-b/c)=f, and append f into the stack. When we are at d, similarly, we compute (f/d)=g, and append g into the stack. - \begin{lstlisting}[numbers=none] - 1. iterate through the char: - if a digit: obtain the integer - if c in ['+, '-', '*', '/'] or c is the last char: - if presign == '-': - num = -num - # we reduce the current num with previous - elif presign in ['*', '/']: - num = operator(stack.pop(),presign, num) - stack.append(num) - num = 0 - presign = c - 2. sum over the positive and negative values in the stack - \end{lstlisting} - \item '+', '-', '*', '/', with parentheses. It is a combination of the previous cases, so I am not giving code here. -\end{enumerate} -\begin{examples}[resume] -\item \textbf{Basic Calculator (L224, ***).} Implement a basic calculator to evaluate a simple expression string. The expression string may contain open ( and closing parentheses ), the plus + or minus sign -, non-negative integers and empty spaces. -\begin{lstlisting}[numbers=none] -Example 1: -Input: "1 + 1" -Output: 2 - -Example 2: -Input: " 2-1 + 2 " -Output: 3 - -Example 3: -Input: "(1+(4+5+2)-3)+(6+8)" -Output: 23 -\end{lstlisting} -\textbf{Stack for Parentheses}. Suppose firstly we don't consider the parentheses, then it is linear iterating each char and handle the digits and the sign. The code are the first if and elif in the following Python code. 
Now, to think of the parentheses, it does affect the result: 2-(5-6). With and without parentheses give 3 and -9 for answer. When we encounter a '(', we need to reset ans and the sign, plus we need to save the previous ans and sign, at here it is (2, -). Then when we encounter a ')', we first collect the answer from last '(' to current ')'. And, we need to sum up the answer before '('. -\begin{lstlisting}[numbers=none] -(1+(4+5+2)-3)+(6+8) -at (: stack = [0, +] -at second '(': stack = [0, +, 1 +] -at first ')': ans=11, pop out [1, +], ans = 12, + -at second ')': ans = 9, pop out [0, +], ans = 9 -at third '(': ans = 9, +, stack = [9,+], reset ans = 0, sign = '+' -\end{lstlisting} -\begin{lstlisting}[language=Python] -def calculate(self, s): - s = s + '+' - ans = num = 0 #num is to get each number - sign = '+' - stack = collections.deque() - for c in s: - if c.isdigit(): #get number - num = 10*num + int(c) - elif c in ['-','+', ')']: - if sign == '-': - num = -num - if c == ')': - while stack and stack[-1] != '(': - num += stack.pop() - stack.pop() - sign = stack.pop() - else: - stack.append(num) - num = 0 - sign = c - elif c == '(': # left parathese, put the current ans and sign in the stack - stack.append(sign) - stack.append('(') - num = 0 - sign = '+' - - while stack: - ans += stack.pop() - return ans -\end{lstlisting} -\item \textbf{Basic Calculator III (L772, ***).} Implement a basic calculator to evaluate a simple expression string. The expression string may contain open ( and closing parentheses ), the plus + or minus sign -, \textbf{non-negative} integers and empty spaces . The expression string contains only non-negative integers, +, -, *, / operators , open ( and closing parentheses ) and empty spaces . The integer division should truncate toward zero. You may assume that the given expression is always valid. All intermediate results will be in the range of [-2147483648, 2147483647]. 
-\begin{lstlisting}[numbers=none] -Some examples: - -"1 + 1" = 2 -" 6-4 / 2 " = 4 -"2*(5+5*2)/3+(6/2+8)" = 21 -"(2+6* 3+5- (3*14/7+2)*5)+3"=-12 -\end{lstlisting} -\textbf{Solution: Case 4} -\begin{lstlisting}[language=Python] -def calculate(self, s): - ans = num = 0 - stack = collections.deque() - n = len(s) - presign = '+' - s = s+'+' - def op(pre, op, cur): - if op == '*': - return pre*cur - if op == '/': - return -abs(pre)//cur if pre < 0 else pre//cur - for i, c in enumerate(s): - if c.isdigit(): - num = 10*num + int(c) - elif c in ['+', '-', '*', '/', ')']: - if presign == '-': - num = -num - elif presign in ['*','/']: - num = op(stack.pop(),presign, num) - if c == ')': # reduce to one number, and restore the state - while stack and stack[-1] != '(': - num += stack.pop() - stack.pop() # pop out '(' - presign = stack.pop() - else: - stack.append(num) - num = 0 - presign = c - elif c == '(': #save state, and restart a new process - stack.append(presign) - stack.append(c) - presign = '+' - num = 0 - - ans = 0 - while stack: - ans += stack.pop() - return ans -\end{lstlisting} -\item 227. Basic Calculator II (exercise) -\end{examples} -\subsection{Others} - -Possible methods: two pointers, one loop$+$two pointers -\section{Exact Matching: Sliding Window and KMP} - -\paragraph{Exact Pattern Matching} -\begin{enumerate} - \item 14. Longest Common Prefix (easy) -\end{enumerate} - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%%%%% sliding window -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\section{Anagram Matching: Sliding Window} -\label{string_anagram} - -% \section{Sliding Window for anagram or exact matching} -However, if the question is to find all anagrams of the pattern in string S. -For example: 438. 
Find All Anagrams in a String - -Example 1: -\begin{lstlisting} -Input: -s: "cbaebabacd" p: "abc" - -Output: -[0, 6] -\end{lstlisting} - -Explanation: -The substring with start index = 0 is "cba", which is an anagram of "abc". -The substring with start index = 6 is "bac", which is an anagram of "abc". - -Python code with sliding window: -\begin{lstlisting}[language = Python] -def findAnagrams(self, s, p): - """ - :type s: str - :type p: str - :rtype: List[int] - """ - if len(s)2: - dp[i][j]=(s[i]==s[j] and dp[i+1][j-1]) - else: - dp[i][j] =(s[i]==s[j]) - if dp[i][j] and j-i+1 >len(max_str): - max_str = s[i:j+1] - return max_str -\end{lstlisting} -%%%%%%%%%%%%%%%%%%%end of answer - \end{Answer} -\setboolean{firstanswerofthechapter}{false} -\end{document} \ No newline at end of file diff --git a/Easy-Book/chapters/question_6_algorithms_for_tree.tex b/Easy-Book/chapters/question_6_algorithms_for_tree.tex deleted file mode 100644 index 4877141..0000000 --- a/Easy-Book/chapters/question_6_algorithms_for_tree.tex +++ /dev/null @@ -1,2032 +0,0 @@ -\documentclass[../main.tex]{subfiles} -\begin{document} -When we first go for interviews, we may find tree and graph problems intimidating and challenging to solve within 40 mins normal interview window. This might because of our neglinance of the concept of Divide and Conquer or due to the recursion occurrence in tree-related problems. However, at this point, we have already studied the concepts of various trees, divide and conquer, and solved quite a large amount of related questions in previous chapters in this part. We will find out studing this chapter can be real easy compared with the Dynamic programming questions because the consistent principle to solve tree questions. 
The principle is to solve problems within \textbf{tree traversal}, either recursively or iteratively, in either of two ways: -\begin{enumerate} - \item \textbf{Top-down Searching}: Tree traversal and with visited nodes information as parameters to be passed to its subtree. The result will be returned from leaf node or empty node, or node that satisfy a certain condition. This is just an extension of the graph search, either BFS or DFS, with recorded path information. This usuallly requires None return from the recursion function, but instead always require a global data structure and a local data structure to track the final answer and the current path information. -\begin{lstlisting} -def treeTraversalParitial(root, tmp_result): - if node is empty or node is a leaf node: - collect the result or return the final result - construct the previous temp result using the current node - treeTraversalParitial(root.left, constructured_tmp_result) - treeTraversalParitial(root.right, constructured_tmp_result) -\end{lstlisting} - -\item \textbf{Bottom-up Divide and Conquer:} Due to the special structure of tree, a tree is naturally divided into two halves: left subtree and right subtree. Therefore, we can enforce the Divide and Conquer, to assign two ``agents`` to obtain the result for its subproblems, and back to current node, we ``merge`` the results of the subtree to gain the result for current node. This also requires us to define the return value for edge cases: normally would be empty node and/or leaves. 
-\begin{lstlisting} -def treeTraversalDivideConquer(root): - if node is empty or node is a leaf node: - return base result - # divide - left result = treeTraversalDivideConquer(root.left) - right result = treeTraversalDivideConquer(root.right) - - # conquer - merge the left and right result with the current node - return merged result of current node -\end{lstlisting} -\end{enumerate} -The difficulty of these problems are decided by the merge operation, and how many different variables we need to return to decide the next merge operation. However, if we don't like using the recursive function, we can use levelorder traversal implemented with Queue. - -Binary tree and Binary Searching tree are the most popular type of questions among interviews. They each take nearly half and half of all the tree questions. We would rarely came into the Segment Tree or Trie, but if you have extra time it will help you learn more if you would study thest two types too. -\section{Binary Tree (40\%)} -We classify the binary tree related questions as: -\begin{enumerate} - \item Tree Traversal; - \item Tree Property: Depth, Height, and Diameter - \item Tree Advanced Property: LCA - \item Tree Path -\end{enumerate} -%%%%%%%%%%%%%%%Tree Traversal%%%%%%%%%%%%%%%%%%%%%%% -\subsection{Tree Traversal} -The problems appearing in this section has mostly been solved in tree traversal section, thus we only list the problems here. -\begin{enumerate} - \item 144. Binary Tree Preorder Traversal - \item 94. Binary Tree Inorder Traversal - \item 145. Binary Tree Postorder Traversal - \item 589. N-ary Tree Preorder Traversal - \item 590. N-ary Tree Postorder Traversal - \item 429. N-ary Tree Level Order Traversal - \item 103. Binary Tree Zigzag Level Order Traversal(medium) - \item 105. Construct Binary Tree from Preorder and Inorder Traversal -\end{enumerate} -\begin{examples}[resume] -\item \textbf{103. 
Binary Tree Zigzag Level Order Traversal (medium).} Given a binary tree, return the zigzag level order traversal of its nodes' values. (ie, from left to right, then right to left for the next level and alternate between). -\begin{lstlisting}[numbers=none] -For example: -Given binary tree [3,9,20,null,null,15,7], - - 3 - / \ - 9 20 - / \ - 15 7 - -return its zigzag level order traversal as: - -[ - [3], - [20,9], - [15,7] -] -\end{lstlisting} -\textbf{Solution: BFS level order traversal.} We use an variable to track the level of the current queue, and if its even, then we add the result in the original order, otherwise, use the reversed order: -\begin{lstlisting}[language=Python] -def zigzagLevelOrder(self, root): - """ - :type root: TreeNode - :rtype: List[List[int]] - """ - if root is None: - return [] - q = [root] - i = 0 - ans = [] - while q: - tmp = [] - tmpAns = [] - for node in q: - tmpAns.append(node.val) - if node.left: - tmp.append(node.left) - if node.right: - tmp.append(node.right) - q = tmp - if i % 2 == 0: - ans += [tmpAns] - else: - ans += [tmpAns[::-1]] - i += 1 - return ans -\end{lstlisting} - -\item \textbf{105. Construct Binary Tree from Preorder and Inorder Traversal.} Given preorder and inorder traversal of a tree, construct the binary tree. -Note:You may assume that duplicates do not exist in the tree. -\begin{lstlisting}[numbers=none] -For example, given preorder = [3,9,20,15,7], inorder = [9,3,15,20,7] -Return the following binary tree: - - 3 - / \ - 9 20 - / \ - 15 7 - \end{lstlisting} - -\textbf{Solution: the feature of tree traversal.} The inorder traversal puts the nodes from the left subtree on the left side of root, and the nodes from the right subtree on the right side of the root. While the preorder puts the root at the first place, followed by the left nodes and right nodes. 
Thus we can find the root node from the preorder, and then use the inorder list to find the root node, and cut the list into two parts: left nodes and right nodes. We use divide and conquer, and do such operation recursively till the preorder and inorder list is empty. -\begin{lstlisting} [language = Python] -def buildTree(self, preorder, inorder): - """ - :type preorder: List[int] - :type inorder: List[int] - :rtype: TreeNode - """ - #first to decide the root - def helper(preorder,inorder): - if not preorder or not inorder: - return None - - cur_val = preorder[0] - node = TreeNode(cur_val) - #divide: now cut the lists into two halfs - leftinorder,rightinorder = [],[] - bLeft=True - for e in inorder: - if e==cur_val: - bLeft=False #switch to the right side - continue - if bLeft: - leftinorder.append(e) - else: - rightinorder.append(e) - leftset, rightset = set(leftinorder),set(rightinorder) - leftpreorder, rightpreorder = [],[] - for e in preorder[1:]: - if e in leftset: - leftpreorder.append(e) - else: - rightpreorder.append(e) - - #conquer - node.left=helper(leftpreorder, leftinorder) - node.right= helper(rightpreorder,rightinorder) - return node - return helper(preorder,inorder) -\end{lstlisting} -However, the previous code has problem as 203 / 203 test cases passed. -Status: Memory Limit Exceeded. So instead of passing new array, I use index. 
-\begin{lstlisting} [language = Python] -def buildTree(self, preorder, inorder): - """ - :type preorder: List[int] - :type inorder: List[int] - :rtype: TreeNode - """ - #first to decide the root - def helper(pre_l, pre_r,in_l, in_r): #[pre_l,pre_r) - if pre_l>=pre_r or in_l>=in_r: - return None - - cur_val = preorder[pre_l] - node = TreeNode(cur_val) - #divide: now cut the lists into two halfs - leftinorder = set() - inorder_index = -1 - for i in range(in_l, in_r): - if inorder[i]==cur_val: - inorder_index = i - break - leftinorder.add(inorder[i]) - #when leftset is empty - new_pre_r=pre_l - for i in range(pre_l+1,pre_r): - if preorder[i] in leftinorder: - new_pre_r = i - else: - break - new_pre_r+=1 - - #conquer - node.left=helper(pre_l+1, new_pre_r, in_l, inorder_index) - node.right= helper(new_pre_r,pre_r, inorder_index+1, in_r) - return node - if not preorder or not inorder: - return None - return helper(0,len(preorder),0,len(inorder)) -\end{lstlisting} - - -\end{examples} - -% 94. Binary Tree Inorder Traversal - -% Given a binary tree, return the inorder traversal of its nodes' values. -% \begin{lstlisting} -% Example: - -% Input: [1,null,2,3] -% 1 -% \ -% 2 -% / -% 3 - -% Output: [1,3,2] - -% Follow up: Recursive solution is trivial, could you do it iteratively? 
-% \end{lstlisting} -% \begin{lstlisting}[language=Python] -% # recursive -% def inorderTraversal(self, root): -% """ -% :type root: TreeNode -% :rtype: List[int] -% """ -% # left, root, right -% if root is None: -% return [] -% left = self.inorderTraversal(root.left) -% right = self.inorderTraversal(root.right) -% return left+[root.val]+right -% \end{lstlisting} -% \begin{lstlisting}[language=Python] -% # iterative -% def inorderTraversal(self, root): -% """ -% :type root: TreeNode -% :rtype: List[int] -% """ -% # left, root, right -% if root is None: -% return [] -% ans = [] -% stack =[] -% current = root -% while current: -% stack.append(current) -% current = current.left -% while stack: -% tmp = stack.pop() -% ans.append(tmp.val) -% current = tmp.right -% while current: -% stack.append(current) -% current = current.left -% return ans -% \end{lstlisting} - -%%%%%%%%%%%%%%%%%%%%%%%Depth%%%%%%%%%%%%%%%%%%%%%%%%%% -\subsection{Depth/Height/Diameter} -In this section, focus on the property related problems of binary tree: including depth, height and diameter. We can be asked to validate balanced binary tree, or the maximum/minimum of these values. The solution is tree traversal along with some operations along can be used to solve this type of problems. -\begin{enumerate} - \item 111. Minimum Depth of Binary Tree (Easy) - \item 110. Balanced Binary Tree(Easy) - \item 543. Diameter of Binary Tree (Easy) - - \item 559. Maximum Depth of N-ary Tree (Easy) (Exercise) - \item 104. Maximum Depth of Binary Tree (Exercise) -\end{enumerate} - -\begin{examples}[resume] - -\item \textbf{Minimum Depth of Binary Tree (L111, Easy).} Given a binary tree, find its minimum depth. The minimum depth is the number of nodes along the shortest path from the root node down to the nearest leaf node. \textit{Note: A leaf is a node with no children.} -\begin{lstlisting} -Example: - -Given binary tree [3,9,20,null,null,15,7], - - 3 - / \ - 9 20 - / \ - 15 7 - -return its minimum depth = 2. 
-\end{lstlisting} -\textbf{Solution 1: Level-Order Iterative.} For the minumum path, we can traverse the tree level-by-level and once we encounter the first leaf node, this would be the minimum depth and we return from here and has no need to finish traversing the whole tree. The worst time complexity is $O(n)$ and with $O(n)$ space. -\begin{lstlisting}[language=Python] -def minDepth(self, root): - if root is None: - return 0 - q = [root] - d = 0 - while q: - d += 1 - for node in q: - if not node.left and not node.right: #a leaf - return d - - q = [neigbor for n in q for neigbor in [n.left, n.right] if neigbor] - return d -\end{lstlisting} -\textbf{Solution 2: DFS + Divide and Conquer.} In this problem, we can still use a DFS based traversal. However, in this solution, without iterating the whole tree we would not get the minimum depth. So, it might take bit longer time. And, this takes $O(h)$ stack space. -\begin{lstlisting}[language=Python] -def minDepth(self, root): - if not root: - return 0 - if not root.left and not root.right: # only leaves will have 1 - return 1 - ans = sys.maxsize - if root.left: - ans = min(ans, self.minDepth(root.left)) - if root.right: - ans = min(ans, self.minDepth(root.right)) - return ans+1 -\end{lstlisting} -\item \textbf{110. Balanced Binary Tree(L110, Easy).} Given a binary tree, determine if it is height-balanced. For this problem, a height-balanced binary tree is defined as: \textit{a binary tree in which the \textbf{height} of the two subtrees of every node never differ by more than 1.} (LeetCode used depth however, it should be the height) -\begin{lstlisting}[numbers=none] -Example 1: - -Given the following tree [3,9,20,null,null,15,7]: - - 3 - / \ - 9 20 - / \ - 15 7 - -Return true. - -Example 2: - -Given the following tree [1,2,2,3,3,null,null,4,4]: - - 1 - / \ - 2 2 - / \ - 3 3 - / \ - 4 4 - -Return false. -\end{lstlisting} -\textbf{Solution 1: Bottom-up DFS+Divide and conquer with height as return}. 
First, because the height of a tree is defined as the number of edges on the \textit{longest path} from node to a leaf. And a leaf will have a height of 0. Thus, for the DFS traversal, we need to return 0 for the leaf node, and for an empty node, we use -1 (for leaf node, we have max(-1, -1) + 1 = 0). In this process, we just need to check if the left subtree or the right subtree is already unbalanced which we use -2 to denote, or the difference of the height of the two subtrees is more than 1. -\begin{lstlisting}[language=Python] -def isBalanced(self, root): - """ - :type root: TreeNode - :rtype: bool - """ - def dfsHeight(root): - if not root: - return -1 - lh = dfsHeight(root.left) - rh = dfsHeight(root.right) - if lh == -2 or rh == -2 or abs(lh-rh) > 1: - return -2 - return max(lh, rh)+1 - return dfsHeight(root) != -2 -\end{lstlisting} -\item \textbf{543. Diameter of Binary Tree (Easy).} Given a binary tree, you need to compute the length of the diameter of the tree. The diameter of a binary tree is the length of the \textbf{longest} path between any two nodes in a tree. Note: The length of path between two nodes is represented by the number of edges between them. This path may or may not pass through the root. -\begin{lstlisting}[numbers=none] - Example: -Given a binary tree - - 1 - / \ - 2 3 - / \ - 4 5 - -Return 3, which is the length of the path [4,2,1,3] or [5,2,1,3]. -\end{lstlisting} -\textbf{Solution: Height of the tree with global variable to track the diameter.} For node 2, the hegiht should be 1, and the length of path from 4 to 5 is 2, which is sum of the height of 4, 5 and two edges. Thus, we use rootToLeaf to track the height of the subtree. Meanwhile, for each node, we use a global variable to track the maximum path that pass through this node, which we can get from the height of the left subtree and right subtree. 
-\begin{lstlisting}[language=Python] -def diameterOfBinaryTree(self, root): - """ - :type root: TreeNode - :rtype: int - """ - # this is the longest path from any to any - - def rootToAny(root, ans): - if not root: - return -1 - left = rootToAny(root.left, ans) - right = rootToAny(root.right, ans) - ans[0] = max(ans[0], left+right+2) - return max(left, right) + 1 - ans = [0] - rootToAny(root, ans) - return ans[0] -\end{lstlisting} -\end{examples} -%%%%%%%%%%%%%%%%%%%%Paths%%%%%%%%%%%%%%%%%%%% -\subsection{Paths} -In this section, we mainly solve path related problems. As we mentioned in Chapter~\ref{chapter_tree}, there are three types of path depending on the starting and ending node type of the path. We might be asked to get minimum/maximum/each path sum/ path length from these three cases: 1) \textbf{root}-to-\textbf{leaf}, 2) \textbf{Root}-to-\textbf{Any} node, 3) \textbf{Any}-node to-\textbf{Any} node. - -Also, maximum or minimum questions is more difficult than the exact path sum, because sometimes when there are negative values in the tree, it makes the situation harder. - -We normally have two ways to solve these problems. One is using DFS traverse and use global variable and current path variable in the parameters of the recursive function to track the path and collect the results. - -The second way is DFS and Divide and Conquer, we treat each node as a root tree, we return its result, and for a node, after we get result of left and right subtree, we merge the result. -%%%%%%%%%%%%%%%%%%%%Root to Leaf Path%%%%%%%%%%%%%%%%%%%% -\subsubsection{Root to Leaf Path} -\begin{enumerate} - \item 112. Path Sum (Easy) - \item 113. Path Sum II (easy) - \item 129. Sum Root to Leaf Numbers (Medium) - \item 257. Binary Tree Paths (Easy, exer) -\end{enumerate} -\begin{examples}[resume] -\item \textbf{112. Path Sum (Easy).} Given a binary tree and a sum, determine if the tree has a root-to-leaf path such that adding up all the values along the path equals the given sum. 
Note: A leaf is a node with no children. -\begin{lstlisting}[numbers=none] -Example: -Given the below binary tree and sum = 22, - - 5 - / \ - 4 8 - / / \ - 11 13 4 - / \ \ -7 2 1 - -return true, as there exist a root-to-leaf path 5->4->11->2 which sum is 22. -\end{lstlisting} -\textbf{Solution: Tree Traversal, Leaf Node as Base Case}. Here we are asked the root-to-leaf path sum, we just need to traverse the tree and use the remaining sum after minusing the value of current node to visit its subtree. At the leaf node, if the remaining sum is equal to the node's value, we return True, otherwise False is returned. Time complexity is $O(n)$. -% is used case The best way to testify the code is to walk through the code with a simple example. For the following example, 1 has no left tree, so the if is None, it should return False for that empty left branch. For any leaf, that is the only place we can possibly return True if the root.val equals to the left sum to need to get. -% \begin{lstlisting} -% 1 -% \ -% 2 -% For sum = 1: using the following code will return True -% \end{lstlisting} -\begin{lstlisting}[language=Python] -def hasPathSum(self, root, sum): - """ - :type root: TreeNode - :type sum: int - :rtype: bool - """ - if root is None: # this is for empty tree - return False - if root.left is None and root.right is None: # a leaf as base case - return True if sum == root.val else False - - left = self.hasPathSum(root.left, sum-root.val) - if left: - return True - right = self.hasPathSum(root.right, sum-root.val) - if right: - return True - return False -\end{lstlisting} -\item \textbf{129. Sum Root to Leaf Numbers (Medium).} Given a binary tree containing digits from 0-9 only, each root-to-leaf path could represent a number. An example is the root-to-leaf path 1->2->3 which represents the number 123. Find the total sum of all root-to-leaf numbers. Note: A leaf is a node with no children. 
-\begin{lstlisting}[numbers=none] -Example: - -Input: [1,2,3,4,5] - 1 - / \ - 2 3 - / \ - 4 5 -Output: 262 -Explanation: -The root-to-leaf path 1->2->4 represents the number 124. -The root-to-leaf path 1->2->5 represents the number 125. -The root-to-leaf path 1->3 represents the number 13. -Therefore, sum = 124 + 125 + 13 = 262. -\end{lstlisting} -\textbf{Solution 1: Divide and Conquer.} In divide and conquer solution, we treat each child as a root, for node 4 and 5, they return 4 and 5. For node 2, it should get 24+25, in order to construct this value, the recursive function should return the value of its tree and the path length (number of nodes) of current node to all of its leaf nodes. Therefore for node 2, it has two paths: one with 4, and path length is 1, we get $2*10^(len)$+left, and the same for the right side. The return -\begin{lstlisting}[language=Python] -def sumNumbers(self, root): - """ - :type root: TreeNode - :rtype: int - """ - if not root: - return 0 - ans, _ = self.sumHelper(root) - return ans -def sumHelper(self, root): - if not root: - return (0, []) - if root.left is None and root.right is None: - return (root.val, [1]) # val and depth - left, ld = self.sumHelper(root.left) - right, rd = self.sumHelper(root.right) - # process: sum over the results till this subtree - ans = left+right - new_d = [] - for d in ld+rd: - new_d.append(d+1) - ans += root.val*10**(d) - return (ans, new_d) -\end{lstlisting} -\textbf{Solution 2: DFS and Parameter Tracker.} We can also construct the value from top-down, we simply record the path in the tree traversal, and at the end, we simply convert the result to the final answer. -\begin{lstlisting}[language=Python] - def sumNumbers(self, root): - """ - :type root: TreeNode - :rtype: int - """ - my_sum = [] - - self.dfs(root,"",my_sum) - - res = 0 - - for ele in my_sum: - res += int(ele) # convert a list to an int? 
- - return res - - - def dfs(self,node,routine,my_sum): - if not node: - return - - routine = routine + str(node.val) - if not node.left and not node.right: - my_sum.append(routine) - - self.dfs(node.left,routine,my_sum) - self.dfs(node.right,routine,my_sum -\end{lstlisting} -\end{examples} -%%%%%%%%%%%%%%%%%%%%%%%%%%Root to Any%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\subsubsection{Root to Any Node Path} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%Any to Any Path%%%%%%%%%%%%%%%%%%%%%%%%%% -\subsubsection{Any to Any Node Path} -In this subsection, we need a concept called Dual Recursive Function. -\begin{enumerate} - \item 437. Path Sum III (medium) - \item 124. Binary Tree Maximum Path Sum (hard) - \item 543. Diameter of Binary Tree (Easy, put in exercise) -\end{enumerate} -\begin{examples}[resume] -\item \textbf{437. Path Sum III} You are given a binary tree in which each node contains an integer value. Find the number of paths that sum to a given value. The path does not need to start or end at the root or a leaf, but it must go downwards (traveling only from parent nodes to child nodes). The tree has no more than 1,000 nodes and the values are in the range -1,000,000 to 1,000,000. -\begin{lstlisting}[numbers=none] -Example: - -root = [10,5,-3,3,2,null,11,3,-2,null,1], sum = 8 - - 10 - / \ - 5 -3 - / \ \ - 3 2 11 - / \ \ -3 -2 1 - -Return 3. The paths that sum to 8 are: - -1. 5 -> 3 -2. 5 -> 2 -> 1 -3. -3 -> 11 -\end{lstlisting} -\textbf{Solution 1: Dual Recurrence with Divide and Conquer.} In this problem, it is from any to any node, it is equivalent to finding the root->any with sum for all the nodes in the binary tree. We first write a function for root to any. The complexity is $O(n)$. 
-\begin{lstlisting}[language = Python] -def rootToAny(self, root, sum): - if root is None: - return 0 - # collect result at any node - sum -= root.val - count = 0 - if sum == 0: - count += 1 - return count + self.rootToAny(root.left, sum) + self.rootToAny(root.right, sum) -\end{lstlisting} -However, to count the any-to-any (downward) paths, we treat every node as a root and call rootToAny to count the satisfactory paths that start from the current node, and we divide the remaining task (paths starting from any other node) between its left and right subtrees. Thus the time complexity is $O(n^2)$: there are $n$ subproblems and each takes $O(n)$ time. -\begin{lstlisting}[language = Python] - '''first recursion: we traverse the tree and use any node as root, and call rootToAny to get its paths''' -def pathSum(self, root, sum): - if not root: - return 0 - - return self.rootToAny(root, sum) + self.pathSum(root.left, sum) + self.pathSum(root.right, sum) -\end{lstlisting} -\textbf{Solution 2: Optimization with Prefix Sum}. The above solution has a large amount of recomputation. This is similar to the same problem on an array, where we would need two pointers, one for the subarray start and another for the end. We can use prefix sums to decrease the time complexity to $O(n)$. The sum from n1 to n2 is P[n2]-P[n1] = sum; thus, at each node we need to check how many times P[n1], which equals P[n2]-sum, has appeared on the current root-to-node path. To deal with a case such as [0,0], sum = 0, we need to add 0:1 into the hashmap. Another difference is: in the tree we are using DFS traversal, so for a given node, when we finish visiting its left subtree and right subtree and return to its parent level, we need to restore the hashmap. So, this is DFS with backtracking too.
-\begin{lstlisting}[language=Python] -def anyToAnyPreSum(self, root, sum, curr, ans, preSum): - if root is None: - return - # process - curr += root.val - ans[0] += preSum[curr-sum] - preSum[curr] += 1 - self.anyToAnyPreSum(root.left, sum, curr, ans, preSum) - self.anyToAnyPreSum(root.right, sum, curr, ans, preSum) - preSum[curr] -= 1 #backtrack to current state - return - -def pathSum(self, root, sum): - if not root: - return 0 - ans = [0] - preSum = collections.defaultdict(int) - preSum[0] = 1 - self.anyToAnyPreSum(root, sum, 0, ans, preSum) - return ans[0] -\end{lstlisting} -\item \textbf{124. Binary Tree Maximum Path Sum (hard).} Given a non-empty binary tree, find the maximum path sum. For this problem, a path is defined as any sequence of nodes from some starting node to any node in the tree along the parent-child connections. The path must contain at least one node and \textbf{does not need to go through the root}. -\begin{lstlisting}[numbers=none] -Example 1: -Input: [1,2,3] - - 1 - / \ - 2 3 - -Output: 6 - -Example 2: - -Input: [-10,9,20,null,null,15,7] - - -10 - / \ - 9 20 - / \ - 15 7 - -Output: 42 -\end{lstlisting} - -\textbf{Solution 1: Dual Recurrence}: Before we head over to the optimized solution, first to understand the question. The question can be reparahased as: for each node, find the largest path sum that goes through this node (the path must contain at least one node thus the current node is the one it must include) which is being treated as a root, that is the largest left path sum and the largest right path sum, max(ans[0], max(left, 0)+max(right,0) + root.val). At first, we gain the max path sum from the root to any node, which we implement in the function maxRootToAny. And at the main function, we call maxRootToAny for left and right subtree, then merge the result, then we traverse to the left branch and right branch to do those things too. This is a straightforward dual recurrence. With time complexity $O(n^2)$. 
-\begin{lstlisting}[language=Python] -def maxRootToAny(self, root): - if root is None: - return 0 - left = self.maxRootToAny(root.left) - right = self.maxRootToAny(root.right) - # conquer: the current node - return root.val+max(0, max(left, right)) #if the left and right are both negative, we get rid of it -def maxPathSum(self, root): - """ - :type root: TreeNode - :rtype: int - """ - if root is None: - return 0 - def helper(root, ans): - if root is None: - return - left = self.maxRootToAny(root.left) - right = self.maxRootToAny(root.right) - ans[0] = max(ans[0], max(left, 0)+max(right,0)+root.val) - helper(root.left, ans) - helper(root.right, ans) - return - ans = [-sys.maxsize] - helper(root, ans) - return ans[0] -\end{lstlisting} -\textbf{Solution 2: Merge the Dual Recurrence}. If we observe these two recurrence function, we can see we use helper(root), we call maxRootToAny with left and right subtree, which is the same as maxRootToAny(root). Then in helper, we use helper(root.left) to call maxRootToAny(root.left.left) and maxRootToAny(root.left.right), which is exactly the same as maxRootToAny(root.left). Thus, the above solution has one power more of complexity. 
It can be simplied as the following code: -\begin{lstlisting}[language=Python] -def maxRootToAny(self, root, ans): - if root is None: - return 0 - left = self.maxRootToAny(root.left, ans) - right = self.maxRootToAny(root.right, ans) - ans[0] = max(ans[0], max(left, 0) + max(right,0) + root.val) #track the any->root->any maximum - # conquer: the current node - return root.val + max(0, max(left, right)) #track root->any maximum -def maxPathSum(self, root): - """ - :type root: TreeNode - :rtype: int - """ - if root is None: - return 0 - ans = [-sys.maxsize] - self.maxRootToAny(root, ans) - return ans[0] -\end{lstlisting} -The most important two lines of the code is: -\begin{lstlisting}[language=Python] -ans[0] = max(ans[0], max(left, 0) + max(right,0) + root.val) #track the any->root->any maximum -return root.val + max(0, max(left, right)) #track root->any maximum -\end{lstlisting} -\end{examples} -%%%%%%%%%%%%%%%%%%%%Merge%%%%%%%%%%%%%%%%%%%% -\subsection{Reconstruct the Tree} -In this section, we can be asked to rearrange the node or the value of the tree either in-place or out-of-place. Unless be required to do it in-place we can always use the divide and conquer with return value and merge. -\subsubsection{In-place Reconstruction} -\begin{enumerate} - \item 114. Flatten Binary Tree to Linked List -\end{enumerate} -\begin{examples}[resume] -\item \textbf{114. Flatten Binary Tree to Linked List (medium).} Given a binary tree, flatten it to a linked list in-place. -\begin{lstlisting}[numbers=none] -For example, given the following tree: - - 1 - / \ - 2 5 - / \ \ -3 4 6 - -The flattened tree should look like: - -1 - \ - 2 - \ - 3 - \ - 4 - \ - 5 - \ - 6 -\end{lstlisting} -\textbf{Solution: Inorder Traversal.} For this, we first notice the flatten rule is to connect node, node.left and node.right, where node.left and node.right is already flatten by the recursive call of the function. For node 2, it will be 2->3->4. 
First, we need to connect node.right to node.left by setting the last node of the left's right child to be node.right. -\begin{lstlisting}[language=Python] -def flatten(self, root): - """ - :type root: TreeNode - :rtype: void Do not return anything, modify root in-place instead. - """ - - if not root: - return - # preorder - self.flatten(root.left) # modify root.left - self.flatten(root.right) - - # traverse the left branch to connect with the right branch - if root.left is not None: - node = root.left - while node.right: - node = node.right - node.right = root.right - - else: - root.left = root.right - # connet node, left right - root.right = root.left - root.left = None -\end{lstlisting} - - -\end{examples} -\subsubsection{Out-of-place Reconstruction} -\begin{enumerate} - \item 617. Merge Two Binary Trees - \item 226. Invert Binary Tree (Easy) - \item 654. Maximum Binary Tree(Medium) -\end{enumerate} -\begin{examples}[resume] -\item \textbf{617. Merge Two Binary Trees.} Given two binary trees and imagine that when you put one of them to cover the other, some nodes of the two trees are overlapped while the others are not. - -You need to merge them into a new binary tree. The merge rule is that if two nodes overlap, then sum node values up as the new value of the merged node. Otherwise, the NOT null node will be used as the node of new tree. -\begin{lstlisting}[numbers=none] -Example 1: -Input: - Tree 1 Tree 2 - 1 2 - / \ / \ - 3 2 1 3 - / \ \ - 5 4 7 -Output: -Merged tree: - 3 - / \ - 4 5 - / \ \ - 5 4 7 - -Note: The merging process must start from the root nodes of both trees. -\end{lstlisting} -\textbf{Solution 1: DFS+Divide and Conquer}. In this problem, we just need to traverse these two trees at the same time with the same rule. When both is None which means we just reached a leaf node, we return None for its left and right child. If only one is None, then return the other according to the rule. 
Otherwise merge their values and assign the left subtree and right subtree to another recursive call and merge all the results to current new node. -\begin{lstlisting}[language=Python] -def mergeTrees(self, t1, t2): - if t1 is None and t2 is None: # both none - return None - if t1 is None and t2: - return t2 - if t1 and t2 is None: - return t1 - node = TreeNode(t1.val+t2.val) - # divide and conquer, left result and the right result - node.left = self.mergeTrees(t1.left, t2.left) - node.right = self.mergeTrees(t1.right, t2.right) - return node -\end{lstlisting} - -\item \textbf{226. Invert Binary Tree.} Invert a binary tree. -\begin{lstlisting}[numbers=none] -Example: - -Input: - - 4 - / \ - 2 7 - / \ / \ -1 3 6 9 - -Output: - - 4 - / \ - 7 2 - / \ / \ -9 6 3 1 -\end{lstlisting} -\textbf{Solution 1: Divide and Conquer}. -\begin{lstlisting}[language=Python] -def invertTree(self, root): - """ - :type root: TreeNode - :rtype: TreeNode - """ - if root is None: - return None - - # divide: the problem into reversing left subtree and right subtree - left = self.invertTree(root.left) - right = self.invertTree(root.right) - # conquer: current node - root.left = right - root.right = left - return root -\end{lstlisting} - -\item \textbf{654. Maximum Binary Tree.} Given an integer array with no duplicates. A maximum tree building on this array is defined as follow: - \begin{enumerate} - \item The root is the maximum number in the array. - \item The left subtree is the maximum tree constructed from left part subarray divided by the maximum number. - \item The right subtree is the maximum tree constructed from right part subarray divided by the maximum number. - \end{enumerate} - -Construct the maximum tree by the given array and output the root node of this tree. 
-\begin{lstlisting}[numbers=none] -Example 1: - -Input: [3,2,1,6,0,5] -Output: return the tree root node representing the following tree: - - 6 - / \ - 3 5 - \ / - 2 0 - \ - 1 - -Note: - - The size of the given array will be in the range [1,1000]. - -\end{lstlisting} -\textbf{Solution: Divide and Conquer}. The definition of the maximum binary tree (root, left subtree, right subtree) says that the root node holds the maximum value, and the left child is the maximum of the part of the array to the left of that maximum (and likewise for the right child). This fits divide and conquer, and it is very similar to the concept of \textbf{quick sort}, which partitions an array into two parts around a pivot. The average time complexity is $O(n \log n)$. In the worst case, the depth of the recursive tree can grow up to n, which happens in the case of a sorted nums array, giving a complexity of $O(n^2)$. -\begin{lstlisting}[language=Python] - def constructMaximumBinaryTree(self, nums): - """ - :type nums: List[int] - :rtype: TreeNode - """ - if not nums: - return None - (m,i) = max((v,i) for i,v in enumerate(nums)) - root = TreeNode(m) - root.left = self.constructMaximumBinaryTree(nums[:i]) - root.right = self.constructMaximumBinaryTree(nums[i+1:]) - return root -\end{lstlisting} -\textbf{Monotone Queue}. The key idea is: -\begin{enumerate} - \item We scan the numbers from left to right and build the tree one node per step; - \item We use a queue (a deque that we only push to and pop from at the back, i.e. used as a stack) to keep some (not all) tree nodes in decreasing order; - \item For each number, we keep popping the queue until it is empty or a bigger number appears; 1) The last kicked-out smaller number is the current node's left child (temporarily; this relationship may change in the future). 2) The bigger number (if it exists, it is still in the queue) is the current number's parent, so the current node is the bigger number's right child. Then we push the current number into the queue.
-\end{enumerate} -\begin{lstlisting}[language=Python] -def constructMaximumBinaryTree(self, nums): - """ - :type nums: List[int] - :rtype: TreeNode - """ - if not nums: - return None - deQ = collections.deque() - for i, v in enumerate(nums): - node = TreeNode(v) - while deQ and deQ[-1].val < v: - node.left = deQ[-1] - deQ.pop() - if deQ: - deQ[-1].right = node - deQ.append(node) - return deQ[0] -\end{lstlisting} -\end{examples} -%%%%%%%%%%%%%%%%%%%%%%%Count%%%%%%%%%%%%%%%%%%%%%% -\subsection{Ad Hoc Problems} -There are some other problems that are flexible and are highly customized requirements. We usually need to be more flexbile with the solutions too. Sometimes, we need to write multiple functions in order to solve one problem. -\begin{enumerate} - \item 250. Count Univalue Subtrees - \item 863. All Nodes Distance K in Binary Tree -\end{enumerate} -\begin{examples}[resume] -\item \textbf{250. Count Univalue Subtrees (medium). } Given a binary tree, count the number of uni-value subtrees. A Uni-value subtree means all nodes of the subtree have the same value. -\begin{lstlisting}[numbers=none] -Example : - -Input: root = [5,1,5,5,5,null,5] - - 5 - / \ - 1 5 - / \ \ - 5 5 5 - -Output: 4 -\end{lstlisting} -\textbf{Solution 1: DFS and Divide and Conquer}. First, all the leaf nodes are univalue subtree with count 1 and also it is the base case with (True, leaf.val, 1) as return. If we are at node 1, we check the left subtree and right subtree if they are univalue, and what is their value, and what is there count. Or for cases that a node only has one subtree. If the val of the subtree and the current node equals, we increase the count by one, and return (True, node.val, l\_count+r\_count+1). All the other cases, we only have (False, None, l\_count+r\_count). 
-\begin{lstlisting}[language = Python] -def countUnivalSubtrees(self, root): - if not root: - return 0 - - def univalSubtree(root): - if root.left is None and root.right is None: - return (True, root.val, 1) - l_uni, l_val, l_count = True, None, 0 - if root.left: - l_uni, l_val, l_count = univalSubtree(root.left) - r_uni, r_val, r_count = True, None, 0 - if root.right: - r_uni, r_val, r_count = univalSubtree(root.right) - if l_uni and r_uni: - if l_val is None or r_val is None:# a node with only one subtree - if l_val == root.val or r_val == root.val: - return (True, root.val, l_count+r_count+1) - else: - return (False, None, l_count+r_count) - if l_val == r_val == root.val: # a node with both subtrees - return (True, root.val, l_count+r_count+1) - else: - return (False, None, l_count+r_count) - return (False, None, l_count+r_count) - - _, _, count = univalSubtree(root) - return count -\end{lstlisting} -Or else we can use a global variable to record the subtree instead of returning the result from the tree. -\begin{lstlisting}[language=Python] -def countUnivalSubtrees(self, root): - def helper(root): - if not root:return True - if not root.left and not root.right: - self.res += 1 - return True - left_res = helper(root.left) - right_res = helper(root.right) - if root.left and root.right: - if root.val == root.left.val and root.val == root.right.val and left_res and right_res: - self.res += 1 - return True - return False - if root.left and not root.right: - if root.val == root.left.val and left_res: - self.res += 1 - return True - return False - if root.right and not root.left: - if root.val == root.right.val and right_res: - self.res += 1 - return True - return False - self.res = 0 - helper(root) - return self.res -\end{lstlisting} -\item \textbf{863. All Nodes Distance K in Binary Tree (medium).}We are given a binary tree (with root node root), a target node, and an integer value K. (Note that the inputs "root" and "target" are actually TreeNodes.) 
Return a list of the values of all nodes that have a distance K from the target node. The answer can be returned in any order. -\begin{lstlisting}[numbers=none] -Example 1: - -Input: root = [3,5,1,6,2,0,8,null,null,7,4], target = 5, K = 2 - 3 - / \ - 5 1 - / \ / \ - 6 2 0 8 - / \ - 7 4 -Output: [7,4,1] - -Explanation: -The nodes that are a distance 2 from the target node (with value 5) -have values 7, 4, and 1. -\end{lstlisting} -\begin{figure} - \centering - \includegraphics[width=0.7\columnwidth]{fig/example_863.png} - \caption{Two Cases of K Distance Nodes marked in blue and red arrows. } - \label{fig:distance_k} -\end{figure} -\textbf{Solution 1: DFS traversal with depth to target as return.} There are two cases for a path that contains the target, as denoted in Fig~\ref{fig:distance_k}: 1. The target is the starting point: we traverse downwards from the target to get the nodes that are K distance away from it. 2. The target is the ending point: we need to traverse back to its ancestors, first checking whether the distance between the ancestor and the target is K, and second using another function to find the nodes on the other branch of that ancestor that are at the remaining distance. Because we do not have a pointer back to the parent directly, we use a recursive tree traversal so that each call can return to the parent node its distance to the target. Therefore, we need two helper functions. The first function \textit{getDistanceK} takes a starting node and a distance K, and returns a list of the values of the nodes that are K distance downwards from the starting node. The second function \textit{getDepth} handles the second case: when we find the target in the tree traversal we return 0, and for an empty node (or a subtree that does not contain the target) we return -1.
-\begin{lstlisting}[language=Python] -def distanceK(self, root, target, K): - if not root: - return [] - def getDistanceK(target, K): - ans = [] - # from target to K distance - q = [target] - d = 0 - while q: - if d == K: - ans += [n.val for n in q] - break - nq = [] - for n in q: - if n.left: - nq.append(n.left) - if n.right: - nq.append(n.right) - q = nq - d += 1 - return ans - - # get depth of target - def getDepth(root, target, K, ans): - if not root: - return -1 - if root == target: - return 0 - # conquer - left = getDepth(root.left, target, K, ans) - right = getDepth(root.right, target, K, ans) - if left == -1 and right == -1: - return -1 - else: - dis = 0 - if left != -1: - dis = left+1 - if root.right: - ans += getDistanceK(root.right, K-dis-1) - else: - dis = right + 1 - if root.left: - ans += getDistanceK(root.left, K-dis-1) - if dis == K: - ans.append(root.val) - return dis - - ans = getDistanceK(target, K) - getDepth(root, target, K, ans) - return ans -\end{lstlisting} -\textbf{Solution 2: DFS to annotate parent node + BFS to K distance nodes.} In Solution 1, we have two cases because we can't traverse from a node to its parent directly. If we annotate each node with a pointer to its parent, the whole tree can be traversed as an undirected graph, and thus we can simply use BFS from the target to find all the nodes that are K distance away. This still has the same complexity.
-\begin{lstlisting}[language=Python] -def distanceK(self, root, target, K): - if not root: - return [] - def dfs(node, par = None): - if node is None: - return - node.par = par - dfs(node.left, node) - dfs(node.right, node) - dfs(root) - seen = set([target]) - q = [target] - d = 0 - while q: - if d == K: - return [node.val for node in q] - nq = [] - for n in q: - for nei in [n.left, n.right, n.par]: - if nei and nei not in seen: - seen.add(nei) - nq.append(nei) - q = nq - d += 1 - return [] -\end{lstlisting} -\end{examples} - - - -% \begin{enumerate} - -% \item Binary Tree Paths -% \begin{inparaenum} -% \item \textbf{Minimum Subtree} -% Given a binary tree, find the subtree with minimum sum. Return the root of the subtree. -% LintCode will print the subtree which root is your return node. - -% Solution: we need to get the value of the whole tree, = helper(left)+helper(right)+current val. It’s guaranteed that there is only one subtree with minimum sum and the given binary tree is not an empty tree. - -% \item \textbf{The maximum path sum in BT} -% \begin{inparaenum} -% \item the maximum path sum(root->leaf) - -% Example: For the following BT: -% \begin{lstlisting} -% 1 -% / \ -% 2 3 -% \end{lstlisting} -% Return 4. (The maximum path is 1->3). However, if we have negative value, this is not going to work. -% \begin{lstlisting} [language = Python] -% public int maxPathSum2(TreeNode root) { -% if (root == null) { -% return 0 #th -% } -% int left = maxPathSum2(root.left) -% int right = maxPathSum2(root.right) - -% return root.val + Math.max(left, right) #at least root+one of the subtree -% } -% \end{lstlisting} - -% \item the maximum path sum(root->any) - -% Binary Tree Maximum Path Sum II, http://www.lintcode.com/zh-cn/problem/binary-tree-maximum-path-sum-ii/ - -% The path can be from root to any node, but it needs include at least one nod, which is the root. -% Example, For the following BT: -% \begin{lstlisting} -% 1 -% / \ -% 2 3 -% \end{lstlisting} -% Return 4. 
(Maximum Path is 1->3) - -% Solution: this one is slightly different, for each node, we can return the sum of current node +left subtree, or current node+ right subtree, or we just return current node, which means the path ends here. -% For the divide and conquer: 1) Recursive end condition: when the node is null. 2) Divide: divide the tree into the result of the left subtree and right subtree. 3)Conquer: merge the result from the divide. -% \begin{lstlisting} [language = Python] -% public int maxPathSum2(TreeNode root) { -% if (root == null) { -% return 0; -% } -% //divide -% int left = maxPathSum2(root.left); -% int right = maxPathSum2(root.right); -% //conquer -% return root.val + Math.max(0, Math.max(left, right)); #if the max is negative, we get rid of them, use 0 instead. -% } -% \end{lstlisting} -% \item the maximum path sum(any->any) - -% 2.5 Binary Tree Maximum Path Sum - - -% \end{inparaenum} -% \item Reverse from Traverse result to build tree - - -% \end{inparaenum} -% \end{enumerate} -% \section{Time complexity of Binary Tree} -% If we spent O(n) to convert $T(n)$ to $2T(n/2)$. We have the following deduction: -% \begin{equation} \label{bt_time} -% \begin{split} -% T(n) & = 2T(n/2) + O(n)\\ -% & = 2 * 2T(n/4) + O(n) + O(n)\\ -% & = O(nlogn) -% \end{split} -% \end{equation} -% which is the same as merge sort. If the divide cost is only $O(1)$. -% \begin{equation}\label{bt_time2} -% \begin{split} -% T(n) &= 2T(n/2) + O(1)\\ -% & = 2 * 2T(n/4) + O(1) + O(1)\\ -% &= n + (1 + 2 + 4 +...+ n)\\ -% &\approx n + 2n\\ -% &\approx O(n) -% \end{split} -% \end{equation} - - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%%%%BST Related problems and algorithms -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\section{Binary Searching Tree (BST)} -\subsection{BST Rules} -\begin{enumerate} - \item 98. Validate Binary Search Tree (Medium) - \item 99. Recover Binary Search Tree(hard) - \item 426. 
Convert Binary Search Tree to Sorted Doubly Linked List (medium) -\end{enumerate} -\begin{examples}[resume] -\item \textbf{98. Validate Binary Search Tree (medium)} Given a binary tree, determine if it is a valid binary search tree (BST). Assume a BST is defined as follows: -\begin{itemize} - \item The left subtree of a node contains only nodes with keys less than the node's key. - \item The right subtree of a node contains only nodes with keys greater than the node's key. - \item Both the left and right subtrees must also be binary search trees. - \end{itemize} -\begin{lstlisting} -Example 1: - -Input: - 2 - / \ - 1 3 -Output: true - -Example 2: - - 5 - / \ - 1 4 - / \ - 3 6 -Output: false -Explanation: The input is: [5,1,4,null,null,3,6]. The root node's value - is 5 but its right child's value is 4. -\end{lstlisting} -\textbf{Solution 1: Limit the value range for subtrees: top-down}. We start from the root, whose value should be in the open range $(-\infty, +\infty)$. The values in the left subtree should then be limited to $(-\infty, root.val)$, and those in the right subtree to $(root.val, +\infty)$. The code is simple and clean: -\begin{lstlisting}[language=Python] -def isValidBST(self, root, minv=float("-inf"), maxv=float("inf")): - """ - :type root: TreeNode - :rtype: bool - """ - if root is None: - return True - - if (minv < root.val < maxv): - return self.isValidBST(root.left, minv, root.val) and self.isValidBST(root.right, root.val, maxv) - return False -\end{lstlisting} -\textbf{Solution 2: Limit the value range for parent node: bottom-up}.
We traverse the tree, and we return values from the None node, then we have three cases: -\begin{lstlisting} -1) both subtrees are None # a leaf - return (True, root.val, root.val) -2) both subtrees are not None: # a subtree with two branches - check if l2 < root.val < r1: - merge the range to: - return (True, l1, r2) -3) one subtree is None: # a subtree with one branches: - only check one of l2, r1 and merge accordingly -\end{lstlisting} -\textbf{Solution 2: Using inorder}. If we use inorder, then the tree resulting list we obtained should be strictly increasing. -\begin{lstlisting}[language=Python] -def isValidBST(self, root): - if root is None: - return True - - def inOrder(root): - if not root: - return [] - return inOrder(root.left) + [root.val] + inOrder(root.right) - ans = inOrder(root) - pre = float("-inf") - for v in ans: - if v <= pre: - return False - pre = v - return True -\end{lstlisting} -\item \textbf{99. Recover Binary Search Tree (hard).} Two elements of a binary search tree (BST) are swapped by mistake. Recover the tree without changing its structure. -\begin{lstlisting}[numbers=none] -Example 1: - -Input: [1,3,null,null,2] - - 1 - / - 3 - \ - 2 - -Output: [3,1,null,null,2] - - 3 - / - 1 - \ - 2 - -Example 2: - -Input: [3,1,4,null,null,2] - - 3 - / \ -1 4 - / - 2 - -Output: [2,1,4,null,null,3] - - 2 - / \ -1 4 - / - 3 -\end{lstlisting} -Follow up: A solution using O(n) space is pretty straight forward. Could you devise a constant space solution? - -\textbf{Solution 1: Recursive InOrder Traversal and Sorting, O(n) space.} The same as validating a BST, the inorder traversal of a valid BST must have a sorted order. Therefore, we obtain the inorder traversed list, and sort them by the node value, and compared the sorted list and the unsorted list to find the swapped nodes. -\begin{lstlisting}[language=Python] -def recoverTree(self, root): - """ - :type root: TreeNode - :rtype: void Do not return anything, modify root in-place instead. 
- """ - def inorder(root): - if not root: - return [] - return inorder(root.left) + [root] + inorder(root.right) - - - ans = inorder(root) - sans = sorted(ans, key = lambda x: x.val) - # swap - for x, y in zip(ans,sans): - if x != y: - x.val, y.val = y.val, x.val - break -\end{lstlisting} -\textbf{Solution 2: Iterative Traversal: O(1) space.} The inorder traversal for each example are: -\begin{lstlisting}[numbers=none] -Example 1: [3, 2, 1], need to switch 3, 1 -Example 2: [1, 3, 2, 4], need to switch 3, 2 -\end{lstlisting} -If we observe the inorder list: if we check the previous and current pair, if it is dropping as (3,2), (2,1), then we call this dropping pairs. In example 2, there is only one pair (3,2). This is the two possoble cases when we swap a pair of elements in a sorted list. If we use the inorder iterative traversal, and record the pre, cur dropping pairs, then it is straightforward to do the swapping of the dropping pair or just one pair. -\begin{lstlisting}[language=Python] -def recoverTree(self, root): - cur, pre, stack = root, TreeNode(float("-inf")), [] - drops = [] - # inorder iterative: left root, right - while stack or cur: - while cur: - stack.append(cur) - cur = cur.left - cur = stack.pop() - if cur.val < pre.val: - drops.append((pre, cur)) - pre, cur = cur, cur.right - - drops[0][0].val, drops[-1][1].val = drops[-1][1].val, drops[0][0].val -\end{lstlisting} -\item \textbf{426. Convert Binary Search Tree to Sorted Doubly Linked List (medium)} Convert a BST to a sorted circular doubly-linked list in-place. Think of the left and right pointers as synonymous to the previous and next pointers in a doubly-linked list. One example is shown in Fig.~\ref{fig:bst_dll}. -\begin{figure}[h!] 
- \centering - \includegraphics[width=0.45\columnwidth]{fig/bstdlloriginalbst.png} - \includegraphics[width=0.45\columnwidth]{fig/bstdllreturndll.png} - \caption{Example of BST to DLL} - \label{fig:bst_dll} -\end{figure} - -\textbf{Analysis} As we observe the example, for each node in the doubly linked list (dll), its predecessor and successor is the same as the same node in BST. As we have learned the concept of predecessor and successor in Chapter~\ref{chapter_tree}, we know how to find the predecessor and successor individually for each node. However, in this scene, it would be more useful with the inorder traversal, wherein we can use divide and conquer to obtain the left sorted list and the right sorted list for each node. More than this, we need to make the dll, we have two choices to do this: 1) Use our learned inorder traversal to generate a list, and then generate the dll from the list of BST nodes. 2) Combine the inorder traversal together with the linking process. - -\textbf{Solution 1: Inorder traversal + Doubly linked List Connect.} This process is straightforward, we need to handle the case where the BST only has one node, or for BST that has at least two nodes. 
For the second case, we should handle the head and tail node seperately due to its different linking rule: -\begin{lstlisting}[language=Python] -def treeToDoublyList(self, root): - """ - :type root: Node - :rtype: Node - """ - if not root: - return None - - def treeTraversal(root): - if not root: - return [] - left = treeTraversal(root.left) - - right = treeTraversal(root.right) - return left + [root] + right - - sortList = treeTraversal(root) - if len(sortList) == 1: - sortList[0].left = sortList[0] - sortList[0].right = sortList[0] - return sortList[0] - - for idx, node in enumerate(sortList): - if idx == 0: - node.right = sortList[idx+1] - node.left = sortList[-1] - elif idx == len(sortList) - 1: - node.right = sortList[0] - node.left = sortList[idx-1] - else: - node.right = sortList[idx+1] - node.left = sortList[idx-1] - return sortList[0] -\end{lstlisting} - -\textbf{Solution 2: Inorder traversal together with linking process.} We use divide and conquer method and assuming the left and right function call gives us the head of the dll on each side. With left\_head and right\_head, we just need to link these two separate dlls with current node in the process of inorder traversal. The key here is to find the tail left dll, and link them like: left\_tail+current\_node+right\_head, and link left\_head with right\_tail. With dlls, to find the tail from the head, we just need to use head.left. -\begin{lstlisting}[language=Python] -def treeToDoublyList(self, root): - """ - :type root: Node - :rtype: Node - """ - if not root: return None - - left_head = self.treeToDoublyList(root.left) - right_head = self.treeToDoublyList(root.right) - return self.concat(left_head, root, right_head) - - -""" -Concatenate a doubly linked list (prev_head), a node -(curr_node) and a doubly linked list (next_head) into -a new doubly linked list. 
-""" -def concat(self, left_head, curr_node, right_head): - # for current node, it has only one node, head and tail is the same - new_head, new_tail = curr_node, curr_node - - if left_head: - # find left tail - left_tail = left_head.left - # connect tail with current node - left_tail.right = curr_node - curr_node.left = left_tail - # new_head points to left_head - new_head = left_head - - if right_head: - right_tail = right_head.left - # connect head with current node - curr_node.right = right_head - right_head.left = curr_node - new_tail = right_tail # new_tail points to right_tail - - new_head.left = new_tail - new_tail.right = new_head - return new_head -\end{lstlisting} -\end{examples} - -\subsection{Operations} -In this section, we should problems related to operations we introduced in section~\ref{concept_binary_search_tree}, which include SEARCH, INSERT, GENERATE, DELETE. LeetCode Problems include: -\begin{enumerate} - \item 108. Convert Sorted Array to Binary Search Tree - \item 96. Unique Binary Search Trees -\end{enumerate} - -\begin{examples}[resume] -\item \textbf{108. Convert Sorted Array to Binary Search Tree.} Given an array where elements are sorted in ascending order, convert it to a height balanced BST. For this problem, a height-balanced binary tree is defined as a binary tree in which the depth of the two subtrees of every node never differ by more than 1. -\begin{lstlisting}[numbers=none] -Example: - -Given the sorted array: [-10,-3,0,5,9], - -One possible answer is: [0,-3,9,-10,null,5], which represents the following height balanced BST: -\begin{lstlisting} - 0 - / \ - -3 9 - / / - -10 5 -\end{lstlisting} - -\textbf{Solution: Binary Searching.} use the binary search algorithm, the stop condition is when the l>r. 
-\begin{lstlisting}[language = Python] -def sortedArrayToBST(self, nums): - """ - :type nums: List[int] - :rtype: TreeNode - """ - def generatebalancedBST(l,r): - if l>r: - return None - m = (l+r)//2 - tree = TreeNode(nums[m]) - tree.left = generatebalancedBST(l,m-1) - tree.right = generatebalancedBST(m+1,r) - return tree - return generatebalancedBST(0,len(nums)-1) -\end{lstlisting} - -109. Convert Sorted List to Binary Search Tree, the difference is here we have a linked list, we can convert the linked list into a list nums - -\item \textbf{96. Unique Binary Search Trees} - -Given n, how many structurally unique BST’s (binary search trees) that store values 1…n? -\begin{lstlisting}[numbers=none] -For example, - - Given n = 3, there are a total of 5 unique BST's. - 1 3 3 2 1 - \ / / / \ \ - 3 2 1 1 3 2 - / / \ \ - 2 1 2 3 -\end{lstlisting} - -Solution: When we read the signal, list all of it, we need to use for loop, to pose each element as root, and the left side is left tree, the right side is used for the right tree. Use DPS: We generated all the BST that use ith node as root -\begin{lstlisting}[language = Python] -def numTrees(self, n): - """ - :type n: int - :rtype: int - """ - def constructAllBST(start,end): - if start>end: - return [None] - - #go through the start to end, and use the ith as root - rslt=[] - leftsubs,rightsubs=[],[] - for i in xrange(start,end+1): - - leftsubs=constructAllBST(start,i-1) - rightsubs=constructAllBST(i+1,end) - for leftnode in leftsubs: - for rightnode in rightsubs: - node = TreeNode(i) - node.left=leftnode - node.right=rightnode - rslt.append(node) - return rslt - -rslt= constructAllBST(1,n) - return len(rslt) - \end{lstlisting} - -If we only need length, a slightly better solution showing as follows. 
-\begin{lstlisting}[language = Python] -def numTrees(self, n): - """ - :type n: int - :rtype: int - """ - def constructAllBST(start,end): - if start>end: - return 1 - - #go through the start to end, and use the ith as root - count = 0 - leftsubs,rightsubs=[],[] - for i in xrange(start,end+1): - - leftsubs=constructAllBST(start,i-1) - rightsubs=constructAllBST(i+1,end) - count+=leftsubs*rightsubs - return count - -rslt= constructAllBST(1,n) - return rslt - \end{lstlisting} - -However, it still cant pass the test, try the bottom up iterative solution with memorization: $T(start,end)=T(start,i-1)*T(i+1,end) T(j,i)=T(j,i-1)*T(i+1,i)$. How to explain this? -\begin{lstlisting}[language = Python] -def numTrees1(self, n): - res = [0] * (n+1) - res[0] = 1 - for i in xrange(1, n+1): #when i=2, j=[0,1] res[2] = res[0]*res[2-1-0] + res[1]*res[2-1-1] - for j in xrange(i): #i [1,n], j =[0,i), the case if for one node, - res[i] += res[j] * res[i-1-j] - return res[n] -\end{lstlisting} -Using math: -\begin{lstlisting}[language = Python] -# Catalan Number (2n)!/((n+1)!*n!) -def numTrees(self, n): - return math.factorial(2*n)/(math.factorial(n)*math.factorial(n+1)) -\end{lstlisting} - -\end{examples} - -\subsection{Find certain element of the tree} -successor or predecessor:285. Inorder Successor in BST, 235. Lowest Common Ancestor of a Binary Search Tree -\begin{enumerate} - \item 285. Inorder Successor in BST - \item 235. Lowest Common Ancestor of a Binary Search Tree - \item 230. Kth Smallest Element in a BST - \item 270. Closest Binary Search Tree Value - \item 272. Closest Binary Search Tree Value II - \item 426. Convert Binary Search Tree to Sorted Doubly Linked List (find the precessor and successor) -\end{enumerate} -285. Inorder Successor in BST - -First, we can follow the definition, use the inorder traverse to get a list of the whole nodes, and we search for the node p and return its next in the lst. 
-\begin{lstlisting}[language = Python] -#takes 236 ms -def inorderSuccessor(self, root, p): - """ - :type root: TreeNode - :type p: TreeNode - :rtype: TreeNode - """ - lst = [] - def inorderTravel(node): - if not node: - return None - inorderTravel(node.left) - lst.append(node) - inorderTravel(node.right) - - inorderTravel(root) - - for i in xrange(len(lst)): - if lst[i].val==p.val: - if i+1b: - return LCA(node.left) - if node.valtarget: - if root.val-target= L). You might need to change the root of the tree, so the result should return the new root of the trimmed binary search tree. - -Example 2: -\begin{lstlisting} -Input: - 3 - / \ - 0 4 - \ - 2 - / - 1 - - L = 1 - R = 3 - -Output: - 3 - / - 2 - / - 1 - \end{lstlisting} - -Solution: Based on F1, if the value of current node is smaller than L, suppose at 0, then we delete its left child, node.left = None, then we check its right size, go to node.right, we return node = goto(node.right), if it is within range, then we keep checking left, right, and return current node -\begin{lstlisting}[language = Python] -def trimBST(self, root, L, R): - """ - :type root: TreeNode - :type L: int - :type R: int - :rtype: TreeNode - """ - def trimUtil(node): - if not node: - return None - if node.valR: - node.right=None - node=trimUtil(node.left) - return node - else: - node.left=trimUtil(node.left) - node.right=trimUtil(node.right) - return node - return trimUtil(root) -\end{lstlisting} -A mutant of this is to split the BST into two, one is smaller or equal to the given value, the other is bigger. -\subsection{Split the Tree} -Split the tree - -with a certain value ,776. Split BST - -776. Split BST - -Given a Binary Search Tree (BST) with root node root, and a target value V, split the tree into two subtrees where one subtree has nodes that are all smaller or equal to the target value, while the other subtree has all nodes that are greater than the target value. 
It's not necessarily the case that the tree contains a node with value V. - -Additionally, most of the structure of the original tree should remain. Formally, for any child C with parent P in the original tree, if they are both in the same subtree after the split, then node C should still have the parent P. - -You should output the root TreeNode of both subtrees after splitting, in any order. - -Example 1: -\begin{lstlisting} -Input: root = [4,2,6,1,3,5,7], V = 2 -Output: [[2,1],[4,3,6,null,null,5,7]] -Explanation: -Note that root, output[0], and output[1] are TreeNode objects, not arrays. -\end{lstlisting} - -The given tree [4,2,6,1,3,5,7] is represented by the following diagram: -\begin{lstlisting} - 4 - / \ - 2 6 - / \ / \ - 1 3 5 7 -\end{lstlisting} - -Solution: The coding is quite similar as the trimming. -\begin{lstlisting}[language = Python] -class Solution(object): - def splitBST(self, root, V): - """ - :type root: TreeNode - :type V: int - :rtype: List[TreeNode] - """ - def splitUtil(node): - if not node: - return (None,None) - if node.val<=V: - sb1,sb2 = splitUtil(node.right) #the left subtree will satisfy the condition, split the right subtree - node.right=sb1 #Now set the right subtree with sb1 that - return (node, sb2) - else: - sb1, sb2=splitUtil(node.left) #the right subtree satisfy the condition, split the left subtree - node.left=sb2 - return (sb1,node) - return list(splitUtil(root)) -\end{lstlisting} - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%Exercise%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\section{Exercise} -\subsection{Depth} -104. Maximum Depth of Binary Tree (Easy) - -Given a binary tree, find its maximum depth. The maximum depth is the number of nodes along the longest path from the root node down to the farthest leaf node. - -Note: A leaf is a node with no children. -\begin{lstlisting} -Example: - -Given binary tree [3,9,20,null,null,15,7], - - 3 - / \ - 9 20 - / \ - 15 7 - -return its depth = 3. 
-\end{lstlisting} -\textbf{DFS+Divide and conquer}. -\begin{lstlisting}[language=Python] -def maxDepth(self, root): - if not root: - return 0 - if not root.left and not root.right: - return 1 - depth = -sys.maxsize - if root.left: - depth = max(depth, self.maxDepth(root.left)) - if root.right: - depth = max(depth, self.maxDepth(root.right)) - return depth+1 -\end{lstlisting} -559. Maximum Depth of N-ary Tree (Easy) - -Given a n-ary tree, find its maximum depth. The maximum depth is the number of nodes along the longest path from the root node down to the farthest leaf node. -\begin{lstlisting}[language=Python] -# Definition for a Node. -class Node(object): - def __init__(self, val, children): - self.val = val - self.children = children - -def maxDepth(self, root): - if not root: - return 0 - children = root.children - if not any(children): # a leaf - return 1 - depth = -sys.maxsize - for c in children: - if c: - depth = max(depth, self.maxDepth(c)) - return depth+1 -\end{lstlisting} -%%%%%%%%%%%%%%%%%%%%%%%%%%%Paths%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\subsection{Path} -\textbf{113. Path Sum II (medium).} Given a binary tree and a sum, find all root-to-leaf paths where each path's sum equals the given sum. 
-\textit{Note: A leaf is a node with no children.} -\begin{lstlisting}[numbers=none] -Example: -Given the below binary tree and sum = 22, - - 5 - / \ - 4 8 - / / \ - 11 13 4 - / \ / \ -7 2 5 1 - -Return: - -[ - [5,4,11,2], - [5,8,4,5] -] -\end{lstlisting} -\begin{lstlisting}[language=Python] -def pathSumHelper(self, root, sum, curr, ans): - if root is None: # this is for one brach tree - return - if root.left is None and root.right is None: # a leaf as base case - if sum == root.val: - ans.append(curr+[root.val]) - return - - self.pathSumHelper(root.left, sum-root.val, curr+[root.val], ans) - - self.pathSumHelper(root.right, sum-root.val, curr+[root.val], ans) - -def pathSum(self, root, sum): - """ - :type root: TreeNode - :type sum: int - :rtype: List[List[int]] - """ - ans = [] - self.pathSumhelper(root, sum, [], ans) - return ans -\end{lstlisting} - -257. Binary Tree Paths - -Given a binary tree, return all root-to-leaf paths. - -Note: A leaf is a node with no children. -\begin{lstlisting} -Example: -Input: - - 1 - / \ -2 3 - \ - 5 -Output: ["1->2->5", "1->3"] -Explanation: All root-to-leaf paths are: 1->2->5, 1->3 -\end{lstlisting} -\textbf{Root to Leaf}. Becareful that we only collect result at the leaf, and for the right tree and left tree we need to make sure it is not None: -\begin{lstlisting}[language=Python] -def binaryTreePaths(self, root): - """ - :type root: TreeNode - :rtype: List[str] - """ - def dfs(root, curr, ans): - if root.left is None and root.right is None: # a leaf - ans.append(curr+str(root.val)) - return - if root.left: - dfs(root.left, curr+str(root.val)+'->', ans) - if root.right: - dfs(root.right, curr+str(root.val)+'->', ans) - if root is None: - return [] - ans = [] - dfs(root, '', ans) - return ans -\end{lstlisting} -543. Diameter of Binary Tree - - Given a binary tree, you need to compute the length of the diameter of the tree. The diameter of a binary tree is the length of the longest path between any two nodes in a tree. 
This path may or may not pass through the root. -\begin{lstlisting} -Example: -Given a binary tree - - 1 - / \ - 2 3 - / \ - 4 5 - -Return 3, which is the length of the path [4,2,1,3] or [5,2,1,3]. -\end{lstlisting} -\textbf{Root to Any with Global Variable to track the any to any through root}. -\begin{lstlisting}[language=Python] -def diameterOfBinaryTree(self, root): - """ - :type root: TreeNode - :rtype: int - """ - # this is the longest path from any to any - - def rootToAny(root, ans): - if not root: - return 0 - left = rootToAny(root.left, ans) - right = rootToAny(root.right, ans) - ans[0] = max(ans[0], left+right) # track the any to any through root - return max(left, right) + 1 #get the maximum depth of root to any - ans = [0] - rootToAny(root, ans) - return ans[0] -\end{lstlisting} -% \end{examples} -\end{document} \ No newline at end of file diff --git a/Easy-Book/chapters/question_7_specific_algorithms_for_graph.tex b/Easy-Book/chapters/question_7_specific_algorithms_for_graph.tex deleted file mode 100644 index e60ec3d..0000000 --- a/Easy-Book/chapters/question_7_specific_algorithms_for_graph.tex +++ /dev/null @@ -1,459 +0,0 @@ -\documentclass[../main.tex]{subfiles} -\begin{document} -In this chapter, we will introduce a variety of algorithms for graphs, and summaries different type of questions from the LeetCode. There are mainly three sections, searching algorithms in graph, which we already introduced in Chapter XX, algorithms that can be applied in the graph include breadth-first search, depth-first search and the topological sort. The second is shortest paths searching algorithms. So for the graph data structure, we usually need to search. -Basic DFS/BFS can be applied into any graph data structures. The following sections include more advanced problems, including the concept in Chapter~\ref{chapter_advanced_non_linear_search}. 
-%%%%%%%%%%%%%%%%%%%%Basic BFS and DFS%%%%%%%%%%%%%%%% -\section{Basic BFS and DFS} -There are two types of questions: -\begin{itemize} - \item those that explicitly tell us we need to find a path, or the shortest/longest path, in the graph, - \item those that implicitly require us to use DFS/BFS; for this type of problem, we need to build the graph ourselves first. -\end{itemize} -\subsection{Explicit BFS/DFS} - -\subsection{Implicit BFS/DFS} -\begin{examples}[resume] -\item \textbf{582. Kill Process (medium).} Given n processes, each process has a unique PID (process id) and its PPID (parent process id). - -Each process only has one parent process, but may have one or more children processes. This is just like a tree structure. Only one process has PPID that is 0, which means this process has no parent process. All the PIDs will be distinct positive integers. - -We use two lists of integers to represent a list of processes, where the first list contains PID for each process and the second list contains the corresponding PPID. - -Now given the two lists, and a PID representing a process you want to kill, return a list of PIDs of processes that will be killed in the end. You should assume that when a process is killed, all its children processes will be killed. No order is required for the final answer. -\begin{lstlisting}[numbers=none] -Example 1: - -Input: -pid = [1, 3, 10, 5] -ppid = [3, 0, 5, 3] -kill = 5 -Output: [5,10] -Explanation: - 3 - / \ - 1 5 - / - 10 -Kill 5 will also kill 10. -\end{lstlisting} - -Analysis: The parent-child relation forms a tree-like data structure, which is also a graph. Instead of building a tree data structure first, we use a graph defined as a defaultdict indexed by the parent node, where the value is the list of its children nodes. In such a graph, finding the processes to kill is the same as doing a DFS/BFS starting from the kill node; we just save all the nodes visited in the process. Here, we only give the BFS solution, which uses a list as a queue and pops from its front. 
-\begin{lstlisting}[language=Python] -from collections import defaultdict -def killProcess(self, pid, ppid, kill): - """ - :type pid: List[int] - :type ppid: List[int] - :type kill: int - :rtype: List[int] - """ - # first sorting: nlog n, - graph = defaultdict(list) - for p_id, id in zip(ppid, pid): - graph[p_id].append(id) - - q = [kill] - path = set() - while q: - id = q.pop(0) - path.add(id) - for neig in graph[id]: - if neig in path: - continue - q.append(neig) - return list(path) -\end{lstlisting} -\end{examples} -\section{Connected Components} -\begin{examples}[resume] -\item \textbf{130. Surrounded Regions(medium).} Given a 2D board containing 'X' and 'O' (the letter O), capture all regions surrounded by 'X'. A region is captured by flipping all 'O's into 'X's in that surrounded region. Surrounded regions shouldn’t be on the border, which means that any 'O' on the border of the board are not flipped to 'X'. Any 'O' that is not on the border and it is not connected to an 'O' on the border will be flipped to 'X'. Two cells are connected if they are adjacent cells connected horizontally or vertically. -\begin{lstlisting}[numbers=none] -Example: - -X X X X -X O O X -X X O X -X O X X - -After running your function, the board should be: - -X X X X -X X X X -X X X X -X O X X -\end{lstlisting} -\textbf{Solution 1: Use DFS and visited matrix.} First, this is to do operations either filip 'O' or keep it. If 'O' is at the boarder, and any other 'O' that is connected to the boardary 'O', (the connected componets that can be found through DFS) will be kept. The complexity is $O(mn)$, m, n is the rows and columns. -\begin{lstlisting}[language=Python] -def solve(self, board): - """ - :type board: List[List[str]] - :rtype: void Do not return anything, modify board in-place instead. 
- """ - if not board: - return - rows, cols = len(board), len(board[0]) - if rows == 1 or cols == 1: - return - if rows == 2 and cols == 2: - return - moves = [(0, -1), (0, 1), (-1, 0), (1, 0)] - # find all connected components to the edge 0, and mark them as -1, - # then flip all 0s in the other parts - # change the -1 to 0s - visited = [[False for c in range(cols)] for r in range(rows)] - def dfs(x, y): # (x, y) is the edge 0s - for dx, dy in moves: - nx = x + dx - ny = y + dy - if nx < 0 or nx >= rows or ny < 0 or ny >= cols: - continue - if board[nx][ny] == 'O' and not visited[nx][ny]: - visited[nx][ny] = True - dfs(nx, ny) - # first and last col - for i in range(rows): - if board[i][0] == 'O' and not visited[i][0]: - visited[i][0] = True - dfs(i, 0) - if board[i][-1] == 'O' and not visited[i][-1]: - visited[i][-1] = True - dfs(i, cols-1) - # first and last row - for j in range(cols): - if board[0][j] == 'O' and not visited[0][j]: - visited[0][j] = True - dfs(0, j) - if board[rows-1][j] == 'O' and not visited[rows-1][j]: - visited[rows-1][j] = True - dfs(rows-1, j) - for i in range(rows): - for j in range(cols): - if board[i][j] == 'O' and not visited[i][j]: - board[i][j] = 'X' - -\end{lstlisting} -\textbf{Solution 2: mark visited 'O' as '-1' to save space.} Instead of using a $O(mn)$ space to track the visited vertices, we can just mark the connected components of the boundary 'O' as '-1' in the DFS process, and then we just need another round to iterate the matrix to flip all the remaining 'O' and flip the '-1' back to 'O'. 
-\begin{lstlisting}[language=Python] -def solve(self, board): - if not board: - return - rows, cols = len(board), len(board[0]) - if rows == 1 or cols == 1: - return - if rows == 2 and cols == 2: - return - moves = [(0, -1), (0, 1), (-1, 0), (1, 0)] - # find all connected components to the edge 0, and mark them as -1, - # then flip all 0s in the other parts - # change the -1 to 0s - def dfs(x, y): # (x, y) is the edge 0s - for dx, dy in moves: - nx = x + dx - ny = y + dy - if nx < 0 or nx >= rows or ny < 0 or ny >= cols: - continue - if board[nx][ny] == 'O': - board[nx][ny] = '-1' - dfs(nx, ny) - return - # first and last col - for i in range(rows): - if board[i][0] == 'O': - board[i][0] = '-1' - dfs(i, 0) - if board[i][-1] == 'O' : - board[i][-1] = '-1' - dfs(i, cols-1) - # # first and last row - for j in range(cols): - if board[0][j] == 'O': - board[0][j] = '-1' - dfs(0, j) - if board[rows-1][j] == 'O': - board[rows-1][j] = '-1' - dfs(rows-1, j) - for i in range(rows): - for j in range(cols): - if board[i][j] == 'O': - board[i][j] = 'X' - elif board[i][j] == '-1': - board[i][j] = 'O' - else: - pass -\end{lstlisting} - -\item \textbf{323. Number of Connected Components in an Undirected Graph (medium).} -Given n nodes labeled from 0 to n - 1 and a list of undirected edges (each edge is a pair of nodes), write a function to find the number of connected components in an undirected graph. -\begin{lstlisting}[numbers=none] -Example 1: - -Input: n = 5 and edges = [[0, 1], [1, 2], [3, 4]] - - 0 3 - | | - 1 --- 2 4 - -Output: 2 - -Example 2: - -Input: n = 5 and edges = [[0, 1], [1, 2], [2, 3], [3, 4]] - - 0 4 - | | - 1 --- 2 --- 3 - -Output: 1 -\end{lstlisting} -\textbf{Solution: Use DFS.} First, if given n node, and have edges, it will have n components. -\begin{lstlisting}[numbers=none] -for n in vertices: - if n not visited: - DFS(n) # this is a component traverse its connected components and mark them as visited. 
-\end{lstlisting} -Before we start the main part, it is easier if we can convert the edge list into undirected graph using adjacencly list. Because it is undirected, one edge we need to add two directions in the adjancency list. -\begin{lstlisting}[language=Python] -def countComponents(self, n, edges): - """ - :type n: int - :type edges: List[List[int]] - :rtype: int - """ - if not edges: - return n - def dfs(i): - for n in g[i]: - if not visited[n]: - visited[n] = True - dfs(n) - return - # convert edges into a adjacency list - g = [[] for i in range(n)] - for i, j in edges: - g[i].append(j) - g[j].append(i) - - # find components - visited = [False]*n - ans = 0 - for i in range(n): - if not visited[i]: - visited[i] = True - dfs(i) - ans += 1 - return ans -\end{lstlisting} - - -\end{examples} -\section{Islands and Bridges} -An island is surrounded by water (usually '0's in the matrix) and is formed by connecting adjacent lands horizontally or vertically. An island is acutally a definition of the connected components. -\begin{enumerate} - \item 463. Island Perimeter - \item 305. Number of Islands II - \item 694. Number of Distinct Islands - \item 711. Number of Distinct Islands II \item 827. Making A Large Island - \item 695. Max Area of Island - \item 642. Design Search Autocomplete System -\end{enumerate} -\begin{examples}[resume] -\item \textbf{200. Number of Islands. (medium).} Given a 2d grid map of '1's (land) and '0's (water), count the number of islands. An island is surrounded by water and is formed by connecting adjacent lands horizontally or vertically. You may assume all four edges of the grid are all surrounded by water. -\begin{lstlisting}[numbers=none] -Example 1: - -Input: -11110 -11010 -11000 -00000 - -Output: 1 - -Example 2: - -Input: -11000 -11000 -00100 -00011 - -Output: 3 -\end{lstlisting} -\textbf{Solution; DFS without extra space.}. We use DFS and mark the visted components as '-1' in the grid. 
-\begin{lstlisting}[language=Python] -def numIslands(self, grid): - """ - :type grid: List[List[str]] - :rtype: int - """ - if not grid: - return 0 - rows, cols = len(grid), len(grid[0]) - moves = [(-1,0), (1,0), (0, -1), (0, 1)] - def dfs(x, y): - for dx, dy in moves: - nx, ny = x + dx, y + dy - if nx < 0 or ny < 0 or nx >= rows or ny >= cols: - continue - if grid[nx][ny] == '1': - grid[nx][ny] = '-1' - dfs(nx, ny) - return - ans = 0 - for i in range(rows): - for j in range(cols): - if grid[i][j] == '1': - grid[i][j] = '-1' - dfs(i, j) - ans += 1 - return ans -\end{lstlisting} -\item \textbf{934. Shortest Bridge} In a given 2D binary array A, there are two islands. (An island is a 4-directionally connected group of 1s not connected to any other 1s.) Now, we may change 0s to 1s so as to connect the two islands together to form 1 island. - -Return the smallest number of 0s that must be flipped. (It is guaranteed that the answer is at least 1.) -\begin{lstlisting}[numbers=none] -Example 1: - -Input: [[0,1],[1,0]] -Output: 1 - -Example 2: - -Input: [[0,1,0],[0,0,0],[0,0,1]] -Output: 2 - -Example 3: - -Input: [[1,1,1,1,1],[1,0,0,0,1],[1,0,1,0,1],[1,0,0,0,1],[1,1,1,1,1]] -Output: 1 - -Note: - - 1 <= A.length = A[0].length <= 100 - A[i][j] == 0 or A[i][j] == 1 -\end{lstlisting} -\textbf{Solution 1: DFS to find the complete connected components.} This is a two island problem, First we need to find one node '1' and use DFS to find identify all the '1's compose this first island, in this process, we mark them as '-1'. Then we can do another BFS starts from each node marked as '-1' that is saved in $bfs$ to find the shortest path (the first element that is another '1' to make the shortest bridge). A better solution for this is: at each step, we traverse all $bfs$ to only expand one step. This is an algorithm that finds the shortest path from multiple starting and multiple ending points. 
The code is: -\begin{lstlisting}[language = Python] -def shortestBridge(self, A): - def dfs(i, j): - A[i][j] = -1 - bfs.append((i, j)) - for x, y in ((i - 1, j), (i + 1, j), (i, j - 1), (i, j + 1)): - if 0 <= x < n and 0 <= y < n and A[x][y] == 1: - dfs(x, y) - def first(): - for i in range(n): - for j in range(n): - if A[i][j]: - return i, j - n, step, bfs = len(A), 0, [] - dfs(*first()) - print(A) - while bfs: - new = [] - for i, j in bfs: - for x, y in ((i - 1, j), (i + 1, j), (i, j - 1), (i, j + 1)): - if 0 <= x < n and 0 <= y < n: - if A[x][y] == 1: - return step - elif not A[x][y]: - A[x][y] = -1 - new.append((x, y)) - step += 1 - bfs = new -\end{lstlisting} - -\end{examples} -%%%%%%%%%%%%%%%%%%%%NP hard%%%%%%%%%%%%%%%%%%%% -\section{NP-hard Problems} -Traveling salesman problems (TSP): Given a set of cities and distance between every pair of cities, the problem is to find the shortest possible route that visits every city exactly once and returns to the starting point. In fact, there is no polynomial time solution available for this problem as the problem is a known NP-Hard problem. -\begin{examples}[resume] -\item \textbf{943. Find the Shortest Superstring (hard).} Given an array A of strings, find any smallest string that contains each string in A as a substring. We may assume that no string in A is substring of another string in A. - -\begin{lstlisting}[numbers=none] -Example 1: - -Input: ["alex","loves","leetcode"] -Output: "alexlovesleetcode" -Explanation: All permutations of "alex","loves","leetcode" would also be accepted. - -Example 2: - -Input: ["catg","ctaagt","gcta","ttca","atgcatc"] -Output: "gctaagttcatgcatc" -\end{lstlisting} -\textit{Note: 1 <= A.length <= 12, 1 <= A[i].length <= 20} -\textbf{Solution 1: DFS Permutation.} First, there are $n!$ possible ways to arrange the strings to connect to get the superstring, and pick the shortest one. 
This is a typical permutation problems, and when we connect string i to j, we can compute the maximum length of prefix in j that we can skip when connecting. However, with Python, we receive LTE error. -\begin{lstlisting}[language=Python] - def shortestSuperstring(self, A): - """ - :type A: List[str] - :rtype: str - """ - if not A: - return '' - n = len(A) - - def getGraph(A): - G = [[0 for i in range(n)] for _ in range(n)] # key is the index, value (index: length of suffix with the next prefix) - if not A: - return G - for i, s in enumerate(A): - for j in range(n): - if i == j: - continue - - t = A[j] - m = min(len(s), len(t)) - for l in range(m, 0, -1): #[n, 1] - if s[-l:] == t[0:l]: # suffix and prefix - G[i][j] = l - break - return G - - def dfs(used, d, curr, path, ans, best_path): - if curr >= ans[0]: - return - if d == n: - ans[0] = curr - best_path[0] = path - return - for i in range(n): - if used & (1<=2^k$. With the math formula that $t^0+t^1+...+t^n=\frac{1-t^{n+1}}{1-t}$. Therefore, we can rewrite our time complexity function as: -\begin{align} - T(n)&\geq \sum_{i=0}^{k-1} 2^i + 2^k\\ - &=\sum_{i=0}^{k} 2^k \\ - &=\frac{1-2^{k+1}}{1-2} \\ - &=2^{k+1}\\ - &=2^{n/2} -\end{align} - -It would be reasonable for us to guess $r^n$. For this type of recursion, it is hard to find the tight bound, we can do the following simplification to find a lower bound instead, and replace each term with out guess -\begin{align} -\label{complexity_eq_fibonacci_2} - T(n)&\geq T(n-1)+T(n-2)\\ - r^n &\geq r^{(n-1)}+r^{(n-2)}\notag\\ - r^{(n-2)}(r^2-r-1)&\geq 0 -\end{align} -With some math knowledge that given a general quadratic formula as $ax^2+bx+c=0$, the solution will be $x=\frac{-b\pm\sqrt{b^2-4ac}}{2a}$. 
With the formula, we get the solution for equation $(r^2-r-1)=0$, which is $r=\frac{1\pm\sqrt{5}}{2}$: -\begin{align} -\label{complexity_eq_fibonacci_3} - T(n)&\geq A(\frac{1+\sqrt{5}}{2})^n+B(\frac{1-\sqrt{5}}{2})^n\\ - T(n)&\geq A(\frac{1+\sqrt{5}}{2})^n\\ - T(n)&\geq2^{n/2}\\ - T(n) &= \Omega(2^{n/2}) -\end{align} -\subsection{Master Method} The recursion tree and the master theorem are the main ways we rely on to answer the time complexity for a divide and conquer method of the form shown in Eq.~\ref{bt_time_1}. The master method is probably the easiest way to come up with the computational complexity analysis. It is a theorem that has been proved by researchers, and we just need to learn how to use it. The master theorem goes: -\begin{align} -\label{bt_time_1} - T(n)=aT(n/b)+f(n) -\end{align} - -For Eq.~\ref{bt_time_1}, let $a\geq1, b>1$, and let $f(n)$ be an asymptotically positive function. This represents that using divide and conquer, we divide a problem of size $n$ into $a$ subproblems, each of size $n/b$. The $a$ subproblems are solved recursively, each in time $T(n/b)$. Adding the cost $f(n)$, which represents the cost of dividing the problem and combining the results of the subproblems, we get the time complexity for a problem of size $n$. - -The master theorem states that for Eq.~\ref{bt_time_1}, $T(n)$ has the following three asymptotic bounds. -\begin{enumerate} - \item If $f(n) = O(n^{\log_b a - \epsilon})$ for constant $\epsilon>0$, then we get $T(n) = \Theta(n^{\log_b a})$. - \item If $f(n) = \Theta(n^{\log_b a })$, then we get $T(n) = \Theta(n^{\log_b a} \log n)$. - \item If $f(n) = \Omega(n^{\log_b a + \epsilon})$ for constant $\epsilon>0$, and if $a f(n/b)\leq cf(n)$ for constant $c<1$ and all sufficiently large $n$, then we get $T(n) = \Theta(f(n))$. -\end{enumerate} - -\paragraph{Apply Master Method} To apply the master method to a given function $T(n)$, we first compute $n^{\log_b a}$, and then compare $f(n)$ with $n^{\log_b a}$. 
Intuitively, the larger of the two functions determines the solution to the recurrence, as shown in the following equations for cases 1 and 3. For case 2, the two functions are of the same size, and we multiply by a logarithmic factor. -\begin{align} - f(n) &> n^{\log_b a}, a f(n/b)\leq cf(n), \text{case 3}, T(n)= \Theta(f(n))\\ - f(n) &< n^{\log_b a}, \text{case 1}, T(n)=\Theta(n^{\log_b a})\\ -f(n) &= n^{\log_b a}, \text{case 2}, T(n)=\Theta(n^{\log_b a} \log n) -\end{align} -Note that the comparison is a polynomial comparison: $f(n)$ must be smaller or larger than $n^{\log_b a}$ by a factor of $n^{\epsilon}$ for some constant $\epsilon>0$. - -\paragraph{When We Cannot Use Master Method} The three cases do not cover all the possibilities for $f(n)$. There is a gap between case 1 and 2 when $f(n)$ is smaller but not polynomially smaller. Similarly, there is a gap between case 3 and 2 when $f(n)$ is larger but not polynomially larger. Or if the regularity condition in case 3 fails to hold, we cannot use the master method. We go back to other techniques instead. - - - - -% \paragraph{Solving Non-overlapping Recurrence Function:} - - - -% \subsubsection{Recursion Tree Method} -% Drawing out a recursion tree serves as a straightforward way to come up with a good guess. Normally we can tolerate a small amount of "sloppiness", because later on, we can prove the complexity with substitution method discussed in the last section. However, when we are drawing the recursion tree, if we are careful enough and summing up the costs from each level and each node, we can use it as a direct proof of the solution to the recurrence. - -% In the corresponding recursion tree for recurrence equation in divide and conquer, each node represents the cost of a single subproblem somewhere in the set of recursive function invocations. We sum the costs within each level of the tree to obtain a set of per-level costs, and then we sum all the per-level costs to determine the total cost of all levels of the recursion. Let's look at one example for given recursion $T(n) = 3T(\floor*{n/4}) + \Theta(n^2)$. 
We replace $\Theta(n^2) = cn^2$, where $c>0$. $cn^2$ is the cost we pay to divide a problem with $n$ input size to three problems each with $n/4$ input size and combine the solution of the subproblems to solve the current problem. We first expand $T(n)$, and put the cost $cn^2$ at the root, and with three children each noted with $T(n/4)$. Then we recursively replace $T(n/4)$ with the cost and its subproblem till the size of each subproblem to be 1, which means we get to the leaves. The computational complexity for this recursion would be the sum of all layers's costs. And we assume $T(1)=1$. -% \begin{figure}[h] -% \centering -% \includegraphics[width=0.8\columnwidth]{fig/recursive_tree_1.png} -% \includegraphics[width=0.8\columnwidth]{fig/recursive_tree_2.png} -% \caption{The process to construct a recursive tree for $T(n) = 3T(\floor*{n/4}) + cn^2$} -% \label{fig:recursive_tree} -% \end{figure} -% \begin{equation} \label{eg_recurrence_5} -% \begin{split} -% T(n) & = cn^2+\frac{3}{16}cn^2+(\frac{3}{16})^2cn^2+...+(\frac{3}{16})^{\log_4 {n-1}}cn^2+\Theta(n^{\log_4 3})\\ -% & = \sum_{i=0}^{\log_4 {n-1}}(\frac{3}{16})^{i}cn^2+\Theta(n^{\log_4 3})\\ -% &< \sum_{i=0}^{\infty}(\frac{3}{16})^{i}cn^2+\Theta(n^{\log_4 3})\\ -% & = \frac{1}{1-(3/16)} cn^2+\Theta(n^{\log_4 3})\\ -% & = O(n^2). 
-% \end{split} -% \end{equation} -% \subsubsection{Master Method} - -\end{document} \ No newline at end of file diff --git a/Easy-Book/refer.bib b/Easy-Book/refer.bib deleted file mode 100644 index 31dfe4f..0000000 --- a/Easy-Book/refer.bib +++ /dev/null @@ -1,77 +0,0 @@ -@book{cormen2009introduction, - title={Introduction to algorithms}, - author={Cormen, Thomas H}, - year={2009}, - publisher={MIT press} -} -@book{halim2013competitive, - title={Competitive Programming 3}, - author={Halim, Steven and Halim, Felix}, - year={2013}, - publisher={Lulu Independent Publish} -} -@book{slatkin2015effective, - title={Effective Python: 59 Specific Ways to Write Better Python}, - author={Slatkin, Brett}, - year={2015}, - publisher={Pearson Education} -} - -@misc{bworld, - author = {Hua hua jiang}, - title = {{Leetcode blogs}}, - howpublished = "\url{https://zxi.mytechroad.com/blog/category}", - year = {2018}, - note = "[Online; accessed 19-July-2018]" -} - -@book{beazley2009python, - title={Python essential reference}, - author={Beazley, David M}, - year={2009}, - publisher={Addison-Wesley Professional} -} - -@article{baka2017python, - title={Python Data Structures and Algorithms: Improve application performance with graphs, stacks, and queues}, - author={Baka, Benjamin}, - year={2017}, - publisher={Packt Publishing} -} - -@misc{CP, - title = {{Competitive Programming}}, - howpublished = "\url{https://cp-algorithms.com/}", - year = {2019}, - note = "[Online; accessed 19-July-2018]" -} - -@misc{cs_princeton, - title = {{cs princeton}}, - howpublished = "\url{https://aofa.cs.princeton.edu/60trees/}", - year = {2019}, - note = "[Online; accessed 19-July-2018]" -} - - - -@book{skiena1998algorithm, - title={The algorithm design manual: Text}, - author={Skiena, Steven S}, - volume={1}, - year={1998}, - publisher={Springer Science \& Business Media} -} -%%%%%%python -@book{phillips2010python, - title={Python 3 Object Oriented Programming}, - author={Phillips, Dusty}, - year={2010}, 
- publisher={Packt Publishing Ltd} -} - - - -%https://www2.cs.duke.edu/courses/fall05/cps234/notes/lecture07.pdf - -%https://cp-algorithms.com/ \ No newline at end of file diff --git a/Easy-Book/sample.tex b/Easy-Book/sample.tex deleted file mode 100644 index 573b9c9..0000000 --- a/Easy-Book/sample.tex +++ /dev/null @@ -1,260 +0,0 @@ -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%% LaTeX book template %% -%% Author: Amber Jain (http://amberj.devio.us/) %% -%% License: ISC license %% -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -\documentclass[a4paper,11pt]{book} -\usepackage[T1]{fontenc} -\usepackage[utf8]{inputenc} -\usepackage{lmodern} -\usepackage{subcaption} -\usepackage[normalem]{ulem} -\usepackage{enumitem,kantlipsum} - -%%%%%%%%%%%question block -\usepackage[tikz]{bclogo} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -% Source: http://en.wikibooks.org/wiki/LaTeX/Hyperlinks % -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\usepackage{hyperref} -\usepackage{graphicx} -\usepackage[english]{babel} -\usepackage{graphicx,amssymb,amstext,amsmath} -\usepackage{tikz} -\usepackage{cancel} - -\usepackage{mathtools} -\DeclarePairedDelimiter\ceil{\lceil}{\rceil} -\DeclarePairedDelimiter\floor{\lfloor}{\rfloor} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -% 'dedication' environment: To add a dedication paragraph at the start of book % -% Source: http://www.tug.org/pipermail/texhax/2010-June/015184.html % -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\newenvironment{dedication} -{ - \cleardoublepage - \thispagestyle{empty} - \vspace*{\stretch{1}} - \hfill\begin{minipage}[t]{0.66\textwidth} - \raggedright -} -{ - \end{minipage} - \vspace*{\stretch{3}} - \clearpage -} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -% The setting for the exercise part: we can write the problems and the solutions at the same place, but can be 
displayed in the pdf at another place % -% Source: https://tex.stackexchange.com/questions/369265/math-book-how-to-write-exercise-and-answers % -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\usepackage{multicol} -\usepackage{multirow} -\usepackage{ifthen} -\newboolean{firstanswerofthechapter} - -\usepackage{xcolor} -\colorlet{lightcyan}{cyan!40!white} - -\usepackage{chngcntr} -\usepackage{stackengine} - -\usepackage{tasks} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -% The setting for the chapter style % -% Source: https://texblog.org/2012/07/03/fancy-latex-chapter-styles/ % -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -\usepackage[Sonny]{fncychap} -% \usepackage{titlesec} - - -%%%%%%%%%%%%% -%%%%%%%%%%%%%%% -% use underline in the lstlisting% -%%%%%%%%%%%%%% -%%%%%%%%%%% - -\usepackage{upquote} - - -% \titleformat -% {\chapter} % command -% [display] % shape -% {\bfseries\Large}%\itshape} % format -% {Chapter No. 
\ \thechapter} % label -% {0.5ex} % sep -% { -% \rule{\textwidth}{1pt} -% \vspace{1ex} -% \centering -% } % before-code -% [ -% \vspace{-0.5ex}% -% \rule{\textwidth}{0.3pt} -% ] % after-code -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -% The setting for the examples style % -% Source: https://tex.stackexchange.com/questions/295589/how-to-enumerate-a-problem-set-in-a-book-accordingly-with-the-chapter-number % -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\usepackage{enumitem} -\newlist{examples}{enumerate}{1} -\setlist[examples]{label={\thechapter.\arabic*}} - -% \BeforeBeginEnvironment{example}{\vspace{\baselineskip}} -% \AfterEndEnvironment{example}{\vspace{\baselineskip}} -% \BeforeBeginEnvironment{sourcecode}{\vspace{\baselineskip}} -% \AfterEndEnvironment{sourcecode}{\vspace{\baselineskip}} - -\newlength{\longestlabel} -\settowidth{\longestlabel}{\bfseries viii.} -\settasks{counter-format={tsk[r].}, label-format={\bfseries}, label-width=\longestlabel, - item-indent=0pt, label-offset=2pt, column-sep={10pt}} - -\usepackage[lastexercise,answerdelayed]{exercise} -\counterwithin{Exercise}{chapter} -\counterwithin{Answer}{chapter} -\renewcounter{Exercise}[chapter] -\newcommand{\QuestionNB}{\bfseries\arabic{Question}.\ } -\renewcommand{\ExerciseName}{EXERCISES} -\renewcommand{\ExerciseHeader}{\noindent\def\stackalignment{l}% code from https://tex.stackexchange.com/a/195118/101651 - \stackunder[0pt]{\colorbox{cyan}{\textcolor{white}{\textbf{\LARGE\ExerciseHeaderNB\;\large\ExerciseName}}}}{\textcolor{lightcyan}{\rule{\linewidth}{2pt}}}\medskip} -\renewcommand{\AnswerName}{Exercises} -\renewcommand{\AnswerHeader}{\ifthenelse{\boolean{firstanswerofthechapter}}% - {\bigskip\noindent\textcolor{cyan}{\textbf{CHAPTER \thechapter}}\newline\newline% - \noindent\bfseries\emph{\textcolor{cyan}{\AnswerName\ \ExerciseHeaderNB, page % - \pageref{\AnswerRef}}}\smallskip} - 
{\noindent\bfseries\emph{\textcolor{cyan}{\AnswerName\ \ExerciseHeaderNB, page \pageref{\AnswerRef}}}\smallskip}} -\setlength{\QuestionIndent}{16pt} - - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -% design the code listing -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\usepackage{listings} - -\usepackage{float} - - -\usepackage{color} - -\definecolor{codegreen}{rgb}{0,0.6,0} -\definecolor{codegray}{rgb}{0.5,0.5,0.5} -\definecolor{codepurple}{rgb}{0.58,0,0.82} -\definecolor{backcolour}{rgb}{0.95,0.95,0.92} - -\lstdefinestyle{mystyle}{ - backgroundcolor=\color{backcolour}, - commentstyle=\color{codegreen}, - keywordstyle=\color{magenta}, - numberstyle=\tiny\color{codegray}, - stringstyle=\color{codepurple}, - basicstyle=\footnotesize, - breakatwhitespace=false, - breaklines=true, - captionpos=b, - keepspaces=true, - numbers=left, - numbersep=5pt, - showspaces=false, - showstringspaces=false, - showtabs=false, - tabsize=2 -} - -\lstset{style=mystyle} - - -%%%%%%%%%%%%%%%theorem, corollary and lemma -\newtheorem{theorem}{}[section] -\newtheorem{corollary}{Corollary}[theorem] -\newtheorem{lemma}[theorem]{Lemma} - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -% package enumberate with different style % -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\usepackage{enumitem} %[label=(\alph*)], [label=(\Alph*)], [label=(\roman*)] -\usepackage{titlesec} - -\usepackage[utf8]{inputenc} -%\setcounter{secnumdepth}{3} %subsubsection and paragraph - -\newlist{inparaenum}{enumerate}{2}% allow two levels of nesting in an enumerate-like environment -\setlist[inparaenum]{nosep}% compact spacing for all nesting levels -\setlist[inparaenum,1]{label=\bfseries\arabic*.}% labels for top level -\setlist[inparaenum,2]{label=\arabic{inparaenumi}\emph{\alph*})}% labels for second level - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 
-% better align the equation % -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\usepackage{notes} -\usepackage{amsmath} -\usepackage{subfiles} -\usepackage{subcaption} -\usepackage{dramatist} -% \usepackage{blindtext} - -% \setcounter{chapter}{-1} - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -% Chapter quote at the start of chapter % -% Source: http://tex.stackexchange.com/a/53380 % -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\makeatletter -\renewcommand{\@chapapp}{}% Not necessary... -\newenvironment{chapquote}[2][2em] - {\setlength{\@tempdima}{#1}% - \def\chapquote@author{#2}% - \parshape 1 \@tempdima \dimexpr\textwidth-2\@tempdima\relax% - \itshape} - {\par\normalfont\hfill--\ \chapquote@author\hspace*{\@tempdima}\par\bigskip} -\makeatother - -\title{\Huge \textbf{Combinatorial Search}} - -% \title{\Huge \textbf{The Comprehensive Coding Interview Guide} \footnote{This is a footnote.} \\ \huge Cracking LeetCode Problems Using Python \footnote{This is yet another footnote.}} - -% \title{\Huge \textbf{Notebook of Data Structures and Algorithms for Coding Interview } \footnote{This is a footnote.} \\ \huge Cracking LeetCode Problems Using Python \footnote{This is yet another footnote.}} -% Author -\author{\textsc{Li Yin}\thanks{\url{www.liyinscience.com}}} -\begin{document} -\frontmatter -\maketitle -\subfile{chapters/chapter_combinatorial_search} -\label{chapter_advanced_linear_search} - - -% \part{Problem-Patterns} -% \label{part_question} - -% \chapter{Array Questions(15\%)} -% \subfile{chapters/question_3_array_question} -% \label{array_problem} - -% \chapter{Linked List, Stack, Queue, and Heap Questions (12\%)} %(44+34+9+31) -% \label{other_linear_datastrcutre_problem} -% \subfile{chapters/question_4_linked_list_question} - - -% \chapter{String Questions (15\%)} -% \label{chapter_string_problem} -% \subfile{chapters/question_5_pattern-matching} - - -% \chapter{Tree Questions(10\%)} -% 
\label{chapter_tree_problem} -% \subfile{chapters/chapter_12_tree_algorithm } - - -% \chapter{Graph Questions (15\%)} -% \label{chapter_graph_problem} -% \subfile{chapters/question_7_specific_algorithms_for_graph} - -% % chapter 1 -% \chapter{Dynamic Programming Questions (15\%)} -% \subfile{chapters/question_1_dynamic_programming} -% \label{dp_problem} - -\end{document} return ans -\end{lstlisting} - diff --git a/Easy-Book/source-code/chapter_advanced_graph_search.ipynb b/Easy-Book/source-code/chapter_advanced_graph_search.ipynb deleted file mode 100644 index 4dc9882..0000000 --- a/Easy-Book/source-code/chapter_advanced_graph_search.ipynb +++ /dev/null @@ -1 +0,0 @@ -{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"chapter_advanced_graph_search.ipynb","provenance":[],"collapsed_sections":[],"toc_visible":true},"kernelspec":{"name":"python3","display_name":"Python 3.7.5 64-bit","metadata":{"interpreter":{"hash":"6f3fdc50dad01a00ee690c4e47d0eb586e790b94dbf60d4ad3bb7d6e393ce6fb"}}}},"cells":[{"cell_type":"markdown","metadata":{"id":"XFKuBveS1Cce"},"source":["* Cycle Detection : check the figure\n","* Topological Sort: keep the left figure\n","* Eulerian Circuit (optional)\n","\n","To do list\n","* Each section should have a fun application with better figure!!!\n","* the disjoint set can be improved using tree: https://www.hackerearth.com/practice/algorithms/graphs/minimum-spanning-tree/tutorial/\n","* Proof to dijkstra should be a practice of the proof of greedy algorithms\n","* Add more leetcode examples.\n","\n","Resources\n","* https://web.stanford.edu/class/archive/cs/cs161/cs161.1138/lectures/14/Small14.pdf\n","https://homes.luddy.indiana.edu/achauhan/Teaching/B403/LectureNotes/09-mst.html"]},{"cell_type":"markdown","metadata":{"id":"auwghE4hWHSK"},"source":["### Cycle Detection"]},{"cell_type":"code","metadata":{"id":"6nYEnOCo3FOC","executionInfo":{"status":"ok","timestamp":1615934605072,"user_tz":420,"elapsed":542,"user":{"displayName":"Li 
# initialization
class STATE:
    """Node colors shared by the DFS-based algorithms in this notebook."""
    white = 0  # not visited yet
    gray = 1   # visited, but its DFS call has not returned yet
    black = 2  # visited and completely explored


def hasCycleDirected(g, s, state):
    """Depth-first search from `s` that reports whether a directed cycle is reachable.

    Args:
        g: adjacency list; g[u] is the list of successors of node u.
        s: node to start (or continue) the search from.
        state: list of STATE colors indexed by node; updated in place.

    Returns:
        True as soon as an edge into a gray node (a back edge) is met,
        False once everything reachable from `s` has been explored.
        Progress messages are printed while a detected cycle unwinds.
    """
    state[s] = STATE.gray  # s is now on the current DFS path
    for v in g[s]:
        if state[v] == STATE.white:
            if hasCycleDirected(g, v, state):
                print(f'Cycle found at node {v}.')
                return True
        elif state[v] == STATE.gray:  # a back edge
            print(f'Cycle starts at node {v}.')
            return True
        # a black neighbor is already finished and is simply skipped
    state[s] = STATE.black  # mark it as complete
    return False


def cycleDetectDirected(g):
    """Return True if the directed graph `g` (adjacency list) contains a cycle.

    Runs hasCycleDirected from every node that is still white so that
    every node of the graph is examined.
    """
    n = len(g)
    state = [STATE.white] * n
    for i in range(n):
        if state[i] == STATE.white and hasCycleDirected(g, i, state):
            return True
    return False


def hasCycleUndirected(g, s, p, visited):
    """Depth-first search from `s` that reports whether an undirected cycle is reachable.

    Args:
        g: adjacency list; g[u] is the list of neighbors of node u.
        s: node to start (or continue) the search from.
        p: the node `s` was reached from (-1 for a search root); the edge
           back to it is not treated as a cycle.
        visited: list of booleans indexed by node; updated in place.

    Returns:
        True as soon as an already visited neighbor other than `p` is met,
        False otherwise. Progress messages are printed while a detected
        cycle unwinds.
    """
    visited[s] = True
    for v in g[s]:
        if not visited[v]:
            if hasCycleUndirected(g, v, s, visited):
                print(f'Cycle found at node {v}.')
                return True
        elif v != p:  # visited (gray or black) and not the parent
            print(f'Cycle starts at node {v}.')
            return True
    return False


def cycleDetectUndirected(g):
    """Return True if the undirected graph `g` (adjacency list) contains a cycle.

    Runs hasCycleUndirected from every unvisited node, using -1 as the
    parent of each search root.
    """
    n = len(g)
    visited = [False] * n
    for i in range(n):
        if not visited[i] and hasCycleUndirected(g, i, -1, visited):
            print(f'Cycle found at start node {i}.')
            return True
    return False
0.\n"]},{"output_type":"execute_result","data":{"text/plain":["True"]},"metadata":{},"execution_count":101}]},{"cell_type":"code","execution_count":27,"metadata":{},"outputs":[{"output_type":"execute_result","data":{"text/plain":["False"]},"metadata":{},"execution_count":27}],"source":["# delete edge (0, 2)\n","uag = [[1], [0, 2], [4], [], [2, 3], [6], [5]]\n","cycleDetectUndirected(uag)"]},{"cell_type":"markdown","metadata":{"id":"WkpyVhW8WMN1"},"source":["# Topological Sort"]},{"cell_type":"code","metadata":{"id":"G1MfbU4eWPrA","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1615934605697,"user_tz":420,"elapsed":1106,"user":{"displayName":"Li Yin","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GgIRi0T9vdv7_5OZi96H4VGhtX2CMD9asUJHPo4cQ=s64","userId":"13365523799853678553"}},"outputId":"f9c45bc2-48e6-4b6f-e899-105976220f66"},"source":["# Directed Acyclic Graph \n","dag = [[1], [2], [], [2, 4, 5], [], [6], []]\n","dag"],"execution_count":28,"outputs":[{"output_type":"execute_result","data":{"text/plain":["[[1], [2], [], [2, 4, 5], [], [6], []]"]},"metadata":{},"execution_count":28}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"pmt__y5S0Wux","executionInfo":{"status":"ok","timestamp":1615934605698,"user_tz":420,"elapsed":1099,"user":{"displayName":"Li Yin","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GgIRi0T9vdv7_5OZi96H4VGhtX2CMD9asUJHPo4cQ=s64","userId":"13365523799853678553"}},"outputId":"63caf923-44d6-4d12-87e3-230095a93776"},"source":["# Directed Cyclic Graph\n","dcg = [[1], [2], [3], [2, 4, 5], [], [6], []]\n","dcg"],"execution_count":29,"outputs":[{"output_type":"execute_result","data":{"text/plain":["[[1], [2], [3], [2, 4, 5], [], [6], []]"]},"metadata":{},"execution_count":29}]},{"cell_type":"markdown","metadata":{"id":"TkdnK4fU1F4P"},"source":["## Kahn's 
from collections import defaultdict
import heapq


def kahns_topo_sort(g):
    """Topologically sort a directed graph with Kahn's algorithm.

    Among the nodes whose in-degree has dropped to zero, the one with the
    smallest index is always emitted first.

    Args:
        g: adjacency list; g[u] is the list of successors of node u.
           Every listed successor is expected to be a valid index into g.

    Returns:
        A list of all nodes in topological order, or None (after printing
        a 'Cycle starts at ...' message) when the graph contains a cycle.
    """
    n = len(g)

    # Step 1: count the in-degree of every node
    indegrees = defaultdict(int)
    for u in range(n):
        indegrees[u] = 0
    for u in range(n):
        for v in g[u]:
            indegrees[v] += 1
    print(f'initial indegree : {indegrees}')

    # Step 2: Kahn's algorithm
    # `ready` is a min-heap of the nodes that currently have in-degree 0.
    ready = [node for node in range(n) if indegrees[node] == 0]
    heapq.heapify(ready)
    S = []
    while ready:
        first_node = heapq.heappop(ready)
        S.append(first_node)
        # Remove the outgoing edges; a successor becomes ready the moment
        # its last incoming edge disappears.
        for v in g[first_node]:
            indegrees[v] -= 1
            if indegrees[v] == 0:
                heapq.heappush(ready, v)

    if len(S) < n:  # cycle found, no topological ordering
        emitted = set(S)
        # Report the remaining node with the smallest (in-degree, index).
        _, first_node = min(
            (indegrees[node], node) for node in range(n) if node not in emitted
        )
        print(f'Cycle starts at {first_node}')
        return None
    return S
def dfs(g, s, colors, complete_orders):
    """Explore from `s`, recording finished nodes; return True if a cycle is met.

    `colors` holds a STATE color per node and is updated in place. A node is
    appended to `complete_orders` when its exploration finishes. Meeting a
    gray neighbor means an edge back into the current DFS path, i.e. a cycle.
    """
    colors[s] = STATE.gray
    for nxt in g[s]:
        color = colors[nxt]
        if color == STATE.gray:  # a cycle appears
            print(f'Cycle found at node {nxt}.')
            return True
        if color == STATE.white and dfs(g, nxt, colors, complete_orders):
            return True
    colors[s] = STATE.black
    complete_orders.append(s)
    return False


def topo_sort(g):
    """Return a topological order of `g`, or None when `g` contains a cycle.

    The order is the reverse of the order in which the DFS finishes nodes.
    """
    colors = [STATE.white for _ in g]
    finished = []
    for root in range(len(g)):  # run dfs on all the nodes
        if colors[root] != STATE.white:
            continue
        if dfs(g, root, colors, finished):
            print('Cycle found, no topological ordering')
            return None
    finished.reverse()
    return finished
def bfs(g, s, state):
    """Breadth-first search from `s`; return the nodes in the order they are discovered.

    Args:
        g: adjacency list; g[u] is the list of neighbors of node u.
        s: start node.
        state: list of booleans indexed by node; every discovered node is
               marked True in place, so later calls skip it.
    """
    from collections import deque  # O(1) dequeue, unlike list.pop(0)

    state[s] = True
    q, orders = deque([s]), [s]
    while q:
        u = q.popleft()
        for v in g[u]:
            if not state[v]:
                state[v] = True
                q.append(v)
                orders.append(v)
    return orders


def connectedComponent(g):
    """Return the connected components of `g` as a list of node lists.

    One BFS is started from every node not yet reached; each BFS yields
    one component, listed in discovery order.
    """
    n = len(g)
    ccs = []
    state = [False] * n
    for i in range(n):
        if not state[i]:
            ccs.append(bfs(g, i, state))
    return ccs
# implement union-find based connect component detection
from collections import defaultdict


# implement disjoint set with tree based set representative and path compression
class DisjointSet:
    """Disjoint-set forest over the elements 0 .. n-1 with path compression."""

    def __init__(self, n):
        self.n = n               # number of elements
        self.p = list(range(n))  # p[i] is the parent of i; a root is its own parent

    def find(self, x):
        """Return the representative (root) of the set containing `x`.

        Iterative on purpose: `union` attaches roots without balancing, so
        parent chains can grow long enough to overflow the recursion limit.
        """
        root = x
        while self.p[root] != root:
            root = self.p[root]
        # Path compression: point every node on the walked path at the root.
        while self.p[x] != root:
            self.p[x], x = root, self.p[x]
        return root

    def union(self, x, y):
        """Merge the sets of `x` and `y`; the root of `y` becomes the new root."""
        xr = self.find(x)
        yr = self.find(y)
        self.p[xr] = yr

    # Add two more functions to visualize the sets
    def get_num_sets(self):
        """Return the number of disjoint sets (elements that are their own root)."""
        return sum(self.find(i) == i for i in range(self.n))

    def get_all_sets(self):
        """Return a defaultdict mapping each root to the set of its members."""
        sets = defaultdict(set)
        for i in range(self.n):
            p = self.find(i)
            sets[p].add(i)
        return sets


def connectedComponent(g):
    """Find the connected components of `g` with a disjoint set.

    Args:
        g: adjacency list; g[i] lists the neighbors j of node i.

    Returns:
        A pair (number of components, mapping root -> set of member nodes).
    """
    n = len(g)
    # initialize disjoint set
    ds = DisjointSet(n)

    for i in range(n):
        for j in g[i]:  # for edge i<->j
            ds.union(i, j)
    return ds.get_num_sets(), ds.get_all_sets()
class DynamicConnectedComponent():
    """Connected components of a graph that grows one edge at a time.

    Arbitrary node labels are mapped to dense integer indices on first sight;
    the indices are tracked in a ``DisjointSet`` whose parent array is extended
    as new nodes arrive.
    """

    def __init__(self):
        self.ds = DisjointSet(0)             # starts empty, grows in _ensure_node
        self.node_index = defaultdict(int)   # node label -> dense index
        self.index_node = defaultdict(int)   # dense index -> node label
        self.index = 0                       # next free dense index

    def _ensure_node(self, node):
        """Return the dense index of ``node``, registering it as a new singleton set if unseen."""
        if node in self.node_index:
            return self.node_index[node]
        slot = self.index
        self.node_index[node] = slot
        self.index_node[slot] = node
        # Grow the disjoint set by one self-rooted element.
        self.ds.p.append(slot)
        self.ds.n += 1
        self.index = slot + 1
        return slot

    def add_edge(self, u, v):
        """Add the undirected edge ``u <-> v``, merging the components of both endpoints."""
        self.ds.union(self._ensure_node(u), self._ensure_node(v))

    def get_num_sets(self):
        """Return the current number of connected components."""
        return self.ds.get_num_sets()

    def get_all_sets(self):
        """Return ``{representative label: set of member labels}`` for every component."""
        by_index = self.ds.get_all_sets()
        return {
            self.index_node[root]: {self.index_node[member] for member in members}
            for root, members in by_index.items()
        }
# DFS traversal with reversed complete orders
def dfs(g, s, colors, complete_orders):
    """Depth-first search from ``s`` that records vertices in finishing order.

    Args:
        g: adjacency list; ``g[u]`` is an iterable of the successors of ``u``.
        s: start vertex.
        colors: per-vertex state list, updated in place
            (``STATE.white`` -> ``STATE.gray`` -> ``STATE.black``).
        complete_orders: list that each vertex is appended to when it finishes.

    Uses an explicit stack instead of recursion so that long paths do not hit
    the interpreter's recursion limit; visit and finish order are the same as
    in the recursive formulation.
    """
    colors[s] = STATE.gray
    # Each frame keeps the vertex and a live iterator over its remaining successors.
    stack = [(s, iter(g[s]))]
    while stack:
        node, successors = stack[-1]
        for nxt in successors:
            if colors[nxt] == STATE.white:
                colors[nxt] = STATE.gray
                stack.append((nxt, iter(g[nxt])))
                break  # descend into nxt before looking at node's other successors
        else:
            # All successors handled: node is finished.
            stack.pop()
            colors[node] = STATE.black
            complete_orders.append(node)
    return
# build the transposed (edge-reversed) graph
def reverse_graph(g):
    """Return the transpose of the adjacency-list graph ``g``.

    For every edge ``u -> v`` in ``g`` the result contains ``v -> u``.
    Vertices are the indices ``0 .. len(g)-1``; the input is not modified.
    """
    transposed = [[] for _ in g]
    for src, successors in enumerate(g):
        for dst in successors:
            transposed[dst].append(src)
    return transposed
sccs"],"execution_count":41,"outputs":[]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"s9SxP0DvJ0ff","executionInfo":{"status":"ok","timestamp":1615934606071,"user_tz":420,"elapsed":1374,"user":{"displayName":"Li Yin","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GgIRi0T9vdv7_5OZi96H4VGhtX2CMD9asUJHPo4cQ=s64","userId":"13365523799853678553"}},"outputId":"977d5e94-aebe-4860-8a36-9272efb34b41"},"source":["scc(dg)"],"execution_count":42,"outputs":[{"output_type":"execute_result","data":{"text/plain":["[[3, 2, 1, 0], [4], [5], [6]]"]},"metadata":{},"execution_count":42}]},{"cell_type":"markdown","metadata":{"id":"YVz-BG3YFJFX"},"source":["# Minimum Spanning Tree\n","\n","https://web.stanford.edu/class/archive/cs/cs106b/cs106b.1138/lectures/24/Slides24.pdf"]},{"cell_type":"markdown","metadata":{"id":"vwa3SlexCQAn"},"source":["## Kruskal's Algorithm"]},{"cell_type":"code","metadata":{"id":"dG6PefDvGh8f","executionInfo":{"status":"ok","timestamp":1615934606201,"user_tz":420,"elapsed":1502,"user":{"displayName":"Li Yin","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GgIRi0T9vdv7_5OZi96H4VGhtX2CMD9asUJHPo4cQ=s64","userId":"13365523799853678553"}}},"source":["from typing import Dict\n","def kruskal(g: Dict):\n"," # g is a dict with node: adjacent nodes\n"," vertices = [i for i in range(1, 1 + len(g))]\n"," vertices = g.keys()\n"," n = len(vertices)\n"," ver_idx = {v: i for i, v in enumerate(vertices)}\n","\n"," # initialize a disjoint set\n"," ds = DisjointSet(n)\n","\n"," # sort all edges\n"," edges = []\n"," for u in vertices:\n"," for v, w in g[u]:\n"," if (v, u, w) not in edges:\n"," edges.append((u, v, w))\n"," edges.sort(key=lambda x: x[2])\n"," \n"," # main section\n"," A = []\n"," for u, v, w in edges:\n"," if ds.find(ver_idx[u]) != ds.find(ver_idx[v]):\n"," ds.union(ver_idx[u], ver_idx[v])\n"," print(f'{u} -> {v}: {w}')\n"," A.append((u, v, w))\n"," return 
import queue


def _get_light_edge(pq, S):
    """Pop edges until one crosses the cut, add its far endpoint to ``S`` and return it.

    Args:
        pq: ``queue.PriorityQueue`` of ``(weight, u, v)`` candidate edges.
        S: set of vertices already in the spanning tree (updated in place).

    Returns:
        ``(u, v, w)`` for the lightest edge whose head ``v`` is outside ``S``,
        or ``None`` when the queue runs out of candidates.
    """
    # A queue.PriorityQueue object is always truthy and get() blocks on an
    # empty queue, so emptiness must be tested explicitly.
    while not pq.empty():
        # Pick the light edge
        w, u, v = pq.get()
        # Filter out non-cross edge
        if v not in S:
            S.add(v)
            return (u, v, w)
    return None


def prim(g, start=1):
    """Prim's minimum-spanning-tree algorithm with a priority queue of edges.

    Args:
        g: dict mapping each vertex to a list of ``(neighbor, weight)`` pairs.
        start: vertex the tree is grown from (defaults to ``1``).

    Returns:
        List of tree edges ``(u, v, w)`` in the order they were added. If the
        graph is not connected, a message is printed and the edges of the
        component containing ``start`` are returned.
    """
    n = len(g)
    S = {start}  # spanning tree set
    pq = queue.PriorityQueue()
    A = []
    cur = start

    while len(S) < n:
        # Expand edges for the exploring vertex
        for v, w in g[cur]:
            if v not in S:
                pq.put((w, cur, v))

        le = _get_light_edge(pq, S)
        if le:
            A.append(le)
            cur = le[1]  # set the exploring vertex
        else:
            print(f'Graph {g} is not connected.')
            break
    return A
def prim2(g, start=1):
    """Prim's minimum-spanning-tree algorithm with a priority queue of vertices.

    Each vertex sits in the queue keyed by the weight of the lightest known
    edge connecting it to the tree, with that edge's tree endpoint as ``info``.

    Args:
        g: dict mapping each vertex to a list of ``(neighbor, weight)`` pairs.
        start: vertex the tree is grown from (defaults to ``1``).

    Returns:
        List of tree edges ``(predecessor, vertex, weight)`` in the order the
        vertices were attached. The start vertex has no incoming tree edge and
        contributes no entry. If the graph is not connected, a message is
        printed and the partial tree is returned.
    """
    n = len(g)
    pq = PriorityQueue()
    S = set()  # vertices already in the tree
    A = []
    # Initialization
    # task: vertex, priority: edge cost, info: predecessor vertex
    for vertex in g:
        pq.add_task(task=vertex, priority=float('inf'), info=None)
    pq.add_task(start, 0, info=None)

    while len(S) < n:
        u, pred, cost = pq.pop_task()
        if cost == float('inf'):
            # Nothing left that is reachable from the tree.
            print(f'Graph {g} is not connected.')
            break
        S.add(u)
        if pred is not None:
            A.append((pred, u, cost))
        for v, w in g[u]:
            if v in S:
                continue
            best, _ = pq.get_task(v)
            if w < best:
                pq.add_task(v, w, u)

    return A
def all_paths(g, s, path, cost, ans):
    """Enumerate every simple path that extends ``path`` from vertex ``s``.

    Args:
        g: dict mapping each vertex to a list of ``(neighbor, weight)`` pairs.
        s: current (last) vertex of ``path``.
        path: list of vertices visited so far; restored to its input value
            before returning.
        cost: total weight of ``path``.
        ans: output list; one ``{'path': [...], 'cost': ...}`` record is
            appended per path found (including ``path`` itself).
    """
    ans.append({'path': path[::], 'cost': cost})
    for v, w in g[s]:
        if v in path:
            continue
        path.append(v)
        # Pass the extended cost down rather than adding and then subtracting
        # it: with float weights the add/subtract round trip is not exact and
        # would skew the cost of every later sibling branch.
        all_paths(g, v, path, cost + w, ans)
        path.pop()


def get_shortest_path(ans, g):
    """Pick, for every vertex of ``g``, the cheapest path recorded in ``ans``.

    Args:
        ans: list of ``{'path': [...], 'cost': ...}`` records as produced by
            ``all_paths``; a path's target is its last vertex.
        g: dict whose keys are the vertices to report on.

    Returns:
        ``(delta, delta_path)``: ``delta[v]`` is the minimum cost found for
        ``v`` (``inf`` if none) and ``delta_path[v]`` the corresponding path
        (``[]`` if none). On ties the first record in ``ans`` wins.
    """
    delta = {v: float('inf') for v in g}
    delta_path = {v: [] for v in g}
    for record in ans:
        path, cost = record['path'], record['cost']
        target = path[-1]
        if cost < delta[target]:
            delta[target] = cost
            delta_path[target] = path
    return delta, delta_path
g)"],"execution_count":142,"outputs":[{"output_type":"execute_result","data":{"text/plain":["({'s': 0, 't': 2, 'x': 4, 'y': 7, 'z': -2},\n"," {'s': ['s'],\n"," 't': ['s', 'y', 'x', 't'],\n"," 'x': ['s', 'y', 'x'],\n"," 'y': ['s', 'y'],\n"," 'z': ['s', 'y', 'x', 't', 'z']})"]},"metadata":{"tags":[]},"execution_count":142}]},{"cell_type":"code","metadata":{"id":"4bJG_oCYbbCW","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1615934606363,"user_tz":420,"elapsed":1613,"user":{"displayName":"Li Yin","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GgIRi0T9vdv7_5OZi96H4VGhtX2CMD9asUJHPo4cQ=s64","userId":"13365523799853678553"}},"outputId":"107d3daa-c486-4818-a58e-61b9ad9aa29b"},"source":["# on graph 2\n","ans1, path, cost = [], ['s'], 0\n","all_paths(g1, 's', path, cost, ans1 )\n","get_shortest_path(ans1, g1)"],"execution_count":143,"outputs":[{"output_type":"execute_result","data":{"text/plain":["({'s': 0, 't': 6, 'x': 1, 'y': 7, 'z': 2},\n"," {'s': ['s'],\n"," 't': ['s', 't'],\n"," 'x': ['s', 't', 'x'],\n"," 'y': ['s', 'y'],\n"," 'z': ['s', 't', 'z']})"]},"metadata":{"tags":[]},"execution_count":143}]},{"cell_type":"markdown","metadata":{"id":"mnmQXroqDybK"},"source":["## The Bellman-Ford Algorithm\n","Use the dp vector and W"]},{"cell_type":"code","metadata":{"id":"VSNGROmJjX4m","executionInfo":{"status":"ok","timestamp":1615934606364,"user_tz":420,"elapsed":1612,"user":{"displayName":"Li Yin","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GgIRi0T9vdv7_5OZi96H4VGhtX2CMD9asUJHPo4cQ=s64","userId":"13365523799853678553"}}},"source":["g = {\n"," 's':[('t', 6), ('y', 7)], \n"," 't':[('x', 5), ('y', 8), ('z', -4)],\n"," 'x':[('t',-2)],\n"," 'y':[('x',-3), ('z',9)],\n"," 
## using W
def bellman_ford_dp(s, W):
    """Single-source shortest paths by dynamic programming over a weight matrix.

    Args:
        s: index of the source vertex.
        W: square matrix where ``W[k][i]`` is the weight of edge ``k -> i``
            (``inf`` when absent, ``0`` on the diagonal in the notebook's usage).

    Returns:
        ``(D, P)``: ``D[i]`` is the best distance from ``s`` to ``i`` using at
        most ``len(W) - 1`` edges, and ``P[i]`` the predecessor of ``i`` on
        that path (``None`` if never relaxed). The distance vector is printed
        after every round.
    """
    size = len(W)
    dist = [0 if idx == s else float('inf') for idx in range(size)]
    pred = [None] * size
    for round_no in range(1, size):
        # Relax against the previous round's distances only, so round m
        # covers paths of at most m edges.
        relaxed = list(dist)
        for target in range(size):
            for via, via_dist in enumerate(dist):
                candidate = via_dist + W[via][target]
                if candidate < relaxed[target]:
                    pred[target] = via
                    relaxed[target] = candidate
        dist = relaxed
        print(f'D{round_no}: {dist}')
    return dist, pred
def bellman_ford(g: dict, s: str):
    """Bellman-Ford single-source shortest paths on an adjacency-dict graph.

    Args:
        g: dict mapping each vertex to a list of ``(neighbor, weight)`` pairs.
           Vertices that only ever appear as a neighbor (no key of their own)
           are treated as vertices without outgoing edges.
        s: source vertex.

    Returns:
        ``(D, P, ver2idx, idx2ver)``: ``D[i]`` is the distance estimate and
        ``P[i]`` the predecessor index of the vertex with index ``i``;
        ``ver2idx`` / ``idx2ver`` translate between vertex labels and indices.
        The distance vector is printed after every pass.

    Raises:
        KeyError: if ``s`` is not a vertex of the graph.
    """
    # Assign an enumerated index to each vertex: keys first (in dict order),
    # then any vertex that only shows up as an edge head.
    vertices = list(g)
    known = set(vertices)
    for edges in g.values():
        for v, _ in edges:
            if v not in known:
                known.add(v)
                vertices.append(v)
    n = len(vertices)
    # Key to index
    ver2idx = {v: i for i, v in enumerate(vertices)}
    # Index to key
    idx2ver = dict(enumerate(vertices))

    # Initialize the distance estimates and predecessors
    si = ver2idx[s]
    D = [float('inf') if i != si else 0 for i in range(n)]
    P = [None] * n

    # n-1 passes
    for i in range(n - 1):
        # relax all edges
        for u, edges in g.items():
            ui = ver2idx[u]
            for v, w in edges:
                vi = ver2idx[v]
                # Update the minimum path value and predecessor
                if D[vi] > D[ui] + w:
                    D[vi] = D[ui] + w
                    P[vi] = ui
        print(f'D{i+1}: {D}')
    return D, P, ver2idx, idx2ver
def bellman_ford_dag(g, s):
    """Single-pass shortest paths on a DAG: relax edges in topological order.

    Args:
        g: dict mapping every vertex to a list of ``(neighbor, weight)`` pairs.
        s: source vertex.

    Returns:
        List indexed by vertex index (position of the vertex among ``g``'s
        keys) of ``(distance, predecessor index)`` tuples; unreached vertices
        keep ``(inf, None)``. The label-to-index map, the index-based
        adjacency list and the vertex order are printed along the way.
    """
    labels = list(g.keys())
    n = len(labels)
    # Key to index / index to key
    ver2idx = {label: pos for pos, label in enumerate(labels)}
    idx2ver = dict(enumerate(labels))
    print(ver2idx)

    # Index-based, unweighted copy of the graph for the topological sort
    ng = [[] for _ in range(n)]
    for u in labels:
        for v, _ in g[u]:
            ng[ver2idx[u]].append(ver2idx[v])
    print(ng)

    V = topo_sort(ng)
    print(V)

    # dp[i] = (distance estimate, predecessor index)
    si = ver2idx[s]
    dp = [(float('inf'), None)] * n
    dp[si] = (0, None)

    # Each vertex is final by the time it is reached in topological order,
    # so every edge needs to be relaxed only once.
    for ui in V:
        through = dp[ui][0]
        for v, w in g[idx2ver[ui]]:
            vi = ver2idx[v]
            if dp[vi][0] > through + w:
                dp[vi] = (through + w, ui)
    return dp
4]\n"]},{"output_type":"execute_result","data":{"text/plain":["[(0, None), (6, 0), (7, 0), (2, 1), (4, 2)]"]},"metadata":{},"execution_count":50}]},{"cell_type":"markdown","metadata":{"id":"MRUhFcSQ0wHS"},"source":["## Dijkstra"]},{"cell_type":"code","metadata":{"id":"Sy6OITLr1swv","executionInfo":{"status":"ok","timestamp":1615934663408,"user_tz":420,"elapsed":291,"user":{"displayName":"Li Yin","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GgIRi0T9vdv7_5OZi96H4VGhtX2CMD9asUJHPo4cQ=s64","userId":"13365523799853678553"}}},"source":["g = {\n"," 's':[('t', 6), ('y', 7)], \n"," 't':[('x', 5), ('y', 8), ('z', 4)],\n"," 'y':[('x', 3), ('z', 9)],\n"," 'z':[('x', 7)],\n"," 'x':[('t', 2)], \n","}\n","\n","g = {\n"," '0':[('1', 50), ('2', 30), ('3', 100), ('4', 10)], \n"," '1':[('0', 50), ('2', 5), ('3', 20)],\n"," '2':[('1', 5), ('3', 6), ('0', 30)],\n"," '3':[('0', 100), ('1', 20), ('2', 6), ('4', 20)],\n"," '4':[('0', 10),('3', 20)], \n","}\n","\n"],"execution_count":164,"outputs":[]},{"cell_type":"code","metadata":{"id":"vxCW0Bg0XfaZ","executionInfo":{"status":"aborted","timestamp":1615934606819,"user_tz":420,"elapsed":2010,"user":{"displayName":"Li Yin","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GgIRi0T9vdv7_5OZi96H4VGhtX2CMD9asUJHPo4cQ=s64","userId":"13365523799853678553"}}},"source":["g"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"ADsUVzERjePf","executionInfo":{"status":"aborted","timestamp":1615934606820,"user_tz":420,"elapsed":2009,"user":{"displayName":"Li Yin","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GgIRi0T9vdv7_5OZi96H4VGhtX2CMD9asUJHPo4cQ=s64","userId":"13365523799853678553"}}},"source":["plot_graph(g, name=\"directed_nonnegative_graph\")"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"dH-Jq9Hs006V","executionInfo":{"status":"aborted","timestamp":1615934606820,"user_tz":420,"elapsed":2007,"user":{"displayName":"Li 
from typing import List, Tuple
import sys


def initialize(Q, g, s):
    """Fill the queue ``Q`` with every vertex of ``g`` and seed the source ``s``.

    Queue entries use: task = vertex id, priority = distance estimate,
    info = predecessor vertex.
    """
    # put all vertices into Q, "infinitely" far away and without predecessor
    for k in g.keys():
        Q.add_task(task=k, priority=sys.maxsize, info=None)
    # the source itself is at distance 0
    Q.add_task(task=s, priority=0, info=None)
    # set weight for vertices one edge away from source; only ever lower an
    # estimate so parallel edges or a self-loop cannot make it worse
    for v, w in g[s]:
        current, _ = Q.get_task(v)
        if current is not None and w < current:
            Q.add_task(task=v, priority=w, info=s)


def extract_min(Q):
    '''Pop the vertex with the smallest distance estimate.

    Returns ``(vertex, distance, predecessor)``.
    '''
    task, info, pri = Q.pop_task()
    return task, pri, info


def update(Q, g, cid, cw):
    '''Relax every edge leaving ``cid``, whose final distance is ``cw``.

    A neighbor's queue entry is replaced only when going through ``cid`` is
    strictly shorter than its current estimate.
    '''
    for v, w in g[cid]:  # target
        pw, _ = Q.get_task(v)
        # get_task yields (None, None) for vertices no longer queued, i.e.
        # already finalized. Compare against None explicitly: an estimate of
        # 0 is a valid queued distance.
        if pw is None:
            continue
        if cw + w < pw:
            Q.add_task(task=v, priority=cw + w, info=cid)


def dijkstra(g, s):
    '''Dijkstra's single-source shortest paths for non-negative edge weights.

    Args:
        g: dict mapping every vertex to a list of ``(neighbor, weight)`` pairs.
        s: source vertex.

    Returns:
        List of ``(vertex, distance, predecessor)`` in the order the vertices
        were finalized; each tuple is also printed as it is extracted.
    '''
    Q = PriorityQueue()  # every vertex, keyed by its distance estimate
    S = []
    # initialize
    initialize(Q, g, s)
    while not Q.empty():
        min_id, min_w, p = extract_min(Q)
        print(min_id, min_w, p)
        S.append((min_id, min_w, p))
        # need to update weight in the queue
        update(Q, g, min_id, min_w)
    return S
def dijkstra(g, s):
    """Dijkstra's algorithm that queues vertices lazily, on first discovery.

    Args:
        g: adjacency mapping ``{vertex: [(neighbour, weight), ...]}`` with
            non-negative weights.
        s: source vertex.

    Returns:
        List of ``(vertex, distance, predecessor)`` in the order the
        vertices were settled.
    """
    Q = PriorityQueue()
    S = []
    # task: vertex id, priority: shortest-path estimate, info: predecessor
    Q.add_task(task=s, priority=0, info=None)
    visited = set()
    while not Q.empty():
        # Settle the vertex with the smallest estimate.
        u, up, ud = Q.pop_task()
        visited.add(u)
        S.append((u, ud, up))

        # Relax the edges leaving u.
        for v, w in g[u]:
            if v in visited:  # shortest path to v is already final
                continue
            vd, _vp = Q.get_task(v)
            # NOTE(review): assumes get_task reports a None priority for a
            # vertex that has never been queued -- confirm against
            # PriorityQueue.  The test must be `is None`, not truthiness:
            # an estimate of 0 (reached over zero-weight edges) is valid
            # and must not be overwritten by a larger value.
            if vd is None or ud + w < vd:
                Q.add_task(task=v, priority=ud + w, info=u)
    return S
def plot_subgraph(dot, v_pos, g, Q, index):
    """Draw the current queue state onto ``dot`` and render one frame.

    On the first frame (``index == 0``) every vertex in ``v_pos`` is drawn
    at its pinned position with an infinity label, together with all of its
    outgoing edges.  On every frame, each live queue entry is drawn in red
    with its current estimate, plus a red edge from its predecessor.

    Args:
        dot: graph object exposing ``attr``, ``node``, ``edge``, ``render``.
        v_pos: mapping ``{vertex: position string}``.
        g: adjacency mapping ``{vertex: [(neighbour, weight), ...]}``.
        Q: queue whose ``pq`` entries unpack as
            ``(priority, _, task, info)`` and whose ``REMOVED`` marks
            deleted entries.
        index: frame number; used in the output file name.
    """
    dot.attr(_attributes={'fontsize': '15'})
    name = ''  # suffix appended to node names; empty keeps the vertex ids
    if index == 0:
        for u, p in v_pos.items():
            # `label` is shown inside the node, `xlabel` is the tag beside
            # it, and `name` is what edges refer to.
            dot.node(name=u+name, label='<∞>', xlabel=u,
                     _attributes={'pos': str(p), 'penwidth': "1.5"})
            for v, w in g[u]:
                dot.edge(tail_name=u+name, head_name=v+name,
                         _attributes={'xlabel': str(w), 'penwidth': "1.5"})

    for vd, _, v, u in Q.pq:
        if v == Q.REMOVED:
            continue
        dot.node(name=v+name, label=str(vd), xlabel=v,
                 _attributes={'color': 'red', 'penwidth': "1.5"})
        if u is not None:
            dot.edge(u+name, v+name, _attributes={'color': 'red', 'penwidth': "1.5"})
    dot.render(f'test-output/dijkstra_{index}', view=True, format='png')


def dijkstra_fig(g, s, v_pos=None):
    """Run Dijkstra from ``s`` and render a picture after every step.

    Settled vertices and their tree edges are drawn in blue; queued
    vertices are drawn in red by ``plot_subgraph``; an edge that stops
    being the best known way into a vertex is reset to black.

    Args:
        g: adjacency mapping ``{vertex: [(neighbour, weight), ...]}`` with
            non-negative weights.
        s: source vertex.
        v_pos: optional ``{vertex: position string}`` layout.  Defaults to
            the layout of the five-vertex s/t/x/y/z example; pass your own
            for any other graph.

    Returns:
        ``(S, dot)`` where ``S`` lists ``(vertex, distance, predecessor)``
        in settling order and ``dot`` is the graph object that was drawn.
    """
    Q = PriorityQueue()
    S = []
    # task: vertex id, priority: shortest-path estimate, info: predecessor
    Q.add_task(task=s, priority=0, info=None)
    visited = set()
    if v_pos is None:
        v_pos = {'s': '0,0!', 't': '1,1!', 'x': '3, 1!', 'y': '1,-1!', 'z': '3,-1!'}
    idx = 0
    dot = Digraph(comment='directed_graph', engine="neato", format='png', strict=True, )
    plot_subgraph(dot, v_pos, g, Q, idx)
    while not Q.empty():
        idx += 1
        # Settle the vertex with the smallest estimate and paint it blue.
        u, up, ud = Q.pop_task()
        dot.node(name=u, label=str(ud), xlabel=u,
                 _attributes={'pos': v_pos[u], 'penwidth': "1.5", 'color': 'blue'})
        if up is not None:
            dot.edge(up, u, _attributes={'color': 'blue', 'penwidth': "1.5"})
        dot.render(f'test-output/dijkstra_{idx-1+0.5}', view=True, format='png')

        visited.add(u)
        S.append((u, ud, up))

        # Relax the edges leaving u.
        for v, w in g[u]:
            if v in visited:  # shortest path to v is already final
                continue

            vd, vp = Q.get_task(v)
            # NOTE(review): assumes get_task reports a None priority for a
            # vertex that has never been queued -- confirm.  Truthiness
            # would wrongly treat a valid estimate of 0 as "not queued".
            improved = vd is not None and ud + w < vd
            if vd is None or improved:
                Q.add_task(task=v, priority=ud + w, info=u)
            # The previous best edge into v is no longer on the tree.
            if improved:
                dot.edge(vp, v, _attributes={'color': 'black', 'penwidth': "1.5"})
        # Draw the updated queue state for this step.
        plot_subgraph(dot, v_pos, g, Q, idx)
    return S, dot
import copy


def bellman_ford(W, L):
    """Extend every known shortest path in ``L`` by one edge, in place.

    Args:
        W: n x n edge-weight matrix (``inf`` where there is no edge).
        L: n x n matrix of current shortest-path estimates; updated in place.
    """
    n = len(W)
    for i in range(n):          # source
        for j in range(n):      # endpoint
            for k in range(n):  # last intermediate vertex before j
                L[i][j] = min(L[i][j], L[i][k] + W[k][j])


def extended_bellman_ford(W):
    """All-pairs shortest paths by repeating the one-edge extension.

    Starts from ``L1 = W`` and applies ``bellman_ford`` ``n - 2`` times,
    printing the matrix after each pass.

    Args:
        W: n x n edge-weight matrix; not modified.

    Returns:
        n x n matrix of shortest-path weights.
    """
    n = len(W)
    L = copy.deepcopy(W)
    print(f'L1 : {L} \n')
    for i in range(n-2):
        bellman_ford(W, L)
        print(f'L{i+2}: {L} \n')
    return L


def bellman_ford_with_predecessor(W, L, P):
    """One in-place extension pass that also records predecessors.

    Args:
        W: n x n edge-weight matrix.
        L: n x n shortest-path estimates; updated in place.
        P: n x n matrix where ``P[i][j]`` is the vertex before ``j`` on the
            best known path from ``i``; updated in place.
    """
    n = len(W)
    for i in range(n):          # source
        for j in range(n):      # endpoint
            for k in range(n):  # last intermediate vertex before j
                if L[i][k] + W[k][j] < L[i][j]:
                    L[i][j] = L[i][k] + W[k][j]
                    P[i][j] = k


def extended_bellman_ford_with_predecessor(W):
    """All-pairs shortest paths plus the predecessor matrix.

    Args:
        W: n x n edge-weight matrix; not modified.

    Returns:
        ``(L, P)``: shortest-path weights and predecessors.  ``P[i][j]`` is
        ``None`` when ``i == j`` or when ``j`` is unreachable from ``i``.
    """
    n = len(W)
    L = copy.deepcopy(W)
    print(f'L1 : {L} \n')
    P = [[None for _ in range(n)] for _ in range(n)]
    for i in range(n):
        for j in range(n):
            # Every direct edge i -> j starts with i as predecessor.  The
            # test is on i != j rather than on a non-zero weight, so that a
            # zero-weight edge is still recorded.
            if i != j and L[i][j] != float('inf'):
                P[i][j] = i
    for i in range(n-2):
        bellman_ford_with_predecessor(W, L, P)
        print(f'L{i+2}: {L} \n')
    return L, P


def print_path(P, s, u, path):
    """Rebuild the path from ``s`` to ``u`` by walking predecessors backwards.

    Args:
        P: predecessor matrix as returned by
            ``extended_bellman_ford_with_predecessor``.
        s: index of the source vertex.
        u: index of the target vertex.
        path: list that collects visited vertices (callers pass ``[]``).

    Returns:
        Vertex indices from ``s`` to ``u``, or ``[]`` when no path exists.
    """
    target = u
    # A simple path visits at most len(P) vertices; walking further means
    # the predecessor links form a cycle that never reaches s.
    for _ in range(len(P)):
        if u is None:
            break
        path.append(u)
        if u == s:
            print('Reached to the source vertex, stop!')
            return path[::-1]
        u = P[s][u]
    print(f"No path found between {s} and {target}.")
    return []
Yin","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GgIRi0T9vdv7_5OZi96H4VGhtX2CMD9asUJHPo4cQ=s64","userId":"13365523799853678553"}}},"source":["# get path from s to t\n","path = print_path(P, key2idx['t'], key2idx['z'], [])\n","path = [idx2key[i] for i in path]\n","path"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"Q5FL1hXGyNll","executionInfo":{"status":"aborted","timestamp":1615934606829,"user_tz":420,"elapsed":1992,"user":{"displayName":"Li Yin","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GgIRi0T9vdv7_5OZi96H4VGhtX2CMD9asUJHPo4cQ=s64","userId":"13365523799853678553"}}},"source":["#Find edges for each shortest_path tree\n","shortest_path_trees = [[] for _ in range(n)]\n","for i in range(n):\n"," for j in range(n):\n"," path = print_path(P, i, j, [])\n"," for u, v in zip(path[:-1], path[1:]):\n"," if (u, v) not in shortest_path_trees[i]:\n"," shortest_path_trees[i].append((u, v))\n","# now replace it all to keys\n","shortest_path_tree_dic = {}\n","for i in range(len(shortest_path_trees)):\n"," s = idx2key[i]\n"," if s not in shortest_path_tree_dic:\n"," shortest_path_tree_dic[s] = []\n"," for u, v in shortest_path_trees[i]:\n"," shortest_path_tree_dic[s].append((idx2key[u], idx2key[v]))\n","shortest_path_tree_dic"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"j6s6Uauc3AVz","executionInfo":{"status":"aborted","timestamp":1615934606829,"user_tz":420,"elapsed":1990,"user":{"displayName":"Li Yin","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GgIRi0T9vdv7_5OZi96H4VGhtX2CMD9asUJHPo4cQ=s64","userId":"13365523799853678553"}}},"source":["from google.colab.patches import cv2_imshow\n","import cv2\n","\n","for idx, s in enumerate(list(shortest_path_tree_dic.keys())):\n"," # stric so that one edge wont be repeat but keep the last time's edit\n"," dot = Digraph(comment='directed_graph', engine=\"neato\", format='png', strict=True)\n"," # Generate grapg\n"," v_pos = {'s': '0,0!', 't': '1,1!', 
import copy
import math


def bellman_ford_repeated_square(L):
    """Combine the estimate matrix with itself, in place.

    After the call every ``L[i][j]`` is at most the best ``L[i][k] + L[k][j]``
    over all ``k``, i.e. the maximum number of edges covered by the
    estimates has (at least) doubled.

    Args:
        L: n x n matrix of shortest-path estimates; updated in place.
    """
    n = len(L)  # size comes from the argument, not from a global matrix
    for i in range(n):          # source
        for j in range(n):      # endpoint
            for k in range(n):  # midpoint where two half-paths are joined
                L[i][j] = min(L[i][j], L[i][k] + L[k][j])


def extended_bellman_ford_repeated_square(W):
    """All-pairs shortest paths by repeated squaring.

    A shortest path has at most ``n - 1`` edges and each squaring doubles
    the covered path length, so ``ceil(log2(n - 1))`` squarings suffice.
    The matrix is printed after each squaring, labelled with the path
    length it covers (L2, L4, L8, ...).

    Args:
        W: n x n edge-weight matrix (``inf`` where there is no edge);
            not modified.

    Returns:
        n x n matrix of shortest-path weights.
    """
    n = len(W)
    L = copy.deepcopy(W)
    print(f'L1 : {L} \n')
    # Base-2 logarithm; n <= 2 needs no squaring (and log2(0) is undefined).
    passes = math.ceil(math.log2(n - 1)) if n > 2 else 0
    for i in range(passes):
        bellman_ford_repeated_square(L)
        # `**` is exponentiation; `^` would be bitwise XOR.
        print(f'L{2**(i+1)}: {L} \n')
    return L
def floyd_warshall(W):
    """All-pairs shortest paths with the Floyd-Warshall algorithm.

    Args:
        W: n x n edge-weight matrix (``inf`` where there is no edge,
            0 on the diagonal); not modified.

    Returns:
        n x n matrix of shortest-path weights.  The matrix is also printed.
    """
    # Row-wise copy: keeps the caller's matrix intact without depending on
    # `copy` having been imported by some other cell.
    L = [list(row) for row in W]
    n = len(W)
    unreachable = float('inf')
    for k in range(n):          # intermediate vertex
        for i in range(n):      # start vertex
            # Nothing can be improved through k if i cannot reach k.
            if L[i][k] == unreachable:
                continue
            for j in range(n):  # end vertex
                L[i][j] = min(L[i][j], L[i][k] + L[k][j])
    print(L)
    return L
# Linear Search
def linear_search(A, t):
    """Return the index of the first element of ``A`` equal to ``t``.

    Args:
        A: iterable to scan from left to right.
        t: value to look for.

    Returns:
        Zero-based position of the first match, or -1 when there is none.
    """
    for i, v in enumerate(A):
        if v == t:
            return i
    return -1
def bfs(g, s):
    """Breadth-first tree search from ``s``, printing vertices as they are visited.

    No visited set is kept, so a vertex reachable along several paths is
    printed once per path and a cyclic graph never terminates.

    Args:
        g: adjacency mapping ``{vertex: [(neighbour, weight), ...]}``.
        s: start vertex.
    """
    from collections import deque  # O(1) dequeue; list.pop(0) is O(n)

    q = deque([s])
    while q:
        n = q.popleft()
        print(n, end=' ')
        for v, _ in g[n]:
            q.append(v)
# Implementation of recursive dfs
def dfs(g, vi):
    """Depth-first tree search: print ``vi``, then recurse into each child.

    Children are taken in adjacency-list order; no visited set is kept.

    Args:
        g: adjacency mapping ``{vertex: [(neighbour, weight), ...]}``.
        vi: vertex to expand.
    """
    print(vi, end=' ')
    for child, _weight in g[vi]:
        dfs(g, child)


# Implementation of iterative dfs
def dfs_iter(g, s):
    """Depth-first tree search driven by an explicit stack.

    Children are pushed in adjacency-list order, so the last listed child
    is expanded first.

    Args:
        g: adjacency mapping ``{vertex: [(neighbour, weight), ...]}``.
        s: start vertex.
    """
    pending = [s]
    while len(pending) > 0:
        current = pending.pop()
        print(current, end=' ')
        pending.extend(child for child, _weight in g[current])
import heapq


def ucs(graph, s, t):
    """Uniform-cost tree search: cost of the cheapest path from ``s`` to ``t``.

    Args:
        graph: adjacency mapping ``{vertex: [(neighbour, cost), ...]}``.
        s: start vertex.
        t: goal vertex.

    Returns:
        Total cost of the cheapest path, or ``None`` when the frontier runs
        empty without reaching ``t``.
    """
    q = [(0, s)]  # frontier of (path cost so far, vertex)
    while q:
        cost, n = heapq.heappop(q)
        # The goal test happens when a vertex leaves the queue, not when it
        # enters: only then is its cost known to be minimal.
        if n == t:
            return cost
        for v, c in graph[n]:
            heapq.heappush(q, (c + cost, v))
    return None


# Depth limited Search
def dls(graph, cur, t, maxDepth):
    """Depth-limited search: is ``t`` exactly ``maxDepth`` edges below ``cur``?

    Only vertices sitting at the depth limit are tested (and printed).

    Args:
        graph: adjacency mapping ``{vertex: [(neighbour, cost), ...]}``.
        cur: vertex being expanded.
        t: goal vertex.
        maxDepth: remaining number of edges that may be followed.

    Returns:
        ``True`` if the goal was found at the depth limit, else ``False``.
    """
    if maxDepth < 0:
        return False
    if maxDepth == 0:
        print('test: ', cur, end=' ')
        # Stop here: anything deeper is beyond the limit, so there is no
        # point in recursing into the children.
        return cur == t

    for n, _ in graph[cur]:
        if dls(graph, n, t, maxDepth - 1):
            return True
    return False


# Iterative-deepening search
def ids(graph, s, t, maxDepth):
    """Iterative deepening: run ``dls`` with limits 0, 1, ..., ``maxDepth - 1``.

    Args:
        graph: adjacency mapping ``{vertex: [(neighbour, cost), ...]}``.
        s: start vertex.
        t: goal vertex.
        maxDepth: number of depth limits to try (the limit ``maxDepth``
            itself is not searched).

    Returns:
        ``True`` as soon as one depth-limited search finds ``t``.
    """
    for i in range(maxDepth):
        print('depth:', i)
        if dls(graph, s, t, i):
            return True
        print('\n')
    return False


def bfs_level(graph, q, bStep):
    """Advance a breadth-first frontier by one level.

    Args:
        graph: adjacency mapping ``{vertex: [(neighbour, cost), ...]}``.
        q: current frontier as a set (cheap intersection checks).
        bStep: when false the frontier is returned unchanged.

    Returns:
        Set of all neighbours of the frontier, or ``q`` itself.
    """
    if not bStep:
        return q
    nq = set()
    for n in q:
        # .get() handles vertices without outgoing edges on a plain dict
        # and avoids inserting empty entries into a defaultdict graph.
        for v, _cost in graph.get(n, ()):
            nq.add(v)
    return nq


def intersect(qs, qt):
    """Return ``True`` when the two frontiers share at least one vertex."""
    return bool(qs & qt)


def bis(graph, s, t):
    """Bidirectional search: does a path from ``s`` to ``t`` exist?

    A forward frontier grows from ``s`` along the edges and a backward
    frontier grows from ``t`` along the reversed edges; they take turns
    advancing one level, and the search succeeds when they share a vertex.
    Both graphs and every pair of frontiers are printed.

    Args:
        graph: adjacency mapping ``{vertex: [(neighbour, cost), ...]}``.
        s: start vertex.
        t: goal vertex.

    Returns:
        ``True`` if the frontiers meet, ``False`` if one of them runs empty.
    """
    from collections import defaultdict  # do not rely on another cell's import

    # Build the graph with every edge reversed.
    bgraph = defaultdict(list)
    for key, value in graph.items():
        for n, c in value:
            bgraph[n].append((key, c))
    print(graph, bgraph)

    qs = {s}
    qt = {t}
    step = 0  # 0: forward side moves next, 1: backward side moves next
    while qs and qt:
        print(qs, qt)
        if intersect(qs, qt):
            return True
        qs = bfs_level(graph, qs, step % 2 == 0)
        qt = bfs_level(bgraph, qt, step % 2 == 1)
        step = 1 - step
    return False
# Prepare Graph Example
# Adjacency list of a free tree (no cycle): ft[u] lists the vertices reached from u
ft = [[1], [2], [4], [], [3, 5], []]

# Directed cyclic graph: contains the cycle 0 -> 1 -> 2 -> 0
dcg = [[1], [2],[0, 4], [1], [3, 5], [] ]

# Prepare Graph Example
# Adjacency list with cycles; every edge appears in the lists of both endpoints
ucg = [[1, 2], [0, 2, 3], [0, 1, 4], [1, 4], [2, 3, 5], [4]]

# STATE class: three integer constants.
# NOTE(review): presumably vertex colours for graph traversal (white = not
# yet visited, gray = being explored, black = finished) -- the code that
# uses them is outside this excerpt; confirm.
class STATE:
    white = 0
    gray = 1
    black = 2
# Free Tree Search
def dfs(g, vi):
    """Depth-first traversal of a free tree, printing vertices in visiting order.

    No visited set is kept, so this only terminates on acyclic input.

    Args:
        g: adjacency list where ``g[v]`` is the sequence of vertices
            reachable from ``v`` by one edge.
        vi: vertex to expand.
    """
    print(vi, end=' ')
    successors = g[vi]
    for successor in successors:
        dfs(g, successor)
0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 
1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 
2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2"]},{"output_type":"error","ename":"RecursionError","evalue":"maximum recursion depth exceeded while encoding a JSON object","traceback":["\u001b[0;31m---------------------------------------------------------------------------\u001b[0m","\u001b[0;31mRecursionError\u001b[0m Traceback (most recent call last)","\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;31m# Failed with directed cyclic graph\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0mdfs\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdcg\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m","\u001b[0;32m\u001b[0m in \u001b[0;36mdfs\u001b[0;34m(g, vi)\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mvi\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mend\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m' '\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mnv\u001b[0m \u001b[0;32min\u001b[0m 
\u001b[0mg\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mvi\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 5\u001b[0;31m \u001b[0mdfs\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mg\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnv\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m","... last 1 frames repeated, from the frame below ...\n","\u001b[0;32m\u001b[0m in \u001b[0;36mdfs\u001b[0;34m(g, vi)\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mvi\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mend\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m' '\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mnv\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mg\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mvi\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 5\u001b[0;31m \u001b[0mdfs\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mg\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnv\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m","\u001b[0;31mRecursionError\u001b[0m: maximum recursion depth exceeded while encoding a JSON object"]}]},{"cell_type":"code","metadata":{"id":"95BRIU3VlyAb"},"source":["# Graph Search, track paths and avoids cycle\n","def dfs(g, vi, path):\n"," paths.append(path)\n"," orders.append(vi)\n"," for nv in g[vi]: \n"," if nv not in path: \n"," dfs(g, nv, path+[nv])\n"," return "],"execution_count":35,"outputs":[]},{"cell_type":"code","metadata":{"id":"LjXfmd17mKAi","colab":{"base_uri":"https://localhost:8080/","height":52},"executionInfo":{"status":"ok","timestamp":1577839680383,"user_tz":480,"elapsed":5860,"user":{"displayName":"Li 
Yin","photoUrl":"https://lh3.googleusercontent.com/a-/AAuE7mDHkgtY8ax6h1ZXf46S1b3QtrhKej1YjeAjlwhj5w=s64","userId":"13365523799853678553"}},"outputId":"8526d563-8e86-4508-84c2-c9b4cf393cec"},"source":["# Test free tree\n","paths, orders = [], []\n","dfs(ft, 0, [0])\n","paths, orders"],"execution_count":36,"outputs":[{"output_type":"execute_result","data":{"text/plain":["([[0], [0, 1], [0, 1, 2], [0, 1, 2, 4], [0, 1, 2, 4, 3], [0, 1, 2, 4, 5]],\n"," [0, 1, 2, 4, 3, 5])"]},"metadata":{},"execution_count":36}]},{"cell_type":"code","metadata":{"id":"Ki0HtnO8mgD4","colab":{"base_uri":"https://localhost:8080/","height":52},"executionInfo":{"status":"ok","timestamp":1577839680386,"user_tz":480,"elapsed":5850,"user":{"displayName":"Li Yin","photoUrl":"https://lh3.googleusercontent.com/a-/AAuE7mDHkgtY8ax6h1ZXf46S1b3QtrhKej1YjeAjlwhj5w=s64","userId":"13365523799853678553"}},"outputId":"546bd0ca-41ca-413d-923c-7ec57f1f8a05"},"source":["# Test dcg\n","paths, orders = [], []\n","dfs(dcg, 0, [0])\n","paths, orders"],"execution_count":37,"outputs":[{"output_type":"execute_result","data":{"text/plain":["([[0], [0, 1], [0, 1, 2], [0, 1, 2, 4], [0, 1, 2, 4, 3], [0, 1, 2, 4, 5]],\n"," [0, 1, 2, 4, 3, 5])"]},"metadata":{},"execution_count":37}]},{"cell_type":"code","metadata":{"id":"nARQsx-3nlGd","colab":{"base_uri":"https://localhost:8080/","height":364},"executionInfo":{"status":"ok","timestamp":1577839680387,"user_tz":480,"elapsed":5835,"user":{"displayName":"Li Yin","photoUrl":"https://lh3.googleusercontent.com/a-/AAuE7mDHkgtY8ax6h1ZXf46S1b3QtrhKej1YjeAjlwhj5w=s64","userId":"13365523799853678553"}},"outputId":"fe99a308-04d7-44ab-80f7-e0b90cb91f33"},"source":["# Test ucg\n","paths, orders = [], []\n","dfs(ucg, 0, [0])\n","paths, orders"],"execution_count":38,"outputs":[{"output_type":"execute_result","data":{"text/plain":["([[0],\n"," [0, 1],\n"," [0, 1, 2],\n"," [0, 1, 2, 4],\n"," [0, 1, 2, 4, 3],\n"," [0, 1, 2, 4, 5],\n"," [0, 1, 3],\n"," [0, 1, 3, 4],\n"," [0, 1, 3, 4, 2],\n"," 
[0, 1, 3, 4, 5],\n"," [0, 2],\n"," [0, 2, 1],\n"," [0, 2, 1, 3],\n"," [0, 2, 1, 3, 4],\n"," [0, 2, 1, 3, 4, 5],\n"," [0, 2, 4],\n"," [0, 2, 4, 3],\n"," [0, 2, 4, 3, 1],\n"," [0, 2, 4, 5]],\n"," [0, 1, 2, 4, 3, 5, 3, 4, 2, 5, 2, 1, 3, 4, 5, 4, 3, 1, 5])"]},"metadata":{},"execution_count":38}]},{"cell_type":"markdown","metadata":{"id":"ZVhNQx4DgDe6"},"source":["Draw the search tree"]},{"cell_type":"code","metadata":{"id":"mSYmoZ_0gG4B"},"source":["# Only keep the longest path\n","def dfs_helper(g, vi, path):\n"," orders.append(vi)\n"," bpath = True\n"," for nv in g[vi]: \n"," if nv not in path: \n"," dfs_helper(g, nv, path+[nv])\n"," bpath = False\n"," if bpath:\n"," paths.append(path)\n"," return "],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"C0wF99z6huEu","colab":{"base_uri":"https://localhost:8080/","height":156},"executionInfo":{"status":"ok","timestamp":1577839680388,"user_tz":480,"elapsed":5814,"user":{"displayName":"Li Yin","photoUrl":"https://lh3.googleusercontent.com/a-/AAuE7mDHkgtY8ax6h1ZXf46S1b3QtrhKej1YjeAjlwhj5w=s64","userId":"13365523799853678553"}},"outputId":"96f8adfa-b67d-4751-9fc1-44648993cb49"},"source":["paths, orders = [], []\n","dfs_helper(ucg, 0, [0])\n","paths, orders"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["([[0, 1, 2, 4, 3],\n"," [0, 1, 2, 4, 5],\n"," [0, 1, 3, 4, 2],\n"," [0, 1, 3, 4, 5],\n"," [0, 2, 1, 3, 4, 5],\n"," [0, 2, 4, 3, 1],\n"," [0, 2, 4, 5]],\n"," [0, 1, 2, 4, 3, 5, 3, 4, 2, 5, 2, 1, 3, 4, 5, 4, 3, 1, 5])"]},"metadata":{"tags":[]},"execution_count":509}]},{"cell_type":"code","metadata":{"id":"4TVSieudlvH3"},"source":["# Track edges\n","def dfs_helper(g, vi, path):\n"," orders.append(vi)\n"," node = (vi, 0)\n"," if vi in tracker:\n"," node = (vi, tracker[vi] + 1)\n"," tracker[vi] += 1\n"," else:\n"," tracker[vi] = 0\n"," for nv in g[vi]: \n"," if nv not in path: \n"," # add an edge\n"," node1 = (nv, 0)\n"," if nv in tracker:\n"," node1 = (nv, 
tracker[nv]+1)\n"," edges[node].append(node1)\n"," dfs_helper(g, nv, path+[nv]) \n"," return "],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"WwGSwE4PnRbB","colab":{"base_uri":"https://localhost:8080/","height":312},"executionInfo":{"status":"ok","timestamp":1577839680391,"user_tz":480,"elapsed":5789,"user":{"displayName":"Li Yin","photoUrl":"https://lh3.googleusercontent.com/a-/AAuE7mDHkgtY8ax6h1ZXf46S1b3QtrhKej1YjeAjlwhj5w=s64","userId":"13365523799853678553"}},"outputId":"1dffc8ad-fa7e-4694-ea51-ad50d18cd0e6"},"source":["paths, orders = [], []\n","tracker = defaultdict(int) # node: maximum count\n","edges = defaultdict(list) # node: node\n","dfs_helper(ucg, 0, [0])\n","paths, orders, edges, tracker, len(orders)"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["([],\n"," [0, 1, 2, 4, 3, 5, 3, 4, 2, 5, 2, 1, 3, 4, 5, 4, 3, 1, 5],\n"," defaultdict(list,\n"," {(0, 0): [(1, 0), (2, 2)],\n"," (1, 0): [(2, 0), (3, 1)],\n"," (1, 1): [(3, 2)],\n"," (2, 0): [(4, 0)],\n"," (2, 2): [(1, 1), (4, 3)],\n"," (3, 1): [(4, 1)],\n"," (3, 2): [(4, 2)],\n"," (3, 3): [(1, 2)],\n"," (4, 0): [(3, 0), (5, 0)],\n"," (4, 1): [(2, 1), (5, 1)],\n"," (4, 2): [(5, 2)],\n"," (4, 3): [(3, 3), (5, 3)]}),\n"," defaultdict(int, {0: 0, 1: 2, 2: 2, 3: 3, 4: 3, 5: 3}),\n"," 19)"]},"metadata":{"tags":[]},"execution_count":511}]},{"cell_type":"markdown","metadata":{"id":"ZD5tQSWFoVOK"},"source":["We can see that as the edges in the directed graph increase, the search tree explodes exponentially with the number of edges. 
"]},{"cell_type":"code","metadata":{"id":"yCuIaMa4c-Cl"},"source":["#Graph Search, track paths and avoids cycle\n","def recursive(g, vi, path):\n"," '''\n"," g: graph as an adjacency list\n"," vi: the vertex index\n"," '''\n"," #print(vi, end=' ')\n"," paths, nodes = [[]], []\n"," for nv in g[vi]: \n"," if nv not in path: \n"," spaths, snodes = recursive(g, nv, path+[nv])\n"," paths.extend(spaths)\n"," nodes.extend(snodes)\n"," paths = [[vi] + p for p in paths]\n"," return paths, [vi] + nodes\n"," "],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"FUkmQgjndWkn","colab":{"base_uri":"https://localhost:8080/","height":364},"executionInfo":{"status":"ok","timestamp":1577839680396,"user_tz":480,"elapsed":5751,"user":{"displayName":"Li Yin","photoUrl":"https://lh3.googleusercontent.com/a-/AAuE7mDHkgtY8ax6h1ZXf46S1b3QtrhKej1YjeAjlwhj5w=s64","userId":"13365523799853678553"}},"outputId":"d10feafd-209b-43fd-c43d-fd9f5ef00fd6"},"source":["# print path with free tree\n","recursive(ucg, 0, [0])\n"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["([[0],\n"," [0, 1],\n"," [0, 1, 2],\n"," [0, 1, 2, 4],\n"," [0, 1, 2, 4, 3],\n"," [0, 1, 2, 4, 5],\n"," [0, 1, 3],\n"," [0, 1, 3, 4],\n"," [0, 1, 3, 4, 2],\n"," [0, 1, 3, 4, 5],\n"," [0, 2],\n"," [0, 2, 1],\n"," [0, 2, 1, 3],\n"," [0, 2, 1, 3, 4],\n"," [0, 2, 1, 3, 4, 5],\n"," [0, 2, 4],\n"," [0, 2, 4, 3],\n"," [0, 2, 4, 3, 1],\n"," [0, 2, 4, 5]],\n"," [0, 1, 2, 4, 3, 5, 3, 4, 2, 5, 2, 1, 3, 4, 5, 4, 3, 1, 5])"]},"metadata":{"tags":[]},"execution_count":514}]},{"cell_type":"markdown","metadata":{"id":"qG_4NLC-o4bn"},"source":["#### Graph-based Search that avoids repeating vertex"]},{"cell_type":"code","metadata":{"id":"NTH_S5wQi_Lf"},"source":["#Avoid Repeating Vertex\n","def dfgs(g, vi, visited, path):\n"," visited.add(vi)\n"," orders.append(vi)\n"," bEnd = True # node without unvisited adjacent nodes \n"," for nv in g[vi]: \n"," if nv not in visited: \n"," if 
bEnd:\n"," bEnd = False\n"," dfgs(g, nv, visited, path + [nv])\n"," if bEnd:\n"," paths.append(path)"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"037PpuQvjMTj","colab":{"base_uri":"https://localhost:8080/","height":34},"executionInfo":{"status":"ok","timestamp":1577839680399,"user_tz":480,"elapsed":5730,"user":{"displayName":"Li Yin","photoUrl":"https://lh3.googleusercontent.com/a-/AAuE7mDHkgtY8ax6h1ZXf46S1b3QtrhKej1YjeAjlwhj5w=s64","userId":"13365523799853678553"}},"outputId":"0f8982ed-76ef-4e3d-faf6-2b317cd6aa15"},"source":["paths, orders = [], []\n","dfgs(ucg, 0, set(), [0])\n","paths, orders"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["([[0, 1, 2, 4, 3], [0, 1, 2, 4, 5]], [0, 1, 2, 4, 3, 5])"]},"metadata":{"tags":[]},"execution_count":516}]},{"cell_type":"code","metadata":{"id":"vn-U1OFJof4B"},"source":["def backtrace(parent, s, t):\n"," p = t\n"," path = []\n"," while p != s:\n"," path.append(p)\n"," p = parent[p]\n"," path.append(s)\n"," return path[::-1]"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"1aN6kwYPoH5d"},"source":["# Backtrace path\n","# s is used to backtrace the path\n","def dfgs(g, vi, s, t, visited, parent): \n"," visited.add(vi) \n"," if vi == t:\n"," return backtrace(parent, s, t)\n","\n"," for nv in g[vi]: \n"," if nv not in visited: \n"," parent[nv] = vi\n"," fpath = dfgs(g, nv, s, t, visited, parent)\n"," if fpath:\n"," return fpath\n"," \n"," return None"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"_dGSXhR2phQh","colab":{"base_uri":"https://localhost:8080/","height":34},"executionInfo":{"status":"ok","timestamp":1577839680403,"user_tz":480,"elapsed":5702,"user":{"displayName":"Li Yin","photoUrl":"https://lh3.googleusercontent.com/a-/AAuE7mDHkgtY8ax6h1ZXf46S1b3QtrhKej1YjeAjlwhj5w=s64","userId":"13365523799853678553"}},"outputId":"2800b185-daae-46d1-c7d8-7d115fcece60"},"source":["parent = {}\n","path = 
dfgs(ucg, 0, 0, None, set(), parent)\n","path, parent"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["(None, {1: 0, 2: 1, 3: 4, 4: 2, 5: 4})"]},"metadata":{"tags":[]},"execution_count":519}]},{"cell_type":"markdown","metadata":{"id":"b_55IC6F8iP_"},"source":["### Breadth-first Search in Graph"]},{"cell_type":"markdown","metadata":{"id":"8YhKn3V3biau"},"source":["#### Track paths and avoid cycles in the tree-based search"]},{"cell_type":"code","metadata":{"id":"9-iHJjQYWfPy"},"source":["# Track paths and avoid cycles\n","def bfs(g, s):\n"," q = [[s]]\n"," paths, orders = [], []\n"," while q:\n"," path = q.pop(0)\n"," n = path[-1]\n"," orders.append(n)\n"," bEnd = True\n"," for v in g[n]:\n"," if v not in path:\n"," if bEnd:\n"," bEnd = False\n"," q.append(path + [v])\n"," if bEnd:\n"," paths.append(path)\n"," return paths, orders"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"LSYyXM_EXHxD","colab":{"base_uri":"https://localhost:8080/","height":34},"executionInfo":{"status":"ok","timestamp":1577839680406,"user_tz":480,"elapsed":5670,"user":{"displayName":"Li Yin","photoUrl":"https://lh3.googleusercontent.com/a-/AAuE7mDHkgtY8ax6h1ZXf46S1b3QtrhKej1YjeAjlwhj5w=s64","userId":"13365523799853678553"}},"outputId":"590521ea-deb7-4f93-dadf-e18881cf634c"},"source":["# Test free tree\n","bfs(ft, 0)"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["([[0, 1, 2, 4, 3], [0, 1, 2, 4, 5]], [0, 1, 2, 4, 3, 5])"]},"metadata":{"tags":[]},"execution_count":522}]},{"cell_type":"code","metadata":{"id":"00091jV7Yr3z","colab":{"base_uri":"https://localhost:8080/","height":34},"executionInfo":{"status":"ok","timestamp":1577839680407,"user_tz":480,"elapsed":5657,"user":{"displayName":"Li 
Yin","photoUrl":"https://lh3.googleusercontent.com/a-/AAuE7mDHkgtY8ax6h1ZXf46S1b3QtrhKej1YjeAjlwhj5w=s64","userId":"13365523799853678553"}},"outputId":"6376b5c1-d2c3-4368-84e3-feadcafc95f3"},"source":["# Test dcg\n","bfs(dcg, 0)"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["([[0, 1, 2, 4, 3], [0, 1, 2, 4, 5]], [0, 1, 2, 4, 3, 5])"]},"metadata":{"tags":[]},"execution_count":523}]},{"cell_type":"code","metadata":{"id":"1xo7CLOBYzyE","colab":{"base_uri":"https://localhost:8080/","height":72},"executionInfo":{"status":"ok","timestamp":1577839680409,"user_tz":480,"elapsed":5644,"user":{"displayName":"Li Yin","photoUrl":"https://lh3.googleusercontent.com/a-/AAuE7mDHkgtY8ax6h1ZXf46S1b3QtrhKej1YjeAjlwhj5w=s64","userId":"13365523799853678553"}},"outputId":"dd54c574-02ee-444d-9c52-c3afa45ae105"},"source":["# Test ucg\n","paths, orders = bfs(ucg, 0)\n","print(paths, orders)\n","'''\n","From dfs\n","[[0, 1, 2, 4, 3],\n"," [0, 1, 2, 4, 5],\n"," [0, 1, 3, 4, 2],\n"," [0, 1, 3, 4, 5],\n"," [0, 2, 1, 3, 4, 5],\n"," [0, 2, 4, 3, 1],\n"," [0, 2, 4, 5]],\n","'''\n","print(len(orders))"],"execution_count":null,"outputs":[{"output_type":"stream","text":["[[0, 2, 4, 5], [0, 1, 2, 4, 3], [0, 1, 2, 4, 5], [0, 1, 3, 4, 2], [0, 1, 3, 4, 5], [0, 2, 4, 3, 1], [0, 2, 1, 3, 4, 5]] [0, 1, 2, 2, 3, 1, 4, 4, 4, 3, 3, 5, 3, 5, 2, 5, 4, 1, 5]\n","19\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"OaceX9XiboJr"},"source":["#### Graph-based search that avoids repeating vertex"]},{"cell_type":"code","metadata":{"id":"r6g03TClZAJu"},"source":["# Track paths and avoid cycles in a memory efficient way\n","# Only track shortest paths\n","def bfgs(g, s, t):\n"," q = [s]\n"," bfgs.parent = {}\n"," visited = {s}\n"," while q:\n"," n = q.pop(0)\n"," if n == t:\n"," return backtrace(s, t, bfgs.parent)\n"," for v in g[n]:\n"," if v not in visited:\n"," q.append(v)\n"," visited.add(v)\n"," bfgs.parent[v] = n\n"," return 
None"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"p1JuvjNMqZ0O"},"source":["#Print Shortest Path Iterative\n","def backtrace(s, t, parent):\n"," p = t\n"," path = []\n"," while p != s:\n"," path.append(p)\n"," p = parent[p]\n"," path.append(s)\n"," return path[::-1]"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"OGUHuKrb6EDu"},"source":["#Print Shortest Path Recursive\n","def get_path(s, t, pl, path):\n"," if s == t: \n"," pass\n"," elif pl[t] is None:\n"," print('no path from ', s, ' to ', t)\n"," else:\n"," get_path(s, pl[t], pl, path) \n"," path.append(t)\n"," return"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"7129c26IbK0N","colab":{"base_uri":"https://localhost:8080/","height":34},"executionInfo":{"status":"ok","timestamp":1577839680414,"user_tz":480,"elapsed":5605,"user":{"displayName":"Li Yin","photoUrl":"https://lh3.googleusercontent.com/a-/AAuE7mDHkgtY8ax6h1ZXf46S1b3QtrhKej1YjeAjlwhj5w=s64","userId":"13365523799853678553"}},"outputId":"ccb546e7-be27-4573-dfbe-63cf48b5cbbd"},"source":["# Test ucg\n","bfgs(ucg, 0, 5)"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["[0, 2, 4, 5]"]},"metadata":{"tags":[]},"execution_count":528}]},{"cell_type":"markdown","metadata":{"id":"w0UuaLXj5bgZ"},"source":["#### Multiple Starts"]},{"cell_type":"code","metadata":{"id":"yg1tgj8Y5czv"},"source":["#Multiple Starts\n","def BFSLevel(starts):\n"," q = starts # a list of nodes\n"," #root.visited = 1\n"," while q:\n"," new_q = []\n"," for node in q:\n"," for neig in node.adjacent:\n"," if not neig.visited:\n"," neig.visited = 1\n"," new_q.append(neig)\n"," q = new_q"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"I-LT_HgDrrtI"},"source":["#### Level by level bfs"]},{"cell_type":"code","metadata":{"id":"sQyW-n5qrt6k"},"source":["def bfs_level(g, s):\n"," '''level by level bfs'''\n"," v = len(g)\n"," state = [False] 
* v\n"," \n"," orders = []\n"," lst = [s]\n"," state[s] = True\n"," d = 0 # track distance\n"," while lst:\n"," print('distance ', d, ': ', lst)\n"," tmp_lst = []\n"," for u in lst:\n"," orders.append(u)\n"," for v in g[u]:\n"," if not state[v]:\n"," state[v] = True\n"," tmp_lst.append(v) \n"," lst = tmp_lst\n"," d += 1\n"," return orders\n"," "],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"KwUfwlRN2qbe","colab":{"base_uri":"https://localhost:8080/","height":104},"executionInfo":{"status":"ok","timestamp":1577839680425,"user_tz":480,"elapsed":5569,"user":{"displayName":"Li Yin","photoUrl":"https://lh3.googleusercontent.com/a-/AAuE7mDHkgtY8ax6h1ZXf46S1b3QtrhKej1YjeAjlwhj5w=s64","userId":"13365523799853678553"}},"outputId":"e2048ab1-018e-4141-ea4f-766d0cefd436"},"source":["print(bfs_level(ucg, 0))"],"execution_count":null,"outputs":[{"output_type":"stream","text":["distance 0 : [0]\n","distance 1 : [1, 2]\n","distance 2 : [3, 4]\n","distance 3 : [5]\n","[0, 1, 2, 3, 4, 5]\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"KDFDgG-BOBml"},"source":["\n","## Depth-first Graph Search"]},{"cell_type":"markdown","metadata":{"id":"V0lQCtvcWWxi"},"source":["### Recursive Implementation"]},{"cell_type":"code","execution_count":49,"metadata":{},"outputs":[],"source":["ucg = [[1, 2], [0, 2, 3], [0, 1, 4], [1, 4], [2, 3, 5], [4]]\n","dcg = [[1], [2],[0, 4], [1], [3, 5], [] ]"]},{"cell_type":"code","metadata":{"id":"ddy-8DejWciM"},"source":["#Recursive implementation with three states\n","def dfs(g, s, colors, orders, complete_orders):\n"," colors[s] = STATE.gray\n"," orders.append(s)\n"," for v in g[s]:\n"," if colors[v] == STATE.white:\n"," dfs(g, v, colors, orders, complete_orders)\n"," # complete\n"," colors[s] = STATE.black\n"," complete_orders.append(s)\n"," 
return"],"execution_count":50,"outputs":[]},{"cell_type":"code","metadata":{"id":"Co2HV7k4XDJm","colab":{"base_uri":"https://localhost:8080/","height":34},"executionInfo":{"status":"ok","timestamp":1577839680427,"user_tz":480,"elapsed":5552,"user":{"displayName":"Li Yin","photoUrl":"https://lh3.googleusercontent.com/a-/AAuE7mDHkgtY8ax6h1ZXf46S1b3QtrhKej1YjeAjlwhj5w=s64","userId":"13365523799853678553"}},"outputId":"4f4b44f1-ce20-4fc3-802b-254aa2d19898"},"source":["# run on directed graph\n","# '''start from 0'''\n","v = len(dcg)\n","orders, complete_orders = [], []\n","colors = [STATE.white] * v\n","dfs(ucg,0, colors, orders, complete_orders)\n","print(orders, complete_orders)"],"execution_count":53,"outputs":[{"output_type":"stream","name":"stdout","text":["[0, 1, 2, 4, 3, 5] [3, 5, 4, 2, 1, 0]\n"]}]},{"cell_type":"markdown","metadata":{"id":"hjByXvweDX3c"},"source":["Visualizing the state change of nodes"]},{"cell_type":"code","execution_count":52,"metadata":{},"outputs":[{"output_type":"stream","name":"stdout","text":["[0, 1, 2, 4, 3, 5] [3, 5, 4, 2, 1, 0]\n"]}],"source":["# run on undirected graph\n","'''start from 0'''\n","v = len(ucg)\n","orders, complete_orders = [], []\n","colors = [STATE.white] * v\n","dfs(ucg,0, colors, orders, complete_orders)\n","print(orders, complete_orders)"]},{"cell_type":"code","metadata":{"id":"jDQvblzsOK9m"},"source":["# directed cyclic graph\n","dcg = [[] for _ in range(6)]\n","dcg[0] = [1, 2]\n","dcg[1] = [2, 4]\n","dcg[2] = [0, 4]\n","dcg[3] = [1]\n","dcg[4] = [3, 5]\n","dcg[5] = [3] # cross edge"],"execution_count":45,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"fSCrzb82GPUy"},"source":["More about the application of the classification of edges. 
"]},{"cell_type":"markdown","metadata":{"id":"XIfzSpwDYZON"},"source":["Discover and finish time"]},{"cell_type":"code","metadata":{"id":"XmsNF54Gd0rN"},"source":["#Discovering and finishing time\n","def dfs(g, s, colors):\n"," dfs.t += 1 # static variable\n"," colors[s] = STATE.gray\n"," dfs.discover[s] = dfs.t\n"," for v in g[s]:\n"," if colors[v] == STATE.white:\n"," dfs(g, v, colors)\n"," # complete\n"," dfs.t += 1\n"," dfs.finish[s] = dfs.t\n"," return"],"execution_count":42,"outputs":[]},{"cell_type":"code","metadata":{"id":"4U0E0RQfeKkh","colab":{"base_uri":"https://localhost:8080/","height":34},"executionInfo":{"status":"ok","timestamp":1577839693988,"user_tz":480,"elapsed":18995,"user":{"displayName":"Li Yin","photoUrl":"https://lh3.googleusercontent.com/a-/AAuE7mDHkgtY8ax6h1ZXf46S1b3QtrhKej1YjeAjlwhj5w=s64","userId":"13365523799853678553"}},"outputId":"43996495-b35e-4dc9-f5c8-34830552c240"},"source":["v = len(dcg)\n","colors = [STATE.white] * v\n","dfs.t = -1\n","dfs.discover, dfs.finish = [-1] * v, [-1] * v\n","dfs(dcg,0, colors)\n","dfs.discover, dfs.finish"],"execution_count":43,"outputs":[{"output_type":"execute_result","data":{"text/plain":["([0, 1, 2, 4, 3, 6], [11, 10, 9, 5, 8, 7])"]},"metadata":{},"execution_count":43}]},{"cell_type":"code","metadata":{"id":"Q3qw9TVueO4r"},"source":["def parenthesis(dt, ft, n):\n"," merge_orders = [-1] * 2 * n\n"," for v, t in enumerate(dt):\n"," merge_orders[t] = v\n"," for v, t in enumerate(ft):\n"," merge_orders[t] = v\n","\n"," print(merge_orders)\n"," nodes = set()\n"," for i in merge_orders:\n"," if i not in nodes:\n"," print('(', i, end = ', ')\n"," nodes.add(i)\n"," else:\n"," print(i, '),', end = ' ')"],"execution_count":44,"outputs":[]},{"cell_type":"code","metadata":{"id":"bsZ4WOk3e4OT","colab":{"base_uri":"https://localhost:8080/","height":52},"executionInfo":{"status":"ok","timestamp":1577839694207,"user_tz":480,"elapsed":19193,"user":{"displayName":"Li 
Yin","photoUrl":"https://lh3.googleusercontent.com/a-/AAuE7mDHkgtY8ax6h1ZXf46S1b3QtrhKej1YjeAjlwhj5w=s64","userId":"13365523799853678553"}},"outputId":"6460ea8f-3334-444f-d6ce-cbc0b389c90a"},"source":["parenthesis(dfs.discover, dfs.finish, v)"],"execution_count":null,"outputs":[{"output_type":"stream","text":["[0, 1, 2, 4, 3, 3, 5, 5, 4, 2, 1, 0]\n","( 0, ( 1, ( 2, ( 4, ( 3, 3 ), ( 5, 5 ), 4 ), 2 ), 1 ), 0 ), "],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"qRdHVeAG9z29"},"source":["### Iterative Implementation ***"]},{"cell_type":"code","metadata":{"id":"xT0mVw8gZTIi"},"source":["#Iterative implementation with three states\n","def dftIter(g, s):\n"," '''not preserving the same discovery ordering'''\n"," n = len(g)\n"," orders = []\n"," colors = [STATE.white] * n\n"," stack = [s]\n","\n"," orders.append(s) # track gray order\n"," colors[s] = STATE.gray\n"," \n"," while stack:\n"," u = stack.pop()\n"," \n"," for v in g[u]:\n"," if colors[v] == STATE.white:\n"," colors[v] = STATE.gray\n"," stack.append(v)\n"," orders.append(v) # track gray order\n"," \n"," return orders"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"q3hq9ARKqqnw","colab":{"base_uri":"https://localhost:8080/","height":34},"executionInfo":{"status":"ok","timestamp":1577839694212,"user_tz":480,"elapsed":19176,"user":{"displayName":"Li Yin","photoUrl":"https://lh3.googleusercontent.com/a-/AAuE7mDHkgtY8ax6h1ZXf46S1b3QtrhKej1YjeAjlwhj5w=s64","userId":"13365523799853678553"}},"outputId":"3d0b9d01-b012-4794-af03-042a1e5512ce"},"source":["# initialization\n","'''start from 0'''\n","print(dftIter(ucg,0))"],"execution_count":null,"outputs":[{"output_type":"stream","text":["[0, 1, 2, 4, 3, 5]\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"RIVOD221rHii","colab":{"base_uri":"https://localhost:8080/","height":34},"executionInfo":{"status":"ok","timestamp":1577839694216,"user_tz":480,"elapsed":19166,"user":{"displayName":"Li 
Yin","photoUrl":"https://lh3.googleusercontent.com/a-/AAuE7mDHkgtY8ax6h1ZXf46S1b3QtrhKej1YjeAjlwhj5w=s64","userId":"13365523799853678553"}},"outputId":"9f6a74e9-769b-405c-b518-b88d2c9eeae2"},"source":["print(dftIter(ucg, 1))"],"execution_count":null,"outputs":[{"output_type":"stream","text":["[1, 0, 2, 3, 4, 5]\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"I4eVWYzGj01U"},"source":["def dftIter(g, s):\n"," '''preserving only discovery ordering'''\n"," n = len(g)\n"," orders = []\n"," colors = [STATE.white] * n\n"," stack = [s]\n","\n"," #orders.append(s) # track gray order\n"," #colors[s] = STATE.gray\n"," \n"," while stack:\n"," u = stack.pop()\n"," if colors[u] == STATE.white:\n"," orders.append(u) # track gray order\n"," colors[u] = STATE.gray\n"," for v in g[u][::-1]:\n"," if colors[v] == STATE.white:\n"," \n"," stack.append(v)\n"," #orders.append(v) # track gray order\n"," \n"," return orders"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"5JWNwi9rlAER","colab":{"base_uri":"https://localhost:8080/","height":34},"executionInfo":{"status":"ok","timestamp":1577839694223,"user_tz":480,"elapsed":19153,"user":{"displayName":"Li Yin","photoUrl":"https://lh3.googleusercontent.com/a-/AAuE7mDHkgtY8ax6h1ZXf46S1b3QtrhKej1YjeAjlwhj5w=s64","userId":"13365523799853678553"}},"outputId":"61c6eca2-9dcd-4b23-ebe0-fe7fae0ba312"},"source":["print(dftIter(ucg, 0))"],"execution_count":null,"outputs":[{"output_type":"stream","text":["[0, 1, 2, 4, 3, 5]\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"mBqP-iy9ma3d","colab":{"base_uri":"https://localhost:8080/","height":34},"executionInfo":{"status":"ok","timestamp":1577839694229,"user_tz":480,"elapsed":19145,"user":{"displayName":"Li Yin","photoUrl":"https://lh3.googleusercontent.com/a-/AAuE7mDHkgtY8ax6h1ZXf46S1b3QtrhKej1YjeAjlwhj5w=s64","userId":"13365523799853678553"}},"outputId":"dbad8a60-6b64-432b-e053-ab3aade53a5a"},"source":["print(dftIter(ucg, 
1))"],"execution_count":null,"outputs":[{"output_type":"stream","text":["[1, 0, 2, 4, 3, 5]\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"rrFl2gwokZON"},"source":["def dfsIter(g, s):\n"," '''iterative dfs'''\n"," v = len(g)\n"," orders, complete_orders = [], []\n"," colors = [STATE.white] * v\n"," stack = [s]\n","\n"," orders.append(s) # track gray order\n"," colors[s] = STATE.gray\n"," \n"," while stack:\n"," u = stack[-1]\n"," bAdj = False\n"," for v in g[u]:\n"," if colors[v] == STATE.white:\n"," colors[v] = STATE.gray\n"," stack.append(v)\n"," orders.append(v) # track gray order\n"," bAdj = True\n"," break\n"," \n"," if not bAdj: # if no adjacent is found, pop out\n"," # complete\n"," colors[u] = STATE.black # this is not necessary in the code, just to help track the state\n"," complete_orders.append(u)\n"," stack.pop()\n"," \n"," return orders, complete_orders "],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"vGaO1vCbly-a","colab":{"base_uri":"https://localhost:8080/","height":34},"executionInfo":{"status":"ok","timestamp":1577839694502,"user_tz":480,"elapsed":19399,"user":{"displayName":"Li Yin","photoUrl":"https://lh3.googleusercontent.com/a-/AAuE7mDHkgtY8ax6h1ZXf46S1b3QtrhKej1YjeAjlwhj5w=s64","userId":"13365523799853678553"}},"outputId":"5f6a1035-9b9f-40dd-be46-34686fdb9534"},"source":["print(dfsIter(ucg, 0))"],"execution_count":null,"outputs":[{"output_type":"stream","text":["([0, 1, 2, 4, 3, 5], [3, 5, 4, 2, 1, 0])\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"oAxBg1ZR-ct5"},"source":["## Breadth-first Graph Search"]},{"cell_type":"code","metadata":{"id":"aR60hTwOQOo0"},"source":["def bfgs_state(g, s):\n"," v = len(g)\n"," colors = [STATE.white] * v\n"," \n"," q, orders = [s], [s]\n"," complete_orders = []\n"," colors[s] = STATE.gray # make the state of the visiting node\n"," while q:\n"," u = q.pop(0) \n"," for v in g[u]:\n"," if colors[v] == STATE.white:\n"," colors[v] = STATE.gray\n"," 
q.append(v)\n"," orders.append(v)\n","\n"," # complete \n"," colors[u] = STATE.black\n"," complete_orders.append(u)\n"," return orders, complete_orders"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"Ld25XyC8Uxun","colab":{"base_uri":"https://localhost:8080/","height":34},"executionInfo":{"status":"ok","timestamp":1577839694506,"user_tz":480,"elapsed":19381,"user":{"displayName":"Li Yin","photoUrl":"https://lh3.googleusercontent.com/a-/AAuE7mDHkgtY8ax6h1ZXf46S1b3QtrhKej1YjeAjlwhj5w=s64","userId":"13365523799853678553"}},"outputId":"539d70aa-6c17-48b1-b328-6a9aa97b4649"},"source":["bfgs_state(dcg, 0)"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["([0, 1, 2, 4, 3, 5], [0, 1, 2, 4, 3, 5])"]},"metadata":{"tags":[]},"execution_count":558}]},{"cell_type":"markdown","metadata":{"id":"I0j42hiHHZBq"},"source":["## Tree Search\n","\n","The sample code is writen in [tree_datastructure_and_traversal](https://colab.research.google.com/drive/1pg49npUd4Rhbg5fggs8ZYakA563YATeA)."]},{"cell_type":"code","execution_count":null,"metadata":{},"outputs":[],"source":[]}]} \ No newline at end of file diff --git a/Easy-Book/source-code/chapter_search_strategy_plot.ipynb b/Easy-Book/source-code/chapter_search_strategy_plot.ipynb deleted file mode 100644 index 459c833..0000000 --- a/Easy-Book/source-code/chapter_search_strategy_plot.ipynb +++ /dev/null @@ -1 +0,0 @@ -{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"chapter_search_strategies.ipynb","provenance":[{"file_id":"137HAhswPSNpP45mzu7u9rnF6XHUzO71L","timestamp":1568757688910}],"collapsed_sections":[],"toc_visible":true},"kernelspec":{"name":"python3","display_name":"Python 3"}},"cells":[{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"H5Qrk7gnrucq","executionInfo":{"status":"ok","timestamp":1614449498942,"user_tz":480,"elapsed":84764,"user":{"displayName":"Li 
Yin","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GgIRi0T9vdv7_5OZi96H4VGhtX2CMD9asUJHPo4cQ=s64","userId":"13365523799853678553"}},"outputId":"134ecd87-e4eb-47bd-b12c-60d313685e47"},"source":["# set up graph directory\n","from google.colab import drive\n","drive.mount('/content/drive')"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Mounted at /content/drive\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"FUweu2KErzxW"},"source":["# setup figure saving path\n","import os\n","import shutil\n","\n","figure_dir = 'drive/My Drive/hands-on-algorithm-source-code/figure/chapter_search_strategies'\n","if not os.path.exists(figure_dir):\n"," os.mkdir(figure_dir) \n","!ls drive/My\\ Drive/hands-on-algorithm-source-code/figure/chapter_search_strategies"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"kHfY1ECkv6Xn"},"source":["# plot graph functions\n","from graphviz import Digraph, Graph\n","import os\n","import shutil\n","def plot_graph(v_pos, edges, graph_type, figure_dir, name = 'naive_graph'):\n"," dot = graph_type(comment=name, engine=\"neato\", format='png')\n"," for u, p in v_pos.items():\n"," dot.node(u, _attributes={'pos': str(p), 'fillcolor': \"#d62728\"})\n"," for u, v, w in edges:\n"," dot.edge(u, v, _attributes={'xlabel': str(w)})\n"," file_name = f'{name}.png'\n"," dot.render(f'test-output/{name}', view=True) \n"," shutil.copyfile(f'test-output/{name}.png', os.path.join(figure_dir, file_name)) \n"," return dot"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"Ev0G5LfkFTzu"},"source":["## Linear Search"]},{"cell_type":"code","metadata":{"id":"23e3DXOGE59o"},"source":["#Linear Search\n","def linear_search(A, t):\n"," for i, v in enumerate(A):\n"," if A[i] == t:\n"," return i\n"," return -1"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"hfcQ8BdWSO8U"},"source":["## Uninformed 
Search"]},{"cell_type":"code","metadata":{"id":"nRps_otMIJGX","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1614451907814,"user_tz":480,"elapsed":299,"user":{"displayName":"Li Yin","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GgIRi0T9vdv7_5OZi96H4VGhtX2CMD9asUJHPo4cQ=s64","userId":"13365523799853678553"}},"outputId":"87ef3966-6013-49fc-825b-6ac254f462b2"},"source":["## Graph Search\n","from collections import defaultdict\n","al = defaultdict(list)\n","al['S'] = [('A', 4), ('B', 5)]\n","al['A'] = [('G', 7)]\n","al['B'] = [('G', 3)]\n","al"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["defaultdict(list,\n"," {'A': [('G', 7)], 'B': [('G', 3)], 'S': [('A', 4), ('B', 5)]})"]},"metadata":{"tags":[]},"execution_count":29}]},{"cell_type":"markdown","metadata":{"id":"YT0tkaJ4SifH"},"source":["### Breath-first Search"]},{"cell_type":"code","metadata":{"id":"2yOpMY0_U-ov"},"source":["def bfs(g, s):\n"," q = [s]\n"," while q:\n"," n = q.pop(0)\n"," print(n, end = ' ')\n"," for v, _ in g[n]:\n"," q.append(v)"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"LqiB-SJ8VJMg","colab":{"base_uri":"https://localhost:8080/","height":34},"executionInfo":{"status":"ok","timestamp":1577839678671,"user_tz":480,"elapsed":4529,"user":{"displayName":"Li Yin","photoUrl":"https://lh3.googleusercontent.com/a-/AAuE7mDHkgtY8ax6h1ZXf46S1b3QtrhKej1YjeAjlwhj5w=s64","userId":"13365523799853678553"}},"outputId":"98a4fbbf-ebef-41a2-f911-b49bc1c9b45e"},"source":["bfs(al, 'S')"],"execution_count":null,"outputs":[{"output_type":"stream","text":["S A B G G "],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"PJ2MLkuJSTf_"},"source":["### Depth-first Search\n","The completeness of DFS depends on the search space. If your search space is finite, then Depth-First Search is complete. However, if there are infinitely many alternatives, it might not find a solution. 
For example, suppose you were coding a path-search problem on city streets, and every time your partial path came to an intersection, you always searched the left-most street first. Then you might just keep going around the same block indefinitely.\n","\n","Sometimes there are ways to bound the search to get completeness even when the search space is unbounded. For example, for the path-search problem above, if we prune the search whenever a path returns to a previous location on the path, then DFS will always find a solution if one exists.\n","\n","There are variants of DFS that are complete. One is iterative deepening: you set a maximum search depth for DFS, and only search that far down the search tree. If you don’t find a solution, then you increase the bound and try again. (Note, however, that this method might run forever if there is no solution.)"]},{"cell_type":"code","metadata":{"id":"IH9FpE8DTce7"},"source":["# Implementation of recursive dfs\n","def dfs(g, vi):\n"," print(vi, end=' ')\n"," for v, _ in g[vi]: \n"," dfs(g, v)"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"Q9tKweIvUHFC","colab":{"base_uri":"https://localhost:8080/","height":34},"executionInfo":{"status":"ok","timestamp":1577839678677,"user_tz":480,"elapsed":4498,"user":{"displayName":"Li Yin","photoUrl":"https://lh3.googleusercontent.com/a-/AAuE7mDHkgtY8ax6h1ZXf46S1b3QtrhKej1YjeAjlwhj5w=s64","userId":"13365523799853678553"}},"outputId":"de3e961e-8181-4b30-ab94-dc9319a32c39"},"source":["dfs(al, 'S')"],"execution_count":null,"outputs":[{"output_type":"stream","text":["S A G B G "],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"Fikls4VUUReu"},"source":["# Implementation of iterative dfs\n","def dfs_iter(g, s):\n"," stack = [s]\n"," while stack:\n"," n = stack.pop()\n"," print(n, end = ' ')\n"," for v, _ in g[n]:\n"," 
stack.append(v)"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"KegJMT0iUyJ3","colab":{"base_uri":"https://localhost:8080/","height":34},"executionInfo":{"status":"ok","timestamp":1577839678681,"user_tz":480,"elapsed":4456,"user":{"displayName":"Li Yin","photoUrl":"https://lh3.googleusercontent.com/a-/AAuE7mDHkgtY8ax6h1ZXf46S1b3QtrhKej1YjeAjlwhj5w=s64","userId":"13365523799853678553"}},"outputId":"287815f2-1fbf-4b4c-cd84-4ca905d3b616"},"source":["dfs_iter(al, 'S')"],"execution_count":null,"outputs":[{"output_type":"stream","text":["S B G A G "],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"yZ3viz03H4EJ"},"source":["### Uniform-Cost Search"]},{"cell_type":"markdown","metadata":{"id":"fdT2z_NrLKJP"},"source":["```\n","q = [(0, S)]\n","Expand S, add A and B\n","q = [(4, A), (5, B)]\n","Expand A, add G\n","q = [(5, B), (11, G)]\n","Expand B, add G\n","q = [(8, G), (11, G)]\n","Expand G, goal found, terminate.\n","```"]},{"cell_type":"code","metadata":{"id":"ZhCNgx-rKzHq"},"source":["import heapq\n","def ucs(graph, s, t):\n"," q = [(0, s)] # initial path with cost 0\n"," while q:\n"," cost, n = heapq.heappop(q)\n"," # Need to check when goal is removed from the priority queue\n"," if n == t:\n"," return cost\n"," else:\n"," for v, c in graph[n]:\n"," heapq.heappush(q, (c + cost, v))\n"," return None"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"QQb89fDwMm6V","colab":{"base_uri":"https://localhost:8080/","height":34},"executionInfo":{"status":"ok","timestamp":1577839678686,"user_tz":480,"elapsed":4412,"user":{"displayName":"Li Yin","photoUrl":"https://lh3.googleusercontent.com/a-/AAuE7mDHkgtY8ax6h1ZXf46S1b3QtrhKej1YjeAjlwhj5w=s64","userId":"13365523799853678553"}},"outputId":"e0d318db-87b2-4200-a1f4-07e2d6c12820"},"source":["ucs(al, s='A', 
t='G')"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["7"]},"metadata":{"tags":[]},"execution_count":487}]},{"cell_type":"markdown","metadata":{"id":"mahGDvIKNs0k"},"source":["### Iterative-Deepening Search (IDS)\n"]},{"cell_type":"code","metadata":{"id":"MoPms3bOSbIK"},"source":["# Depth limited Search\n","def dls(graph, cur, t, maxDepth):\n"," # End Condition\n"," if maxDepth == 0:\n"," print('test: ', cur, end = ' ')\n"," if cur == t:\n"," return True\n"," if maxDepth < 0:\n"," return False\n","\n"," # Recur for adjacent vertices\n"," for n, _ in graph[cur]:\n"," if dls(graph, n, t, maxDepth - 1):\n"," return True\n"," return False"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"5D25vnOVN2H5"},"source":["# Iterative-deepening search\n","def ids(graph, s, t, maxDepth):\n"," for i in range(maxDepth):\n"," print('depth:', i)\n"," if dls(graph, s, t, i):\n"," return True\n"," print('\\n')\n"," return False\n"," "],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"PPUE1EfrTff0","colab":{"base_uri":"https://localhost:8080/","height":156},"executionInfo":{"status":"ok","timestamp":1577839678693,"user_tz":480,"elapsed":4375,"user":{"displayName":"Li Yin","photoUrl":"https://lh3.googleusercontent.com/a-/AAuE7mDHkgtY8ax6h1ZXf46S1b3QtrhKej1YjeAjlwhj5w=s64","userId":"13365523799853678553"}},"outputId":"083f0c90-ab80-4a10-e331-efad578966d7"},"source":["# Find 'B'\n","ids(al, 'S', 'G', 3)"],"execution_count":null,"outputs":[{"output_type":"stream","text":["depth: 0\n","test: S \n","\n","depth: 1\n","test: A test: B \n","\n","depth: 2\n","test: G "],"name":"stdout"},{"output_type":"execute_result","data":{"text/plain":["True"]},"metadata":{"tags":[]},"execution_count":490}]},{"cell_type":"markdown","metadata":{"id":"84kFFfrjHxH7"},"source":["### Bidirectional Search"]},{"cell_type":"code","metadata":{"id":"g7NGJLqrH1AM"},"source":["def bfs_level(graph, q, bStep):\n"," '''\n"," q: 
set for better efficiency in intersection checking\n"," '''\n"," if not bStep:\n"," return q\n"," nq = set()\n"," for n in q:\n"," for v, c in graph[n]:\n"," nq.add(v)\n"," return nq\n","\n","def intersect(qs, qt):\n"," if qs & qt: # intersection \n"," return True\n"," return False"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"6a9IpOExIXXm"},"source":["def bis(graph, s, t):\n"," # First build a graph with opposite edges \n"," bgraph = defaultdict(list)\n"," for key, value in graph.items():\n"," for n, c in value:\n"," bgraph[n].append((key, c))\n"," print(graph, bgraph)\n"," # Start bidirectional search\n"," qs = {s}\n"," qt = {t}\n"," step = 0\n"," while qs and qt:\n"," print(qs, qt)\n"," if intersect(qs, qt):\n"," return True\n"," qs = bfs_level(graph, qs, step%2 == 0)\n"," qt = bfs_level(bgraph, qt, step%2 == 1)\n"," step = 1 - step\n"," return False\n"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"nrZW4C9_K37z","colab":{"base_uri":"https://localhost:8080/","height":106},"executionInfo":{"status":"ok","timestamp":1577839678698,"user_tz":480,"elapsed":4337,"user":{"displayName":"Li Yin","photoUrl":"https://lh3.googleusercontent.com/a-/AAuE7mDHkgtY8ax6h1ZXf46S1b3QtrhKej1YjeAjlwhj5w=s64","userId":"13365523799853678553"}},"outputId":"56c325a2-63a9-4247-ba7e-87e461bf896c"},"source":["bis(al, 'S', 'A')"],"execution_count":null,"outputs":[{"output_type":"stream","text":["defaultdict(, {'S': [('A', 4), ('B', 5)], 'A': [('G', 7)], 'B': [('G', 3)], 'G': []}) defaultdict(, {'A': [('S', 4)], 'B': [('S', 5)], 'G': [('A', 7), ('B', 3)]})\n","{'S'} {'A'}\n","{'B', 'A'} {'A'}\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/plain":["True"]},"metadata":{"tags":[]},"execution_count":493}]},{"cell_type":"markdown","metadata":{"id":"ByphOhrKZtMj"},"source":["## Graph 
Search"]},{"cell_type":"code","metadata":{"id":"E7ZgUDIIaSaD","colab":{"base_uri":"https://localhost:8080/","height":465},"executionInfo":{"status":"ok","timestamp":1577839678700,"user_tz":480,"elapsed":4318,"user":{"displayName":"Li Yin","photoUrl":"https://lh3.googleusercontent.com/a-/AAuE7mDHkgtY8ax6h1ZXf46S1b3QtrhKej1YjeAjlwhj5w=s64","userId":"13365523799853678553"}},"outputId":"b4b4bbaa-2199-4415-b31e-28eda9b623bf"},"source":["from graphviz import Digraph, Graph\n","dot = Digraph(comment='The Round Table', format='png')\n","#print(get_methods(Digraph))\n","#print(Digraph.__dict__)\n","nodes = [0, 1, 2, 3, 4, 5]\n","for node in nodes:\n"," dot.node(str(node))\n","dot.edges(['01','12', '24', '43', '45'])\n","dot.render('test-output/free_tree', view=True) \n","dot"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":[""],"image/svg+xml":"\n\n\n\n\n\n%3\n\n\n\n0\n\n0\n\n\n\n1\n\n1\n\n\n\n0->1\n\n\n\n\n\n2\n\n2\n\n\n\n1->2\n\n\n\n\n\n4\n\n4\n\n\n\n2->4\n\n\n\n\n\n3\n\n3\n\n\n\n4->3\n\n\n\n\n\n5\n\n5\n\n\n\n4->5\n\n\n\n\n\n"},"metadata":{"tags":[]},"execution_count":494}]},{"cell_type":"code","metadata":{"id":"-5v2VZ8bImzM","colab":{"base_uri":"https://localhost:8080/","height":369},"executionInfo":{"status":"ok","timestamp":1577839678702,"user_tz":480,"elapsed":4303,"user":{"displayName":"Li Yin","photoUrl":"https://lh3.googleusercontent.com/a-/AAuE7mDHkgtY8ax6h1ZXf46S1b3QtrhKej1YjeAjlwhj5w=s64","userId":"13365523799853678553"}},"outputId":"eb70eca0-d48b-45c6-9141-b8685859e244"},"source":["dot = Graph(comment='The Round Table', format='png')\n","#print(get_methods(Digraph))\n","#print(Digraph.__dict__)\n","nodes = [0, 1, 2, 3, 4, 5]\n","# for node in nodes:\n","# dot.node(str(node))\n","dot.edges(['01','02', '12','13', '24','34', '45'])\n","rank1 = [0]\n","rank2 = [1, 2]\n","rank3 = [3, 4]\n","rank4 = [5]\n","for rank in [rank1, rank2, rank3, rank4]:\n"," with dot.subgraph() as s:\n"," s.attr(rank='same')\n"," for node in 
rank:\n"," s.node(str(node))\n","dot.render('test-output/undirected_cyclic_graph', view=True) \n","dot"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":[""],"image/svg+xml":"\n\n\n\n\n\n%3\n\n\n\n0\n\n0\n\n\n\n1\n\n1\n\n\n\n0--1\n\n\n\n\n2\n\n2\n\n\n\n0--2\n\n\n\n\n1--2\n\n\n\n\n3\n\n3\n\n\n\n1--3\n\n\n\n\n4\n\n4\n\n\n\n2--4\n\n\n\n\n3--4\n\n\n\n\n5\n\n5\n\n\n\n4--5\n\n\n\n\n"},"metadata":{"tags":[]},"execution_count":495}]},{"cell_type":"code","metadata":{"id":"cI2CmyM-eGna","colab":{"base_uri":"https://localhost:8080/","height":369},"executionInfo":{"status":"ok","timestamp":1577839678705,"user_tz":480,"elapsed":4289,"user":{"displayName":"Li Yin","photoUrl":"https://lh3.googleusercontent.com/a-/AAuE7mDHkgtY8ax6h1ZXf46S1b3QtrhKej1YjeAjlwhj5w=s64","userId":"13365523799853678553"}},"outputId":"f5f64c50-7e20-403c-a32a-36db75db2f52"},"source":["dot = Digraph(comment='The Round Table', format='png')\n","#print(get_methods(Digraph))\n","#print(Digraph.__dict__)\n","nodes = [0, 1, 2, 3, 4, 5]\n","# for node in nodes:\n","# dot.node(str(node))\n","dot.edges(['01', '12','31', '20', '24','43', '45'])\n","rank1 = [0]\n","rank2 = [1, 2]\n","rank3 = [3, 4]\n","rank4 = [5]\n","for rank in [rank1, rank2, rank3, rank4]:\n"," with dot.subgraph() as s:\n"," s.attr(rank='same')\n"," for node in rank:\n"," s.node(str(node))\n","dot.render('test-output/directed_cyclic_graph', view=True) \n","dot"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":[""],"image/svg+xml":"\n\n\n\n\n\n%3\n\n\n\n0\n\n0\n\n\n\n1\n\n1\n\n\n\n0->1\n\n\n\n\n\n2\n\n2\n\n\n\n1->2\n\n\n\n\n\n2->0\n\n\n\n\n\n4\n\n4\n\n\n\n2->4\n\n\n\n\n\n3\n\n3\n\n\n\n3->1\n\n\n\n\n\n4->3\n\n\n\n\n\n5\n\n5\n\n\n\n4->5\n\n\n\n\n\n"},"metadata":{"tags":[]},"execution_count":496}]},{"cell_type":"code","metadata":{"id":"2K9nipQJff_F"},"source":["# Prepare Graph Example\n","# Adjacency List with cycle\n","ft = [[] for _ in range(6)]\n","ft[0] = [1]\n","ft[1] = 
[2]\n","ft[2] = [4]\n","ft[4] = [3, 5]\n","ft[5] = []"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"ajKsBPhBgQcN"},"source":["# directed cyclc graph\n","dcg = [[] for _ in range(6)]\n","dcg[0] = [1]\n","dcg[1] = [2]\n","dcg[2] = [0, 4]\n","dcg[3] = [1]\n","dcg[4] = [3, 5]\n","dcg[5] = []"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"0385JGxBPGWl"},"source":["# Prepare Graph Example\n","# Adjacency List with cycle\n","ucg = [[] for _ in range(6)]\n","ucg[0] = [1, 2]\n","ucg[1] = [0, 2, 3]\n","ucg[2] = [0, 1, 4]\n","ucg[3] = [1, 4]\n","ucg[4] = [2, 3, 5]\n","ucg[5] = [4]"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"E_EBbni8WYoB"},"source":["#STATE Class\n","class STATE:\n"," white = 0\n"," gray = 1\n"," black = 2"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"fjb4NQ4QGRIu"},"source":["### Depth-first Search in Graph"]},{"cell_type":"markdown","metadata":{"id":"7O4uRkjzouHz"},"source":["#### Track Paths and Avoid Cycle in the tree-based search"]},{"cell_type":"code","metadata":{"id":"BnwTjoskRGm0"},"source":["#Free Tree Search\n","def dfs(g, vi):\n"," print(vi, end=' ')\n"," for nv in g[vi]: \n"," dfs(g, nv)"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"oo4bQGakSble","colab":{"base_uri":"https://localhost:8080/","height":34},"executionInfo":{"status":"ok","timestamp":1577839678718,"user_tz":480,"elapsed":4228,"user":{"displayName":"Li Yin","photoUrl":"https://lh3.googleusercontent.com/a-/AAuE7mDHkgtY8ax6h1ZXf46S1b3QtrhKej1YjeAjlwhj5w=s64","userId":"13365523799853678553"}},"outputId":"7491e818-5a1c-4c48-a95c-9901eb5a2edc"},"source":["dfs(ft, 0)"],"execution_count":null,"outputs":[{"output_type":"stream","text":["0 1 2 4 3 5 "],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"CpnZbW1nUV5v"},"source":["# Failed with directed cyclic graph\n","# dfs(dcg, 
0)"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"95BRIU3VlyAb"},"source":["#Graph Search, track paths and avoids cycle\n","def dfs(g, vi, path):\n"," paths.append(path)\n"," orders.append(vi)\n"," for nv in g[vi]: \n"," if nv not in path: \n"," dfs(g, nv, path+[nv])\n"," return "],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"LjXfmd17mKAi","colab":{"base_uri":"https://localhost:8080/","height":52},"executionInfo":{"status":"ok","timestamp":1577839680383,"user_tz":480,"elapsed":5860,"user":{"displayName":"Li Yin","photoUrl":"https://lh3.googleusercontent.com/a-/AAuE7mDHkgtY8ax6h1ZXf46S1b3QtrhKej1YjeAjlwhj5w=s64","userId":"13365523799853678553"}},"outputId":"8526d563-8e86-4508-84c2-c9b4cf393cec"},"source":["# Test free tree\n","paths, orders = [], []\n","dfs(ft, 0, [0])\n","paths, orders"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["([[0], [0, 1], [0, 1, 2], [0, 1, 2, 4], [0, 1, 2, 4, 3], [0, 1, 2, 4, 5]],\n"," [0, 1, 2, 4, 3, 5])"]},"metadata":{"tags":[]},"execution_count":505}]},{"cell_type":"code","metadata":{"id":"Ki0HtnO8mgD4","colab":{"base_uri":"https://localhost:8080/","height":52},"executionInfo":{"status":"ok","timestamp":1577839680386,"user_tz":480,"elapsed":5850,"user":{"displayName":"Li Yin","photoUrl":"https://lh3.googleusercontent.com/a-/AAuE7mDHkgtY8ax6h1ZXf46S1b3QtrhKej1YjeAjlwhj5w=s64","userId":"13365523799853678553"}},"outputId":"546bd0ca-41ca-413d-923c-7ec57f1f8a05"},"source":["# Test dcg\n","paths, orders = [], []\n","dfs(dcg, 0, [0])\n","paths, orders"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["([[0], [0, 1], [0, 1, 2], [0, 1, 2, 4], [0, 1, 2, 4, 3], [0, 1, 2, 4, 5]],\n"," [0, 1, 2, 4, 3, 
5])"]},"metadata":{"tags":[]},"execution_count":506}]},{"cell_type":"code","metadata":{"id":"nARQsx-3nlGd","colab":{"base_uri":"https://localhost:8080/","height":364},"executionInfo":{"status":"ok","timestamp":1577839680387,"user_tz":480,"elapsed":5835,"user":{"displayName":"Li Yin","photoUrl":"https://lh3.googleusercontent.com/a-/AAuE7mDHkgtY8ax6h1ZXf46S1b3QtrhKej1YjeAjlwhj5w=s64","userId":"13365523799853678553"}},"outputId":"fe99a308-04d7-44ab-80f7-e0b90cb91f33"},"source":["# Test ucg\n","paths, orders = [], []\n","dfs(ucg, 0, [0])\n","paths, orders"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["([[0],\n"," [0, 1],\n"," [0, 1, 2],\n"," [0, 1, 2, 4],\n"," [0, 1, 2, 4, 3],\n"," [0, 1, 2, 4, 5],\n"," [0, 1, 3],\n"," [0, 1, 3, 4],\n"," [0, 1, 3, 4, 2],\n"," [0, 1, 3, 4, 5],\n"," [0, 2],\n"," [0, 2, 1],\n"," [0, 2, 1, 3],\n"," [0, 2, 1, 3, 4],\n"," [0, 2, 1, 3, 4, 5],\n"," [0, 2, 4],\n"," [0, 2, 4, 3],\n"," [0, 2, 4, 3, 1],\n"," [0, 2, 4, 5]],\n"," [0, 1, 2, 4, 3, 5, 3, 4, 2, 5, 2, 1, 3, 4, 5, 4, 3, 1, 5])"]},"metadata":{"tags":[]},"execution_count":507}]},{"cell_type":"markdown","metadata":{"id":"ZVhNQx4DgDe6"},"source":["Draw the search tree"]},{"cell_type":"code","metadata":{"id":"mSYmoZ_0gG4B"},"source":["# Only keep the longest path\n","def dfs_helper(g, vi, path):\n"," orders.append(vi)\n"," bpath = True\n"," for nv in g[vi]: \n"," if nv not in path: \n"," dfs_helper(g, nv, path+[nv])\n"," bpath = False\n"," if bpath:\n"," paths.append(path)\n"," return "],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"C0wF99z6huEu","colab":{"base_uri":"https://localhost:8080/","height":156},"executionInfo":{"status":"ok","timestamp":1577839680388,"user_tz":480,"elapsed":5814,"user":{"displayName":"Li Yin","photoUrl":"https://lh3.googleusercontent.com/a-/AAuE7mDHkgtY8ax6h1ZXf46S1b3QtrhKej1YjeAjlwhj5w=s64","userId":"13365523799853678553"}},"outputId":"96f8adfa-b67d-4751-9fc1-44648993cb49"},"source":["paths, 
orders = [], []\n","dfs_helper(ucg, 0, [0])\n","paths, orders"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["([[0, 1, 2, 4, 3],\n"," [0, 1, 2, 4, 5],\n"," [0, 1, 3, 4, 2],\n"," [0, 1, 3, 4, 5],\n"," [0, 2, 1, 3, 4, 5],\n"," [0, 2, 4, 3, 1],\n"," [0, 2, 4, 5]],\n"," [0, 1, 2, 4, 3, 5, 3, 4, 2, 5, 2, 1, 3, 4, 5, 4, 3, 1, 5])"]},"metadata":{"tags":[]},"execution_count":509}]},{"cell_type":"code","metadata":{"id":"4TVSieudlvH3"},"source":["# Track edges\n","def dfs_helper(g, vi, path):\n"," orders.append(vi)\n"," node = (vi, 0)\n"," if vi in tracker:\n"," node = (vi, tracker[vi] + 1)\n"," tracker[vi] += 1\n"," else:\n"," tracker[vi] = 0\n"," for nv in g[vi]: \n"," if nv not in path: \n"," # add an edge\n"," node1 = (nv, 0)\n"," if nv in tracker:\n"," node1 = (nv, tracker[nv]+1)\n"," edges[node].append(node1)\n"," dfs_helper(g, nv, path+[nv]) \n"," return "],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"WwGSwE4PnRbB","colab":{"base_uri":"https://localhost:8080/","height":312},"executionInfo":{"status":"ok","timestamp":1577839680391,"user_tz":480,"elapsed":5789,"user":{"displayName":"Li Yin","photoUrl":"https://lh3.googleusercontent.com/a-/AAuE7mDHkgtY8ax6h1ZXf46S1b3QtrhKej1YjeAjlwhj5w=s64","userId":"13365523799853678553"}},"outputId":"1dffc8ad-fa7e-4694-ea51-ad50d18cd0e6"},"source":["paths, orders = [], []\n","tracker = defaultdict(int) # node: maximum count\n","edges = defaultdict(list) # node: node\n","dfs_helper(ucg, 0, [0])\n","paths, orders, edges, tracker, len(orders)"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["([],\n"," [0, 1, 2, 4, 3, 5, 3, 4, 2, 5, 2, 1, 3, 4, 5, 4, 3, 1, 5],\n"," defaultdict(list,\n"," {(0, 0): [(1, 0), (2, 2)],\n"," (1, 0): [(2, 0), (3, 1)],\n"," (1, 1): [(3, 2)],\n"," (2, 0): [(4, 0)],\n"," (2, 2): [(1, 1), (4, 3)],\n"," (3, 1): [(4, 1)],\n"," (3, 2): [(4, 2)],\n"," (3, 3): [(1, 2)],\n"," (4, 0): [(3, 0), (5, 0)],\n"," (4, 
1): [(2, 1), (5, 1)],\n"," (4, 2): [(5, 2)],\n"," (4, 3): [(3, 3), (5, 3)]}),\n"," defaultdict(int, {0: 0, 1: 2, 2: 2, 3: 3, 4: 3, 5: 3}),\n"," 19)"]},"metadata":{"tags":[]},"execution_count":511}]},{"cell_type":"code","metadata":{"id":"s8Ti-Bo-ic0J","colab":{"base_uri":"https://localhost:8080/","height":561},"executionInfo":{"status":"ok","timestamp":1577839680393,"user_tz":480,"elapsed":5774,"user":{"displayName":"Li Yin","photoUrl":"https://lh3.googleusercontent.com/a-/AAuE7mDHkgtY8ax6h1ZXf46S1b3QtrhKej1YjeAjlwhj5w=s64","userId":"13365523799853678553"}},"outputId":"bbdb4d18-1cc1-426e-ef12-168ecdefd045"},"source":["# Plot the search tree\n","dot = Digraph(comment='The Round Table', format='png')\n","#print(get_methods(Digraph))\n","#print(Digraph.__dict__)\n","nodes = [0, 1, 2, 3, 4, 5]\n","for node, count in tracker.items():\n"," for i in range(count+1):\n"," name=str(node)+str(i)\n"," label=str(node)\n"," #print(name, label)\n"," dot.node(name=str(node)+str(i), label=str(node))\n","for node1, nlist in edges.items():\n"," for node2 in nlist:\n"," a = str(node1[0])+str(node1[1])\n"," b = str(node2[0])+str(node2[1])\n"," #print(a, b)\n"," dot.edge(a, b)\n","#dot.edges(['01', '12','31', '20', '24','43', '45'])\n","dot.render('test-output/search_tree_dfs', view=True) 
\n","dot"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":[""],"image/svg+xml":"\n\n\n\n\n\n%3\n\n\n\n00\n\n0\n\n\n\n10\n\n1\n\n\n\n00->10\n\n\n\n\n\n22\n\n2\n\n\n\n00->22\n\n\n\n\n\n20\n\n2\n\n\n\n10->20\n\n\n\n\n\n31\n\n3\n\n\n\n10->31\n\n\n\n\n\n11\n\n1\n\n\n\n32\n\n3\n\n\n\n11->32\n\n\n\n\n\n12\n\n1\n\n\n\n40\n\n4\n\n\n\n20->40\n\n\n\n\n\n21\n\n2\n\n\n\n22->11\n\n\n\n\n\n43\n\n4\n\n\n\n22->43\n\n\n\n\n\n30\n\n3\n\n\n\n40->30\n\n\n\n\n\n50\n\n5\n\n\n\n40->50\n\n\n\n\n\n41\n\n4\n\n\n\n41->21\n\n\n\n\n\n51\n\n5\n\n\n\n41->51\n\n\n\n\n\n42\n\n4\n\n\n\n52\n\n5\n\n\n\n42->52\n\n\n\n\n\n33\n\n3\n\n\n\n43->33\n\n\n\n\n\n53\n\n5\n\n\n\n43->53\n\n\n\n\n\n31->41\n\n\n\n\n\n32->42\n\n\n\n\n\n33->12\n\n\n\n\n\n"},"metadata":{"tags":[]},"execution_count":512}]},{"cell_type":"markdown","metadata":{"id":"ZD5tQSWFoVOK"},"source":["We can see as the edges in the directed graph increase, the search tree explode exponentially with the number of edges. "]},{"cell_type":"code","metadata":{"id":"yCuIaMa4c-Cl"},"source":["#Graph Search, track paths and avoids cycle\n","def recursive(g, vi, path):\n"," '''\n"," g: graph as an adjacency list\n"," vi: the vertex index\n"," '''\n"," #print(vi, end=' ')\n"," paths, nodes = [[]], []\n"," for nv in g[vi]: \n"," if nv not in path: \n"," spaths, snodes = recursive(g, nv, path+[nv])\n"," paths.extend(spaths)\n"," nodes.extend(snodes)\n"," paths = [[vi] + p for p in paths]\n"," return paths, [vi] + nodes\n"," "],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"FUkmQgjndWkn","colab":{"base_uri":"https://localhost:8080/","height":364},"executionInfo":{"status":"ok","timestamp":1577839680396,"user_tz":480,"elapsed":5751,"user":{"displayName":"Li Yin","photoUrl":"https://lh3.googleusercontent.com/a-/AAuE7mDHkgtY8ax6h1ZXf46S1b3QtrhKej1YjeAjlwhj5w=s64","userId":"13365523799853678553"}},"outputId":"d10feafd-209b-43fd-c43d-fd9f5ef00fd6"},"source":["# print path with free 
tree\n","recursive(ucg, 0, [0])\n"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["([[0],\n"," [0, 1],\n"," [0, 1, 2],\n"," [0, 1, 2, 4],\n"," [0, 1, 2, 4, 3],\n"," [0, 1, 2, 4, 5],\n"," [0, 1, 3],\n"," [0, 1, 3, 4],\n"," [0, 1, 3, 4, 2],\n"," [0, 1, 3, 4, 5],\n"," [0, 2],\n"," [0, 2, 1],\n"," [0, 2, 1, 3],\n"," [0, 2, 1, 3, 4],\n"," [0, 2, 1, 3, 4, 5],\n"," [0, 2, 4],\n"," [0, 2, 4, 3],\n"," [0, 2, 4, 3, 1],\n"," [0, 2, 4, 5]],\n"," [0, 1, 2, 4, 3, 5, 3, 4, 2, 5, 2, 1, 3, 4, 5, 4, 3, 1, 5])"]},"metadata":{"tags":[]},"execution_count":514}]},{"cell_type":"markdown","metadata":{"id":"qG_4NLC-o4bn"},"source":["#### Graph-based Search that avoids repeating vertex"]},{"cell_type":"code","metadata":{"id":"NTH_S5wQi_Lf"},"source":["#Avoid Repeating Vertex\n","def dfgs(g, vi, visited, path):\n"," visited.add(vi)\n"," orders.append(vi)\n"," bEnd = True # node without unvisited adjacent nodes \n"," for nv in g[vi]: \n"," if nv not in visited: \n"," if bEnd:\n"," bEnd = False\n"," dfgs(g, nv, visited, path + [nv])\n"," if bEnd:\n"," paths.append(path)"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"037PpuQvjMTj","colab":{"base_uri":"https://localhost:8080/","height":34},"executionInfo":{"status":"ok","timestamp":1577839680399,"user_tz":480,"elapsed":5730,"user":{"displayName":"Li Yin","photoUrl":"https://lh3.googleusercontent.com/a-/AAuE7mDHkgtY8ax6h1ZXf46S1b3QtrhKej1YjeAjlwhj5w=s64","userId":"13365523799853678553"}},"outputId":"0f8982ed-76ef-4e3d-faf6-2b317cd6aa15"},"source":["paths, orders = [], []\n","dfgs(ucg, 0, set(), [0])\n","paths, orders"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["([[0, 1, 2, 4, 3], [0, 1, 2, 4, 5]], [0, 1, 2, 4, 3, 5])"]},"metadata":{"tags":[]},"execution_count":516}]},{"cell_type":"code","metadata":{"id":"vn-U1OFJof4B"},"source":["def backtrace(parent, s, t):\n"," p = t\n"," path = []\n"," while p != s:\n"," path.append(p)\n"," p = 
parent[p]\n"," path.append(s)\n"," return path[::-1]"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"1aN6kwYPoH5d"},"source":["# Backtrace path\n","# s is used to backtrace the path\n","def dfgs(g, vi, s, t, visited, parent): \n"," visited.add(vi) \n"," if vi == t:\n"," return backtrace(parent, s, t)\n","\n"," for nv in g[vi]: \n"," if nv not in visited: \n"," parent[nv] = vi\n"," fpath = dfgs(g, nv, s, t, visited, parent)\n"," if fpath:\n"," return fpath\n"," \n"," return None"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"_dGSXhR2phQh","colab":{"base_uri":"https://localhost:8080/","height":34},"executionInfo":{"status":"ok","timestamp":1577839680403,"user_tz":480,"elapsed":5702,"user":{"displayName":"Li Yin","photoUrl":"https://lh3.googleusercontent.com/a-/AAuE7mDHkgtY8ax6h1ZXf46S1b3QtrhKej1YjeAjlwhj5w=s64","userId":"13365523799853678553"}},"outputId":"2800b185-daae-46d1-c7d8-7d115fcece60"},"source":["parent = {}\n","path = dfgs(ucg, 0, 0, None, set(), parent)\n","path, parent"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["(None, {1: 0, 2: 1, 3: 4, 4: 2, 5: 4})"]},"metadata":{"tags":[]},"execution_count":519}]},{"cell_type":"code","metadata":{"id":"TXjsA5BRLx87","colab":{"base_uri":"https://localhost:8080/","height":465},"executionInfo":{"status":"ok","timestamp":1577839680404,"user_tz":480,"elapsed":5688,"user":{"displayName":"Li Yin","photoUrl":"https://lh3.googleusercontent.com/a-/AAuE7mDHkgtY8ax6h1ZXf46S1b3QtrhKej1YjeAjlwhj5w=s64","userId":"13365523799853678553"}},"outputId":"b30b4b1f-73fc-4e38-8939-6c8c2b6d61ac"},"source":["# Visualize the search tree\n","dot = Digraph(comment='The Round Table', format='png')\n","#print(get_methods(Digraph))\n","#print(Digraph.__dict__)\n","nodes = [0, 1, 2, 3, 4, 5]\n","for node in nodes:\n"," dot.node(name=str(node))\n","for s, p in parent.items():\n"," dot.edge(str(p), str(s))\n","#dot.edges(['01', '12','31', '20', 
# Track paths and avoid cycles
def bfs(g, s):
    """Tree-style breadth-first search that enumerates simple paths from ``s``.

    Every queue entry is a whole path; a path is extended by each neighbor of
    its last vertex that does not already occur in that path, so no path ever
    contains a cycle (but a vertex may appear in many different paths).

    Args:
        g: adjacency structure; ``g[v]`` yields the neighbors of ``v``.
        s: start vertex.

    Returns:
        ``(paths, orders)`` where ``paths`` lists, in dequeue order, the paths
        that could not be extended any further, and ``orders`` lists the last
        vertex of every dequeued path.
    """
    from collections import deque  # O(1) popleft instead of list.pop(0)

    q = deque([[s]])
    paths, orders = [], []
    while q:
        path = q.popleft()
        n = path[-1]
        orders.append(n)
        extended = False
        for v in g[n]:
            if v not in path:
                extended = True
                q.append(path + [v])
        if not extended:
            paths.append(path)
    return paths, orders
# Track paths and avoid cycles in a memory efficient way
# Only track shortest paths
def bfgs(g, s, t):
    """Breadth-first graph search from ``s`` to ``t`` visiting each vertex once.

    The search tree is stored on the function itself as ``bfgs.parent``
    (dict ``child -> parent``) and is reset at the start of every call, so it
    can be inspected afterwards.

    Args:
        g: adjacency structure; ``g[v]`` yields the neighbors of ``v``.
        s: start vertex.
        t: target vertex; pass a value that is not a vertex (e.g. ``None``)
            to explore everything reachable and just build ``bfgs.parent``.

    Returns:
        The path from ``s`` to ``t`` found by the search as a list, or
        ``None`` if ``t`` is never dequeued.
    """
    from collections import deque  # O(1) popleft instead of list.pop(0)

    q = deque([s])
    bfgs.parent = {}
    visited = {s}
    while q:
        n = q.popleft()
        if n == t:
            # Keywords keep the call correct whichever ``backtrace`` variant
            # (parent-first or parent-last) is currently bound.
            return backtrace(s=s, t=t, parent=bfgs.parent)
        for v in g[n]:
            if v not in visited:
                q.append(v)
                visited.add(v)
                bfgs.parent[v] = n
    return None


# Print Shortest Path Iterative
def backtrace(s, t, parent):
    """Rebuild the path from ``s`` to ``t`` by following ``parent`` links.

    Args:
        s: source vertex, where the walk stops.
        t: target vertex, where the walk starts.
        parent: mapping ``child -> parent``.

    Returns:
        List of vertices from ``s`` to ``t`` inclusive.

    Raises:
        KeyError: if the chain from ``t`` does not lead back to ``s``.
    """
    p = t
    path = []
    while p != s:
        path.append(p)
        p = parent[p]
    path.append(s)
    return path[::-1]


# Print Shortest Path Recursive
def get_path(s, t, pl, path):
    """Append the vertices of the ``s`` -> ``t`` path to ``path``.

    Args:
        s: source vertex.
        t: target vertex.
        pl: parent lookup, either a dict ``child -> parent`` or a list indexed
            by vertex; a ``None`` or missing entry means "no parent".
        path: list extended in place with the vertices from ``s`` to ``t``.

    If the parent chain from ``t`` ends before reaching ``s`` a message is
    printed and ``path`` is left untouched (previously the partial chain was
    still appended, and a missing entry raised ``KeyError``).
    """
    chain = []
    node = t
    while node != s:
        chain.append(node)
        try:
            node = pl[node]
        except (KeyError, IndexError):
            node = None
        if node is None:
            print('no path from ', s, ' to ', t)
            return
    chain.append(s)
    path.extend(reversed(chain))
    return
# Multiple Starts
def BFSLevel(starts):
    """Level-by-level BFS over node objects from several start nodes at once.

    Nodes are expected to expose ``adjacent`` (iterable of neighbor nodes)
    and a truthy/falsy ``visited`` attribute. Every reached node, including
    the start nodes themselves, ends with ``visited == 1``.

    Args:
        starts: iterable of start nodes (the first BFS level).
    """
    q = list(starts)
    # Mark the sources up front; otherwise a neighbor would see them as
    # unvisited and enqueue them a second time.
    for node in q:
        node.visited = 1
    while q:
        new_q = []
        for node in q:
            for neig in node.adjacent:
                if not neig.visited:
                    neig.visited = 1
                    new_q.append(neig)
        q = new_q


def bfs_level(g, s):
    """Level-by-level BFS on an adjacency list, printing each level.

    Args:
        g: adjacency list indexed by vertex number ``0..len(g)-1``.
        s: start vertex.

    Returns:
        List of vertices in the order they were expanded.
    """
    seen = [False] * len(g)
    seen[s] = True

    orders = []
    frontier = [s]
    d = 0  # track distance
    while frontier:
        print('distance ', d, ': ', frontier)
        next_frontier = []
        for u in frontier:
            orders.append(u)
            for w in g[u]:
                if not seen[w]:
                    seen[w] = True
                    next_frontier.append(w)
        frontier = next_frontier
        d += 1
    return orders


# Recursive implementation with three states
def dfs(g, s, colors, orders, complete_orders):
    """Recursive DFS that tracks white / gray / black vertex states.

    Args:
        g: adjacency list indexed by vertex number.
        s: vertex to visit.
        colors: per-vertex ``STATE`` list; mutated in place.
        orders: list receiving vertices in the order they turn gray.
        complete_orders: list receiving vertices in the order they turn black.
    """
    colors[s] = STATE.gray
    orders.append(s)
    for v in g[s]:
        if colors[v] == STATE.white:
            dfs(g, v, colors, orders, complete_orders)
    # complete
    colors[s] = STATE.black
    complete_orders.append(s)
    return
complete_orders)"],"execution_count":null,"outputs":[{"output_type":"stream","text":["[0, 1, 2, 4, 3, 5] [3, 5, 4, 2, 1, 0]\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"hjByXvweDX3c"},"source":["Visualizing the state change of nodes"]},{"cell_type":"code","metadata":{"id":"jDQvblzsOK9m"},"source":["# directed cyclc graph\n","dcg = [[] for _ in range(6)]\n","dcg[0] = [1, 2]\n","dcg[1] = [2, 4]\n","dcg[2] = [0, 4]\n","dcg[3] = [1]\n","dcg[4] = [3, 5]\n","dcg[5] = [3] # cross edge"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"7ZK2pX87OvMH","colab":{"base_uri":"https://localhost:8080/","height":369},"executionInfo":{"status":"ok","timestamp":1577839680429,"user_tz":480,"elapsed":5535,"user":{"displayName":"Li Yin","photoUrl":"https://lh3.googleusercontent.com/a-/AAuE7mDHkgtY8ax6h1ZXf46S1b3QtrhKej1YjeAjlwhj5w=s64","userId":"13365523799853678553"}},"outputId":"cb41c3bd-7aa5-4304-c571-0bb39aea86ad"},"source":["dot = Digraph(comment='The Round Table', format='png')\n","#print(get_methods(Digraph))\n","#print(Digraph.__dict__)\n","nodes = [0, 1, 2, 3, 4, 5]\n","rank1 = [0]\n","rank2 = [1, 2]\n","rank3=[3, 4]\n","rank4 = [5]\n","ranks=[rank1, rank2, rank3, rank4]\n","for i, rank in enumerate(ranks):\n"," with dot.subgraph(name=name+str(i)) as s:\n"," s.attr(rank='same')\n"," for node in rank:\n"," s.node(str(node))\n","dot.edges(['01', '02', '12', '14', '31', '20', '24','43', '45', '53'])\n","dot.render('test-output/directed_cyclic_graph_2', view=True) 
def _state_fill_color(state):
    """Map a vertex ``STATE`` to the fill color used when drawing it."""
    if state == STATE.gray:
        return 'gray'
    if state == STATE.black:
        return 'black'
    return 'white'


def set_node_style(dot, color, node, name):
    """Add ``node`` to ``dot`` as a filled node with red label text.

    Args:
        dot: graph or subgraph object exposing ``attr`` and ``node``.
        color: fill color name.
        node: value shown as the node label.
        name: unique node identifier inside the graph.
    """
    dot.attr('node', style='filled', fillcolor=color, fontcolor='red')
    dot.node(name=name, label=str(node))


def plot(g, colors, dot, edges, nodes):
    """Draw one snapshot of ``nodes`` and ``edges`` into ``dot``.

    Node names are suffixed with the next value of the module-level
    ``counter`` so that successive snapshots do not collide.
    """
    name = str(next(counter))

    with dot.subgraph(name=name):
        for node in nodes:
            set_node_style(dot, _state_fill_color(colors[node]), node,
                           name=str(node) + name)
        for start, end in edges:
            dot.edge(str(start) + name, str(end) + name)


def plot_state(g, colors, dot, edges_list):
    """Render one snapshot of the whole graph ``g`` colored by vertex state.

    The rank layout is hard-coded for vertices 0..5. ``edges_list`` is
    accepted for signature compatibility with ``plot_all`` and is not used.
    A PNG named ``depth_first_graph_search_process<k>`` is rendered under
    ``test-output/`` where ``k`` is the next value of ``counter``.
    """
    name = str(next(counter))
    with dot.subgraph(name=name) as init:
        nodes = len(g)
        ranks = [[0], [2, 1], [4, 3], [5]]

        for i, rank in enumerate(ranks):
            with init.subgraph(name=name + str(i)) as same_rank:
                same_rank.attr(rank='same')
                for node in rank:
                    set_node_style(same_rank, _state_fill_color(colors[node]),
                                   node, name=str(node) + name)

        for start in range(nodes):
            for end in g[start]:
                init.edge(str(start) + name, str(end) + name)
        init.render('test-output/depth_first_graph_search_process' + name, view=True, format='png')


def plot_all(g, colors, dot, edges_list):
    """Render one snapshot showing only the vertices touched by ``edges_list``.

    Args:
        g: adjacency list (not read here; kept for a uniform signature).
        colors: per-vertex ``STATE`` list used for node fill colors.
        dot: parent graph the snapshot subgraph is attached to.
        edges_list: list of edge lists; list ``i`` is drawn in the ``i``-th
            palette color (the palette repeats if there are more lists).
    """
    name = str(next(counter))
    with dot.subgraph(name=name) as init:
        nodes = set()
        ranks = [[0], [1], [2], [4], [3, 5]]
        for edges in edges_list:
            for start, end in edges:
                nodes.add(start)
                nodes.add(end)

        for i, rank in enumerate(ranks):
            with init.subgraph(name=name + str(i)) as same_rank:
                same_rank.attr(rank='same')
                for node in rank:
                    if node not in nodes:
                        continue
                    set_node_style(same_rank, _state_fill_color(colors[node]),
                                   node, name=str(node) + name)

        # Separate name: the ``colors`` parameter holds vertex states and
        # must not be overwritten by the edge palette.
        edge_colors = ['black', 'red']
        for i, edges in enumerate(edges_list):
            if not edges:
                continue
            for start, end in edges:
                if start is not None:
                    init.edge(str(start) + name, str(end) + name,
                              _attributes={'dir': 'forward',
                                           'color': edge_colors[i % len(edge_colors)]})
        init.render('test-output/depth_first_graph_search_process' + name, view=True, format='png')


# Visualizing
edges = []
back_edges = []
def dfs(g, s, colors, dot, nodes, pre_node):
    """Recursive three-state DFS that draws a snapshot at each state change.

    Uses the module-level ``plot_fun`` to draw and the module-level lists
    ``edges`` (tree edges, in discovery order) and ``back_edges`` (the same
    edges reversed, appended when the recursive call returns).

    Args:
        g: adjacency list indexed by vertex number.
        s: vertex to visit.
        colors: per-vertex ``STATE`` list; mutated in place.
        dot: graph object handed to ``plot_fun``.
        nodes: set collecting every visited vertex; mutated in place.
        pre_node: vertex the call came from; currently unused.
    """
    global edges
    global back_edges
    nodes.add(s)
    colors[s] = STATE.gray
    plot_fun(g, colors, dot, [edges, back_edges])
    for v in g[s]:
        if colors[v] == STATE.white:
            edges += [(s, v)]
            dfs(g, v, colors, dot, nodes, s)
            back_edges += [(v, s)]
    # complete
    colors[s] = STATE.black
    plot_fun(g, colors, dot, [edges, back_edges])
    return
n13->43\n\n\n\n\n\n33\n\n3\n\n\n\n43->33\n\n\n\n\n\n53\n\n5\n\n\n\n43->53\n\n\n\n\n\n33->13\n\n\n\n\n\n53->33\n\n\n\n\n\n04\n\n0\n\n\n\n24\n\n2\n\n\n\n04->24\n\n\n\n\n\n14\n\n1\n\n\n\n04->14\n\n\n\n\n\n24->04\n\n\n\n\n\n44\n\n4\n\n\n\n24->44\n\n\n\n\n\n14->24\n\n\n\n\n\n14->44\n\n\n\n\n\n34\n\n3\n\n\n\n44->34\n\n\n\n\n\n54\n\n5\n\n\n\n44->54\n\n\n\n\n\n34->14\n\n\n\n\n\n54->34\n\n\n\n\n\n05\n\n0\n\n\n\n25\n\n2\n\n\n\n05->25\n\n\n\n\n\n15\n\n1\n\n\n\n05->15\n\n\n\n\n\n25->05\n\n\n\n\n\n45\n\n4\n\n\n\n25->45\n\n\n\n\n\n15->25\n\n\n\n\n\n15->45\n\n\n\n\n\n35\n\n3\n\n\n\n45->35\n\n\n\n\n\n55\n\n5\n\n\n\n45->55\n\n\n\n\n\n35->15\n\n\n\n\n\n55->35\n\n\n\n\n\n06\n\n0\n\n\n\n26\n\n2\n\n\n\n06->26\n\n\n\n\n\n16\n\n1\n\n\n\n06->16\n\n\n\n\n\n26->06\n\n\n\n\n\n46\n\n4\n\n\n\n26->46\n\n\n\n\n\n16->26\n\n\n\n\n\n16->46\n\n\n\n\n\n36\n\n3\n\n\n\n46->36\n\n\n\n\n\n56\n\n5\n\n\n\n46->56\n\n\n\n\n\n36->16\n\n\n\n\n\n56->36\n\n\n\n\n\n07\n\n0\n\n\n\n27\n\n2\n\n\n\n07->27\n\n\n\n\n\n17\n\n1\n\n\n\n07->17\n\n\n\n\n\n27->07\n\n\n\n\n\n47\n\n4\n\n\n\n27->47\n\n\n\n\n\n17->27\n\n\n\n\n\n17->47\n\n\n\n\n\n37\n\n3\n\n\n\n47->37\n\n\n\n\n\n57\n\n5\n\n\n\n47->57\n\n\n\n\n\n37->17\n\n\n\n\n\n57->37\n\n\n\n\n\n08\n\n0\n\n\n\n28\n\n2\n\n\n\n08->28\n\n\n\n\n\n18\n\n1\n\n\n\n08->18\n\n\n\n\n\n28->08\n\n\n\n\n\n48\n\n4\n\n\n\n28->48\n\n\n\n\n\n18->28\n\n\n\n\n\n18->48\n\n\n\n\n\n38\n\n3\n\n\n\n48->38\n\n\n\n\n\n58\n\n5\n\n\n\n48->58\n\n\n\n\n\n38->18\n\n\n\n\n\n58->38\n\n\n\n\n\n09\n\n0\n\n\n\n29\n\n2\n\n\n\n09->29\n\n\n\n\n\n19\n\n1\n\n\n\n09->19\n\n\n\n\n\n29->09\n\n\n\n\n\n49\n\n4\n\n\n\n29->49\n\n\n\n\n\n19->29\n\n\n\n\n\n19->49\n\n\n\n\n\n39\n\n3\n\n\n\n49->39\n\n\n\n\n\n59\n\n5\n\n\n\n49->59\n\n\n\n\n\n39->19\n\n\n\n\n\n59->39\n\n\n\n\n\n010\n\n0\n\n\n\n210\n\n2\n\n\n\n010->210\n\n\n\n\n\n110\n\n1\n\n\n\n010->110\n\n\n\n\n\n210->010\n\n\n\n\n\n410\n\n4\n\n\n\n210->410\n\n\n\n\n\n110->210\n\n\n\n\n\n110->410\n\n\n\n\n\n310\n\n3\n\n\n\n410->310\n\n\n\n\n\n510\n\n5\n\n\n\n410->510\n\n\n\n\n\n310
->110\n\n\n\n\n\n510->310\n\n\n\n\n\n011\n\n0\n\n\n\n211\n\n2\n\n\n\n011->211\n\n\n\n\n\n111\n\n1\n\n\n\n011->111\n\n\n\n\n\n211->011\n\n\n\n\n\n411\n\n4\n\n\n\n211->411\n\n\n\n\n\n111->211\n\n\n\n\n\n111->411\n\n\n\n\n\n311\n\n3\n\n\n\n411->311\n\n\n\n\n\n511\n\n5\n\n\n\n411->511\n\n\n\n\n\n311->111\n\n\n\n\n\n511->311\n\n\n\n\n\n"},"metadata":{"tags":[]},"execution_count":543}]},{"cell_type":"code","metadata":{"id":"WBFPoKth-Eui","colab":{"base_uri":"https://localhost:8080/","height":769},"executionInfo":{"status":"ok","timestamp":1577839693982,"user_tz":480,"elapsed":19009,"user":{"displayName":"Li Yin","photoUrl":"https://lh3.googleusercontent.com/a-/AAuE7mDHkgtY8ax6h1ZXf46S1b3QtrhKej1YjeAjlwhj5w=s64","userId":"13365523799853678553"}},"outputId":"77ef6b6b-6ac2-4ee8-a663-0ae3da4ac5e2"},"source":["# Classify edges\n","# edges is tree edge\n","# need to formulate better\n","'''\n","dcg[0] = [1, 2]\n","dcg[1] = [2, 4]\n","dcg[2] = [0, 4]\n","dcg[3] = [1]\n","dcg[4] = [3, 5]\n","dcg[5] = [3] # cross edge\n","'''\n","print(edges, back_edges)\n","tree = ft\n","tree_edges = edges\n","nodes = len(dcg)\n","new_edges, n_back_edges, forward_edges, cross_edges = [], [], [], []\n","# reversed tree:\n","reverse_tree = [[] for _ in range(nodes)]\n","for s in range(nodes):\n"," for e in tree[s]: \n"," reverse_tree[e].append(s)\n","for s in range(nodes):\n"," for e in dcg[s]:\n"," print(s, e, tree)\n"," if (s, e) in tree_edges:\n"," new_edges.append((s, e))\n"," elif bfgs(tree, e, s ):\n"," n_back_edges.append((s, e))\n"," elif bfgs(tree, s, e ):\n"," forward_edges.append((s ,e))\n"," else:\n"," cross_edges.append((s, e))\n","\n","edges_list = [new_edges, n_back_edges, forward_edges, cross_edges ]\n","dot = Digraph(comment='The Round Table', format='png')\n","print(edges_list)\n","rank1 = [0]\n","rank2 = [1]\n","rank2_1=[2]\n","rank3 = [4]\n","rank4 = [3, 5]\n","ranks = [rank1, rank2, rank2_1, rank3, rank4]\n","dot.attr(ranksep='0.75', rank='same')\n","\n"," \n","\n","colors = 
['black', 'red', 'yellow', 'blue']\n","for i, edgesx in enumerate(edges_list):\n"," if not edgesx:\n"," continue\n"," for start, end in edgesx:\n"," if start is not None:\n"," dot.edge(str(start), str(end),_attributes={ 'color': colors[i]})\n","for i, rank in enumerate(ranks):\n"," with dot.subgraph(name=str(i)) as s:\n"," s.attr(ranksep='0.75', rank='same')\n"," for node in rank:\n"," s.node(name=str(node))\n","dot.render('test-output/depth_first_graph_search_edges', view=True, format='png') \n","#print(dot.source)\n","dot\n"],"execution_count":null,"outputs":[{"output_type":"stream","text":["[(0, 1), (1, 2), (2, 4), (4, 3), (4, 5)] [(3, 4), (5, 4), (4, 2), (2, 1), (1, 0)]\n","0 1 [[1], [2], [4], [], [3, 5], []]\n","0 2 [[1], [2], [4], [], [3, 5], []]\n","1 2 [[1], [2], [4], [], [3, 5], []]\n","1 4 [[1], [2], [4], [], [3, 5], []]\n","2 0 [[1], [2], [4], [], [3, 5], []]\n","2 4 [[1], [2], [4], [], [3, 5], []]\n","3 1 [[1], [2], [4], [], [3, 5], []]\n","4 3 [[1], [2], [4], [], [3, 5], []]\n","4 5 [[1], [2], [4], [], [3, 5], []]\n","5 3 [[1], [2], [4], [], [3, 5], []]\n","[[(0, 1), (1, 2), (2, 4), (4, 3), (4, 5)], [(2, 0), (3, 1)], [(0, 2), (1, 4)], [(5, 3)]]\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/plain":[""],"image/svg+xml":"\n\n\n\n\n\n%3\n\n\n\n0\n\n0\n\n\n\n1\n\n1\n\n\n\n0->1\n\n\n\n\n\n2\n\n2\n\n\n\n0->2\n\n\n\n\n\n1->2\n\n\n\n\n\n4\n\n4\n\n\n\n1->4\n\n\n\n\n\n2->0\n\n\n\n\n\n2->4\n\n\n\n\n\n3\n\n3\n\n\n\n4->3\n\n\n\n\n\n5\n\n5\n\n\n\n4->5\n\n\n\n\n\n3->1\n\n\n\n\n\n5->3\n\n\n\n\n\n"},"metadata":{"tags":[]},"execution_count":544}]},{"cell_type":"markdown","metadata":{"id":"fSCrzb82GPUy"},"source":["More about the application of classificatio of edges. 
# Discovering and finishing time
def dfs(g, s, colors):
    """Recursive DFS that stamps discovery and finishing times.

    State is kept on the function object and must be initialised by the
    caller before the first call: ``dfs.t`` (clock, e.g. ``-1``) and the
    per-vertex lists ``dfs.discover`` and ``dfs.finish``.

    Args:
        g: adjacency list indexed by vertex number.
        s: vertex to visit.
        colors: per-vertex ``STATE`` list; mutated in place (gray on
            discovery, black on finish).
    """
    dfs.t += 1  # static variable
    colors[s] = STATE.gray
    dfs.discover[s] = dfs.t
    for v in g[s]:
        if colors[v] == STATE.white:
            dfs(g, v, colors)
    # complete
    colors[s] = STATE.black
    dfs.t += 1
    dfs.finish[s] = dfs.t
    return


def parenthesis(dt, ft, n):
    """Print the parenthesis structure given discovery/finishing times.

    Args:
        dt: discovery time per vertex; a negative time means "never reached".
        ft: finishing time per vertex; a negative time means "never reached".
        n: number of vertices (time stamps lie in ``0 .. 2*n - 1``).

    Prints the vertices ordered by time stamp, then the same sequence with
    an opening parenthesis at a vertex's first occurrence and a closing one
    at its second.
    """
    merge_orders = [-1] * 2 * n
    for times in (dt, ft):
        for v, t in enumerate(times):
            # Skip unreached vertices: index -1 would overwrite the last slot.
            if t >= 0:
                merge_orders[t] = v
    # Drop time slots that were never used.
    merge_orders = [v for v in merge_orders if v != -1]

    print(merge_orders)
    nodes = set()
    for i in merge_orders:
        if i not in nodes:
            print('(', i, end = ', ')
            nodes.add(i)
        else:
            print(i, '),', end = ' ')
# Iterative implementation with three states
def dftIter(g, s):
    """Stack-based traversal that colors a vertex gray when it is pushed.

    Because all white neighbors of the popped vertex are recorded at once,
    the returned order is not the recursive DFS discovery order in general.

    Args:
        g: adjacency list indexed by vertex number.
        s: start vertex.

    Returns:
        List of vertices in the order they turned gray.
    """
    colors = [STATE.white] * len(g)
    colors[s] = STATE.gray
    orders = [s]  # track gray order
    stack = [s]

    while stack:
        u = stack.pop()
        for w in g[u]:
            if colors[w] != STATE.white:
                continue
            colors[w] = STATE.gray
            orders.append(w)  # track gray order
            stack.append(w)

    return orders
def dftIter(g, s):
    """Stack-based traversal that colors a vertex gray when it is popped.

    Neighbors are pushed in reverse so the first neighbor is popped first; a
    vertex may sit on the stack several times and only its first pop counts.

    Args:
        g: adjacency list indexed by vertex number.
        s: start vertex.

    Returns:
        List of vertices in the order they turned gray.
    """
    colors = [STATE.white] * len(g)
    orders = []
    stack = [s]

    while stack:
        u = stack.pop()
        if colors[u] != STATE.white:
            continue  # stale duplicate of an already discovered vertex
        colors[u] = STATE.gray
        orders.append(u)  # track gray order
        for w in reversed(g[u]):
            if colors[w] == STATE.white:
                stack.append(w)

    return orders
def dfsIter(g, s):
    """Iterative DFS that reproduces the recursive gray/black ordering.

    Each stack entry keeps an iterator over the vertex's adjacency list, so
    scanning resumes where it stopped instead of restarting from the first
    neighbor every time the vertex is back on top of the stack.

    Args:
        g: adjacency list indexed by vertex number.
        s: start vertex.

    Returns:
        ``(orders, complete_orders)``: vertices in the order they turned
        gray, and in the order they turned black.
    """
    colors = [STATE.white] * len(g)
    orders, complete_orders = [s], []  # orders tracks gray order
    colors[s] = STATE.gray
    stack = [(s, iter(g[s]))]

    while stack:
        u, neighbors = stack[-1]
        for v in neighbors:
            if colors[v] == STATE.white:
                colors[v] = STATE.gray
                orders.append(v)  # track gray order
                stack.append((v, iter(g[v])))
                break
        else:
            # No white neighbor left: u is complete.
            colors[u] = STATE.black  # not needed by the algorithm, only tracks the state
            complete_orders.append(u)
            stack.pop()

    return orders, complete_orders


def bfgs_state(g, s):
    """Breadth-first graph search that tracks white / gray / black states.

    Args:
        g: adjacency list indexed by vertex number.
        s: start vertex.

    Returns:
        ``(orders, complete_orders)``: vertices in the order they turned
        gray (enqueued), and in the order they turned black (dequeued and
        fully expanded).
    """
    from collections import deque  # O(1) popleft instead of list.pop(0)

    colors = [STATE.white] * len(g)

    q, orders = deque([s]), [s]
    complete_orders = []
    colors[s] = STATE.gray  # mark the state of the visiting node
    while q:
        u = q.popleft()
        for v in g[u]:
            if colors[v] == STATE.white:
                colors[v] = STATE.gray
                q.append(v)
                orders.append(v)

        # complete
        colors[u] = STATE.black
        complete_orders.append(u)
    return orders, complete_orders


# Plot state
def plot_state_bfs(g, colors, dot, png_name):
    """Render one snapshot of the whole graph ``g`` colored by vertex state.

    The rank layout is hard-coded for vertices 0..5. Node names are suffixed
    with ``png_name`` and a PNG named ``png_name`` is rendered under
    ``test-output/``.

    Args:
        g: adjacency list indexed by vertex number.
        colors: per-vertex ``STATE`` list used for node fill colors.
        dot: parent graph the snapshot subgraph is attached to.
        png_name: subgraph name, node-name suffix and output file name.
    """
    with dot.subgraph(name=png_name) as init:
        nodes = len(g)
        ranks = [[0], [2, 1], [4, 3], [5]]

        for rank in ranks:
            with init.subgraph() as same_rank:
                same_rank.attr(rank='same')
                for node in rank:
                    if colors[node] == STATE.gray:
                        fill = 'gray'
                    elif colors[node] == STATE.black:
                        fill = 'black'
                    else:
                        fill = 'white'
                    set_node_style(same_rank, fill, node, name=str(node) + png_name)

        for start in range(nodes):
            for end in g[start]:
                init.edge(str(start) + png_name, str(end) + png_name)
        init.render('test-output/' + png_name, view=True, format='png')
[STATE.white] * v\n","\n"," dot = Digraph(comment='The Round Table', format='png')\n","\n"," \n"," q = [s]\n"," colors[s] = STATE.gray # make the state of the visiting node\n"," counter = itertools.count()\n"," plot_state_bfs(g, colors, dot, png_name='breath_first_graph_search_process'+str(next(counter)))\n"," while q:\n"," u = q.pop(0) \n"," for v in g[u]:\n"," if colors[v] == STATE.white:\n"," colors[v] = STATE.gray\n"," q.append(v)\n"," plot_state_bfs(g, colors, dot, png_name='breath_first_graph_search_process'+str(next(counter)))\n","\n"," # complete \n"," colors[u] = STATE.black\n"," plot_state_bfs(g, colors, dot, png_name='breath_first_graph_search_process'+str(next(counter)))\n"," #dot.render('test-output/breath_first_graph_search_process', view=True) \n"," return dot\n"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"b6C9AhL5WCPH","colab":{"base_uri":"https://localhost:8080/","height":389},"executionInfo":{"status":"ok","timestamp":1577839695426,"user_tz":480,"elapsed":20268,"user":{"displayName":"Li Yin","photoUrl":"https://lh3.googleusercontent.com/a-/AAuE7mDHkgtY8ax6h1ZXf46S1b3QtrhKej1YjeAjlwhj5w=s64","userId":"13365523799853678553"}},"outputId":"82ca88fe-7e9e-40b8-ac00-1af9dc05b62a"},"source":["dot = bfgs_state_plot(dcg, 
0)\n","dot"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":[""],"image/svg+xml":"\n\n\n\n\n\n%3\n\n\n\n0breath_first_graph_search_process0\n\n0\n\n\n\n2breath_first_graph_search_process0\n\n2\n\n\n\n0breath_first_graph_search_process0->2breath_first_graph_search_process0\n\n\n\n\n\n1breath_first_graph_search_process0\n\n1\n\n\n\n0breath_first_graph_search_process0->1breath_first_graph_search_process0\n\n\n\n\n\n2breath_first_graph_search_process0->0breath_first_graph_search_process0\n\n\n\n\n\n4breath_first_graph_search_process0\n\n4\n\n\n\n2breath_first_graph_search_process0->4breath_first_graph_search_process0\n\n\n\n\n\n1breath_first_graph_search_process0->2breath_first_graph_search_process0\n\n\n\n\n\n1breath_first_graph_search_process0->4breath_first_graph_search_process0\n\n\n\n\n\n3breath_first_graph_search_process0\n\n3\n\n\n\n4breath_first_graph_search_process0->3breath_first_graph_search_process0\n\n\n\n\n\n5breath_first_graph_search_process0\n\n5\n\n\n\n4breath_first_graph_search_process0->5breath_first_graph_search_process0\n\n\n\n\n\n3breath_first_graph_search_process0->1breath_first_graph_search_process0\n\n\n\n\n\n5breath_first_graph_search_process0->3breath_first_graph_search_process0\n\n\n\n\n\n0breath_first_graph_search_process1\n\n0\n\n\n\n2breath_first_graph_search_process1\n\n2\n\n\n\n0breath_first_graph_search_process1->2breath_first_graph_search_process1\n\n\n\n\n\n1breath_first_graph_search_process1\n\n1\n\n\n\n0breath_first_graph_search_process1->1breath_first_graph_search_process1\n\n\n\n\n\n2breath_first_graph_search_process1->0breath_first_graph_search_process1\n\n\n\n\n\n4breath_first_graph_search_process1\n\n4\n\n\n\n2breath_first_graph_search_process1->4breath_first_graph_search_process1\n\n\n\n\n\n1breath_first_graph_search_process1->2breath_first_graph_search_process1\n\n\n\n\n\n1breath_first_graph_search_process1->4breath_first_graph_search_process1\n\n\n\n\n\n3breath_first_graph_search_proce
ss1\n\n3\n\n\n\n4breath_first_graph_search_process1->3breath_first_graph_search_process1\n\n\n\n\n\n5breath_first_graph_search_process1\n\n5\n\n\n\n4breath_first_graph_search_process1->5breath_first_graph_search_process1\n\n\n\n\n\n3breath_first_graph_search_process1->1breath_first_graph_search_process1\n\n\n\n\n\n5breath_first_graph_search_process1->3breath_first_graph_search_process1\n\n\n\n\n\n0breath_first_graph_search_process2\n\n0\n\n\n\n2breath_first_graph_search_process2\n\n2\n\n\n\n0breath_first_graph_search_process2->2breath_first_graph_search_process2\n\n\n\n\n\n1breath_first_graph_search_process2\n\n1\n\n\n\n0breath_first_graph_search_process2->1breath_first_graph_search_process2\n\n\n\n\n\n2breath_first_graph_search_process2->0breath_first_graph_search_process2\n\n\n\n\n\n4breath_first_graph_search_process2\n\n4\n\n\n\n2breath_first_graph_search_process2->4breath_first_graph_search_process2\n\n\n\n\n\n1breath_first_graph_search_process2->2breath_first_graph_search_process2\n\n\n\n\n\n1breath_first_graph_search_process2->4breath_first_graph_search_process2\n\n\n\n\n\n3breath_first_graph_search_process2\n\n3\n\n\n\n4breath_first_graph_search_process2->3breath_first_graph_search_process2\n\n\n\n\n\n5breath_first_graph_search_process2\n\n5\n\n\n\n4breath_first_graph_search_process2->5breath_first_graph_search_process2\n\n\n\n\n\n3breath_first_graph_search_process2->1breath_first_graph_search_process2\n\n\n\n\n\n5breath_first_graph_search_process2->3breath_first_graph_search_process2\n\n\n\n\n\n0breath_first_graph_search_process3\n\n0\n\n\n\n2breath_first_graph_search_process3\n\n2\n\n\n\n0breath_first_graph_search_process3->2breath_first_graph_search_process3\n\n\n\n\n\n1breath_first_graph_search_process3\n\n1\n\n\n\n0breath_first_graph_search_process3->1breath_first_graph_search_process3\n\n\n\n\n\n2breath_first_graph_search_process3->0breath_first_graph_search_process3\n\n\n\n\n\n4breath_first_graph_search_process3\n\n4\n\n\n\n2breath_first_graph_search_p
rocess3->4breath_first_graph_search_process3\n\n\n\n\n\n1breath_first_graph_search_process3->2breath_first_graph_search_process3\n\n\n\n\n\n1breath_first_graph_search_process3->4breath_first_graph_search_process3\n\n\n\n\n\n3breath_first_graph_search_process3\n\n3\n\n\n\n4breath_first_graph_search_process3->3breath_first_graph_search_process3\n\n\n\n\n\n5breath_first_graph_search_process3\n\n5\n\n\n\n4breath_first_graph_search_process3->5breath_first_graph_search_process3\n\n\n\n\n\n3breath_first_graph_search_process3->1breath_first_graph_search_process3\n\n\n\n\n\n5breath_first_graph_search_process3->3breath_first_graph_search_process3\n\n\n\n\n\n0breath_first_graph_search_process4\n\n0\n\n\n\n2breath_first_graph_search_process4\n\n2\n\n\n\n0breath_first_graph_search_process4->2breath_first_graph_search_process4\n\n\n\n\n\n1breath_first_graph_search_process4\n\n1\n\n\n\n0breath_first_graph_search_process4->1breath_first_graph_search_process4\n\n\n\n\n\n2breath_first_graph_search_process4->0breath_first_graph_search_process4\n\n\n\n\n\n4breath_first_graph_search_process4\n\n4\n\n\n\n2breath_first_graph_search_process4->4breath_first_graph_search_process4\n\n\n\n\n\n1breath_first_graph_search_process4->2breath_first_graph_search_process4\n\n\n\n\n\n1breath_first_graph_search_process4->4breath_first_graph_search_process4\n\n\n\n\n\n3breath_first_graph_search_process4\n\n3\n\n\n\n4breath_first_graph_search_process4->3breath_first_graph_search_process4\n\n\n\n\n\n5breath_first_graph_search_process4\n\n5\n\n\n\n4breath_first_graph_search_process4->5breath_first_graph_search_process4\n\n\n\n\n\n3breath_first_graph_search_process4->1breath_first_graph_search_process4\n\n\n\n\n\n5breath_first_graph_search_process4->3breath_first_graph_search_process4\n\n\n\n\n\n0breath_first_graph_search_process5\n\n0\n\n\n\n2breath_first_graph_search_process5\n\n2\n\n\n\n0breath_first_graph_search_process5->2breath_first_graph_search_process5\n\n\n\n\n\n1breath_first_graph_search_process5\
n\n1\n\n\n\n0breath_first_graph_search_process5->1breath_first_graph_search_process5\n\n\n\n\n\n2breath_first_graph_search_process5->0breath_first_graph_search_process5\n\n\n\n\n\n4breath_first_graph_search_process5\n\n4\n\n\n\n2breath_first_graph_search_process5->4breath_first_graph_search_process5\n\n\n\n\n\n1breath_first_graph_search_process5->2breath_first_graph_search_process5\n\n\n\n\n\n1breath_first_graph_search_process5->4breath_first_graph_search_process5\n\n\n\n\n\n3breath_first_graph_search_process5\n\n3\n\n\n\n4breath_first_graph_search_process5->3breath_first_graph_search_process5\n\n\n\n\n\n5breath_first_graph_search_process5\n\n5\n\n\n\n4breath_first_graph_search_process5->5breath_first_graph_search_process5\n\n\n\n\n\n3breath_first_graph_search_process5->1breath_first_graph_search_process5\n\n\n\n\n\n5breath_first_graph_search_process5->3breath_first_graph_search_process5\n\n\n\n\n\n0breath_first_graph_search_process6\n\n0\n\n\n\n2breath_first_graph_search_process6\n\n2\n\n\n\n0breath_first_graph_search_process6->2breath_first_graph_search_process6\n\n\n\n\n\n1breath_first_graph_search_process6\n\n1\n\n\n\n0breath_first_graph_search_process6->1breath_first_graph_search_process6\n\n\n\n\n\n2breath_first_graph_search_process6->0breath_first_graph_search_process6\n\n\n\n\n\n4breath_first_graph_search_process6\n\n4\n\n\n\n2breath_first_graph_search_process6->4breath_first_graph_search_process6\n\n\n\n\n\n1breath_first_graph_search_process6->2breath_first_graph_search_process6\n\n\n\n\n\n1breath_first_graph_search_process6->4breath_first_graph_search_process6\n\n\n\n\n\n3breath_first_graph_search_process6\n\n3\n\n\n\n4breath_first_graph_search_process6->3breath_first_graph_search_process6\n\n\n\n\n\n5breath_first_graph_search_process6\n\n5\n\n\n\n4breath_first_graph_search_process6->5breath_first_graph_search_process6\n\n\n\n\n\n3breath_first_graph_search_process6->1breath_first_graph_search_process6\n\n\n\n\n\n5breath_first_graph_search_process6->3breath
_first_graph_search_process6\n\n\n\n\n\n0breath_first_graph_search_process7\n\n0\n\n\n\n2breath_first_graph_search_process7\n\n2\n\n\n\n0breath_first_graph_search_process7->2breath_first_graph_search_process7\n\n\n\n\n\n1breath_first_graph_search_process7\n\n1\n\n\n\n0breath_first_graph_search_process7->1breath_first_graph_search_process7\n\n\n\n\n\n2breath_first_graph_search_process7->0breath_first_graph_search_process7\n\n\n\n\n\n4breath_first_graph_search_process7\n\n4\n\n\n\n2breath_first_graph_search_process7->4breath_first_graph_search_process7\n\n\n\n\n\n1breath_first_graph_search_process7->2breath_first_graph_search_process7\n\n\n\n\n\n1breath_first_graph_search_process7->4breath_first_graph_search_process7\n\n\n\n\n\n3breath_first_graph_search_process7\n\n3\n\n\n\n4breath_first_graph_search_process7->3breath_first_graph_search_process7\n\n\n\n\n\n5breath_first_graph_search_process7\n\n5\n\n\n\n4breath_first_graph_search_process7->5breath_first_graph_search_process7\n\n\n\n\n\n3breath_first_graph_search_process7->1breath_first_graph_search_process7\n\n\n\n\n\n5breath_first_graph_search_process7->3breath_first_graph_search_process7\n\n\n\n\n\n0breath_first_graph_search_process8\n\n0\n\n\n\n2breath_first_graph_search_process8\n\n2\n\n\n\n0breath_first_graph_search_process8->2breath_first_graph_search_process8\n\n\n\n\n\n1breath_first_graph_search_process8\n\n1\n\n\n\n0breath_first_graph_search_process8->1breath_first_graph_search_process8\n\n\n\n\n\n2breath_first_graph_search_process8->0breath_first_graph_search_process8\n\n\n\n\n\n4breath_first_graph_search_process8\n\n4\n\n\n\n2breath_first_graph_search_process8->4breath_first_graph_search_process8\n\n\n\n\n\n1breath_first_graph_search_process8->2breath_first_graph_search_process8\n\n\n\n\n\n1breath_first_graph_search_process8->4breath_first_graph_search_process8\n\n\n\n\n\n3breath_first_graph_search_process8\n\n3\n\n\n\n4breath_first_graph_search_process8->3breath_first_graph_search_process8\n\n\n\n\n\n5bre
ath_first_graph_search_process8\n\n5\n\n\n\n4breath_first_graph_search_process8->5breath_first_graph_search_process8\n\n\n\n\n\n3breath_first_graph_search_process8->1breath_first_graph_search_process8\n\n\n\n\n\n5breath_first_graph_search_process8->3breath_first_graph_search_process8\n\n\n\n\n\n0breath_first_graph_search_process9\n\n0\n\n\n\n2breath_first_graph_search_process9\n\n2\n\n\n\n0breath_first_graph_search_process9->2breath_first_graph_search_process9\n\n\n\n\n\n1breath_first_graph_search_process9\n\n1\n\n\n\n0breath_first_graph_search_process9->1breath_first_graph_search_process9\n\n\n\n\n\n2breath_first_graph_search_process9->0breath_first_graph_search_process9\n\n\n\n\n\n4breath_first_graph_search_process9\n\n4\n\n\n\n2breath_first_graph_search_process9->4breath_first_graph_search_process9\n\n\n\n\n\n1breath_first_graph_search_process9->2breath_first_graph_search_process9\n\n\n\n\n\n1breath_first_graph_search_process9->4breath_first_graph_search_process9\n\n\n\n\n\n3breath_first_graph_search_process9\n\n3\n\n\n\n4breath_first_graph_search_process9->3breath_first_graph_search_process9\n\n\n\n\n\n5breath_first_graph_search_process9\n\n5\n\n\n\n4breath_first_graph_search_process9->5breath_first_graph_search_process9\n\n\n\n\n\n3breath_first_graph_search_process9->1breath_first_graph_search_process9\n\n\n\n\n\n5breath_first_graph_search_process9->3breath_first_graph_search_process9\n\n\n\n\n\n0breath_first_graph_search_process10\n\n0\n\n\n\n2breath_first_graph_search_process10\n\n2\n\n\n\n0breath_first_graph_search_process10->2breath_first_graph_search_process10\n\n\n\n\n\n1breath_first_graph_search_process10\n\n1\n\n\n\n0breath_first_graph_search_process10->1breath_first_graph_search_process10\n\n\n\n\n\n2breath_first_graph_search_process10->0breath_first_graph_search_process10\n\n\n\n\n\n4breath_first_graph_search_process10\n\n4\n\n\n\n2breath_first_graph_search_process10->4breath_first_graph_search_process10\n\n\n\n\n\n1breath_first_graph_search_process10
->2breath_first_graph_search_process10\n\n\n\n\n\n1breath_first_graph_search_process10->4breath_first_graph_search_process10\n\n\n\n\n\n3breath_first_graph_search_process10\n\n3\n\n\n\n4breath_first_graph_search_process10->3breath_first_graph_search_process10\n\n\n\n\n\n5breath_first_graph_search_process10\n\n5\n\n\n\n4breath_first_graph_search_process10->5breath_first_graph_search_process10\n\n\n\n\n\n3breath_first_graph_search_process10->1breath_first_graph_search_process10\n\n\n\n\n\n5breath_first_graph_search_process10->3breath_first_graph_search_process10\n\n\n\n\n\n0breath_first_graph_search_process11\n\n0\n\n\n\n2breath_first_graph_search_process11\n\n2\n\n\n\n0breath_first_graph_search_process11->2breath_first_graph_search_process11\n\n\n\n\n\n1breath_first_graph_search_process11\n\n1\n\n\n\n0breath_first_graph_search_process11->1breath_first_graph_search_process11\n\n\n\n\n\n2breath_first_graph_search_process11->0breath_first_graph_search_process11\n\n\n\n\n\n4breath_first_graph_search_process11\n\n4\n\n\n\n2breath_first_graph_search_process11->4breath_first_graph_search_process11\n\n\n\n\n\n1breath_first_graph_search_process11->2breath_first_graph_search_process11\n\n\n\n\n\n1breath_first_graph_search_process11->4breath_first_graph_search_process11\n\n\n\n\n\n3breath_first_graph_search_process11\n\n3\n\n\n\n4breath_first_graph_search_process11->3breath_first_graph_search_process11\n\n\n\n\n\n5breath_first_graph_search_process11\n\n5\n\n\n\n4breath_first_graph_search_process11->5breath_first_graph_search_process11\n\n\n\n\n\n3breath_first_graph_search_process11->1breath_first_graph_search_process11\n\n\n\n\n\n5breath_first_graph_search_process11->3breath_first_graph_search_process11\n\n\n\n\n\n"},"metadata":{"tags":[]},"execution_count":561}]},{"cell_type":"markdown","metadata":{"id":"I0j42hiHHZBq"},"source":["## Tree Search\n","\n","The sample code is writen in 
[tree_datastructure_and_traversal](https://colab.research.google.com/drive/1pg49npUd4Rhbg5fggs8ZYakA563YATeA)."]}]} \ No newline at end of file diff --git a/LICENSE b/LICENSE deleted file mode 100644 index 38e17fd..0000000 --- a/LICENSE +++ /dev/null @@ -1,201 +0,0 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). 
- - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. 
Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative 
Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. 
Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. 
- - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [2022] [Li Yin] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. diff --git a/README.md b/README.md deleted file mode 100644 index 96e177e..0000000 --- a/README.md +++ /dev/null @@ -1,162 +0,0 @@ - -
-

Hands-on Algorithmic Problem Solving

-

A 100% open source one-stop coding interview prep book!

-
DSA · Python · LeetCode · Principles · Problem patterns
-

- -
- -
- -   -   - -
- 🔥  Among the top DSA repositories on GitHub -
- -
-
- -## Contributing -The book is written in LaTeX, and all content except the code is located under the [**Easy-Book** folder](Easy-Book). Feel free to send me [pull requests](https://github.com/liyin2015/python-coding-interview/pulls) to contribute content. Before you start improving the content, it would be helpful to know the [high-level structure of the book](#about-this-book). Read [How to contribute?](how_to_contribute.md) for more details. - -All contributors (with a decent amount of contributions) will be listed as authors in the project. - -
- - - -## About this book - -

- book_structure -

- - -In short, this is a middle-to-high level algorithm book designed with cracking coding interviews at heart. It offers a one-stop coding interview prep experience. The structure of the book: -* **Preparation**: introduce the global picture of algorithmic problem solving and coding interviews, learn abstract data structures and highly related and useful math such as recurrence relations, and get hands-on Python practice by relating the abstract data structures to Python data structures. *Coding is not just code after all.* -* **Principles**: we organize the design principles here so that readers can use them as guidance instead of hunting for a peculiar algorithm to solve each problem. -* **Classical algorithms**: We enhance our algorithm database by learning how to apply the core principles to a variety of classical problems. This is a database that we can quickly relate to when seeing problems. -* **Coding interview problem patterns**: We close the book by analyzing and categorizing problems by pattern. We address classical and best solutions for each problem pattern. - - - -Besides trying to make the content easy to follow, here is what makes this book unique: (1) it offers Python source code that is tailored to be simple so that it would be natural for you to use in interviews (2) all the exercises and examples are from LeetCode problems so that you get to practise online (3) Classical algorithms are explained with design principles. No algorithm is magic. (Check out [advanced graph algorithms](https://github.com/liyin2015/Hands-on-Algorithmic-Problem-Solving/blob/master/chapters_pdf/chapter_advanced_graph_algorithm.pdf) as an example) (4) problem patterns to help you tackle coding interview questions topic by topic. -
- -## How did I come up with this book? -Preparing for the coding interview is not easy! Cracking the coding interview? Nearly impossible for most of us! Luck does play a role in the outcome. So, let's just treat it as a learning process and have some fun! - -Computer Science is really not just computer science. It is a combination of all fields; our typical interview problems fall into enumerative combinatorics, and computer vision mostly consists of linear algebra. What really matters is our passion to learn and the ability to apply this knowledge to solve real-life problems. - -There are plenty of books out there focusing on either teaching algorithmic knowledge (*Introduction to Algorithms*, *Algorithmic Problem Solving*, etc.) or introducing the interview process and solving interview problems (*Cracking the Coding Interview*, *Coding Interview Questions*, etc.), but none of these books truly combines the two. This book is designed to fill that gap. Principles, patterns, and LeetCode problems make up the core of this book. - -This is **NOT** a book that provides hiring statistics for each company or gives the reader quick tricks in order to pass a few coding interviews. Its purpose is to show you the beauty of algorithmic problem solving in the hope that you will be more passionate and confident about software engineering; the interview questions just set up a playground where we strengthen what we learn. -
- -## For Readers -[The whole book](Easy-Book/main.pdf) is compiled as a PDF. - -You can read the book as a whole or read chapters selectively by following the links below. - -## Table of Contents -
- -### Warm Up: Abstract Data Structures and Tools -* [Abstract Data Structures](chapters_pdf/Abstract_Data_Structures.pdf) -* Discrete Programming -* Recurrence Relation - -### Get Started: Programming and Python Data Structures -* Iteration and Recursion -* Bit Manipulation -* [**Python Data Structures**](chapters_pdf/Python_Data_Structure.pdf)( [source code](Colab_Codes/chapter_python_datastrcutures.ipynb) ) - -### Core Principles: Algorithm Design and Analysis -* Complexity Analysis -* [Search Strategies](chapters_pdf/search_strategies.pdf)([source code: Graph Search](Colab_Codes/chapter_search_strategies.ipynb), [source code: Tree Traversal](Colab_Codes/chapter_tree_data_structure_and_traversal.ipynb)) -* [Combinatorial Search](chapters_pdf/combinatorial_search.pdf)( [source code](Colab_Codes/chapter_combinatorial_search.ipynb)) -* Reduce and Conquer - -* **Decrease and Conquer** -> * [Binary Search, Binary Search Tree, and Segment Tree](chapters_pdf/decrease_and_conquer.pdf)( [source code](Colab_Codes/chapter_decrease_and_conquer.ipynb)) -* [**Sorting and Selection**](chapters_pdf/sorting_algorithms_with_python3.pdf)( source code: [ sorting algorithms](Colab_Codes/chapter_sorting_and_selection_algorithms.ipynb),[ Python comparison and sort functions](Colab_Codes/chapter_python_comparison_sorting.ipynb)) -* Dynamic Programming -* Greedy Algorithms - -### Advanced Algorithms -* Advanced Data Structures -* [**Advanced Search on Linear Data Structures**](chapters_pdf/advanced_search_on_linear_data_structures.pdf)( [source code](Colab_Codes/Advanced_Search_on_Linear_Data_Structures.ipynb)) -* [Advanced Graph Algorithms](chapters_pdf/chapter_advanced_graph_algorithm.pdf) -* String Pattern Matches -* Math and Geometry Algorithms - -### Problem Patterns -* Dynamic Programming Questions (15%) -* Array Questions (15%) -* Linked List, Stack, Queue, and Heap Questions (12%) -* String Questions (15%) -* [Tree Questions (10%)](unorganized_pdf/tree_questions.pdf) -* Graph 
Questions (15%) - -*Note: everything is still in progress, so use it with caution.* -
- -## Referring Books and Materials - -* Skiena, Steven S. The algorithm design manual: Text. Vol. 1. Springer Science & Business Media, 1998. - -* T. H. Cormen, Introduction to algorithms, MIT press, 2009. - -* Manber, Udi. Introduction to algorithms: a creative approach. Addison-Wesley Longman Publishing Co., Inc., 1989. - -* Kleinberg, Jon, and Eva Tardos. Algorithm design. Pearson Education India, 2006. - -* Russell, Stuart J., and Peter Norvig. Artificial intelligence: a modern approach. Malaysia; Pearson Education Limited,, 2016. (**Best book ever in explaining searching problem-solving, differentiate tree-search and graph-search**) - -* D. M. Beazley, Python essential reference, Addison-Wesley Professional,2009. - -* S. Halim and F. Halim, Competitive Programming 3, Lulu Independent -Publish, 2013. - -* B. Slatkin, Effective Python: 59 Specific Ways to Write Better Python,Pearson Education, 2015. - -* H. hua jiang, “Leetcode blogs,” https://zxi.mytechroad.com/blog/category, 2018, [Online; accessed 19-July-2018]. - -* B. Baka, “Python data structures and algorithms: Improve application performance with graphs, stacks, and queues,” 2017. - -* “Competitive Programming,”https://cp-algorithms.com/, 2019, [Online; accessed 19-July-2018]. - -* “cs princeton,”https://aofa.cs.princeton.edu/60trees/, 2019, -[Online; accessed 19-July-2018] -* https://stanford-cs161.github.io/winter2021/schedule/ -
- -## Tools -* Graph visualization with [Graphviz](http://www.webgraphviz.com/). [Examples](https://graphs.grevian.org/example). [Tutorial for using it from Python](https://graphviz.readthedocs.io/en/stable/manual.html) -
- -## Mock Interviews -Practice is important. Schedule some mock interviews with [interviewing.io](https://interviewing.io/). If you can't manage to register, you can join us on the [Discord server](https://discord.gg/ZXnSag7fMP) and ask peers for a practice interview. -
- -## Community -**Join me on discord server: https://discord.gg/ZXnSag7fMP, for a supportive community** -
- -## Feedback -If you have ideas for improving the book, whether about formatting, additional content, or corrections to errors, do not hesitate to let me know. - - -To cite this content, please use: -
-```bibtex -@misc{handsondsa, - author = {Li Yin}, - title = {Hands-on Algorithmic Problem Solving}, - howpublished = {\url{https://github.com/liyin2015/python-coding-interview/}}, - year = {2021} -} -``` diff --git a/_config.yml b/_config.yml new file mode 100644 index 0000000..9da9a02 --- /dev/null +++ b/_config.yml @@ -0,0 +1 @@ +theme: jekyll-theme-dinky \ No newline at end of file diff --git a/chapters_md/discrete_programming.md b/chapters_md/discrete_programming.md deleted file mode 100644 index a403624..0000000 --- a/chapters_md/discrete_programming.md +++ /dev/null @@ -1 +0,0 @@ -sfsfsfs \ No newline at end of file diff --git a/chapters_pdf/Abstract_Data_Structures.pdf b/chapters_pdf/Abstract_Data_Structures.pdf deleted file mode 100644 index 3b188e5..0000000 Binary files a/chapters_pdf/Abstract_Data_Structures.pdf and /dev/null differ diff --git a/chapters_pdf/Python_Data_Structure.pdf b/chapters_pdf/Python_Data_Structure.pdf deleted file mode 100644 index a54cabf..0000000 Binary files a/chapters_pdf/Python_Data_Structure.pdf and /dev/null differ diff --git a/chapters_pdf/advanced_search_on_linear_data_structures.pdf b/chapters_pdf/advanced_search_on_linear_data_structures.pdf deleted file mode 100644 index 46bd49d..0000000 Binary files a/chapters_pdf/advanced_search_on_linear_data_structures.pdf and /dev/null differ diff --git a/chapters_pdf/chapter_advanced_graph_algorithm.pdf b/chapters_pdf/chapter_advanced_graph_algorithm.pdf deleted file mode 100644 index a5f90df..0000000 Binary files a/chapters_pdf/chapter_advanced_graph_algorithm.pdf and /dev/null differ diff --git a/chapters_pdf/combinatorial_search.pdf b/chapters_pdf/combinatorial_search.pdf deleted file mode 100644 index c4de42e..0000000 Binary files a/chapters_pdf/combinatorial_search.pdf and /dev/null differ diff --git a/chapters_pdf/decrease_and_conquer.pdf b/chapters_pdf/decrease_and_conquer.pdf deleted file mode 100644 index 95a1d9e..0000000 Binary files 
a/chapters_pdf/decrease_and_conquer.pdf and /dev/null differ diff --git a/chapters_pdf/search_strategies.pdf b/chapters_pdf/search_strategies.pdf deleted file mode 100644 index d1e1214..0000000 Binary files a/chapters_pdf/search_strategies.pdf and /dev/null differ diff --git a/chapters_pdf/sorting_algorithms_with_python3.pdf b/chapters_pdf/sorting_algorithms_with_python3.pdf deleted file mode 100644 index 34d03ce..0000000 Binary files a/chapters_pdf/sorting_algorithms_with_python3.pdf and /dev/null differ diff --git a/figures/book_structure.png b/figures/book_structure.png deleted file mode 100644 index b61e166..0000000 Binary files a/figures/book_structure.png and /dev/null differ diff --git a/how_to_contribute.md b/how_to_contribute.md deleted file mode 100644 index 2498285..0000000 --- a/how_to_contribute.md +++ /dev/null @@ -1,10 +0,0 @@ -# How to contribute to this project? -This project requires you to edit the LaTeX source files located in the [Easy-Book](Easy-Book) folder. You can either set up a [local Visual Studio Code environment for LaTeX](https://dev.to/ucscmozilla/how-to-create-and-compile-latex-documents-on-visual-studio-code-3jbk), or you can directly use [GitHub Codespaces](https://github.com/features/codespaces). - -To submit a change, please use pull requests. - -## What help is needed? -1. Content editing. -2. Better figures: if you know a better way to draw graphs than using GraphViz, let me know. -3. Complete some chapters, including both the theoretical ones and the problem patterns. -4. Improve the content. \ No newline at end of file diff --git a/index.md b/index.md new file mode 100644 index 0000000..d1cf2b3 --- /dev/null +++ b/index.md @@ -0,0 +1,3 @@ +## Welcome to GitHub Pages + +This is an initial page for the book in progress.
diff --git a/unorganized_pdf/backtracking.pdf b/unorganized_pdf/backtracking.pdf deleted file mode 100644 index 5670b68..0000000 Binary files a/unorganized_pdf/backtracking.pdf and /dev/null differ diff --git a/unorganized_pdf/binary_search.pdf b/unorganized_pdf/binary_search.pdf deleted file mode 100644 index a7c5ddd..0000000 Binary files a/unorganized_pdf/binary_search.pdf and /dev/null differ diff --git a/unorganized_pdf/bit manipulation.pdf b/unorganized_pdf/bit manipulation.pdf deleted file mode 100644 index f28a77a..0000000 Binary files a/unorganized_pdf/bit manipulation.pdf and /dev/null differ diff --git a/unorganized_pdf/heap_priority_queue.pdf b/unorganized_pdf/heap_priority_queue.pdf deleted file mode 100644 index c4ea472..0000000 Binary files a/unorganized_pdf/heap_priority_queue.pdf and /dev/null differ diff --git a/unorganized_pdf/linear_data_structure.pdf b/unorganized_pdf/linear_data_structure.pdf deleted file mode 100644 index d86bfb6..0000000 Binary files a/unorganized_pdf/linear_data_structure.pdf and /dev/null differ diff --git a/unorganized_pdf/tree_questions.pdf b/unorganized_pdf/tree_questions.pdf deleted file mode 100644 index 00b5086..0000000 Binary files a/unorganized_pdf/tree_questions.pdf and /dev/null differ diff --git a/unorganized_pdf/two_pointer.pdf b/unorganized_pdf/two_pointer.pdf deleted file mode 100644 index f5dfd2d..0000000 Binary files a/unorganized_pdf/two_pointer.pdf and /dev/null differ diff --git a/unorganized_pdf/workspace.code-workspace b/unorganized_pdf/workspace.code-workspace deleted file mode 100644 index 7b4bd81..0000000 --- a/unorganized_pdf/workspace.code-workspace +++ /dev/null @@ -1,11 +0,0 @@ -{ - "folders": [ - { - "path": ".." - }, - { - "path": "../../cv_ai_applications" - } - ], - "settings": {} -} \ No newline at end of file