diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..991b2c2 --- /dev/null +++ b/.gitignore @@ -0,0 +1,62 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] + +# C extensions +*.so + +# Distribution / packaging +.Python +env/ +bin/ +build/ +develop-eggs/ +dist/ +eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +*.egg-info/ +.installed.cfg +*.egg + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.coverage +.cache +nosetests.xml +coverage.xml + +# Translations +*.mo + +# Mr Developer +.mr.developer.cfg +.project +.pydevproject + +# Rope +.ropeproject + +# Django stuff: +*.log +*.pot + +# Sphinx documentation +docs/_build/ + +# Pants workspace files +/.pants.d/ +/dist/ +/.pids +/.pants.workdir.file_lock* + +# Others +*.bak diff --git a/BUILD b/BUILD new file mode 100644 index 0000000..feacd05 --- /dev/null +++ b/BUILD @@ -0,0 +1,17 @@ +python_requirements() + +python_distribution( + name="python-algorithms", + dependencies=[ + ], + provides=setup_py( + name="python-algorithms", + version="1.0", + description="Python algorithms.", + author="Laurent Luce", + classifiers=[ + "Programming Language :: Python :: 3.6", + ], + ), + setup_py_commands=["sdist", "bdist_wheel", "--python-tag", "py36.py37"] +) diff --git a/README b/README deleted file mode 100644 index 5711b77..0000000 --- a/README +++ /dev/null @@ -1,28 +0,0 @@ -## Python Algorithms Library -## Laurent Luce - -### Description -The purpose of this library is to help you with common algorithms like: - -String matching - - Naive - - Rabin-Karp - - Knuth-Morris-Pratt - - Boyer-Moore-Horspool - -Binary tree - - node and tree class - - lookup - - insert - - delete - - compare 2 trees - - print tree - - tree inorder generator - -### Installation -Get the source and run - - $ python setup.py install - -### License -The Python Algorithms Library is distributed under the MIT License diff --git a/README.md b/README.md new file 
mode 100644 index 0000000..85c29c2 --- /dev/null +++ b/README.md @@ -0,0 +1,35 @@ +## Python Algorithms Library +## Laurent Luce + +### Description +The purpose of this library is to help you with common algorithms like: + +A* path finding. + +String Matching + - Naive. + - Rabin-Karp. + - Knuth-Morris-Pratt. + - Boyer-Moore-Horspool. + +String + - Convert string to integer without using int on the full string. + - Reverse string containing words. + +Generators + - Permutations. + +Lists + - Find integer using binary search. + - Find subset with max sum. + - Merge sort. + - Quicksort. + +Binary tree + +### Tests + + $ ./pants test :: + +### License +The Python Algorithms Library is distributed under the MIT License diff --git a/algorithms/BUILD b/algorithms/BUILD new file mode 100644 index 0000000..d06a175 --- /dev/null +++ b/algorithms/BUILD @@ -0,0 +1,7 @@ +python_library( + name="algorithms", + sources=["*.py", "!*_test.py"], + interpreter_constraints=["==2.7.*", ">=3.6"], +) + + diff --git a/algorithms/a_star_path_finding.py b/algorithms/a_star_path_finding.py new file mode 100644 index 0000000..1448cb7 --- /dev/null +++ b/algorithms/a_star_path_finding.py @@ -0,0 +1,148 @@ +import heapq + + +class Cell(object): + def __init__(self, x, y, reachable): + """Initialize new cell. + + @param reachable is cell reachable? not a wall? + @param x cell x coordinate + @param y cell y coordinate + @param g cost to move from the starting cell to this cell. + @param h estimation of the cost to move from this cell + to the ending cell. 
+ @param f f = g + h + """ + self.reachable = reachable + self.x = x + self.y = y + self.parent = None + self.g = 0 + self.h = 0 + self.f = 0 + + def __lt__(self, other): + return self.f < other.f + + +class AStar(object): + def __init__(self): + # open list + self.opened = [] + heapq.heapify(self.opened) + # visited cells list + self.closed = set() + # grid cells + self.cells = [] + self.grid_height = None + self.grid_width = None + + def init_grid(self, width, height, walls, start, end): + """Prepare grid cells, walls. + + @param width grid's width. + @param height grid's height. + @param walls list of wall x,y tuples. + @param start grid starting point x,y tuple. + @param end grid ending point x,y tuple. + """ + self.grid_height = height + self.grid_width = width + for x in range(self.grid_width): + for y in range(self.grid_height): + if (x, y) in walls: + reachable = False + else: + reachable = True + self.cells.append(Cell(x, y, reachable)) + self.start = self.get_cell(*start) + self.end = self.get_cell(*end) + + def get_heuristic(self, cell): + """Compute the heuristic value H for a cell. + + Distance between this cell and the ending cell multiply by 10. + + @returns heuristic value H + """ + return 10 * (abs(cell.x - self.end.x) + abs(cell.y - self.end.y)) + + def get_cell(self, x, y): + """Returns a cell from the cells list. + + @param x cell x coordinate + @param y cell y coordinate + @returns cell + """ + return self.cells[x * self.grid_height + y] + + def get_adjacent_cells(self, cell): + """Returns adjacent cells to a cell. + + Clockwise starting from the one on the right. + + @param cell get adjacent cells for this cell + @returns adjacent cells list. 
+ """ + cells = [] + if cell.x < self.grid_width-1: + cells.append(self.get_cell(cell.x+1, cell.y)) + if cell.y > 0: + cells.append(self.get_cell(cell.x, cell.y-1)) + if cell.x > 0: + cells.append(self.get_cell(cell.x-1, cell.y)) + if cell.y < self.grid_height-1: + cells.append(self.get_cell(cell.x, cell.y+1)) + return cells + + def get_path(self): + cell = self.end + path = [(cell.x, cell.y)] + while cell.parent is not self.start: + cell = cell.parent + path.append((cell.x, cell.y)) + + path.append((self.start.x, self.start.y)) + path.reverse() + return path + + def update_cell(self, adj, cell): + """Update adjacent cell. + + @param adj adjacent cell to current cell + @param cell current cell being processed + """ + adj.g = cell.g + 10 + adj.h = self.get_heuristic(adj) + adj.parent = cell + adj.f = adj.h + adj.g + + def solve(self): + """Solve maze, find path to ending cell. + + @returns path or None if not found. + """ + # add starting cell to open heap queue + heapq.heappush(self.opened, (self.start.f, self.start)) + while len(self.opened): + # pop cell from heap queue + f, cell = heapq.heappop(self.opened) + # add cell to closed list so we don't process it twice + self.closed.add(cell) + # if ending cell, return found path + if cell is self.end: + return self.get_path() + # get adjacent cells for cell + adj_cells = self.get_adjacent_cells(cell) + for adj_cell in adj_cells: + if adj_cell.reachable and adj_cell not in self.closed: + if (adj_cell.f, adj_cell) in self.opened: + # if adj cell in open list, check if current path is + # better than the one previously found + # for this adj cell. 
class Node(object):
    """Binary search tree node: left and right child + data.

    ``data`` can be any object that supports ``<`` and ``>``.  A node whose
    ``data`` is ``None`` represents an empty tree (this is the state a
    single-node tree is left in after its only value is deleted).
    """

    def __init__(self, data):
        """Node constructor

        @param data node data object (None for an empty tree)
        """
        self.left = None
        self.right = None
        self.data = data

    def insert(self, data):
        """Insert new node with data. Duplicates are ignored.

        @param data node data object to insert
        """
        # Compare against None explicitly: falsy payloads such as 0 or ''
        # are valid data and must not be mistaken for an empty node.
        if self.data is None:
            self.data = data
        elif data < self.data:
            if self.left is None:
                self.left = Node(data)
            else:
                self.left.insert(data)
        elif data > self.data:
            if self.right is None:
                self.right = Node(data)
            else:
                self.right.insert(data)

    def lookup(self, data, parent=None):
        """Lookup node containing data

        @param data node data object to look up
        @param parent node's parent
        @returns node and node's parent if found or None, None
        """
        if self.data is None:
            # empty tree: nothing to compare against
            return None, None
        if data < self.data:
            if self.left is None:
                return None, None
            return self.left.lookup(data, self)
        if data > self.data:
            if self.right is None:
                return None, None
            return self.right.lookup(data, self)
        return self, parent

    def delete(self, data):
        """Delete node containing data. Does nothing if data is not found.

        @param data node's content to delete
        """
        node, parent = self.lookup(data)
        if node is None:
            return

        children_count = node.children_count()
        if children_count == 0:
            if parent is None:
                # deleting the only node: keep the root object, mark it empty
                self.data = None
            elif parent.left is node:
                parent.left = None
            else:
                parent.right = None
        elif children_count == 1:
            child = node.left if node.left is not None else node.right
            if parent is None:
                # the root object must survive, so absorb the child into it
                self.left = child.left
                self.right = child.right
                self.data = child.data
            elif parent.left is node:
                parent.left = child
            else:
                parent.right = child
        else:
            # two children: replace data by the in-order successor's data
            # (leftmost node of the right subtree), then unlink the successor
            parent = node
            successor = node.right
            while successor.left is not None:
                parent = successor
                successor = successor.left
            node.data = successor.data
            if parent.left is successor:
                parent.left = successor.right
            else:
                parent.right = successor.right

    def compare_trees(self, node):
        """Compare 2 trees

        @param node tree to compare
        @returns True if the tree passed is identical to this tree
        """
        if node is None or self.data != node.data:
            return False
        if self.left is None:
            if node.left is not None:
                return False
        elif not self.left.compare_trees(node.left):
            return False
        if self.right is None:
            return node.right is None
        return self.right.compare_trees(node.right)

    def print_tree(self):
        """Print tree content inorder, values separated by a space."""
        if self.left is not None:
            self.left.print_tree()
        print(self.data, end=" ")
        if self.right is not None:
            self.right.print_tree()

    def tree_data(self):
        """Generator to get the tree nodes data (inorder)."""
        # we use a stack to traverse the tree in a non-recursive way
        stack = []
        node = self
        while stack or node is not None:
            if node is not None:
                stack.append(node)
                node = node.left
            else:
                # we are returning so we pop the node and we yield it
                node = stack.pop()
                yield node.data
                node = node.right

    def children_count(self):
        """Return the number of children

        @returns number of children: 0, 1, 2
        """
        cnt = 0
        if self.left is not None:
            cnt += 1
        if self.right is not None:
            cnt += 1
        return cnt
def find_int(i, lst):
    """Find integer in a sorted list using an iterative binary search.

    Example: 4 in [1, 3, 4, 6, 7, 9] -> 2
    @param i integer to find.
    @param lst sorted list.
    @returns index if found, None if not.
    """
    low = 0
    high = len(lst) - 1
    while low <= high:
        # floor division keeps the index an int on Python 3 as well
        mid = (low + high) // 2
        candidate = lst[mid]
        if candidate == i:
            return mid
        if candidate < i:
            low = mid + 1
        else:
            high = mid - 1
    return None


def find_max_sub(lst):
    """Find contiguous subset with highest sum.

    Example: [-2, 3, -4, 5, 1, -5] -> (3,4), 6
    @param lst non-empty list (an empty list raises IndexError)
    @returns subset bounds (inclusive start, end indexes) and highest sum
    """
    best_sum = lst[0]
    best_bounds = (0, 0)
    running = 0
    start = 0
    for idx, value in enumerate(lst):
        running += value
        if running > best_sum:
            best_sum = running
            best_bounds = (start, idx)
        # Independent check (not elif): a negative running sum never helps a
        # later subset, even when it just became the best sum seen so far.
        if running < 0:
            running = 0
            start = idx + 1
    return best_bounds, best_sum


def merge_sort(lst):
    """Sort list using merge sort.

    Complexity: O(n log n)

    @param lst list to sort.
    @returns sorted list (lst itself when it has at most one element).
    """
    def merge(l1, l2):
        """Merge sorted lists l1 and l2.

        [1, 2, 4], [1, 3, 4, 5] -> [1, 1, 2, 3, 4, 4, 5]
        @param l1 sorted list
        @param l2 sorted list
        @returns merge sorted list
        """
        res = []
        i = 0
        j = 0
        while i < len(l1) and j < len(l2):
            # <= takes from l1 first on ties, which keeps the sort stable
            if l1[i] <= l2[j]:
                res.append(l1[i])
                i += 1
            else:
                res.append(l2[j])
                j += 1
        # at most one of the two tails is non-empty
        res.extend(l1[i:])
        res.extend(l2[j:])
        return res

    length = len(lst)
    if length <= 1:
        return lst
    # floor division: a float is not a valid slice index on Python 3
    mid = length // 2
    return merge(merge_sort(lst[:mid]), merge_sort(lst[mid:]))


def quicksort(lst):
    """Sort list using quick sort.

    Complexity: O(n log n). Worst: O(n2)

    @param lst list to sort.
    @returns sorted list (lst itself when it has at most one element).
    """
    if len(lst) <= 1:
        return lst

    pivot = lst[0]
    less = [e for e in lst if e < pivot]
    equal = [e for e in lst if e == pivot]
    greater = [e for e in lst if e > pivot]
    return quicksort(less) + equal + quicksort(greater)
def permutations(lst):
    """Generator for list permutations.

    The input is copied first, so the caller's list is left untouched.

    @param lst list to generate permutations for
    @result yield each permutation (as a new list)

    Example:
    lst = [1,2,3]
    a = [1]
    permutations([2,3]) = [[2,3], [3,2]]
    [2,3]
    yield [1,2,3]
    yield [2,1,3]
    yield [2,3,1]
    [3,2]
    yield [1,3,2]
    yield [3,1,2]
    yield [3,2,1]
    """
    items = list(lst)
    if len(items) <= 1:
        yield items
        return

    # split off the first element without mutating the caller's list
    a = items[:1]
    rest = items[1:]
    for p in permutations(rest):
        # insert the first element at every position of each sub-permutation
        for i in range(len(p) + 1):
            yield p[:i] + a + p[i:]
def string_matching_rabin_karp(text='', pattern='', hash_base=256):
    """Returns positions where pattern is found in text.

    Similar to the naive approach but matches the hash value of the pattern
    with the hash value of current substring of text. Needs to match
    individual characters once a match is found because of potential
    hash collisions.

    worst case: O(nm)

    Example: text = 'ababbababa', pattern = 'aba'
             string_matching_rabin_karp(text, pattern) returns [0, 5, 7]
    @param text text to search inside
    @param pattern string to search for
    @param hash_base base to calculate the hash value
    @return list containing offsets (shifts) where pattern is found inside text
    """
    n = len(text)
    m = len(pattern)
    if m > n:
        return []
    if m == 0:
        # the empty pattern matches at every position, end of text included
        return list(range(n + 1))

    offsets = []
    # weight of the leading character in the window hash; loop invariant
    high_order = hash_base ** (m - 1)
    htext = hash_value(text[:m], hash_base)
    hpattern = hash_value(pattern, hash_base)
    for i in range(n - m + 1):
        if htext == hpattern and text[i:i+m] == pattern:
            offsets.append(i)
        if i < n - m:
            # roll the window: drop text[i], shift, append text[i+m]
            htext = (hash_base * (htext - ord(text[i]) * high_order) +
                     ord(text[i+m]))

    return offsets


def hash_value(s, base):
    """Calculate the hash value of a string using base.

    Example: 'abc' = 97 x base^2 + 98 x base^1 + 99 x base^0
    @param s string to compute hash value for
    @param base base to use to compute hash value
    @return hash value
    """
    # Horner's rule: same polynomial without repeated exponentiation
    v = 0
    for c in s:
        v = v * base + ord(c)
    return v


def string_matching_knuth_morris_pratt(text='', pattern=''):
    """Returns positions where pattern is found in text.

    https://en.wikipedia.org/wiki/Knuth%E2%80%93Morris%E2%80%93Pratt_algorithm

    O(m+n)
    Example: text = 'ababbababa', pattern = 'aba'
             string_matching_knuth_morris_pratt(text, pattern) returns [0, 5, 7]
    @param text text to search inside
    @param pattern string to search for
    @return list containing offsets (shifts) where pattern is found inside text
    """
    n = len(text)
    m = len(pattern)
    if m == 0:
        # the empty pattern matches at every position, end of text included
        return list(range(n + 1))

    offsets = []
    pi = compute_prefix_function(pattern)
    q = 0  # number of pattern characters currently matched
    for i in range(n):
        while q > 0 and pattern[q] != text[i]:
            q = pi[q - 1]
        if pattern[q] == text[i]:
            q = q + 1
        if q == m:
            offsets.append(i - m + 1)
            q = pi[q - 1]

    return offsets


def compute_prefix_function(p):
    """Compute the KMP prefix (failure) table of p.

    pi[q] is the length of the longest proper prefix of p[:q+1] that is also
    a suffix of it.
    Example: 'ababaca' -> [0, 0, 1, 2, 3, 0, 1]
    @param p pattern
    @return list of len(p) prefix lengths
    """
    m = len(p)
    pi = [0] * m
    k = 0
    for q in range(1, m):
        while k > 0 and p[k] != p[q]:
            k = pi[k - 1]
        if p[k] == p[q]:
            k = k + 1
        pi[q] = k
    return pi


def string_matching_boyer_moore_horspool(text='', pattern=''):
    """Returns positions where pattern is found in text.

    https://en.wikipedia.org/wiki/Boyer%E2%80%93Moore_string-search_algorithm

    Example: text = 'ababbababa', pattern = 'aba'
             string_matching_boyer_moore_horspool(text, pattern) returns [0, 5, 7]
    @param text text to search inside
    @param pattern string to search for
    @return list containing offsets (shifts) where pattern is found inside text
    """
    m = len(pattern)
    n = len(text)
    if m > n:
        return []
    if m == 0:
        # the empty pattern matches at every position, end of text included
        return list(range(n + 1))

    offsets = []
    # Bad-character shifts keyed by character; characters missing from the
    # table shift by the full pattern length. A dict (rather than a 256-slot
    # list indexed by ord()) also works for code points above 255.
    skip = {}
    for k in range(m - 1):
        skip[pattern[k]] = m - k - 1

    k = m - 1  # index in text aligned with the last pattern character
    while k < n:
        j = m - 1
        i = k
        while j >= 0 and text[i] == pattern[j]:
            j -= 1
            i -= 1
        if j == -1:
            offsets.append(i + 1)
        k += skip.get(text[k], m)

    return offsets


def atoi(s):
    """Convert string to integer without doing int(s).

    '123' -> 123, '-45' -> -45, '+7' -> 7
    @param s string to convert.
    @returns integer
    @raises ValueError if s is empty, is only a sign, or holds a non-digit.
    """
    if not s:
        raise ValueError("cannot convert an empty string to an integer")

    neg = False
    digits = s
    if s[0] in '+-':
        neg = s[0] == '-'
        digits = s[1:]
    if not digits:
        raise ValueError("no digits found in %r" % (s,))

    value = 0
    for c in digits:
        # int() on a single character raises ValueError for non-digits
        value = value * 10 + int(c)

    return -value if neg else value


def reverse_string_words(s):
    """Reverse words inside a string (in place).

    Since strings are immutable in Python, we copy the string chars to a list
    first. The whole list is reversed, then each word is reversed back.
    'word1 word2 word3' -> 'word3 word2 word1'

    Complexity: O(n)

    @param s string words to reverse.
    @returns reversed string words.
    """
    def reverse(lst, i, j):
        # 'word1' -> '1drow'
        # i < j (not i != j): the indexes cross without meeting on
        # even-length spans, and the span may be empty.
        while i < j:
            lst[i], lst[j] = lst[j], lst[i]
            i += 1
            j -= 1

    w = list(s)
    reverse(w, 0, len(w) - 1)

    i = 0
    j = 0
    while j < len(w):
        # advance j to the space that ends the current word (or to the end)
        while j < len(w) and w[j] != ' ':
            j += 1
        reverse(w, i, j - 1)
        i = j + 1
        j += 1

    return ''.join(w)
- h('bcd') = (base * (h('abc') - ('a' * (base ^ 2)))) + 'd' - - worst case: O(nm) - we can expect O(n+m) if the number of valid matches is small and the pattern - large - - Performance: ord() is slow so we shouldn't use it here - - Example: text = 'ababbababa', pattern = 'aba' - string_matching_rabin_karp(text, pattern) returns [0, 5, 7] - @param text text to search inside - @param pattern string to search for - @param hash_base base to calculate the hash value - @return list containing offsets (shifts) where pattern is found inside text - """ - - n = len(text) - m = len(pattern) - offsets = [] - htext = hash_value(text[:m], hash_base) - hpattern = hash_value(pattern, hash_base) - for i in range(n-m+1): - if htext == hpattern: - if text[i:i+m] == pattern: - offsets.append(i) - if i < n-m: - htext = (hash_base * (htext - (ord(text[i]) * (hash_base ** (m-1))))) + ord(text[i+m]) - - return offsets - -def hash_value(s, base): - """ - Calculate the hash value of a string using base - - Example: 'abc' = 97 x base^2 + 98 x base^1 + 99 x base^0 - @param s string to compute hash value for - @param base base to use to compute hash value - @return hash value - """ - v = 0 - p = len(s)-1 - for i in range(p+1): - v += ord(s[i]) * (base ** p) - p -= 1 - - return v - -def string_matching_knuth_morris_pratt(text='', pattern=''): - """ - Returns positions where pattern is found in text - - See http://jboxer.com/2009/12/the-knuth-morris-pratt-algorithm-in-my-own-words/ for a great explanation on how this algorithm works. 
- - O(m+n) - Example: text = 'ababbababa', pattern = 'aba' - string_matching_knuth_morris_pratt(text, pattern) returns [0, 5, 7] - @param text text to search inside - @param pattern string to search for - @return list containing offsets (shifts) where pattern is found inside text - """ - - n = len(text) - m = len(pattern) - offsets = [] - pi = compute_prefix_function(pattern) - q = 0 - for i in range(n): - while q > 0 and pattern[q] != text[i]: - q = pi[q - 1] - if pattern[q] == text[i]: - q = q + 1 - if q == m: - offsets.append(i - m + 1) - q = pi[q-1] - - return offsets - -def compute_prefix_function(p): - m = len(p) - pi = [0] * m - k = 0 - for q in range(1, m): - while k > 0 and p[k] != p[q]: - k = pi[k - 1] - if p[k] == p[q]: - k = k + 1 - pi[q] = k - return pi - -def string_matching_boyer_moore_horspool(text='', pattern=''): - """ - Returns positions where pattern is found in text - - See http://en.wikipedia.org/wiki/Boyer%E2%80%93Moore%E2%80%93Horspool_algorithm for an explanation on how - this algorithm works. 
- - O(n) - Performance: ord() is slow so we shouldn't use it here - - Example: text = 'ababbababa', pattern = 'aba' - string_matching_boyer_moore_horspool(text, pattern) returns [0, 5, 7] - @param text text to search inside - @param pattern string to search for - @return list containing offsets (shifts) where pattern is found inside text - """ - - m = len(pattern) - n = len(text) - offsets = [] - if m > n: - return offsets - skip = [] - for k in range(256): - skip.append(m) - for k in range(m-1): - skip[ord(pattern[k])] = m - k - 1 - skip = tuple(skip) - k = m - 1 - while k < n: - j = m - 1; i = k - while j >= 0 and text[i] == pattern[j]: - j -= 1 - i -= 1 - if j == -1: - offsets.append(i + 1) - k += skip[ord(text[k])] - - return offsets - diff --git a/algorithms/tests/BUILD b/algorithms/tests/BUILD new file mode 100644 index 0000000..e9edb5d --- /dev/null +++ b/algorithms/tests/BUILD @@ -0,0 +1,6 @@ +# `sources` defaults to ['*_test.py', 'test_*.py', 'conftest.py']. +# `dependencies` are inferred. 
+python_tests( + name = 'tests', + interpreter_constraints=["==2.7.*", ">=3.6"], +) diff --git a/algorithms/tests/test_a_star_path_finding.py b/algorithms/tests/test_a_star_path_finding.py new file mode 100644 index 0000000..396388d --- /dev/null +++ b/algorithms/tests/test_a_star_path_finding.py @@ -0,0 +1,37 @@ +import algorithms.a_star_path_finding as pf + +import unittest + + +class Test(unittest.TestCase): + + def setUp(self): + pass + + def test_maze(self): + a = pf.AStar() + walls = ((0, 5), (1, 0), (1, 1), (1, 5), (2, 3), + (3, 1), (3, 2), (3, 5), (4, 1), (4, 4), (5, 1)) + a.init_grid(6, 6, walls, (0, 0), (5, 5)) + path = a.solve() + self.assertEqual(path, [(0, 0), (0, 1), (0, 2), (1, 2), (1, 3), (1, 4), + (2, 4), (3, 4), (3, 3), (4, 3), (5, 3), (5, 4), + (5, 5)]) + + def test_maze_no_walls(self): + a = pf.AStar() + walls = () + a.init_grid(6, 6, walls, (0, 0), (5, 5)) + path = a.solve() + self.assertEqual(len(path), 11) + + def test_maze_no_solution(self): + a = pf.AStar() + walls = ((0, 5), (1, 0), (1, 1), (1, 2), (1, 3), (1, 4), (1, 5), + (2, 3), (3, 1), (3, 2), (3, 5), (4, 1), (4, 4), (5, 1)) + a.init_grid(6, 6, walls, (0, 0), (5, 5)) + self.assertIsNone(a.solve()) + + +if __name__ == '__main__': + unittest.main() diff --git a/algorithms/tests/test_binary_tree.py b/algorithms/tests/test_binary_tree.py index c57f4bd..ca0eede 100644 --- a/algorithms/tests/test_binary_tree.py +++ b/algorithms/tests/test_binary_tree.py @@ -1,69 +1,166 @@ +import copy import unittest + import algorithms.binary_tree as binary_tree + class BinaryTreeTest(unittest.TestCase): - - def test_binary_tree(self): - - data = [10, 5, 15, 4, 7, 13, 17, 11, 14] - # create 2 trees with the same content - root = binary_tree.Node(data[0]) - for i in data[1:]: - root.insert(i) - - root2 = binary_tree.Node(data[0]) - for i in data[1:]: - root2.insert(i) - - # check if both trees are identical - self.assertTrue(root.compare_trees(root2)) - - # check the content of the tree inorder - t = [] - 
for d in root.tree_data(): - t.append(d) - self.assertEquals(t, [4, 5, 7, 10, 11, 13, 14, 15, 17]) - - # test lookup - node, parent = root.lookup(9) - self.assertTrue(node == None) - # check if returned node and parent are correct - node, parent = root.lookup(11) - self.assertTrue(node.data == 11) - self.assertTrue(parent.data == 13) - - # delete a leaf node - root.delete(4) - # check the content of the tree inorder - t = [] - for d in root.tree_data(): - t.append(d) - self.assertEquals(t, [5, 7, 10, 11, 13, 14, 15, 17]) - - # delete a node with 1 child - root.delete(5) - # check the content of the tree inorder - t = [] - for d in root.tree_data(): - t.append(d) - self.assertEquals(t, [7, 10, 11, 13, 14, 15, 17]) - - # delete a node with 2 children - root.delete(13) - # check the content of the tree inorder - t = [] - for d in root.tree_data(): - t.append(d) - self.assertEquals(t, [7, 10, 11, 14, 15, 17]) - - # delete a node with 2 children - root.delete(15) - # check the content of the tree inorder - t = [] - for d in root.tree_data(): - t.append(d) - self.assertEquals(t, [7, 10, 11, 14, 17]) -if __name__ == '__main__': - unittest.main() + def setUp(self): + self.root_single_node = binary_tree.Node(None) + self.root = binary_tree.Node(10) + self.root.left = binary_tree.Node(5) + self.root.left.left = binary_tree.Node(3) + self.root.left.right = binary_tree.Node(7) + self.root.right = binary_tree.Node(15) + self.root.right.left = binary_tree.Node(12) + self.root.right.left.left = binary_tree.Node(11) + self.root.right.right = binary_tree.Node(20) + self.root_copy = copy.deepcopy(self.root) + + def test_insert(self): + root = self.root_single_node + + root.insert(10) + self.assertEqual(root.data, 10) + + root.insert(5) + self.assertEqual(root.left.data, 5) + + root.insert(15) + self.assertEqual(root.right.data, 15) + + root.insert(8) + self.assertEqual(root.left.right.data, 8) + + root.insert(2) + self.assertEqual(root.left.left.data, 2) + + root.insert(12) + 
self.assertEqual(root.right.left.data, 12) + + root.insert(17) + self.assertEqual(root.right.right.data, 17) + + def test_lookup(self): + node, parent = self.root.lookup(0) + self.assertIsNone(parent) + self.assertIsNone(node) + + node, parent = self.root.lookup(13) + self.assertIsNone(parent) + self.assertIsNone(node) + + node, parent = self.root.lookup(7) + self.assertIs(node, self.root.left.right) + self.assertIs(parent, self.root.left) + + def test_delete_root_no_child(self): + self.root_single_node.data = 7 + self.root_single_node.delete(7) + self.assertIsNone(self.root_single_node.data) + + def test_delete_root_one_child(self): + self.root_single_node.data = 7 + self.root_single_node.insert(3) + self.root_single_node.delete(7) + self.assertEqual(self.root_single_node.data, 3) + def test_delete_one_child_left(self): + self.root.delete(12) + self.assertEqual(self.root.left.data, 5) + self.assertEqual(self.root.left.left.data, 3) + self.assertEqual(self.root.left.right.data, 7) + self.assertEqual(self.root.right.data, 15) + self.assertEqual(self.root.right.left.data, 11) + self.assertEqual(self.root.right.right.data, 20) + + def test_delete_one_child_right(self): + self.root.insert(25) + self.root.delete(20) + self.assertEqual(self.root.left.data, 5) + self.assertEqual(self.root.left.left.data, 3) + self.assertEqual(self.root.left.right.data, 7) + self.assertEqual(self.root.right.data, 15) + self.assertEqual(self.root.right.left.data, 12) + self.assertEqual(self.root.right.left.left.data, 11) + self.assertEqual(self.root.right.right.data, 25) + + def test_delete_right_leaf(self): + self.root.delete(7) + self.assertIsNone(self.root.left.right) + self.assertEqual(self.root.left.data, 5) + self.assertEqual(self.root.left.left.data, 3) + self.assertEqual(self.root.right.data, 15) + self.assertEqual(self.root.right.left.data, 12) + self.assertEqual(self.root.right.left.left.data, 11) + self.assertEqual(self.root.right.right.data, 20) + + def 
test_delete_left_leaf(self): + self.root.delete(3) + self.assertIsNone(self.root.left.left) + self.assertEqual(self.root.left.data, 5) + self.assertEqual(self.root.left.right.data, 7) + self.assertEqual(self.root.right.data, 15) + self.assertEqual(self.root.right.left.data, 12) + self.assertEqual(self.root.right.left.left.data, 11) + self.assertEqual(self.root.right.right.data, 20) + + def test_delete_right_node_two_childs(self): + self.root.delete(15) + self.assertEqual(self.root.left.data, 5) + self.assertEqual(self.root.left.left.data, 3) + self.assertEqual(self.root.left.right.data, 7) + self.assertEqual(self.root.right.data, 20) + self.assertEqual(self.root.right.left.data, 12) + self.assertEqual(self.root.right.left.left.data, 11) + + def test_delete_left_node_two_childs(self): + self.root.delete(5) + self.assertEqual(self.root.left.data, 7) + self.assertEqual(self.root.left.left.data, 3) + self.assertEqual(self.root.right.data, 15) + self.assertEqual(self.root.right.left.data, 12) + self.assertEqual(self.root.right.left.left.data, 11) + self.assertEqual(self.root.right.right.data, 20) + + def test_delete_root_two_childs(self): + self.root.delete(10) + self.assertEqual(self.root.left.data, 5) + self.assertEqual(self.root.left.left.data, 3) + self.assertEqual(self.root.left.right.data, 7) + self.assertEqual(self.root.data, 11) + self.assertEqual(self.root.right.data, 15) + self.assertEqual(self.root.right.left.data, 12) + self.assertEqual(self.root.right.right.data, 20) + + def test_compare_trees_left_leaf_missing(self): + self.root_copy.delete(11) + self.assertFalse(self.root.compare_trees(self.root_copy)) + + def test_compare_trees_right_leaf_missing(self): + self.root_copy.delete(20) + self.assertFalse(self.root.compare_trees(self.root_copy)) + + def test_compare_trees_diff_value(self): + self.root_copy.left.data = 16 + self.assertFalse(self.root.compare_trees(self.root_copy)) + + def test_compare_trees_extra_right_leaf(self): + self.root_copy.insert(25) + 
self.assertFalse(self.root.compare_trees(self.root_copy)) + + def test_compare_trees_extra_left_leaf(self): + self.root_copy.insert(18) + self.assertFalse(self.root.compare_trees(self.root_copy)) + + def test_print_tree(self): + self.root.print_tree() + + def test_tree_data(self): + self.assertEqual([e for e in self.root.tree_data()], + [3, 5, 7, 10, 11, 12, 15, 20]) + + +if __name__ == '__main__': + unittest.main() diff --git a/algorithms/tests/test_generators.py b/algorithms/tests/test_generators.py new file mode 100644 index 0000000..7670340 --- /dev/null +++ b/algorithms/tests/test_generators.py @@ -0,0 +1,21 @@ +import unittest + +import algorithms.generators as generators + + +class GeneratorsTest(unittest.TestCase): + + def setUp(self): + pass + + def test_fib(self): + fib = [e for e in generators.fib(10)] + self.assertEqual(fib, [0, 1, 1, 2, 3, 5, 8, 13, 21, 34]) + + def test_fib_empty(self): + fib = [e for e in generators.fib(0)] + self.assertEqual(fib, []) + + +if __name__ == '__main__': + unittest.main() diff --git a/algorithms/tests/test_list.py b/algorithms/tests/test_list.py new file mode 100644 index 0000000..e8c61c4 --- /dev/null +++ b/algorithms/tests/test_list.py @@ -0,0 +1,62 @@ +import unittest + +import algorithms.list as list + + +class List(unittest.TestCase): + + def setUp(self): + pass + + def test_find_max_sub(self): + bounds, m = [e for e in list.find_max_sub([-2, 3, -4, 5, 1, -5])] + self.assertEqual(bounds, (3, 4)) + self.assertEqual(m, 6) + + def test_find_int_first_half(self): + idx = list.find_int(4, [1, 2, 4, 5, 7, 9]) + self.assertEqual(idx, 2) + + def test_find_int_second_half(self): + idx = list.find_int(7, [1, 2, 4, 5, 7, 9]) + self.assertEqual(idx, 4) + + def test_find_int_not_found(self): + idx = list.find_int(3, [1, 2, 4, 5, 7, 9]) + self.assertIsNone(idx) + + def test_find_int_single_element_list(self): + idx = list.find_int(3, [3, ]) + self.assertEqual(idx, 0) + + def test_find_int_empty_list(self): + idx = list.find_int(3, 
[]) + self.assertIsNone(idx) + + def test_merge_sort(self): + res = list.merge_sort([3, 4, 1, 5, 0]) + self.assertListEqual(res, [0, 1, 3, 4, 5]) + + def test_merge_sort_duplicates(self): + res = list.merge_sort([3, 4, 1, 5, 0, 4]) + self.assertListEqual(res, [0, 1, 3, 4, 4, 5]) + + def test_merge_sort_single_element(self): + res = list.merge_sort([3]) + self.assertListEqual(res, [3]) + + def test_quicksort(self): + res = list.quicksort([3, 4, 1, 5, 0]) + self.assertListEqual(res, [0, 1, 3, 4, 5]) + + def test_quicksort_duplicates(self): + res = list.quicksort([3, 4, 1, 5, 4, 0, 1]) + self.assertListEqual(res, [0, 1, 1, 3, 4, 4, 5]) + + def test_quicksort_single_element(self): + res = list.quicksort([3]) + self.assertListEqual(res, [3]) + + +if __name__ == '__main__': + unittest.main() diff --git a/algorithms/tests/test_permutations.py b/algorithms/tests/test_permutations.py new file mode 100644 index 0000000..167a244 --- /dev/null +++ b/algorithms/tests/test_permutations.py @@ -0,0 +1,22 @@ +import unittest + +import algorithms.permutations as permutations + + +class GeneratorsTest(unittest.TestCase): + + def setUp(self): + pass + + def test_permutations(self): + p = [e for e in permutations.permutations([1, 2, 3])] + self.assertEqual(p, [[1, 2, 3], [2, 1, 3], [2, 3, 1], [1, 3, 2], + [3, 1, 2], [3, 2, 1]]) + + def test_permutations_single(self): + p = [e for e in permutations.permutations([1])] + self.assertEqual(p, [[1]]) + + +if __name__ == '__main__': + unittest.main() diff --git a/algorithms/tests/test_string.py b/algorithms/tests/test_string.py new file mode 100644 index 0000000..1d27a6e --- /dev/null +++ b/algorithms/tests/test_string.py @@ -0,0 +1,67 @@ +import unittest + +import algorithms.string as string + + +class StringTest(unittest.TestCase): + + def test_atoi(self): + self.assertEqual(string.atoi('123'), 123) + + def test_atoi_neg(self): + self.assertEqual(string.atoi('-123'), -123) + + def test_atoi_empty_string(self): + 
self.assertRaises(ValueError, string.atoi, '') + + def test_reverse_string_words(self): + s = 'word1 word2 word3' + s = string.reverse_string_words(s) + self.assertEqual(s, 'word3 word2 word1') + + def test_reverse_string_word(self): + s = 'word1' + s = string.reverse_string_words(s) + self.assertEqual(s, 'word1') + + def test_string_matching_naive(self): + t = 'ababbababa' + s = 'aba' + self.assertEqual(string.string_matching_naive(t, s), [0, 5, 7]) + t = 'ababbababa' + s = 'abbb' + self.assertEqual(string.string_matching_naive(t, s), []) + + def test_string_matching_rabin_karp(self): + t = 'ababbababa' + s = 'aba' + self.assertEqual(string.string_matching_rabin_karp(t, s), [0, 5, 7]) + t = 'ababbababa' + s = 'abbb' + self.assertEqual(string.string_matching_rabin_karp(t, s), []) + + def test_string_matching_knuth_morris_pratt(self): + t = 'ababbababa' + s = 'aba' + self.assertEqual(string.string_matching_knuth_morris_pratt(t, s), + [0, 5, 7]) + t = 'ababbababa' + s = 'abbb' + self.assertEqual(string.string_matching_knuth_morris_pratt(t, s), []) + + def test_string_matching_boyer_moore_horspool(self): + t = 'ababbababa' + s = 'aba' + self.assertEqual(string.string_matching_boyer_moore_horspool(t, s), + [0, 5, 7]) + t = 'ababbababa' + s = 'abbb' + self.assertEqual(string.string_matching_boyer_moore_horspool(t, s), []) + + s = 'ababbababa' + t = 'abbb' + self.assertEqual(string.string_matching_boyer_moore_horspool(t, s), []) + + +if __name__ == '__main__': + unittest.main() diff --git a/algorithms/tests/test_string_matching.py b/algorithms/tests/test_string_matching.py deleted file mode 100644 index 4b7d11d..0000000 --- a/algorithms/tests/test_string_matching.py +++ /dev/null @@ -1,40 +0,0 @@ -import unittest -import string_matching - -class StringMatchingTest(unittest.TestCase): - - def test_string_matching_naive(self): - t = 'ababbababa' - s = 'aba' - self.assertEquals(string_matching.string_matching_naive(t, s), [0, 5, 7]) - t = 'ababbababa' - s = 'abbb' - 
self.assertEquals(string_matching.string_matching_naive(t, s), []) - - def test_string_matching_rabin_karp(self): - t = 'ababbababa' - s = 'aba' - self.assertEquals(string_matching.string_matching_rabin_karp(t, s), [0, 5, 7]) - t = 'ababbababa' - s = 'abbb' - self.assertEquals(string_matching.string_matching_rabin_karp(t, s), []) - - def test_string_matching_knuth_morris_pratt(self): - t = 'ababbababa' - s = 'aba' - self.assertEquals(string_matching.string_matching_knuth_morris_pratt(t, s), [0, 5, 7]) - t = 'ababbababa' - s = 'abbb' - self.assertEquals(string_matching.string_matching_knuth_morris_pratt(t, s), []) - - def test_string_matching_boyer_moore_horspool(self): - t = 'ababbababa' - s = 'aba' - self.assertEquals(string_matching.string_matching_boyer_moore_horspool(t, s), [0, 5, 7]) - t = 'ababbababa' - s = 'abbb' - self.assertEquals(string_matching.string_matching_boyer_moore_horspool(t, s), []) - -if __name__ == '__main__': - unittest.main() - diff --git a/binary_tree_tutorial.txt b/binary_tree_tutorial.txt deleted file mode 100644 index 561daee..0000000 --- a/binary_tree_tutorial.txt +++ /dev/null @@ -1,528 +0,0 @@ -This article is about a Python library I created to manage binary search trees. I will go over the following: - -