diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..991b2c2 --- /dev/null +++ b/.gitignore @@ -0,0 +1,62 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] + +# C extensions +*.so + +# Distribution / packaging +.Python +env/ +bin/ +build/ +develop-eggs/ +dist/ +eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +*.egg-info/ +.installed.cfg +*.egg + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.coverage +.cache +nosetests.xml +coverage.xml + +# Translations +*.mo + +# Mr Developer +.mr.developer.cfg +.project +.pydevproject + +# Rope +.ropeproject + +# Django stuff: +*.log +*.pot + +# Sphinx documentation +docs/_build/ + +# Pants workspace files +/.pants.d/ +/dist/ +/.pids +/.pants.workdir.file_lock* + +# Others +*.bak diff --git a/BUILD b/BUILD new file mode 100644 index 0000000..feacd05 --- /dev/null +++ b/BUILD @@ -0,0 +1,17 @@ +python_requirements() + +python_distribution( + name="python-algorithms", + dependencies=[ + ], + provides=setup_py( + name="python-algorithms", + version="1.0", + description="Python algorithms.", + author="Laurent Luce", + classifiers=[ + "Programming Language :: Python :: 3.6", + ], + ), + setup_py_commands=["sdist", "bdist_wheel", "--python-tag", "py36.py37"] +) diff --git a/README b/README deleted file mode 100644 index 5711b77..0000000 --- a/README +++ /dev/null @@ -1,28 +0,0 @@ -## Python Algorithms Library -## Laurent Luce - -### Description -The purpose of this library is to help you with common algorithms like: - -String matching - - Naive - - Rabin-Karp - - Knuth-Morris-Pratt - - Boyer-Moore-Horspool - -Binary tree - - node and tree class - - lookup - - insert - - delete - - compare 2 trees - - print tree - - tree inorder generator - -### Installation -Get the source and run - - $ python setup.py install - -### License -The Python Algorithms Library is distributed under the MIT License diff --git a/README.md b/README.md new file mode 100644 index 0000000..85c29c2 --- /dev/null +++ b/README.md @@ -0,0 +1,35 @@ +## Python Algorithms Library +## Laurent Luce + +### Description +The purpose of this library is to help you with common algorithms like: + +A* path finding. + +String Matching + - Naive. + - Rabin-Karp. + - Knuth-Morris-Pratt. + - Boyer-Moore-Horspool. + +String + - Convert string to integer without using int on the full string. + - Reverse string containing words. + +Generators + - Permutations. + +Lists + - Find integer using binary search. + - Find subset with max sum. + - Merge sort. + - Quicksort. + +Binary tree + +### Tests + + $ ./pants test :: + +### License +The Python Algorithms Library is distributed under the MIT License diff --git a/algorithms/BUILD b/algorithms/BUILD new file mode 100644 index 0000000..d06a175 --- /dev/null +++ b/algorithms/BUILD @@ -0,0 +1,7 @@ +python_library( + name="algorithms", + sources=["*.py", "!*_test.py"], + interpreter_constraints=["==2.7.*", ">=3.6"], +) + + diff --git a/algorithms/a_star_path_finding.py b/algorithms/a_star_path_finding.py new file mode 100644 index 0000000..1448cb7 --- /dev/null +++ b/algorithms/a_star_path_finding.py @@ -0,0 +1,148 @@ +import heapq + + +class Cell(object): + def __init__(self, x, y, reachable): + """Initialize new cell. + + @param reachable is cell reachable? not a wall? + @param x cell x coordinate + @param y cell y coordinate + @param g cost to move from the starting cell to this cell. + @param h estimation of the cost to move from this cell + to the ending cell. + @param f f = g + h + """ + self.reachable = reachable + self.x = x + self.y = y + self.parent = None + self.g = 0 + self.h = 0 + self.f = 0 + + def __lt__(self, other): + return self.f < other.f + + +class AStar(object): + def __init__(self): + # open list + self.opened = [] + heapq.heapify(self.opened) + # visited cells list + self.closed = set() + # grid cells + self.cells = [] + self.grid_height = None + self.grid_width = None + + def init_grid(self, width, height, walls, start, end): + """Prepare grid cells, walls. + + @param width grid's width. + @param height grid's height. + @param walls list of wall x,y tuples. + @param start grid starting point x,y tuple. + @param end grid ending point x,y tuple. + """ + self.grid_height = height + self.grid_width = width + for x in range(self.grid_width): + for y in range(self.grid_height): + if (x, y) in walls: + reachable = False + else: + reachable = True + self.cells.append(Cell(x, y, reachable)) + self.start = self.get_cell(*start) + self.end = self.get_cell(*end) + + def get_heuristic(self, cell): + """Compute the heuristic value H for a cell. + + Distance between this cell and the ending cell multiply by 10. + + @returns heuristic value H + """ + return 10 * (abs(cell.x - self.end.x) + abs(cell.y - self.end.y)) + + def get_cell(self, x, y): + """Returns a cell from the cells list. + + @param x cell x coordinate + @param y cell y coordinate + @returns cell + """ + return self.cells[x * self.grid_height + y] + + def get_adjacent_cells(self, cell): + """Returns adjacent cells to a cell. + + Clockwise starting from the one on the right. + + @param cell get adjacent cells for this cell + @returns adjacent cells list. + """ + cells = [] + if cell.x < self.grid_width-1: + cells.append(self.get_cell(cell.x+1, cell.y)) + if cell.y > 0: + cells.append(self.get_cell(cell.x, cell.y-1)) + if cell.x > 0: + cells.append(self.get_cell(cell.x-1, cell.y)) + if cell.y < self.grid_height-1: + cells.append(self.get_cell(cell.x, cell.y+1)) + return cells + + def get_path(self): + cell = self.end + path = [(cell.x, cell.y)] + while cell.parent is not self.start: + cell = cell.parent + path.append((cell.x, cell.y)) + + path.append((self.start.x, self.start.y)) + path.reverse() + return path + + def update_cell(self, adj, cell): + """Update adjacent cell. + + @param adj adjacent cell to current cell + @param cell current cell being processed + """ + adj.g = cell.g + 10 + adj.h = self.get_heuristic(adj) + adj.parent = cell + adj.f = adj.h + adj.g + + def solve(self): + """Solve maze, find path to ending cell. + + @returns path or None if not found. + """ + # add starting cell to open heap queue + heapq.heappush(self.opened, (self.start.f, self.start)) + while len(self.opened): + # pop cell from heap queue + f, cell = heapq.heappop(self.opened) + # add cell to closed list so we don't process it twice + self.closed.add(cell) + # if ending cell, return found path + if cell is self.end: + return self.get_path() + # get adjacent cells for cell + adj_cells = self.get_adjacent_cells(cell) + for adj_cell in adj_cells: + if adj_cell.reachable and adj_cell not in self.closed: + if (adj_cell.f, adj_cell) in self.opened: + # if adj cell in open list, check if current path is + # better than the one previously found + # for this adj cell. + if adj_cell.g > cell.g + 10: + self.update_cell(adj_cell, cell) + else: + self.update_cell(adj_cell, cell) + # add adj cell to open list + heapq.heappush(self.opened, (adj_cell.f, adj_cell)) diff --git a/algorithms/binary_tree.py b/algorithms/binary_tree.py index e821ecc..3e8c20b 100644 --- a/algorithms/binary_tree.py +++ b/algorithms/binary_tree.py @@ -1,156 +1,166 @@ -class Node: - """ - Tree node: left and right child + data which can be any object - """ - def __init__(self, data): - """ - Node constructor +from __future__ import print_function - @param data node data object - """ - self.left = None - self.right = None - self.data = data - def insert(self, data): - """ - Insert new node with data +class Node(object): + """Tree node: left and right child + data which can be any object - @param data node data object to insert """ - if data < self.data: - if self.left == None: - self.left = Node(data) - else: - self.left.insert(data) - else: - if self.right == None: - self.right = Node(data) - else: - self.right.insert(data) - - def lookup(self, data, parent=None): - """ - Lookup node containing data + def __init__(self, data): + """Node constructor - @param data node data object to look up - @param parent node's parent - @returns node and node's parent if found or None, None - """ - if data < self.data: - if self.left == None: - return None, None - return self.left.lookup(data, self) - elif data > self.data: - if self.right == None: - return None, None - return self.right.lookup(data, self) - else: - return self, parent - - def delete(self, data): - """ - Delete node containing data + @param data node data object + """ + self.left = None + self.right = None + self.data = data - @param data node's content to delete - """ - # get node containing data - node, parent = self.lookup(data) - if node != None: - children_count = node.children_count() - if children_count == 0: - # if node has no children, just remove it - if parent.left is node: - parent.left = None - else: - parent.right = None - elif children_count == 1: - # if node has 1 child - # replace node by its child - if node.left: - node.data = node.left.data - node.left = None + def insert(self, data): + """Insert new node with data + + @param data node data object to insert + """ + if self.data: + if data < self.data: + if self.left is None: + self.left = Node(data) + else: + self.left.insert(data) + elif data > self.data: + if self.right is None: + self.right = Node(data) + else: + self.right.insert(data) else: - node.data = node.right.data - node.right = None - else: - # if node has 2 children - # find its successor - parent = node - successor = node.right - while successor.left: - parent = successor - successor = successor.left - # replace node data by its successor data - node.data = successor.data - # fix successor's parent node child - if parent.left == successor: - parent.left = successor.right + self.data = data + + def lookup(self, data, parent=None): + """Lookup node containing data + + @param data node data object to look up + @param parent node's parent + @returns node and node's parent if found or None, None + """ + if data < self.data: + if self.left is None: + return None, None + return self.left.lookup(data, self) + elif data > self.data: + if self.right is None: + return None, None + return self.right.lookup(data, self) else: - parent.right = successor.right + return self, parent - def compare_trees(self, node): - """ - Compare 2 trees + def delete(self, data): + """Delete node containing data - @param node tree to compare - @returns True if the tree passed is identical to this tree - """ - if node == None: - return False - if self.data != node.data: - return False - res = True - if self.left == None: - if node.left: - return False - else: - res = self.left.compare_trees(node.left) - if self.right == None: - if node.right: - return False - else: - res = self.right.compare_trees(node.right) - return res - - def print_tree(self): - """ - Print tree content inorder - """ - if self.left: - self.left.print_tree() - print self.data, - if self.right: - self.right.print_tree() + @param data node's content to delete + """ + # get node containing data + node, parent = self.lookup(data) + if node is not None: + children_count = node.children_count() + if children_count == 0: + # if node has no children, just remove it + if parent: + if parent.left is node: + parent.left = None + else: + parent.right = None + else: + self.data = None + elif children_count == 1: + # if node has 1 child + # replace node by its child + if node.left: + n = node.left + else: + n = node.right + if parent: + if parent.left is node: + parent.left = n + else: + parent.right = n + else: + self.left = n.left + self.right = n.right + self.data = n.data + else: + # if node has 2 children + # find its successor + parent = node + successor = node.right + while successor.left: + parent = successor + successor = successor.left + # replace node data by its successor data + node.data = successor.data + # fix successor's parent node child + if parent.left == successor: + parent.left = successor.right + else: + parent.right = successor.right - def tree_data(self): - """ - Generator to get the tree nodes data - """ - # we use a stack to traverse the tree in a non-recursive way - stack = [] - node = self - while stack or node: - if node: - stack.append(node) - node = node.left - else: # we are returning so we pop the node and we yield it - node = stack.pop() - yield node.data - node = node.right - - def children_count(self): - """ - Return the number of children + def compare_trees(self, node): + """Compare 2 trees - @returns number of children: 0, 1, 2 - """ - cnt = 0 - if self.left: - cnt += 1 - if self.right: - cnt += 1 - return cnt + @param node tree to compare + @returns True if the tree passed is identical to this tree + """ + if node is None: + return False + if self.data != node.data: + return False + res = True + if self.left is None: + if node.left: + return False + else: + res = self.left.compare_trees(node.left) + if res is False: + return False + if self.right is None: + if node.right: + return False + else: + res = self.right.compare_trees(node.right) + return res + + def print_tree(self): + """Print tree content inorder + + """ + if self.left: + self.left.print_tree() + print(self.data, end=" ") + if self.right: + self.right.print_tree() + + def tree_data(self): + """Generator to get the tree nodes data + """ + # we use a stack to traverse the tree in a non-recursive way + stack = [] + node = self + while stack or node: + if node: + stack.append(node) + node = node.left + else: + # we are returning so we pop the node and we yield it + node = stack.pop() + yield node.data + node = node.right + def children_count(self): + """Return the number of children + @returns number of children: 0, 1, 2 + """ + cnt = 0 + if self.left: + cnt += 1 + if self.right: + cnt += 1 + return cnt diff --git a/algorithms/generators.py b/algorithms/generators.py index b037434..0a55e7a 100644 --- a/algorithms/generators.py +++ b/algorithms/generators.py @@ -1,14 +1,15 @@ def fib(n): - """ - Generator for Fibonacci serie - - Example: for i in fib(5): print i - @param n fib range upper bound - """ - a, b = 0, 1 - i = 0 - while i < n: - yield b - a, b = b, a+b - i += 1 + """Generator for Fibonacci serie. + Example: for i in fib(5): print i + @param n fib range upper bound + """ + if not n: + return + a, b = 0, 1 + yield a + i = 0 + while i < n - 1: + yield b + a, b = b, a+b + i += 1 diff --git a/algorithms/list.py b/algorithms/list.py index f82c423..1546904 100644 --- a/algorithms/list.py +++ b/algorithms/list.py @@ -1,25 +1,122 @@ -def find_max_sub(l): - """ - Find subset with higest sum - - Example: [-2, 3, -4, 5, 1, -5] -> (3,4), 6 - @param l list - @returns subset bounds and highest sum - """ - # max sum - max = l[0] - # current sum - m = 0 - # max sum subset bounds - bounds = (0, 0) - # current subset start - s = 0 - for i in range(len(l)): - m += l[i] - if m > max: - max = m - bounds = (s, i) - elif m < 0: - m = 0 - s = i+1 - return bounds, max +def find_int(i, lst): + """Find integer in a sorted list. + + Example: 4 in [1, 3, 4, 6, 7, 9] -> 2 + @param i integer to find. + @param lst sorted list. + @returns index if found, None if not. + """ + if lst: + p_idx = len(lst) / 2 + p = lst[p_idx] + if i == p: + return p_idx + elif len(lst) == 1: + return + elif i < p: + res = find_int(i, lst[:p_idx]) + if res: + return res + elif i > p: + res = find_int(i, lst[p_idx:]) + if res: + return res + p_idx + + +def find_max_sub(lst): + """Find subset with highest sum. + + Example: [-2, 3, -4, 5, 1, -5] -> (3,4), 6 + @param lst list + @returns subset bounds and highest sum + """ + # max sum + max = lst[0] + # current sum + m = 0 + # max sum subset bounds + bounds = (0, 0) + # current subset start + s = 0 + for i in range(len(lst)): + m += lst[i] + if m > max: + max = m + bounds = (s, i) + elif m < 0: + m = 0 + s = i+1 + return bounds, max + + +def merge_sort(lst): + """Sort list using merge sort. + + Complexity: O(n log n) + + @param l list to sort. + @returns sorted list. + """ + def merge(l1, l2): + """Merge sorted lists l1 and l2. + + [1, 2, 4], [1, 3, 4, 5] -> [1, 1, 2, 3, 4, 5] + @param l1 sorted list + @param l2 sorted list + @returns merge sorted list + """ + res = [] + i = 0 + j = 0 + while i < len(l1) and j < len(l2): + if l1[i] <= l2[j]: + res.append(l1[i]) + i += 1 + elif l2[j] < l1[i]: + res.append(l2[j]) + j += 1 + + while i < len(l1): + res.append(l1[i]) + i += 1 + + while j < len(l2): + res.append(l2[j]) + j += 1 + + return res + + length = len(lst) + if length <= 1: + return lst + mid = length / 2 + h1 = merge_sort(lst[:mid]) + h2 = merge_sort(lst[mid:]) + + return merge(h1, h2) + + +def quicksort(lst): + """Sort list using quick sort. + + Complexity: O(n log n). Worst: O(n2) + + @param lst list to sort. + @returns sorted list. + """ + if len(lst) <= 1: + return lst + + pivot = lst[0] + less = [] + equal = [] + greater = [] + for e in lst: + if e < pivot: + less.append(e) + elif e == pivot: + equal.append(e) + else: + greater.append(e) + + return quicksort(less) + equal + quicksort(greater) diff --git a/algorithms/performance/performance_string_matching.py b/algorithms/performance/performance_string_matching.py deleted file mode 100644 index 6642df8..0000000 --- a/algorithms/performance/performance_string_matching.py +++ /dev/null @@ -1,41 +0,0 @@ -import time -import string_matching - -class StringMatchingPerformance: - - def __init__(self): - pass - - def calculate_performance(self): - t = 'ababbababa' - s = 'aba' - times = 1000 - - ts = time.time() - for i in range(times): - string_matching.string_matching_naive(t, s) - t1 = time.time() - ts - print 'string_matching_naive: %.2f seconds' % t1 - - ts = time.time() - for i in range(times): - string_matching.string_matching_rabin_karp(t, s) - t2 = time.time() - ts - print 'string_matching_rabin_karp: %.2f seconds' % t2 - - ts = time.time() - for i in range(times): - string_matching.string_matching_knuth_morris_pratt(t, s) - t2 = time.time() - ts - print 'string_matching_knuth_morris_pratt: %.2f seconds' % t2 - - ts = time.time() - for i in range(times): - string_matching.string_matching_boyer_moore_horspool(t, s) - t2 = time.time() - ts - print 'string_matching_boyer_moore_horspool: %.2f seconds' % t2 - -if __name__ == '__main__': - p = StringMatchingPerformance() - p.calculate_performance() - diff --git a/algorithms/permutations.py b/algorithms/permutations.py index 6ac855c..f013d2d 100644 --- a/algorithms/permutations.py +++ b/algorithms/permutations.py @@ -1,18 +1,26 @@ -def permutations(l): - """ - Generator for list permutations - - Example: [1,2,3] = [1,2,3], [1,3,2], [2,1,3] ... +def permutations(lst): + """Generator for list permutations. - @param l list to generate permutations for - @result yield each permutation - """ - print 'permutations: ',l - if len(l) <= 1: - yield l - else: - a = [l.pop(0)] - for p in permutations(l): - for i in range(len(p)+1): - yield p[:i] + a + p[i:] + @param lst list to generate permutations for + @result yield each permutation + Example: + lst = [1,2,3] + a = [1] + permutations([2,3]) = [[2,3], [3,2]] + [2,3] + yield [1,2,3] + yield [2,1,3] + yield [2,3,1] + [3,2] + yield [1,3,2] + yield [3,1,2] + yield [3,2,1] + """ + if len(lst) <= 1: + yield lst + else: + a = [lst.pop(0)] + for p in permutations(lst): + for i in range(len(p)+1): + yield p[:i] + a + p[i:] diff --git a/algorithms/string.py b/algorithms/string.py new file mode 100644 index 0000000..a809fab --- /dev/null +++ b/algorithms/string.py @@ -0,0 +1,221 @@ +def string_matching_naive(text='', pattern=''): + """Returns positions where pattern is found in text. + + Sliding window. + + O((n-m)m) + Example: text = 'ababbababa', pattern = 'aba' + string_matching_naive(t, s) returns [0, 5, 7] + @param text text to search inside + @param pattern string to search for + @return list containing offsets (shifts) where pattern is found inside text + """ + + n = len(text) + m = len(pattern) + offsets = [] + for i in range(n-m+1): + if pattern == text[i:i+m]: + offsets.append(i) + + return offsets + + +def string_matching_rabin_karp(text='', pattern='', hash_base=256): + """Returns positions where pattern is found in text. + + Similar to the naive approach but matches the hash value of the pattern + with the hash value of current substring of text. Needs to match + individual characters once a match is found because of potential + hash collisions. + + worst case: O(nm) + O(n+m) if the number of valid matches is small and the pattern is large. + + Performance: ord() is slow so we shouldn't use it here + + Example: text = 'ababbababa', pattern = 'aba' + string_matching_rabin_karp(text, pattern) returns [0, 5, 7] + @param text text to search inside + @param pattern string to search for + @param hash_base base to calculate the hash value + @return list containing offsets (shifts) where pattern is found inside text + """ + + n = len(text) + m = len(pattern) + offsets = [] + htext = hash_value(text[:m], hash_base) + hpattern = hash_value(pattern, hash_base) + for i in range(n-m+1): + if htext == hpattern: + if text[i:i+m] == pattern: + offsets.append(i) + if i < n-m: + htext = (hash_base * + (htext - + (ord(text[i]) * + (hash_base ** (m-1))))) + ord(text[i+m]) + + return offsets + + +def hash_value(s, base): + """Calculate the hash value of a string using base. + + Example: 'abc' = 97 x base^2 + 98 x base^1 + 99 x base^0 + @param s string to compute hash value for + @param base base to use to compute hash value + @return hash value + """ + v = 0 + p = len(s)-1 + for i in range(p+1): + v += ord(s[i]) * (base ** p) + p -= 1 + + return v + + +def string_matching_knuth_morris_pratt(text='', pattern=''): + """Returns positions where pattern is found in text. + + https://en.wikipedia.org/wiki/Knuth%E2%80%93Morris%E2%80%93Pratt_algorithm + + O(m+n) + Example: text = 'ababbababa', pattern = 'aba' + string_matching_knuth_morris_pratt(text, pattern) returns [0, 5, 7] + @param text text to search inside + @param pattern string to search for + @return list containing offsets (shifts) where pattern is found inside text + """ + n = len(text) + m = len(pattern) + offsets = [] + pi = compute_prefix_function(pattern) + q = 0 + for i in range(n): + while q > 0 and pattern[q] != text[i]: + q = pi[q - 1] + if pattern[q] == text[i]: + q = q + 1 + if q == m: + offsets.append(i - m + 1) + q = pi[q-1] + + return offsets + + +def compute_prefix_function(p): + m = len(p) + pi = [0] * m + k = 0 + for q in range(1, m): + while k > 0 and p[k] != p[q]: + k = pi[k - 1] + if p[k] == p[q]: + k = k + 1 + pi[q] = k + return pi + + +def string_matching_boyer_moore_horspool(text='', pattern=''): + """Returns positions where pattern is found in text. + + https://en.wikipedia.org/wiki/Boyer%E2%80%93Moore_string-search_algorithm + + O(n) + Performance: ord() is slow so we shouldn't use it here + + Example: text = 'ababbababa', pattern = 'aba' + string_matching_boyer_moore_horspool(text, pattern) returns [0, 5, 7] + @param text text to search inside + @param pattern string to search for + @return list containing offsets (shifts) where pattern is found inside text + """ + m = len(pattern) + n = len(text) + offsets = [] + if m > n: + return offsets + skip = [] + for k in range(256): + skip.append(m) + for k in range(m-1): + skip[ord(pattern[k])] = m - k - 1 + skip = tuple(skip) + k = m - 1 + while k < n: + j = m - 1 + i = k + while j >= 0 and text[i] == pattern[j]: + j -= 1 + i -= 1 + if j == -1: + offsets.append(i + 1) + k += skip[ord(text[k])] + + return offsets + + +def atoi(s): + """Convert string to integer without doing int(s). + + '123' -> 123 + @param s string to convert. + @returns integer + """ + if not s: + raise ValueError + i = 0 + idx = 0 + neg = False + if s[0] == '-': + neg = True + idx += 1 + + for c in s[idx:]: + i *= 10 + i += int(c) + + if neg: + i = -i + + return i + + +def reverse_string_words(s): + """Reverse words inside a string (in place). + + Since strings are immutable in Python, we copy the string chars to a list + first. + 'word1 word2 word3' -> 'word3 word2 word1' + + Complexity: O(n) + + @param s string words to reverse. + @returns reversed string words. + """ + def reverse(lst, i, j): + # 'word1' -> '1drow' + # Complexity: O(n/2) + while i != j: + lst[i], lst[j] = lst[j], lst[i] + i += 1 + j -= 1 + + w = [e for e in s] + i = 0 + j = len(w) - 1 + reverse(w, i, j) + + i = 0 + j = 0 + while j < len(w): + while j < len(w) and w[j] != ' ': + j += 1 + reverse(w, i, j-1) + i = j + 1 + j += 1 + + return ''.join(e for e in w) diff --git a/algorithms/string_matching.py b/algorithms/string_matching.py deleted file mode 100644 index 8373250..0000000 --- a/algorithms/string_matching.py +++ /dev/null @@ -1,167 +0,0 @@ -""" -Filename: string_matching.py -""" - -def string_matching_naive(text='', pattern=''): - """ - Returns positions where pattern is found in text - - We slide the string to match 'pattern' over the text - - O((n-m)m) - Example: text = 'ababbababa', pattern = 'aba' - string_matching_naive(t, s) returns [0, 5, 7] - @param text text to search inside - @param pattern string to search for - @return list containing offsets (shifts) where pattern is found inside text - """ - - n = len(text) - m = len(pattern) - offsets = [] - for i in range(n-m+1): - if pattern == text[i:i+m]: - offsets.append(i) - - return offsets - - -def string_matching_rabin_karp(text='', pattern='', hash_base=256): - """ - Returns positions where pattern is found in text - - We calculate the hash value of the pattern and we compare it to the hash - value of text[i:i+m] for i = 0..n-m - The nice thing is that we don't need to calculate the hash value of - text[i:i+m] each time from scratch, we know that: - h(text[i+1:i+m+1]) = (base * (h(text[i:i+m]) - (text[i] * (base ^ (m-1))))) + text[i+m] - We can get h('bcd') from h('abc'). - h('bcd') = (base * (h('abc') - ('a' * (base ^ 2)))) + 'd' - - worst case: O(nm) - we can expect O(n+m) if the number of valid matches is small and the pattern - large - - Performance: ord() is slow so we shouldn't use it here - - Example: text = 'ababbababa', pattern = 'aba' - string_matching_rabin_karp(text, pattern) returns [0, 5, 7] - @param text text to search inside - @param pattern string to search for - @param hash_base base to calculate the hash value - @return list containing offsets (shifts) where pattern is found inside text - """ - - n = len(text) - m = len(pattern) - offsets = [] - htext = hash_value(text[:m], hash_base) - hpattern = hash_value(pattern, hash_base) - for i in range(n-m+1): - if htext == hpattern: - if text[i:i+m] == pattern: - offsets.append(i) - if i < n-m: - htext = (hash_base * (htext - (ord(text[i]) * (hash_base ** (m-1))))) + ord(text[i+m]) - - return offsets - -def hash_value(s, base): - """ - Calculate the hash value of a string using base - - Example: 'abc' = 97 x base^2 + 98 x base^1 + 99 x base^0 - @param s string to compute hash value for - @param base base to use to compute hash value - @return hash value - """ - v = 0 - p = len(s)-1 - for i in range(p+1): - v += ord(s[i]) * (base ** p) - p -= 1 - - return v - -def string_matching_knuth_morris_pratt(text='', pattern=''): - """ - Returns positions where pattern is found in text - - See http://jboxer.com/2009/12/the-knuth-morris-pratt-algorithm-in-my-own-words/ for a great explanation on how this algorithm works. - - O(m+n) - Example: text = 'ababbababa', pattern = 'aba' - string_matching_knuth_morris_pratt(text, pattern) returns [0, 5, 7] - @param text text to search inside - @param pattern string to search for - @return list containing offsets (shifts) where pattern is found inside text - """ - - n = len(text) - m = len(pattern) - offsets = [] - pi = compute_prefix_function(pattern) - q = 0 - for i in range(n): - while q > 0 and pattern[q] != text[i]: - q = pi[q - 1] - if pattern[q] == text[i]: - q = q + 1 - if q == m: - offsets.append(i - m + 1) - q = pi[q-1] - - return offsets - -def compute_prefix_function(p): - m = len(p) - pi = [0] * m - k = 0 - for q in range(1, m): - while k > 0 and p[k] != p[q]: - k = pi[k - 1] - if p[k] == p[q]: - k = k + 1 - pi[q] = k - return pi - -def string_matching_boyer_moore_horspool(text='', pattern=''): - """ - Returns positions where pattern is found in text - - See http://en.wikipedia.org/wiki/Boyer%E2%80%93Moore%E2%80%93Horspool_algorithm for an explanation on how - this algorithm works. - - O(n) - Performance: ord() is slow so we shouldn't use it here - - Example: text = 'ababbababa', pattern = 'aba' - string_matching_boyer_moore_horspool(text, pattern) returns [0, 5, 7] - @param text text to search inside - @param pattern string to search for - @return list containing offsets (shifts) where pattern is found inside text - """ - - m = len(pattern) - n = len(text) - offsets = [] - if m > n: - return offsets - skip = [] - for k in range(256): - skip.append(m) - for k in range(m-1): - skip[ord(pattern[k])] = m - k - 1 - skip = tuple(skip) - k = m - 1 - while k < n: - j = m - 1; i = k - while j >= 0 and text[i] == pattern[j]: - j -= 1 - i -= 1 - if j == -1: - offsets.append(i + 1) - k += skip[ord(text[k])] - - return offsets - diff --git a/algorithms/tests/BUILD b/algorithms/tests/BUILD new file mode 100644 index 0000000..e9edb5d --- /dev/null +++ b/algorithms/tests/BUILD @@ -0,0 +1,6 @@ +# `sources` defaults to ['*_test.py', 'test_*.py', 'conftest.py']. +# `dependencies` are inferred. +python_tests( + name = 'tests', + interpreter_constraints=["==2.7.*", ">=3.6"], +) diff --git a/algorithms/tests/test_a_star_path_finding.py b/algorithms/tests/test_a_star_path_finding.py new file mode 100644 index 0000000..396388d --- /dev/null +++ b/algorithms/tests/test_a_star_path_finding.py @@ -0,0 +1,37 @@ +import algorithms.a_star_path_finding as pf + +import unittest + + +class Test(unittest.TestCase): + + def setUp(self): + pass + + def test_maze(self): + a = pf.AStar() + walls = ((0, 5), (1, 0), (1, 1), (1, 5), (2, 3), + (3, 1), (3, 2), (3, 5), (4, 1), (4, 4), (5, 1)) + a.init_grid(6, 6, walls, (0, 0), (5, 5)) + path = a.solve() + self.assertEqual(path, [(0, 0), (0, 1), (0, 2), (1, 2), (1, 3), (1, 4), + (2, 4), (3, 4), (3, 3), (4, 3), (5, 3), (5, 4), + (5, 5)]) + + def test_maze_no_walls(self): + a = pf.AStar() + walls = () + a.init_grid(6, 6, walls, (0, 0), (5, 5)) + path = a.solve() + self.assertEqual(len(path), 11) + + def test_maze_no_solution(self): + a = pf.AStar() + walls = ((0, 5), (1, 0), (1, 1), (1, 2), (1, 3), (1, 4), (1, 5), + (2, 3), (3, 1), (3, 2), (3, 5), (4, 1), (4, 4), (5, 1)) + a.init_grid(6, 6, walls, (0, 0), (5, 5)) + self.assertIsNone(a.solve()) + + +if __name__ == '__main__': + unittest.main() diff --git a/algorithms/tests/test_binary_tree.py b/algorithms/tests/test_binary_tree.py index c57f4bd..ca0eede 100644 --- a/algorithms/tests/test_binary_tree.py +++ b/algorithms/tests/test_binary_tree.py @@ -1,69 +1,166 @@ +import copy import unittest + import algorithms.binary_tree as binary_tree + class BinaryTreeTest(unittest.TestCase): - - def test_binary_tree(self): - - data = [10, 5, 15, 4, 7, 13, 17, 11, 14] - # create 2 trees with the same content - root = binary_tree.Node(data[0]) - for i in data[1:]: - root.insert(i) - - root2 = binary_tree.Node(data[0]) - for i in data[1:]: - root2.insert(i) - - # check if both trees are identical - self.assertTrue(root.compare_trees(root2)) - - # check the content of the tree inorder - t = [] - for d in root.tree_data(): - t.append(d) - self.assertEquals(t, [4, 5, 7, 10, 11, 13, 14, 15, 17]) - - # test lookup - node, parent = root.lookup(9) - self.assertTrue(node == None) - # check if returned node and parent are correct - node, parent = root.lookup(11) - self.assertTrue(node.data == 11) - self.assertTrue(parent.data == 13) - - # delete a leaf node - root.delete(4) - # check the content of the tree inorder - t = [] - for d in root.tree_data(): - t.append(d) - self.assertEquals(t, [5, 7, 10, 11, 13, 14, 15, 17]) - - # delete a node with 1 child - root.delete(5) - # check the content of the tree inorder - t = [] - for d in root.tree_data(): - t.append(d) - self.assertEquals(t, [7, 10, 11, 13, 14, 15, 17]) - - # delete a node with 2 children - root.delete(13) - # check the content of the tree inorder - t = [] - for d in root.tree_data(): - t.append(d) - self.assertEquals(t, [7, 10, 11, 14, 15, 17]) - - # delete a node with 2 children - root.delete(15) - # check the content of the tree inorder - t = [] - for d in root.tree_data(): - t.append(d) - self.assertEquals(t, [7, 10, 11, 14, 17]) -if __name__ == '__main__': - unittest.main() + def setUp(self): + self.root_single_node = binary_tree.Node(None) + self.root = binary_tree.Node(10) + self.root.left = binary_tree.Node(5) + self.root.left.left = binary_tree.Node(3) + self.root.left.right = binary_tree.Node(7) + self.root.right = binary_tree.Node(15) + self.root.right.left = binary_tree.Node(12) + self.root.right.left.left = binary_tree.Node(11) + self.root.right.right = binary_tree.Node(20) + self.root_copy = copy.deepcopy(self.root) + + def test_insert(self): + root = self.root_single_node + + root.insert(10) + self.assertEqual(root.data, 10) + + root.insert(5) + self.assertEqual(root.left.data, 5) + + root.insert(15) + self.assertEqual(root.right.data, 15) + + root.insert(8) + self.assertEqual(root.left.right.data, 8) + + root.insert(2) + self.assertEqual(root.left.left.data, 2) + + root.insert(12) + self.assertEqual(root.right.left.data, 12) + + root.insert(17) + self.assertEqual(root.right.right.data, 17) + + def test_lookup(self): + node, parent = self.root.lookup(0) + self.assertIsNone(parent) + self.assertIsNone(node) + + node, parent = self.root.lookup(13) + self.assertIsNone(parent) + self.assertIsNone(node) + + node, parent = self.root.lookup(7) + self.assertIs(node, self.root.left.right) + self.assertIs(parent, self.root.left) + + def test_delete_root_no_child(self): + self.root_single_node.data = 7 + self.root_single_node.delete(7) + self.assertIsNone(self.root_single_node.data) + + def test_delete_root_one_child(self): + self.root_single_node.data = 7 + self.root_single_node.insert(3) + self.root_single_node.delete(7) + self.assertEqual(self.root_single_node.data, 3) + def test_delete_one_child_left(self): + self.root.delete(12) + self.assertEqual(self.root.left.data, 5) + self.assertEqual(self.root.left.left.data, 3) + self.assertEqual(self.root.left.right.data, 7) + self.assertEqual(self.root.right.data, 15) + self.assertEqual(self.root.right.left.data, 11) + self.assertEqual(self.root.right.right.data, 20) + + def test_delete_one_child_right(self): + self.root.insert(25) + self.root.delete(20) + self.assertEqual(self.root.left.data, 5) + self.assertEqual(self.root.left.left.data, 3) + self.assertEqual(self.root.left.right.data, 7) + self.assertEqual(self.root.right.data, 15) + self.assertEqual(self.root.right.left.data, 12) + self.assertEqual(self.root.right.left.left.data, 11) + self.assertEqual(self.root.right.right.data, 25) + + def test_delete_right_leaf(self): + self.root.delete(7) + self.assertIsNone(self.root.left.right) + self.assertEqual(self.root.left.data, 5) + self.assertEqual(self.root.left.left.data, 3) + self.assertEqual(self.root.right.data, 15) + self.assertEqual(self.root.right.left.data, 12) + self.assertEqual(self.root.right.left.left.data, 11) + self.assertEqual(self.root.right.right.data, 20) + + def test_delete_left_leaf(self): + self.root.delete(3) + self.assertIsNone(self.root.left.left) + self.assertEqual(self.root.left.data, 5) + self.assertEqual(self.root.left.right.data, 7) + self.assertEqual(self.root.right.data, 15) + self.assertEqual(self.root.right.left.data, 12) + self.assertEqual(self.root.right.left.left.data, 11) + self.assertEqual(self.root.right.right.data, 20) + + def test_delete_right_node_two_childs(self): + self.root.delete(15) + self.assertEqual(self.root.left.data, 5) + self.assertEqual(self.root.left.left.data, 3) + self.assertEqual(self.root.left.right.data, 7) + self.assertEqual(self.root.right.data, 20) + self.assertEqual(self.root.right.left.data, 12) + self.assertEqual(self.root.right.left.left.data, 11) + + def test_delete_left_node_two_childs(self): + self.root.delete(5) + self.assertEqual(self.root.left.data, 7) + self.assertEqual(self.root.left.left.data, 3) + self.assertEqual(self.root.right.data, 15) + self.assertEqual(self.root.right.left.data, 12) + self.assertEqual(self.root.right.left.left.data, 11) + self.assertEqual(self.root.right.right.data, 20) + + def test_delete_root_two_childs(self): + self.root.delete(10) + self.assertEqual(self.root.left.data, 5) + self.assertEqual(self.root.left.left.data, 3) + self.assertEqual(self.root.left.right.data, 7) + self.assertEqual(self.root.data, 11) + self.assertEqual(self.root.right.data, 15) + self.assertEqual(self.root.right.left.data, 12) + self.assertEqual(self.root.right.right.data, 20) + + def test_compare_trees_left_leaf_missing(self): + self.root_copy.delete(11) + self.assertFalse(self.root.compare_trees(self.root_copy)) + + def test_compare_trees_right_leaf_missing(self): + self.root_copy.delete(20) + self.assertFalse(self.root.compare_trees(self.root_copy)) + + def test_compare_trees_diff_value(self): + self.root_copy.left.data = 16 + self.assertFalse(self.root.compare_trees(self.root_copy)) + + def test_compare_trees_extra_right_leaf(self): + self.root_copy.insert(25) + self.assertFalse(self.root.compare_trees(self.root_copy)) + + def test_compare_trees_extra_left_leaf(self): + self.root_copy.insert(18) + self.assertFalse(self.root.compare_trees(self.root_copy)) + + def test_print_tree(self): + self.root.print_tree() + + def test_tree_data(self): + self.assertEqual([e for e in self.root.tree_data()], + [3, 5, 7, 10, 11, 12, 15, 20]) + + +if __name__ == '__main__': + unittest.main() diff --git a/algorithms/tests/test_generators.py b/algorithms/tests/test_generators.py new file mode 100644 index 0000000..7670340 --- /dev/null +++ b/algorithms/tests/test_generators.py @@ -0,0 +1,21 @@ +import unittest + +import algorithms.generators as generators + + +class GeneratorsTest(unittest.TestCase): + + def setUp(self): + pass + + def test_fib(self): + fib = [e for e in generators.fib(10)] + self.assertEqual(fib, [0, 1, 1, 2, 3, 5, 8, 13, 21, 34]) + + def test_fib_empty(self): + fib = [e for e in generators.fib(0)] + self.assertEqual(fib, []) + + +if __name__ == '__main__': + unittest.main() diff --git a/algorithms/tests/test_list.py b/algorithms/tests/test_list.py new file mode 100644 index 0000000..e8c61c4 --- /dev/null +++ b/algorithms/tests/test_list.py @@ -0,0 +1,62 @@ +import unittest + +import algorithms.list as list + + +class List(unittest.TestCase): + + def setUp(self): + pass + + def test_find_max_sub(self): + bounds, m = [e for e in list.find_max_sub([-2, 3, -4, 5, 1, -5])] + self.assertEqual(bounds, (3, 4)) + self.assertEqual(m, 6) + + def test_find_int_first_half(self): + idx = list.find_int(4, [1, 2, 4, 5, 7, 9]) + self.assertEqual(idx, 2) + + def test_find_int_second_half(self): + idx = list.find_int(7, [1, 2, 4, 5, 7, 9]) + self.assertEqual(idx, 4) + + def test_find_int_not_found(self): + idx = list.find_int(3, [1, 2, 4, 5, 7, 9]) + self.assertIsNone(idx) + + def test_find_int_single_element_list(self): + idx = list.find_int(3, [3, ]) + self.assertEqual(idx, 0) + + def test_find_int_empty_list(self): + idx = list.find_int(3, []) + self.assertIsNone(idx) + + def test_merge_sort(self): + res = list.merge_sort([3, 4, 1, 5, 0]) + self.assertListEqual(res, [0, 1, 3, 4, 5]) + + def test_merge_sort_duplicates(self): + res = list.merge_sort([3, 4, 1, 5, 0, 4]) + self.assertListEqual(res, [0, 1, 3, 4, 4, 5]) + + def test_merge_sort_single_element(self): + res = list.merge_sort([3]) + self.assertListEqual(res, [3]) + + def test_quicksort(self): + res = list.quicksort([3, 4, 1, 5, 0]) + self.assertListEqual(res, [0, 1, 3, 4, 5]) + + def test_quicksort_duplicates(self): + res = list.quicksort([3, 4, 1, 5, 4, 0, 1]) + self.assertListEqual(res, [0, 1, 1, 3, 4, 4, 5]) + + def test_quicksort_single_element(self): + res = list.quicksort([3]) + self.assertListEqual(res, [3]) + + +if __name__ == '__main__': + unittest.main() diff --git a/algorithms/tests/test_permutations.py b/algorithms/tests/test_permutations.py new file mode 100644 index 0000000..167a244 --- /dev/null +++ b/algorithms/tests/test_permutations.py @@ -0,0 +1,22 @@ +import unittest + +import algorithms.permutations as permutations + + +class GeneratorsTest(unittest.TestCase): + + def setUp(self): + pass + + def test_permutations(self): + p = [e for e in permutations.permutations([1, 2, 3])] + self.assertEqual(p, [[1, 2, 3], [2, 1, 3], [2, 3, 1], [1, 3, 2], + [3, 1, 2], [3, 2, 1]]) + + def test_permutations_single(self): + p = [e for e in permutations.permutations([1])] + self.assertEqual(p, [[1]]) + + +if __name__ == '__main__': + unittest.main() diff --git a/algorithms/tests/test_string.py b/algorithms/tests/test_string.py new file mode 100644 index 0000000..1d27a6e --- /dev/null +++ b/algorithms/tests/test_string.py @@ -0,0 +1,67 @@ +import unittest + +import algorithms.string as string + + +class StringTest(unittest.TestCase): + + def test_atoi(self): + self.assertEqual(string.atoi('123'), 123) + + def test_atoi_neg(self): + self.assertEqual(string.atoi('-123'), -123) + + def test_atoi_empty_string(self): + self.assertRaises(ValueError, string.atoi, '') + + def test_reverse_string_words(self): + s = 'word1 word2 word3' + s = string.reverse_string_words(s) + self.assertEqual(s, 'word3 word2 word1') + + def test_reverse_string_word(self): + s = 'word1' + s = string.reverse_string_words(s) + self.assertEqual(s, 'word1') + + def test_string_matching_naive(self): + t = 'ababbababa' + s = 'aba' + self.assertEqual(string.string_matching_naive(t, s), [0, 5, 7]) + t = 'ababbababa' + s = 'abbb' + self.assertEqual(string.string_matching_naive(t, s), []) + + def test_string_matching_rabin_karp(self): + t = 'ababbababa' + s = 'aba' + self.assertEqual(string.string_matching_rabin_karp(t, s), [0, 5, 7]) + t = 'ababbababa' + s = 'abbb' + self.assertEqual(string.string_matching_rabin_karp(t, s), []) + + def test_string_matching_knuth_morris_pratt(self): + t = 'ababbababa' + s = 'aba' + self.assertEqual(string.string_matching_knuth_morris_pratt(t, s), + [0, 5, 7]) + t = 'ababbababa' + s = 'abbb' + self.assertEqual(string.string_matching_knuth_morris_pratt(t, s), []) + + def test_string_matching_boyer_moore_horspool(self): + t = 'ababbababa' + s = 'aba' + self.assertEqual(string.string_matching_boyer_moore_horspool(t, s), + [0, 5, 7]) + t = 'ababbababa' + s = 'abbb' + self.assertEqual(string.string_matching_boyer_moore_horspool(t, s), []) + + s = 'ababbababa' + t = 'abbb' + self.assertEqual(string.string_matching_boyer_moore_horspool(t, s), []) + + +if __name__ == '__main__': + unittest.main() diff --git a/algorithms/tests/test_string_matching.py b/algorithms/tests/test_string_matching.py deleted file mode 100644 index 4b7d11d..0000000 --- a/algorithms/tests/test_string_matching.py +++ /dev/null @@ -1,40 +0,0 @@ -import unittest -import string_matching - -class StringMatchingTest(unittest.TestCase): - - def test_string_matching_naive(self): - t = 'ababbababa' - s = 'aba' - self.assertEquals(string_matching.string_matching_naive(t, s), [0, 5, 7]) - t = 'ababbababa' - s = 'abbb' - self.assertEquals(string_matching.string_matching_naive(t, s), []) - - def test_string_matching_rabin_karp(self): - t = 'ababbababa' - s = 'aba' - self.assertEquals(string_matching.string_matching_rabin_karp(t, s), [0, 5, 7]) - t = 'ababbababa' - s = 'abbb' - self.assertEquals(string_matching.string_matching_rabin_karp(t, s), []) - - def test_string_matching_knuth_morris_pratt(self): - t = 'ababbababa' - s = 'aba' - self.assertEquals(string_matching.string_matching_knuth_morris_pratt(t, s), [0, 5, 7]) - t = 'ababbababa' - s = 'abbb' - self.assertEquals(string_matching.string_matching_knuth_morris_pratt(t, s), []) - - def test_string_matching_boyer_moore_horspool(self): - t = 'ababbababa' - s = 'aba' - self.assertEquals(string_matching.string_matching_boyer_moore_horspool(t, s), [0, 5, 7]) - t = 'ababbababa' - s = 'abbb' - self.assertEquals(string_matching.string_matching_boyer_moore_horspool(t, s), []) - -if __name__ == '__main__': - unittest.main() - diff --git a/binary_tree_tutorial.txt b/binary_tree_tutorial.txt deleted file mode 100644 index 561daee..0000000 --- a/binary_tree_tutorial.txt +++ /dev/null @@ -1,528 +0,0 @@ -This article is about a Python library I created to manage binary search trees. I will go over the following: - - - -You can checkout the library code on GitHub: git clone https://laurentluce@github.com/laurentluce/python-algorithms.git. This folder contains more libraries but we are just going to focus on the Binary Tree one. - -As a reminder, here is a binary search tree definition (Wikipedia). - -A binary search tree (BST) or ordered binary tree is a node-based binary tree data structure which has the following properties: - - - -Here is an example of a binary search tree: - - - -

Node class

- -We need to represent a tree node. To do that, we create a new class named Node with 3 attributes: - - - -[code lang="python"] -class Node: - """ - Tree node: left and right child + data which can be any object - """ - def __init__(self, data): - """ - Node constructor - - @param data node data object - """ - self.left = None - self.right = None - self.data = data -[/code] - -Let's create a tree node containing the integer 8. You can pass any object for the data so it is flexible. When you create a node, both left and right node equal to None. - -[code lang="python"] -root = Node(8) -[/code] - -Note that we just created a tree with a single node. - - - -

Insert method

- -We need a method to help us populate our tree. This method takes the node's data as an argument and inserts a new node in the tree. - -[code lang="python"] -class Node: - ... - def insert(self, data): - """ - Insert new node with data - - @param data node data object to insert - """ - if data < self.data: - if self.left == None: - self.left = Node(data) - else: - self.left.insert(data) - else: - if self.right == None: - self.right = Node(data) - else: - self.right.insert(data) -[/code] - -insert() is called recursively as we are locating the place where to add the new node. - -Let's add 3 nodes to our root node which we created above and let's look at what the code does. - -[code lang="python"] -root.insert(3) -root.insert(10) -root.insert(1) -[/code] - -This is what happens when we add the second node (3): - - -This is what happens when we add the third node (10): - - -This is what happens when we add the fourth node (1): - - -This is how the tree looks like now: - - - -Let's continue and complete our tree so we can move on to the next section which is about looking up nodes in the tree. - -[code lang="python"] -root.insert(6) -root.insert(4) -root.insert(7) -root.insert(14) -root.insert(13) -[/code] - -The complete tree looks like this: - - - -

Lookup method

- -We need a way to look for a specific node in the tree. We add a new method named lookup which takes a node's data as an argument and returns the node if found or None if not. We also return the node's parent for convenience. - -[code lang="python"] -class Node: - ... - def lookup(self, data, parent=None): - """ - Lookup node containing data - - @param data node data object to look up - @param parent node's parent - @returns node and node's parent if found or None, None - """ - if data < self.data: - if self.left == None: - return None, None - return self.left.lookup(data, self) - elif data > self.data: - if self.right == None: - return None, None - return self.right.lookup(data, self) - else: - return self, parent -[/code] - -Let's look up the node containing 6. - -[code lang="python"] -node, parent = root.lookup(6) -[/code] - -This is what happens when lookup() is called: - - - - - -

Delete method

- -The method delete() takes the data of the node to remove as an argument. - -[code lang="python"] -class Node: - ... - def delete(self, data): - """ - Delete node containing data - - @param data node's content to delete - """ - # get node containing data - node, parent = self.lookup(data) - if node != None: - children_count = node.children_count() - ... -[/code] - -There are 3 possibilities to handle: - - -Let's tackle the first possibility which is the easiest. We look for the node to remove and we set its parent's left or right child to None. - -[code lang="python"] - def delete(self, data): - ... - if children_count == 0: - # if node has no children, just remove it - if parent.left is node: - parent.left = None - else: - parent.right = None - ... -[/code] - -Note: children_count() returns the number of children of a node. - -Here is the function children_count: - -[code lang="python"] -class Node: - ... - def children_count(self): - """ - Returns the number of children - - @returns number of children: 0, 1, 2 - """ - if node == None: - return None - cnt = 0 - if self.left: - cnt += 1 - if self.right: - cnt += 1 - return cnt -[/code] - -For example, we want to remove node 1. Node 3 left child will be set to None. - -[code lang="python"] -root.delete(1) -[/code] - - - -Let's look at the second possibility which is the node to be removed has 1 child. We replace the node's data by its left or right child's data and we set its left or right child to None. - -[code lang="python"] - def delete(self, data): - ... - elif children_count == 1: - # if node has 1 child - # replace node by its child - if node.left: - node.data = node.left.data - node.left = None - else: - node.data = node.right.data - node.right = None - ... -[/code] - -For example, we want to remove node 14. Node 14 data will be set to 13 (its left child's data) and its left child will be set to None. - -[code lang="python"] -root.delete(14) -[/code] - - - -Let's look at the last possibility which is the node to be removed has 2 children. We replace its data with its successor's data and we fix the successor's parent's child. - -[code lang="python"] - def delete(self, data): - ... - else: - # if node has 2 children - # find its successor - parent = node - successor = node.right - while successor.left: - parent = successor - successor = successor.left - # replace node data by its successor data - node.data = successor.data - # fix successor's parent's child - if parent.left == successor: - parent.left = successor.right - else: - parent.right = successor.right -[/code] - -For example, we want to remove node 3. We look for its successor by going right then left until we reach a leaf. Its successor is node 4. We replace 3 with 4. Node 4 doesn't have a child so we set node 6 left child to None. - -[code lang="python"] -root.delete(3) -[/code] - - - -

Print method

- -We add a method to print the tree inorder. This method has no argument. We use recursion inside print_tree() to walk the tree breath-first. We first traverse the left subtree, then we print the root node then we traverse the right subtree. - -[code lang="python"] -class Node: - ... - def print_tree(self): - """ - Print tree content inorder - """ - if self.left: - self.left.print_tree() - print self.data, - if self.right: - self.right.print_tree() -[/code] - -Let's print our tree: -[code lang="python"] -root.print_tree() -[/code] - -The output will be: 1, 3, 4, 6, 7, 8, 10, 13, 14 - -

Comparing 2 trees

- -To compare 2 trees, we add a method which compares each subtree recursively. It returns False when one leaf is not the same in both trees. This includes 1 leaf missing in the other tree or the data is different. We need to pass the root of the tree to compare to as an argument. - -[code lang="python"] -class Node: - ... - def compare_trees(self, node): - """ - Compare 2 trees - - @param node tree's root node to compare to - @returns True if the tree passed is identical to this tree - """ - if node == None: - return False - if self.data != node.data: - return False - res = True - if self.left == None: - if node.left: - return False - else: - res = self.left.compare_trees(node.left) - if self.right == None: - if node.right: - return False - else: - res = self.right.compare_trees(node.right) - return res -[/code] - -For example, we want to compare tree (3, 8, 10) with tree (3, 8, 11) - - - -[code lang="python"] -# root2 is the root of tree 2 -root.compare_trees(root2) -[/code] - -This is what happens in the code when we call compare_trees(). - - - -

Generator returning the tree elements one by one

- -It is sometimes useful to create a generator which returns the tree nodes values one by one. It is memory efficient as it doesn't have to build the full list of nodes right away. Each time we call this method, it returns the next node value. - -To do that, we use the yield keyword which returns an object and stops right there so the function will continue from there next time the method is called. - -We cannot use recursion in this case so we use a stack. - -Here is the code: - -[code lang="python"] -class Node: - ... - def tree_data(self): - """ - Generator to get the tree nodes data - """ - # we use a stack to traverse the tree in a non-recursive way - stack = [] - node = self - while stack or node: - if node: - stack.append(node) - node = node.left - else: # we are returning so we pop the node and we yield it - node = stack.pop() - yield node.data - node = node.right -[/code] - -For example, we want to access the tree nodes using a for loop: -[code lang="python"] -for data in root.tree_data: - print data -[/code] - -Let's look at what happens in the code with the same example we have been using: - - - - - -

Unit tests suite

- -As a good practice, you should always create a test suite for your library or application. I did a lot of unit testing at Tomnica and on Gourmious and it has been a life saver. - -We are going to use Python unittest module. We place our test modules in a folder called 'tests' so Python unit testing knows where to look. - -Each test module filename needs to start with 'test' and the methods in our test class also. - -We create a file named test_binary_tree.py in the folder 'tests' and we add a new class to it derived from unittest.TestCase. - -[code lang="python"] -class NodeTest(unittest.TestCase): - def test_binary_tree(self): - ... -[/code] - -We are going to add our test cases in the method test_binary_tree. - -Let's create 2 identical trees: -[code lang="python"] - data = [10, 5, 15, 4, 7, 13, 17, 11, 14] - # create 2 trees with the same content - root = binary_tree.Node(data[0]) - for i in data[1:]: - root.insert(i) - - root2 = binary_tree.Node(data[0]) - for i in data[1:]: - root2.insert(i) -[/code] - -Does compare_trees() returns True? -[code lang="python"] - self.assertTrue(root.compare_trees(root2)) -[/code] - -Does the generator tree_data() returns the nodes inorder? -[code lang="python"] - t = [] - for d in root.tree_data(): - t.append(d) - self.assertEquals(t, [4, 5, 7, 10, 11, 13, 14, 15, 17]) -[/code] - -Does lookup() works? -[code lang="python"] - node, parent = root.lookup(9) - # Node 9 doesn't exist - self.assertTrue(node == None) - # check if returned node and parent are correct - node, parent = root.lookup(11) - self.assertTrue(node.data == 11) - self.assertTrue(parent.data == 13) -[/code] - -Does deleting a node with no child works? -[code lang="python"] - # delete a leaf node - root.delete(4) - # check the content of the tree inorder - t = [] - for d in root.tree_data(): - t.append(d) - self.assertEquals(t, [5, 7, 10, 11, 13, 14, 15, 17]) -[/code] - -Does deleting a node with 1 child works? -[code lang="python"] - # delete a node with 1 child - root.delete(5) - # check the content of the tree inorder - t = [] - for d in root.tree_data(): - t.append(d) - self.assertEquals(t, [7, 10, 11, 13, 14, 15, 17]) -[/code] - -Does deleting a node with 2 children works? -[code lang="python"] - # delete a node with 2 children - root.delete(13) - # check the content of the tree inorder - t = [] - for d in root.tree_data(): - t.append(d) - self.assertEquals(t, [7, 10, 11, 14, 15, 17]) -[/code] - -Here you go, I hope you enjoyed this tutorial. Don't hesitate to add comments if you have any feedback. diff --git a/build-support/.flake8 b/build-support/.flake8 new file mode 100644 index 0000000..ef09bcb --- /dev/null +++ b/build-support/.flake8 @@ -0,0 +1 @@ +[flake8] diff --git a/constraints.txt b/constraints.txt new file mode 100644 index 0000000..e69de29 diff --git a/pants b/pants new file mode 100755 index 0000000..e9b5768 --- /dev/null +++ b/pants @@ -0,0 +1,294 @@ +#!/usr/bin/env bash +# Copyright 2020 Pants project contributors (see CONTRIBUTORS.md). +# Licensed under the Apache License, Version 2.0 (see LICENSE). + +# =============================== NOTE =============================== +# This ./pants bootstrap script comes from the pantsbuild/setup +# project. It is intended to be checked into your code repository so +# that other developers have the same setup. +# +# Learn more here: https://www.pantsbuild.org/docs/installation +# ==================================================================== + +set -eou pipefail + +# NOTE: To use an unreleased version of Pants from the pantsbuild/pants master branch, +# locate the master branch SHA, set PANTS_SHA= in the environment, and run this script as usual. +# +# E.g., PANTS_SHA=725fdaf504237190f6787dda3d72c39010a4c574 ./pants --version + +PYTHON_BIN_NAME="${PYTHON:-unspecified}" + +# Set this to specify a non-standard location for this script to read the Pants version from. +# NB: This will *not* cause Pants itself to use this location as a config file. +# You can use PANTS_CONFIG_FILES or --pants-config-files to do so. +PANTS_TOML=${PANTS_TOML:-pants.toml} + +PANTS_BIN_NAME="${PANTS_BIN_NAME:-$0}" + +PANTS_SETUP_CACHE="${PANTS_SETUP_CACHE:-${XDG_CACHE_HOME:-$HOME/.cache}/pants/setup}" +# If given a relative path, we fix it to be absolute. +if [[ "$PANTS_SETUP_CACHE" != /* ]]; then + PANTS_SETUP_CACHE="${PWD}/${PANTS_SETUP_CACHE}" +fi + +PANTS_BOOTSTRAP="${PANTS_SETUP_CACHE}/bootstrap-$(uname -s)-$(uname -m)" + +VENV_VERSION=${VENV_VERSION:-20.2.2} + +VENV_PACKAGE=virtualenv-${VENV_VERSION} +VENV_TARBALL=${VENV_PACKAGE}.tar.gz + +COLOR_RED="\x1b[31m" +COLOR_GREEN="\x1b[32m" +COLOR_RESET="\x1b[0m" + +function log() { + echo -e "$@" 1>&2 +} + +function die() { + (($# > 0)) && log "${COLOR_RED}$*${COLOR_RESET}" + exit 1 +} + +function green() { + (($# > 0)) && log "${COLOR_GREEN}$*${COLOR_RESET}" +} + +function tempdir { + mktemp -d "$1"/pants.XXXXXX +} + +function get_exe_path_or_die { + local exe="$1" + if ! command -v "${exe}"; then + die "Could not find ${exe}. Please ensure ${exe} is on your PATH." + fi +} + +function get_pants_config_value { + local config_key="$1" + local optional_space="[[:space:]]*" + local prefix="^${config_key}${optional_space}=${optional_space}" + local raw_value + raw_value="$(sed -ne "/${prefix}/ s#${prefix}##p" "${PANTS_TOML}")" + echo "${raw_value}" | tr -d \"\' && return 0 + return 0 +} + +function get_python_major_minor_version { + local python_exe="$1" + "$python_exe" <&1 > /dev/null)" == "pyenv: python${version}"* ]]; then + continue + fi + echo "${interpreter_path}" && return 0 + done +} + +function determine_python_exe { + local pants_version="$1" + set_supported_python_versions "${pants_version}" + local requirement_str="For \`pants_version = \"${pants_version}\"\`, Pants requires Python ${supported_message} to run." + + local python_bin_name + if [[ "${PYTHON_BIN_NAME}" != 'unspecified' ]]; then + python_bin_name="${PYTHON_BIN_NAME}" + else + python_bin_name="$(determine_default_python_exe)" + if [[ -z "${python_bin_name}" ]]; then + die "No valid Python interpreter found. ${requirement_str} Please check that a valid interpreter is installed and on your \$PATH." + fi + fi + local python_exe + python_exe="$(get_exe_path_or_die "${python_bin_name}")" + local major_minor_version + major_minor_version="$(get_python_major_minor_version "${python_exe}")" + for valid_version in "${supported_python_versions_int[@]}"; do + if [[ "${major_minor_version}" == "${valid_version}" ]]; then + echo "${python_exe}" && return 0 + fi + done + die "Invalid Python interpreter version for ${python_exe}. ${requirement_str}" +} + +# TODO(John Sirois): GC race loser tmp dirs leftover from bootstrap_XXX +# functions. Any tmp dir w/o a symlink pointing to it can go. + +function bootstrap_venv { + if [[ ! -d "${PANTS_BOOTSTRAP}/${VENV_PACKAGE}" ]]; then + ( + mkdir -p "${PANTS_BOOTSTRAP}" + local staging_dir + staging_dir=$(tempdir "${PANTS_BOOTSTRAP}") + cd "${staging_dir}" + curl -LO "https://pypi.io/packages/source/v/virtualenv/${VENV_TARBALL}" + tar -xzf "${VENV_TARBALL}" + ln -s "${staging_dir}/${VENV_PACKAGE}" "${staging_dir}/latest" + mv "${staging_dir}/latest" "${PANTS_BOOTSTRAP}/${VENV_PACKAGE}" + ) 1>&2 + fi + + local venv_path="${PANTS_BOOTSTRAP}/${VENV_PACKAGE}" + local venv_entry_point + + # shellcheck disable=SC2086 + if [[ -f "${venv_path}/virtualenv.py" ]]; then + venv_entry_point="${venv_path}/virtualenv.py" + elif [[ -f "${venv_path}/src/virtualenv/__main__.py" ]]; then + venv_entry_point="${venv_path}/src/virtualenv/__main__.py" + else + die "Could not find virtualenv entry point for version $VENV_VERSION" + fi + + echo "${venv_entry_point}" +} + +function find_links_url { + local pants_version="$1" + local pants_sha="$2" + echo -n "https://binaries.pantsbuild.org/wheels/pantsbuild.pants/${pants_sha}/${pants_version/+/%2B}/index.html" +} + +function get_version_for_sha { + local sha="$1" + + # Retrieve the Pants version associated with this commit. + local pants_version + pants_version="$(curl --fail -sL "https://raw.githubusercontent.com/pantsbuild/pants/${sha}/src/python/pants/VERSION")" + + # Construct the version as the release version from src/python/pants/VERSION, plus the string `+gitXXXXXXXX`, + # where the XXXXXXXX is the first 8 characters of the SHA. + echo "${pants_version}+git${sha:0:8}" +} + +function bootstrap_pants { + local pants_version="$1" + local python="$2" + local pants_sha="${3:-}" + + local pants_requirement="pantsbuild.pants==${pants_version}" + local maybe_find_links + if [[ -z "${pants_sha}" ]]; then + maybe_find_links="" + else + maybe_find_links="--find-links=$(find_links_url "${pants_version}" "${pants_sha}")" + fi + local python_major_minor_version + python_major_minor_version="$(get_python_major_minor_version "${python}")" + local target_folder_name + target_folder_name="${pants_version}_py${python_major_minor_version}" + + if [[ ! -d "${PANTS_BOOTSTRAP}/${target_folder_name}" ]]; then + ( + local venv_entry_point="$(bootstrap_venv)" + local staging_dir + staging_dir=$(tempdir "${PANTS_BOOTSTRAP}") + "${python}" "${venv_entry_point}" --no-download "${staging_dir}/install" && \ + "${staging_dir}/install/bin/pip" install -U pip && \ + "${staging_dir}/install/bin/pip" install ${maybe_find_links} --progress-bar off "${pants_requirement}" && \ + ln -s "${staging_dir}/install" "${staging_dir}/${target_folder_name}" && \ + mv "${staging_dir}/${target_folder_name}" "${PANTS_BOOTSTRAP}/${target_folder_name}" && \ + green "New virtual environment successfully created at ${PANTS_BOOTSTRAP}/${target_folder_name}." + ) 1>&2 + fi + echo "${PANTS_BOOTSTRAP}/${target_folder_name}" +} + +# Ensure we operate from the context of the ./pants buildroot. +cd "$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd -P)" +pants_version="$(determine_pants_version)" +python="$(determine_python_exe "${pants_version}")" +pants_dir="$(bootstrap_pants "${pants_version}" "${python}" "${PANTS_SHA:-}")" +pants_python="${pants_dir}/bin/python" +pants_binary="${pants_dir}/bin/pants" +pants_extra_args="" +if [[ -n "${PANTS_SHA:-}" ]]; then + pants_extra_args="${pants_extra_args} --python-repos-repos=$(find_links_url "$pants_version" "$PANTS_SHA")" +fi + +# We set the env var no_proxy to '*', to work around an issue with urllib using non +# async-signal-safe syscalls after we fork a process that has already spawned threads. +# +# See https://blog.phusion.nl/2017/10/13/why-ruby-app-servers-break-on-macos-high-sierra-and-what-can-be-done-about-it/ +export no_proxy='*' + +# shellcheck disable=SC2086 +exec "${pants_python}" "${pants_binary}" ${pants_extra_args} \ + --pants-bin-name="${PANTS_BIN_NAME}" --pants-version=${pants_version} "$@" diff --git a/pants.toml b/pants.toml new file mode 100644 index 0000000..0c3de5a --- /dev/null +++ b/pants.toml @@ -0,0 +1,30 @@ +[GLOBAL] +pants_version = "2.1.0" +backend_packages = [ + "pants.backend.python", + "pants.backend.python.lint.flake8" +] + +[source] +# The Python source root is the repo root. See https://www.pantsbuild.org/docs/source-roots. +root_patterns = ["/"] + +[python-setup] +# The default interpreter compatibility for code in this repo. Individual targets can override +# this with the `interpreter_constraints` field. See +# https://www.pantsbuild.org/docs/python-interpreter-compatibility. +interpreter_constraints = ["==2.7.*", ">=3.6"] +# Use a constraints file. See https://www.pantsbuild.org/docs/python-third-party-dependencies. +requirement_constraints = "constraints.txt" +# We search for interpreters on both on the $PATH and in the `$(pyenv root)/versions` folder. +# If you're using macOS, you may want to leave off the entry to avoid using the +# problematic system Pythons. See +# https://www.pantsbuild.org/docs/python-interpreter-compatibility#changing-the-interpreter-search-path. +interpreter_search_paths = ["", ""] + +[flake8] +config = "build-support/.flake8" + +[pytest] +version = "pytest>=4.0,<6.1" +pytest_plugins = ["zipp>=1.2"] diff --git a/pep8.sh b/pep8.sh new file mode 100755 index 0000000..e7fe912 --- /dev/null +++ b/pep8.sh @@ -0,0 +1,2 @@ +flake8 algorithms +exit diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..e69de29 diff --git a/setup.py b/setup.py deleted file mode 100644 index 0137842..0000000 --- a/setup.py +++ /dev/null @@ -1,29 +0,0 @@ -from distutils.core import setup -setup( - name = "algorithms", - packages = ['algorithms'], - version = "0.1", - description = "Algorithms implemented in Python", - author = "Laurent Luce", - author_email = "laurentluce49@yahoo.com", - url = "http://github.com/laurentluce/python-algorithms", - download_url = "http://github.com/laurentluce/python-algorithms", - keywords = ["algorithms"], - classifiers = [ - "Programming Language :: Python", - "Operating System :: OS Independent", - "License :: OSI Approved :: MIT License", - "Intended Audience :: Developers", - "Development Status :: 5 - Production/Stable", - "Topic :: Software Development :: Libraries :: Python Modules" - ], - long_description = """\ - Python Algorithms Library - ---------------------------- - - DESCRIPTION - The purpose of this library is to help you with basic and more advanced - algorithms - - LICENSE The Python Algorithms Library is distributed under the MIT - License """ )