diff --git a/README.md b/README.md index 4c2cb92..6b3577f 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -Algos in Python +Algorithms in Python ====== Implementations of a few algorithms and datastructures for fun and profit! @@ -8,6 +8,8 @@ Completed - Karatsuba Multiplication - Basic Sorting - Rabin-Miller primality test +- Sieve of Eratosthenes for prime numbers +- Binary Search - Counting Inversions in an array - Selecting ith order statistic in an array - Graph datastructure (directed & undirected) @@ -33,11 +35,15 @@ Completed - Binary Search Tree - Kandane's Algorithm - Knapsack Problem (0/1 and unbounded) +- Longest Increasing Subsequence +- Longest Common Subsequence - Prefix Tries - Stack ADT (with example problems) - String Reverse - Parenthesis Matching - Infix to Postfix +- Modular exponentiation +- Modular multiplicative inverse Tests @@ -47,3 +53,6 @@ Tests python -m tests.graph_algorithms_test python -m tests.heap_test python -m tests.unionfind_test + python -m tests.singly_linked_list_test + python -m tests.modular_exponentiation_test + python -m tests.modular_multiplicative_inverse_test diff --git a/dp/coinchange.py b/dp/coinchange.py new file mode 100644 index 0000000..1b3a370 --- /dev/null +++ b/dp/coinchange.py @@ -0,0 +1,24 @@ +""" +Problem: http://www.algorithmist.com/index.php/Coin_Change +""" +def coinchange(total, coins): + M = len(coins) + table = [[0]*M for i in range(total+1)] + for i in range(M): + table[0][i] = 1 + + for i in range(1, total+1): + for j in range(M): + # count of solutions excluding coin + x = table[i][j-1] if j > 0 else 0 + + # count of solutions including coin + y = table[i-coins[j]][j] if i - coins[j] >= 0 else 0 + table[i][j] = x + y + + return table[total][M-1] + +if __name__ == "__main__": + print coinchange(10, [2, 3, 5, 6]) # 5 + print coinchange(5, [2, 3, 5]) # 2 + print coinchange(4, [1, 2, 3]) # 4 diff --git a/dp/kadane.py b/dp/kadane.py new file mode 100644 index 0000000..499e8f8 --- /dev/null +++ b/dp/kadane.py @@ -0,0 +1,31 @@ +""" +Problem: The maximum subarray problem is the task of finding the +contiguous subarray within a one-dimensional array of numbers +(containing at least one positive number) which has the largest sum. + +Solution: +The recurrence relation that we solve at each step is the following - + +Let S[i] = be the max value contigous subsequence till the ith element +of the array. + +Then S[i] = max(A[i], A[i] + S[i - 1]) +At each step, we have two options +1) We add the ith element to the sum till the i-1th elem +2) We start a new array starting at i + +We take a max of both these options and accordingly build up the array. +""" +def max_value_contigous_subsequence(arr): + A = [arr[0]] + [0] * (len(arr) - 1) + max_to_here = arr[0] + for i in range(1, len(arr)): + A[i] = max(arr[i], arr[i] + A[i-1]) + max_to_here = max(max_to_here, A[i]) + return max_to_here + +if __name__ == "__main__": + x = [-2, -3, 4, -1, -2, 1, 5, -3] + y = [-2, 1, -3, 4, -1, 2, 1, -5, 4] + z = [-1, 3, -5, 4, 6, -1, 2, -7, 13, -3] + print map(max_value_contigous_subsequence, [x, y, z]) diff --git a/dp/lcs.py b/dp/lcs.py new file mode 100644 index 0000000..35c0056 --- /dev/null +++ b/dp/lcs.py @@ -0,0 +1,32 @@ +""" +Problem : https://en.wikipedia.org/wiki/Longest_common_subsequence_problem +""" + +def longest_common_subsequence(s1, s2): + # lengths of strings s1 and s2 + m, n = len(s1), len(s2) + # to cache the results + cache = [[0 for j in range(n + 1)] for i in range(m + 1)] + for i, character_s1 in enumerate(s1): + for j, character_s2 in enumerate(s2): + if character_s1 == character_s2: + cache[i + 1][j + 1] = cache[i][j] + 1 + else: + cache[i + 1][j + 1] = max(cache[i][j + 1], cache[i + 1][j]) + # LCS is empty by default + sequence = "" + i, j = m, n + # finding the sequence from cache + while i >= 1 and j >= 1: + if s1[i - 1] == s2[j - 1]: + sequence += s1[i - 1] + i, j = i - 1, j - 1 + elif cache[i - 1][j] > cache[i][j - 1]: + i -= 1 + else: + j -= 1 + # returns the length of LCS along with the sequence itself + return (len(sequence), sequence[::-1]) + +if __name__ == "__main__": + print(longest_common_subsequence("ABCXYZ","ACBCXZ")) diff --git a/dp/longest_subsequence.py b/dp/longest_subsequence.py index c037ff8..e1ac1d2 100644 --- a/dp/longest_subsequence.py +++ b/dp/longest_subsequence.py @@ -1,31 +1,45 @@ -def longest_seq(seq): - """ returns the longest increasing subseqence - in a sequence """ - count = [1] * len(seq) - prev = [0] * len(seq) - for i in range(1, len(seq)): - dist = [] - temp_prev = {} - for j in range(i): - if seq[j] < seq[i]: - dist.append(count[j]) - temp_prev[count[j]] = j - else: - temp_prev[0] = j - dist.append(0) - count[i] = 1 + max(dist) - prev[i] = temp_prev[max(dist)] - - # path - path = [seq[prev.index(max(prev))]] - i = prev.index(max(prev)) - while i>1: - path.append(seq[prev[i]]) - i = prev[i] - return max(count), path[::-1] +""" +Problem: http://www.geeksforgeeks.org/dynamic-programming-set-3-longest-increasing-subsequence/ +""" +def longest_increasing_subsequence(nums): + # array used to store the length of the longest subsequence found + cache = [1] * len(nums) + + # array used to store the location of the predecessor in the longest + # subsequence. -1 by default + location = [-1] * len(nums) + + for i in range(1, len(nums)): + for j in range(0, i): + if nums[i] > nums[j]: + if cache[j] + 1 > cache[i]: + cache[i] = cache[j] + 1 + location[i] = j + + # finding the max in the cache gives us the + # answer - i.e. length of the LIS + max_value = max(cache) + + # with the answer in hand, we need to build the solution + # using the locations stored + solution = [] + i = cache.index(max_value) + + # we start with the max value i.e. the index of the + # location where the max LIS exists and then + # keep backtracking to build up the solution + while location[i] > -1: + solution.append(nums[i]) + i = location[i] + + # when the loop ends, just append the starting element + solution.append(nums[i]) + + # return the length of the LIS and the solution (in reverse) + return max_value, solution[::-1] if __name__ == "__main__": - seq = [5, 2, 8, 10, 3, 6, 9, 7] - seq2 = [0, 8, 3, 12, 2, 10, 6, 14, 1, 9, 5, 13, 3, 11, 7, 15] - print longest_seq(seq2) + assert longest_increasing_subsequence([3, 4, -1, 0, 6, 2, 3]) == (4, [-1, 0, 2, 3]) + assert longest_increasing_subsequence([10, 22, 9, 33, 21, 50, 41, 60, 80]) == (6, [10, 22, 33, 50, 60, 80]) + assert longest_increasing_subsequence([5,0,1,2,3,4,5,6,7,8,9,10,11,12, 2, 8, 10, 3, 6, 9, 7]) == (13, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]) diff --git a/graphs/digraph.py b/graphs/digraph.py index 10d3a6d..e8badd3 100644 --- a/graphs/digraph.py +++ b/graphs/digraph.py @@ -18,7 +18,7 @@ def __init__(self): def __str__(self): return "Directed Graph \nNodes: %s \nEdges: %s" % (self.nodes(), self.edges()) - def add_edge(self, edge, wt=1, label=""): + def add_edge(self, edge, wt=DEFAULT_WEIGHT, label=""): """ Add an edge to the graph connecting two nodes. An edge, here, is a pair of node like C(m, n) or a tuple diff --git a/graphs/eulerian_tour.py b/graphs/eulerian_tour.py new file mode 100755 index 0000000..055571a --- /dev/null +++ b/graphs/eulerian_tour.py @@ -0,0 +1,105 @@ +#!/usr/bin/env python3 +# Find Eulerian Tour +# +# Write a program that takes in a graph +# represented as a list of tuples +# and return a list of nodes that +# you would follow on an Eulerian Tour +# +# For example, if the input graph was +# [(1, 2), (2, 3), (3, 1)] +# A possible Eulerian tour would be [1, 2, 3, 1] + +def get_a_tour(): + '''This function returns a possible tour in the current graph and removes the edges included in that tour, from the graph.''' + global graph + + nodes_degree = {} # Creating a {node: degree} dictionary for current graph. + for edge in graph: + a, b = edge[0], edge[1] + nodes_degree[a] = nodes_degree.get(a, 0) + 1 + nodes_degree[b] = nodes_degree.get(b, 0) + 1 + + tour =[] # Finding a tour in the current graph. + loop = enumerate(nodes_degree) + while True: + try: + l = loop.__next__() + index = l[0] + node = l[1] + degree = nodes_degree[node] + try: + if (tour[-1], node) in graph or (node, tour[-1]) in graph: + tour.append(node) + try: + graph.remove((tour[-2], tour[-1])) + nodes_degree[tour[-1]] -= 1 # Updating degree of nodes in the graph, not required but for the sake of completeness. + nodes_degree[tour[-2]] -= 1 # Can also be used to check the correctness of program. In the end all degrees must zero. + except ValueError: + graph.remove((tour[-1], tour[-2])) + nodes_degree[tour[-1]] -= 1 + nodes_degree[tour[-2]] -= 1 + except IndexError: + tour.append(node) + except StopIteration: + loop = enumerate(nodes_degree) + + if len(tour) > 2: + if tour[0] == tour[-1]: + return tour + +def get_eulerian_tour(): + '''This function returns a Eulerian Tour for the input graph.''' + global graph + tour = get_a_tour() + + if graph: # If stuck at the beginning, finding additional tour in the graph. + loop = enumerate(tour[: -1]) + l = loop.__next__() + i = l[0] + node = l[1] + try: + while True: + if node in list(zip(*graph))[0] or node in list(zip(*graph))[1]: + t = get_a_tour() # Retreivng the additional tour + j = t.index(node) + tour = tour[ : i] + t[j:-1] + t[ :j+1] + tour[i+1: ] # Joining the two tours. + if not graph: # Found Eulerian Tour + return tour # Returning the Eulerian Tour + loop = enumerate(tour[: -1]) # Still stuck? Looping back to search for another tour. + l = loop.__next__() + i = l[0] + node = l[1] + except StopIteration: # Oops! seems like the vertices in the current tour cannot connect to rest of the edges in the graph. + print("Your graph doesn't seem to be connected") + exit() + else: # Found the Eulerian Tour in the very first call. Lucky Enough! + return tour + +# Sample inputs +# graph = [(1, 2), (1, 3), (2, 3), (2, 4), (2, 6), (3, 4), (3, 5), (4, 5), (4, 6)] +# graph = [(1, 2), (1, 3), (2, 3)] +# graph = [(1, 2), (1, 3), (2, 3), (2, 4), (2, 6), (3, 4), (3, 5), (4, 5), (4, 6), (9, 10), (10, 11), (11, 9)] +# graph = [(1, 2), (1, 3), (2, 3), (2, 4), (2, 6), (3, 4), (3, 5), (4, 5), (4, 6), (2, 7), (7, 8), (8, 2)] +# graph = [(1, 2), (1, 3), (2, 3), (2, 4), (2, 6), (3, 4), (3, 5), (4, 5), (4, 6), (1, 5), (5, 6), (1, 6)] +# graph = [(1, 2), (2, 3), (3, 1), (3, 4), (4, 3)] +# graph = [(0, 1), (0, 2), (0, 3), (1, 2), (1, 3), (2, 3)] +# graph = [(2, 6), (4, 2), (5, 4), (6, 5), (6, 8), (7, 9), (8, 7), (9, 6)] + +# creating a {node: degree} dictionary +nodes_degree = {} +for edge in graph: + a, b = edge[0], edge[1] + nodes_degree[a] = nodes_degree.get(a, 0) + 1 + nodes_degree[b] = nodes_degree.get(b, 0) + 1 + +#checking degree +degrees = nodes_degree.values() # remember it return a view +for degree in degrees: + if degree % 2: + print("Your graph have one or more nodes with odd degrees. Hence an Eulerian Tour is impossible.") + exit() + +#finding Eulerian Tour +tour = get_eulerian_tour() +print(tour) diff --git a/graphs/graph.py b/graphs/graph.py index 98c6c30..10364b4 100644 --- a/graphs/graph.py +++ b/graphs/graph.py @@ -4,7 +4,7 @@ class graph(object): methods: add_edge, add_edges, add_node, add_nodes, has_node, has_edge, nodes, edges, neighbors, del_node, del_edge, node_order, - set_edge_weight, get_edge_weight, + set_edge_weight, get_edge_weight """ DEFAULT_WEIGHT = 1 @@ -38,7 +38,7 @@ def has_node(self, node): """ return node in self.node_neighbors - def add_edge(self, edge, wt=1, label=""): + def add_edge(self, edge, wt=DEFAULT_WEIGHT, label=""): """ Add an edge to the graph connecting two nodes. An edge, here, is a pair of node like C(m, n) or a tuple @@ -61,7 +61,7 @@ def nodes(self): """ Returns a list of nodes in the graph """ - return list(self.node_neighbors.keys()) + return self.node_neighbors.keys() def has_edge(self, edge): """ @@ -69,9 +69,7 @@ def has_edge(self, edge): graph. An edge, here, is a pair of node like C(m, n) or a tuple """ u, v = edge - if v not in self.node_neighbors[u]: - return False - return True + return v in self.node_neighbors.get(u, []) def neighbors(self, node): """ @@ -115,14 +113,16 @@ def edges(self): """ edge_list = [] for node in self.nodes(): - for each in self.neighbors(node): - edge_list.append((node, each)) + edges = [(node, each) for each in self.neighbors(node)] + edge_list.extend(edges) return edge_list # Methods for setting properties on nodes and edges def set_edge_weight(self, edge, wt): """Set the weight of the edge """ u, v = edge + if not self.has_edge(edge): + raise Exception("Edge (%s, %s) not an existing edge" % (u, v)) self.node_neighbors[u][v] = wt if u != v: self.node_neighbors[v][u] = wt @@ -140,10 +140,7 @@ def get_edge_weights(self): unique_list = {} for u in self.nodes(): for v in self.neighbors(u): - if not unique_list.get(v) or u not in unique_list.get(v): + if u not in unique_list.get(v, set()): edge_list.append((self.node_neighbors[u][v], (u, v))) - if u not in unique_list: - unique_list[u] = [v] - else: - unique_list[u].append(v) + unique_list.setdefault(u, set()).add(v) return edge_list diff --git a/graphs/graph_algorithms.py b/graphs/graph_algorithms.py index 4329399..3818a23 100644 --- a/graphs/graph_algorithms.py +++ b/graphs/graph_algorithms.py @@ -8,13 +8,13 @@ def BFS(gr, s): Returns a list of nodes that are "findable" from s """ if not gr.has_node(s): raise Exception("Node %s not in graph" % s) - nodes_explored = [s] + nodes_explored = set([s]) q = deque([s]) while len(q)!=0: node = q.popleft() for each in gr.neighbors(node): if each not in nodes_explored: - nodes_explored.append(each) + nodes_explored.add(each) q.append(each) return nodes_explored @@ -28,7 +28,7 @@ def shortest_hops(gr, s): else: dist = {} q = deque([s]) - nodes_explored = [s] + nodes_explored = set([s]) for n in gr.nodes(): if n == s: dist[n] = 0 else: dist[n] = float('inf') @@ -36,7 +36,7 @@ def shortest_hops(gr, s): node = q.popleft() for each in gr.neighbors(node): if each not in nodes_explored: - nodes_explored.append(each) + nodes_explored.add(each) q.append(each) dist[each] = dist[node] + 1 return dist @@ -46,18 +46,18 @@ def undirected_connected_components(gr): in an undirected graph """ if gr.DIRECTED: raise Exception("This method works only with a undirected graph") - explored = [] + explored = set([]) con_components = [] for node in gr.nodes(): if node not in explored: reachable_nodes = BFS(gr, node) con_components.append(reachable_nodes) - explored += reachable_nodes + explored |= reachable_nodes return con_components def DFS(gr, s): """ Depth first search wrapper """ - path = [] + path = set([]) depth_first_search(gr, s, path) return path @@ -65,7 +65,7 @@ def depth_first_search(gr, s, path): """ Depth first search Returns a list of nodes "findable" from s """ if s in path: return False - path.append(s) + path.add(s) for each in gr.neighbors(s): if each not in path: depth_first_search(gr, each, path) @@ -121,7 +121,7 @@ def outer_dfs(digr, node, nodes_explored, path): def DFS_loop(digr): """ Core DFS loop used to find strongly connected components in a directed graph """ - node_explored = [] # list for keeping track of nodes explored + node_explored = set([]) # list for keeping track of nodes explored finishing_times = [] # list for adding nodes based on their finishing times for node in digr.nodes(): if node not in node_explored: @@ -131,7 +131,7 @@ def DFS_loop(digr): def inner_DFS(digr, node, node_explored, finishing_times): """ Inner DFS used in DFS loop method """ - node_explored.append(node) # mark explored + node_explored.add(node) # mark explored for each in digr.neighbors(node): if each not in node_explored: inner_DFS(digr, each, node_explored, finishing_times) @@ -143,8 +143,9 @@ def shortest_path(digr, s): """ Finds the shortest path from s to every other vertex findable from s using Dijkstra's algorithm in O(mlogn) time. Uses heaps for super fast implementation """ - nodes_explored = [s] - nodes_unexplored = DFS(digr, s)[1:] # all accessible nodes from s + nodes_explored = set([s]) + nodes_unexplored = DFS(digr, s) # all accessible nodes from s + nodes_unexplored.remove(s) dist = {s:0} node_heap = [] @@ -155,7 +156,7 @@ def shortest_path(digr, s): while len(node_heap) > 0: min_dist, nearest_node = heapq.heappop(node_heap) dist[nearest_node] = min_dist - nodes_explored.append(nearest_node) + nodes_explored.add(nearest_node) nodes_unexplored.remove(nearest_node) # recompute keys for just popped node @@ -183,7 +184,7 @@ def minimum_spanning_tree(gr): cost spanning tree in a undirected connected graph. Works only with undirected and connected graphs """ s = gr.nodes()[0] - nodes_explored = [s] + nodes_explored = set([s]) nodes_unexplored = gr.nodes() nodes_unexplored.remove(s) min_cost, node_heap = 0, [] @@ -197,7 +198,7 @@ def minimum_spanning_tree(gr): # adds the cheapest to "explored" node_cost, min_node = heapq.heappop(node_heap) min_cost += node_cost - nodes_explored.append(min_node) + nodes_explored.add(min_node) nodes_unexplored.remove(min_node) # recompute keys for neighbors of deleted node diff --git a/heaps/maxheap.py b/heaps/maxheap.py index 6f14856..f902af4 100644 --- a/heaps/maxheap.py +++ b/heaps/maxheap.py @@ -27,7 +27,7 @@ def heappush(self, x): i = len(self.heap) self.heap.append(x) parent = self.parent(i) - while parent != [] and self.heap[i] > self.heap[parent]: + while parent != -1 and self.heap[i] > self.heap[parent]: self.heap[i], self.heap[parent] = self.heap[parent], self.heap[i] i = parent parent = self.parent(i) diff --git a/heaps/minheap.py b/heaps/minheap.py index 8c64b76..a3cce3a 100644 --- a/heaps/minheap.py +++ b/heaps/minheap.py @@ -11,7 +11,7 @@ def __init__(self, nums=None): self.heap = [] if nums: self.build_heap(nums) - + def __str__(self): return "Min-heap with %s items" % (len(self.heap)) @@ -19,28 +19,24 @@ def max_elements(self): return len(self.heap) def height(self): - return math.ceil(math.log(len(self.heap))/math.log(2)) + return math.ceil(math.log(len(self.heap)) / math.log(2)) def is_leaf(self, i): """ returns True if i is a leaf node """ - return i > int(math.ceil( (len(self.heap)- 2) / 2)) + return i > int(math.ceil((len(self.heap) - 2) / 2.0)) def parent(self, i): if i == 0: - return [] + return -1 elif i % 2 != 0: # odd - return (i-1)/2 - return int(math.floor((i-1)/2)) + return (i - 1) / 2 + return (i - 2) / 2 def leftchild(self, i): - if not self.is_leaf(i): - return 2*i+1 - return [] + return 2 * i + 1 def rightchild(self, i): - if not self.is_leaf(i): - return 2*i+2 - return [] + return 2 * i + 2 def heapify(self, i): l = self.leftchild(i) @@ -58,17 +54,16 @@ def build_heap(self, elem): """ transforms a list of elements into a heap in linear time """ self.heap = elem[:] - last_leaf = int(math.ceil( (len(self.heap)- 2) / 2)) + last_leaf = self.parent(len(self.heap)) for i in range(last_leaf, -1, -1): self.heapify(i) - def heappush(self, x): """ Adds a new item x in the heap""" i = len(self.heap) self.heap.append(x) parent = self.parent(i) - while parent != [] and self.heap[i] < self.heap[parent]: + while parent != -1 and self.heap[i] < self.heap[parent]: self.heap[i], self.heap[parent] = self.heap[parent], self.heap[i] i = parent parent = self.parent(i) diff --git a/lists/__init__.py b/lists/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/lists/queue.py b/lists/queue.py new file mode 100644 index 0000000..e6883f1 --- /dev/null +++ b/lists/queue.py @@ -0,0 +1,24 @@ +from collections import deque + + +class Queue(object): + """Wrapper around collections.deque to provide the api consistent with + a Queue""" + + def __init__(self): + self.items = deque() + + def __str__(self): + return ("Queue of size: %d" % len(self.items)) + + def isEmpty(self): + return len(self.items) == 0 + + def enqueue(self, item): + self.items.append(item) + + def dequeue(self): + return self.items.popleft() + + def size(self): + return len(self.items) diff --git a/lists/singlylinkedlist.py b/lists/singlylinkedlist.py new file mode 100644 index 0000000..d8bd85f --- /dev/null +++ b/lists/singlylinkedlist.py @@ -0,0 +1,79 @@ +class Node(object): + def __init__(self, data=None): + self.data = data + self.next = None + + def __repr__(self): + return str(self.data) + +class SinglyLinkedList(object): + def __init__(self, iterable=[]): + self.head = None + self.size = 0 + for item in iterable: self.append(item) + + def __repr__(self): + (current, nodes) = self.head, [] + while current: + nodes.append(str(current)) + current = current.next + return "->".join(nodes) + + def __len__(self): + return self.size + + def __iter__(self): + current = self.head + while current: + yield current + current = current.next + raise StopIteration + + def __contains__(self, data): + tmp = self.head + found = False + while tmp and not found: + if data == tmp.data: + found = True + else: + tmp = tmp.next + return found + + def append(self, data): + tmp = Node(data) + tmp.next = self.head + self.head = tmp + self.size += 1 + + def getHead(self): + return self.head + + def getTail(self): + tmp = self.head + while tmp.next: + tmp = tmp.next + return tmp + + def delete(self, data): + tmp = self.head + prev = None + found = False + while tmp and not found: + if data == tmp.data: + found = True + else: + prev = tmp + tmp = tmp.next + if found: + self.size -= 1 + if prev == None: + self.head = self.head.next + else: + prev.next = tmp.next + +if __name__ == "__main__": + list1 = SinglyLinkedList(range(0, 100, 10)) + print list1 # testing repr + print 50 in list1, 110 not in list1 # testing contains + list1.delete(50) # testing delete + print len(list1) == 9, 50 not in list1 # testing size diff --git a/linear_datastructures/stack-adt.py b/lists/stack-adt.py similarity index 97% rename from linear_datastructures/stack-adt.py rename to lists/stack-adt.py index 8b66349..a828ff7 100644 --- a/linear_datastructures/stack-adt.py +++ b/lists/stack-adt.py @@ -17,7 +17,7 @@ def pop(self): def top(self): if self.isEmpty(): return None - return self.items[len(self.items)-1] + return self.items[-1] def string_reverse(s): stack = Stack() diff --git a/misc/GCD.py b/misc/GCD.py new file mode 100644 index 0000000..f1daa47 --- /dev/null +++ b/misc/GCD.py @@ -0,0 +1,21 @@ +""" +Greatest common divisor(GCD) of two integers X and Y is the largest integer that divides both X and Y. + +References : +https://en.wikipedia.org/wiki/Euclidean_algorithm +https://proofwiki.org/wiki/Euclidean_Algorithm +http://stackoverflow.com/questions/6005582/how-does-the-euclidean-algorithm-work + + +Algorithm : +* If X = 0 then GCD(X,Y) = Y, as GCD(0,Y) = Y. +* If Y = 0 then GCD(X,Y) = X, as GCD(X,0) = X. +* Write X in quotient remainder form (X = Y * Q + R). +* Find GCD(Y,R) using the Euclidean algorithm since GCD(X,Y) = GCD(Y,R). +""" + +def greatest_common_divisor(x,y): + return x if y == 0 else greatest_common_divisor(y,x%y) + +if __name__ == "__main__": + print(greatest_common_divisor(20,25)) diff --git a/misc/__init__.py b/misc/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/misc/kadane.py b/misc/kadane.py deleted file mode 100644 index 6827a52..0000000 --- a/misc/kadane.py +++ /dev/null @@ -1,20 +0,0 @@ -# The maximum subarray problem is the task of finding the contiguous subarray within a one-dimensional array of numbers which has the largest sum. Kadane's algorithm finds the maximum subarray sum in linear time. -# For example, in the array { -1, 3, -5, 4, 6, -1, 2, -7, 13, -3 }, the maximum subarray sum is 17 (from the highlighted subarray). - -def find_max_subarray(numbers): - max_till_here = [0]*len(numbers) - max_value = 0 - for i in range(len(numbers)): - max_till_here[i] = max(numbers[i], max_till_here[i-1] + numbers[i]) - max_value = max(max_value, max_till_here[i]) - return max_value - -# another version -def find_max_subarray2(numbers): - max_till_here = [numbers[0]] - for n in numbers[1:]: - max_till_here.append(max(n, max_till_here[-1] + n)) - return max(max_till_here) - -print find_max_subarray([-2, 1, -3, 4, -1, 2, 1, -5, 4]) # 6 -print find_max_subarray([ -1, 3, -5, 4, 6, -1, 2, -7, 13, -3 ]) # 17 diff --git a/misc/max_area_histogram.py b/misc/max_area_histogram.py index ce342ab..0769e36 100644 --- a/misc/max_area_histogram.py +++ b/misc/max_area_histogram.py @@ -10,7 +10,7 @@ """ -# hist represented as ith bar has height h(i) +# hist represented as ith bar has height h(i) histogram = [6, 4, 2, 1, 3, 4, 5, 2, 6] """ @@ -21,26 +21,29 @@ 4. compute max area """ -def get_L(hist): - L = [0]*len(hist) - for i in range(1, len(hist)): - if hist[i] > hist[i-1]: - L[i] = i + +def find_Li(hist, i): + left_edge = 0 + for j in range(i-1, -1, -1): + if hist[j] >= hist[i]: + left_edge += 1 else: - L[i] = L[i-1] - return L + return left_edge + + return left_edge -print get_L(histogram) def find_Ri(hist, i): right_edge = 0 - for j in range(i+1, len(hist)): + for j in range(i + 1, len(hist)): if hist[j] >= hist[i]: right_edge += 1 else: return right_edge + return right_edge + def get_area(hist, i): return hist[i] * (find_Li(hist, i) + find_Ri(hist, i) + 1) @@ -53,6 +56,7 @@ def get_max_area(hist): max_area = area return max_area + def max_rectangle_area(histogram): """Find the area of the largest rectangle that fits entirely under the histogram. @@ -61,21 +65,21 @@ def max_rectangle_area(histogram): stack = [] top = lambda: stack[-1] max_area = 0 - pos = 0 # current position in the histogram + pos = 0 # current position in the histogram for pos, height in enumerate(histogram): - start = pos # position where rectangle starts + start = pos # position where rectangle starts while True: - if not stack or height > top().height: - stack.append(Info(start, height)) # push - elif stack and height < top().height: - max_area = max(max_area, top().height*(pos-top().start)) + if not stack or height > top()[1]: + stack.append((start, height)) # push + elif stack and height < top()[1]: + max_area = max(max_area, top()[1] * (pos - top()[0])) start, _ = stack.pop() continue - break # height == top().height goes here + break # height == top().height goes here pos += 1 for start, height in stack: - max_area = max(max_area, height*(pos-start)) + max_area = max(max_area, height * (pos - start)) return max_area diff --git a/misc/modular_exponentiation.py b/misc/modular_exponentiation.py new file mode 100644 index 0000000..43c66ee --- /dev/null +++ b/misc/modular_exponentiation.py @@ -0,0 +1,22 @@ +""" +Problem: https://en.wikipedia.org/wiki/Modular_exponentiation +""" + +def modular_exponentiation(base, exp, mod): + if exp < 1: + raise ValueError("Exponentiation should be ve+ int") + if mod == 1: + return 0 + elif mod < 1: + raise ValueError("Modulus should be ve+ int") + #Initialize result to 1 + result = 1 + base %= mod + while exp > 0: + #multiply base to result if exp is odd + if exp % 2 == 1: + result = (result * base) % mod + #Double base and half exp + exp = exp >> 1 + base = (base ** 2) % mod + return result \ No newline at end of file diff --git a/misc/modular_multiplicative_inverse.py b/misc/modular_multiplicative_inverse.py new file mode 100644 index 0000000..36dd550 --- /dev/null +++ b/misc/modular_multiplicative_inverse.py @@ -0,0 +1,42 @@ +""" +Problem: https://en.wikipedia.org/wiki/Modular_multiplicative_inverse +""" +import GCD as gcd + +def modular_multiplicative_inv(a, m): + if m == 1: + return 0 + if m < 1: + raise ValueError('Modulus should be ve+ int > 0') + # check for co-prime condition + if gcd.greatest_common_divisor(a, m) != 1: + raise ValueError('a and m are not co-primes') + + # Make var "a" positive if it's negative + if a < 0: + a %= m + + # Initialise vars + m0 = m + x0 = 0 + x1 = 1 + + while a > 1: + # Calculate quotient q; store m into temp t + q = a / m + t = m + + # Calculate m as remainder(a, m); store temp t into a + m = a % m + a = t + + # Assign x0 into temp t; Calculate x0 and store temp t into x1 + t = x0 + x0 = x1 - q * x0 + x1 = t + + # If x1 is negative then add modulus m0 + if x1 < 0: + x1 += m0 + + return x1 \ No newline at end of file diff --git a/misc/shuffle.py b/misc/shuffle.py new file mode 100644 index 0000000..ee039c5 --- /dev/null +++ b/misc/shuffle.py @@ -0,0 +1,25 @@ +""" +Fisher-Yates shuffle algorithm implemented in Python. + +Reference : +https://en.wikipedia.org/wiki/Fisher%E2%80%93Yates_shuffle +http://www.geeksforgeeks.org/shuffle-a-given-array/ + +Algorithm: +For all N indices of list, swap the element at a given index i with the element at a random index j where 0 <= j <= i. +""" + +from random import randint + +def shuffle(arr): + """ + Shuffle a list. + """ + for i in range(0,len(arr)): + r = randint(0,i) + arr[i],arr[r] = arr[r],arr[i] + +if __name__ == '__main__': + arr = [1,2,3,4,5,6] + shuffle(arr) + print(arr) diff --git a/misc/sieve_of_eratosthenes.py b/misc/sieve_of_eratosthenes.py new file mode 100644 index 0000000..03cb2a6 --- /dev/null +++ b/misc/sieve_of_eratosthenes.py @@ -0,0 +1,20 @@ +""" +Implementation of Sieve of Eratosthenes algorithm to generate all the primes upto N. + +Algorithm : + * We have a list of numbers from 1 to N. + * Initially, all the numbers are marked as primes. + * We go to every prime number in the list (<= N ^ 1/2) and mark all the multiples + of this prime number which are bigger than the number itself as non-primes. +""" + +from math import sqrt,ceil + +def generate_primes(n): + bool_array = [False, False] + [True] * n # start with all values as True, except 0 and 1 + for i in range(2, int(ceil(sqrt(n)))): # only go to till square root of n + if bool_array[i]: # if the number is marked as prime + for j in range(i*i,n+1,i): # iterate through all its multiples + bool_array[j] = False # and mark them as False + primes = [i for i in range(n+1) if bool_array[i]] # return all numbers which are marked as True + return primes diff --git a/misc/temp.cpp b/misc/temp.cpp deleted file mode 100644 index d580669..0000000 --- a/misc/temp.cpp +++ /dev/null @@ -1,63 +0,0 @@ -int largestArea(int arr[], int len) -{ - int area[len]; //initialize it to 0 - int n, i, t; - stack St; //include stack for using this #include - bool done; - - for (i=0; i=0; i--) - { - while (!St.empty()) - { - if(arr[i] <= arr[St.top()]) - { - St.pop(); - } - else - break; - } - if(St.empty()) - t = len; - else - t = St.top(); - //calculating Ri, after this step area[i] = Li + Ri - area[i] += t - i -1; - St.push(i); - } - - int max = 0; - //Calculating Area[i] and find max Area - for (i=0; i max) - max = area[i]; - } - - return max; -} - diff --git a/sorting and basics/binary_search.py b/sorting and basics/binary_search.py new file mode 100644 index 0000000..c849ad7 --- /dev/null +++ b/sorting and basics/binary_search.py @@ -0,0 +1,36 @@ +from sorting import mergesort + + +def search(arr, item): + """Performs binary search on an array + with the given item and returns True or + False. + +>>> search([5, 4, 1, 6, 2, 3, 9, 7], 2) + True + +>>> search([5, 4, 1, 6, 2, 3, 9, 7], 8) + False + """ + + arr = mergesort(arr) + + first = 0 + last = len(arr) - 1 + found = False + + while first <= last and not found: + midpoint = (first + last) // 2 + if arr[midpoint] == item: + found = True + else: + if item < arr[midpoint]: + last = midpoint - 1 + else: + first = midpoint + 1 + + return found + + +print search([5, 4, 1, 6, 2, 3, 9, 7], 2) +print search([5, 4, 1, 6, 2, 3, 9, 7], 8) diff --git a/sorting and basics/karatsuba.py b/sorting and basics/karatsuba.py index b361b2b..1ce9ad3 100644 --- a/sorting and basics/karatsuba.py +++ b/sorting and basics/karatsuba.py @@ -6,17 +6,17 @@ def karatsuba(x, y, b=10): >>> karatsuba(1234223123412323, 1234534213423333123) 1523690672850721578619752112274729L """ - nx, ny = len(str(x))/2, len(str(y))/2 - if x < 1000 or y < 1000: return x * y - m = nx if nx < ny else ny - x1 = x / (b**m) - x0 = x % (x1 * (b**m)) - y1 = y / (b**m) - y0 = y % (y1 * (b**m)) - z1 = karatsuba(x1,y1,b) - z3 = karatsuba(x0,y0,b) + + if x < 1000 or y < 1000: + return x * y + m = min(len(str(x)) / 2, len(str(y)) / 2) + bm = b**m + x1, x0 = x / bm, x % bm + y1, y0 = y / bm, y % bm + z1 = karatsuba(x1, y1, b) + z3 = karatsuba(x0, y0, b) z2 = karatsuba(x1 + x0, y1 + y0, b) - z1 - z3 - return (b**(2*m))*z1 + (b**m)*z2 + z3 + return (bm**2)*z1 + bm*z2 + z3 if __name__ == "__main__": import doctest diff --git a/sorting and basics/sorting.py b/sorting and basics/sorting.py index 11d94c7..9e19d4e 100644 --- a/sorting and basics/sorting.py +++ b/sorting and basics/sorting.py @@ -9,10 +9,8 @@ def mergesort(arr): """ n = len(arr) if n <= 1: return arr - a1 = arr[:n/2] - a2 = arr[n/2:] - a1 = mergesort(a1) - a2 = mergesort(a2) + a1 = mergesort(arr[:n/2]) + a2 = mergesort(arr[n/2:]) return merge(a1, a2) def merge(arr_a, arr_b): @@ -25,8 +23,8 @@ def merge(arr_a, arr_b): else: arr_c.append(arr_b[j]) j += 1 - if arr_a[i:]: arr_c += arr_a[i:] - if arr_b[j:]: arr_c += arr_b[j:] + if arr_a[i:]: arr_c.extend(arr_a[i:]) + if arr_b[j:]: arr_c.extend(arr_b[j:]) return arr_c def quicksort(a): @@ -84,8 +82,9 @@ def insertionsort(a): item = a[i] j = i while j > 0 and a[j-1] > item: - a[j],a[j-1] = a[j-1],a[j] + a[j] = a[j-1] j -= 1 + a[j] = item return a if __name__ == "__main__": diff --git a/tests/gcd_test.py b/tests/gcd_test.py new file mode 100644 index 0000000..87064a5 --- /dev/null +++ b/tests/gcd_test.py @@ -0,0 +1,13 @@ +import unittest +import fractions +import GCD + +class TestEuclideanGCD(unittest.TestCase): + def test_gcd(self): + self.assertEqual(fractions.gcd(30,50),GCD.greatest_common_divisor(30,50)) + self.assertEqual(fractions.gcd(55555,123450),GCD.greatest_common_divisor(55555,123450)) + self.assertEqual(fractions.gcd(-30,-50),GCD.greatest_common_divisor(-30,-50)) + self.assertEqual(fractions.gcd(-1234,1234),GCD.greatest_common_divisor(-1234,1234)) + +if __name__ == "__main__": + unittest.main() diff --git a/tests/graph_algorithms_test.py b/tests/graph_algorithms_test.py index 28d95a4..dba26c4 100644 --- a/tests/graph_algorithms_test.py +++ b/tests/graph_algorithms_test.py @@ -63,7 +63,7 @@ def test_topological_ordering(self): dag.add_edges([("a", "b"), ("a", "c"), ("a", "e"), ("d", "a")]) dag.add_edges([("g", "b"), ("g", "f"), ("f", "e"), ("h", "f"), ("h", "a")]) order = {o[0]: o[1] for o in topological_ordering(dag)} - self.assertEqual(sum([order[u] < order[v] for (u, v) in + self.assertEqual(sum([order[u] < order[v] for (u, v) in dag.edges()]), len(dag.edges())) # all comparisons are True def test_directed_connected_components(self): @@ -86,7 +86,7 @@ def test_shortest_path_in_directed_graph(self): digr.add_nodes(["a", "b", "c", "d", "e", "f"]) digr.add_edge(("a", "b"), 7) digr.add_edge(("a", "c"), 9) - digr.add_edge(("a", "f"), 14) + digr.add_edge(("a", "f"), 14) digr.add_edge(("f", "e"), 9) digr.add_edge(("c", "f"), 2) digr.add_edge(("c", "d"), 11) @@ -101,33 +101,24 @@ def test_shortest_path_in_directed_graph(self): self.assertEqual(shortest_path(digr, "a")["f"], 11) def test_prims_minimum_spanning_tree(self): - lines = [l for l in open("tests/edges.txt")] - lines = lines[1:] - edges = (l.split() for l in lines) gr = graph() - for (u, v, w) in edges: - if u not in gr.nodes(): - gr.add_node(u) - if v not in gr.nodes(): - gr.add_node(v) - gr.add_edge( (u, v), int(w) ) - + gr.add_nodes(["a", "b", "c", "d"]) + gr.add_edge(("a", "b"), 4) + gr.add_edge(("b", "c"), 3) + gr.add_edge(("a", "c"), 1) + gr.add_edge(("c", "d"), 2) min_cost = minimum_spanning_tree(gr) - self.assertEqual(min_cost, 39) + self.assertEqual(min_cost, 6) def test_kruskals_minimum_spanning_tree(self): - lines = [l for l in open("tests/edges.txt")] - lines = lines[1:] - edges = (l.split() for l in lines) gr = graph() - for (u, v, w) in edges: - if u not in gr.nodes(): - gr.add_node(u) - if v not in gr.nodes(): - gr.add_node(v) - gr.add_edge( (u, v), int(w) ) + gr.add_nodes(["a", "b", "c", "d"]) + gr.add_edge(("a", "b"), 4) + gr.add_edge(("b", "c"), 3) + gr.add_edge(("a", "c"), 1) + gr.add_edge(("c", "d"), 2) min_cost = kruskal_MST(gr) - self.assertEqual(min_cost, 39) + self.assertEqual(min_cost, 6) if __name__ == "__main__": unittest.main() diff --git a/tests/lcs_test.py b/tests/lcs_test.py new file mode 100644 index 0000000..9e1ed15 --- /dev/null +++ b/tests/lcs_test.py @@ -0,0 +1,11 @@ +import unittest +import lcs + +class TestLCS(unittest.TestCase): + def test_lcs(self): + self.assertEqual(lcs.longest_common_subsequence("ABCD", "BBDABXYDCCAD"), (4, "ABCD")) + self.assertEqual(lcs.longest_common_subsequence("BANANA", "ATANA"), (4, "AANA")) + self.assertEqual(lcs.longest_common_subsequence("ABCDEFG", "BDGK"), (3, "BDG")) + +if __name__ == "__main__": + unittest.main() diff --git a/tests/modular_exponentiation_test.py b/tests/modular_exponentiation_test.py new file mode 100644 index 0000000..8ab9f68 --- /dev/null +++ b/tests/modular_exponentiation_test.py @@ -0,0 +1,16 @@ +import os, sys +import unittest +sys.path.append(os.path.join(os.getcwd(), os.path.pardir)) +from misc import modular_exponentiation as me + +class TestLCS(unittest.TestCase): + def test_modular_exponentiation(self): + self.assertEqual(me.modular_exponentiation(2, 10, 100), 24) + self.assertEqual(me.modular_exponentiation(2, 200, 10), 6) + self.assertEqual(me.modular_exponentiation(5, 20, 1), 0) + #self.assertEqual(me.modular_exponentiation(8, 1, 10), 8) + self.assertRaises(ValueError, me.modular_exponentiation, 12, -1, 10) + self.assertRaises(ValueError, me.modular_exponentiation, 12, 5, 0) + +if __name__ == "__main__": + unittest.main() \ No newline at end of file diff --git a/tests/modular_multiplicative_inverse_test.py b/tests/modular_multiplicative_inverse_test.py new file mode 100644 index 0000000..165eeaf --- /dev/null +++ b/tests/modular_multiplicative_inverse_test.py @@ -0,0 +1,16 @@ +import os, sys +import unittest +sys.path.append(os.path.join(os.getcwd(), os.path.pardir)) +from misc import modular_multiplicative_inverse as mmi + +class TestLCS(unittest.TestCase): + def test_modular_multiplicative_inverse(self): + self.assertEqual(mmi.modular_multiplicative_inv(10, 7), 5) + self.assertEqual(mmi.modular_multiplicative_inv(45, 13), 11) + self.assertEqual(mmi.modular_multiplicative_inv(52, 1), 0) + + self.assertRaises(ValueError, mmi.modular_multiplicative_inv, 12, -1) + self.assertRaises(ValueError, mmi.modular_multiplicative_inv, 12, 2) + +if __name__ == "__main__": + unittest.main() \ No newline at end of file diff --git a/tests/sieve_test.py b/tests/sieve_test.py new file mode 100644 index 0000000..a75dd9c --- /dev/null +++ b/tests/sieve_test.py @@ -0,0 +1,10 @@ +import unittest +from sieve_of_eratosthenes import calculate_primes + +class TestSieveOfEratosthenes(unittest.TestCase): + def test_primes(self): + self.prime_list = [2,3,5,7,11,13,17,19] + self.assertEqual(self.prime_list,calculate_primes(20)) + +if __name__ == '__main__': + unittest.main() diff --git a/tests/singly_linked_list_test.py b/tests/singly_linked_list_test.py new file mode 100644 index 0000000..6a404f1 --- /dev/null +++ b/tests/singly_linked_list_test.py @@ -0,0 +1,22 @@ +import os, sys +sys.path.append(os.path.join(os.getcwd(), os.path.pardir)) +import unittest +from lists.singlylinkedlist import SinglyLinkedList + +class test_graph(unittest.TestCase): + def setUp(self): + self.tens = SinglyLinkedList(range(0, 100, 10)) + self.blankList = SinglyLinkedList() + + def test_length_method(self): + self.assertEqual(len(self.tens), 10) + self.assertEqual(len(self.blankList), 0) + + def test_add_method(self): + self.blankList.append(50) + self.tens.append(110) + self.assertEqual(len(self.blankList), 1) + self.assertEqual(len(self.tens), 11) + +if __name__ == "__main__": + unittest.main() diff --git a/trees/binarysearchtree.py b/trees/binarysearchtree.py index 162be5f..fb573e3 100644 --- a/trees/binarysearchtree.py +++ b/trees/binarysearchtree.py @@ -12,14 +12,13 @@ def __init__(self, value): def __repr__(self): return "Node with value - %s" % self.value - class BinarySearchTree(object): def __init__(self): self.root = None self.len = 0 def __len__(self): - return self.len + return self.len def is_empty(self): return self.root == None @@ -41,7 +40,7 @@ def _postorder(self, node, values): self._postorder(node.left, values) self._postorder(node.right, values) values.append(node.value) - + def values(self, reverse = False, order="in"): values = [] if order == "in": @@ -55,7 +54,7 @@ def values(self, reverse = False, order="in"): return values def _search(self, root, value): - if not root or root.value == value: + if not root or root.value == value: return root if value < root.value: return self._search(root.left, value) @@ -80,11 +79,11 @@ def get_min(self): def get_max(self): """ returns the element with the maximum value """ return self._extremes(self.root, find_min=False) - + def successor(self, value): """ returns the successor of the element with value - value""" node = self.find_element(value) - if not node: + if not node: return None if node.right: return self._extremes(node.right, find_min=True) @@ -101,6 +100,8 @@ def insert(self, value): else: node = self.root while node and node.value != value: + if node.value == value: + return parent = node if node.value < value: node = node.right @@ -113,14 +114,14 @@ def insert(self, value): new_node.parent = parent self.len += 1 return - + def delete(self, value): """ deletes a node from tree with value - value """ node = self.find_element(value) if not node: return None if not node.left or not node.right: - node_spliced = node + node_spliced = node else: node_spliced = self.successor(node.value) if node_spliced.left: @@ -135,7 +136,7 @@ def delete(self, value): node_spliced.parent.left = temp_node else: node_spliced.parent.right = temp_node - + if node != node_spliced: node.value = node_spliced.value return node_spliced diff --git a/trees/trie.py b/trees/trie.py index b5d858c..88bfbf7 100644 --- a/trees/trie.py +++ b/trees/trie.py @@ -1,85 +1,100 @@ -""" Tries in python +""" Tries in python Methods - insert_key(k, v) has_key(k) retrie_val(k) start_with_prefix(prefix) """ -# HELPERS # -def _get_child_branches(tr): - if tr == []: - return [] - return tr[1:] -def _get_child_branch(tr, c): - for branch in _get_child_branches(tr): + +def _get_child_branches(trie): + """ + Helper method for getting branches + """ + return trie[1:] + + +def _get_child_branch(trie, c): + """ + Get branch matching the character + """ + for branch in _get_child_branches(trie): if branch[0] == c: return branch + return None -def _retrive_branch(k, trie_list): - if k == "": + +def _retrive_branch(k, trie): + """ + Get branch matching the key word + """ + if not k: return None - tr = trie_list + for c in k: - child_branch = _get_child_branch(tr, c) + child_branch = _get_child_branch(trie, c) if not child_branch: return None - tr = child_branch - return tr + trie = child_branch + + return trie + def _is_trie_bucket(bucket): if len(bucket) != 2: return False - if type(bucket[1]) is tuple: - return True + + return type(bucket[1]) is tuple + def _get_bucket_key(bucket): if not _is_trie_bucket(bucket): return None - return bucket[1][0] -# HAS_KEY # -def has_key(k, tr): - if k == "": - return None - key_tuple = _retrive_branch(k, tr) - if not key_tuple: - return False - return True + return bucket[1][0] -# RETRIE_VAL -def retrie_val(k, tr): - if k == "": - return None - key_tuple = _retrive_branch(k, tr) + +def has_key(k, trie): + """ + Check if trie contain the key word + """ + return _retrive_branch(k, trie) is not None + + +def retrie_val(k, trie): + key_tuple = _retrive_branch(k, trie) if not key_tuple: return None + return key_tuple[1] -def insert_key(key, v, trie_list): - if key == "": - return None - elif has_key(key, trie_list): - return None - else: - tr = trie_list - for char in key: - branch = _get_child_branch(tr, char) - if branch == None: - new_branch = [char] - tr.append(new_branch) - tr = new_branch - else: - tr = branch - tr.append((key, v)) - return None +def insert_key(key, v, trie): + """ + Insert a (key, value) pair into trie + """ + if not key or has_key(key, trie): + return + + for char in key: + branch = _get_child_branch(trie, char) + if not branch: + new_branch = [char] + trie.append(new_branch) + trie = new_branch + else: + trie = branch + trie.append((key, v)) def start_with_prefix(prefix, trie): + """ + Find words start with prefix + """ branch = _retrive_branch(prefix, trie) if not branch: return [] + prefix_list = [] q = branch[1:] while q: @@ -88,6 +103,7 @@ def start_with_prefix(prefix, trie): prefix_list.append(_get_bucket_key(curr_branch)) else: q.extend(curr_branch[1:]) + return prefix_list if __name__ == "__main__": @@ -142,7 +158,7 @@ def start_with_prefix(prefix, trie): Washington West Virginia Wisconsin - Wyoming""" + Wyoming""" states_list = [w.strip().lower() for w in states.splitlines() if w] for state in states_list: insert_key(state, True, trie) diff --git a/union_find/unionfind.py b/union_find/unionfind.py index 8f5838b..2bce362 100644 --- a/union_find/unionfind.py +++ b/union_find/unionfind.py @@ -59,7 +59,7 @@ def get_leader(self, a): def count_groups(self): """ returns a count of the number of groups/sets in the data structure""" - return len(self.group.keys()) + return len(self.group) def make_union(self, leadera, leaderb): """ takes union of two sets with leaders, leadera and leaderb