# serial communication params
SERIAL_PORT = "/dev/ttyUSB0"
DEFAULT_BAUD_RATE = 9600


class ArduinoControlService:
    """Drive one Arduino output over a serial link.

    The service tracks an output state (0 or 1) and writes it to the serial
    port as a single ASCII digit on every call to control(). It can be used
    as a context manager so the port is closed when the block exits.
    """

    def __init__(self, port=SERIAL_PORT, baud_rate=DEFAULT_BAUD_RATE):
        """
        Opens the serial port; the tracked state starts at 0.
        :param port: serial device name (str)
        :param baud_rate: serial speed (int)
        """
        self._controller = serial.Serial(port, baud_rate)
        self._state = 0

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        # Always release the port; never swallow the caller's exception.
        self.dispose()
        return False

    # public methods
    def get_state(self):
        """
        Returns output state.
        :return: output state 0/1
        """
        return self._state

    def control(self, state):
        """
        Control arduino writing through serial port. Output state is written as str.
        :param state: value that determines output state - one of the following values (`switch`, `power off`,
        `power on`) (str)
        :return: void method
        :raises ValueError: if state is not one of the accepted values
        """
        print("Calling arduino control method with params: [state = {}]".format(state))
        previous = self._state
        self._set_state(state)
        try:
            self._controller.write(str(self._state).encode())
        except Exception:
            # The new value never reached the port: restore the tracked state
            # so get_state() does not report a value that was never sent.
            self._state = previous
            raise

    def dispose(self):
        """
        Closes the serial port.
        :return: void method
        """
        self._controller.close()

    # private methods
    def _state_switch(self):
        """
        Switches the output state.
        :return: void method
        """
        self._state = 1 - self._state

    def _turn_on(self):
        """
        Sets output state to high.
        :return: void method
        """
        self._state = 1

    def _turn_off(self):
        """
        Sets output state to low.
        :return: void method
        """
        self._state = 0

    def _set_state(self, state):
        """
        Sets output state based on state value.
        :param state: value that determines output state - one of the following values (`switch`, `power off`,
        `power on`) (str)
        :return: void method
        :raises ValueError: if state is not one of the accepted values
        """
        actions = {
            "switch": self._state_switch,
            "power off": self._turn_off,
            "power on": self._turn_on,
        }
        try:
            action = actions[state]
        except (KeyError, TypeError):
            # TypeError covers unhashable arguments, which are invalid too.
            raise ValueError("Invalid state.") from None
        action()
        print("Current relay state = {}".format(self.get_state()))
a/README.md +++ b/README.md @@ -1,4 +1,6 @@ ## FunUtils +[![FOSSA Status](https://app.fossa.io/api/projects/git%2Bgithub.com%2FHoussemCharf%2FFunUtils.svg?type=shield)](https://app.fossa.io/projects/git%2Bgithub.com%2FHoussemCharf%2FFunUtils?ref=badge_shield) + Some codes I wrote to help me with errands. it really made my life easier hope it does for you too ^^ @@ -116,6 +118,11 @@ list.json 20. **Clipboard Translator**: Program that automatically translates a text copied to the clipboard. +21. **Translate Excel**: Program that uses Google Translate to translate a column and row range in a given Excel sheet. It prompts for the source and target languages. +``` +python3 translate_excel.py <excel_file> <column> <start_row> <end_row> +``` + ## Authors * **Houssem Charfeddine** - *FunUtils* - [HC](https://github.com/HoussemCharf) @@ -123,7 +130,12 @@ list.json * **Shashank S** - *Backup,Get Comics,Wallpaper* - [talsperre](https://github.com/talsperre) * **Aditya Y** - *ToDo* - [Screwed-U-Head](https://github.com/Screwed-Up-Head) * **Bart E** - *Bad Link Filter* - [Callidus](https://github.com/Baev1) +* **Lukas Loukota** - *Translate Excel* - [loukotal](https://github.com/loukotal) + ## License codes are licensed under the MIT License - see the [LICENSE.md](LICENSE) file for details + + +[![FOSSA Status](https://app.fossa.io/api/projects/git%2Bgithub.com%2FHoussemCharf%2FFunUtils.svg?type=large)](https://app.fossa.io/projects/git%2Bgithub.com%2FHoussemCharf%2FFunUtils?ref=badge_large) \ No newline at end of file diff --git a/Searching Algorithms/binary_search.py b/Searching Algorithms/binary_search.py new file mode 100644 index 0000000..644f98f --- /dev/null +++ b/Searching Algorithms/binary_search.py @@ -0,0 +1,29 @@ +# +# Binary search works for a sorted array. +# Note: The code logic is written for an array sorted in +# increasing order.
# Send one plain-text e-mail through an SMTP server using STARTTLS.
# Fill in the placeholders below before running the script.
import smtplib
import ssl
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText

email = "<< Enter your email >>"
password = "<< Enter your password"
to = "<< Enter sender email >>"
msg = """ << Email Body >>"""

# Build the message up front: nothing here touches the network.
message = MIMEMultipart()
message["From"] = email
message["To"] = to
message["Subject"] = "HacktoberFest 2019"
message.attach(MIMEText(msg, "plain"))

# Default context enables certificate and host name verification.
context = ssl.create_default_context()

try:
    # The context manager sends QUIT and closes the socket on exit. If the
    # connection itself fails there is no `server` object to clean up, so
    # the original connection error is what gets reported.
    with smtplib.SMTP("smtp.gmail.com") as server:
        server.starttls(context=context)
        # Re-identify after the TLS upgrade, as the original script did.
        server.ehlo()
        server.login(email, password)
        server.sendmail(email, to, message.as_string())
    print('Email have been successfully send')

except Exception as ex:
    # Top-level boundary of a small script: report the failure and stop.
    print(ex)
def bucket_sort(arr):
    ''' Bucket Sort
        Distributes the values over len(arr) buckets by their position
        between the minimum and the maximum, sorts every bucket with
        next_sort (insertion sort) and concatenates the buckets.

        Works for any mix of real numbers, including negatives and floats.
        Returns a new list; arr itself is not modified.

        Complexity: O(n^2) in the worst case
        The complexity is dominated by next_sort
    '''
    num_buckets = len(arr)
    if num_buckets == 0:
        return []

    # Hoisted out of the loop: computing max(arr) per element was O(n^2).
    lowest, highest = min(arr), max(arr)
    # +1 keeps the scaled position strictly below num_buckets.
    span = highest - lowest + 1

    buckets = [[] for _ in range(num_buckets)]
    # Assign values into buckets
    for value in arr:
        # Offsetting by the minimum keeps the index non-negative;
        # int() turns the float result of // (for float input) into an index.
        index = int((value - lowest) * num_buckets // span)
        buckets[index].append(value)

    # Sort each bucket and stitch the buckets together in order
    sorted_list = []
    for bucket in buckets:
        sorted_list.extend(next_sort(bucket))
    return sorted_list


def next_sort(arr):
    """Sort arr in place with insertion sort and return the same list."""
    for i in range(1, len(arr)):
        j = i - 1
        key = arr[i]
        # Bounds check first so arr[-1] is never read by accident.
        while j >= 0 and arr[j] > key:
            arr[j + 1] = arr[j]
            j = j - 1
        arr[j + 1] = key
    return arr
"ibm": "recognize_ibm", + "sphinx": "recognize_sphinx", + "wit": "recognize_wit", + "azure": "recognize_azure" + +} + +class SpeechRecognizer: + + def __init__(self, recognition_api="google", language="en-us"): + self._recognizer = sr.Recognizer() + # below energy_threshold is considered silence, above speech + self._recognizer.energy_threshold = 500 + self._recognition_api = recognition_api + self._recognition_method = None + self._determine_recognition_method() + self._microphone = sr.Microphone() + self._language = language + + # public methods + def set_language(self, language): + """ + Sets recognition _language. + :param str language: _language code + :rtype: None + :return: void method + """ + assert (isinstance(language, str)) + self._language = language + + def get_language(self): + """ + Returns recognition _language. + :rtype:str + :return: recognition _language + """ + return self._language + + def recognize_from_microphone(self): + """ + Returns action result with recognized text from the speech. Input speech is read from microphone. Raises RequestError or + UnknownValueError. + :rtype: string + :return: recognized text from the speech + """ + audio = self._get_audio_from_microphone() + speech = self._recognition_method(audio, language=self._language) + return speech + + # private methods + def _determine_recognition_method(self): + """ + Determines and sets recognition method - based on the API name. 
# tts audio config
PATH_TO_AUDIO_DIR = r"audio/"
DEFAULT_AUDIO_FILE = PATH_TO_AUDIO_DIR + "temporary.mp3"

#NOTE:
# install dependencies:
# pip3 install gTTS
# pip3 install playsound
class Speaker:
    """Turn text into speech with gTTS, save it as an mp3 and play it."""

    def __init__(self, language="en-us"):
        """
        Creates the gTTS object that is reused for every utterance.
        :param str language: language code handed to gTTS
        """
        self._language = language
        self._tts = gTTS(lang=self._language, text="dummy")

    # public methods
    def set_language(self, language):
        """
        Sets operating speaking language.
        :param str language: language code
        :rtype: None
        :return: void method
        """
        assert (isinstance(language, str))
        self._language = language
        self._tts.lang = self._language

    def get_language(self):
        """
        Returns speaking language.
        :rtype: str
        :return: speaking language
        """
        return self._language

    def save_speech_and_play(self, text=""):
        """
        Speak out the given text. An empty string is ignored.
        The audio is stored under a file named after the current timestamp.
        :param str text: text to be spoken
        :rtype: None
        :return: void method
        """
        assert (isinstance(text, str))
        if text != '':
            self._speak(text, str(get_current_timestamp()) + ".mp3")

    # private methods
    def _speak(self, text, file_name=DEFAULT_AUDIO_FILE):
        """
        Speak out and play audio.
        :param str text: text to be spoken
        :param str file_name: audio file in which speech will be saved; any
            name other than the default is placed inside PATH_TO_AUDIO_DIR
        :rtype: None
        :return: void method
        """
        assert (isinstance(text, str))
        if file_name != DEFAULT_AUDIO_FILE:
            file_name = PATH_TO_AUDIO_DIR + file_name
        # Saving fails when the audio directory has not been created yet,
        # so make sure it exists before writing the file.
        os.makedirs(os.path.dirname(file_name) or ".", exist_ok=True)
        self._tts.text = text
        self._tts.save(file_name)
        play_audio(file_name)
def main():
    """Translate a range of cells in one column of an Excel workbook.

    Command line: <workbook path> <column letter> <first row> <last row>.
    The source and target languages are asked for interactively. The result
    is written next to the input as "<name>_translated.xlsx".

    :return: 1 when the number of arguments is wrong, otherwise None
    """
    # Function-scope import keeps this block self-contained.
    import os

    if len(sys.argv) != 5:
        print("incorrect input parameters")
        return 1

    # Load workbook
    worksheet = sys.argv[1]
    wb = load_workbook(worksheet)

    # Selects active worksheet (excel List) (index 0 by default == 1st List)
    ws = wb.active

    column = sys.argv[2]
    start_str = column + sys.argv[3]
    end_str = column + sys.argv[4]

    # Selects the requested cells, e.g. "B2":"B7"
    rows = ws[start_str:end_str]

    translator = googletrans.Translator()

    dest = input("Translate to (ie. cs, en, ..): ")
    src = input("Translate from: ")

    for row in rows:
        for cell in row:
            # Empty cells have nothing to translate
            if not cell.value:
                continue
            cell.value = translator.translate(cell.value, dest=dest, src=src).text

    # Strip only the final extension. Splitting on the first "." broke paths
    # such as "./book.xlsx" or "reports.v2/book.xlsx".
    root, _extension = os.path.splitext(worksheet)
    save_str = root + "_translated.xlsx"

    # Saves the changes to a new file
    wb.save(save_str)
    print("Successfully translated!")
driver.page_source + +# Reading message and taking decisions based on the condition +def readMessage(): + + # A new tread will start at the specified time interval and will read the last available message + threading.Timer(5.0, readMessage).start() + url = driver.page_source + soup = bs(url, "lxml") + + try: + gotdiv = soup.find_all("div", { "class" : "msg msg-group" })[-1] + except IndexError: + gotdiv = 'null' + + if gotdiv == 'null': + div = soup.find_all("div", { "class" : "bubble bubble-text copyable-text" })[-1] + # print(div) + else: + div = soup.find_all("div", { "class" : "msg msg-group" })[-1] + + text = div.find_all('span') + print(text) + + try: + gottext = text[4].find_all(text=True)[1] + except IndexError: + gottext = 'null' + + if gottext == 'null': + div = soup.find_all("div", { "class" : "chat-title" })[-1] + name = div.find_all(text=True)[1] + try: + msg = text[-2].find_all(text=True)[1].lower() + except IndexError: + msg = "You replied last" + time = text[-1].find(text=True) + + else: #group + name = text[3].find_all(text=True)[1] + try: + msg = text[4].find_all(text=True)[1].lower() + except IndexError: + msg = "You replied last" + try: + time = text[-2].find(text=True) + except: + time = "None" + + + print(name, msg, time) + +# Getting appropriate reply from the csv +# Bot will lookup the csv for reply Only if the text contains the word buddy + +if "buddy" in msg: + + with open('dict.csv', "r") as f: + reader = csv.reader(f) + chat = {} + + for row in reader: + key = row[0] + chat[key] = row[1:] + try: + gotreply = chat[msg] + except KeyError: + gotreply = 'null' + + print(gotreply) diff --git a/heap/__init__.py b/heap/__init__.py new file mode 100644 index 0000000..9ea682d --- /dev/null +++ b/heap/__init__.py @@ -0,0 +1,5 @@ +from .binary_heap import * +from .skyline import * +from .sliding_window_max import * +from .merge_sorted_k_lists import * +from .k_closest_points import * diff --git a/heap/binary_heap.py b/heap/binary_heap.py new file 
class AbstractHeap(metaclass=ABCMeta):
    """Abstract Class for Binary Heap."""

    def __init__(self):
        pass

    @abstractmethod
    def perc_up(self, i):
        pass

    @abstractmethod
    def insert(self, val):
        pass

    @abstractmethod
    def perc_down(self, i):
        pass

    @abstractmethod
    def min_child(self, i):
        pass

    @abstractmethod
    def remove_min(self):
        # Takes no index: it always removes the root, as BinaryHeap does.
        pass


class BinaryHeap(AbstractHeap):
    """Array-backed min heap.

    The values live in self.heap starting at index 1 (index 0 is a
    placeholder), so the children of node i are 2*i and 2*i + 1 and its
    parent is i // 2. self.currentSize is the number of stored values.
    """

    def __init__(self):
        self.currentSize = 0
        self.heap = [(0)]

    def perc_up(self, i):
        """Move the value at index i up until its parent is not larger."""
        while i // 2 > 0:
            if self.heap[i] < self.heap[i // 2]:
                # Swap value of child with value of its parent
                self.heap[i], self.heap[i // 2] = self.heap[i // 2], self.heap[i]
            i = i // 2

    def insert(self, val):
        """
        Method insert always start by inserting the element at the bottom.
        it inserts rightmost spot so as to maintain the complete tree property
        Then, it fix the tree by swapping the new element with its parent,
        until it finds an appropriate spot for the element. It essentially
        perc_up the minimum element
        Complexity: O(logN)
        """
        self.heap.append(val)
        self.currentSize = self.currentSize + 1
        self.perc_up(self.currentSize)

    def min_child(self, i):
        """
        Method min_child returns index of smaller 2 childs of its parent
        """
        if 2 * i + 1 > self.currentSize:  # No right child
            return 2 * i
        # left child > right child
        if self.heap[2 * i] > self.heap[2 * i + 1]:
            return 2 * i + 1
        return 2 * i

    def perc_down(self, i):
        """Move the value at index i down until no child is smaller."""
        # "<=" so that a node whose only child is the very last element is
        # still compared with it; "<" skipped that child and could leave a
        # larger value above a smaller one.
        while 2 * i <= self.currentSize:
            min_child = self.min_child(i)
            if self.heap[min_child] < self.heap[i]:
                # Swap min child with parent
                self.heap[min_child], self.heap[i] = self.heap[i], self.heap[min_child]
            i = min_child

    def remove_min(self):
        """
        Remove Min method removes the minimum element and swap it with the last
        element in the heap( the bottommost, rightmost element). Then, it
        perc_down this element, swapping it with one of its children until the
        min heap property is restored
        Complexity: O(logN)
        :raises IndexError: if the heap is empty
        """
        if self.currentSize == 0:
            raise IndexError("remove_min from an empty heap")
        ret = self.heap[1]  # the smallest value at beginning
        self.heap[1] = self.heap[self.currentSize]  # Replace it by the last value
        self.currentSize = self.currentSize - 1
        self.heap.pop()
        self.perc_down(1)
        return ret
def k_closest(points, k, origin=(0, 0)):
    """Return the k points closest to origin, in no particular order.

    A max heap of at most k points is kept; heapq only offers a min heap,
    so the (squared) distances are stored negated and the root is always
    the farthest point kept so far.

    Time: O(k+(n-k)logk)
    Space: O(k)

    :param points: sequence of 2-D points (must support slicing)
    :param k: number of points wanted; k <= 0 yields an empty list
    :param origin: reference point, (0, 0) by default
    :return: list with min(k, len(points)) points
    """
    if k <= 0:
        # A negative k would make points[:k] slice from the end and return
        # an arbitrary subset instead of nothing.
        return []

    # Initialize max heap with first k points.
    heap = [(-distance(p, origin), p) for p in points[:k]]
    heapify(heap)

    # For every remaining point: if it is closer than the farthest point
    # kept so far it replaces the root, otherwise it is dropped again.
    # heappushpop does both in one O(logk) step, which is cheaper than a
    # separate heappush followed by heappop.
    for p in points[k:]:
        heappushpop(heap, (-distance(p, origin), p))

    return [p for _, p in heap]  # return points in heap


def distance(point, origin=(0, 0)):
    """Return the squared Euclidean distance between point and origin."""
    return (point[0] - origin[0])**2 + (point[1] - origin[1])**2
class ListNode(object):
    """Node of a singly-linked list."""

    def __init__(self, x):
        self.val = x
        self.next = None


def merge_k_lists(lists):
    """Merge sorted linked lists into one sorted list using a binary heap.

    The input nodes are relinked, no new value nodes are created.
    Complexity: O(n log k) for n nodes in total spread over k lists.

    :param lists: iterable of list heads (None entries are skipped)
    :return: head of the merged list, or None if there are no nodes
    """
    dummy = node = ListNode(0)
    # Heap entries are (value, list index, node). The list index breaks
    # ties between equal values so the nodes themselves are never compared
    # (ListNode defines no ordering). At most one node per list is in the
    # heap at any time, so the index is unique.
    h = [(n.val, i, n) for i, n in enumerate(lists) if n]
    heapify(h)
    while h:
        _, i, n = h[0]
        if n.next is None:
            heappop(h)  # only change heap size when necessary
        else:
            heapreplace(h, (n.next.val, i, n.next))
        node.next = n
        node = node.next

    return dummy.next


def merge_k_lists_pq(lists):
    """Same contract as merge_k_lists, implemented with queue.PriorityQueue.

    This variant used to be a second definition named merge_k_lists that
    silently replaced the heap version; it now has its own name.
    """
    dummy = ListNode(None)
    curr = dummy
    q = PriorityQueue()
    for i, head in enumerate(lists):
        if head:
            # The list index is the tie-breaker for equal values.
            q.put((head.val, i, head))
    while not q.empty():
        _, i, curr.next = q.get()
        curr = curr.next
        if curr.next:
            q.put((curr.next.val, i, curr.next))
    return dummy.next
+Now suppose you are given the locations and height of all the buildings +as shown on a cityscape photo (Figure A), +write a program to output the skyline formed by these buildings collectively (Figure B). + +The geometric information of each building is represented by a triplet of integers [Li, Ri, Hi], +where Li and Ri are the x coordinates of the left and right edge of the ith building, respectively, +and Hi is its height. It is guaranteed that 0 ≤ Li, Ri ≤ INT_MAX, 0 < Hi ≤ INT_MAX, and Ri - Li > 0. +You may assume all buildings are perfect rectangles grounded on an absolutely flat surface at height 0. + +For instance, the dimensions of all buildings in Figure A are recorded as: +[ [2 9 10], [3 7 15], [5 12 12], [15 20 10], [19 24 8] ] . + +The output is a list of "key points" (red dots in Figure B) in the format of +[ [x1,y1], [x2, y2], [x3, y3], ... ] +that uniquely defines a skyline. +A key point is the left endpoint of a horizontal line segment. Note that the last key point, +where the rightmost building ends, +is merely used to mark the termination of the skyline, and always has zero height. +Also, the ground in between any two adjacent buildings should be considered part of the skyline contour. + +For instance, the skyline in Figure B should be represented as:[ [2 10], [3 15], [7 12], [12 0], [15 10], [20 8], [24, 0] ]. + +Notes: + +The number of buildings in any input list is guaranteed to be in the range [0, 10000]. +The input list is already sorted in ascending order by the left x position Li. +The output list must be sorted by the x position. +There must be no consecutive horizontal lines of equal height in the output skyline. For instance, +[...[2 3], [4 5], [7 5], [11 5], [12 7]...] is not acceptable; the three lines of height 5 should be merged +into one in the final output as such: [...[2 3], [4 5], [12 7], ...] 
def get_skyline(lrh):
    """
    Compute the skyline key points for a list of buildings.

    Each building is a [left, right, height] triple; the loop relies on the
    list being ordered by left edge.

    Worst Time Complexity: O(NlogN)
    :type lrh: List[List[int]]
    :rtype: List[List[int]]
    """
    # live is a heap of (-height, -right) tuples. heapq is a min heap, so
    # live[0] is the tallest building currently tracked.
    skyline, live = [], []
    i, n = 0, len(lrh)
    while i < n or live:
        # Next event is a building start when nothing is tracked, or when
        # the next building begins no later than the tallest one ends.
        if not live or i < n and lrh[i][0] <= -live[0][1]:
            x = lrh[i][0]
            # Add every building that starts at this same x.
            while i < n and lrh[i][0] == x:
                heapq.heappush(live, (-lrh[i][2], -lrh[i][1]))
                i += 1
        else:
            # Next event is the end of the tallest tracked building.
            x = -live[0][1]
            # Drop buildings from the top that have ended by x; shorter
            # ended buildings stay buried until they reach the top.
            while live and -live[0][1] <= x:
                heapq.heappop(live)
        # 0 when nothing is tracked, otherwise the tallest tracked height.
        height = len(live) and -live[0][0]
        # Only record a point when the height actually changes.
        if not skyline or height != skyline[-1][1]:
            # Trailing comma: a 1-tuple holding the [x, height] pair, so +=
            # appends that pair as a single element.
            skyline += [x, height],
    return skyline
def max_sliding_window(nums, k):
    """
    Return the maximum of every window of size k sliding over nums.

    If nums holds fewer than k values there is a single window covering
    all of them. An empty nums is returned unchanged.

    Runs in O(n): a deque keeps the candidates for the maximum in
    non-increasing order, so the current maximum is always at its front.

    :type nums: List[int]
    :type k: int
    :rtype: List[int]
    :raises ValueError: if nums is not empty and k is not positive
    """
    if not nums:
        return nums
    if k <= 0:
        raise ValueError("k must be a positive integer")

    candidates = collections.deque()  # (index, value) pairs
    res = []
    for i, num in enumerate(nums):
        # Smaller values to the left can never be a maximum again.
        while candidates and candidates[-1][1] < num:
            candidates.pop()
        candidates.append((i, num))
        # Drop the front once it has slid out of the window.
        if candidates[0][0] <= i - k:
            candidates.popleft()
        # The first full window ends at index k - 1.
        if i >= k - 1:
            res.append(candidates[0][1])
    if not res:
        # Fewer than k values: one window containing everything.
        res.append(candidates[0][1])
    return res
def bench_k_means(estimator, name, data):
    """
    Fit the estimator on data and print one line of clustering scores.

    NOTE: the true labels are not a parameter; they are read from the
    module-level variable `y`, which must be defined before this is called.

    :param estimator: clustering estimator; its fit(), inertia_ and labels_ are used
    :param name: label printed at the start of the line
    :param data: samples handed to fit() and to the silhouette score
    :return: None
    """
    estimator.fit(data)
    # A short explanation for every score:
    # homogeneity: each cluster contains only members of a single class (range 0 - 1)
    # completeness: all members of a given class are assigned to the same cluster (range 0 - 1)
    # v_measure: harmonic mean of homogeneity and completeness
    # adjusted_rand: similarity of the actual values and their predictions,
    #                ignoring permutations and with chance normalization
    #                (range -1 to 1, -1 being bad, 1 being perfect and 0 being random)
    # adjusted_mutual_info: agreement of the actual values and predictions, ignoring permutations
    #                (range 0 - 1, with 0 being random agreement and 1 being perfect agreement)
    # silhouette: uses the mean distance between a sample and all other points in the same class,
    #             as well as the mean distance between a sample and all other points in the nearest cluster
    #             to calculate a score (range: -1 to 1, with the former being incorrect,
    #             and the latter standing for highly dense clustering.
    #             0 indicates overlapping clusters.
    print('%-9s \t%i \thomogeneity: %.3f \tcompleteness: %.3f \tv-measure: %.3f \tadjusted-rand: %.3f \t'
          'adjusted-mutual-info: %.3f \tsilhouette: %.3f'
          % (name, estimator.inertia_,
             metrics.homogeneity_score(y, estimator.labels_),
             metrics.completeness_score(y, estimator.labels_),
             metrics.v_measure_score(y, estimator.labels_),
             metrics.adjusted_rand_score(y, estimator.labels_),
             metrics.adjusted_mutual_info_score(y, estimator.labels_),
             metrics.silhouette_score(data, estimator.labels_,
                                      metric='euclidean')))


def plot(kmeans, data):
    """
    Reduce data to two PCA components, refit kmeans on the reduced data and
    show the cluster regions, the samples and the centroids in one figure.

    The estimator passed in is refitted, so any earlier fit is replaced.

    :param kmeans: clustering estimator; fit(), predict() and cluster_centers_ are used
    :param data: samples to project and cluster
    :return: None
    """
    reduced_data = PCA(n_components=2).fit_transform(data)
    kmeans.fit(reduced_data)

    # Step size of the mesh. Decrease to increase the quality of the VQ.
    h = .01

    # Plot the decision boundary. For that, we will assign a color to each
    # point in the mesh [x_min, x_max]x[y_min, y_max].
    x_min, x_max = reduced_data[:, 0].min() - 1, reduced_data[:, 0].max() + 1
    y_min, y_max = reduced_data[:, 1].min() - 1, reduced_data[:, 1].max() + 1
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))

    # Obtain labels for each point in mesh. Use last trained model.
    Z = kmeans.predict(np.c_[xx.ravel(), yy.ravel()])

    # Put the result into a color plot
    Z = Z.reshape(xx.shape)
    plt.figure(1)
    plt.clf()
    plt.imshow(Z, interpolation='nearest',
               extent=(xx.min(), xx.max(), yy.min(), yy.max()),
               cmap=plt.cm.Paired,
               aspect='auto', origin='lower')

    # Samples are drawn as small black dots on top of the regions
    plt.plot(reduced_data[:, 0], reduced_data[:, 1], 'k.', markersize=2)

    # Plot the centroids as a white X
    centroids = kmeans.cluster_centers_
    plt.scatter(centroids[:, 0], centroids[:, 1],
                marker='x', s=169, linewidths=3,
                color='w', zorder=10)
    plt.title('K-means clustering on the digits dataset (PCA-reduced data)\n'
              'Centroids are marked with white cross')
    plt.xlim(x_min, x_max)
    plt.ylim(y_min, y_max)
    plt.xticks(())
    plt.yticks(())
    plt.show()
# -------------------------------------------------------------------------------------------------------- #
# A script using linear regression to estimate the grades of students in G3 based on their results in G1   #
# and G2 as well as their absences during the academic year, their failures and the time studied per week. #
# Written by @tobinatore                                                                                   #
#                                                                                                          #
# This script uses the following dataset:                                                                  #
# https://archive.ics.uci.edu/ml/datasets/Student+Performance                                              #
# -------------------------------------------------------------------------------------------------------- #


def read_data(filename):
    """
    Read the semicolon-separated CSV file and keep only the columns the model uses.

    :param filename: path of (or file-like object for) the CSV file
    :return: DataFrame with the columns G1, G2, studytime, failures, absences, G3
    """
    columns = ["G1", "G2", "studytime", "failures", "absences", "G3"]
    return pd.read_csv(filename, sep=";")[columns]


def _check_samples(pred, res):
    """Raise ValueError unless pred and res are non-empty and equally long."""
    if len(pred) != len(res):
        raise ValueError("pred and res must have the same length")
    if len(pred) == 0:
        raise ValueError("pred and res must not be empty")


def r_squared(pred, res):
    """
    Calculate the R² score of the predictions.

    The score is at most 1.0 (perfect fit), the higher the better. It is
    negative when the predictions are worse than always predicting the mean.

    :param pred: List of predicted values
    :param res: List of actual values
    :return: Float
    :raises ValueError: if the lists are empty or differ in length
    :raises ZeroDivisionError: if all values in ``res`` are identical
    """
    _check_samples(pred, res)

    # The mean does not change between samples, so compute it once instead
    # of once per sample.
    mean_res = statistics.mean(res)
    ss_t = sum((actual - mean_res) ** 2 for actual in res)
    ss_r = sum((actual - guess) ** 2 for guess, actual in zip(pred, res))

    return 1 - (ss_r / ss_t)


def rmse(pred, res):
    """
    Calculate the Root Mean Square Error.
    The lower the returned value, the better.

    :param pred: List of predicted values
    :param res: List of actual values
    :return: Float
    :raises ValueError: if the lists are empty or differ in length
    """
    _check_samples(pred, res)
    squared_error = sum((actual - guess) ** 2 for guess, actual in zip(pred, res))
    return np.sqrt(squared_error / len(pred))


def get_cost(X, y, theta):
    """
    Get the cost (half of the mean squared error) for the current values of theta.

    The script below passes X with one row per sample, y as a column vector
    and theta as a single row.

    :param X: numpy.ndarray
    :param y: numpy.ndarray
    :param theta: numpy.ndarray
    :return: Float
    """
    residuals = (X @ theta.T) - y
    return np.sum(np.power(residuals, 2)) / (2 * len(X))


def gradient_descent(X, y, theta, iterations, alpha):
    """
    Optimize the values of theta using batch gradient descent.

    :param X: numpy.ndarray
    :param y: numpy.ndarray
    :param theta: numpy.ndarray, the starting values (not modified in place)
    :param iterations: Integer, number of update steps
    :param alpha: Float, the learning rate
    :return: numpy.ndarray, numpy.ndarray (final theta, cost after every step)
    """
    cost = np.zeros(iterations)
    step = alpha / len(X)  # constant for the whole run
    for i in range(iterations):
        theta = theta - step * np.sum(X * ((X @ theta.T) - y), axis=0)
        cost[i] = get_cost(X, y, theta)
    return theta, cost


def main():
    """Train the model on student-mat.csv, plot the cost curve and print the scores."""
    data = read_data("student-mat.csv")

    # Splitting the data in two batches.
    # 70% training data, 30% test data
    train = data.sample(frac=0.7)
    test = data.drop(train.index)

    # Preparing 2 numpy arrays.
    # X will hold all data except G3 (plus a leading column of ones for the
    # intercept) and y only holds G3
    features = train.iloc[:, :5]
    ones = np.ones([features.shape[0], 1])
    X = np.concatenate((ones, features), axis=1)
    y = train.iloc[:, -1:].values

    # Initializing theta
    theta = np.zeros([1, 6])

    # Setting hyper parameters
    alpha = 0.00001
    iterations = 5000

    # Training the model.
    # This means optimizing the cost via gradient descent and calculating the final cost.
    theta, cost = gradient_descent(X, y, theta, iterations, alpha)
    final_cost = get_cost(X, y, theta)

    # Plotting the cost in relation to the iteration
    _fig, ax = plt.subplots()
    ax.plot(np.arange(iterations), cost, 'r')
    ax.set_xlabel('Iterations')
    ax.set_ylabel('Cost')
    ax.set_title('Error vs. Training Epoch')
    plt.show()

    print("Final cost: ", final_cost)

    # Initializing the test set; y_test is flattened from [[10],[4],...,[20]]
    # to a simple list [10, 4, ..., 20]
    X_test = test.iloc[:, :5].values.tolist()
    y_test = [entry[0] for entry in test.iloc[:, -1:].values.tolist()]

    # Calculating predictions using the function theta1 + (theta2 * x1) + ... + (theta6 * x5)
    intercept, *weights = theta.tolist()[0]
    predictions = [
        round(intercept + sum(weight * value for weight, value in zip(weights, line)))
        for line in X_test
    ]

    # Printing the score of the model
    print("RMSE-Score: ", rmse(predictions, y_test))
    print("R²-Score:", r_squared(predictions, y_test))


# Only train and plot when executed as a script, so that importing the
# helpers above does not read the CSV file or open a plot window.
if __name__ == "__main__":
    main()


# -------------------------------------------------------------------------------------------------------- #
# The definitions below are the content of machine learning/nearest_neighbor.py, the next file in the diff. #
# -------------------------------------------------------------------------------------------------------- #
import math


def distance(x, y):
    """
    HELPER-FUNCTION
    Calculates the (euclidean) distance between vector x and y.

    Arguments:
        x {[tuple]} -- [vector]
        y {[tuple]} -- [vector]

    Returns:
        [float] -- [euclidean distance between x and y]

    Raises:
        AssertionError -- if the vectors differ in length
    """
    assert len(x) == len(y), "The vector must have same length"
    return math.sqrt(sum((a - b) ** 2 for a, b in zip(x, y)))


def nearest_neighbor(x, tSet):
    """
    Implements the nearest neighbor algorithm.

    Arguments:
        x {[tuple]} -- [vector to classify]
        tSet {[dict]} -- [training set, maps a vector (tuple) to its value]

    Returns:
        [type] -- [value stored for the training vector closest to x;
                   the first one wins when several are equally close]

    Raises:
        AssertionError -- if x is not a tuple or tSet is not a dict
        ValueError -- if the training set is empty
    """
    assert isinstance(x, tuple) and isinstance(tSet, dict)
    if not tSet:
        raise ValueError("The training set must not be empty")
    closest_key = min(tSet, key=lambda key: distance(x, key))
    return tSet[closest_key]