diff --git a/.gitignore b/.gitignore index 58391e9..94f7230 100644 --- a/.gitignore +++ b/.gitignore @@ -6,7 +6,8 @@ secrets/ **/__pycache__/ codecompasslib/API/datasets/**.csv codecompasslib/API/**.txt -dataset_new.csv codecompasslib/models/**.csv codecompasslib/models/examples/**.csv -codecompasslib/PretrainedModels/ \ No newline at end of file +codecompasslib/PretrainedModels/ +dump.rdb +data/* diff --git a/codecompasslib/API/drive_operations.py b/codecompasslib/API/drive_old/drive_operations.py similarity index 100% rename from codecompasslib/API/drive_operations.py rename to codecompasslib/API/drive_old/drive_operations.py diff --git a/codecompasslib/API/helper_functions.py b/codecompasslib/API/helper_functions.py index 4b04da0..178a016 100644 --- a/codecompasslib/API/helper_functions.py +++ b/codecompasslib/API/helper_functions.py @@ -15,7 +15,7 @@ def save_to_csv(data: any, filename: str) -> None: :return: Does not return anything. """ df: DataFrame = DataFrame(data) - df.to_csv(Path(PARENT_PATH + '/Data/' + filename), index=False) + df.to_csv(Path(PARENT_PATH + '/data/' + filename), index=False) def list_to_txt(data: list, file_name: str) -> bool: diff --git a/codecompasslib/API/redis_operations.py b/codecompasslib/API/redis_operations.py new file mode 100644 index 0000000..fbbcde1 --- /dev/null +++ b/codecompasslib/API/redis_operations.py @@ -0,0 +1,154 @@ +#ADD ARGUMENT HERE FOR EMBEDDED / NON EMBEDDED WHEN IMPLEMENTING REDIS FOR BOTH DATASETS +import json +import sys +import os +from redis import Redis +from pandas import DataFrame, concat, read_csv +from numpy import vstack + + +# Redis client constants +REDIS_HOST = 'localhost' +REDIS_PORT = 6379 +REDIS_DB = 0 + +#Initialize Redis client +redis_client = Redis(host=REDIS_HOST, port=REDIS_PORT, db=REDIS_DB, decode_responses=True) + +def redis_to_dataframe() -> DataFrame: + """ + Retrieves embedded datasets from Redis and converts them into a DataFrame. 
+ + Returns: + pd.DataFrame: A DataFrame containing 'id' and 'embedding' columns. + """ + embedded_data = [] + + # Fetch all keys matching the pattern "embedded:*" + redis_keys = redis_client.keys('embedded:*') + + for key in redis_keys: + # Decode the key from bytes to string + key_str = key + + # Get the corresponding embedding vector + embedded_vector = redis_client.get(key_str) + + if embedded_vector: + embedding_list = json.loads(embedded_vector) # Convert from JSON string to list + repository_id = key_str.split(":")[1] # Extracting the repository ID from the key + embedded_data.append({'id': float(repository_id), 'embedding': embedding_list}) + + # Create a DataFrame from the collected embedded data + df_embed = DataFrame(embedded_data) + df_embed['id'] = df_embed['id'].astype(float) + + embedding_array = vstack(df_embed['embedding'].values) + + df_embeddings = DataFrame(embedding_array) + df_embeddings.columns = [f"embedding_{i}" for i in range(df_embeddings.shape[1])] + df_embeddings = df_embeddings.astype(float) + + df_embedded = concat([df_embed[['id']], df_embeddings], axis=1) + + return df_embedded + +def load_non_embedded_data(fname: str) -> DataFrame: + """ + Load non-embedded data from a local CSV file. + :param file_path: Path to the non-embedded CSV file. + :return: DataFrame containing non-embedded data. + """ + root_dir = os.path.dirname(os.path.abspath(__file__)) + + project_dir = os.path.dirname(root_dir) + real_project_dir = os.path.dirname(project_dir) + # Add the project directory to the Python path + sys.path.insert(0, real_project_dir) + datafolder = real_project_dir + '/data/' + + df_non_embedded = read_csv(datafolder + fname) + return df_non_embedded + + +def save_redis_to_json(file_path='redis_data.json'): + """ + Save all Redis data to a JSON file. + + Parameters: + - file_path (str): The path to the JSON file where data will be saved. 
+ """ + # Get all keys + keys = redis_client.keys('*') # Use '*' to match all keys + print(f"Number of keys: {len(keys)}") + + # Prepare a dictionary to hold all key-value pairs + data_dict = {} + + for key in keys: + print(f"KEY: {key}") + print("Data type:", redis_client.type(key)) + value = redis_client.get(key) # Adjust this function according to the Redis type, e.g., get, hgetall + + # Store in the dictionary with value handling + data_dict[key] = value + + # Write to a JSON file + with open(file_path, 'w') as json_file: + json.dump(data_dict, json_file, indent=2, ensure_ascii=False) + + print(f"Data saved to {file_path}") + +def load_json_to_redis(file_path='redis_data.json', host='localhost', port=6379, db=0): + """ + Load data from a JSON file into a Redis database. + + Parameters: + - file_path (str): The path to the JSON file to be loaded. + - host (str): The Redis server hostname. + - port (int): The Redis server port. + - db (int): The Redis database number. + """ + + # Open the JSON file and load its data + with open(file_path, 'r', encoding='utf-8') as json_file: + data_dict = json.load(json_file) + + # Iterate over each key-value pair in the loaded data and save them in Redis + for key, value in data_dict.items(): + if value is not None: + redis_client.set(key, value) + + print(f"Data loaded into Redis from {file_path}") + +def load_csv_to_redis(fname="df_embedded_combined"): + """ + Load data from a CSV file into a Redis database. + + Parameters: + - fname (str): The name of the CSV file to be loaded (assumes it ends with '.csv'). 
+ """ + path = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))), 'data', fname + '.csv') + print("Loading from:", path) + + # Read the CSV file into a pandas DataFrame + df = read_csv(path) + + # Make sure to create the embeddings_columns dynamically based on the data + embedding_columns = [col for col in df.columns if col.startswith("embedding_")] + + # Store each embedding in Redis + for index, row in df.iterrows(): + redis_key = f"embedded:{row['id']}" # Use repository ID as the Redis key + # Convert the embedding columns to a list and store as a JSON string + redis_client.set(redis_key, json.dumps(row[embedding_columns].tolist())) + if index % 10000 == 0: + print(f"Stored {index} embeddings in Redis") + + print(f"Data loaded into Redis from {fname}.csv") + +if __name__ == "__main__": + #save_redis_to_json('redis_embedded.json') + #load_json_to_redis('redis_embedded.json') + #load_csv_to_redis() + pass \ No newline at end of file diff --git a/codecompasslib/API/testing_redis.ipynb b/codecompasslib/API/testing_redis.ipynb new file mode 100644 index 0000000..45407c9 --- /dev/null +++ b/codecompasslib/API/testing_redis.ipynb @@ -0,0 +1,364 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "from redis_operations import *" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Exception ignored in: >\n", + "Traceback (most recent call last):\n", + " File \"/Users/maudhelenhovland/Desktop/OneDrive - IE/Y3Q2/chatbots_recos/CodeCompass/.venv/lib/python3.10/site-packages/ipykernel/ipkernel.py\", line 775, in _clean_thread_parent_frames\n", + " def _clean_thread_parent_frames(\n", + "KeyboardInterrupt: \n" + ] + } + ], + "source": [ + "df = redis_to_dataframe()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df.head(5)" + ] + }, + { + "cell_type": "code", + "execution_count": 2, 
+ "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/maudhelenhovland/Desktop/OneDrive - IE/Y3Q2/chatbots_recos/CodeCompass/codecompasslib/API/redis_operations.py:70: DtypeWarning: Columns (6,11,12,15,16,17,18,19,20,21,22,23,24) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " df_non_embedded = read_csv(datafolder + fname)\n" + ] + } + ], + "source": [ + "non_embedded_df = load_non_embedded_data(\"data_full.csv\")" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idnameowner_userowner_typedescriptionurlis_forkdate_createddate_updateddate_pushed...has_wikihas_pageshas _discussionsnum_forksis_archivedis_disabledis_templatelicenseopen_issuestopics
0444741024.0doc.aurora.dev-develop-compat-evmmercyogUserNo descriptionhttps://api.github.com/repos/mercyog/doc.auror...False2022-01-05T09:24:59Z2023-03-05T00:05:37Z2022-01-05T09:24:59Z...TrueFalseFalse0FalseFalseFalseNo license0.0[]
1404812692.0White_PapermercyogUserWhite Paper for Choice Coinhttps://api.github.com/repos/mercyog/White_PaperTrue2021-09-09T17:30:02Z2023-03-05T00:05:37Z2021-09-09T00:41:10Z...TrueFalseFalse0FalseFalseFalseNo license0.0[]
2462013111.0Advance-SQL-AssignmentRameshwar0852UserAdvance SQL assignment.https://api.github.com/repos/Rameshwar0852/Adv...True2022-02-21T20:01:28Z2024-03-09T14:13:51Z2020-08-05T20:54:22Z...TrueFalseFalse0FalseFalseFalseNo license0.0[]
3462013123.0anosqlRameshwar0852UserEasy SQL in Pythonhttps://api.github.com/repos/Rameshwar0852/anosqlTrue2022-02-21T20:01:29Z2024-03-09T14:13:51Z2020-09-09T18:47:09Z...TrueFalseFalse0FalseFalseFalseOther0.0[]
4585055299.0Automation_ProjectRameshwar0852UserAutomated Bash Script to automate log Backup g...https://api.github.com/repos/Rameshwar0852/Aut...False2023-01-04T07:49:35Z2024-03-09T14:13:42Z2023-01-04T17:15:46Z...TrueFalseFalse0FalseFalseFalseNo license0.0[]
\n", + "

5 rows × 28 columns

\n", + "
" + ], + "text/plain": [ + " id name owner_user owner_type \\\n", + "0 444741024.0 doc.aurora.dev-develop-compat-evm mercyog User \n", + "1 404812692.0 White_Paper mercyog User \n", + "2 462013111.0 Advance-SQL-Assignment Rameshwar0852 User \n", + "3 462013123.0 anosql Rameshwar0852 User \n", + "4 585055299.0 Automation_Project Rameshwar0852 User \n", + "\n", + " description \\\n", + "0 No description \n", + "1 White Paper for Choice Coin \n", + "2 Advance SQL assignment. \n", + "3 Easy SQL in Python \n", + "4 Automated Bash Script to automate log Backup g... \n", + "\n", + " url is_fork \\\n", + "0 https://api.github.com/repos/mercyog/doc.auror... False \n", + "1 https://api.github.com/repos/mercyog/White_Paper True \n", + "2 https://api.github.com/repos/Rameshwar0852/Adv... True \n", + "3 https://api.github.com/repos/Rameshwar0852/anosql True \n", + "4 https://api.github.com/repos/Rameshwar0852/Aut... False \n", + "\n", + " date_created date_updated date_pushed ... \\\n", + "0 2022-01-05T09:24:59Z 2023-03-05T00:05:37Z 2022-01-05T09:24:59Z ... \n", + "1 2021-09-09T17:30:02Z 2023-03-05T00:05:37Z 2021-09-09T00:41:10Z ... \n", + "2 2022-02-21T20:01:28Z 2024-03-09T14:13:51Z 2020-08-05T20:54:22Z ... \n", + "3 2022-02-21T20:01:29Z 2024-03-09T14:13:51Z 2020-09-09T18:47:09Z ... \n", + "4 2023-01-04T07:49:35Z 2024-03-09T14:13:42Z 2023-01-04T17:15:46Z ... 
\n", + "\n", + " has_wiki has_pages has _discussions num_forks is_archived is_disabled \\\n", + "0 True False False 0 False False \n", + "1 True False False 0 False False \n", + "2 True False False 0 False False \n", + "3 True False False 0 False False \n", + "4 True False False 0 False False \n", + "\n", + " is_template license open_issues topics \n", + "0 False No license 0.0 [] \n", + "1 False No license 0.0 [] \n", + "2 False No license 0.0 [] \n", + "3 False Other 0.0 [] \n", + "4 False No license 0.0 [] \n", + "\n", + "[5 rows x 28 columns]" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "non_embedded_df.head(5)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "#Get list of all unique owner_user\n", + "\n", + "unique_names = non_embedded_df[\"owner_user\"].unique()" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['mercyog' 'Rameshwar0852' 'CodingWithHarry' 'TerakomariGandesblood'\n", + " 'lebrancconvas' 'carlosallexandre' 'Endarzboy' 'GokhanRepo' 'jonasbn'\n", + " 'WesleyRodrigues55' 'comxd' 'v1xingyue' 'normantodd' 'indrakishore'\n", + " 'nelsontang' 'Sendan4' 'k3ntar0' 'kbjude' 'friism' 'burnflare' 'pirahawk'\n", + " 'shojib' 'neodigm' 'RaffaelSchemmer' 'ashishpatel1992' 'ibocon'\n", + " 'yongfengxu' 'jrcastine' 'include' 'mfeldman143' 'crazedRomeo'\n", + " 'vivekky57' 'JPKovacs' 'philippeboyd' 'pid1' 'ITOPanda' 'kizen777'\n", + " 'EIETMC2' 'rodrich' 'Bnowako' '00-Python' 'killsnow' 'Sagiri18'\n", + " 'trantuanngoc' 'gp48maz1' 'chinahappyking' 'chenxiing' '1sagarcharaniya1'\n", + " 'keepallsimple' 'ku' 'regina-book' 'KarthickAN' 'goldenminerlmg'\n", + " 'elitongadotti' 'LeeKangHyun' 'ambitionli' 'xiedacon' 'ahahh' 'davidu'\n", + " 'csendranshi' 'JulianWe' 'Antonio24' 'aquateen' 'flom84' 'yoshikinoue'\n", + " 'k1selman' 
'vin-node' 'jderrett' 'jhhb' 'sebas1989' 'abrahamsod'\n", + " 'zirtaebn' 'LulaV14' 'CuteMing' 'teckick' 'joeldrapper' 'sifanxu1996'\n", + " 'rtfeldman' 'srinivashappy' 'nazieb' 'ymdysk' 'rurutea' 'QuteMiao'\n", + " 'EsMaybe' 'CarloDotLog' 'xhebox' 'TooSchoolForCool' 'KarthikJagadish'\n", + " 'pwnall' 'DBLESSED1' 'MoOx' 'Thirteentj' 'LegendAJJ' 'go-diego' 'wez'\n", + " 'mariotsvetanov' 'Aronfeyman' 'beerkaya' 'pros2021' 'railsbob']\n" + ] + } + ], + "source": [ + "print(unique_names[:100])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.14" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/codecompasslib/embeddings/embeddings_helper_functions.py b/codecompasslib/embeddings/embeddings_helper_functions.py index 8a13a2f..abc9203 100644 --- a/codecompasslib/embeddings/embeddings_helper_functions.py +++ b/codecompasslib/embeddings/embeddings_helper_functions.py @@ -2,25 +2,25 @@ import pandas as pd from gensim.models.keyedvectors import KeyedVectors from sentence_transformers import SentenceTransformer -from langchain_community.embeddings import OllamaEmbeddings +#from langchain_community.embeddings import OllamaEmbeddings from gensim.models import KeyedVectors import openai -def add_embeddings_to_existing_dataset(df1, df2): - """ - Combines two DataFrames containing embeddings into a single DataFrame. +# def add_embeddings_to_existing_dataset(df1, df2): +# """ +# Combines two DataFrames containing embeddings into a single DataFrame. - Args: - df1 (pandas.DataFrame): The first DataFrame containing embeddings. 
- df2 (pandas.DataFrame): The second DataFrame containing embeddings. +# Args: +# df1 (pandas.DataFrame): The first DataFrame containing embeddings. +# df2 (pandas.DataFrame): The second DataFrame containing embeddings. - Returns: - pandas.DataFrame: A DataFrame containing the combined embeddings. +# Returns: +# pandas.DataFrame: A DataFrame containing the combined embeddings. - """ - df_combined = pd.concat([df1, df2], axis=0) - df_combined = df_combined.drop_duplicates(subset='id', keep='first') # Remove duplicates - return df_combined +# """ +# df_combined = pd.concat([df1, df2], axis=0) +# df_combined = df_combined.drop_duplicates(subset='id', keep='first') # Remove duplicates +# return df_combined def load_word2vec_model(): """ diff --git a/codecompasslib/embeddings/generate_embedded_dataset.py b/codecompasslib/embeddings/generate_embedded_dataset.py index aaa05a7..7927ffa 100644 --- a/codecompasslib/embeddings/generate_embedded_dataset.py +++ b/codecompasslib/embeddings/generate_embedded_dataset.py @@ -8,17 +8,28 @@ # Add the project directory to the Python path sys.path.insert(0, real_project_dir) -from codecompasslib.API.drive_operations import get_creds_drive, list_shared_drive_contents, download_csv_as_pd_dataframe, upload_df_to_drive_as_csv from codecompasslib.embeddings.embeddings_helper_functions import generate_openAI_embeddings from codecompasslib.models.secrets_manager import load_openai_key + import openai import pandas as pd +import redis +import json +import numpy as np +from redis import Redis + +# Redis client constants +REDIS_HOST = 'localhost' +REDIS_PORT = 6379 +REDIS_DB = 0 +#Initialize Redis client +redis_client = Redis(host=REDIS_HOST, port=REDIS_PORT, db=REDIS_DB, decode_responses=True) -# generate embedded dataset using OpenAI embeddings -def generate_openAI_embedded_csv(df, column_to_embed): +# Generate embedded dataset using OpenAI embeddings +def generate_openAI_embedded_to_redis(df, column_to_embed): """ - Generates embeddings for a 
given textual column in a DataFrame and saves the embeddings to a CSV file. + Generates embeddings for a given textual column in a DataFrame and saves the embeddings to Redis. Args: df (pandas.DataFrame): The DataFrame containing the data. @@ -34,94 +45,70 @@ def generate_openAI_embedded_csv(df, column_to_embed): df = pd.DataFrame({'id': [1, 2, 3], 'text': ['Hello', 'World', 'GitHub']}) df_with_embeddings = generate_openAI_embedded_csv(df, 'text') """ - # remove rows with missing values (We still have a very big dataset after removing the missing values anyway) + # Remove rows with missing values df_clean = df.dropna() - - # turn description to lowercase and remove row if description="no description" or empty string + + # Turn description to lowercase and remove rows if description="no description" or empty string df_clean = df_clean[df_clean[column_to_embed].str.lower() != 'no description'] - - # cut text if it's size exceeds 8000 tokens - df_clean[column_to_embed] = df_clean[column_to_embed].apply(lambda x: x[:8190]) # due to openAI API limit - - # grab api key from secrets + + # Cut text if its size exceeds 8000 tokens + df_clean[column_to_embed] = df_clean[column_to_embed].apply(lambda x: x[:8190]) # due to OpenAI API limit + + # Grab API key from secrets api_key = load_openai_key() client = openai.Client(api_key=api_key) - - # extract textual column as list of strings + + # Extract textual column as list of strings textual_column = df_clean[column_to_embed].values.tolist() - # extract id + # Extract IDs and owner_users ids = df_clean['id'].values.tolist() + owner_users = df_clean['owner_user'].values.tolist() - # create an emptry dataframe to store the embeddings - embedding_size = len(generate_openAI_embeddings('Test textual column', client).data[0].embedding) + # Create an empty DataFrame to store the embeddings + embedding_size = len(generate_openAI_embeddings('Test text for embedding', client).data[0].embedding) embeddings_columns = ['embedding_' + str(i) 
for i in range(embedding_size)] df_with_embeddings = pd.DataFrame(columns=['id', 'owner_user'] + embeddings_columns) - batch_size = 2040 # You can adjust this value based on the API limits and your requirements + + batch_size = 2040 # Adjust this value based on the API limits and your requirements # Iterate over every batch of textual column for i in range(0, len(textual_column), batch_size): - if i % (batch_size*10) == 0: + if i % (batch_size * 10) == 0: print(f"Processing batch starting at index: {i}") - + # Get the current batch of textual column - descriptions_batch = textual_column[i:i+batch_size] - + descriptions_batch = textual_column[i:i + batch_size] + # Get the embeddings for the current batch embeddings_response = generate_openAI_embeddings(descriptions_batch, client) - # Create a DataFrame for the current batch batch_df = pd.DataFrame(columns=['id', 'owner_user'] + embeddings_columns) - batch_df['id'] = ids[i:i+batch_size] - batch_df['owner_user'] = owner_users[i:i+batch_size] - + batch_df['id'] = ids[i:i + batch_size] + batch_df['owner_user'] = owner_users[i:i + batch_size] + # Extract the embeddings and convert them into a list of lists embeddings_list = [embedding.embedding for embedding in embeddings_response.data] # Convert the list of lists into a DataFrame embeddings_df = pd.DataFrame(embeddings_list, dtype='float16') - # Assuming 'batch_df' is your original DataFrame and you want to add the embeddings to it - # Make sure 'batch_df' has the same number of rows as 'embeddings_df' + # Assuming 'batch_df' is the original DataFrame, add the embeddings to it batch_df[embeddings_columns] = embeddings_df - - # Save the current batch DataFrame to a CSV file - # Mode 'a' is for append, header=False to avoid writing headers multiple times - batch_df.to_csv('df_embedded_0504_batch.csv', mode='a', header=not i, index=False) - - # Optional: Free up memory by deleting the batch DataFrame if no longer needed - del batch_df - - # Load the CSV file with the 
embeddings - df_with_embeddings = pd.read_csv('df_embedded_0504_batch.csv') - return df_with_embeddings - -def main(): - # Load the dataset - DRIVE_ID = "0AL1DtB4TdEWdUk9PVA" - DATA_FOLDER = "13JitBJQLNgMvFwx4QJcvrmDwKOYAShVx" - creds = get_creds_drive() - list_shared_drive_contents(creds=creds, folder_id=DATA_FOLDER, drive_id=DRIVE_ID) - - df = download_csv_as_pd_dataframe(creds,"1WSgwAhzNbSqC6e_RRBDHpgpQCnGZvVcc") - - columns_to_retrieve = ['id', 'name', 'owner_user', 'description', 'stars', 'language'] - - # retrieve only the columns that are needed - df = df[columns_to_retrieve] - - # Define the column to embed - column_to_embed = 'description' - - # Generate the embedded dataset - df_embedded = generate_openAI_embedded_csv(df, column_to_embed) - - # save the dataframe with embeddings to drive - upload_df_to_drive_as_csv(creds, df_embedded, "df_embedded_0504.csv", DATA_FOLDER) + # Store each embedding in Redis + for idx, row in batch_df.iterrows(): + # print(f"Storing embedding for ID: {row['id']} under the key: embedded:{row['id']}") + redis_key = f"embedded:{row['id']}" # Use repository ID as the Redis key + redis_client.set(redis_key, json.dumps(row[embeddings_columns].tolist())) # Store as JSON string + + # return df_with_embeddings # MAYBE DROP THE RETURN? JUST TO LOAD THE DATA INTO REDIS (MAYBE MAKE FUNCTION TO SAVE TO REDIS FROM DF??) 
+ 
-if __name__ == "__main__": - main() +#If running main script it will start generating the embeddings from the local csv +if __name__ == "__main__": + df = pd.read_csv(f"{real_project_dir}/data/data_full.csv") + generate_openAI_embedded_to_redis(df, 'description') diff --git a/codecompasslib/models/cosine_similarity_model.py b/codecompasslib/models/cosine_similarity_model.py index 9dafd97..288aa3a 100644 --- a/codecompasslib/models/cosine_similarity_model.py +++ b/codecompasslib/models/cosine_similarity_model.py @@ -25,7 +25,7 @@ # Add the project directory to the Python path sys.path.insert(0, real_project_dir) -from codecompasslib.API.drive_operations import download_csv_as_pd_dataframe, get_creds_drive +from codecompasslib.API.drive_old.drive_operations import download_csv_as_pd_dataframe, get_creds_drive def load_data(full_data_folder_id: str) -> DataFrame: """ diff --git a/codecompasslib/models/lightgbm_model.py b/codecompasslib/models/lightgbm_model.py index 9d21174..58e2687 100644 --- a/codecompasslib/models/lightgbm_model.py +++ b/codecompasslib/models/lightgbm_model.py @@ -1,5 +1,7 @@ import os import sys +import streamlit as st +import pandas as pd # go up to root # Construct the path to the root directory (one level up from embeddings) @@ -10,6 +12,8 @@ sys.path.insert(0, real_project_dir) import pandas as pd +import json +import redis from typing import Tuple, List from pandas import DataFrame, concat from numpy import ndarray, argsort @@ -17,9 +21,8 @@ from sklearn.model_selection import train_test_split from category_encoders import ordinal -from codecompasslib.API.drive_operations import download_csv_as_pd_dataframe, get_creds_drive from codecompasslib.API.get_bulk_data import get_stared_repos, get_user_repos - +from codecompasslib.API.redis_operations import redis_to_dataframe, load_non_embedded_data def encode_csv(df: 
DataFrame, encoder, label_col: str, typ: str = "fit") -> Tupl del df[label_col] return df, y +def preprocess_data(df_embedded: DataFrame, df_non_embedded: DataFrame, + label_col: str, target_user: str) -> DataFrame: + """ + Preprocesses the data by merging embedded and non-embedded datasets, + converting the 'stars' column to integer, adding a target column, + and dropping unnecessary columns. + + Args: + df_embedded (DataFrame): The embedded dataset. + df_non_embedded (DataFrame): The non-embedded dataset. + label_col (str): The name of the target label column. + target_user (str): The username of the target user. + + Returns: + DataFrame: The preprocessed dataset. + List: List of repo IDs that are either starred or owned by the target user. + """ + # Merge the embedded and non-embedded datasets (match based on ID), grab the column you need for training + df_merged: DataFrame = pd.merge(df_embedded, df_non_embedded[['id', 'stars', 'language', 'owner_user']], on='id', how='left') + + # Turn stars column into integer column + df_merged['stars'] = df_merged['stars'].astype(int) + + # Add target column: 1 if the repo is starred or owned by the user, else 0 + owned_by_target_repo_ids: List = [item['id'] for item in get_user_repos(target_user)[0]] + starred_repo_ids: List = [item['id'] for item in get_stared_repos(target_user)[0]] + starred_or_owned_by_user:List = starred_repo_ids + owned_by_target_repo_ids + df_merged[label_col] = df_merged['id'].apply(lambda x: 1 if x in starred_or_owned_by_user else 0) + + return df_merged, starred_or_owned_by_user + def train_lightGBM_model(df_merged: DataFrame, label_col: str) -> Tuple[lgb.Booster, ordinal.OrdinalEncoder]: """ @@ -122,77 +156,18 @@ def train_lightGBM_model(df_merged: DataFrame, label_col: str) -> Tuple[lgb.Boos return lgb_model, ord_encoder -def load_data(full_data_folder_id: str, full_data_embedded_folder_id: str) -> Tuple[DataFrame, DataFrame]: - """ - Load the data from the Google Drive - :return: The 
non-embedded and embedded datasets - """ - - creds = get_creds_drive() - df_non_embedded: DataFrame = download_csv_as_pd_dataframe(creds=creds, file_id=full_data_folder_id) - df_embedded: DataFrame = download_csv_as_pd_dataframe(creds=creds, file_id=full_data_embedded_folder_id) - - # Having data locally works much faster than retrieving from drive. Uncomment the following lines to use local data - # df_non_embedded = pd.read_csv('codecompasslib/models/data_full.csv') - # df_embedded = pd.read_csv('codecompasslib/models/df_embedded_combined.csv') - - print("Data loaded") - return df_non_embedded, df_embedded - - -def preprocess_data(df_embedded: DataFrame, df_non_embedded: DataFrame, - label_col: str, target_user: str) -> DataFrame: - """ - Preprocesses the data by merging embedded and non-embedded datasets, - converting the 'stars' column to integer, adding a target column, - and dropping unnecessary columns. - - Args: - df_embedded (DataFrame): The embedded dataset. - df_non_embedded (DataFrame): The non-embedded dataset. - label_col (str): The name of the target label column. - target_user (str): The username of the target user. - - Returns: - DataFrame: The preprocessed dataset. - List: List of repo IDs that are either starred or owned by the target user. 
- """ - # Merge the embedded and non-embedded datasets (match based on ID), grab the column you need for training - df_merged: DataFrame = pd.merge(df_embedded, df_non_embedded[['id', 'stars', 'language']], on='id', how='left') - - # Turn stars column into integer column - df_merged['stars'] = df_merged['stars'].astype(int) - - # Add target column: 1 if the repo is starred or owned by the user, else 0 - owned_by_target_repo_ids: List = [item['id'] for item in get_user_repos(target_user)[0]] - starred_repo_ids: List = [item['id'] for item in get_stared_repos(target_user)[0]] - starred_or_owned_by_user:List = starred_repo_ids + owned_by_target_repo_ids - df_merged[label_col] = df_merged['id'].apply(lambda x: 1 if x in starred_or_owned_by_user else 0) - - return df_merged, starred_or_owned_by_user - - def generate_lightGBM_recommendations(target_user: str, df_non_embedded: DataFrame, df_embedded: DataFrame, number_of_recommendations: int = 10) -> list: """ Generates recommendations using the LightGBM model. - - Args: - target_user (str): The target user for whom recommendations are generated. - df_non_embedded (DataFrame): The non-embedded data frame containing the features. - df_embedded (DataFrame): The embedded data frame containing the features. - label_col (str): The name of the label column. - number_of_recommendations (int, optional): The number of recommendations to generate. Defaults to 10. - - Returns: - list: A list of recommendations, each containing the repository name, owner user, and prediction score. 
""" + # Preprocess data label_col: str = 'target' df_merged, starred_or_owned_by_user = preprocess_data(df_embedded, df_non_embedded, label_col, target_user) - + df_training_ready: DataFrame = df_merged.drop(columns=['id', 'owner_user']) - + lgb_model: lgb.Booster ord_encoder: ordinal.OrdinalEncoder # Train LightGBM model @@ -219,3 +194,14 @@ def generate_lightGBM_recommendations(target_user: str, df_non_embedded: DataFra recommendations.append((df_merged.iloc[index]['id'], df_merged.iloc[index]['owner_user'], all_preds[index])) return recommendations + + + + + + + + + + + diff --git a/codecompasslib/models/redis_testing.ipynb b/codecompasslib/models/redis_testing.ipynb new file mode 100644 index 0000000..7070b9a --- /dev/null +++ b/codecompasslib/models/redis_testing.ipynb @@ -0,0 +1,1663 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Current Directory: /Users/maudhelenhovland/Desktop/OneDrive - IE/Y3Q2/chatbots_recos/CodeCompass/codecompasslib/models\n", + "Project Directory: /Users/maudhelenhovland/Desktop/OneDrive - IE/Y3Q2/chatbots_recos/CodeCompass/codecompasslib\n", + "Real Project Directory: /Users/maudhelenhovland/Desktop/OneDrive - IE/Y3Q2/chatbots_recos/CodeCompass\n" + ] + } + ], + "source": [ + "import os\n", + "import sys\n", + "import importlib\n", + "\n", + "# Setting path\n", + "# Get the current working directory\n", + "current_dir = os.getcwd() # This is the directory where the notebook is located\n", + "project_dir = os.path.abspath(os.path.join(current_dir, '..')) # One level up from the notebook's directory\n", + "real_project_dir = os.path.abspath(os.path.join(project_dir, '..')) # Two levels up to the project directory\n", + "datafolder = real_project_dir + 'data/'\n", + "\n", + "# Add the project directory to the Python path\n", + "sys.path.insert(0, real_project_dir)\n", + "\n", + "# Optional: Check the paths\n", + 
"print(\"Current Directory:\", current_dir)\n", + "print(\"Project Directory:\", project_dir)\n", + "print(\"Real Project Directory:\", real_project_dir)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "tags": [ + "parameters" + ] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Token loaded successfully.\n" + ] + } + ], + "source": [ + "from codecompasslib.models.lightgbm_model import load_non_embedded_data, generate_lightGBM_recommendations, preprocess_data\n", + "from codecompasslib.API.redis_operations import redis_to_dataframe, load_csv_to_redis" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/maudhelenhovland/Desktop/OneDrive - IE/Y3Q2/chatbots_recos/CodeCompass/codecompasslib/API/redis_operations.py:70: DtypeWarning: Columns (6,11,12,15,16,17,18,19,20,21,22,23,24) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " df_non_embedded = read_csv(datafolder + fname)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Non-embedded dataset \n", + "\n", + "Types: \n", + "\n", + "\n", + "RangeIndex: 2583825 entries, 0 to 2583824\n", + "Data columns (total 28 columns):\n", + " # Column Dtype \n", + "--- ------ ----- \n", + " 0 id float64\n", + " 1 name object \n", + " 2 owner_user object \n", + " 3 owner_type object \n", + " 4 description object \n", + " 5 url object \n", + " 6 is_fork object \n", + " 7 date_created object \n", + " 8 date_updated object \n", + " 9 date_pushed object \n", + " 10 size float64\n", + " 11 stars object \n", + " 12 watchers object \n", + " 13 updated_at object \n", + " 14 language object \n", + " 15 has_issues object \n", + " 16 has_projects object \n", + " 17 has_downloads object \n", + " 18 has_wiki object \n", + " 19 has_pages object \n", + " 20 has _discussions object \n", + " 21 num_forks 
object \n", + " 22 is_archived object \n", + " 23 is_disabled object \n", + " 24 is_template object \n", + " 25 license object \n", + " 26 open_issues float64\n", + " 27 topics object \n", + "dtypes: float64(3), object(25)\n", + "memory usage: 552.0+ MB\n", + "None\n" + ] + } + ], + "source": [ + "non_embeeded = load_non_embedded_data(\"data_full.csv\")\n", + "\n", + "print(\"\\nNon-embedded dataset \\n\\nTypes: \\n\")\n", + "print(non_embeeded.info())" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + " Column names: \n", + "\n", + "Index(['id', 'name', 'owner_user', 'owner_type', 'description', 'url',\n", + " 'is_fork', 'date_created', 'date_updated', 'date_pushed', 'size',\n", + " 'stars', 'watchers', 'updated_at', 'language', 'has_issues',\n", + " 'has_projects', 'has_downloads', 'has_wiki', 'has_pages',\n", + " 'has _discussions', 'num_forks', 'is_archived', 'is_disabled',\n", + " 'is_template', 'license', 'open_issues', 'topics'],\n", + " dtype='object')\n" + ] + } + ], + "source": [ + "print(\"\\n\\n Column names: \\n\")\n", + "print(non_embeeded.columns)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idnameowner_userowner_typedescriptionurlis_forkdate_createddate_updateddate_pushed...has_wikihas_pageshas _discussionsnum_forksis_archivedis_disabledis_templatelicenseopen_issuestopics
0444741024.0doc.aurora.dev-develop-compat-evmmercyogUserNo descriptionhttps://api.github.com/repos/mercyog/doc.auror...False2022-01-05T09:24:59Z2023-03-05T00:05:37Z2022-01-05T09:24:59Z...TrueFalseFalse0FalseFalseFalseNo license0.0[]
1404812692.0White_PapermercyogUserWhite Paper for Choice Coinhttps://api.github.com/repos/mercyog/White_PaperTrue2021-09-09T17:30:02Z2023-03-05T00:05:37Z2021-09-09T00:41:10Z...TrueFalseFalse0FalseFalseFalseNo license0.0[]
2462013111.0Advance-SQL-AssignmentRameshwar0852UserAdvance SQL assignment.https://api.github.com/repos/Rameshwar0852/Adv...True2022-02-21T20:01:28Z2024-03-09T14:13:51Z2020-08-05T20:54:22Z...TrueFalseFalse0FalseFalseFalseNo license0.0[]
3462013123.0anosqlRameshwar0852UserEasy SQL in Pythonhttps://api.github.com/repos/Rameshwar0852/anosqlTrue2022-02-21T20:01:29Z2024-03-09T14:13:51Z2020-09-09T18:47:09Z...TrueFalseFalse0FalseFalseFalseOther0.0[]
4585055299.0Automation_ProjectRameshwar0852UserAutomated Bash Script to automate log Backup g...https://api.github.com/repos/Rameshwar0852/Aut...False2023-01-04T07:49:35Z2024-03-09T14:13:42Z2023-01-04T17:15:46Z...TrueFalseFalse0FalseFalseFalseNo license0.0[]
\n", + "

5 rows × 28 columns

\n", + "
" + ], + "text/plain": [ + " id name owner_user owner_type \\\n", + "0 444741024.0 doc.aurora.dev-develop-compat-evm mercyog User \n", + "1 404812692.0 White_Paper mercyog User \n", + "2 462013111.0 Advance-SQL-Assignment Rameshwar0852 User \n", + "3 462013123.0 anosql Rameshwar0852 User \n", + "4 585055299.0 Automation_Project Rameshwar0852 User \n", + "\n", + " description \\\n", + "0 No description \n", + "1 White Paper for Choice Coin \n", + "2 Advance SQL assignment. \n", + "3 Easy SQL in Python \n", + "4 Automated Bash Script to automate log Backup g... \n", + "\n", + " url is_fork \\\n", + "0 https://api.github.com/repos/mercyog/doc.auror... False \n", + "1 https://api.github.com/repos/mercyog/White_Paper True \n", + "2 https://api.github.com/repos/Rameshwar0852/Adv... True \n", + "3 https://api.github.com/repos/Rameshwar0852/anosql True \n", + "4 https://api.github.com/repos/Rameshwar0852/Aut... False \n", + "\n", + " date_created date_updated date_pushed ... \\\n", + "0 2022-01-05T09:24:59Z 2023-03-05T00:05:37Z 2022-01-05T09:24:59Z ... \n", + "1 2021-09-09T17:30:02Z 2023-03-05T00:05:37Z 2021-09-09T00:41:10Z ... \n", + "2 2022-02-21T20:01:28Z 2024-03-09T14:13:51Z 2020-08-05T20:54:22Z ... \n", + "3 2022-02-21T20:01:29Z 2024-03-09T14:13:51Z 2020-09-09T18:47:09Z ... \n", + "4 2023-01-04T07:49:35Z 2024-03-09T14:13:42Z 2023-01-04T17:15:46Z ... 
\n", + "\n", + " has_wiki has_pages has _discussions num_forks is_archived is_disabled \\\n", + "0 True False False 0 False False \n", + "1 True False False 0 False False \n", + "2 True False False 0 False False \n", + "3 True False False 0 False False \n", + "4 True False False 0 False False \n", + "\n", + " is_template license open_issues topics \n", + "0 False No license 0.0 [] \n", + "1 False No license 0.0 [] \n", + "2 False No license 0.0 [] \n", + "3 False Other 0.0 [] \n", + "4 False No license 0.0 [] \n", + "\n", + "[5 rows x 28 columns]" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "non_embeeded.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "embedded = redis_to_dataframe()" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Embedded dataset \n", + "\n", + "Types: \n", + "\n", + "\n", + "RangeIndex: 1205027 entries, 0 to 1205026\n", + "Columns: 257 entries, id to embedding_255\n", + "dtypes: float64(257)\n", + "memory usage: 2.3 GB\n", + "None\n" + ] + } + ], + "source": [ + "print(\"\\nEmbedded dataset \\n\\nTypes: \\n\")\n", + "print(embedded.info())" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + " Column names: \n", + "\n", + "Index(['id', 'embedding_0', 'embedding_1', 'embedding_2', 'embedding_3',\n", + " 'embedding_4', 'embedding_5', 'embedding_6', 'embedding_7',\n", + " 'embedding_8',\n", + " ...\n", + " 'embedding_246', 'embedding_247', 'embedding_248', 'embedding_249',\n", + " 'embedding_250', 'embedding_251', 'embedding_252', 'embedding_253',\n", + " 'embedding_254', 'embedding_255'],\n", + " dtype='object', length=257)\n" + ] + } + ], + "source": [ + "print(\"\\n\\n 
Column names: \\n\")\n", + "print(embedded.columns)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idembedding_0embedding_1embedding_2embedding_3embedding_4embedding_5embedding_6embedding_7embedding_8...embedding_246embedding_247embedding_248embedding_249embedding_250embedding_251embedding_252embedding_253embedding_254embedding_255
051728484.0-0.0729980.115051-0.0423890.032623-0.039032-0.035614-0.0392460.0781860.001712...0.0166780.153320-0.0425110.0568540.066040-0.032990-0.0358580.041168-0.0321660.133911
11845031.0-0.116882-0.038361-0.0673220.037048-0.024780-0.0470580.0129700.150635-0.132080...0.0723270.089905-0.0095060.0558780.056335-0.0229340.0070690.037811-0.0424190.092896
2640245327.0-0.0661010.001008-0.0230710.0423580.019363-0.0335080.0470280.134399-0.040833...-0.0507810.0341490.0647580.0048370.0079040.0223540.0155940.051422-0.027527-0.017197
3443559880.0-0.0060770.084595-0.0338440.069092-0.0261990.076233-0.0248720.1278080.007904...-0.0342100.1549070.162598-0.0902100.053070-0.0300140.0554500.035645-0.0364070.101624
438375571.0-0.052521-0.064087-0.019775-0.014755-0.040894-0.0651250.0214080.064697-0.084473...-0.113953-0.202881-0.024536-0.027328-0.0438840.011597-0.032959-0.010086-0.0032600.078552
\n", + "

5 rows × 257 columns

\n", + "
" + ], + "text/plain": [ + " id embedding_0 embedding_1 embedding_2 embedding_3 \\\n", + "0 51728484.0 -0.072998 0.115051 -0.042389 0.032623 \n", + "1 1845031.0 -0.116882 -0.038361 -0.067322 0.037048 \n", + "2 640245327.0 -0.066101 0.001008 -0.023071 0.042358 \n", + "3 443559880.0 -0.006077 0.084595 -0.033844 0.069092 \n", + "4 38375571.0 -0.052521 -0.064087 -0.019775 -0.014755 \n", + "\n", + " embedding_4 embedding_5 embedding_6 embedding_7 embedding_8 ... \\\n", + "0 -0.039032 -0.035614 -0.039246 0.078186 0.001712 ... \n", + "1 -0.024780 -0.047058 0.012970 0.150635 -0.132080 ... \n", + "2 0.019363 -0.033508 0.047028 0.134399 -0.040833 ... \n", + "3 -0.026199 0.076233 -0.024872 0.127808 0.007904 ... \n", + "4 -0.040894 -0.065125 0.021408 0.064697 -0.084473 ... \n", + "\n", + " embedding_246 embedding_247 embedding_248 embedding_249 embedding_250 \\\n", + "0 0.016678 0.153320 -0.042511 0.056854 0.066040 \n", + "1 0.072327 0.089905 -0.009506 0.055878 0.056335 \n", + "2 -0.050781 0.034149 0.064758 0.004837 0.007904 \n", + "3 -0.034210 0.154907 0.162598 -0.090210 0.053070 \n", + "4 -0.113953 -0.202881 -0.024536 -0.027328 -0.043884 \n", + "\n", + " embedding_251 embedding_252 embedding_253 embedding_254 embedding_255 \n", + "0 -0.032990 -0.035858 0.041168 -0.032166 0.133911 \n", + "1 -0.022934 0.007069 0.037811 -0.042419 0.092896 \n", + "2 0.022354 0.015594 0.051422 -0.027527 -0.017197 \n", + "3 -0.030014 0.055450 0.035645 -0.036407 0.101624 \n", + "4 0.011597 -0.032959 -0.010086 -0.003260 0.078552 \n", + "\n", + "[5 rows x 257 columns]" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "embedded.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idembedding_0embedding_1embedding_2embedding_3embedding_4embedding_5embedding_6embedding_7embedding_8...embedding_250embedding_251embedding_252embedding_253embedding_254embedding_255starslanguageowner_usertarget
0130509551.00.0390600.02786-0.0054970.016720.020080-0.031920.1089500.11800-0.05210...-0.0068360.0112700.087800-0.009640-0.144300-0.077450Cudabarseghyanartur0
138375571.0-0.052460-0.06415-0.019880-0.01478-0.040860-0.065060.0214700.06450-0.08435...-0.0438800.011540-0.032960-0.010086-0.0032800.078600HTMLleonirlopes0
2737250560.0-0.080100-0.07544-0.036740-0.06665-0.0694000.03308-0.0158400.14070-0.02538...0.0424200.099000-0.0136800.085100-0.0125050.047208Rustlilydjwg0
3194578827.0-0.128700-0.01314-0.028080-0.110530.0310000.12470-0.0436000.18150-0.09430...-0.0421800.0271300.0578600.041350-0.0512400.023640Gozhsso0
4758946133.0-0.064900-0.08950-0.044200-0.047800.007130-0.15230-0.0486000.170400.06885...-0.040860-0.115540-0.0732000.034270-0.0215900.060402Rustjoelparkerhenderson0
..................................................................
1205022197403819.0-0.027000-0.03436-0.0249000.12225-0.0342000.02406-0.0415000.12780-0.10913...0.023060-0.001948-0.0008250.006508-0.011500-0.038851JavaScriptprzemyslawzalewski0
1205023178142966.0-0.0880000.01217-0.044160-0.070000.0263000.00918-0.0757000.01749-0.05624...-0.0300000.0259000.081800-0.042500-0.023480-0.044160PythonTobey1230
1205024479650052.0-0.039760-0.03122-0.0424000.036530.016390-0.02430-0.0014170.036220.02328...-0.025740-0.0782000.036320-0.003975-0.0112000.041960CSSHadzhieV7770
120502556554747.0-0.0912500.00616-0.036350-0.06050-0.005802-0.19030-0.0101550.07050-0.07190...0.0520000.0042800.0869000.102600-0.0124000.062870C++wyrover0
1205026139950081.00.0007720.08203-0.046360-0.01985-0.0015620.056850.0006210.12090-0.02058...0.0765400.0258000.018750-0.034270-0.0205800.133800JavaScriptthefreakingmind0
\n", + "

1205027 rows × 261 columns

\n", + "
" + ], + "text/plain": [ + " id embedding_0 embedding_1 embedding_2 embedding_3 \\\n", + "0 130509551.0 0.039060 0.02786 -0.005497 0.01672 \n", + "1 38375571.0 -0.052460 -0.06415 -0.019880 -0.01478 \n", + "2 737250560.0 -0.080100 -0.07544 -0.036740 -0.06665 \n", + "3 194578827.0 -0.128700 -0.01314 -0.028080 -0.11053 \n", + "4 758946133.0 -0.064900 -0.08950 -0.044200 -0.04780 \n", + "... ... ... ... ... ... \n", + "1205022 197403819.0 -0.027000 -0.03436 -0.024900 0.12225 \n", + "1205023 178142966.0 -0.088000 0.01217 -0.044160 -0.07000 \n", + "1205024 479650052.0 -0.039760 -0.03122 -0.042400 0.03653 \n", + "1205025 56554747.0 -0.091250 0.00616 -0.036350 -0.06050 \n", + "1205026 139950081.0 0.000772 0.08203 -0.046360 -0.01985 \n", + "\n", + " embedding_4 embedding_5 embedding_6 embedding_7 embedding_8 ... \\\n", + "0 0.020080 -0.03192 0.108950 0.11800 -0.05210 ... \n", + "1 -0.040860 -0.06506 0.021470 0.06450 -0.08435 ... \n", + "2 -0.069400 0.03308 -0.015840 0.14070 -0.02538 ... \n", + "3 0.031000 0.12470 -0.043600 0.18150 -0.09430 ... \n", + "4 0.007130 -0.15230 -0.048600 0.17040 0.06885 ... \n", + "... ... ... ... ... ... ... \n", + "1205022 -0.034200 0.02406 -0.041500 0.12780 -0.10913 ... \n", + "1205023 0.026300 0.00918 -0.075700 0.01749 -0.05624 ... \n", + "1205024 0.016390 -0.02430 -0.001417 0.03622 0.02328 ... \n", + "1205025 -0.005802 -0.19030 -0.010155 0.07050 -0.07190 ... \n", + "1205026 -0.001562 0.05685 0.000621 0.12090 -0.02058 ... \n", + "\n", + " embedding_250 embedding_251 embedding_252 embedding_253 \\\n", + "0 -0.006836 0.011270 0.087800 -0.009640 \n", + "1 -0.043880 0.011540 -0.032960 -0.010086 \n", + "2 0.042420 0.099000 -0.013680 0.085100 \n", + "3 -0.042180 0.027130 0.057860 0.041350 \n", + "4 -0.040860 -0.115540 -0.073200 0.034270 \n", + "... ... ... ... ... 
\n", + "1205022 0.023060 -0.001948 -0.000825 0.006508 \n", + "1205023 -0.030000 0.025900 0.081800 -0.042500 \n", + "1205024 -0.025740 -0.078200 0.036320 -0.003975 \n", + "1205025 0.052000 0.004280 0.086900 0.102600 \n", + "1205026 0.076540 0.025800 0.018750 -0.034270 \n", + "\n", + " embedding_254 embedding_255 stars language owner_user \\\n", + "0 -0.144300 -0.07745 0 Cuda barseghyanartur \n", + "1 -0.003280 0.07860 0 HTML leonirlopes \n", + "2 -0.012505 0.04720 8 Rust lilydjwg \n", + "3 -0.051240 0.02364 0 Go zhsso \n", + "4 -0.021590 0.06040 2 Rust joelparkerhenderson \n", + "... ... ... ... ... ... \n", + "1205022 -0.011500 -0.03885 1 JavaScript przemyslawzalewski \n", + "1205023 -0.023480 -0.04416 0 Python Tobey123 \n", + "1205024 -0.011200 0.04196 0 CSS HadzhieV777 \n", + "1205025 -0.012400 0.06287 0 C++ wyrover \n", + "1205026 -0.020580 0.13380 0 JavaScript thefreakingmind \n", + "\n", + " target \n", + "0 0 \n", + "1 0 \n", + "2 0 \n", + "3 0 \n", + "4 0 \n", + "... ... \n", + "1205022 0 \n", + "1205023 0 \n", + "1205024 0 \n", + "1205025 0 \n", + "1205026 0 \n", + "\n", + "[1205027 rows x 261 columns]" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_merged, _ = preprocess_data(embedded, non_embeeded, \"target\", \"mercyog\")\n", + "df_merged" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idembedding_0embedding_1embedding_2embedding_3embedding_4embedding_5embedding_6embedding_7embedding_8...embedding_250embedding_251embedding_252embedding_253embedding_254embedding_255starslanguageowner_usertarget
28415883222441.0-0.0177800.10730-0.032500.0381000.020580.06805-0.026920.13750-0.04395...0.012400.049160-0.0498400.03607-0.148200.04040250323Pythondonnemartin0
75908245717250.0-0.068050-0.03784-0.024810.0136950.01224-0.05550-0.019590.13260-0.03061...-0.05017-0.001194-0.002592-0.05612-0.101140.05222181795C++tensorflow0
8357532325298.0-0.0055540.14210-0.028600.030730-0.059420.05792-0.015700.151900.03564...-0.013030.0657000.024290-0.07260-0.044040.06207167981Ctorvalds0
282695155220641.0-0.0014910.11100-0.050400.0098500.04740-0.02014-0.025020.06366-0.01021...-0.079600.0161100.0838600.03842-0.05057-0.02344123554Pythonhuggingface0
57853921540759.0-0.076540-0.01588-0.05377-0.0147900.016170.007980.016880.02441-0.03305...0.023740.1072000.0351300.06390-0.032500.10724118663Goavelino0
\n", + "

5 rows × 261 columns

\n", + "
" + ], + "text/plain": [ + " id embedding_0 embedding_1 embedding_2 embedding_3 \\\n", + "284158 83222441.0 -0.017780 0.10730 -0.03250 0.038100 \n", + "759082 45717250.0 -0.068050 -0.03784 -0.02481 0.013695 \n", + "835753 2325298.0 -0.005554 0.14210 -0.02860 0.030730 \n", + "282695 155220641.0 -0.001491 0.11100 -0.05040 0.009850 \n", + "578539 21540759.0 -0.076540 -0.01588 -0.05377 -0.014790 \n", + "\n", + " embedding_4 embedding_5 embedding_6 embedding_7 embedding_8 ... \\\n", + "284158 0.02058 0.06805 -0.02692 0.13750 -0.04395 ... \n", + "759082 0.01224 -0.05550 -0.01959 0.13260 -0.03061 ... \n", + "835753 -0.05942 0.05792 -0.01570 0.15190 0.03564 ... \n", + "282695 0.04740 -0.02014 -0.02502 0.06366 -0.01021 ... \n", + "578539 0.01617 0.00798 0.01688 0.02441 -0.03305 ... \n", + "\n", + " embedding_250 embedding_251 embedding_252 embedding_253 \\\n", + "284158 0.01240 0.049160 -0.049840 0.03607 \n", + "759082 -0.05017 -0.001194 -0.002592 -0.05612 \n", + "835753 -0.01303 0.065700 0.024290 -0.07260 \n", + "282695 -0.07960 0.016110 0.083860 0.03842 \n", + "578539 0.02374 0.107200 0.035130 0.06390 \n", + "\n", + " embedding_254 embedding_255 stars language owner_user target \n", + "284158 -0.14820 0.04040 250323 Python donnemartin 0 \n", + "759082 -0.10114 0.05222 181795 C++ tensorflow 0 \n", + "835753 -0.04404 0.06207 167981 C torvalds 0 \n", + "282695 -0.05057 -0.02344 123554 Python huggingface 0 \n", + "578539 -0.03250 0.10724 118663 Go avelino 0 \n", + "\n", + "[5 rows x 261 columns]" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#sort df merged by most stars\n", + "df_merged = df_merged.sort_values(by='stars', ascending=False)\n", + "df_merged.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Training LightGBM model\n" + ] + }, + { + "ename": "ValueError", + "evalue": "The least 
populated class in y has only 1 member, which is too few. The minimum number of groups for any class cannot be less than 2.", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[16], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m recos \u001b[38;5;241m=\u001b[39m \u001b[43mgenerate_lightGBM_recommendations\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mmercyog\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mnon_embeeded\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43membedded\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/Desktop/OneDrive - IE/Y3Q2/chatbots_recos/CodeCompass/codecompasslib/models/lightgbm_model.py:174\u001b[0m, in \u001b[0;36mgenerate_lightGBM_recommendations\u001b[0;34m(target_user, df_non_embedded, df_embedded, number_of_recommendations)\u001b[0m\n\u001b[1;32m 172\u001b[0m ord_encoder: ordinal\u001b[38;5;241m.\u001b[39mOrdinalEncoder\n\u001b[1;32m 173\u001b[0m \u001b[38;5;66;03m# Train LightGBM model\u001b[39;00m\n\u001b[0;32m--> 174\u001b[0m lgb_model, ord_encoder \u001b[38;5;241m=\u001b[39m \u001b[43mtrain_lightGBM_model\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdf_training_ready\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mlabel_col\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 176\u001b[0m \u001b[38;5;66;03m# Make predictions for all repos\u001b[39;00m\n\u001b[1;32m 177\u001b[0m full_dataset_x, full_dataset_y \u001b[38;5;241m=\u001b[39m encode_csv(df_training_ready, ord_encoder, label_col, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtransform\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n", + "File \u001b[0;32m~/Desktop/OneDrive - IE/Y3Q2/chatbots_recos/CodeCompass/codecompasslib/models/lightgbm_model.py:130\u001b[0m, in 
\u001b[0;36mtrain_lightGBM_model\u001b[0;34m(df_merged, label_col)\u001b[0m\n\u001b[1;32m 127\u001b[0m y: DataFrame \u001b[38;5;241m=\u001b[39m df_merged[label_col]\n\u001b[1;32m 129\u001b[0m \u001b[38;5;66;03m# Dataset is imbalaned -> make sure that the stratify parameter is set\u001b[39;00m\n\u001b[0;32m--> 130\u001b[0m X_combined, X_test, y_combined, y_test \u001b[38;5;241m=\u001b[39m \u001b[43mtrain_test_split\u001b[49m\u001b[43m(\u001b[49m\u001b[43mX\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43my\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtest_size\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m0.1\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mrandom_state\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m42\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstratify\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43my\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 131\u001b[0m X_train, X_val, y_train, y_val \u001b[38;5;241m=\u001b[39m train_test_split(X_combined, y_combined, test_size\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m0.1\u001b[39m, random_state\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m42\u001b[39m,\n\u001b[1;32m 132\u001b[0m stratify\u001b[38;5;241m=\u001b[39my_combined)\n\u001b[1;32m 134\u001b[0m \u001b[38;5;66;03m# combine X_train and y_train\u001b[39;00m\n", + "File \u001b[0;32m~/Desktop/OneDrive - IE/Y3Q2/chatbots_recos/CodeCompass/.venv/lib/python3.10/site-packages/sklearn/model_selection/_split.py:2583\u001b[0m, in \u001b[0;36mtrain_test_split\u001b[0;34m(test_size, train_size, random_state, shuffle, stratify, *arrays)\u001b[0m\n\u001b[1;32m 2579\u001b[0m CVClass \u001b[38;5;241m=\u001b[39m ShuffleSplit\n\u001b[1;32m 2581\u001b[0m cv \u001b[38;5;241m=\u001b[39m CVClass(test_size\u001b[38;5;241m=\u001b[39mn_test, train_size\u001b[38;5;241m=\u001b[39mn_train, random_state\u001b[38;5;241m=\u001b[39mrandom_state)\n\u001b[0;32m-> 2583\u001b[0m train, test 
\u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mnext\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mcv\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msplit\u001b[49m\u001b[43m(\u001b[49m\u001b[43mX\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43marrays\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;241;43m0\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43my\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstratify\u001b[49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 2585\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mlist\u001b[39m(\n\u001b[1;32m 2586\u001b[0m chain\u001b[38;5;241m.\u001b[39mfrom_iterable(\n\u001b[1;32m 2587\u001b[0m (_safe_indexing(a, train), _safe_indexing(a, test)) \u001b[38;5;28;01mfor\u001b[39;00m a \u001b[38;5;129;01min\u001b[39;00m arrays\n\u001b[1;32m 2588\u001b[0m )\n\u001b[1;32m 2589\u001b[0m )\n", + "File \u001b[0;32m~/Desktop/OneDrive - IE/Y3Q2/chatbots_recos/CodeCompass/.venv/lib/python3.10/site-packages/sklearn/model_selection/_split.py:1689\u001b[0m, in \u001b[0;36mBaseShuffleSplit.split\u001b[0;34m(self, X, y, groups)\u001b[0m\n\u001b[1;32m 1659\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"Generate indices to split data into training and test set.\u001b[39;00m\n\u001b[1;32m 1660\u001b[0m \n\u001b[1;32m 1661\u001b[0m \u001b[38;5;124;03mParameters\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 1686\u001b[0m \u001b[38;5;124;03mto an integer.\u001b[39;00m\n\u001b[1;32m 1687\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 1688\u001b[0m X, y, groups \u001b[38;5;241m=\u001b[39m indexable(X, y, groups)\n\u001b[0;32m-> 1689\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m train, test \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_iter_indices(X, y, groups):\n\u001b[1;32m 1690\u001b[0m \u001b[38;5;28;01myield\u001b[39;00m train, test\n", + "File \u001b[0;32m~/Desktop/OneDrive - 
IE/Y3Q2/chatbots_recos/CodeCompass/.venv/lib/python3.10/site-packages/sklearn/model_selection/_split.py:2078\u001b[0m, in \u001b[0;36mStratifiedShuffleSplit._iter_indices\u001b[0;34m(self, X, y, groups)\u001b[0m\n\u001b[1;32m 2076\u001b[0m class_counts \u001b[38;5;241m=\u001b[39m np\u001b[38;5;241m.\u001b[39mbincount(y_indices)\n\u001b[1;32m 2077\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m np\u001b[38;5;241m.\u001b[39mmin(class_counts) \u001b[38;5;241m<\u001b[39m \u001b[38;5;241m2\u001b[39m:\n\u001b[0;32m-> 2078\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[1;32m 2079\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mThe least populated class in y has only 1\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 2080\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m member, which is too few. The minimum\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 2081\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m number of groups for any class cannot\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 2082\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m be less than 2.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 2083\u001b[0m )\n\u001b[1;32m 2085\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m n_train \u001b[38;5;241m<\u001b[39m n_classes:\n\u001b[1;32m 2086\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[1;32m 2087\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mThe train_size = \u001b[39m\u001b[38;5;132;01m%d\u001b[39;00m\u001b[38;5;124m should be greater or \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 2088\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mequal to the number of classes = \u001b[39m\u001b[38;5;132;01m%d\u001b[39;00m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;241m%\u001b[39m (n_train, n_classes)\n\u001b[1;32m 2089\u001b[0m )\n", + "\u001b[0;31mValueError\u001b[0m: The least populated class in y has only 1 member, which is 
too few. The minimum number of groups for any class cannot be less than 2." + ] + } + ], + "source": [ + "recos = generate_lightGBM_recommendations(\"mercyog\", non_embeeded, embedded)" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "m e https://github.com/e/m\n" + ] + }, + { + "ename": "KeyError", + "evalue": "0", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)", + "File \u001b[0;32m~/Desktop/OneDrive - IE/Y3Q2/chatbots_recos/CodeCompass/.venv/lib/python3.10/site-packages/pandas/core/indexes/base.py:3805\u001b[0m, in \u001b[0;36mIndex.get_loc\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 3804\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m-> 3805\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_engine\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget_loc\u001b[49m\u001b[43m(\u001b[49m\u001b[43mcasted_key\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 3806\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m err:\n", + "File \u001b[0;32mindex.pyx:167\u001b[0m, in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\u001b[0;34m()\u001b[0m\n", + "File \u001b[0;32mindex.pyx:196\u001b[0m, in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\u001b[0;34m()\u001b[0m\n", + "File \u001b[0;32mpandas/_libs/hashtable_class_helper.pxi:7081\u001b[0m, in \u001b[0;36mpandas._libs.hashtable.PyObjectHashTable.get_item\u001b[0;34m()\u001b[0m\n", + "File \u001b[0;32mpandas/_libs/hashtable_class_helper.pxi:7089\u001b[0m, in \u001b[0;36mpandas._libs.hashtable.PyObjectHashTable.get_item\u001b[0;34m()\u001b[0m\n", + "\u001b[0;31mKeyError\u001b[0m: 0", + "\nThe 
above exception was the direct cause of the following exception:\n", + "\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[12], line 2\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m index, repo \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28menumerate\u001b[39m(recos):\n\u001b[0;32m----> 2\u001b[0m repo_id \u001b[38;5;241m=\u001b[39m \u001b[43mrepo\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;241;43m0\u001b[39;49m\u001b[43m]\u001b[49m \u001b[38;5;66;03m# Assuming repo ID is the first element\u001b[39;00m\n\u001b[1;32m 3\u001b[0m owner \u001b[38;5;241m=\u001b[39m repo[\u001b[38;5;241m1\u001b[39m] \n\u001b[1;32m 4\u001b[0m link \u001b[38;5;241m=\u001b[39m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mhttps://github.com/\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mowner\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m/\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mrepo_id\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;66;03m# Link to the respective GitHub repository\u001b[39;00m\n", + "File \u001b[0;32m~/Desktop/OneDrive - IE/Y3Q2/chatbots_recos/CodeCompass/.venv/lib/python3.10/site-packages/pandas/core/frame.py:4102\u001b[0m, in \u001b[0;36mDataFrame.__getitem__\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 4100\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcolumns\u001b[38;5;241m.\u001b[39mnlevels \u001b[38;5;241m>\u001b[39m \u001b[38;5;241m1\u001b[39m:\n\u001b[1;32m 4101\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_getitem_multilevel(key)\n\u001b[0;32m-> 4102\u001b[0m indexer \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcolumns\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget_loc\u001b[49m\u001b[43m(\u001b[49m\u001b[43mkey\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 4103\u001b[0m 
\u001b[38;5;28;01mif\u001b[39;00m is_integer(indexer):\n\u001b[1;32m 4104\u001b[0m indexer \u001b[38;5;241m=\u001b[39m [indexer]\n", + "File \u001b[0;32m~/Desktop/OneDrive - IE/Y3Q2/chatbots_recos/CodeCompass/.venv/lib/python3.10/site-packages/pandas/core/indexes/base.py:3812\u001b[0m, in \u001b[0;36mIndex.get_loc\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 3807\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(casted_key, \u001b[38;5;28mslice\u001b[39m) \u001b[38;5;129;01mor\u001b[39;00m (\n\u001b[1;32m 3808\u001b[0m \u001b[38;5;28misinstance\u001b[39m(casted_key, abc\u001b[38;5;241m.\u001b[39mIterable)\n\u001b[1;32m 3809\u001b[0m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28many\u001b[39m(\u001b[38;5;28misinstance\u001b[39m(x, \u001b[38;5;28mslice\u001b[39m) \u001b[38;5;28;01mfor\u001b[39;00m x \u001b[38;5;129;01min\u001b[39;00m casted_key)\n\u001b[1;32m 3810\u001b[0m ):\n\u001b[1;32m 3811\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m InvalidIndexError(key)\n\u001b[0;32m-> 3812\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m(key) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01merr\u001b[39;00m\n\u001b[1;32m 3813\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m:\n\u001b[1;32m 3814\u001b[0m \u001b[38;5;66;03m# If we have a listlike key, _check_indexing_error will raise\u001b[39;00m\n\u001b[1;32m 3815\u001b[0m \u001b[38;5;66;03m# InvalidIndexError. 
Otherwise we fall through and re-raise\u001b[39;00m\n\u001b[1;32m 3816\u001b[0m \u001b[38;5;66;03m# the TypeError.\u001b[39;00m\n\u001b[1;32m 3817\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_check_indexing_error(key)\n", + "\u001b[0;31mKeyError\u001b[0m: 0" + ] + } + ], + "source": [ + "for index, repo in enumerate(recos):\n", + " repo_id = repo[0] # Assuming repo ID is the first element\n", + " owner = repo[1] \n", + " link = f\"https://github.com/{owner}/{repo_id}\" # Link to the respective GitHub repository\n", + " print(repo_id, owner, link)" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "id\n", + "embedding_0\n", + "embedding_1\n", + "embedding_2\n", + "embedding_3\n", + "embedding_4\n", + "embedding_5\n", + "embedding_6\n", + "embedding_7\n", + "embedding_8\n", + "embedding_9\n", + "embedding_10\n", + "embedding_11\n", + "embedding_12\n", + "embedding_13\n", + "embedding_14\n", + "embedding_15\n", + "embedding_16\n", + "embedding_17\n", + "embedding_18\n", + "embedding_19\n", + "embedding_20\n", + "embedding_21\n", + "embedding_22\n", + "embedding_23\n", + "embedding_24\n", + "embedding_25\n", + "embedding_26\n", + "embedding_27\n", + "embedding_28\n", + "embedding_29\n", + "embedding_30\n", + "embedding_31\n", + "embedding_32\n", + "embedding_33\n", + "embedding_34\n", + "embedding_35\n", + "embedding_36\n", + "embedding_37\n", + "embedding_38\n", + "embedding_39\n", + "embedding_40\n", + "embedding_41\n", + "embedding_42\n", + "embedding_43\n", + "embedding_44\n", + "embedding_45\n", + "embedding_46\n", + "embedding_47\n", + "embedding_48\n", + "embedding_49\n", + "embedding_50\n", + "embedding_51\n", + "embedding_52\n", + "embedding_53\n", + "embedding_54\n", + "embedding_55\n", + "embedding_56\n", + "embedding_57\n", + "embedding_58\n", + "embedding_59\n", + "embedding_60\n", + "embedding_61\n", + "embedding_62\n", + 
"embedding_63\n", + "embedding_64\n", + "embedding_65\n", + "embedding_66\n", + "embedding_67\n", + "embedding_68\n", + "embedding_69\n", + "embedding_70\n", + "embedding_71\n", + "embedding_72\n", + "embedding_73\n", + "embedding_74\n", + "embedding_75\n", + "embedding_76\n", + "embedding_77\n", + "embedding_78\n", + "embedding_79\n", + "embedding_80\n", + "embedding_81\n", + "embedding_82\n", + "embedding_83\n", + "embedding_84\n", + "embedding_85\n", + "embedding_86\n", + "embedding_87\n", + "embedding_88\n", + "embedding_89\n", + "embedding_90\n", + "embedding_91\n", + "embedding_92\n", + "embedding_93\n", + "embedding_94\n", + "embedding_95\n", + "embedding_96\n", + "embedding_97\n", + "embedding_98\n", + "embedding_99\n", + "embedding_100\n", + "embedding_101\n", + "embedding_102\n", + "embedding_103\n", + "embedding_104\n", + "embedding_105\n", + "embedding_106\n", + "embedding_107\n", + "embedding_108\n", + "embedding_109\n", + "embedding_110\n", + "embedding_111\n", + "embedding_112\n", + "embedding_113\n", + "embedding_114\n", + "embedding_115\n", + "embedding_116\n", + "embedding_117\n", + "embedding_118\n", + "embedding_119\n", + "embedding_120\n", + "embedding_121\n", + "embedding_122\n", + "embedding_123\n", + "embedding_124\n", + "embedding_125\n", + "embedding_126\n", + "embedding_127\n", + "embedding_128\n", + "embedding_129\n", + "embedding_130\n", + "embedding_131\n", + "embedding_132\n", + "embedding_133\n", + "embedding_134\n", + "embedding_135\n", + "embedding_136\n", + "embedding_137\n", + "embedding_138\n", + "embedding_139\n", + "embedding_140\n", + "embedding_141\n", + "embedding_142\n", + "embedding_143\n", + "embedding_144\n", + "embedding_145\n", + "embedding_146\n", + "embedding_147\n", + "embedding_148\n", + "embedding_149\n", + "embedding_150\n", + "embedding_151\n", + "embedding_152\n", + "embedding_153\n", + "embedding_154\n", + "embedding_155\n", + "embedding_156\n", + "embedding_157\n", + "embedding_158\n", + "embedding_159\n", + 
"embedding_160\n", + "embedding_161\n", + "embedding_162\n", + "embedding_163\n", + "embedding_164\n", + "embedding_165\n", + "embedding_166\n", + "embedding_167\n", + "embedding_168\n", + "embedding_169\n", + "embedding_170\n", + "embedding_171\n", + "embedding_172\n", + "embedding_173\n", + "embedding_174\n", + "embedding_175\n", + "embedding_176\n", + "embedding_177\n", + "embedding_178\n", + "embedding_179\n", + "embedding_180\n", + "embedding_181\n", + "embedding_182\n", + "embedding_183\n", + "embedding_184\n", + "embedding_185\n", + "embedding_186\n", + "embedding_187\n", + "embedding_188\n", + "embedding_189\n", + "embedding_190\n", + "embedding_191\n", + "embedding_192\n", + "embedding_193\n", + "embedding_194\n", + "embedding_195\n", + "embedding_196\n", + "embedding_197\n", + "embedding_198\n", + "embedding_199\n", + "embedding_200\n", + "embedding_201\n", + "embedding_202\n", + "embedding_203\n", + "embedding_204\n", + "embedding_205\n", + "embedding_206\n", + "embedding_207\n", + "embedding_208\n", + "embedding_209\n", + "embedding_210\n", + "embedding_211\n", + "embedding_212\n", + "embedding_213\n", + "embedding_214\n", + "embedding_215\n", + "embedding_216\n", + "embedding_217\n", + "embedding_218\n", + "embedding_219\n", + "embedding_220\n", + "embedding_221\n", + "embedding_222\n", + "embedding_223\n", + "embedding_224\n", + "embedding_225\n", + "embedding_226\n", + "embedding_227\n", + "embedding_228\n", + "embedding_229\n", + "embedding_230\n", + "embedding_231\n", + "embedding_232\n", + "embedding_233\n", + "embedding_234\n", + "embedding_235\n", + "embedding_236\n", + "embedding_237\n", + "embedding_238\n", + "embedding_239\n", + "embedding_240\n", + "embedding_241\n", + "embedding_242\n", + "embedding_243\n", + "embedding_244\n", + "embedding_245\n", + "embedding_246\n", + "embedding_247\n", + "embedding_248\n", + "embedding_249\n", + "embedding_250\n", + "embedding_251\n", + "embedding_252\n", + "embedding_253\n", + "embedding_254\n", + 
"embedding_255\n" + ] + } + ], + "source": [ + "for element in repo:\n", + " print(element)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.14" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/frontend/recommender/app.py b/frontend/recommender/app.py index 68b8cfc..3c3d875 100644 --- a/frontend/recommender/app.py +++ b/frontend/recommender/app.py @@ -12,23 +12,31 @@ sys.path.insert(0, real_project_dir) # Import necessary functions from codecompasslib -from codecompasslib.models.lightgbm_model import generate_lightGBM_recommendations, load_data +from codecompasslib.models.lightgbm_model import generate_lightGBM_recommendations,load_non_embedded_data +from codecompasslib.API.redis_operations import redis_to_dataframe + +@st.cache_data +def load_non_embedded_data_cached(file_path): + return load_non_embedded_data(file_path) + +@st.cache_data +def redis_to_dataframe_cached(): + return redis_to_dataframe() -# Function to load cached data def load_cached_data(): - # Check if data is already stored in session state if 'cached_data' not in st.session_state: with st.spinner('Fetching data from the server...'): - # Load data - full_data_folder_id = '1Qiy9u03hUthqaoBDr4VQqhKwtLJ2O3Yd' - full_data_embedded_folder_id = '139wi78iRzhwGZwxmI5WALoYocR-Rk9By' - st.session_state.cached_data = load_data(full_data_folder_id, full_data_embedded_folder_id) + df_non_embedded = load_non_embedded_data_cached("data_full.csv") + print("\nNon embedded data loaded.") + df_embedded = redis_to_dataframe_cached() + print("\nEmbedded data from Redis loaded") 
+ st.session_state.cached_data = (df_non_embedded, df_embedded) return st.session_state.cached_data + def main(): # Load the data df_non_embedded, df_embedded = load_cached_data() - # Set app title st.title('GitHub Repo Recommendation System') @@ -38,7 +46,7 @@ def main(): # Button to get recommendations if st.button('Get Recommendations'): # Check if user exists in the dataset - if target_user not in df_embedded['owner_user'].values: + if target_user not in df_non_embedded['owner_user'].values: st.error("User not found in the dataset. Please enter a valid username.") else: # Generate recommendations diff --git a/tests/conftest.py b/tests/conftest.py index f9a8077..30d04c0 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -7,7 +7,7 @@ import pytest from google.oauth2.credentials import Credentials -from codecompasslib.API.drive_operations import get_creds_drive +from codecompasslib.API.drive_old.drive_operations import get_creds_drive from pandas import DataFrame from unittest.mock import patch, Mock, mock_open import json diff --git a/tests/test_drive.py b/tests/test_drive.py index c724f03..1234447 100644 --- a/tests/test_drive.py +++ b/tests/test_drive.py @@ -1,5 +1,5 @@ import pytest -from codecompasslib.API.drive_operations import (list_shared_drive_contents, download_csv_as_pd_dataframe, +from codecompasslib.API.drive_old.drive_operations import (list_shared_drive_contents, download_csv_as_pd_dataframe, upload_df_to_drive_as_csv)