From 9f12dbd62a8c2fc3a7ef3e14b3571e3c2fd70e3e Mon Sep 17 00:00:00 2001 From: maudhelen Date: Sun, 5 May 2024 20:50:07 +0200 Subject: [PATCH 01/12] check --- frontend/recommender/app.py | 32 ++++++++----------- frontend/recommender/batch_recommendations.py | 23 +++++++++++++ 2 files changed, 37 insertions(+), 18 deletions(-) create mode 100644 frontend/recommender/batch_recommendations.py diff --git a/frontend/recommender/app.py b/frontend/recommender/app.py index 68b8cfc..1e82da7 100644 --- a/frontend/recommender/app.py +++ b/frontend/recommender/app.py @@ -1,6 +1,7 @@ import os import sys import streamlit as st +import redis import pandas as pd # Navigate to root directory @@ -12,11 +13,10 @@ sys.path.insert(0, real_project_dir) # Import necessary functions from codecompasslib -from codecompasslib.models.lightgbm_model import generate_lightGBM_recommendations, load_data +from codecompasslib.models.lightgbm_model import load_data # Function to load cached data def load_cached_data(): - # Check if data is already stored in session state if 'cached_data' not in st.session_state: with st.spinner('Fetching data from the server...'): # Load data @@ -25,34 +25,30 @@ def load_cached_data(): st.session_state.cached_data = load_data(full_data_folder_id, full_data_embedded_folder_id) return st.session_state.cached_data +# Connect to Redis +redis_client = redis.Redis(host='localhost', port=6379, db=0) + def main(): # Load the data df_non_embedded, df_embedded = load_cached_data() - # Set app title st.title('GitHub Repo Recommendation System') - - # Input for target user target_user = st.text_input("Enter the target user's username:") - - # Button to get recommendations + if st.button('Get Recommendations'): - # Check if user exists in the dataset - if target_user not in df_embedded['owner_user'].values: + recommendations = redis_client.get(target_user) + + if recommendations is None: st.error("User not found in the dataset. Please enter a valid username.") else: - # Generate recommendations - with st.spinner('Generating recommendations...'): - recommendations = generate_lightGBM_recommendations(target_user, df_non_embedded, df_embedded, number_of_recommendations=10) - - # Display recommendations + recommendations = eval(recommendations.decode("utf-8")) st.subheader("Recommendations") for index, repo in enumerate(recommendations): - name = df_non_embedded[df_non_embedded['id'] == repo[0]]['name'].values[0] - description = df_non_embedded[df_non_embedded['id'] == repo[0]]['description'].values[0] - link = f"https://github.com/{repo[1]}/{name}" + repo_id, owner = repo + name = df_non_embedded[df_non_embedded['id'] == repo_id]['name'].values[0] + description = df_non_embedded[df_non_embedded['id'] == repo_id]['description'].values[0] + link = f"https://github.com/{owner}/{name}" - # Display recommendation details in a card-like format with shadow st.markdown(f"""

{name}

diff --git a/frontend/recommender/batch_recommendations.py b/frontend/recommender/batch_recommendations.py new file mode 100644 index 0000000..491911d --- /dev/null +++ b/frontend/recommender/batch_recommendations.py @@ -0,0 +1,23 @@ +import redis +import pandas as pd +from codecompasslib.models.lightgbm_model import generate_lightGBM_recommendations, load_data + +def fetch_and_store_recommendations(): + # Load data + full_data_folder_id = '1Qiy9u03hUthqaoBDr4VQqhKwtLJ2O3Yd' + full_data_embedded_folder_id = '139wi78iRzhwGZwxmI5WALoYocR-Rk9By' + df_non_embedded, df_embedded = load_data(full_data_folder_id, full_data_embedded_folder_id) + + # Connect to Redis + redis_client = redis.Redis(host='localhost', port=6379, db=0) + + # Get unique users + unique_users = df_embedded['owner_user'].unique() + + # Compute recommendations for each user + for user in unique_users: + recommendations = generate_lightGBM_recommendations(user, df_non_embedded, df_embedded, number_of_recommendations=10) + redis_client.set(user, str(recommendations)) + +if __name__ == "__main__": + fetch_and_store_recommendations() From 9d32f049bccd65dd96a4fb96d8739f98ad3094c5 Mon Sep 17 00:00:00 2001 From: maudhelen Date: Sun, 5 May 2024 21:31:38 +0200 Subject: [PATCH 02/12] update --- frontend/recommender/app.py | 53 ++++++++++--------- frontend/recommender/batch_recommendations.py | 25 ++++++++- 2 files changed, 52 insertions(+), 26 deletions(-) diff --git a/frontend/recommender/app.py b/frontend/recommender/app.py index 1e82da7..18021ba 100644 --- a/frontend/recommender/app.py +++ b/frontend/recommender/app.py @@ -2,7 +2,7 @@ import sys import streamlit as st import redis -import pandas as pd +import json # Navigate to root directory root_dir = os.path.dirname(os.path.abspath(__file__)) @@ -19,14 +19,17 @@ def load_cached_data(): if 'cached_data' not in st.session_state: with st.spinner('Fetching data from the server...'): - # Load data full_data_folder_id = '1Qiy9u03hUthqaoBDr4VQqhKwtLJ2O3Yd' full_data_embedded_folder_id = '139wi78iRzhwGZwxmI5WALoYocR-Rk9By' st.session_state.cached_data = load_data(full_data_folder_id, full_data_embedded_folder_id) return st.session_state.cached_data # Connect to Redis -redis_client = redis.Redis(host='localhost', port=6379, db=0) +try: + redis_client = redis.Redis(host='localhost', port=6379, db=0) +except Exception as e: + st.error(f"Could not connect to Redis: {e}") + sys.exit(1) def main(): # Load the data @@ -34,28 +37,30 @@ def main(): st.title('GitHub Repo Recommendation System') target_user = st.text_input("Enter the target user's username:") - + if st.button('Get Recommendations'): - recommendations = redis_client.get(target_user) - - if recommendations is None: - st.error("User not found in the dataset. Please enter a valid username.") - else: - recommendations = eval(recommendations.decode("utf-8")) - st.subheader("Recommendations") - for index, repo in enumerate(recommendations): - repo_id, owner = repo - name = df_non_embedded[df_non_embedded['id'] == repo_id]['name'].values[0] - description = df_non_embedded[df_non_embedded['id'] == repo_id]['description'].values[0] - link = f"https://github.com/{owner}/{name}" - - st.markdown(f""" -
-

{name}

-

{description}

- View on GitHub -
- """, unsafe_allow_html=True) + try: + recommendations = redis_client.get(target_user) + if recommendations is None: + st.error("User not found in the dataset. Please enter a valid username.") + else: + recommendations = json.loads(recommendations.decode("utf-8")) + st.subheader("Recommendations") + for index, repo in enumerate(recommendations): + repo_id, owner = repo + name = df_non_embedded[df_non_embedded['id'] == repo_id]['name'].values[0] + description = df_non_embedded[df_non_embedded['id'] == repo_id]['description'].values[0] + link = f"https://github.com/{owner}/{name}" + + st.markdown(f""" +
+

{name}

+

{description}

+ View on GitHub +
+ """, unsafe_allow_html=True) + except Exception as e: + st.error(f"Could not retrieve recommendations: {e}") if __name__ == "__main__": main() diff --git a/frontend/recommender/batch_recommendations.py b/frontend/recommender/batch_recommendations.py index 491911d..92e6bd7 100644 --- a/frontend/recommender/batch_recommendations.py +++ b/frontend/recommender/batch_recommendations.py @@ -1,6 +1,17 @@ +import os +import sys + +# Set path to the root directory +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))) +print("Path:", sys.path) + import redis import pandas as pd +import json from codecompasslib.models.lightgbm_model import generate_lightGBM_recommendations, load_data +import logging + +logging.basicConfig(level=logging.INFO) def fetch_and_store_recommendations(): # Load data @@ -9,7 +20,11 @@ def fetch_and_store_recommendations(): df_non_embedded, df_embedded = load_data(full_data_folder_id, full_data_embedded_folder_id) # Connect to Redis - redis_client = redis.Redis(host='localhost', port=6379, db=0) + try: + redis_client = redis.Redis(host='localhost', port=6379, db=0) + except Exception as e: + logging.error(f"Could not connect to Redis: {e}") + return # Get unique users unique_users = df_embedded['owner_user'].unique() @@ -17,7 +32,13 @@ def fetch_and_store_recommendations(): # Compute recommendations for each user for user in unique_users: recommendations = generate_lightGBM_recommendations(user, df_non_embedded, df_embedded, number_of_recommendations=10) - redis_client.set(user, str(recommendations)) + recommendations_json = json.dumps(recommendations) + + try: + redis_client.set(user, recommendations_json) + logging.info(f"Stored recommendations for user {user}") + except Exception as e: + logging.error(f"Could not store recommendations for user {user}: {e}") if __name__ == "__main__": fetch_and_store_recommendations() From fdfdcf9ef73f399900dcaea766221a0fc5c24460 Mon Sep 17 00:00:00 2001 From: maudhelen Date: Sun, 5 May 2024 22:21:12 +0200 Subject: [PATCH 03/12] test data --- recommendations.json | 1 + 1 file changed, 1 insertion(+) create mode 100644 recommendations.json diff --git a/recommendations.json b/recommendations.json new file mode 100644 index 0000000..46e5595 --- /dev/null +++ b/recommendations.json @@ -0,0 +1 @@ +[[15500812.0, "matthewbdaly", 1.0000000036274914e-15], [7778661.0, "Skellington-zz", 1.0000000036274914e-15], [41188271.0, "marvin-zhao", 1.0000000036274914e-15], [41188345.0, "marvin-zhao", 1.0000000036274914e-15], [42033762.0, "marvin-zhao", 1.0000000036274914e-15], [160783310.0, "marvin-zhao", 1.0000000036274914e-15], [13188377.0, "marvin-zhao", 1.0000000036274914e-15], [32130169.0, "marvin-zhao", 1.0000000036274914e-15], [191329936.0, "marvin-zhao", 1.0000000036274914e-15], [4689581.0, "marvin-zhao", 1.0000000036274914e-15]] \ No newline at end of file From 64fbb49ff713adb2fa8266752b8b7b74aba92905 Mon Sep 17 00:00:00 2001 From: maudhelen Date: Sun, 5 May 2024 22:21:21 +0200 Subject: [PATCH 04/12] save locally --- frontend/recommender/batch_recommendations.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/frontend/recommender/batch_recommendations.py b/frontend/recommender/batch_recommendations.py index 92e6bd7..2720052 100644 --- a/frontend/recommender/batch_recommendations.py +++ b/frontend/recommender/batch_recommendations.py @@ -34,6 +34,13 @@ def fetch_and_store_recommendations(): recommendations = generate_lightGBM_recommendations(user, df_non_embedded, df_embedded, number_of_recommendations=10) recommendations_json = json.dumps(recommendations) + # File path to save the recommendations + file_path = 'recommendations.json' + + # Write recommendations to a JSON file + with open(file_path, 'w') as file: + file.write(recommendations_json) + try: redis_client.set(user, recommendations_json) logging.info(f"Stored recommendations for user {user}") From bf4198a9e390ca64c9c8c333c30a774b7a3eb53a Mon Sep 17 00:00:00 2001 From: maudhelen Date: Sun, 5 May 2024 22:31:10 +0200 Subject: [PATCH 05/12] inital --- requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements.txt b/requirements.txt index f2f4b12..94ca7bb 100644 --- a/requirements.txt +++ b/requirements.txt @@ -18,5 +18,6 @@ openai >= 1.14.3 category-encoders==2.6.3 sentence_transformers==2.6.0 lightgbm==4.3.0 +redis From 7f9c319383ba36b03188a92f6a128f6bfa76856b Mon Sep 17 00:00:00 2001 From: maudhelen Date: Sun, 5 May 2024 22:51:58 +0200 Subject: [PATCH 06/12] update --- README.md | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index b253ed1..5e31266 100644 --- a/README.md +++ b/README.md @@ -56,11 +56,24 @@ pip install -r requirements.txt - `openAI_key`: Your OpenAI API key. ### **4.** Run: -Chatbot +**Chatbot** ``` streamlit run frontend/chatbot/app.py ``` -Recommender: +**Recommender:** +``` +redis-server +``` +Check server is running (Outputs pong if connected) +``` +redis-cli ping +``` +Check database by +``` +redis-cli + +KEYS * +``` ``` streamlit run frontend/recommender/app.py ``` From 9e8adfb468efb5a71f3aa5a37b1c9381da4aa495 Mon Sep 17 00:00:00 2001 From: maudhelen Date: Sun, 5 May 2024 22:52:05 +0200 Subject: [PATCH 07/12] use redis database --- frontend/recommender/app.py | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/frontend/recommender/app.py b/frontend/recommender/app.py index 18021ba..d042cf1 100644 --- a/frontend/recommender/app.py +++ b/frontend/recommender/app.py @@ -3,6 +3,7 @@ import streamlit as st import redis import json +from codecompasslib.models.lightgbm_model import load_data # Navigate to root directory root_dir = os.path.dirname(os.path.abspath(__file__)) @@ -12,9 +13,6 @@ # Add project directory to Python path sys.path.insert(0, real_project_dir) -# Import necessary functions from codecompasslib -from codecompasslib.models.lightgbm_model import load_data - # Function to load cached data def load_cached_data(): if 'cached_data' not in st.session_state: @@ -24,14 +22,14 @@ def load_cached_data(): st.session_state.cached_data = load_data(full_data_folder_id, full_data_embedded_folder_id) return st.session_state.cached_data -# Connect to Redis -try: - redis_client = redis.Redis(host='localhost', port=6379, db=0) -except Exception as e: - st.error(f"Could not connect to Redis: {e}") - sys.exit(1) - def main(): + # Connect to Redis + try: + redis_client = redis.Redis(host='localhost', port=6379, db=0) + except Exception as e: + st.error(f"Could not connect to Redis: {e}") + sys.exit(1) + # Load the data df_non_embedded, df_embedded = load_cached_data() From fa2aa014097821d99bb12c5807095e5144f6d697 Mon Sep 17 00:00:00 2001 From: maudhelen Date: Sun, 5 May 2024 23:03:43 +0200 Subject: [PATCH 08/12] logistic regression --- codecompasslib/models/lr_model.py | 139 ++++++++++++++++++++++++++++++ 1 file changed, 139 insertions(+) create mode 100644 codecompasslib/models/lr_model.py diff --git a/codecompasslib/models/lr_model.py b/codecompasslib/models/lr_model.py new file mode 100644 index 0000000..877d530 --- /dev/null +++ b/codecompasslib/models/lr_model.py @@ -0,0 +1,139 @@ +import os +import sys +import pandas as pd +from typing import Tuple, List +from pandas import DataFrame, concat +from numpy import ndarray, argsort +from sklearn.linear_model import LogisticRegression +from sklearn.model_selection import train_test_split +from category_encoders import ordinal +from lightgbm_model import preprocess_data +from codecompasslib.API.drive_operations import download_csv_as_pd_dataframe, get_creds_drive +from codecompasslib.API.get_bulk_data import get_stared_repos, get_user_repos + +# go up to root +# Construct the path to the root directory (one level up from embeddings) +root_dir = os.path.dirname(os.path.abspath(__file__)) +project_dir = os.path.dirname(root_dir) +real_project_dir = os.path.dirname(project_dir) +# Add the project directory to the Python path +sys.path.insert(0, real_project_dir) + + + + +def encode_csv(df: DataFrame, encoder, label_col: str, typ: str = "fit") -> Tuple[DataFrame, ndarray]: + """ + Encode the categorical columns in a DataFrame using the specified encoder. + :param df: The DataFrame to be encoded. + :param encoder: The encoder object used for encoding the categorical columns. + :param label_col: The name of the label column. + :param typ: The type of encoding to perform. Defaults to "fit". + :return: A tuple containing the encoded DataFrame and the label column values. + """ + if typ == "fit": + df = encoder.fit_transform(df) + else: + df = encoder.transform(df) + y: ndarray = df[label_col].values + del df[label_col] + return df, y + + +def train_logistic_regression_model(df_merged: DataFrame, label_col: str) -> Tuple[LogisticRegression, ordinal.OrdinalEncoder]: + """ + Trains a logistic regression model using the provided merged dataframe. + + This function trains a logistic regression model using the provided merged dataframe. It performs the following steps: + 1. Splits the merged dataframe into training, validation, and test sets. + 2. Encodes categorical columns using ordinal encoding. + 3. Trains the logistic regression model using the training data. + 4. Returns the trained logistic regression model and the ordinal encoder. + + Note: This function assumes that the merged dataframe has the following columns: + - 'target': The target variable to be predicted. + - 'id': An identifier column. + - 'owner_user': A column representing the owner user. + - 'embedding_0' to 'embedding_255': Numerical columns representing the embeddings. + - 'language': A categorical column representing the language. + - 'stars': A numerical column representing the number of stars. + + :param df_merged: DataFrame containing the training data. + :param label_col: The name of the target variable column. + :return: A tuple containing the trained logistic regression model and the ordinal encoder. + """ + + print("Training logistic regression model") + + X: DataFrame = df_merged.drop(columns=['target']) # drop columns not used for training + y: DataFrame = df_merged[label_col] + + # Dataset is imbalanced -> make sure that the stratify parameter is set + X_combined, X_test, y_combined, y_test = train_test_split(X, y, test_size=0.1, random_state=42, stratify=y) + X_train, X_val, y_train, y_val = train_test_split(X_combined, y_combined, test_size=0.1, random_state=42, + stratify=y_combined) + + # combine X_train and y_train + train_data = concat([X_train, y_train], axis=1) + valid_data = concat([X_val, y_val], axis=1) + test_data = concat([X_test, y_test], axis=1) + + cate_cols = ['language'] + ord_encoder: ordinal.OrdinalEncoder = ordinal.OrdinalEncoder(cols=cate_cols) + + train_x, train_y = encode_csv(train_data, ord_encoder, label_col) + valid_x, valid_y = encode_csv(valid_data, ord_encoder, label_col, "transform") + test_x, test_y = encode_csv(test_data, ord_encoder, label_col, "transform") + + logistic_regression_model = LogisticRegression() + logistic_regression_model.fit(train_x, train_y) + + return logistic_regression_model, ord_encoder + + +def generate_logistic_regression_recommendations(target_user: str, df_non_embedded: DataFrame, + df_embedded: DataFrame, number_of_recommendations: int = 10) -> list: + """ + Generates recommendations using the logistic regression model. + + Args: + target_user (str): The target user for whom recommendations are generated. + df_non_embedded (DataFrame): The non-embedded data frame containing the features. + df_embedded (DataFrame): The embedded data frame containing the features. + number_of_recommendations (int, optional): The number of recommendations to generate. Defaults to 10. + + Returns: + list: A list of recommendations, each containing the repository name, owner user, and prediction score. + """ + # Preprocess data + label_col: str = 'target' + df_merged, starred_or_owned_by_user = preprocess_data(df_embedded, df_non_embedded, label_col, target_user) + + df_training_ready: DataFrame = df_merged.drop(columns=['id', 'owner_user']) + + logistic_regression_model: LogisticRegression + ord_encoder: ordinal.OrdinalEncoder + # Train logistic regression model + logistic_regression_model, ord_encoder = train_logistic_regression_model(df_training_ready, label_col) + + # Make predictions for all repos + full_dataset_x, full_dataset_y = encode_csv(df_training_ready, ord_encoder, label_col, "transform") + all_preds = logistic_regression_model.predict_proba(full_dataset_x)[:, 1] + + # Get sorted predictions with the highest one first + top_indices = argsort(all_preds)[::-1] + + # Get the top recommendations + recommendations: list = [] + counter: int = 0 + for index in top_indices: + if counter == number_of_recommendations: + break + # disregard if the repo is already starred by the user + if df_merged.iloc[index]['id'] in starred_or_owned_by_user: + continue + else: + counter += 1 + recommendations.append((df_merged.iloc[index]['id'], df_merged.iloc[index]['owner_user'], all_preds[index])) + + return recommendations From 905288082a0d3250a034f7d28e5e27d5d30dfe4d Mon Sep 17 00:00:00 2001 From: maudhelen Date: Mon, 6 May 2024 00:41:35 +0200 Subject: [PATCH 09/12] fill redis with google dirve data --- frontend/recommender/fill_redis.py | 48 ++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) create mode 100644 frontend/recommender/fill_redis.py diff --git a/frontend/recommender/fill_redis.py b/frontend/recommender/fill_redis.py new file mode 100644 index 0000000..b6e8dfd --- /dev/null +++ b/frontend/recommender/fill_redis.py @@ -0,0 +1,48 @@ +import os +import sys +import streamlit as st +import redis +import json +import pandas as pd + +# Navigate to root directory +root_dir = os.path.dirname(os.path.abspath(__file__)) +project_dir = os.path.dirname(root_dir) +real_project_dir = os.path.dirname(project_dir) + +# Add project directory to Python path +sys.path.insert(0, real_project_dir) + +# Import necessary functions from codecompasslib +from codecompasslib.models.lightgbm_model import load_data + + +def fill_redis_with_data(): + try: + # Load the data + full_data_folder_id = '1Qiy9u03hUthqaoBDr4VQqhKwtLJ2O3Yd' + full_data_embedded_folder_id = '139wi78iRzhwGZwxmI5WALoYocR-Rk9By' + df_non_embedded, df_embedded = load_data(full_data_folder_id, full_data_embedded_folder_id) + + # Convert DataFrames to JSON format + df_non_embedded_json = df_non_embedded.to_json(orient='records') + df_embedded_json = df_embedded.to_json(orient='records') + + # Connect to Redis + redis_client = redis.Redis(host='localhost', port=6379, db=0) + + # Store the JSON strings in Redis + print("Storing data in Redis...") + + print("Not embedded df saving ...") + redis_client.set('df_non_embedded', df_non_embedded_json) + + print("Embedded df saving ...") + redis_client.set('df_embedded', df_embedded_json) + + print("Data stored in Redis successfully.") + except Exception as e: + print(f"Error: {e}") + +if __name__ == "__main__": + fill_redis_with_data() From fc2f3d02c08bbf0820ac77deb8cde92e02f338a7 Mon Sep 17 00:00:00 2001 From: maudhelen Date: Mon, 6 May 2024 01:24:17 +0200 Subject: [PATCH 10/12] fill data --- frontend/recommender/fill_redis.py | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/frontend/recommender/fill_redis.py b/frontend/recommender/fill_redis.py index b6e8dfd..8c759d7 100644 --- a/frontend/recommender/fill_redis.py +++ b/frontend/recommender/fill_redis.py @@ -1,6 +1,5 @@ import os import sys -import streamlit as st import redis import json import pandas as pd @@ -24,9 +23,25 @@ def fill_redis_with_data(): full_data_embedded_folder_id = '139wi78iRzhwGZwxmI5WALoYocR-Rk9By' df_non_embedded, df_embedded = load_data(full_data_folder_id, full_data_embedded_folder_id) - # Convert DataFrames to JSON format - df_non_embedded_json = df_non_embedded.to_json(orient='records') - df_embedded_json = df_embedded.to_json(orient='records') + print("Checkpoint 1") + # Convert DataFrames to CSV + df_non_embedded_csv = df_non_embedded.to_csv(index=False) + df_embedded_csv = df_embedded.to_csv(index=False) + + #print first 3 rows of the csv + print("\nFirst 3 rows of the csv") + print(df_non_embedded_csv[:3]) + + # Convert CSV to JSON + print("Checkpoint 2") + df_non_embedded_json = json.loads(df_non_embedded_csv) + df_embedded_json = json.loads(df_embedded_csv) + + #print first 3 rows of the json + print("\nFirst 3 rows of the json") + print(df_non_embedded_json[:3]) + + print("Checkpoint 3") # Connect to Redis redis_client = redis.Redis(host='localhost', port=6379, db=0) From 7c5e92b5aae57926a1c5522757a7fe85e28a515d Mon Sep 17 00:00:00 2001 From: maudhelen Date: Mon, 6 May 2024 01:24:52 +0200 Subject: [PATCH 11/12] use redis --- frontend/recommender/app.py | 89 +++++++++++++++++-------------------- 1 file changed, 41 insertions(+), 48 deletions(-) diff --git a/frontend/recommender/app.py b/frontend/recommender/app.py index d042cf1..22733a2 100644 --- a/frontend/recommender/app.py +++ b/frontend/recommender/app.py @@ -3,62 +3,55 @@ import streamlit as st import redis import json -from codecompasslib.models.lightgbm_model import load_data +import pandas as pd -# Navigate to root directory -root_dir = os.path.dirname(os.path.abspath(__file__)) -project_dir = os.path.dirname(root_dir) -real_project_dir = os.path.dirname(project_dir) - -# Add project directory to Python path -sys.path.insert(0, real_project_dir) - -# Function to load cached data -def load_cached_data(): - if 'cached_data' not in st.session_state: - with st.spinner('Fetching data from the server...'): - full_data_folder_id = '1Qiy9u03hUthqaoBDr4VQqhKwtLJ2O3Yd' - full_data_embedded_folder_id = '139wi78iRzhwGZwxmI5WALoYocR-Rk9By' - st.session_state.cached_data = load_data(full_data_folder_id, full_data_embedded_folder_id) - return st.session_state.cached_data - -def main(): - # Connect to Redis +# Function to retrieve recommendations from Redis +def retrieve_recommendations_from_redis(target_user): try: + # Connect to Redis redis_client = redis.Redis(host='localhost', port=6379, db=0) - except Exception as e: - st.error(f"Could not connect to Redis: {e}") - sys.exit(1) - # Load the data - df_non_embedded, df_embedded = load_cached_data() + # Retrieve recommendations from Redis + recommendations = redis_client.get(target_user) + if recommendations: + return json.loads(recommendations.decode("utf-8")) + else: + return None + except Exception as e: + st.error(f"Could not fetch recommendations from Redis: {e}") + return None + +def main(): + # Set app title st.title('GitHub Repo Recommendation System') + + # Input for target user target_user = st.text_input("Enter the target user's username:") - if st.button('Get Recommendations'): - try: - recommendations = redis_client.get(target_user) - if recommendations is None: - st.error("User not found in the dataset. Please enter a valid username.") - else: - recommendations = json.loads(recommendations.decode("utf-8")) - st.subheader("Recommendations") - for index, repo in enumerate(recommendations): - repo_id, owner = repo - name = df_non_embedded[df_non_embedded['id'] == repo_id]['name'].values[0] - description = df_non_embedded[df_non_embedded['id'] == repo_id]['description'].values[0] - link = f"https://github.com/{owner}/{name}" - - st.markdown(f""" -
-

{name}

-

{description}

- View on GitHub -
- """, unsafe_allow_html=True) - except Exception as e: - st.error(f"Could not retrieve recommendations: {e}") + # Button to retrieve and display recommendations + if st.button('Retrieve and Display Recommendations'): + # Retrieve recommendations from Redis + retrieved_recommendations = retrieve_recommendations_from_redis(target_user) + + if retrieved_recommendations: + # Display recommendations + st.subheader("Recommendations") + for index, repo in enumerate(retrieved_recommendations): + name = repo[1] # Assuming the second element in the recommendation tuple is the repo name + description = "" # You may need to fetch description from Redis or another source + link = f"https://github.com/{repo[1]}/{name}" + + # Display recommendation details in a card-like format with shadow + st.markdown(f""" +
+

{name}

+

{description}

+ View on GitHub +
+ """, unsafe_allow_html=True) + else: + st.error("No recommendations found for the target user.") if __name__ == "__main__": main() From a8960e6c5ca75d125ed35787a32fe0d269e447a3 Mon Sep 17 00:00:00 2001 From: maudhelen Date: Mon, 6 May 2024 01:25:01 +0200 Subject: [PATCH 12/12] delete --- frontend/recommender/batch_recommendations.py | 51 ------------------- 1 file changed, 51 deletions(-) delete mode 100644 frontend/recommender/batch_recommendations.py diff --git a/frontend/recommender/batch_recommendations.py b/frontend/recommender/batch_recommendations.py deleted file mode 100644 index 2720052..0000000 --- a/frontend/recommender/batch_recommendations.py +++ /dev/null @@ -1,51 +0,0 @@ -import os -import sys - -# Set path to the root directory -sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))) -print("Path:", sys.path) - -import redis -import pandas as pd -import json -from codecompasslib.models.lightgbm_model import generate_lightGBM_recommendations, load_data -import logging - -logging.basicConfig(level=logging.INFO) - -def fetch_and_store_recommendations(): - # Load data - full_data_folder_id = '1Qiy9u03hUthqaoBDr4VQqhKwtLJ2O3Yd' - full_data_embedded_folder_id = '139wi78iRzhwGZwxmI5WALoYocR-Rk9By' - df_non_embedded, df_embedded = load_data(full_data_folder_id, full_data_embedded_folder_id) - - # Connect to Redis - try: - redis_client = redis.Redis(host='localhost', port=6379, db=0) - except Exception as e: - logging.error(f"Could not connect to Redis: {e}") - return - - # Get unique users - unique_users = df_embedded['owner_user'].unique() - - # Compute recommendations for each user - for user in unique_users: - recommendations = generate_lightGBM_recommendations(user, df_non_embedded, df_embedded, number_of_recommendations=10) - recommendations_json = json.dumps(recommendations) - - # File path to save the recommendations - file_path = 'recommendations.json' - - # Write recommendations to a JSON file - with open(file_path, 'w') as file: - file.write(recommendations_json) - - try: - redis_client.set(user, recommendations_json) - logging.info(f"Stored recommendations for user {user}") - except Exception as e: - logging.error(f"Could not store recommendations for user {user}: {e}") - -if __name__ == "__main__": - fetch_and_store_recommendations()