From 346ea7369710555940a4ec0fe70947e56fca89ab Mon Sep 17 00:00:00 2001 From: Nicolas IRAGNE Date: Thu, 31 Jul 2025 14:17:20 +0200 Subject: [PATCH] fix: make cache aware of subpaths --- compose.yml | 2 +- src/server/query_processor.py | 2 ++ src/server/s3_utils.py | 10 +++++++--- 3 files changed, 10 insertions(+), 4 deletions(-) diff --git a/compose.yml b/compose.yml index fd37406d..8f45a7bf 100644 --- a/compose.yml +++ b/compose.yml @@ -21,7 +21,7 @@ x-prod-environment: &prod-environment x-dev-environment: &dev-environment DEBUG: "true" - LOG_LEVEL: "debug" + LOG_LEVEL: "DEBUG" RELOAD: "true" GITINGEST_SENTRY_ENVIRONMENT: ${GITINGEST_SENTRY_ENVIRONMENT:-development} # S3 Configuration for development diff --git a/src/server/query_processor.py b/src/server/query_processor.py index d568a21f..03f52f16 100644 --- a/src/server/query_processor.py +++ b/src/server/query_processor.py @@ -90,6 +90,7 @@ async def _check_s3_cache( user_name=cast("str", query.user_name), repo_name=cast("str", query.repo_name), commit=query.commit, + subpath=query.subpath, include_patterns=query.include_patterns, ignore_patterns=query.ignore_patterns, ) @@ -168,6 +169,7 @@ def _store_digest_content( user_name=cast("str", query.user_name), repo_name=cast("str", query.repo_name), commit=query.commit, + subpath=query.subpath, include_patterns=query.include_patterns, ignore_patterns=query.ignore_patterns, ) diff --git a/src/server/s3_utils.py b/src/server/s3_utils.py index 80acea45..f02f0270 100644 --- a/src/server/s3_utils.py +++ b/src/server/s3_utils.py @@ -62,6 +62,7 @@ def generate_s3_file_path( user_name: str, repo_name: str, commit: str, + subpath: str, include_patterns: set[str] | None, ignore_patterns: set[str], ) -> str: @@ -69,7 +70,7 @@ def generate_s3_file_path( The file path is formatted as: [/]ingest////// - /-.txt + /--.txt If S3_DIRECTORY_PREFIX environment variable is set, it will be prefixed to the path. The commit-ID is always included in the URL. @@ -85,6 +86,8 @@ def generate_s3_file_path( Repository name. commit : str Commit hash. + subpath : str + Subpath of the repository. include_patterns : set[str] | None Set of patterns specifying which files to include. ignore_patterns : set[str] @@ -111,9 +114,10 @@ def generate_s3_file_path( patterns_str = f"include:{sorted(include_patterns) if include_patterns else []}" patterns_str += f"exclude:{sorted(ignore_patterns)}" patterns_hash = hashlib.sha256(patterns_str.encode()).hexdigest()[:16] + subpath_hash = hashlib.sha256(subpath.encode()).hexdigest()[:16] - # Build the base path using hostname directly - base_path = f"ingest/{hostname}/{user_name}/{repo_name}/{commit}/{patterns_hash}/{user_name}-{repo_name}.txt" + file_name = f"{user_name}-{repo_name}-{subpath_hash}.txt" + base_path = f"ingest/{hostname}/{user_name}/{repo_name}/{commit}/{patterns_hash}/{file_name}" # Check for S3_DIRECTORY_PREFIX environment variable s3_directory_prefix = os.getenv("S3_DIRECTORY_PREFIX")