codellm-devkit · rangeetpan · Sep 11, 2024 · Sep 6, 2024 · Sep 10, 2024 · Sep 10, 2024
diff --git a/cldk/analysis/java/codeanalyzer/codeanalyzer.py b/cldk/analysis/java/codeanalyzer/codeanalyzer.py
@@ -51,14 +51,15 @@ class JCodeanalyzer:
     """
 
     def __init__(
-            self,
-            project_dir: Union[str, Path],
-            source_code: str | None,
-            analysis_backend_path: Union[str, Path, None],
-            analysis_json_path: Union[str, Path, None],
-            analysis_level: str,
-            use_graalvm_binary: bool,
-            eager_analysis: bool,
+        self,
+        project_dir: Union[str, Path],
+        source_code: str | None,
+        analysis_backend_path: Union[str, Path, None],
+        analysis_json_path: Union[str, Path, None],
+        analysis_level: str,
+        use_graalvm_binary: bool,
+        eager_analysis: bool,
+        target_files: List[str] | None
     ) -> None:
         self.project_dir = project_dir
         self.source_code = source_code
@@ -67,6 +68,7 @@ def __init__(
         self.use_graalvm_binary = use_graalvm_binary
         self.eager_analysis = eager_analysis
         self.analysis_level = analysis_level
+        self.target_files = target_files
         self.application = self._init_codeanalyzer(
             analysis_level=1 if analysis_level == AnalysisLevel.symbol_table else 2)
         # Attributes related the Java code analysis...
@@ -198,11 +200,19 @@ def _init_codeanalyzer(self, analysis_level=1) -> JApplication:
         """
 
         codeanalyzer_exec = self._get_codeanalyzer_exec()
-
+        codeanalyzer_args = ''
         if self.analysis_json_path is None:
             logger.info("Reading analysis from the pipe.")
-            codeanalyzer_args = codeanalyzer_exec + shlex.split(
-                f"-i {Path(self.project_dir)} --analysis-level={analysis_level}")
+            # If target files are provided, each path is passed to codeanalyzer as its own `-t <file>` option
+            if self.target_files:
+                target_file_options = ' -t '.join([s.strip() for s in self.target_files])
+                codeanalyzer_args = codeanalyzer_exec + shlex.split(
+                 f"-i {Path(self.project_dir)} --analysis-level={analysis_level} -t {target_file_options}"
+                )
+            else:
+                codeanalyzer_args = codeanalyzer_exec + shlex.split(
+                    f"-i {Path(self.project_dir)} --analysis-level={analysis_level}"
+                )
             try:
                 logger.info(f"Running codeanalyzer: {' '.join(codeanalyzer_args)}")
                 console_out: CompletedProcess[str] = subprocess.run(
@@ -216,15 +226,29 @@ def _init_codeanalyzer(self, analysis_level=1) -> JApplication:
                 raise CodeanalyzerExecutionException(str(e)) from e
 
         else:
+            # Check if the code analyzer needs to be run
+            is_run_code_analyzer = False
             analysis_json_path_file = Path(self.analysis_json_path).joinpath("analysis.json")
-            if not analysis_json_path_file.exists() or self.eager_analysis:
-                # If the analysis file does not exist, we'll run the analysis. Alternately, if the eager_analysis
-                # flag is set, we'll run the analysis every time the object is created. This will happen regradless
-                # of the existence of the analysis file.
-                # Create the executable command for codeanalyzer.
+            # If target files are provided, each path is passed to codeanalyzer as its own `-t <file>` option
+            if self.target_files:
+                target_file_options = ' -t '.join([s.strip() for s in self.target_files])
                 codeanalyzer_args = codeanalyzer_exec + shlex.split(
-                    f"-i {Path(self.project_dir)} --analysis-level={analysis_level} -o {self.analysis_json_path}")
-
+                    f"-i {Path(self.project_dir)} --analysis-level={analysis_level}"
+                    f" -o {self.analysis_json_path} -t {target_file_options}"
+                )
+                is_run_code_analyzer = True
+            else:
+                if not analysis_json_path_file.exists() or self.eager_analysis:
+                    # If the analysis file does not exist, we'll run the analysis. Alternately, if the eager_analysis
+                    # flag is set, we'll run the analysis every time the object is created. This will happen regardless
+                    # of the existence of the analysis file.
+                    # Create the executable command for codeanalyzer.
+                    codeanalyzer_args = codeanalyzer_exec + shlex.split(
+                            f"-i {Path(self.project_dir)} --analysis-level={analysis_level} -o {self.analysis_json_path}"
+                        )
+                    is_run_code_analyzer = True
+
+            if is_run_code_analyzer:
                 try:
                     logger.info(f"Running codeanalyzer subprocess with args {codeanalyzer_args}")
                     subprocess.run(
@@ -238,7 +262,6 @@ def _init_codeanalyzer(self, analysis_level=1) -> JApplication:
 
                 except Exception as e:
                     raise CodeanalyzerExecutionException(str(e)) from e
-
             with open(analysis_json_path_file) as f:
                 data = json.load(f)
                 return JApplication(**data)
@@ -252,7 +275,6 @@ def _codeanalyzer_single_file(self):
         JApplication
             The application view of the Java code with the analysis results.
         """
-        # self.source_code: str = re.sub(r"[\r\n\t\f\v]+", lambda x: " " if x.group() in "\t\f\v" else " ", self.source_code)
         codeanalyzer_exec = self._get_codeanalyzer_exec()
         codeanalyzer_args = ["--source-analysis", self.source_code]
         codeanalyzer_cmd = codeanalyzer_exec + codeanalyzer_args

diff --git a/cldk/analysis/java/java.py b/cldk/analysis/java/java.py
@@ -15,15 +15,16 @@
 class JavaAnalysis(SymbolTable, CallGraph):
 
     def __init__(
-        self,
-        project_dir: str | Path | None,
-        source_code: str | None,
-        analysis_backend: str,
-        analysis_backend_path: str | None,
-        analysis_json_path: str | Path | None,
-        analysis_level: str,
-        use_graalvm_binary: bool,
-        eager_analysis: bool,
+            self,
+            project_dir: str | Path | None,
+            source_code: str | None,
+            analysis_backend: str,
+            analysis_backend_path: str | None,
+            analysis_json_path: str | Path | None,
+            analysis_level: str,
+            target_files: List[str] | None,
+            use_graalvm_binary: bool,
+            eager_analysis: bool,
     ) -> None:
         """
         Parameters
@@ -44,7 +45,9 @@ def __init__(
         eager_analysis : bool, optional
             A flag indicating whether to perform eager analysis, defaults to False. If True, the analysis is performed
             eagerly. That is, the analysis.json file is created during analysis every time even if it already exists.
-
+        target_files : List[str] | None, optional
+            The target files for which the analysis will run or get modified. Currently, this feature is only supported
+            with symbol table analysis. In the future, we will add this feature to other analysis levels.
         Attributes
         ----------
         analysis_backend : JCodeQL | JApplication
@@ -59,7 +62,8 @@ def __init__(
         self.analysis_backend_path = analysis_backend_path
         self.eager_analysis = eager_analysis
         self.use_graalvm_binary = use_graalvm_binary
-        self.analysis_backend =  analysis_backend
+        self.analysis_backend = analysis_backend
+        self.target_files = target_files
         # Initialize the analysis analysis_backend
         if analysis_backend.lower() == "codeql":
             self.analysis_backend: JCodeQL = JCodeQL(self.project_dir, self.analysis_json_path)
@@ -72,6 +76,7 @@ def __init__(
                 analysis_json_path=self.analysis_json_path,
                 use_graalvm_binary=self.use_graalvm_binary,
                 analysis_backend_path=self.analysis_backend_path,
+                target_files=self.target_files
             )
         else:
             raise NotImplementedError(f"Support for {analysis_backend} has not been implemented yet.")
@@ -438,7 +443,9 @@ def get_implemented_interfaces(self, qualified_class_name) -> List[str]:
             raise NotImplementedError(f"Support for this functionality has not been implemented yet.")
         return self.backend.get_implemented_interfaces(qualified_class_name)
 
-    def __get_class_call_graph_using_symbol_table(self, qualified_class_name: str, method_signature: str | None = None) -> (List)[Tuple[JMethodDetail, JMethodDetail]]:
+    def __get_class_call_graph_using_symbol_table(self, qualified_class_name: str,
+                                                  method_signature: str | None = None) -> (List)[
+        Tuple[JMethodDetail, JMethodDetail]]:
         """
         A call graph using symbol table for a given class and a given method.
         Args:
@@ -478,7 +485,7 @@ def get_class_call_graph(self, qualified_class_name: str, method_signature: str
         """
         if using_symbol_table:
             return self.__get_class_call_graph_using_symbol_table(qualified_class_name=qualified_class_name,
-                                                                method_signature=method_signature)
+                                                                  method_signature=method_signature)
         if self.analysis_backend in [AnalysisEngine.CODEQL, AnalysisEngine.TREESITTER]:
             raise NotImplementedError(f"Support for this functionality has not been implemented yet.")
         return self.backend.get_class_call_graph(qualified_class_name, method_signature)

diff --git a/cldk/core.py b/cldk/core.py
@@ -1,8 +1,9 @@
 from pathlib import Path
 
-
 import logging
+from typing import List
 
+from cldk.analysis import AnalysisLevel
 from cldk.analysis.java import JavaAnalysis
 from cldk.analysis.java.treesitter import JavaSitter
 from cldk.utils.exceptions import CldkInitializationException
@@ -30,15 +31,16 @@ def __init__(self, language: str):
         self.language: str = language
 
     def analysis(
-        self,
-        project_path: str | Path | None = None,
-        source_code: str | None = None,
-        eager: bool = False,
-        analysis_backend: str | None = "codeanalyzer",
-        analysis_level: str = "symbol_table",
-        analysis_backend_path: str | None = None,
-        analysis_json_path: str | Path = None,
-        use_graalvm_binary: bool = False,
+            self,
+            project_path: str | Path | None = None,
+            source_code: str | None = None,
+            eager: bool = False,
+            analysis_backend: str | None = "codeanalyzer",
+            analysis_level: str = AnalysisLevel.symbol_table,
+            target_files: List[str] | None = None,
+            analysis_backend_path: str | None = None,
+            analysis_json_path: str | Path = None,
+            use_graalvm_binary: bool = False,
     ) -> JavaAnalysis:
         """
         Initialize the preprocessor based on the specified language and analysis_backend.
@@ -65,7 +67,11 @@ def analysis(
         eager : bool, optional
             A flag indicating whether to perform eager analysis, defaults to False. If True, the analysis is performed
             eagerly. That is, the analysis.json file is created during analysis every time even if it already exists.
-
+        analysis_level: str, optional
+            Analysis levels. Refer to AnalysisLevel.
+        target_files: List[str] | None, optional
+            The target files (paths) for which the analysis will run or get modified. Currently, this feature is only
+            supported with symbol table analysis. In the future, we will add this feature to other analysis levels.
         Returns
         -------
         JavaAnalysis
@@ -77,13 +83,19 @@ def analysis(
             If neither project_path nor source_code is provided.
         NotImplementedError
             If the specified language is not implemented yet.
+
+        Args:
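+            analysis_level: Analysis level to run. Refer to AnalysisLevel.
+            target_files: The target files (paths) for which the analysis will run or get modified.
+                Currently only supported with symbol table analysis.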
         """
 
         if project_path is None and source_code is None:
             raise CldkInitializationException("Either project_path or source_code must be provided.")
 
         if project_path is not None and source_code is not None:
-            raise CldkInitializationException("Both project_path and source_code are provided. Please provide " "only one.")
+            raise CldkInitializationException(
+                "Both project_path and source_code are provided. Please provide " "only one.")
 
         if self.language == "java":
             return JavaAnalysis(
@@ -94,6 +106,7 @@ def analysis(
                 analysis_backend_path=analysis_backend_path,
                 analysis_json_path=analysis_json_path,
                 use_graalvm_binary=use_graalvm_binary,
+                target_files=target_files,
                 eager_analysis=eager,
             )
         else:
@@ -114,7 +127,7 @@ def treesitter_parser(self):
         else:
             raise NotImplementedError(f"Treesitter parser for {self.language} is not implemented yet.")
 
-    def tree_sitter_utils(self, source_code: str) -> [TreesitterSanitizer| NotImplementedError]:
+    def tree_sitter_utils(self, source_code: str) -> [TreesitterSanitizer | NotImplementedError]:
         """
         Parse the project using treesitter.
 

diff --git a/cldk/models/java/models.py b/cldk/models/java/models.py
@@ -341,6 +341,7 @@ class JCompilationUnit(BaseModel):
     comment: str
     imports: List[str]
     type_declarations: Dict[str, JType]
+    is_modified: bool = False
 
 
 class JMethodDetail(BaseModel):