From c5a89c04957af49d0a99edbcd059861d6a632229 Mon Sep 17 00:00:00 2001
From: Rangeet Pan <Rangeet.Pan@ibm.com>
Date: Fri, 6 Sep 2024 10:43:52 -0400
Subject: [PATCH 1/5] support for adding incremental analysis

---
 .../java/codeanalyzer/codeanalyzer.py         | 33 ++++++++++++----
 cldk/analysis/java/java.py                    | 33 +++++++++-------
 cldk/core.py                                  | 39 ++++++++++++-------
 cldk/models/java/models.py                    |  1 +
 4 files changed, 72 insertions(+), 34 deletions(-)

diff --git a/cldk/analysis/java/codeanalyzer/codeanalyzer.py b/cldk/analysis/java/codeanalyzer/codeanalyzer.py
index eff3fa8..4a6bb2f 100644
--- a/cldk/analysis/java/codeanalyzer/codeanalyzer.py
+++ b/cldk/analysis/java/codeanalyzer/codeanalyzer.py
@@ -64,6 +64,7 @@ def __init__(
         analysis_level: str,
         use_graalvm_binary: bool,
         eager_analysis: bool,
+        target_files: List[str] | None
     ) -> None:
         self.project_dir = project_dir
         self.source_code = source_code
@@ -72,6 +73,7 @@ def __init__(
         self.use_graalvm_binary = use_graalvm_binary
         self.eager_analysis = eager_analysis
         self.analysis_level = analysis_level
+        self.target_files = target_files
         self.application = self._init_codeanalyzer(
             analysis_level=1 if analysis_level == "symbol_table" else 2
         )
@@ -230,9 +232,17 @@ def _init_codeanalyzer(self, analysis_level=1) -> JApplication:
 
         if self.analysis_json_path is None:
             logger.info("Reading analysis from the pipe.")
-            codeanalyzer_args = codeanalyzer_exec + shlex.split(
-                f"-i {Path(self.project_dir)} --analysis-level={analysis_level}"
-            )
+            codeanalyzer_args = ''
+            # If target file is provided, the input is merged into a single string and passed to codeanalyzer
+            if self.target_files:
+                target_file_options = ' '.join([s.strip() for s in self.target_files])
+                codeanalyzer_args = codeanalyzer_exec + shlex.split(
+                 f"-i {Path(self.project_dir)} --analysis-level={analysis_level} --target-files={target_file_options}"
+                )
+            else:
+                codeanalyzer_args = codeanalyzer_exec + shlex.split(
+                    f"-i {Path(self.project_dir)} --analysis-level={analysis_level}"
+                )
             try:
                 logger.info(f"Running codeanalyzer: {' '.join(codeanalyzer_args)}")
                 console_out: CompletedProcess[str] = subprocess.run(
@@ -254,10 +264,18 @@ def _init_codeanalyzer(self, analysis_level=1) -> JApplication:
                 # flag is set, we'll run the analysis every time the object is created. This will happen regradless
                 # of the existence of the analysis file.
                 # Create the executable command for codeanalyzer.
-                codeanalyzer_args = codeanalyzer_exec + shlex.split(
-                    f"-i {Path(self.project_dir)} --analysis-level={analysis_level} -o {self.analysis_json_path}"
-                )
-
+                codeanalyzer_args = ''
+                # If target file is provided, the input is merged into a single string and passed to codeanalyzer
+                if self.target_files:
+                    target_file_options = ' '.join([s.strip() for s in self.target_files])
+                    codeanalyzer_args = codeanalyzer_exec + shlex.split(
+                        f"-i {Path(self.project_dir)} --analysis-level={analysis_level}"
+                        f" -o {self.analysis_json_path} --target-files={target_file_options}"
+                    )
+                else:
+                    codeanalyzer_args = codeanalyzer_exec + shlex.split(
+                        f"-i {Path(self.project_dir)} --analysis-level={analysis_level} -o {self.analysis_json_path}"
+                    )
                 try:
                     logger.info(
                         f"Running codeanalyzer subprocess with args {codeanalyzer_args}"
@@ -289,7 +307,6 @@ def _codeanalyzer_single_file(self):
         JApplication
             The application view of the Java code with the analysis results.
         """
-        # self.source_code: str = re.sub(r"[\r\n\t\f\v]+", lambda x: " " if x.group() in "\t\f\v" else " ", self.source_code)
         codeanalyzer_exec = self._get_codeanalyzer_exec()
         codeanalyzer_args = ["--source-analysis", self.source_code]
         codeanalyzer_cmd = codeanalyzer_exec + codeanalyzer_args
diff --git a/cldk/analysis/java/java.py b/cldk/analysis/java/java.py
index 0b02684..5d29564 100644
--- a/cldk/analysis/java/java.py
+++ b/cldk/analysis/java/java.py
@@ -15,15 +15,16 @@
 class JavaAnalysis(SymbolTable, CallGraph):
 
     def __init__(
-        self,
-        project_dir: str | Path | None,
-        source_code: str | None,
-        analysis_backend: str,
-        analysis_backend_path: str | None,
-        analysis_json_path: str | Path | None,
-        analysis_level: str,
-        use_graalvm_binary: bool,
-        eager_analysis: bool,
+            self,
+            project_dir: str | Path | None,
+            source_code: str | None,
+            analysis_backend: str,
+            analysis_backend_path: str | None,
+            analysis_json_path: str | Path | None,
+            analysis_level: str,
+            target_files: List[str] | None,
+            use_graalvm_binary: bool,
+            eager_analysis: bool,
     ) -> None:
         """
         Parameters
@@ -44,7 +45,9 @@ def __init__(
         eager_analysis : bool, optional
             A flag indicating whether to perform eager analysis, defaults to False. If True, the analysis is performed
             eagerly. That is, the analysis.json file is created during analysis every time even if it already exists.
-
+        target_files : List[str] | None, optional
+            The target files for which the analysis will run or get modified. Currently, this feature is only supported
+            with symbol table analysis. In the future, we will add this feature to other analysis levels.
         Attributes
         ----------
         analysis_backend : JCodeQL | JApplication
@@ -59,7 +62,8 @@ def __init__(
         self.analysis_backend_path = analysis_backend_path
         self.eager_analysis = eager_analysis
         self.use_graalvm_binary = use_graalvm_binary
-        self.analysis_backend =  analysis_backend
+        self.analysis_backend = analysis_backend
+        self.target_files = target_files
         # Initialize the analysis analysis_backend
         if analysis_backend.lower() == "codeql":
             self.analysis_backend: JCodeQL = JCodeQL(self.project_dir, self.analysis_json_path)
@@ -72,6 +76,7 @@ def __init__(
                 analysis_json_path=self.analysis_json_path,
                 use_graalvm_binary=self.use_graalvm_binary,
                 analysis_backend_path=self.analysis_backend_path,
+                target_files=self.target_files
             )
         else:
             raise NotImplementedError(f"Support for {analysis_backend} has not been implemented yet.")
@@ -438,7 +443,9 @@ def get_implemented_interfaces(self, qualified_class_name) -> List[str]:
             raise NotImplementedError(f"Support for this functionality has not been implemented yet.")
         return self.backend.get_implemented_interfaces(qualified_class_name)
 
-    def __get_class_call_graph_using_symbol_table(self, qualified_class_name: str, method_signature: str | None = None) -> (List)[Tuple[JMethodDetail, JMethodDetail]]:
+    def __get_class_call_graph_using_symbol_table(self, qualified_class_name: str,
+                                                  method_signature: str | None = None) -> (List)[
+        Tuple[JMethodDetail, JMethodDetail]]:
         """
         A call graph using symbol table for a given class and a given method.
         Args:
@@ -478,7 +485,7 @@ def get_class_call_graph(self, qualified_class_name: str, method_signature: str
         """
         if using_symbol_table:
             return self.__get_class_call_graph_using_symbol_table(qualified_class_name=qualified_class_name,
-                                                                method_signature=method_signature)
+                                                                  method_signature=method_signature)
         if self.analysis_backend in [AnalysisEngine.CODEQL, AnalysisEngine.TREESITTER]:
             raise NotImplementedError(f"Support for this functionality has not been implemented yet.")
         return self.backend.get_class_call_graph(qualified_class_name, method_signature)
diff --git a/cldk/core.py b/cldk/core.py
index ec6f07b..91445c9 100644
--- a/cldk/core.py
+++ b/cldk/core.py
@@ -1,8 +1,9 @@
 from pathlib import Path
 
-
 import logging
+from typing import List
 
+from cldk.analysis import AnalysisLevel
 from cldk.analysis.java import JavaAnalysis
 from cldk.analysis.java.treesitter import JavaSitter
 from cldk.utils.exceptions import CldkInitializationException
@@ -30,15 +31,16 @@ def __init__(self, language: str):
         self.language: str = language
 
     def analysis(
-        self,
-        project_path: str | Path | None = None,
-        source_code: str | None = None,
-        eager: bool = False,
-        analysis_backend: str | None = "codeanalyzer",
-        analysis_level: str = "symbol_table",
-        analysis_backend_path: str | None = None,
-        analysis_json_path: str | Path = None,
-        use_graalvm_binary: bool = False,
+            self,
+            project_path: str | Path | None = None,
+            source_code: str | None = None,
+            eager: bool = False,
+            analysis_backend: str | None = "codeanalyzer",
+            analysis_level: str = AnalysisLevel.symbol_table,
+            target_files: List[str] | None = None,
+            analysis_backend_path: str | None = None,
+            analysis_json_path: str | Path = None,
+            use_graalvm_binary: bool = False,
     ) -> JavaAnalysis:
         """
         Initialize the preprocessor based on the specified language and analysis_backend.
@@ -65,7 +67,11 @@ def analysis(
         eager : bool, optional
             A flag indicating whether to perform eager analysis, defaults to False. If True, the analysis is performed
             eagerly. That is, the analysis.json file is created during analysis every time even if it already exists.
-
+        analysis_level: str, optional
+            Analysis levels. Refer to AnalysisLevel.
+        target_files: List[str] | None, optional
+            The target files (paths) for which the analysis will run or get modified. Currently, this feature is only supported
+            with symbol table analysis. In the future, we will add this feature to other analysis levels.
         Returns
         -------
         JavaAnalysis
@@ -77,13 +83,19 @@ def analysis(
             If neither project_path nor source_code is provided.
         NotImplementedError
             If the specified language is not implemented yet.
+
+        Args:
+            analysis_level:
+            target_files:
+            analysis_level:
         """
 
         if project_path is None and source_code is None:
             raise CldkInitializationException("Either project_path or source_code must be provided.")
 
         if project_path is not None and source_code is not None:
-            raise CldkInitializationException("Both project_path and source_code are provided. Please provide " "only one.")
+            raise CldkInitializationException(
+                "Both project_path and source_code are provided. Please provide " "only one.")
 
         if self.language == "java":
             return JavaAnalysis(
@@ -94,6 +106,7 @@ def analysis(
                 analysis_backend_path=analysis_backend_path,
                 analysis_json_path=analysis_json_path,
                 use_graalvm_binary=use_graalvm_binary,
+                target_files=target_files,
                 eager_analysis=eager,
             )
         else:
@@ -114,7 +127,7 @@ def treesitter_parser(self):
         else:
             raise NotImplementedError(f"Treesitter parser for {self.language} is not implemented yet.")
 
-    def tree_sitter_utils(self, source_code: str) -> [TreesitterSanitizer| NotImplementedError]:
+    def tree_sitter_utils(self, source_code: str) -> [TreesitterSanitizer | NotImplementedError]:
         """
         Parse the project using treesitter.
 
diff --git a/cldk/models/java/models.py b/cldk/models/java/models.py
index 25713e2..0743502 100644
--- a/cldk/models/java/models.py
+++ b/cldk/models/java/models.py
@@ -291,6 +291,7 @@ class JType(BaseModel):
     is_record_declaration: bool = False
     is_concrete_class: bool = False
     is_entry_point: bool = False
+    is_modified: bool = False
     comment: str
     extends_list: List[str] = []
     implements_list: List[str] = []

From 399199c3cb6db56bfd737f4bd5c0a32ecc3766c0 Mon Sep 17 00:00:00 2001
From: Rangeet Pan <Rangeet.Pan@ibm.com>
Date: Mon, 9 Sep 2024 20:01:21 -0400
Subject: [PATCH 2/5] change subprocess command to add -t for target files

---
 cldk/analysis/java/codeanalyzer/codeanalyzer.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/cldk/analysis/java/codeanalyzer/codeanalyzer.py b/cldk/analysis/java/codeanalyzer/codeanalyzer.py
index 4a6bb2f..207a348 100644
--- a/cldk/analysis/java/codeanalyzer/codeanalyzer.py
+++ b/cldk/analysis/java/codeanalyzer/codeanalyzer.py
@@ -235,9 +235,9 @@ def _init_codeanalyzer(self, analysis_level=1) -> JApplication:
             codeanalyzer_args = ''
             # If target file is provided, the input is merged into a single string and passed to codeanalyzer
             if self.target_files:
-                target_file_options = ' '.join([s.strip() for s in self.target_files])
+                target_file_options = '-t '.join([s.strip() for s in self.target_files])
                 codeanalyzer_args = codeanalyzer_exec + shlex.split(
-                 f"-i {Path(self.project_dir)} --analysis-level={analysis_level} --target-files={target_file_options}"
+                 f"-i {Path(self.project_dir)} --analysis-level={analysis_level} -t {target_file_options}"
                 )
             else:
                 codeanalyzer_args = codeanalyzer_exec + shlex.split(
@@ -267,10 +267,10 @@ def _init_codeanalyzer(self, analysis_level=1) -> JApplication:
                 codeanalyzer_args = ''
                 # If target file is provided, the input is merged into a single string and passed to codeanalyzer
                 if self.target_files:
-                    target_file_options = ' '.join([s.strip() for s in self.target_files])
+                    target_file_options = '-t '.join([s.strip() for s in self.target_files])
                     codeanalyzer_args = codeanalyzer_exec + shlex.split(
                         f"-i {Path(self.project_dir)} --analysis-level={analysis_level}"
-                        f" -o {self.analysis_json_path} --target-files={target_file_options}"
+                        f" -o {self.analysis_json_path} -t {target_file_options}"
                     )
                 else:
                     codeanalyzer_args = codeanalyzer_exec + shlex.split(

From 50df25f89a69fab39d813183bee9b07d210c2e04 Mon Sep 17 00:00:00 2001
From: Rangeet Pan <Rangeet.Pan@ibm.com>
Date: Mon, 9 Sep 2024 20:03:45 -0400
Subject: [PATCH 3/5] move is_modified flag from JType to JCompilationUnit

---
 cldk/models/java/models.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cldk/models/java/models.py b/cldk/models/java/models.py
index 0743502..d34e3e9 100644
--- a/cldk/models/java/models.py
+++ b/cldk/models/java/models.py
@@ -291,7 +291,6 @@ class JType(BaseModel):
     is_record_declaration: bool = False
     is_concrete_class: bool = False
     is_entry_point: bool = False
-    is_modified: bool = False
     comment: str
     extends_list: List[str] = []
     implements_list: List[str] = []
@@ -342,6 +341,7 @@ class JCompilationUnit(BaseModel):
     comment: str
     imports: List[str]
     type_declarations: Dict[str, JType]
+    is_modified: bool = False
 
 
 class JMethodDetail(BaseModel):

From 5e2d45cbc961fcfcc84bc3085707851daead094d Mon Sep 17 00:00:00 2001
From: Rangeet Pan <Rangeet.Pan@ibm.com>
Date: Mon, 9 Sep 2024 21:44:46 -0400
Subject: [PATCH 4/5] add logic to run codeanalyzer when target files are
 provided

---
 .../java/codeanalyzer/codeanalyzer.py         | 41 ++++++++++---------
 1 file changed, 22 insertions(+), 19 deletions(-)

diff --git a/cldk/analysis/java/codeanalyzer/codeanalyzer.py b/cldk/analysis/java/codeanalyzer/codeanalyzer.py
index 355c7ec..c6068f1 100644
--- a/cldk/analysis/java/codeanalyzer/codeanalyzer.py
+++ b/cldk/analysis/java/codeanalyzer/codeanalyzer.py
@@ -200,10 +200,9 @@ def _init_codeanalyzer(self, analysis_level=1) -> JApplication:
         """
 
         codeanalyzer_exec = self._get_codeanalyzer_exec()
-
+        codeanalyzer_args = ''
         if self.analysis_json_path is None:
             logger.info("Reading analysis from the pipe.")
-            codeanalyzer_args = ''
             # If target file is provided, the input is merged into a single string and passed to codeanalyzer
             if self.target_files:
                 target_file_options = '-t '.join([s.strip() for s in self.target_files])
@@ -227,24 +226,29 @@ def _init_codeanalyzer(self, analysis_level=1) -> JApplication:
                 raise CodeanalyzerExecutionException(str(e)) from e
 
         else:
+            # Check if the code analyzer needs to be run
+            is_run_code_analyzer = False
             analysis_json_path_file = Path(self.analysis_json_path).joinpath("analysis.json")
-            if not analysis_json_path_file.exists() or self.eager_analysis:
-                # If the analysis file does not exist, we'll run the analysis. Alternately, if the eager_analysis
-                # flag is set, we'll run the analysis every time the object is created. This will happen regradless
-                # of the existence of the analysis file.
-                # Create the executable command for codeanalyzer.
-                codeanalyzer_args = ''
-                # If target file is provided, the input is merged into a single string and passed to codeanalyzer
-                if self.target_files:
-                    target_file_options = '-t '.join([s.strip() for s in self.target_files])
-                    codeanalyzer_args = codeanalyzer_exec + shlex.split(
-                        f"-i {Path(self.project_dir)} --analysis-level={analysis_level}"
-                        f" -o {self.analysis_json_path} -t {target_file_options}"
-                    )
-                else:
+            # If target file is provided, the input is merged into a single string and passed to codeanalyzer
+            if self.target_files:
+                target_file_options = '-t '.join([s.strip() for s in self.target_files])
+                codeanalyzer_args = codeanalyzer_exec + shlex.split(
+                    f"-i {Path(self.project_dir)} --analysis-level={analysis_level}"
+                    f" -o {self.analysis_json_path} -t {target_file_options}"
+                )
+                is_run_code_analyzer = True
+            else:
+                if not analysis_json_path_file.exists() or self.eager_analysis:
+                    # If the analysis file does not exist, we'll run the analysis. Alternately, if the eager_analysis
+                    # flag is set, we'll run the analysis every time the object is created. This will happen regardless
+                    # of the existence of the analysis file.
+                    # Create the executable command for codeanalyzer.
                     codeanalyzer_args = codeanalyzer_exec + shlex.split(
-                        f"-i {Path(self.project_dir)} --analysis-level={analysis_level} -o {self.analysis_json_path}"
-                    )
+                            f"-i {Path(self.project_dir)} --analysis-level={analysis_level} -o {self.analysis_json_path}"
+                        )
+                    is_run_code_analyzer = True
+
+            if is_run_code_analyzer:
                 try:
                     logger.info(f"Running codeanalyzer subprocess with args {codeanalyzer_args}")
                     subprocess.run(
@@ -258,7 +262,6 @@ def _init_codeanalyzer(self, analysis_level=1) -> JApplication:
 
                 except Exception as e:
                     raise CodeanalyzerExecutionException(str(e)) from e
-
             with open(analysis_json_path_file) as f:
                 data = json.load(f)
                 return JApplication(**data)

From 9c45cf3fdba5acda162ad839a74d625f845f4ff3 Mon Sep 17 00:00:00 2001
From: Rangeet Pan <Rangeet.Pan@ibm.com>
Date: Mon, 9 Sep 2024 21:49:05 -0400
Subject: [PATCH 5/5] add logic to run codeanalyzer when target files are
 provided

---
 cldk/analysis/java/codeanalyzer/codeanalyzer.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/cldk/analysis/java/codeanalyzer/codeanalyzer.py b/cldk/analysis/java/codeanalyzer/codeanalyzer.py
index c6068f1..41d260b 100644
--- a/cldk/analysis/java/codeanalyzer/codeanalyzer.py
+++ b/cldk/analysis/java/codeanalyzer/codeanalyzer.py
@@ -205,7 +205,7 @@ def _init_codeanalyzer(self, analysis_level=1) -> JApplication:
             logger.info("Reading analysis from the pipe.")
             # If target file is provided, the input is merged into a single string and passed to codeanalyzer
             if self.target_files:
-                target_file_options = '-t '.join([s.strip() for s in self.target_files])
+                target_file_options = ' -t '.join([s.strip() for s in self.target_files])
                 codeanalyzer_args = codeanalyzer_exec + shlex.split(
                  f"-i {Path(self.project_dir)} --analysis-level={analysis_level} -t {target_file_options}"
                 )
@@ -231,7 +231,7 @@ def _init_codeanalyzer(self, analysis_level=1) -> JApplication:
             analysis_json_path_file = Path(self.analysis_json_path).joinpath("analysis.json")
             # If target file is provided, the input is merged into a single string and passed to codeanalyzer
             if self.target_files:
-                target_file_options = '-t '.join([s.strip() for s in self.target_files])
+                target_file_options = ' -t '.join([s.strip() for s in self.target_files])
                 codeanalyzer_args = codeanalyzer_exec + shlex.split(
                     f"-i {Path(self.project_dir)} --analysis-level={analysis_level}"
                     f" -o {self.analysis_json_path} -t {target_file_options}"