diff --git a/cldk/analysis/java/codeanalyzer/codeanalyzer.py b/cldk/analysis/java/codeanalyzer/codeanalyzer.py index 6a270f0..41d260b 100644 --- a/cldk/analysis/java/codeanalyzer/codeanalyzer.py +++ b/cldk/analysis/java/codeanalyzer/codeanalyzer.py @@ -51,14 +51,15 @@ class JCodeanalyzer: """ def __init__( - self, - project_dir: Union[str, Path], - source_code: str | None, - analysis_backend_path: Union[str, Path, None], - analysis_json_path: Union[str, Path, None], - analysis_level: str, - use_graalvm_binary: bool, - eager_analysis: bool, + self, + project_dir: Union[str, Path], + source_code: str | None, + analysis_backend_path: Union[str, Path, None], + analysis_json_path: Union[str, Path, None], + analysis_level: str, + use_graalvm_binary: bool, + eager_analysis: bool, + target_files: List[str] | None ) -> None: self.project_dir = project_dir self.source_code = source_code @@ -67,6 +68,7 @@ def __init__( self.use_graalvm_binary = use_graalvm_binary self.eager_analysis = eager_analysis self.analysis_level = analysis_level + self.target_files = target_files self.application = self._init_codeanalyzer( analysis_level=1 if analysis_level == AnalysisLevel.symbol_table else 2) # Attributes related the Java code analysis... 
@@ -198,11 +200,19 @@ def _init_codeanalyzer(self, analysis_level=1) -> JApplication: """ codeanalyzer_exec = self._get_codeanalyzer_exec() - + codeanalyzer_args = '' if self.analysis_json_path is None: logger.info("Reading analysis from the pipe.") - codeanalyzer_args = codeanalyzer_exec + shlex.split( - f"-i {Path(self.project_dir)} --analysis-level={analysis_level}") + # If target file is provided, the input is merged into a single string and passed to codeanalyzer + if self.target_files: + target_file_options = ' -t '.join([s.strip() for s in self.target_files]) + codeanalyzer_args = codeanalyzer_exec + shlex.split( + f"-i {Path(self.project_dir)} --analysis-level={analysis_level} -t {target_file_options}" + ) + else: + codeanalyzer_args = codeanalyzer_exec + shlex.split( + f"-i {Path(self.project_dir)} --analysis-level={analysis_level}" + ) try: logger.info(f"Running codeanalyzer: {' '.join(codeanalyzer_args)}") console_out: CompletedProcess[str] = subprocess.run( @@ -216,15 +226,29 @@ def _init_codeanalyzer(self, analysis_level=1) -> JApplication: raise CodeanalyzerExecutionException(str(e)) from e else: + # Check if the code analyzer needs to be run + is_run_code_analyzer = False analysis_json_path_file = Path(self.analysis_json_path).joinpath("analysis.json") - if not analysis_json_path_file.exists() or self.eager_analysis: - # If the analysis file does not exist, we'll run the analysis. Alternately, if the eager_analysis - # flag is set, we'll run the analysis every time the object is created. This will happen regradless - # of the existence of the analysis file. - # Create the executable command for codeanalyzer. 
+ # If target file is provided, the input is merged into a single string and passed to codeanalyzer + if self.target_files: + target_file_options = ' -t '.join([s.strip() for s in self.target_files]) codeanalyzer_args = codeanalyzer_exec + shlex.split( - f"-i {Path(self.project_dir)} --analysis-level={analysis_level} -o {self.analysis_json_path}") - + f"-i {Path(self.project_dir)} --analysis-level={analysis_level}" + f" -o {self.analysis_json_path} -t {target_file_options}" + ) + is_run_code_analyzer = True + else: + if not analysis_json_path_file.exists() or self.eager_analysis: + # If the analysis file does not exist, we'll run the analysis. Alternately, if the eager_analysis + # flag is set, we'll run the analysis every time the object is created. This will happen regardless + # of the existence of the analysis file. + # Create the executable command for codeanalyzer. + codeanalyzer_args = codeanalyzer_exec + shlex.split( + f"-i {Path(self.project_dir)} --analysis-level={analysis_level} -o {self.analysis_json_path}" + ) + is_run_code_analyzer = True + + if is_run_code_analyzer: try: logger.info(f"Running codeanalyzer subprocess with args {codeanalyzer_args}") subprocess.run( @@ -238,7 +262,6 @@ def _init_codeanalyzer(self, analysis_level=1) -> JApplication: except Exception as e: raise CodeanalyzerExecutionException(str(e)) from e - with open(analysis_json_path_file) as f: data = json.load(f) return JApplication(**data) @@ -252,7 +275,6 @@ def _codeanalyzer_single_file(self): JApplication The application view of the Java code with the analysis results. 
""" - # self.source_code: str = re.sub(r"[\r\n\t\f\v]+", lambda x: " " if x.group() in "\t\f\v" else " ", self.source_code) codeanalyzer_exec = self._get_codeanalyzer_exec() codeanalyzer_args = ["--source-analysis", self.source_code] codeanalyzer_cmd = codeanalyzer_exec + codeanalyzer_args diff --git a/cldk/analysis/java/java.py b/cldk/analysis/java/java.py index 0b02684..5d29564 100644 --- a/cldk/analysis/java/java.py +++ b/cldk/analysis/java/java.py @@ -15,15 +15,16 @@ class JavaAnalysis(SymbolTable, CallGraph): def __init__( - self, - project_dir: str | Path | None, - source_code: str | None, - analysis_backend: str, - analysis_backend_path: str | None, - analysis_json_path: str | Path | None, - analysis_level: str, - use_graalvm_binary: bool, - eager_analysis: bool, + self, + project_dir: str | Path | None, + source_code: str | None, + analysis_backend: str, + analysis_backend_path: str | None, + analysis_json_path: str | Path | None, + analysis_level: str, + target_files: List[str] | None, + use_graalvm_binary: bool, + eager_analysis: bool, ) -> None: """ Parameters @@ -44,7 +45,9 @@ def __init__( eager_analysis : bool, optional A flag indicating whether to perform eager analysis, defaults to False. If True, the analysis is performed eagerly. That is, the analysis.json file is created during analysis every time even if it already exists. - + target_files: List[str] | None, optional + The target files for which the analysis will run or get modified. Currently, this feature is only supported + with symbol table analysis. In the future, we will add this feature to other analysis levels. 
Attributes ---------- analysis_backend : JCodeQL | JApplication @@ -59,7 +62,8 @@ def __init__( self.analysis_backend_path = analysis_backend_path self.eager_analysis = eager_analysis self.use_graalvm_binary = use_graalvm_binary - self.analysis_backend = analysis_backend + self.analysis_backend = analysis_backend + self.target_files = target_files # Initialize the analysis analysis_backend if analysis_backend.lower() == "codeql": self.analysis_backend: JCodeQL = JCodeQL(self.project_dir, self.analysis_json_path) @@ -72,6 +76,7 @@ def __init__( analysis_json_path=self.analysis_json_path, use_graalvm_binary=self.use_graalvm_binary, analysis_backend_path=self.analysis_backend_path, + target_files=self.target_files ) else: raise NotImplementedError(f"Support for {analysis_backend} has not been implemented yet.") @@ -438,7 +443,9 @@ def get_implemented_interfaces(self, qualified_class_name) -> List[str]: raise NotImplementedError(f"Support for this functionality has not been implemented yet.") return self.backend.get_implemented_interfaces(qualified_class_name) - def __get_class_call_graph_using_symbol_table(self, qualified_class_name: str, method_signature: str | None = None) -> (List)[Tuple[JMethodDetail, JMethodDetail]]: + def __get_class_call_graph_using_symbol_table(self, qualified_class_name: str, + method_signature: str | None = None) -> (List)[ + Tuple[JMethodDetail, JMethodDetail]]: """ A call graph using symbol table for a given class and a given method. 
Args: @@ -478,7 +485,7 @@ def get_class_call_graph(self, qualified_class_name: str, method_signature: str """ if using_symbol_table: return self.__get_class_call_graph_using_symbol_table(qualified_class_name=qualified_class_name, - method_signature=method_signature) + method_signature=method_signature) if self.analysis_backend in [AnalysisEngine.CODEQL, AnalysisEngine.TREESITTER]: raise NotImplementedError(f"Support for this functionality has not been implemented yet.") return self.backend.get_class_call_graph(qualified_class_name, method_signature) diff --git a/cldk/core.py b/cldk/core.py index ec6f07b..91445c9 100644 --- a/cldk/core.py +++ b/cldk/core.py @@ -1,8 +1,9 @@ from pathlib import Path - import logging +from typing import List +from cldk.analysis import AnalysisLevel from cldk.analysis.java import JavaAnalysis from cldk.analysis.java.treesitter import JavaSitter from cldk.utils.exceptions import CldkInitializationException @@ -30,15 +31,16 @@ def __init__(self, language: str): self.language: str = language def analysis( - self, - project_path: str | Path | None = None, - source_code: str | None = None, - eager: bool = False, - analysis_backend: str | None = "codeanalyzer", - analysis_level: str = "symbol_table", - analysis_backend_path: str | None = None, - analysis_json_path: str | Path = None, - use_graalvm_binary: bool = False, + self, + project_path: str | Path | None = None, + source_code: str | None = None, + eager: bool = False, + analysis_backend: str | None = "codeanalyzer", + analysis_level: str = AnalysisLevel.symbol_table, + target_files: List[str] | None = None, + analysis_backend_path: str | None = None, + analysis_json_path: str | Path = None, + use_graalvm_binary: bool = False, ) -> JavaAnalysis: """ Initialize the preprocessor based on the specified language and analysis_backend. @@ -65,7 +67,11 @@ def analysis( eager : bool, optional A flag indicating whether to perform eager analysis, defaults to False. 
If True, the analysis is performed eagerly. That is, the analysis.json file is created during analysis every time even if it already exists. - + analysis_level: str, optional + The analysis level to run. Refer to AnalysisLevel for the supported values. + target_files: List[str] | None, optional + The target files (paths) for which the analysis will run or get modified. Currently, this feature is only supported + with symbol table analysis. In the future, we will add this feature to other analysis levels. Returns ------- JavaAnalysis @@ -77,13 +83,19 @@ def analysis( If neither project_path nor source_code is provided. NotImplementedError If the specified language is not implemented yet. + + Args: + analysis_level: + target_files: + analysis_level: """ if project_path is None and source_code is None: raise CldkInitializationException("Either project_path or source_code must be provided.") if project_path is not None and source_code is not None: - raise CldkInitializationException("Both project_path and source_code are provided. Please provide " "only one.") + raise CldkInitializationException( + "Both project_path and source_code are provided. Please provide " "only one.") if self.language == "java": return JavaAnalysis( @@ -94,6 +106,7 @@ def analysis( analysis_backend_path=analysis_backend_path, analysis_json_path=analysis_json_path, use_graalvm_binary=use_graalvm_binary, + target_files=target_files, eager_analysis=eager, ) else: @@ -114,7 +127,7 @@ def treesitter_parser(self): else: raise NotImplementedError(f"Treesitter parser for {self.language} is not implemented yet.") - def tree_sitter_utils(self, source_code: str) -> [TreesitterSanitizer| NotImplementedError]: + def tree_sitter_utils(self, source_code: str) -> [TreesitterSanitizer | NotImplementedError]: """ Parse the project using treesitter. 
diff --git a/cldk/models/java/models.py b/cldk/models/java/models.py index 25713e2..d34e3e9 100644 --- a/cldk/models/java/models.py +++ b/cldk/models/java/models.py @@ -341,6 +341,7 @@ class JCompilationUnit(BaseModel): comment: str imports: List[str] type_declarations: Dict[str, JType] + is_modified: bool = False class JMethodDetail(BaseModel):