Skip to content

Support for Incremental analysis #34

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
Sep 11, 2024
62 changes: 42 additions & 20 deletions cldk/analysis/java/codeanalyzer/codeanalyzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,14 +51,15 @@ class JCodeanalyzer:
"""

def __init__(
self,
project_dir: Union[str, Path],
source_code: str | None,
analysis_backend_path: Union[str, Path, None],
analysis_json_path: Union[str, Path, None],
analysis_level: str,
use_graalvm_binary: bool,
eager_analysis: bool,
self,
project_dir: Union[str, Path],
source_code: str | None,
analysis_backend_path: Union[str, Path, None],
analysis_json_path: Union[str, Path, None],
analysis_level: str,
use_graalvm_binary: bool,
eager_analysis: bool,
target_files: List[str] | None
) -> None:
self.project_dir = project_dir
self.source_code = source_code
Expand All @@ -67,6 +68,7 @@ def __init__(
self.use_graalvm_binary = use_graalvm_binary
self.eager_analysis = eager_analysis
self.analysis_level = analysis_level
self.target_files = target_files
self.application = self._init_codeanalyzer(
analysis_level=1 if analysis_level == AnalysisLevel.symbol_table else 2)
# Attributes related to the Java code analysis...
Expand Down Expand Up @@ -198,11 +200,19 @@ def _init_codeanalyzer(self, analysis_level=1) -> JApplication:
"""

codeanalyzer_exec = self._get_codeanalyzer_exec()

codeanalyzer_args = ''
if self.analysis_json_path is None:
logger.info("Reading analysis from the pipe.")
codeanalyzer_args = codeanalyzer_exec + shlex.split(
f"-i {Path(self.project_dir)} --analysis-level={analysis_level}")
# If target files are provided, each one is passed to codeanalyzer as its own `-t` option
if self.target_files:
target_file_options = ' -t '.join([s.strip() for s in self.target_files])
codeanalyzer_args = codeanalyzer_exec + shlex.split(
f"-i {Path(self.project_dir)} --analysis-level={analysis_level} -t {target_file_options}"
)
else:
codeanalyzer_args = codeanalyzer_exec + shlex.split(
f"-i {Path(self.project_dir)} --analysis-level={analysis_level}"
)
try:
logger.info(f"Running codeanalyzer: {' '.join(codeanalyzer_args)}")
console_out: CompletedProcess[str] = subprocess.run(
Expand All @@ -216,15 +226,29 @@ def _init_codeanalyzer(self, analysis_level=1) -> JApplication:
raise CodeanalyzerExecutionException(str(e)) from e

else:
# Check if the code analyzer needs to be run
is_run_code_analyzer = False
analysis_json_path_file = Path(self.analysis_json_path).joinpath("analysis.json")
if not analysis_json_path_file.exists() or self.eager_analysis:
# If the analysis file does not exist, we'll run the analysis. Alternatively, if the eager_analysis
# flag is set, we'll run the analysis every time the object is created. This will happen regardless
# of the existence of the analysis file.
# Create the executable command for codeanalyzer.
# If target files are provided, each one is passed to codeanalyzer as its own `-t` option
if self.target_files:
target_file_options = ' -t '.join([s.strip() for s in self.target_files])
codeanalyzer_args = codeanalyzer_exec + shlex.split(
f"-i {Path(self.project_dir)} --analysis-level={analysis_level} -o {self.analysis_json_path}")

f"-i {Path(self.project_dir)} --analysis-level={analysis_level}"
f" -o {self.analysis_json_path} -t {target_file_options}"
)
is_run_code_analyzer = True
else:
if not analysis_json_path_file.exists() or self.eager_analysis:
# If the analysis file does not exist, we'll run the analysis. Alternatively, if the eager_analysis
# flag is set, we'll run the analysis every time the object is created. This will happen regardless
# of the existence of the analysis file.
# Create the executable command for codeanalyzer.
codeanalyzer_args = codeanalyzer_exec + shlex.split(
f"-i {Path(self.project_dir)} --analysis-level={analysis_level} -o {self.analysis_json_path}"
)
is_run_code_analyzer = True

if is_run_code_analyzer:
try:
logger.info(f"Running codeanalyzer subprocess with args {codeanalyzer_args}")
subprocess.run(
Expand All @@ -238,7 +262,6 @@ def _init_codeanalyzer(self, analysis_level=1) -> JApplication:

except Exception as e:
raise CodeanalyzerExecutionException(str(e)) from e

with open(analysis_json_path_file) as f:
data = json.load(f)
return JApplication(**data)
Expand All @@ -252,7 +275,6 @@ def _codeanalyzer_single_file(self):
JApplication
The application view of the Java code with the analysis results.
"""
# self.source_code: str = re.sub(r"[\r\n\t\f\v]+", lambda x: " " if x.group() in "\t\f\v" else " ", self.source_code)
codeanalyzer_exec = self._get_codeanalyzer_exec()
codeanalyzer_args = ["--source-analysis", self.source_code]
codeanalyzer_cmd = codeanalyzer_exec + codeanalyzer_args
Expand Down
33 changes: 20 additions & 13 deletions cldk/analysis/java/java.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,15 +15,16 @@
class JavaAnalysis(SymbolTable, CallGraph):

def __init__(
self,
project_dir: str | Path | None,
source_code: str | None,
analysis_backend: str,
analysis_backend_path: str | None,
analysis_json_path: str | Path | None,
analysis_level: str,
use_graalvm_binary: bool,
eager_analysis: bool,
self,
project_dir: str | Path | None,
source_code: str | None,
analysis_backend: str,
analysis_backend_path: str | None,
analysis_json_path: str | Path | None,
analysis_level: str,
target_files: List[str] | None,
use_graalvm_binary: bool,
eager_analysis: bool,
) -> None:
"""
Parameters
Expand All @@ -44,7 +45,9 @@ def __init__(
eager_analysis : bool, optional
A flag indicating whether to perform eager analysis, defaults to False. If True, the analysis is performed
eagerly. That is, the analysis.json file is created during analysis every time even if it already exists.

target_files : List[str] | None, optional
The target files for which the analysis will run or get modified. Currently, this feature is only supported
with symbol table analysis. In the future, we will add this feature to other analysis levels.
Attributes
----------
analysis_backend : JCodeQL | JApplication
Expand All @@ -59,7 +62,8 @@ def __init__(
self.analysis_backend_path = analysis_backend_path
self.eager_analysis = eager_analysis
self.use_graalvm_binary = use_graalvm_binary
self.analysis_backend = analysis_backend
self.analysis_backend = analysis_backend
self.target_files = target_files
# Initialize the analysis analysis_backend
if analysis_backend.lower() == "codeql":
self.analysis_backend: JCodeQL = JCodeQL(self.project_dir, self.analysis_json_path)
Expand All @@ -72,6 +76,7 @@ def __init__(
analysis_json_path=self.analysis_json_path,
use_graalvm_binary=self.use_graalvm_binary,
analysis_backend_path=self.analysis_backend_path,
target_files=self.target_files
)
else:
raise NotImplementedError(f"Support for {analysis_backend} has not been implemented yet.")
Expand Down Expand Up @@ -438,7 +443,9 @@ def get_implemented_interfaces(self, qualified_class_name) -> List[str]:
raise NotImplementedError(f"Support for this functionality has not been implemented yet.")
return self.backend.get_implemented_interfaces(qualified_class_name)

def __get_class_call_graph_using_symbol_table(self, qualified_class_name: str, method_signature: str | None = None) -> (List)[Tuple[JMethodDetail, JMethodDetail]]:
def __get_class_call_graph_using_symbol_table(self, qualified_class_name: str,
method_signature: str | None = None) -> (List)[
Tuple[JMethodDetail, JMethodDetail]]:
"""
A call graph using symbol table for a given class and a given method.
Args:
Expand Down Expand Up @@ -478,7 +485,7 @@ def get_class_call_graph(self, qualified_class_name: str, method_signature: str
"""
if using_symbol_table:
return self.__get_class_call_graph_using_symbol_table(qualified_class_name=qualified_class_name,
method_signature=method_signature)
method_signature=method_signature)
if self.analysis_backend in [AnalysisEngine.CODEQL, AnalysisEngine.TREESITTER]:
raise NotImplementedError(f"Support for this functionality has not been implemented yet.")
return self.backend.get_class_call_graph(qualified_class_name, method_signature)
Expand Down
39 changes: 26 additions & 13 deletions cldk/core.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
from pathlib import Path


import logging
from typing import List

from cldk.analysis import AnalysisLevel
from cldk.analysis.java import JavaAnalysis
from cldk.analysis.java.treesitter import JavaSitter
from cldk.utils.exceptions import CldkInitializationException
Expand Down Expand Up @@ -30,15 +31,16 @@ def __init__(self, language: str):
self.language: str = language

def analysis(
self,
project_path: str | Path | None = None,
source_code: str | None = None,
eager: bool = False,
analysis_backend: str | None = "codeanalyzer",
analysis_level: str = "symbol_table",
analysis_backend_path: str | None = None,
analysis_json_path: str | Path = None,
use_graalvm_binary: bool = False,
self,
project_path: str | Path | None = None,
source_code: str | None = None,
eager: bool = False,
analysis_backend: str | None = "codeanalyzer",
analysis_level: str = AnalysisLevel.symbol_table,
target_files: List[str] | None = None,
analysis_backend_path: str | None = None,
analysis_json_path: str | Path = None,
use_graalvm_binary: bool = False,
) -> JavaAnalysis:
"""
Initialize the preprocessor based on the specified language and analysis_backend.
Expand All @@ -65,7 +67,11 @@ def analysis(
eager : bool, optional
A flag indicating whether to perform eager analysis, defaults to False. If True, the analysis is performed
eagerly. That is, the analysis.json file is created during analysis every time even if it already exists.

analysis_level: str, optional
Analysis levels. Refer to AnalysisLevel.
target_files: List[str] | None, optional
The target files (paths) for which the analysis will run or get modified. Currently, this feature is only supported
with symbol table analysis. In the future, we will add this feature to other analysis levels.
Returns
-------
JavaAnalysis
Expand All @@ -77,13 +83,19 @@ def analysis(
If neither project_path nor source_code is provided.
NotImplementedError
If the specified language is not implemented yet.

Args:
analysis_level:
target_files:
analysis_level:
"""

if project_path is None and source_code is None:
raise CldkInitializationException("Either project_path or source_code must be provided.")

if project_path is not None and source_code is not None:
raise CldkInitializationException("Both project_path and source_code are provided. Please provide " "only one.")
raise CldkInitializationException(
"Both project_path and source_code are provided. Please provide " "only one.")

if self.language == "java":
return JavaAnalysis(
Expand All @@ -94,6 +106,7 @@ def analysis(
analysis_backend_path=analysis_backend_path,
analysis_json_path=analysis_json_path,
use_graalvm_binary=use_graalvm_binary,
target_files=target_files,
eager_analysis=eager,
)
else:
Expand All @@ -114,7 +127,7 @@ def treesitter_parser(self):
else:
raise NotImplementedError(f"Treesitter parser for {self.language} is not implemented yet.")

def tree_sitter_utils(self, source_code: str) -> [TreesitterSanitizer| NotImplementedError]:
def tree_sitter_utils(self, source_code: str) -> [TreesitterSanitizer | NotImplementedError]:
"""
Parse the project using treesitter.

Expand Down
1 change: 1 addition & 0 deletions cldk/models/java/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -341,6 +341,7 @@ class JCompilationUnit(BaseModel):
comment: str
imports: List[str]
type_declarations: Dict[str, JType]
is_modified: bool = False


class JMethodDetail(BaseModel):
Expand Down