Skip to content

Issue 98: CLDK now supports getting crud operations from Java JPA applications #101

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
Feb 11, 2025
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@

# Don't ignore test fixture zip files
!tests/resources/java/application/daytrader8-1.2.zip
!tests/resources/java/application/plantsbywebsphere.zip

# virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml
hs_err_pid*
Expand Down
168 changes: 131 additions & 37 deletions cldk/analysis/java/codeanalyzer/codeanalyzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,33 +13,28 @@
# See the License for the specific language governing permissions and
# limitations under the License.
################################################################################
from itertools import chain, groupby
from pdb import set_trace
import re
import json
import logging
import re
import shlex
import requests
import networkx as nx
from pathlib import Path
import subprocess
from subprocess import CompletedProcess
from urllib.request import urlretrieve
from datetime import datetime
from importlib import resources
from itertools import chain, groupby
from pathlib import Path
from subprocess import CompletedProcess
from typing import Any, Dict, List, Tuple
from typing import Union

import networkx as nx
from networkx import DiGraph

from cldk.analysis import AnalysisLevel
from cldk.analysis.java.treesitter import JavaSitter
from cldk.models.java import JGraphEdges
from cldk.models.java.models import JApplication, JCallable, JField, JMethodDetail, JType, JCompilationUnit, JGraphEdgesST
from typing import Dict, List, Tuple
from typing import Union

from cldk.models.java.enums import CRUDOperationType
from cldk.models.java.models import JApplication, JCRUDOperation, JCallable, JField, JMethodDetail, JType, JCompilationUnit, JGraphEdgesST
from cldk.utils.exceptions.exceptions import CodeanalyzerExecutionException

import logging

logger = logging.getLogger(__name__)


Expand Down Expand Up @@ -143,28 +138,31 @@ def _get_codeanalyzer_exec(self) -> List[str]:
with resources.as_file(resources.files("cldk.analysis.java.codeanalyzer.bin") / "codeanalyzer") as codeanalyzer_bin_path:
codeanalyzer_exec = shlex.split(codeanalyzer_bin_path.__str__())
else:

if self.analysis_backend_path:
analysis_backend_path = Path(self.analysis_backend_path)
logger.info(f"Using codeanalyzer jar from {analysis_backend_path}")
codeanalyzer_jar_file = next(analysis_backend_path.rglob("codeanalyzer-*.jar"), None)
if codeanalyzer_jar_file is None:
raise CodeanalyzerExecutionException("Codeanalyzer jar not found in the provided path.")
codeanalyzer_exec = shlex.split(f"java -jar {codeanalyzer_jar_file}")
else:
# Since the path to codeanalyzer.jar was not provided, we'll download the latest version from GitHub.
# Since the path to codeanalyzer.jar was not provided, we will use the default jar from the cldk/analysis/java/codeanalyzer/jar folder
with resources.as_file(resources.files("cldk.analysis.java.codeanalyzer.jar")) as codeanalyzer_jar_path:
# Download the codeanalyzer jar if it doesn't exist, update if it's outdated,
# do nothing if it's up-to-date.
codeanalyzer_jar_file = next(codeanalyzer_jar_path.rglob("codeanalyzer-*.jar"), None)
codeanalyzer_exec = shlex.split(f"java -jar {codeanalyzer_jar_file}")
return codeanalyzer_exec

def init_japplication(self, data: str) -> JApplication:
@staticmethod
def _init_japplication(data: str) -> JApplication:
"""Return JApplication given the stringified JSON as input.
Returns
-------
JApplication
The application view of the Java code with the analysis results.
"""
# from ipdb import set_trace

# set_trace()
return JApplication(**json.loads(data))

def _init_codeanalyzer(self, analysis_level=1) -> JApplication:
Expand Down Expand Up @@ -197,7 +195,7 @@ def _init_codeanalyzer(self, analysis_level=1) -> JApplication:
text=True,
check=True,
)
return JApplication(**json.loads(console_out.stdout))
return self._init_japplication(console_out.stdout)
except Exception as e:
raise CodeanalyzerExecutionException(str(e)) from e
else:
Expand All @@ -217,7 +215,7 @@ def _init_codeanalyzer(self, analysis_level=1) -> JApplication:
# flag is set, we'll run the analysis every time the object is created. This will happen regardless
# of the existence of the analysis file.
# Create the executable command for codeanalyzer.
codeanalyzer_args = codeanalyzer_exec + shlex.split(f"-i {Path(self.project_dir)} --analysis-level={analysis_level} -o {self.analysis_json_path}")
codeanalyzer_args = codeanalyzer_exec + shlex.split(f"-i {Path(self.project_dir)} --analysis-level={analysis_level} -o {self.analysis_json_path} -v")
is_run_code_analyzer = True

if is_run_code_analyzer:
Expand All @@ -236,7 +234,7 @@ def _init_codeanalyzer(self, analysis_level=1) -> JApplication:
raise CodeanalyzerExecutionException(str(e)) from e
with open(analysis_json_path_file) as f:
data = json.load(f)
return JApplication(**data)
return self._init_japplication(json.dumps(data))

def _codeanalyzer_single_file(self) -> JApplication:
"""Invokes codeanalyzer in a single file mode.
Expand All @@ -248,12 +246,11 @@ def _codeanalyzer_single_file(self) -> JApplication:
codeanalyzer_args = ["--source-analysis", self.source_code]
codeanalyzer_cmd = codeanalyzer_exec + codeanalyzer_args
try:
print(f"Running {' '.join(codeanalyzer_cmd)}")
logger.info(f"Running {' '.join(codeanalyzer_cmd)}")
console_out: CompletedProcess[str] = subprocess.run(codeanalyzer_cmd, capture_output=True, text=True, check=True)
if console_out.returncode != 0:
raise CodeanalyzerExecutionException(console_out.stderr)
return JApplication(**json.loads(console_out.stdout))
return self._init_japplication(console_out.stdout)
except Exception as e:
raise CodeanalyzerExecutionException(str(e)) from e

Expand Down Expand Up @@ -870,14 +867,9 @@ def get_all_entry_point_methods(self) -> Dict[str, Dict[str, JCallable]]:
Dict[str, Dict[str, JCallable]]: A dictionary of all entry point methods in the Java code.
"""
methods = chain.from_iterable(
((typename, method, callable)
for method, callable in methods.items() if callable.is_entrypoint)
for typename, methods in self.get_all_methods_in_application().items()
((typename, method, callable) for method, callable in methods.items() if callable.is_entrypoint) for typename, methods in self.get_all_methods_in_application().items()
)
return {
typename: {method: callable for _, method, callable in group}
for typename, group in groupby(methods, key=lambda x: x[0])
}
return {typename: {method: callable for _, method, callable in group} for typename, group in groupby(methods, key=lambda x: x[0])}

def get_all_entry_point_classes(self) -> Dict[str, JType]:
"""Returns a dictionary of all entry point classes in the Java code.
Expand All @@ -887,8 +879,110 @@ def get_all_entry_point_classes(self) -> Dict[str, JType]:
with qualified class names as keys.
"""

return {
typename: klass
for typename, klass in self.get_all_classes().items()
if klass.is_entrypoint_class
}
return {typename: klass for typename, klass in self.get_all_classes().items() if klass.is_entrypoint_class}

def get_all_crud_operations(self) -> List[Dict[str, Union[JType, JCallable, List[JCRUDOperation]]]]:
    """Return every method in the source code that has at least one CRUD operation.

    Returns:
        List[Dict[str, Union[JType, JCallable, List[JCRUDOperation]]]]: One entry per method whose
        ``crud_operations`` is non-empty. Each entry has three keys: the class name (as keyed by
        ``get_all_classes``) mapped to the class details, the method's key in
        ``callable_declarations`` mapped to the method details, and the literal key
        ``"crud_operations"`` mapped to that method's operations.
    """
    crud_operations = []
    for class_name, class_details in self.get_all_classes().items():
        for method_name, method_details in class_details.callable_declarations.items():
            # Truthiness instead of len() > 0: also skips a method whose crud_operations is None.
            if not method_details.crud_operations:
                continue
            # NOTE: the first two keys are the actual class/method names, not fixed strings.
            crud_operations.append(
                {
                    class_name: class_details,
                    method_name: method_details,
                    "crud_operations": method_details.crud_operations,
                }
            )
    return crud_operations

def _get_crud_operations_of_type(self, operation_type: CRUDOperationType) -> List[Dict[str, Union[JType, JCallable, List[JCRUDOperation]]]]:
    """Collect, method by method, the CRUD operations whose type equals ``operation_type``.

    Args:
        operation_type (CRUDOperationType): The operation type to keep.

    Returns:
        List[Dict[str, Union[JType, JCallable, List[JCRUDOperation]]]]: One entry per method that has
        at least one matching operation. Each entry has three keys: the class name mapped to the
        class details, the method's key in ``callable_declarations`` mapped to the method details,
        and the literal key ``"crud_operations"`` mapped to the matching operations only.
    """
    matching_entries = []
    for class_name, class_details in self.get_all_classes().items():
        for method_name, method_details in class_details.callable_declarations.items():
            # `or []` tolerates a method whose crud_operations is None.
            operations = [crud_op for crud_op in (method_details.crud_operations or []) if crud_op.operation_type == operation_type]
            # Skip methods that only perform other kinds of operation; previously they were
            # reported with an empty "crud_operations" list.
            if not operations:
                continue
            matching_entries.append(
                {
                    class_name: class_details,
                    method_name: method_details,
                    "crud_operations": operations,
                }
            )
    return matching_entries

def get_all_read_operations(self) -> List[Dict[str, Union[JType, JCallable, List[JCRUDOperation]]]]:
    """Return all read operations in the source code, grouped by method.

    Returns:
        List[Dict[str, Union[JType, JCallable, List[JCRUDOperation]]]]: One entry per method that has
        at least one READ operation; see ``_get_crud_operations_of_type`` for the entry layout.
    """
    return self._get_crud_operations_of_type(CRUDOperationType.READ)

def get_all_create_operations(self) -> List[Dict[str, Union[JType, JCallable, List[JCRUDOperation]]]]:
    """Return all create operations in the source code, grouped by method.

    Returns:
        List[Dict[str, Union[JType, JCallable, List[JCRUDOperation]]]]: One entry per method that has
        at least one CREATE operation; see ``_get_crud_operations_of_type`` for the entry layout.
    """
    return self._get_crud_operations_of_type(CRUDOperationType.CREATE)

def get_all_update_operations(self) -> List[Dict[str, Union[JType, JCallable, List[JCRUDOperation]]]]:
    """Return all update operations in the source code, grouped by method.

    Returns:
        List[Dict[str, Union[JType, JCallable, List[JCRUDOperation]]]]: One entry per method that has
        at least one UPDATE operation; see ``_get_crud_operations_of_type`` for the entry layout.
    """
    return self._get_crud_operations_of_type(CRUDOperationType.UPDATE)

def get_all_delete_operations(self) -> List[Dict[str, Union[JType, JCallable, List[JCRUDOperation]]]]:
    """Return all delete operations in the source code, grouped by method.

    Returns:
        List[Dict[str, Union[JType, JCallable, List[JCRUDOperation]]]]: One entry per method that has
        at least one DELETE operation; see ``_get_crud_operations_of_type`` for the entry layout.
    """
    return self._get_crud_operations_of_type(CRUDOperationType.DELETE)
2 changes: 1 addition & 1 deletion cldk/analysis/java/codeanalyzer/jar/.gitignore
Original file line number Diff line number Diff line change
@@ -1 +1 @@
*.jar
!codeanalyzer-*.jar
Binary file not shown.
51 changes: 42 additions & 9 deletions cldk/analysis/java/java_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,15 +19,15 @@
"""

from pathlib import Path
from typing import Dict, List, Tuple, Set
from typing import Any, Dict, List, Tuple, Set, Union
from networkx import DiGraph
from tree_sitter import Tree

from cldk.analysis import SymbolTable, CallGraph, AnalysisLevel
from cldk.analysis.java.treesitter import JavaSitter
from cldk.models.java import JCallable
from cldk.models.java import JApplication
from cldk.models.java.models import JCompilationUnit, JMethodDetail, JType, JField
from cldk.models.java.models import JCRUDOperation, JCompilationUnit, JMethodDetail, JType, JField
from cldk.analysis.java.codeanalyzer import JCodeanalyzer
from cldk.analysis.java.codeql import JCodeQL
from cldk.utils.analysis_engine import AnalysisEngine
Expand Down Expand Up @@ -619,8 +619,6 @@ def get_calling_lines(self, target_method_name: str) -> List[int]:
List[int]: List of line numbers within in source method code block.
"""

if self.analysis_backend in [AnalysisEngine.CODEQL, AnalysisEngine.TREESITTER]:
raise NotImplementedError("Support for this functionality has not been implemented yet.")
return self.backend.get_calling_lines(self.source_code, target_method_name)

def get_call_targets(self, declared_methods: dict) -> Set[str]:
Expand All @@ -629,12 +627,47 @@ def get_call_targets(self, declared_methods: dict) -> Set[str]:
Args:
declared_methods (dict): A dictionary of all declared methods in the class.

Raises:
NotImplementedError: Raised when current AnalysisEngine does not support this function.

Returns:
Set[str]: A list of call targets (methods).
"""
if self.analysis_backend in [AnalysisEngine.CODEQL, AnalysisEngine.TREESITTER]:
raise NotImplementedError("Support for this functionality has not been implemented yet.")
return self.backend.get_call_targets(self.source_code, declared_methods)

def get_all_crud_operations(self) -> List[Dict[str, Union[JType, JCallable, List[JCRUDOperation]]]]:
    """Return all CRUD operations in the source code.

    Delegates to the analysis backend's ``get_all_crud_operations``.

    Returns:
        List[Dict[str, Union[JType, JCallable, List[JCRUDOperation]]]]: The backend's result, unchanged.
    """
    crud_operations = self.backend.get_all_crud_operations()
    return crud_operations

def get_all_create_operations(self) -> List[Dict[str, Union[JType, JCallable, List[JCRUDOperation]]]]:
    """Return all create operations in the source code.

    Delegates to the analysis backend's ``get_all_create_operations``.

    Returns:
        List[Dict[str, Union[JType, JCallable, List[JCRUDOperation]]]]: The backend's result, unchanged.
    """
    create_operations = self.backend.get_all_create_operations()
    return create_operations

def get_all_read_operations(self) -> List[Dict[str, Union[JType, JCallable, List[JCRUDOperation]]]]:
    """Return all read operations in the source code.

    Delegates to the analysis backend's ``get_all_read_operations``.

    Returns:
        List[Dict[str, Union[JType, JCallable, List[JCRUDOperation]]]]: The backend's result, unchanged.
    """
    read_operations = self.backend.get_all_read_operations()
    return read_operations

def get_all_update_operations(self) -> List[Dict[str, Union[JType, JCallable, List[JCRUDOperation]]]]:
    """Return all update operations in the source code.

    Delegates to the analysis backend's ``get_all_update_operations``.

    Returns:
        List[Dict[str, Union[JType, JCallable, List[JCRUDOperation]]]]: The backend's result, unchanged.
    """
    update_operations = self.backend.get_all_update_operations()
    return update_operations

def get_all_delete_operations(self) -> List[Dict[str, Union[JType, JCallable, List[JCRUDOperation]]]]:
    """Return all delete operations in the source code.

    Delegates to the analysis backend's ``get_all_delete_operations``.

    Returns:
        List[Dict[str, Union[JType, JCallable, List[JCRUDOperation]]]]: The backend's result, unchanged.
    """
    delete_operations = self.backend.get_all_delete_operations()
    return delete_operations
4 changes: 3 additions & 1 deletion cldk/models/java/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,4 +26,6 @@
JGraphEdges,
)

__all__ = ["JApplication", "JCallable", "JType", "JCompilationUnit", "JGraphEdges", "ConstantsNamespace"]
from .enums import CRUDOperationType, CRUDQueryType

__all__ = ["JApplication", "JCallable", "JType", "JCompilationUnit", "JGraphEdges", "CRUDOperationType", "CRUDQueryType"]
28 changes: 28 additions & 0 deletions cldk/models/java/enums.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
from enum import Enum


class CRUDOperationType(Enum):
    """Enumeration of CRUD operation kinds.

    Every member's value is the member's own name as a string.

    Attributes:
        CREATE: The create operation type (value ``"CREATE"``).
        READ: The read operation type (value ``"READ"``).
        UPDATE: The update operation type (value ``"UPDATE"``).
        DELETE: The delete operation type (value ``"DELETE"``).
    """

    CREATE = "CREATE"
    READ = "READ"
    UPDATE = "UPDATE"
    DELETE = "DELETE"

class CRUDQueryType(Enum):
    """Enumeration of CRUD query kinds.

    Every member's value is the member's own name as a string.

    Attributes:
        READ: The read query type (value ``"READ"``).
        WRITE: The write query type (value ``"WRITE"``).
        NAMED: The named query type (value ``"NAMED"``).
    """

    READ = "READ"
    WRITE = "WRITE"
    NAMED = "NAMED"
Loading