diff --git a/cldk/analysis/commons/treesitter/models.py b/cldk/analysis/commons/treesitter/models.py index 9592149..78acc52 100644 --- a/cldk/analysis/commons/treesitter/models.py +++ b/cldk/analysis/commons/treesitter/models.py @@ -19,7 +19,7 @@ """ from dataclasses import dataclass -from typing import Dict, List +from typing import Dict, Tuple, List from tree_sitter import Node @@ -47,10 +47,8 @@ class Capture: node: Node name: str - def __init__(self, captures: Dict[str, List[Node]]): - self.captures = [] - for capture_name, captures in captures.items(): - self.captures = [self.Capture(node=node, name=capture_name) for node in captures] + def __init__(self, captures: List[Tuple[Node, str]]): + self.captures = [self.Capture(node=node, name=text) for node, text in captures] def __getitem__(self, index: int) -> Capture: """Get the capture at the specified index. diff --git a/cldk/analysis/commons/treesitter/treesitter_java.py b/cldk/analysis/commons/treesitter/treesitter_java.py index 3b767c8..a13b61f 100644 --- a/cldk/analysis/commons/treesitter/treesitter_java.py +++ b/cldk/analysis/commons/treesitter/treesitter_java.py @@ -25,6 +25,8 @@ from cldk.analysis.commons.treesitter.models import Captures logger = logging.getLogger(__name__) +language: Language = Language(tsjava.language()) +parser: Parser = Parser(language) # pylint: disable=too-many-public-methods @@ -34,8 +36,7 @@ class TreesitterJava: """ def __init__(self) -> None: - self.language: Language = Language(tsjava.language()) - self.parser: Parser = Parser(self.language) + pass def method_is_not_in_class(self, method_name: str, class_body: str) -> bool: """Check if a method is in a class. @@ -78,7 +79,7 @@ def syntax_error(node): return False - tree = self.parser.parse(bytes(code, "utf-8")) + tree = parser.parse(bytes(code, "utf-8")) if tree is not None: return not syntax_error(tree.root_node) return False @@ -92,7 +93,7 @@ def get_raw_ast(self, code: str) -> Tree: Returns: Tree: the raw AST """ - return self.parser.parse(bytes(code, "utf-8")) + return parser.parse(bytes(code, "utf-8")) def get_all_imports(self, source_code: str) -> Set[str]: """Get a list of all the imports in a class. @@ -176,8 +177,8 @@ def frame_query_and_capture_output(self, query: str, code_to_process: str) -> Ca code_to_process : str The code to process. """ - framed_query: Query = self.language.query(query) - tree = self.parser.parse(bytes(code_to_process, "utf-8")) + framed_query: Query = language.query(query) + tree = parser.parse(bytes(code_to_process, "utf-8")) return Captures(framed_query.captures(tree.root_node)) def get_method_name_from_declaration(self, method_name_string: str) -> str: @@ -392,7 +393,7 @@ def get_all_type_invocations(self, source_code: str) -> Set[str]: return {type_id.node.text.decode() for type_id in type_references} def get_method_return_type(self, source_code: str) -> str: - """Get the return type of a method. + """Get the return type of method. Parameters ---------- @@ -424,7 +425,7 @@ def get_lexical_tokens(self, code: str, filter_by_node_type: List[str] | None = List of lexical tokens """ - tree = self.parser.parse(bytes(code, "utf-8")) + tree = parser.parse(bytes(code, "utf-8")) root_node = tree.root_node lexical_tokens = [] @@ -432,9 +433,9 @@ def collect_leaf_token_values(node): if len(node.children) == 0: if filter_by_node_type is not None: if node.type in filter_by_node_type: - lexical_tokens.append(code[node.start_byte : node.end_byte]) + lexical_tokens.append(code[node.start_byte: node.end_byte]) else: - lexical_tokens.append(code[node.start_byte : node.end_byte]) + lexical_tokens.append(code[node.start_byte: node.end_byte]) else: for child in node.children: collect_leaf_token_values(child) diff --git a/pyproject.toml b/pyproject.toml index e8ff4c3..451828f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -33,11 +33,11 @@ pydantic = "^2.10.6" pandas = "^2.2.3" networkx = "^3.4.2" pyarrow = "19.0.0" -tree-sitter = "0.24.0" +tree-sitter = "0.22.0" rich = "13.9.4" wget = "3.2" requests = "^2.32.3" -tree-sitter-java = "0.23.5" +tree-sitter-java = "0.21.0" tree-sitter-c = "0.23.5" tree-sitter-go = "0.23.4" tree-sitter-python = "0.23.6"