From 1acaca382b4a5fc6010fe0ff8e669f7af03d6054 Mon Sep 17 00:00:00 2001 From: Rangeet Pan <23088883+rangeetpan@users.noreply.github.com> Date: Mon, 7 Apr 2025 12:49:39 -0700 Subject: [PATCH 1/8] Update pyproject.toml --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index e8ff4c3..87998bc 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -37,7 +37,7 @@ tree-sitter = "0.24.0" rich = "13.9.4" wget = "3.2" requests = "^2.32.3" -tree-sitter-java = "0.23.5" +tree-sitter-java = "0.21.0" tree-sitter-c = "0.23.5" tree-sitter-go = "0.23.4" tree-sitter-python = "0.23.6" From 42f1a1ea744276f919fff7c2b3392b38f1b46dad Mon Sep 17 00:00:00 2001 From: Rangeet Pan <23088883+rangeetpan@users.noreply.github.com> Date: Wed, 9 Apr 2025 10:06:40 -0700 Subject: [PATCH 2/8] Update pyproject.toml --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 87998bc..3620b55 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -33,7 +33,7 @@ pydantic = "^2.10.6" pandas = "^2.2.3" networkx = "^3.4.2" pyarrow = "19.0.0" -tree-sitter = "0.24.0" +tree-sitter = "0.21.0" rich = "13.9.4" wget = "3.2" requests = "^2.32.3" From 66156de0fc70653fe48ec3b7f0a6fca43e6e9d55 Mon Sep 17 00:00:00 2001 From: Rangeet Pan <23088883+rangeetpan@users.noreply.github.com> Date: Wed, 9 Apr 2025 10:10:25 -0700 Subject: [PATCH 3/8] Update pyproject.toml --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 3620b55..451828f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -33,7 +33,7 @@ pydantic = "^2.10.6" pandas = "^2.2.3" networkx = "^3.4.2" pyarrow = "19.0.0" -tree-sitter = "0.21.0" +tree-sitter = "0.22.0" rich = "13.9.4" wget = "3.2" requests = "^2.32.3" From 161792f45085a0237faa1b53f7df9b7230dc63bf Mon Sep 17 00:00:00 2001 From: Rangeet Pan <23088883+rangeetpan@users.noreply.github.com> Date: Wed, 9 Apr 2025 10:12:22 -0700 Subject: [PATCH 4/8] Update pyproject.toml --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 451828f..158c18c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -33,7 +33,7 @@ pydantic = "^2.10.6" pandas = "^2.2.3" networkx = "^3.4.2" pyarrow = "19.0.0" -tree-sitter = "0.22.0" +tree-sitter = "0.23.0" rich = "13.9.4" wget = "3.2" requests = "^2.32.3" From 0f00e76c0089df30d601a0c6b09ffed1ba9b01f2 Mon Sep 17 00:00:00 2001 From: Rangeet Pan <23088883+rangeetpan@users.noreply.github.com> Date: Wed, 9 Apr 2025 10:13:37 -0700 Subject: [PATCH 5/8] Update pyproject.toml --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 158c18c..451828f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -33,7 +33,7 @@ pydantic = "^2.10.6" pandas = "^2.2.3" networkx = "^3.4.2" pyarrow = "19.0.0" -tree-sitter = "0.23.0" +tree-sitter = "0.22.0" rich = "13.9.4" wget = "3.2" requests = "^2.32.3" From 3eb3cfed8e8f2f70f4a7fdabf3b0a06d74eee62b Mon Sep 17 00:00:00 2001 From: Rangeet Pan <23088883+rangeetpan@users.noreply.github.com> Date: Wed, 9 Apr 2025 10:17:21 -0700 Subject: [PATCH 6/8] Update models.py --- cldk/analysis/commons/treesitter/models.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/cldk/analysis/commons/treesitter/models.py b/cldk/analysis/commons/treesitter/models.py index 9592149..79c695e 100644 --- a/cldk/analysis/commons/treesitter/models.py +++ b/cldk/analysis/commons/treesitter/models.py @@ -47,10 +47,8 @@ class Capture: node: Node name: str - def __init__(self, captures: Dict[str, List[Node]]): - self.captures = [] - for capture_name, captures in captures.items(): - self.captures = [self.Capture(node=node, name=capture_name) for node in captures] + def __init__(self, captures: List[Tuple[Node, str]]): + self.captures = [self.Capture(node=node, name=text) for node, text in captures] def __getitem__(self, index: int) -> Capture: """Get the capture at the specified index. From a49d45e1c61fb7f57ac2038421425088d6fa05bd Mon Sep 17 00:00:00 2001 From: Rangeet Pan <23088883+rangeetpan@users.noreply.github.com> Date: Wed, 9 Apr 2025 10:18:44 -0700 Subject: [PATCH 7/8] Update models.py --- cldk/analysis/commons/treesitter/models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cldk/analysis/commons/treesitter/models.py b/cldk/analysis/commons/treesitter/models.py index 79c695e..78acc52 100644 --- a/cldk/analysis/commons/treesitter/models.py +++ b/cldk/analysis/commons/treesitter/models.py @@ -19,7 +19,7 @@ """ from dataclasses import dataclass -from typing import Dict, List +from typing import Dict, Tuple, List from tree_sitter import Node From 59fc080fb01c90933fa9ab7b732ea1e60cd31a96 Mon Sep 17 00:00:00 2001 From: Divya Sankar Date: Tue, 15 Apr 2025 11:10:00 -0400 Subject: [PATCH 8/8] minor fix --- .../commons/treesitter/treesitter_java.py | 21 ++++++++++--------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/cldk/analysis/commons/treesitter/treesitter_java.py b/cldk/analysis/commons/treesitter/treesitter_java.py index 3b767c8..a13b61f 100644 --- a/cldk/analysis/commons/treesitter/treesitter_java.py +++ b/cldk/analysis/commons/treesitter/treesitter_java.py @@ -25,6 +25,8 @@ from cldk.analysis.commons.treesitter.models import Captures logger = logging.getLogger(__name__) +language: Language = Language(tsjava.language()) +parser: Parser = Parser(language) # pylint: disable=too-many-public-methods @@ -34,8 +36,7 @@ class TreesitterJava: """ def __init__(self) -> None: - self.language: Language = Language(tsjava.language()) - self.parser: Parser = Parser(self.language) + pass def method_is_not_in_class(self, method_name: str, class_body: str) -> bool: """Check if a method is in a class. @@ -78,7 +79,7 @@ def syntax_error(node): return False - tree = self.parser.parse(bytes(code, "utf-8")) + tree = parser.parse(bytes(code, "utf-8")) if tree is not None: return not syntax_error(tree.root_node) return False @@ -92,7 +93,7 @@ def get_raw_ast(self, code: str) -> Tree: Returns: Tree: the raw AST """ - return self.parser.parse(bytes(code, "utf-8")) + return parser.parse(bytes(code, "utf-8")) def get_all_imports(self, source_code: str) -> Set[str]: """Get a list of all the imports in a class. @@ -176,8 +177,8 @@ def frame_query_and_capture_output(self, query: str, code_to_process: str) -> Ca code_to_process : str The code to process. """ - framed_query: Query = self.language.query(query) - tree = self.parser.parse(bytes(code_to_process, "utf-8")) + framed_query: Query = language.query(query) + tree = parser.parse(bytes(code_to_process, "utf-8")) return Captures(framed_query.captures(tree.root_node)) def get_method_name_from_declaration(self, method_name_string: str) -> str: @@ -392,7 +393,7 @@ def get_all_type_invocations(self, source_code: str) -> Set[str]: return {type_id.node.text.decode() for type_id in type_references} def get_method_return_type(self, source_code: str) -> str: - """Get the return type of a method. + """Get the return type of method. Parameters ---------- @@ -424,7 +425,7 @@ def get_lexical_tokens(self, code: str, filter_by_node_type: List[str] | None = List of lexical tokens """ - tree = self.parser.parse(bytes(code, "utf-8")) + tree = parser.parse(bytes(code, "utf-8")) root_node = tree.root_node lexical_tokens = [] @@ -432,9 +433,9 @@ def collect_leaf_token_values(node): if len(node.children) == 0: if filter_by_node_type is not None: if node.type in filter_by_node_type: - lexical_tokens.append(code[node.start_byte : node.end_byte]) + lexical_tokens.append(code[node.start_byte: node.end_byte]) else: - lexical_tokens.append(code[node.start_byte : node.end_byte]) + lexical_tokens.append(code[node.start_byte: node.end_byte]) else: for child in node.children: collect_leaf_token_values(child)