Skip to content

Fix issue 108: Unnecessary dependency on clang/llvm for non c/cpp analysis #112

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ lint: ## Run the linter
.PHONY: test
test: ## Run the unit tests
$(info Running tests...)
pytest --pspec --cov=cldk --cov-fail-under=70 --disable-warnings
pytest --pspec --cov=cldk --cov-fail-under=75 --disable-warnings

##@ Build

Expand Down
48 changes: 24 additions & 24 deletions cldk/analysis/c/c_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,11 @@
Analysis model for C projects
"""

import os
from pathlib import Path
from typing import Dict, List, Optional
import networkx as nx


from cldk.analysis.c.clang import ClangAnalyzer
from cldk.models.c import CApplication, CFunction, CTranslationUnit, CMacro, CTypedef, CStruct, CEnum, CVariable

Expand All @@ -36,7 +36,7 @@ def __init__(self, project_dir: Path) -> None:
self.c_application = self._init_application(project_dir)

def _init_application(self, project_dir: Path) -> CApplication:
"""Initializes the C application object.
"""Should initialize the C application object.

Args:
project_dir (Path): Path to the project directory.
Expand All @@ -56,7 +56,7 @@ def _init_application(self, project_dir: Path) -> CApplication:
return CApplication(translation_units=translation_units)

def get_c_application(self) -> CApplication:
"""returns the C application object.
"""Obtain the C application object.

Returns:
CApplication: C application object.
Expand Down Expand Up @@ -90,15 +90,15 @@ def is_parsable(self, source_code: str) -> bool:
raise NotImplementedError("Support for this functionality has not been implemented yet.")

def get_call_graph(self) -> nx.DiGraph:
    """Placeholder for building the call graph of the C code.

    This is not implemented yet: every call raises ``NotImplementedError``
    and nothing is returned.

    Returns:
        nx.DiGraph: The call graph of the C code (the intended result once
        this is implemented).

    Raises:
        NotImplementedError: Always, because call graph construction is not
            implemented for C.
    """
    raise NotImplementedError("Support for this functionality has not been implemented yet.")

def get_call_graph_json(self) -> str:
"""returns a serialized call graph in json.
"""Should return a serialized call graph in json.

Raises:
NotImplementedError: Raised when this functionality is not supported.
Expand All @@ -110,7 +110,7 @@ def get_call_graph_json(self) -> str:
raise NotImplementedError("Producing a call graph over a single file is not implemented yet.")

def get_callers(self, function: CFunction) -> Dict:
"""returns a dictionary of callers of the target method.
"""Should return a dictionary of callers of the target method.

Args:
function (CFunction): A CFunction object.
Expand All @@ -125,7 +125,7 @@ def get_callers(self, function: CFunction) -> Dict:
raise NotImplementedError("Generating all callers over a single file is not implemented yet.")

def get_callees(self, function: CFunction) -> Dict:
"""returns a dictionary of callees in a fuction.
"""Should return a dictionary of callees in a fuction.

Args:
function (CFunction): A CFunction object.
Expand All @@ -139,7 +139,7 @@ def get_callees(self, function: CFunction) -> Dict:
raise NotImplementedError("Generating all callees over a single file is not implemented yet.")

def get_functions(self) -> Dict[str, CFunction]:
"""returns all functions in the project.
"""Should return all functions in the project.

Raises:
NotImplementedError: Raised when current AnalysisEngine does not support this function.
Expand All @@ -151,7 +151,7 @@ def get_functions(self) -> Dict[str, CFunction]:
return translation_unit.functions

def get_function(self, function_name: str, file_name: Optional[str]) -> CFunction | List[CFunction]:
"""returns a function object given the function name.
"""Should return a function object given the function name.

Args:
function_name (str): The name of the function.
Expand All @@ -163,7 +163,7 @@ def get_function(self, function_name: str, file_name: Optional[str]) -> CFunctio
raise NotImplementedError("Support for this functionality has not been implemented yet.")

def get_C_file(self, file_name: str) -> str:
"""returns a class given qualified class name.
"""Should return a class given qualified class name.

Args:
file_name (str): The name of the file.
Expand Down Expand Up @@ -191,7 +191,7 @@ def get_C_compilation_unit(self, file_path: str) -> CTranslationUnit:
return self.c_application.translation_units.get(file_path)

def get_functions_in_file(self, file_name: str) -> List[CFunction]:
"""returns a dictionary of all methods of the given class.
"""Should return a dictionary of all methods of the given class.

Args:
file_name (str): The name of the file.
Expand All @@ -205,7 +205,7 @@ def get_functions_in_file(self, file_name: str) -> List[CFunction]:
raise NotImplementedError("Support for this functionality has not been implemented yet.")

def get_macros(self) -> List[CMacro]:
"""returns a list of all macros in the C code.
"""Should return a list of all macros in the C code.

Raises:
NotImplementedError: Raised when current AnalysisEngine does not support this function.
Expand All @@ -216,7 +216,7 @@ def get_macros(self) -> List[CMacro]:
raise NotImplementedError("Support for this functionality has not been implemented yet.")

def get_macros_in_file(self, file_name: str) -> List[CMacro] | None:
"""returns a list of all macros in the given file.
"""Should return a list of all macros in the given file.

Args:
file_name (str): The name of the file.
Expand All @@ -231,7 +231,7 @@ def get_macros_in_file(self, file_name: str) -> List[CMacro] | None:


def get_includes(self) -> List[str]:
"""returns a list of all include statements across all files in the C code.
"""Should return a list of all include statements across all files in the C code.

Returns:
List[str]: A list of all include statements. Returns empty list if none found.
Expand All @@ -243,7 +243,7 @@ def get_includes(self) -> List[str]:


def get_includes_in_file(self, file_name: str) -> List[str] | None:
"""returns a list of all include statements in the given file.
"""Should return a list of all include statements in the given file.

Args:
file_name (str): The name of the file to search in.
Expand All @@ -257,7 +257,7 @@ def get_includes_in_file(self, file_name: str) -> List[str] | None:


def get_macros(self) -> List[CMacro]:
"""returns a list of all macro definitions across all files in the C code.
"""Should return a list of all macro definitions across all files in the C code.

Returns:
List[CMacro]: A list of all macro definitions. Returns empty list if none found.
Expand All @@ -269,7 +269,7 @@ def get_macros(self) -> List[CMacro]:


def get_macros_in_file(self, file_name: str) -> List[CMacro] | None:
"""returns a list of all macro definitions in the given file.
"""Should return a list of all macro definitions in the given file.

Args:
file_name (str): The name of the file to search in.
Expand All @@ -283,7 +283,7 @@ def get_macros_in_file(self, file_name: str) -> List[CMacro] | None:


def get_typedefs(self) -> List[CTypedef]:
"""returns a list of all typedef declarations across all files in the C code.
"""Should return a list of all typedef declarations across all files in the C code.

Returns:
List[CTypedef]: A list of all typedef declarations. Returns empty list if none found.
Expand All @@ -295,7 +295,7 @@ def get_typedefs(self) -> List[CTypedef]:


def get_typedefs_in_file(self, file_name: str) -> List[CTypedef] | None:
"""returns a list of all typedef declarations in the given file.
"""Should return a list of all typedef declarations in the given file.

Args:
file_name (str): The name of the file to search in.
Expand All @@ -309,7 +309,7 @@ def get_typedefs_in_file(self, file_name: str) -> List[CTypedef] | None:


def get_structs(self) -> List[CStruct]:
"""returns a list of all struct/union declarations across all files in the C code.
"""Should return a list of all struct/union declarations across all files in the C code.

Returns:
List[CStruct]: A list of all struct/union declarations. Returns empty list if none found.
Expand All @@ -321,7 +321,7 @@ def get_structs(self) -> List[CStruct]:


def get_structs_in_file(self, file_name: str) -> List[CStruct] | None:
"""returns a list of all struct/union declarations in the given file.
"""Should return a list of all struct/union declarations in the given file.

Args:
file_name (str): The name of the file to search in.
Expand All @@ -335,7 +335,7 @@ def get_structs_in_file(self, file_name: str) -> List[CStruct] | None:


def get_enums(self) -> List[CEnum]:
"""returns a list of all enum declarations across all files in the C code.
"""Should return a list of all enum declarations across all files in the C code.

Returns:
List[CEnum]: A list of all enum declarations. Returns empty list if none found.
Expand All @@ -347,7 +347,7 @@ def get_enums(self) -> List[CEnum]:


def get_enums_in_file(self, file_name: str) -> List[CEnum] | None:
"""returns a list of all enum declarations in the given file.
"""Should return a list of all enum declarations in the given file.

Args:
file_name (str): The name of the file to search in.
Expand All @@ -361,7 +361,7 @@ def get_enums_in_file(self, file_name: str) -> List[CEnum] | None:


def get_globals(self, file_name: str) -> List[CVariable] | None:
"""returns a list of all global variable declarations in the given file.
"""Should return a list of all global variable declarations in the given file.

Args:
file_name (str): The name of the file to search in.
Expand Down
119 changes: 62 additions & 57 deletions cldk/analysis/c/clang/clang_analyzer.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import os
from pdb import set_trace
import platform
from clang.cindex import Config
from pathlib import Path
Expand All @@ -12,75 +13,79 @@

# First, we only import Config from clang.cindex
from clang.cindex import Config


def find_libclang() -> str:
    """Return the path of a libclang shared library found on this machine.

    macOS is searched at a fixed list of Homebrew and Xcode locations; Linux
    is searched recursively under ``/usr/lib`` and ``/usr/lib64`` for files
    matching ``libclang*.so*``.

    Returns:
        str: The first candidate path that exists.

    Raises:
        RuntimeError: If the operating system is neither macOS nor Linux, or
            if no candidate path exists.
    """
    system = platform.system()

    if system == "Darwin":
        # Homebrew prefixes differ between Apple Silicon and Intel; Xcode is the last resort
        candidates = [
            "/opt/homebrew/opt/llvm/lib/libclang.dylib",  # Apple Silicon
            "/usr/local/opt/llvm/lib/libclang.dylib",  # Intel Mac
            "/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/lib/libclang.dylib",
        ]
        install_instructions = "Install LLVM using: brew install llvm"
    elif system == "Linux":
        from pathlib import Path

        # Collect every libclang shared object below the standard library roots
        candidates = []
        for root in (Path("/usr/lib"), Path("/usr/lib64")):
            if root.exists():
                candidates.extend(str(match) for match in root.rglob("libclang*.so*"))
        install_instructions = "Install libclang development package using your system's package manager"
    else:
        raise RuntimeError(f"Unsupported operating system: {system}")

    # The first candidate that is present on disk wins
    path = next((candidate for candidate in candidates if os.path.exists(candidate)), None)
    if path is None:
        # Nothing usable was found: tell the user how to install it
        raise RuntimeError(f"Could not find libclang library. \n" f"Please ensure LLVM is installed:\n{install_instructions}")

    logger.info(f"Found libclang at: {path}")
    return path


# Initialize libclang at module level: this runs once, when the module is imported.
try:
    # Locate the shared library and register it with the clang bindings
    libclang_path = find_libclang()
    Config.set_library_file(libclang_path)
    logger.info("Successfully initialized libclang")

    # Now that libclang is initialized, we can safely import other Clang components
    from clang.cindex import Index, TranslationUnit, CursorKind, TypeKind, CompilationDatabase

except Exception as e:
    # Log the failure, then re-raise so that importing this module fails
    logger.error(f"Failed to initialize libclang: {e}")
    raise
from clang.cindex import Index, TranslationUnit, CursorKind, TypeKind, CompilationDatabase


class ClangAnalyzer:
"""Analyzes C code using Clang's Python bindings."""

def __init__(self, compilation_database_path: Optional[Path] = None):
    """Create the libclang index and, optionally, load a compilation database.

    On macOS, the first existing libclang among a fixed list of Homebrew and
    Xcode locations is registered with ``Config`` before the index is
    created. On any other platform, or when none of those files exist, no
    library file is set here and ``clang.cindex`` keeps its current
    configuration.

    Args:
        compilation_database_path: Optional directory handed to
            ``CompilationDatabase.fromDirectory``. When omitted,
            ``self.compilation_database`` stays ``None``.
    """
    # Config.set_library_file() raises once the bindings have loaded libclang
    # (which happens on the first Index.create()), so only set the path while
    # the library is still unloaded. This keeps a second ClangAnalyzer from failing.
    if platform.system() == "Darwin" and not Config.loaded:
        possible_paths = [
            "/opt/homebrew/opt/llvm/lib/libclang.dylib",  # Apple Silicon
            "/usr/local/opt/llvm/lib/libclang.dylib",  # Intel Mac
            "/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/lib/libclang.dylib",
        ]

        # Use the first path that exists; stop there so a later entry cannot override it
        for path in possible_paths:
            if os.path.exists(path):
                logger.info(f"Found libclang at: {path}")
                # Configure Clang before creating the Index
                Config.set_library_file(path)
                break

    self.index = Index.create()
    self.compilation_database = None
    # TODO: Implement compilation database for C/C++ projects so that we can get compile arguments for each file
    # and parse them correctly. This is useful for projects with complex build systems.
    if compilation_database_path:
        self.compilation_database = CompilationDatabase.fromDirectory(str(compilation_database_path))

def __find_libclang(self) -> str:
    """Locate a libclang shared library for the current operating system.

    macOS is searched at a fixed list of Homebrew and Xcode locations. Linux
    is searched recursively under ``/usr/lib`` and ``/usr/lib64``, restricted
    to files matching ``libclang*.so.17*``.

    NOTE(review): no caller of this method is visible in this part of the
    file; confirm whether it is still used.

    Returns:
        str: The first candidate path that exists.

    Raises:
        RuntimeError: If the operating system is neither macOS nor Linux, or
            if no candidate path exists.
    """

    system = platform.system()

    # On macOS, we check both Apple Silicon and Intel paths
    if system == "Darwin":
        possible_paths = [
            "/opt/homebrew/opt/llvm/lib/libclang.dylib",  # Apple Silicon
            "/usr/local/opt/llvm/lib/libclang.dylib",  # Intel Mac
            "/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/lib/libclang.dylib",
        ]
        install_instructions = "Install LLVM using: brew install llvm"

    # On Linux, we check various common installation paths
    elif system == "Linux":
        from pathlib import Path

        lib_paths = [Path("/usr/lib"), Path("/usr/lib64")]
        possible_paths = [str(p) for base in lib_paths if base.exists() for p in base.rglob("libclang*.so.17*")]
        # Report the candidates through the logger instead of printing to stdout
        logger.debug(f"libclang candidates: {possible_paths}")
        install_instructions = "Install libclang development package using your system's package manager"
    else:
        raise RuntimeError(f"Unsupported operating system: {system}")

    # Check each possible path and return the first one that exists
    for path in possible_paths:
        if os.path.exists(path):
            logger.info(f"Found libclang at: {path}")
            return path

    # If no library is found, provide clear installation instructions
    raise RuntimeError(f"Could not find libclang library. \n" f"Please ensure LLVM is installed:\n{install_instructions}")

def analyze_file(self, file_path: Path) -> CTranslationUnit:
"""Analyzes a single C source file using Clang."""

Expand All @@ -105,7 +110,7 @@ def analyze_file(self, file_path: Path) -> CTranslationUnit:
return translation_unit

def _process_translation_unit(self, cursor, translation_unit: CTranslationUnit):
"""Processes all declarations in a translation unit."""
"""Should process all declarations in a translation unit."""

for child in cursor.get_children():
if child.location.file and str(child.location.file) != translation_unit.file_path:
Expand Down
Loading