Skip to content

Commit f32e20b

Browse files
committed
BUG: resolve invalid grep with env neutral script.
1 parent 7c34da7 commit f32e20b

File tree

2 files changed

+214
-2
lines changed

2 files changed

+214
-2
lines changed

tools/ci/check_c_api_usage.py

Lines changed: 212 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,212 @@
1+
#!/usr/bin/env python3
2+
from __future__ import annotations
3+
4+
import argparse
5+
import os
6+
import re
7+
import sys
8+
import tempfile
9+
from pathlib import Path
10+
11+
"""
12+
Borrow-ref C API linter (Python version).
13+
14+
- Recursively scans source files under --root (default: numpy)
15+
- Matches suspicious CPython C-API calls as whole identifiers
16+
- Skips:
17+
- lines with '// noqa: borrowed-ref OK' or
18+
'// noqa: borrowed-ref - manual fix needed'
19+
- line comments (// ...)
20+
- block comments (/* ... */), even when they span lines
21+
- Prints findings and exits 1 if any issues found, else 0
22+
"""
23+
24+
# C-API names the linter reports when they appear in non-comment code.
SUSPICIOUS_FUNCS: tuple[str, ...] = (
    "PyList_GetItem",
    "PyDict_GetItem",
    "PyDict_GetItemWithError",
    "PyDict_GetItemString",
    "PyDict_SetDefault",
    "PyDict_Next",
    "PyWeakref_GetObject",
    "PyWeakref_GET_OBJECT",
    "PyList_GET_ITEM",
    "_PyDict_GetItemStringWithError",
    "PySequence_Fast",
)

# One alternation over all names, guarded on both sides so a name only
# matches as a whole identifier (not as part of a longer one).
FUNC_RX = re.compile(
    r"(?<!\w)(?:{})(?!\w)".format(
        "|".join(re.escape(name) for name in SUSPICIOUS_FUNCS)
    )
)

# Markers that, when present anywhere on a line, silence the linter for it.
NOQA_OK = "noqa: borrowed-ref OK"
NOQA_MANUAL = "noqa: borrowed-ref - manual fix needed"

# Defaults used when --ext / --exclude are not given on the command line.
DEFAULT_EXTS = {".c", ".h", ".c.src", ".cpp"}
DEFAULT_EXCLUDES = {"pythoncapi-compat"}
49+
50+
def _find_closing_quote(line: str, start: int) -> int:
    """Return the index of the quote closing the literal opened at 'start', or -1."""
    quote = line[start]
    j = start + 1
    n = len(line)
    while j < n:
        c = line[j]
        if c == "\\":
            # Skip the escaped character so \" or \' does not end the literal.
            j += 2
            continue
        if c == quote:
            return j
        j += 1
    return -1


def strip_comments(line: str, in_block: bool) -> tuple[str, bool]:
    """
    Return (code_without_comments, updated_in_block).

    Removes // line comments and /* ... */ block comments (non-nesting,
    C-style) from a single line.

    'in_block' tells whether the line starts inside a block comment that was
    opened on an earlier line; the returned flag tells whether a block
    comment is still open at the end of this line.

    Comment markers that appear inside a complete string or character literal
    ("..." or '...') are not treated as comments; the literal is copied to the
    output unchanged. A quote with no closing partner on the same line is
    treated as an ordinary character, so later comments on that line are
    still stripped.
    """
    out_parts: list[str] = []
    i = 0
    n = len(line)

    while i < n:
        if in_block:
            end = line.find("*/", i)
            if end == -1:
                # Entire remainder is inside a block comment.
                return ("".join(out_parts), True)
            i = end + 2
            in_block = False
            continue

        ch = line[i]
        if ch in "\"'":
            close = _find_closing_quote(line, i)
            if close != -1:
                # Copy the whole literal verbatim; '//' or '/*' inside it
                # is data, not a comment.
                out_parts.append(line[i:close + 1])
                i = close + 1
                continue
            # Unterminated quote: fall through and keep it as plain text.
        elif line.startswith("//", i):
            # Line comment: nothing after this point is code.
            break
        elif line.startswith("/*", i):
            in_block = True
            i += 2
            continue

        out_parts.append(ch)
        i += 1

    return ("".join(out_parts), in_block)
90+
91+
def iter_source_files(root: Path, exts: set[str], excludes: set[str]) -> list[Path]:
    """
    Return a list of source files under 'root', where filenames end with any of the
    extensions in 'exts' (e.g., '.c.src', '.c', '.h').
    Excludes directories whose names are in 'excludes'.

    Directories and filenames are visited in sorted order so the result (and
    therefore the report built from it) does not depend on the order in which
    the filesystem happens to list entries.
    """
    # str.endswith accepts a tuple, which also handles multi-suffix
    # patterns such as '.c.src'.
    suffixes = tuple(exts)
    results: list[Path] = []

    for dirpath, dirnames, filenames in os.walk(root):
        # Prune excluded directories in place so os.walk does not descend
        # into them, and sort what remains to fix the traversal order.
        dirnames[:] = sorted(d for d in dirnames if d not in excludes)
        base = Path(dirpath)
        results.extend(
            base / fn for fn in sorted(filenames) if fn.endswith(suffixes)
        )
    return results
107+
108+
109+
def scan_file(path: Path) -> list[tuple[str, int, str, str]]:
    """
    Scan a single file.

    Each line has its comments removed (block-comment state is carried from
    line to line) and the remaining code is searched with FUNC_RX. Lines that
    contain one of the noqa markers are not reported.

    Returns list of (func_name, line_number, path_str, raw_line_str), one
    entry per match, so a line with several suspicious calls yields several
    entries. Returns an empty list if the file cannot be found.
    """
    hits: list[tuple[str, int, str, str]] = []
    in_block = False
    path_str = str(path)

    try:
        with path.open("r", encoding="utf-8", errors="ignore") as f:
            for lineno, raw in enumerate(f, 1):
                line = raw.rstrip("\n")

                # Advance the block-comment state on every line, including
                # lines silenced by noqa. Otherwise a noqa line that opens or
                # closes a /* ... */ comment leaves 'in_block' stale and the
                # following lines are misclassified.
                code, in_block = strip_comments(line, in_block)

                # Skip if approved by noqa markers (they live in comments,
                # so they are looked up in the raw line).
                if NOQA_OK in raw or NOQA_MANUAL in raw:
                    continue

                # Nothing but comments/whitespace on this line.
                if not code.strip():
                    continue

                # Record every suspicious call found in non-comment code.
                hits.extend(
                    (m.group(0), lineno, path_str, line)
                    for m in FUNC_RX.finditer(code)
                )
    except FileNotFoundError:
        # File may have disappeared between listing and opening; this is a
        # deliberate best-effort skip.
        pass
    return hits
137+
138+
139+
def main(argv: list[str] | None = None) -> int:
140+
ap = argparse.ArgumentParser(description="Borrow-ref C API linter (Python).")
141+
ap.add_argument(
142+
"--root",
143+
default="numpy",
144+
type=str,
145+
help="Root directory to scan (default: numpy)"
146+
)
147+
ap.add_argument(
148+
"--ext",
149+
action="append",
150+
default=None,
151+
help="File extension(s) to include (repeatable). Defaults to .c,.h,.c.src,.cpp",
152+
)
153+
ap.add_argument(
154+
"--exclude",
155+
action="append",
156+
default=None,
157+
help="Directory name(s) to exclude (repeatable). Default: pythoncapi-compat",
158+
)
159+
args = ap.parse_args(argv)
160+
161+
root = Path(args.root)
162+
if not root.exists():
163+
print(f"error: root '{root}' does not exist", file=sys.stderr)
164+
return 2
165+
166+
exts = set(args.ext) if args.ext else set(DEFAULT_EXTS)
167+
excludes = set(args.exclude) if args.exclude else set(DEFAULT_EXCLUDES)
168+
169+
files = list(iter_source_files(root, exts, excludes))
170+
# print(f'this is the list of files....{files}')
171+
print(f"Scanning {len(files)} C/C++ source files...")
172+
173+
# Output file (mirrors your shell behavior)
174+
tmpdir = Path(".tmp")
175+
tmpdir.mkdir(exist_ok=True)
176+
fd, outpath = tempfile.mkstemp(
177+
prefix="c_api_usage_report.",
178+
suffix=".txt",
179+
dir=tmpdir
180+
)
181+
os.close(fd)
182+
183+
findings = 0
184+
with open(outpath, "w", encoding="utf-8", errors="ignore") as out:
185+
out.write("Running Suspicious C API usage report workflow...\n\n")
186+
for p in files:
187+
for func, lineno, pstr, raw in scan_file(p):
188+
findings += 1
189+
out.write(f"Found suspicious call to {func} in file: {pstr}\n")
190+
out.write(f" -> {pstr}:{lineno}:{raw}\n")
191+
out.write("Recommendation:\n")
192+
out.write(
193+
"If this use is intentional and safe, add "
194+
"'// noqa: borrowed-ref OK' on the same line "
195+
"to silence this warning.\n"
196+
)
197+
out.write(
198+
"Otherwise, consider replacing the call "
199+
"with a thread-safe API function.\n\n")
200+
201+
if findings == 0:
202+
out.write("C API borrow-ref linter found no issues.\n")
203+
204+
# Echo report and set exit status
205+
with open(outpath, "r", encoding="utf-8", errors="ignore") as f:
206+
sys.stdout.write(f.read())
207+
208+
return 1 if findings else 0
209+
210+
211+
if __name__ == "__main__":
    # Propagate main()'s return value as the process exit status.
    raise SystemExit(main())

tools/linter.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -50,9 +50,9 @@ def run_check_c_api(self) -> tuple[int, str]:
5050
# Running borrowed ref checker
5151
print("Running C API borrow-reference linter...")
5252
borrowed_ref_script = os.path.join(self.repository_root, "tools", "ci",
53-
"check_c_api_usage.sh")
53+
"check_c_api_usage.py")
5454
borrowed_res = subprocess.run(
55-
["bash", borrowed_ref_script],
55+
["python3", borrowed_ref_script],
5656
stdout=subprocess.PIPE,
5757
stderr=subprocess.STDOUT,
5858
encoding="utf-8",

0 commit comments

Comments
 (0)