From 488ba78af23db2812bfbb5245be7637e0835701b Mon Sep 17 00:00:00 2001 From: "d.grigonis" Date: Thu, 1 May 2025 02:41:21 +0300 Subject: [PATCH 1/5] impl --- .../c_parser/preprocessor/__init__.py | 3 +- .../c-analyzer/c_parser/preprocessor/clang.py | 109 ++++++++++++++++++ Tools/c-analyzer/c_parser/preprocessor/gcc.py | 7 +- Tools/c-analyzer/cpython/_parser.py | 4 + 4 files changed, 120 insertions(+), 3 deletions(-) create mode 100644 Tools/c-analyzer/c_parser/preprocessor/clang.py diff --git a/Tools/c-analyzer/c_parser/preprocessor/__init__.py b/Tools/c-analyzer/c_parser/preprocessor/__init__.py index 30a86cbd7dc494..f8d2f805cb1b19 100644 --- a/Tools/c-analyzer/c_parser/preprocessor/__init__.py +++ b/Tools/c-analyzer/c_parser/preprocessor/__init__.py @@ -16,6 +16,7 @@ from . import ( pure as _pure, gcc as _gcc, + clang as _clang, ) @@ -234,7 +235,7 @@ def handling_errors(ignore_exc=None, *, log_err=None): 'bcpp': None, # aliases/extras: 'gcc': _gcc.preprocess, - 'clang': None, + 'clang': _clang.preprocess, } diff --git a/Tools/c-analyzer/c_parser/preprocessor/clang.py b/Tools/c-analyzer/c_parser/preprocessor/clang.py new file mode 100644 index 00000000000000..a5441770e085f3 --- /dev/null +++ b/Tools/c-analyzer/c_parser/preprocessor/clang.py @@ -0,0 +1,109 @@ +import os.path +import re + +from . import common as _common +from . import gcc as _gcc + + +TOOL = 'clang' + +META_FILES = { + '', + '', +} + + +def preprocess(filename, + incldirs=None, + includes=None, + macros=None, + samefiles=None, + cwd=None, + ): + if not cwd or not os.path.isabs(cwd): + cwd = os.path.abspath(cwd or '.') + filename = _gcc._normpath(filename, cwd) + + postargs = _gcc.POST_ARGS + basename = os.path.basename(filename) + dirname = os.path.basename(os.path.dirname(filename)) + if (basename not in _gcc.FILES_WITHOUT_INTERNAL_CAPI + and dirname not in _gcc.DIRS_WITHOUT_INTERNAL_CAPI): + postargs += ('-DPy_BUILD_CORE=1',) + + text = _common.preprocess( + TOOL, + filename, + incldirs=incldirs, + includes=includes, + macros=macros, + #preargs=PRE_ARGS, + postargs=postargs, + executable=['clang'], + compiler='unix', + cwd=cwd, + ) + return _iter_lines(text, filename, samefiles, cwd) + + +EXIT_MARKERS = {'# 2 "" 2', '# 3 "" 2', '# 4 "" 2'} + + +def _iter_lines(text, reqfile, samefiles, cwd, raw=False): + # NOTE:HACK: has a stack return in unusual order for /include/curses.h + if reqfile.endswith(('/Include/py_curses.h', + '/Modules/_cursesmodule.c', + '/Modules/_curses_panel.c')): + return + + lines = iter(text.splitlines()) + + # The first line is special. + # The subsequent lines are consistent. + firstlines = [ + f'# 1 "{reqfile}"', + '# 1 "" 1', + '# 1 "" 3', + '# 370 "" 3', + '# 1 "" 1', + '# 1 "" 2', + ] + for expected in firstlines: + line = next(lines) + if line != expected: + raise NotImplementedError((line, expected)) + + # Do all the CLI-provided includes. + filter_reqfile = (lambda f: _gcc._filter_reqfile(f, reqfile, samefiles)) + make_info = (lambda lno: _common.FileInfo(reqfile, lno)) + last = None + for line in lines: + assert last != reqfile, (last,) + # NOTE:clang specific + if not line: + continue + lno, included, flags = _gcc._parse_marker_line(line, reqfile) + if not included: + raise NotImplementedError((line,)) + if included == reqfile: + # This will be the last one. + assert 2 in flags, (line, flags) + else: + # NOTE:clang specific + if _gcc._normpath(included, cwd) == reqfile: + assert 1 in flags or 2 in flags, (line, flags, included, reqfile) + else: + assert 1 in flags, (line, flags, included, reqfile) + yield from _gcc._iter_top_include_lines( + lines, + _gcc._normpath(included, cwd), + cwd, + filter_reqfile, + make_info, + raw, + EXIT_MARKERS + ) + last = included + # The last one is always the requested file. + # NOTE:clang specific + assert _gcc._normpath(included, cwd) == reqfile, (line,) diff --git a/Tools/c-analyzer/c_parser/preprocessor/gcc.py b/Tools/c-analyzer/c_parser/preprocessor/gcc.py index d20cd19f6e6d5e..7b0272ee98d405 100644 --- a/Tools/c-analyzer/c_parser/preprocessor/gcc.py +++ b/Tools/c-analyzer/c_parser/preprocessor/gcc.py @@ -65,6 +65,8 @@ '-E', ) +EXIT_MARKERS = {'# 0 "" 2', '# 1 "" 2'} + def preprocess(filename, incldirs=None, @@ -138,6 +140,7 @@ def _iter_lines(text, reqfile, samefiles, cwd, raw=False): filter_reqfile, make_info, raw, + EXIT_MARKERS ) last = included # The last one is always the requested file. @@ -146,7 +149,7 @@ def _iter_lines(text, reqfile, samefiles, cwd, raw=False): def _iter_top_include_lines(lines, topfile, cwd, filter_reqfile, make_info, - raw): + raw, exit_markers): partial = 0 # depth files = [topfile] # We start at 1 in case there are source lines (including blank ones) @@ -154,7 +157,7 @@ def _iter_top_include_lines(lines, topfile, cwd, # _parse_marker_line() that the preprocessor reported lno as 1. lno = 1 for line in lines: - if line == '# 0 "" 2' or line == '# 1 "" 2': + if line in exit_markers: # We're done with this top-level include. return diff --git a/Tools/c-analyzer/cpython/_parser.py b/Tools/c-analyzer/cpython/_parser.py index 037fe11ea223c7..a2df613db46966 100644 --- a/Tools/c-analyzer/cpython/_parser.py +++ b/Tools/c-analyzer/cpython/_parser.py @@ -351,6 +351,10 @@ def clean_lines(text): # Catch-alls: _abs('Include/**/*.h'): (5_000, 500), + + # Specific to clang + _abs('Modules/selectmodule.c'): (40_000, 3000), + _abs('Modules/_testcapi/pyatomic.c'): (30_000, 1000), } From e6c8a854680792e73d237dbeda9c4bf0f819a96b Mon Sep 17 00:00:00 2001 From: "d.grigonis" Date: Thu, 1 May 2025 21:05:51 +0300 Subject: [PATCH 2/5] add experimental print --- .../c-analyzer/c_parser/preprocessor/clang.py | 51 ++++++++++++++----- 1 file changed, 39 insertions(+), 12 deletions(-) diff --git a/Tools/c-analyzer/c_parser/preprocessor/clang.py b/Tools/c-analyzer/c_parser/preprocessor/clang.py index a5441770e085f3..8b24a8f4e4b229 100644 --- a/Tools/c-analyzer/c_parser/preprocessor/clang.py +++ b/Tools/c-analyzer/c_parser/preprocessor/clang.py @@ -1,9 +1,10 @@ import os.path -import re +import re, sys from . import common as _common from . import gcc as _gcc +_normpath = _gcc._normpath TOOL = 'clang' @@ -22,7 +23,7 @@ def preprocess(filename, ): if not cwd or not os.path.isabs(cwd): cwd = os.path.abspath(cwd or '.') - filename = _gcc._normpath(filename, cwd) + filename = _normpath(filename, cwd) postargs = _gcc.POST_ARGS basename = os.path.basename(filename) @@ -46,14 +47,40 @@ def preprocess(filename, return _iter_lines(text, filename, samefiles, cwd) +# Reasons: +# py_curses related have a stack return in unusual order for /include/curses.h +CLANG_IGNORES = ( + '/Include/py_curses.h', + '/Modules/_cursesmodule.c', + '/Modules/_curses_panel.c' +) + +EXPERIMENTAL_PRINTED = False + +CLANG_EXPERIMENTAL = """ + +WARNING +======= +clang preprocessor is in experimental state. +a) There might be false positives +b) Following files are skipped +{} + +""".format('\n'.join([' ' + fn for fn in CLANG_IGNORES])) + + EXIT_MARKERS = {'# 2 "" 2', '# 3 "" 2', '# 4 "" 2'} def _iter_lines(text, reqfile, samefiles, cwd, raw=False): - # NOTE:HACK: has a stack return in unusual order for /include/curses.h - if reqfile.endswith(('/Include/py_curses.h', - '/Modules/_cursesmodule.c', - '/Modules/_curses_panel.c')): + global EXPERIMENTAL_PRINTED + if not EXPERIMENTAL_PRINTED: + print(CLANG_EXPERIMENTAL, flush=True) + EXPERIMENTAL_PRINTED = True + + # NOTE:clang specific + if reqfile.endswith(CLANG_IGNORES): + print(f'\nSkipping: {reqfile}', flush=True) return lines = iter(text.splitlines()) @@ -79,7 +106,7 @@ def _iter_lines(text, reqfile, samefiles, cwd, raw=False): last = None for line in lines: assert last != reqfile, (last,) - # NOTE:clang specific + # NOTE:condition is clang specific if not line: continue lno, included, flags = _gcc._parse_marker_line(line, reqfile) @@ -89,14 +116,14 @@ def _iter_lines(text, reqfile, samefiles, cwd, raw=False): # This will be the last one. assert 2 in flags, (line, flags) else: - # NOTE:clang specific - if _gcc._normpath(included, cwd) == reqfile: + # NOTE:first condition is specific to clang + if _normpath(included, cwd) == reqfile: assert 1 in flags or 2 in flags, (line, flags, included, reqfile) else: assert 1 in flags, (line, flags, included, reqfile) yield from _gcc._iter_top_include_lines( lines, - _gcc._normpath(included, cwd), + _normpath(included, cwd), cwd, filter_reqfile, make_info, @@ -105,5 +132,5 @@ def _iter_lines(text, reqfile, samefiles, cwd, raw=False): ) last = included # The last one is always the requested file. - # NOTE:clang specific - assert _gcc._normpath(included, cwd) == reqfile, (line,) + # NOTE:_normpath is clang specific + assert _normpath(included, cwd) == reqfile, (line,) From 09bf9b2ffa25a728554e59bb26bbffd059a064b0 Mon Sep 17 00:00:00 2001 From: "d.grigonis" Date: Fri, 2 May 2025 05:46:09 +0300 Subject: [PATCH 3/5] included 2 failures to tsvs next to similar entries --- Tools/c-analyzer/cpython/globals-to-fix.tsv | 1 + Tools/c-analyzer/cpython/ignored.tsv | 1 + 2 files changed, 2 insertions(+) diff --git a/Tools/c-analyzer/cpython/globals-to-fix.tsv b/Tools/c-analyzer/cpython/globals-to-fix.tsv index 3c3cb2f9c86f16..301784f773d31f 100644 --- a/Tools/c-analyzer/cpython/globals-to-fix.tsv +++ b/Tools/c-analyzer/cpython/globals-to-fix.tsv @@ -400,6 +400,7 @@ Modules/_tkinter.c - tcl_lock - Modules/_tkinter.c - excInCmd - Modules/_tkinter.c - valInCmd - Modules/_tkinter.c - trbInCmd - +Modules/socketmodule.c - netdb_lock - ################################## diff --git a/Tools/c-analyzer/cpython/ignored.tsv b/Tools/c-analyzer/cpython/ignored.tsv index a33619b1b345e2..cd3b8945709cba 100644 --- a/Tools/c-analyzer/cpython/ignored.tsv +++ b/Tools/c-analyzer/cpython/ignored.tsv @@ -16,6 +16,7 @@ filename funcname name reason ## indicators for resource availability/capability # (set during first init) Python/bootstrap_hash.c py_getrandom getrandom_works - +Python/bootstrap_hash.c py_getentropy getentropy_works - Python/fileutils.c - _Py_open_cloexec_works - Python/fileutils.c set_inheritable ioctl_works - # (set lazily, *after* first init) From b28b686c4164f599aa9779b8318d24f1f5b77128 Mon Sep 17 00:00:00 2001 From: "d.grigonis" Date: Fri, 2 May 2025 06:52:54 +0300 Subject: [PATCH 4/5] added fix/hack for curses.h fails --- .../c-analyzer/c_parser/preprocessor/clang.py | 32 ------------------- Tools/c-analyzer/c_parser/preprocessor/gcc.py | 10 ++++++ 2 files changed, 10 insertions(+), 32 deletions(-) diff --git a/Tools/c-analyzer/c_parser/preprocessor/clang.py b/Tools/c-analyzer/c_parser/preprocessor/clang.py index 8b24a8f4e4b229..574a23f8f6d6f9 100644 --- a/Tools/c-analyzer/c_parser/preprocessor/clang.py +++ b/Tools/c-analyzer/c_parser/preprocessor/clang.py @@ -47,42 +47,10 @@ def preprocess(filename, return _iter_lines(text, filename, samefiles, cwd) -# Reasons: -# py_curses related have a stack return in unusual order for /include/curses.h -CLANG_IGNORES = ( - '/Include/py_curses.h', - '/Modules/_cursesmodule.c', - '/Modules/_curses_panel.c' -) - -EXPERIMENTAL_PRINTED = False - -CLANG_EXPERIMENTAL = """ - -WARNING -======= -clang preprocessor is in experimental state. -a) There might be false positives -b) Following files are skipped -{} - -""".format('\n'.join([' ' + fn for fn in CLANG_IGNORES])) - - EXIT_MARKERS = {'# 2 "" 2', '# 3 "" 2', '# 4 "" 2'} def _iter_lines(text, reqfile, samefiles, cwd, raw=False): - global EXPERIMENTAL_PRINTED - if not EXPERIMENTAL_PRINTED: - print(CLANG_EXPERIMENTAL, flush=True) - EXPERIMENTAL_PRINTED = True - - # NOTE:clang specific - if reqfile.endswith(CLANG_IGNORES): - print(f'\nSkipping: {reqfile}', flush=True) - return - lines = iter(text.splitlines()) # The first line is special. diff --git a/Tools/c-analyzer/c_parser/preprocessor/gcc.py b/Tools/c-analyzer/c_parser/preprocessor/gcc.py index 7b0272ee98d405..5d77489cd9db88 100644 --- a/Tools/c-analyzer/c_parser/preprocessor/gcc.py +++ b/Tools/c-analyzer/c_parser/preprocessor/gcc.py @@ -102,6 +102,8 @@ def preprocess(filename, def _iter_lines(text, reqfile, samefiles, cwd, raw=False): + with open('/Users/Edu/Downloads/test.c', 'r') as f: + text = f.read() lines = iter(text.splitlines()) # The first line is special. @@ -163,6 +165,14 @@ def _iter_top_include_lines(lines, topfile, cwd, _lno, included, flags = _parse_marker_line(line) if included: + # HACK: + # Mixes curses.h and ncurses.h marker lines + # gcc silently passes this, while clang fails + # See: /Include/py_curses.h #if-elif directives + # And compare with preprocessor output + if os.path.basename(included) == 'curses.h': + included = os.path.join(os.path.dirname(included), 'ncurses.h') + lno = _lno included = _normpath(included, cwd) # We hit a marker line. From 72409883aacb32c233432ade4ae072f2e9173ba2 Mon Sep 17 00:00:00 2001 From: "d.grigonis" Date: Fri, 2 May 2025 06:58:40 +0300 Subject: [PATCH 5/5] fix leftover from debug --- Tools/c-analyzer/c_parser/preprocessor/gcc.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/Tools/c-analyzer/c_parser/preprocessor/gcc.py b/Tools/c-analyzer/c_parser/preprocessor/gcc.py index 5d77489cd9db88..4a55a1a24ee1be 100644 --- a/Tools/c-analyzer/c_parser/preprocessor/gcc.py +++ b/Tools/c-analyzer/c_parser/preprocessor/gcc.py @@ -102,8 +102,6 @@ def preprocess(filename, def _iter_lines(text, reqfile, samefiles, cwd, raw=False): - with open('/Users/Edu/Downloads/test.c', 'r') as f: - text = f.read() lines = iter(text.splitlines()) # The first line is special.