Skip to content

gh-133228: c-analyzer clang preprocessor #133229

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 5 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion Tools/c-analyzer/c_parser/preprocessor/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
from . import (
pure as _pure,
gcc as _gcc,
clang as _clang,
)


Expand Down Expand Up @@ -234,7 +235,7 @@ def handling_errors(ignore_exc=None, *, log_err=None):
'bcpp': None,
# aliases/extras:
'gcc': _gcc.preprocess,
'clang': None,
'clang': _clang.preprocess,
}


Expand Down
104 changes: 104 additions & 0 deletions Tools/c-analyzer/c_parser/preprocessor/clang.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
import os.path
import re, sys

from . import common as _common
from . import gcc as _gcc
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would it make sense to move the parts used from gcc in this new clang module to common?

Copy link
Contributor Author

@dg-pb dg-pb May 1, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It would, but I would leave this for when this is stable. I would also need to look into it a bit more to get a sense of what can be common and what is best left separate.

For now I would rather not mess with gcc.
And will add a warning that this is experimental with a list of skipped files.

Any chance you are running osx and could check if it works for you?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Windows, sorry.


# Reuse the gcc module's path normalizer instead of duplicating it here.
_normpath = _gcc._normpath

# Tool name handed to _common.preprocess() as its first argument.
TOOL = 'clang'

# Pseudo-filenames that show up in clang's line markers (see the expected
# header lines in _iter_lines()) rather than naming a real file.
# NOTE(review): not referenced by the code visible in this module -- confirm
# whether another module consumes it.
META_FILES = {
'<built-in>',
'<command line>',
}


def preprocess(filename,
               incldirs=None,
               includes=None,
               macros=None,
               samefiles=None,
               cwd=None,
               ):
    """Preprocess *filename* with clang and return an iterator of its lines.

    *cwd* is made absolute (defaulting to the current directory) and
    *filename* is normalized against it.  *incldirs*, *includes* and
    *macros* are forwarded unchanged to _common.preprocess(); *samefiles*
    is forwarded to _iter_lines(), whose result is returned.

    "-DPy_BUILD_CORE=1" is appended to the gcc module's POST_ARGS unless
    the file's basename or its parent directory's name is listed in the
    gcc module's FILES_WITHOUT_INTERNAL_CAPI / DIRS_WITHOUT_INTERNAL_CAPI.
    """
    # Anchor everything to an absolute working directory.
    cwd_is_absolute = bool(cwd) and os.path.isabs(cwd)
    if not cwd_is_absolute:
        cwd = os.path.abspath(cwd or '.')
    filename = _normpath(filename, cwd)

    # Decide whether this file is built against the internal C-API.
    parent, leaf = os.path.split(filename)
    skip_internal_capi = (
        leaf in _gcc.FILES_WITHOUT_INTERNAL_CAPI
        or os.path.basename(parent) in _gcc.DIRS_WITHOUT_INTERNAL_CAPI
    )
    postargs = _gcc.POST_ARGS
    if not skip_internal_capi:
        postargs += ('-DPy_BUILD_CORE=1',)

    options = dict(
        incldirs=incldirs,
        includes=includes,
        macros=macros,
        #preargs=PRE_ARGS,
        postargs=postargs,
        executable=['clang'],
        compiler='unix',
        cwd=cwd,
    )
    output = _common.preprocess(TOOL, filename, **options)
    return _iter_lines(output, filename, samefiles, cwd)


# Marker lines that close a top-level include in clang's output.  The set is
# passed to _gcc._iter_top_include_lines(), which stops consuming lines as
# soon as it reads one that is a member of this set.
EXIT_MARKERS = {'# 2 "<built-in>" 2', '# 3 "<built-in>" 2', '# 4 "<built-in>" 2'}


def _iter_lines(text, reqfile, samefiles, cwd, raw=False):
    """Yield the lines of clang's preprocessor output *text* for *reqfile*.

    The output must start with clang's fixed header of line markers; after
    that, every non-blank line seen at this level must be a marker line
    opening a top-level include.  The body of each include is delegated to
    _gcc._iter_top_include_lines(), and whatever it yields is re-yielded.

    Parameters:
      text       the complete preprocessor output
      reqfile    the requested file, as it appears in the first marker
      samefiles  forwarded to _gcc._filter_reqfile()
      cwd        directory used to normalize included filenames
      raw        forwarded to _gcc._iter_top_include_lines()

    Raises NotImplementedError if the header is missing, truncated or
    different from what is expected, if a non-marker line is found where
    a marker is required, or if no marker line follows the header.
    """
    lines = iter(text.splitlines())

    # The first line is special.
    # The subsequent lines are consistent, except for the line number of
    # the second "<built-in>" marker: it depends on how many macros the
    # clang build predefines, so any number is accepted there.
    firstlines = [
        f'# 1 "{reqfile}"',
        '# 1 "<built-in>" 1',
        '# 1 "<built-in>" 3',
        re.compile(r'# \d+ "<built-in>" 3'),
        '# 1 "<command line>" 1',
        '# 1 "<built-in>" 2',
    ]
    for expected in firstlines:
        # Use a default: a bare next() would let StopIteration escape the
        # generator, which surfaces as an unhelpful RuntimeError (PEP 479).
        line = next(lines, None)
        if isinstance(expected, str):
            matched = (line == expected)
        else:
            matched = line is not None and expected.fullmatch(line) is not None
            # Report the pattern text rather than the compiled object.
            expected = expected.pattern
        if not matched:
            raise NotImplementedError((line, expected))

    # Do all the CLI-provided includes.
    filter_reqfile = (lambda f: _gcc._filter_reqfile(f, reqfile, samefiles))
    make_info = (lambda lno: _common.FileInfo(reqfile, lno))
    last = None
    included = None
    topfile = None
    for line in lines:
        assert last != reqfile, (last,)
        # NOTE:condition is clang specific
        if not line:
            continue
        _lno, included, flags = _gcc._parse_marker_line(line, reqfile)
        if not included:
            raise NotImplementedError((line,))
        topfile = _normpath(included, cwd)
        if included == reqfile:
            # This will be the last one.
            assert 2 in flags, (line, flags)
        elif topfile == reqfile:
            # NOTE:this case is specific to clang
            assert 1 in flags or 2 in flags, (line, flags, included, reqfile)
        else:
            assert 1 in flags, (line, flags, included, reqfile)
        yield from _gcc._iter_top_include_lines(
            lines,
            topfile,
            cwd,
            filter_reqfile,
            make_info,
            raw,
            EXIT_MARKERS,
        )
        last = included
    if included is None:
        # Nothing followed the header, so the requested file never showed up.
        raise NotImplementedError(('missing marker line', reqfile))
    # The last one is always the requested file.
    # NOTE:_normpath is clang specific
    assert topfile == reqfile, (line,)
15 changes: 13 additions & 2 deletions Tools/c-analyzer/c_parser/preprocessor/gcc.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,8 @@
'-E',
)

EXIT_MARKERS = {'# 0 "<command-line>" 2', '# 1 "<command-line>" 2'}


def preprocess(filename,
incldirs=None,
Expand Down Expand Up @@ -138,6 +140,7 @@ def _iter_lines(text, reqfile, samefiles, cwd, raw=False):
filter_reqfile,
make_info,
raw,
EXIT_MARKERS
)
last = included
# The last one is always the requested file.
Expand All @@ -146,20 +149,28 @@ def _iter_lines(text, reqfile, samefiles, cwd, raw=False):

def _iter_top_include_lines(lines, topfile, cwd,
filter_reqfile, make_info,
raw):
raw, exit_markers):
partial = 0 # depth
files = [topfile]
# We start at 1 in case there are source lines (including blank ones)
# before the first marker line. Also, we already verified in
# _parse_marker_line() that the preprocessor reported lno as 1.
lno = 1
for line in lines:
if line == '# 0 "<command-line>" 2' or line == '# 1 "<command-line>" 2':
if line in exit_markers:
# We're done with this top-level include.
return

_lno, included, flags = _parse_marker_line(line)
if included:
# HACK:
# Mixes curses.h and ncurses.h marker lines
# gcc silently passes this, while clang fails
# See: /Include/py_curses.h #if-elif directives
# And compare with preprocessor output
if os.path.basename(included) == 'curses.h':
included = os.path.join(os.path.dirname(included), 'ncurses.h')

lno = _lno
included = _normpath(included, cwd)
# We hit a marker line.
Expand Down
4 changes: 4 additions & 0 deletions Tools/c-analyzer/cpython/_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -351,6 +351,10 @@ def clean_lines(text):

# Catch-alls:
_abs('Include/**/*.h'): (5_000, 500),

# Specific to clang
_abs('Modules/selectmodule.c'): (40_000, 3000),
_abs('Modules/_testcapi/pyatomic.c'): (30_000, 1000),
}


Expand Down
1 change: 1 addition & 0 deletions Tools/c-analyzer/cpython/globals-to-fix.tsv
Original file line number Diff line number Diff line change
Expand Up @@ -400,6 +400,7 @@ Modules/_tkinter.c - tcl_lock -
Modules/_tkinter.c - excInCmd -
Modules/_tkinter.c - valInCmd -
Modules/_tkinter.c - trbInCmd -
Modules/socketmodule.c - netdb_lock -


##################################
Expand Down
1 change: 1 addition & 0 deletions Tools/c-analyzer/cpython/ignored.tsv
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ filename funcname name reason
## indicators for resource availability/capability
# (set during first init)
Python/bootstrap_hash.c py_getrandom getrandom_works -
Python/bootstrap_hash.c py_getentropy getentropy_works -
Python/fileutils.c - _Py_open_cloexec_works -
Python/fileutils.c set_inheritable ioctl_works -
# (set lazily, *after* first init)
Expand Down
Loading