Skip to content

Enable parallelism #92

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged 21 commits on Mar 21, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions cpp_linter/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""Run clang-tidy and clang-format on a list of files.
If executed from command-line, then `main()` is the entrypoint.
"""

import json
import logging
import os
Expand Down Expand Up @@ -87,6 +88,7 @@ def main():
extra_args=args.extra_arg,
tidy_review=is_pr_event and args.tidy_review,
format_review=is_pr_event and args.format_review,
num_workers=args.jobs,
)

start_log_group("Posting comment(s)")
Expand Down
108 changes: 83 additions & 25 deletions cpp_linter/clang_tools/__init__.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,15 @@
from concurrent.futures import ProcessPoolExecutor, as_completed
import json
from pathlib import Path, PurePath
import subprocess
import sys
from tempfile import TemporaryDirectory
from textwrap import indent
from typing import Optional, List, Dict, Tuple
import shutil

from ..common_fs import FileObj
from ..loggers import start_log_group, end_log_group, logger
from ..loggers import start_log_group, end_log_group, worker_log_file_init, logger
from .clang_tidy import run_clang_tidy, TidyAdvice
from .clang_format import run_clang_format, FormatAdvice

Expand All @@ -31,6 +34,45 @@ def assemble_version_exec(tool_name: str, specified_version: str) -> Optional[st
return shutil.which(tool_name)


def _run_on_single_file(
    file: FileObj,
    temp_dir: str,
    log_lvl: int,
    tidy_cmd,
    checks,
    lines_changed_only,
    database,
    extra_args,
    db_json,
    tidy_review,
    format_cmd,
    style,
    format_review,
):
    """Run clang-tidy and/or clang-format on a single file in a worker process.

    Each worker redirects its log output into a fresh temporary file (created
    under ``temp_dir``) so the parent process can replay the logs in a
    deterministic order after the futures complete.

    :returns: A tuple of
        (file name, worker log-file path, clang-tidy advice or ``None``,
        clang-format advice or ``None``).
    """
    worker_log = worker_log_file_init(temp_dir, log_lvl)

    # A ``None`` command means the corresponding tool was disabled.
    tidy_advice = (
        None
        if tidy_cmd is None
        else run_clang_tidy(
            tidy_cmd,
            file,
            checks,
            lines_changed_only,
            database,
            extra_args,
            db_json,
            tidy_review,
        )
    )

    fmt_advice = (
        None
        if format_cmd is None
        else run_clang_format(
            format_cmd, file, style, lines_changed_only, format_review
        )
    )

    return file.name, worker_log, tidy_advice, fmt_advice


def capture_clang_tools_output(
files: List[FileObj],
version: str,
Expand All @@ -41,6 +83,7 @@ def capture_clang_tools_output(
extra_args: List[str],
tidy_review: bool,
format_review: bool,
num_workers: Optional[int],
) -> Tuple[List[FormatAdvice], List[TidyAdvice]]:
"""Execute and capture all output from clang-tidy and clang-format. This aggregates
results in the :attr:`~cpp_linter.Globals.OUTPUT`.
Expand All @@ -60,6 +103,8 @@ def capture_clang_tools_output(
PR review comments using clang-tidy.
:param format_review: A flag to enable/disable creating a diff suggestion for
PR review comments using clang-format.
:param num_workers: The number of workers to use for parallel processing. If
`None`, then the number of workers is set to the number of CPU cores.
"""

def show_tool_version_output(cmd: str): # show version output for executable used
Expand All @@ -86,29 +131,42 @@ def show_tool_version_output(cmd: str): # show version output for executable us
if db_path.exists():
db_json = json.loads(db_path.read_text(encoding="utf-8"))

# temporary cache of parsed notifications for use in log commands
tidy_notes = []
format_advice = []
for file in files:
start_log_group(f"Performing checkup on {file.name}")
if tidy_cmd is not None:
tidy_notes.append(
run_clang_tidy(
tidy_cmd,
file,
checks,
lines_changed_only,
database,
extra_args,
db_json,
tidy_review,
)
with TemporaryDirectory() as temp_dir, ProcessPoolExecutor(num_workers) as executor:
log_lvl = logger.getEffectiveLevel()
futures = [
executor.submit(
_run_on_single_file,
file,
temp_dir=temp_dir,
log_lvl=log_lvl,
tidy_cmd=tidy_cmd,
checks=checks,
lines_changed_only=lines_changed_only,
database=database,
extra_args=extra_args,
db_json=db_json,
tidy_review=tidy_review,
format_cmd=format_cmd,
style=style,
format_review=format_review,
)
if format_cmd is not None:
format_advice.append(
run_clang_format(
format_cmd, file, style, lines_changed_only, format_review
)
)
end_log_group()
for file in files
]

# temporary cache of parsed notifications for use in log commands
format_advice_map: Dict[str, Optional[FormatAdvice]] = {}
tidy_notes_map: Dict[str, Optional[TidyAdvice]] = {}
for future in as_completed(futures):
file, log_file, note, advice = future.result()

start_log_group(f"Performing checkup on {file}")
sys.stdout.write(Path(log_file).read_text())
end_log_group()

format_advice_map[file] = advice
tidy_notes_map[file] = note

format_advice = list(filter(None, (format_advice_map[file.name] for file in files)))
tidy_notes = list(filter(None, (tidy_notes_map[file.name] for file in files)))

return (format_advice, tidy_notes)
30 changes: 29 additions & 1 deletion cpp_linter/cli.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
"""Setup the options for CLI arguments."""

import argparse
import configparser
from pathlib import Path
from typing import Tuple, List
from typing import Tuple, List, Optional

from .loggers import logger

Expand Down Expand Up @@ -304,6 +305,33 @@
)


def _parse_jobs(val: str) -> Optional[int]:
try:
jobs = int(val)
except ValueError as exc:
raise argparse.ArgumentTypeError(
f"Invalid -j (--jobs) value: {val} (must be an integer)"
) from exc

if jobs <= 0:
return None # let multiprocessing.Pool decide the number of workers

return jobs


cli_arg_parser.add_argument(
"-j",
"--jobs",
default=1,
type=_parse_jobs,
help="""Set the number of jobs to run simultaneously.
If set to <= 0, the number of jobs will be set to the
number of all available CPU cores.

Defaults to ``%(default)s``.""",
)


def parse_ignore_option(
paths: str, not_ignored: List[str]
) -> Tuple[List[str], List[str]]:
Expand Down
36 changes: 35 additions & 1 deletion cpp_linter/loggers.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
import logging
import os
from tempfile import NamedTemporaryFile

from requests import Response

FOUND_RICH_LIB = False
try: # pragma: no cover
from rich.logging import RichHandler # type: ignore
from rich.logging import RichHandler, get_console # type: ignore

FOUND_RICH_LIB = True

Expand Down Expand Up @@ -53,3 +55,35 @@ def log_response_msg(response: Response):
response.request.url,
response.text,
)


def worker_log_file_init(temp_dir: str, log_lvl: int) -> str:
    """Redirect this worker process's logging into a dedicated temp file.

    Intended to be called once at the start of each worker so concurrent
    workers do not interleave their output; the parent process replays each
    file afterward.

    :param temp_dir: Directory in which the worker's log file is created.
    :param log_lvl: Logging level to apply in this process.
    :returns: The path of the newly created log file.
    """
    log_file = NamedTemporaryFile("w", dir=temp_dir, delete=False)

    # Drop any handlers inherited from the parent process so output goes
    # only to this worker's file.
    logger.handlers.clear()
    logger.propagate = False

    use_rich = FOUND_RICH_LIB and "CPP_LINTER_PYTEST_NO_RICH" not in os.environ
    handler: logging.Handler
    if use_rich:  # pragma: no cover
        console = get_console()
        console.file = log_file
        handler = RichHandler(show_time=False, console=console)
        formatter = logging.Formatter("%(name)s: %(message)s")
    else:
        handler = logging.StreamHandler(log_file)
        formatter = logging.Formatter(logging.BASIC_FORMAT)
    handler.setFormatter(formatter)
    logger.addHandler(handler)
    # Windows does not copy log level to subprocess.
    # https://github.com/cpp-linter/cpp-linter/actions/runs/8355193931
    logger.setLevel(log_lvl)

    ## uncomment the following if log_commander is needed in isolated threads
    # log_commander.handlers.clear()
    # log_commander.propagate = False
    # console_handler = logging.StreamHandler(log_file)
    # console_handler.setFormatter(logging.Formatter("%(message)s"))
    # log_commander.addHandler(console_handler)

    return log_file.name
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ dynamic_context = "test_function"
# These options are useful if combining coverage data from multiple tested envs
parallel = true
relative_files = true
concurrency = ["thread", "multiprocessing"]
omit = [
# don't include tests in coverage
# "tests/*",
Expand Down
34 changes: 18 additions & 16 deletions tests/capture_tools_output/test_database_path.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Tests specific to specifying the compilation database path."""

from typing import List
from pathlib import Path, PurePath
import logging
Expand Down Expand Up @@ -31,15 +32,17 @@
ids=["implicit path", "relative path", "absolute path"],
)
def test_db_detection(
caplog: pytest.LogCaptureFixture,
capsys: pytest.CaptureFixture,
monkeypatch: pytest.MonkeyPatch,
database: str,
expected_args: List[str],
):
"""test clang-tidy using a implicit path to the compilation database."""
monkeypatch.setenv("COVERAGE_FILE", str(Path.cwd() / ".coverage"))
monkeypatch.chdir(PurePath(__file__).parent.parent.as_posix())
monkeypatch.setenv("CPP_LINTER_PYTEST_NO_RICH", "1")
CACHE_PATH.mkdir(exist_ok=True)
caplog.set_level(logging.DEBUG, logger=logger.name)
logger.setLevel(logging.DEBUG)
demo_src = "demo/demo.cpp"
files = [FileObj(demo_src)]

Expand All @@ -53,23 +56,19 @@ def test_db_detection(
extra_args=[],
tidy_review=False,
format_review=False,
num_workers=None,
)
matched_args = []
for record in caplog.records:
assert "Error while trying to load a compilation database" not in record.message
msg_match = CLANG_TIDY_COMMAND.search(record.message)
if msg_match is not None:
matched_args = msg_match.group(0).split()[1:]
break
else: # pragma: no cover
raise RuntimeError("failed to find args passed in clang-tidy in log records")
stdout = capsys.readouterr().out
assert "Error while trying to load a compilation database" not in stdout
msg_match = CLANG_TIDY_COMMAND.search(stdout)
if msg_match is None: # pragma: no cover
pytest.fail("failed to find args passed in clang-tidy in log records")
matched_args = msg_match.group(0).split()[1:]
expected_args.append(demo_src.replace("/", os.sep) + '"')
assert expected_args == matched_args


def test_ninja_database(
monkeypatch: pytest.MonkeyPatch, tmp_path: Path, caplog: pytest.LogCaptureFixture
):
def test_ninja_database(monkeypatch: pytest.MonkeyPatch, tmp_path: Path):
"""verify that the relative paths used in a database generated (and thus clang-tidy
stdout) for the ninja build system are resolved accordingly."""
tmp_path_demo = tmp_path / "demo"
Expand All @@ -80,15 +79,17 @@ def test_ninja_database(
ignore=shutil.ignore_patterns("compile_flags.txt"),
)
(tmp_path_demo / "build").mkdir(parents=True)
monkeypatch.setenv("COVERAGE_FILE", str(Path.cwd() / ".coverage"))
monkeypatch.chdir(str(tmp_path_demo))
monkeypatch.setattr(sys, "argv", ["meson", "init"])
meson()
monkeypatch.setattr(
sys, "argv", ["meson", "setup", "--backend=ninja", "build", "."]
)
meson()
monkeypatch.setenv("CPP_LINTER_PYTEST_NO_RICH", "1")

caplog.set_level(logging.DEBUG, logger=logger.name)
logger.setLevel(logging.DEBUG)
files = [FileObj("demo.cpp")]
gh_client = GithubApiClient()

Expand All @@ -103,6 +104,7 @@ def test_ninja_database(
extra_args=[],
tidy_review=False,
format_review=False,
num_workers=None,
)
found_project_file = False
for concern in tidy_advice:
Expand All @@ -111,7 +113,7 @@ def test_ninja_database(
assert not Path(note.filename).is_absolute()
found_project_file = True
if not found_project_file: # pragma: no cover
raise RuntimeError("no project files raised concerns with clang-tidy")
pytest.fail("no project files raised concerns with clang-tidy")
(comment, format_checks_failed, tidy_checks_failed) = gh_client.make_comment(
files, format_advice, tidy_advice
)
Expand Down
Loading