Skip to content

Enable parallelism #92

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged 21 commits on Mar 21, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions cpp_linter/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""Run clang-tidy and clang-format on a list of files.
If executed from command-line, then `main()` is the entrypoint.
"""

import json
import logging
import os
Expand Down Expand Up @@ -87,6 +88,7 @@ def main():
extra_args=args.extra_arg,
tidy_review=is_pr_event and args.tidy_review,
format_review=is_pr_event and args.format_review,
num_workers=args.jobs,
)

start_log_group("Posting comment(s)")
Expand Down
108 changes: 83 additions & 25 deletions cpp_linter/clang_tools/__init__.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,15 @@
from concurrent.futures import ProcessPoolExecutor, as_completed
import json
from pathlib import Path, PurePath
import subprocess
import sys
from tempfile import TemporaryDirectory
from textwrap import indent
from typing import Optional, List, Dict, Tuple
import shutil

from ..common_fs import FileObj
from ..loggers import start_log_group, end_log_group, logger
from ..loggers import start_log_group, end_log_group, worker_log_file_init, logger
from .clang_tidy import run_clang_tidy, TidyAdvice
from .clang_format import run_clang_format, FormatAdvice

Expand All @@ -31,6 +34,45 @@ def assemble_version_exec(tool_name: str, specified_version: str) -> Optional[st
return shutil.which(tool_name)


def _run_on_single_file(
    file: FileObj,
    temp_dir: str,
    log_lvl: int,
    tidy_cmd,
    checks,
    lines_changed_only,
    database,
    extra_args,
    db_json,
    tidy_review,
    format_cmd,
    style,
    format_review,
):
    """Run clang-tidy and/or clang-format on a single file in a worker process.

    Each worker redirects its log output into a fresh temporary file (created
    under ``temp_dir``) so the parent process can replay the logs in a
    deterministic order after the futures complete.

    :returns: A tuple of
        (file name, worker log-file path, clang-tidy advice or ``None``,
        clang-format advice or ``None``).
    """
    worker_log = worker_log_file_init(temp_dir, log_lvl)

    # A ``None`` command means the corresponding tool was disabled.
    tidy_advice = (
        None
        if tidy_cmd is None
        else run_clang_tidy(
            tidy_cmd,
            file,
            checks,
            lines_changed_only,
            database,
            extra_args,
            db_json,
            tidy_review,
        )
    )

    fmt_advice = (
        None
        if format_cmd is None
        else run_clang_format(
            format_cmd, file, style, lines_changed_only, format_review
        )
    )

    return file.name, worker_log, tidy_advice, fmt_advice


def capture_clang_tools_output(
files: List[FileObj],
version: str,
Expand All @@ -41,6 +83,7 @@ def capture_clang_tools_output(
extra_args: List[str],
tidy_review: bool,
format_review: bool,
num_workers: Optional[int],
) -> Tuple[List[FormatAdvice], List[TidyAdvice]]:
"""Execute and capture all output from clang-tidy and clang-format. This aggregates
results in the :attr:`~cpp_linter.Globals.OUTPUT`.
Expand All @@ -60,6 +103,8 @@ def capture_clang_tools_output(
PR review comments using clang-tidy.
:param format_review: A flag to enable/disable creating a diff suggestion for
PR review comments using clang-format.
:param num_workers: The number of workers to use for parallel processing. If
`None`, then the number of workers is set to the number of CPU cores.
"""

def show_tool_version_output(cmd: str): # show version output for executable used
Expand All @@ -86,29 +131,42 @@ def show_tool_version_output(cmd: str): # show version output for executable us
if db_path.exists():
db_json = json.loads(db_path.read_text(encoding="utf-8"))

# temporary cache of parsed notifications for use in log commands
tidy_notes = []
format_advice = []
for file in files:
start_log_group(f"Performing checkup on {file.name}")
if tidy_cmd is not None:
tidy_notes.append(
run_clang_tidy(
tidy_cmd,
file,
checks,
lines_changed_only,
database,
extra_args,
db_json,
tidy_review,
)
with TemporaryDirectory() as temp_dir, ProcessPoolExecutor(num_workers) as executor:
log_lvl = logger.getEffectiveLevel()
futures = [
executor.submit(
_run_on_single_file,
file,
temp_dir=temp_dir,
log_lvl=log_lvl,
tidy_cmd=tidy_cmd,
checks=checks,
lines_changed_only=lines_changed_only,
database=database,
extra_args=extra_args,
db_json=db_json,
tidy_review=tidy_review,
format_cmd=format_cmd,
style=style,
format_review=format_review,
)
if format_cmd is not None:
format_advice.append(
run_clang_format(
format_cmd, file, style, lines_changed_only, format_review
)
)
end_log_group()
for file in files
]

# temporary cache of parsed notifications for use in log commands
format_advice_map: Dict[str, Optional[FormatAdvice]] = {}
tidy_notes_map: Dict[str, Optional[TidyAdvice]] = {}
for future in as_completed(futures):
file, log_file, note, advice = future.result()

start_log_group(f"Performing checkup on {file}")
sys.stdout.write(Path(log_file).read_text())
end_log_group()

format_advice_map[file] = advice
tidy_notes_map[file] = note

format_advice = list(filter(None, (format_advice_map[file.name] for file in files)))
tidy_notes = list(filter(None, (tidy_notes_map[file.name] for file in files)))

return (format_advice, tidy_notes)
30 changes: 29 additions & 1 deletion cpp_linter/cli.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
"""Setup the options for CLI arguments."""

import argparse
import configparser
from pathlib import Path
from typing import Tuple, List
from typing import Tuple, List, Optional

from .loggers import logger

Expand Down Expand Up @@ -304,6 +305,33 @@
)


def _parse_jobs(val: str) -> Optional[int]:
try:
jobs = int(val)
except ValueError as exc:
raise argparse.ArgumentTypeError(
f"Invalid -j (--jobs) value: {val} (must be an integer)"
) from exc

if jobs <= 0:
return None # let multiprocessing.Pool decide the number of workers

return jobs


cli_arg_parser.add_argument(
"-j",
"--jobs",
default=1,
type=_parse_jobs,
help="""Set the number of jobs to run simultaneously.
If set to <= 0, the number of jobs will be set to the
number of all available CPU cores.

Defaults to ``%(default)s``.""",
)


def parse_ignore_option(
paths: str, not_ignored: List[str]
) -> Tuple[List[str], List[str]]:
Expand Down
36 changes: 35 additions & 1 deletion cpp_linter/loggers.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
import logging
import os
from tempfile import NamedTemporaryFile

from requests import Response

FOUND_RICH_LIB = False
try: # pragma: no cover
from rich.logging import RichHandler # type: ignore
from rich.logging import RichHandler, get_console # type: ignore

FOUND_RICH_LIB = True

Expand Down Expand Up @@ -53,3 +55,35 @@ def log_response_msg(response: Response):
response.request.url,
response.text,
)


def worker_log_file_init(temp_dir: str, log_lvl: int) -> str:
    """Redirect this worker process's logging into a dedicated temp file.

    Intended to be called once at the start of each worker so concurrent
    workers do not interleave their output; the parent process replays each
    file afterward.

    :param temp_dir: Directory in which the worker's log file is created.
    :param log_lvl: Logging level to apply in this process.
    :returns: The path of the newly created log file.
    """
    log_file = NamedTemporaryFile("w", dir=temp_dir, delete=False)

    # Drop any handlers inherited from the parent process so output goes
    # only to this worker's file.
    logger.handlers.clear()
    logger.propagate = False

    use_rich = FOUND_RICH_LIB and "CPP_LINTER_PYTEST_NO_RICH" not in os.environ
    handler: logging.Handler
    if use_rich:  # pragma: no cover
        console = get_console()
        console.file = log_file
        handler = RichHandler(show_time=False, console=console)
        formatter = logging.Formatter("%(name)s: %(message)s")
    else:
        handler = logging.StreamHandler(log_file)
        formatter = logging.Formatter(logging.BASIC_FORMAT)
    handler.setFormatter(formatter)
    logger.addHandler(handler)
    # Windows does not copy log level to subprocess.
    # https://github.com/cpp-linter/cpp-linter/actions/runs/8355193931
    logger.setLevel(log_lvl)

    ## uncomment the following if log_commander is needed in isolated threads
    # log_commander.handlers.clear()
    # log_commander.propagate = False
    # console_handler = logging.StreamHandler(log_file)
    # console_handler.setFormatter(logging.Formatter("%(message)s"))
    # log_commander.addHandler(console_handler)

    return log_file.name
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ dynamic_context = "test_function"
# These options are useful if combining coverage data from multiple tested envs
parallel = true
relative_files = true
concurrency = ["thread", "multiprocessing"]
omit = [
# don't include tests in coverage
# "tests/*",
Expand Down
34 changes: 18 additions & 16 deletions tests/capture_tools_output/test_database_path.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Tests specific to specifying the compilation database path."""

from typing import List
from pathlib import Path, PurePath
import logging
Expand Down Expand Up @@ -31,15 +32,17 @@
ids=["implicit path", "relative path", "absolute path"],
)
def test_db_detection(
caplog: pytest.LogCaptureFixture,
capsys: pytest.CaptureFixture,
monkeypatch: pytest.MonkeyPatch,
database: str,
expected_args: List[str],
):
"""test clang-tidy using a implicit path to the compilation database."""
monkeypatch.setenv("COVERAGE_FILE", str(Path.cwd() / ".coverage"))
monkeypatch.chdir(PurePath(__file__).parent.parent.as_posix())
monkeypatch.setenv("CPP_LINTER_PYTEST_NO_RICH", "1")
CACHE_PATH.mkdir(exist_ok=True)
caplog.set_level(logging.DEBUG, logger=logger.name)
logger.setLevel(logging.DEBUG)
demo_src = "demo/demo.cpp"
files = [FileObj(demo_src)]

Expand All @@ -53,23 +56,19 @@ def test_db_detection(
extra_args=[],
tidy_review=False,
format_review=False,
num_workers=None,
)
matched_args = []
for record in caplog.records:
assert "Error while trying to load a compilation database" not in record.message
msg_match = CLANG_TIDY_COMMAND.search(record.message)
if msg_match is not None:
matched_args = msg_match.group(0).split()[1:]
break
else: # pragma: no cover
raise RuntimeError("failed to find args passed in clang-tidy in log records")
stdout = capsys.readouterr().out
assert "Error while trying to load a compilation database" not in stdout
msg_match = CLANG_TIDY_COMMAND.search(stdout)
if msg_match is None: # pragma: no cover
pytest.fail("failed to find args passed in clang-tidy in log records")
matched_args = msg_match.group(0).split()[1:]
expected_args.append(demo_src.replace("/", os.sep) + '"')
assert expected_args == matched_args


def test_ninja_database(
monkeypatch: pytest.MonkeyPatch, tmp_path: Path, caplog: pytest.LogCaptureFixture
):
def test_ninja_database(monkeypatch: pytest.MonkeyPatch, tmp_path: Path):
"""verify that the relative paths used in a database generated (and thus clang-tidy
stdout) for the ninja build system are resolved accordingly."""
tmp_path_demo = tmp_path / "demo"
Expand All @@ -80,15 +79,17 @@ def test_ninja_database(
ignore=shutil.ignore_patterns("compile_flags.txt"),
)
(tmp_path_demo / "build").mkdir(parents=True)
monkeypatch.setenv("COVERAGE_FILE", str(Path.cwd() / ".coverage"))
monkeypatch.chdir(str(tmp_path_demo))
monkeypatch.setattr(sys, "argv", ["meson", "init"])
meson()
monkeypatch.setattr(
sys, "argv", ["meson", "setup", "--backend=ninja", "build", "."]
)
meson()
monkeypatch.setenv("CPP_LINTER_PYTEST_NO_RICH", "1")

caplog.set_level(logging.DEBUG, logger=logger.name)
logger.setLevel(logging.DEBUG)
files = [FileObj("demo.cpp")]
gh_client = GithubApiClient()

Expand All @@ -103,6 +104,7 @@ def test_ninja_database(
extra_args=[],
tidy_review=False,
format_review=False,
num_workers=None,
)
found_project_file = False
for concern in tidy_advice:
Expand All @@ -111,7 +113,7 @@ def test_ninja_database(
assert not Path(note.filename).is_absolute()
found_project_file = True
if not found_project_file: # pragma: no cover
raise RuntimeError("no project files raised concerns with clang-tidy")
pytest.fail("no project files raised concerns with clang-tidy")
(comment, format_checks_failed, tidy_checks_failed) = gh_client.make_comment(
files, format_advice, tidy_advice
)
Expand Down
Loading