From 324e45b3588d9ba19e57e24130d1921bb23db243 Mon Sep 17 00:00:00 2001
From: Diego Russo
Date: Wed, 18 Sep 2024 10:48:45 +0100
Subject: [PATCH 1/3] Implement timeout mechanism for a benchmark run

---
 doc/usage.rst                        | 22 +++++++++++++---------
 pyperformance/_benchmark.py          |  4 ++++
 pyperformance/_utils.py              |  9 ++++++++-
 pyperformance/cli.py                 | 10 ++++++++++
 pyperformance/commands.py            |  4 ++--
 pyperformance/run.py                 | 14 ++++++++++++--
 pyperformance/tests/__init__.py      |  6 +++++-
 pyperformance/tests/test_commands.py |  3 +++
 8 files changed, 57 insertions(+), 15 deletions(-)

diff --git a/doc/usage.rst b/doc/usage.rst
index 6ebd5153..8f56c2a0 100644
--- a/doc/usage.rst
+++ b/doc/usage.rst
@@ -101,9 +101,10 @@ Usage::
 
     pyperformance run [-h] [-r] [-f] [--debug-single-value] [-v] [-m]
                       [--affinity CPU_LIST] [-o FILENAME]
-                      [--append FILENAME] [--manifest MANIFEST]
-                      [-b BM_LIST] [--inherit-environ VAR_LIST]
-                      [-p PYTHON]
+                      [--append FILENAME] [--min-time MIN_TIME]
+                      [--same-loops SAME_LOOPS] [--timeout TIMEOUT]
+                      [--manifest MANIFEST] [-b BM_LIST]
+                      [--inherit-environ VAR_LIST] [-p PYTHON]
 
 options::
 
@@ -124,10 +125,17 @@ options::
                         baseline_python, not changed_python.
   --append FILENAME     Add runs to an existing file, or create it if it
                         doesn't exist
+  --min-time MIN_TIME   Minimum duration in seconds of a single value, used
+                        to calibrate the number of loops
+  --same-loops SAME_LOOPS
+                        Use the same number of loops as a previous run
+                        (i.e., don't recalibrate). Should be a path to a
+                        .json file from a previous run.
+  --timeout TIMEOUT     Timeout for a benchmark run (default: disabled)
   --manifest MANIFEST   benchmark manifest file to use
   -b BM_LIST, --benchmarks BM_LIST
-                        Comma-separated list of benchmarks to run. Can
-                        contain both positive and negative arguments:
+                        Comma-separated list of benchmarks or groups to run.
+                        Can contain both positive and negative arguments:
                         --benchmarks=run_this,also_this,-not_this. If there
                         are no positive arguments, we'll run all benchmarks
                         except the negative arguments.
@@ -140,10 +148,6 @@ options::
 
   -p PYTHON, --python PYTHON
                         Python executable (default: use running Python)
-  --same-loops SAME_LOOPS
-                        Use the same number of loops as a previous run
-                        (i.e., don't recalibrate). Should be a path to a
-                        .json file from a previous run.
 
 show
 ----
diff --git a/pyperformance/_benchmark.py b/pyperformance/_benchmark.py
index 8ca5eaac..76d0138f 100644
--- a/pyperformance/_benchmark.py
+++ b/pyperformance/_benchmark.py
@@ -177,6 +177,7 @@ def python(self):
     def run(self, python, runid=None, pyperf_opts=None, *,
             venv=None,
             verbose=False,
+            timeout=None,
             ):
         if venv and python == sys.executable:
             python = venv.python
@@ -193,6 +194,7 @@ def run(self, python, runid=None, pyperf_opts=None, *,
             extra_opts=self.extra_opts,
             pyperf_opts=pyperf_opts,
             verbose=verbose,
+            timeout=timeout,
         )
         return bench
 
@@ -205,6 +207,7 @@ def _run_perf_script(python, runscript, runid, *,
                      extra_opts=None,
                      pyperf_opts=None,
                      verbose=False,
+                     timeout=None,
                      ):
     if not runscript:
         raise ValueError('missing runscript')
@@ -227,6 +230,7 @@ def _run_perf_script(python, runscript, runid, *,
         argv,
         env=env,
         capture='stderr' if hide_stderr else None,
+        timeout=timeout,
     )
     if ec != 0:
         if hide_stderr:
diff --git a/pyperformance/_utils.py b/pyperformance/_utils.py
index 908adc26..019ac8c5 100644
--- a/pyperformance/_utils.py
+++ b/pyperformance/_utils.py
@@ -89,7 +89,7 @@ def safe_rmtree(path):
 MS_WINDOWS = (sys.platform == 'win32')
 
 
-def run_cmd(argv, *, env=None, capture=None, verbose=True):
+def run_cmd(argv, *, env=None, capture=None, verbose=True, timeout=None):
     try:
         cmdstr = ' '.join(shlex.quote(a) for a in argv)
     except TypeError:
@@ -130,6 +130,9 @@ def run_cmd(argv, *, env=None, capture=None, verbose=True):
     if verbose:
         print('#', cmdstr)
 
+    if timeout:
+        kw.update(timeout=timeout)
+
     # Explicitly flush standard streams, required if streams are buffered
     # (not TTY) to write lines in the expected order
     sys.stdout.flush()
@@ -137,6 +140,10 @@ def run_cmd(argv, *, env=None, capture=None, verbose=True):
 
     try:
         proc = subprocess.run(argv, **kw)
+    except subprocess.TimeoutExpired as exc:
+        if verbose:
+            print('command timed out (%s)' % exc)
+        raise
     except OSError as exc:
         if exc.errno == errno.ENOENT:
             if verbose:
diff --git a/pyperformance/cli.py b/pyperformance/cli.py
index 843ed6a6..475071b8 100644
--- a/pyperformance/cli.py
+++ b/pyperformance/cli.py
@@ -25,6 +25,13 @@ def comma_separated(values):
     return list(filter(None, values))
 
 
+def check_positive(value):
+    value = int(value)
+    if value <= 0:
+        raise argparse.ArgumentTypeError("Argument must be a positive integer.")
+    return value
+
+
 def filter_opts(cmd, *, allow_no_benchmarks=False):
     cmd.add_argument("--manifest",
                      help="benchmark manifest file to use")
@@ -82,6 +89,9 @@ def parse_args():
                      help="Use the same number of loops as a previous run "
                           "(i.e., don't recalibrate). Should be a path to a "
                           ".json file from a previous run.")
+    cmd.add_argument("--timeout",
+                     help="Timeout for a benchmark run (default: disabled)",
+                     type=check_positive)
     filter_opts(cmd)
 
     # show
diff --git a/pyperformance/commands.py b/pyperformance/commands.py
index ade1cb12..7cfa4033 100644
--- a/pyperformance/commands.py
+++ b/pyperformance/commands.py
@@ -191,8 +191,8 @@ def cmd_run(options, benchmarks):
 
     if errors:
         print("%s benchmarks failed:" % len(errors))
-        for name in errors:
-            print("- %s" % name)
+        for name, reason in errors:
+            print("- %s (%s)" % (name, reason))
         print()
         sys.exit(1)
 
diff --git a/pyperformance/run.py b/pyperformance/run.py
index c7865b84..de8a7f7d 100644
--- a/pyperformance/run.py
+++ b/pyperformance/run.py
@@ -1,6 +1,7 @@
 from collections import namedtuple
 import hashlib
 import json
+import subprocess
 import sys
 import time
 import traceback
@@ -164,7 +165,7 @@ def add_bench(dest_suite, obj):
         bench_venv, bench_runid = benchmarks.get(bench)
         if bench_venv is None:
             print("ERROR: Benchmark %s failed: could not install requirements" % name)
-            errors.append(name)
+            errors.append((name, "Install requirements error"))
             continue
         try:
             result = bench.run(
@@ -173,11 +174,20 @@ def add_bench(dest_suite, obj):
                 pyperf_opts,
                 venv=bench_venv,
                 verbose=options.verbose,
+                timeout=options.timeout,
             )
+        except subprocess.TimeoutExpired as exc:
+            timeout = round(exc.timeout)
+            print("ERROR: Benchmark %s timed out after %s seconds" % (name, timeout))
+            errors.append((name, "Timed out after %s seconds" % timeout))
+        except RuntimeError as exc:
+            print("ERROR: Benchmark %s failed: %s" % (name, exc))
+            traceback.print_exc()
+            errors.append((name, exc))
         except Exception as exc:
             print("ERROR: Benchmark %s failed: %s" % (name, exc))
             traceback.print_exc()
-            errors.append(name)
+            errors.append((name, exc))
         else:
             suite = add_bench(suite, result)
 
diff --git a/pyperformance/tests/__init__.py b/pyperformance/tests/__init__.py
index bc66b317..442c7ff5 100644
--- a/pyperformance/tests/__init__.py
+++ b/pyperformance/tests/__init__.py
@@ -16,7 +16,7 @@
 DEV_SCRIPT = os.path.join(REPO_ROOT, 'dev.py')
 
 
-def run_cmd(cmd, *args, capture=None, onfail='exit', verbose=True):
+def run_cmd(cmd, *args, capture=None, onfail='exit', verbose=True, timeout=None):
     # XXX Optionally write the output to a file.
     argv = (cmd,) + args
     if not all(a and isinstance(a, str) for a in argv):
@@ -39,6 +39,10 @@ def run_cmd(cmd, *args, capture=None, onfail='exit', verbose=True):
 
     if verbose:
         print(f"(tests) Execute: {argv_str}", flush=True)
+
+    if timeout:
+        kwargs['timeout'] = 60
+
     proc = subprocess.run(argv, **kwargs)
     exitcode = proc.returncode
 
diff --git a/pyperformance/tests/test_commands.py b/pyperformance/tests/test_commands.py
index 7c311cf8..5972e85e 100644
--- a/pyperformance/tests/test_commands.py
+++ b/pyperformance/tests/test_commands.py
@@ -63,12 +63,14 @@ def run_pyperformance(self, cmd, *args,
                           exitcode=0,
                           capture='both',
                           verbose=True,
+                          timeout=None,
                           ):
         ec, stdout, stderr = self.run_module(
             'pyperformance', cmd, *args,
             capture=capture,
             onfail=None,
             verbose=verbose,
+            timeout=timeout,
         )
         if exitcode is True:
             self.assertGreater(ec, 0, repr(stdout))
@@ -154,6 +156,7 @@ def test_run_and_show(self):
             '--debug-single-value',
             '-o', filename,
             capture=None,
+            timeout=None,
         )
 
         # Display slowest benchmarks

From 6d07201f70411b3238e31061e04227c201e51fab Mon Sep 17 00:00:00 2001
From: Diego Russo
Date: Tue, 24 Sep 2024 14:16:17 +0100
Subject: [PATCH 2/3] Address feedback

---
 doc/usage.rst                        | 22 ++++++++++------------
 pyperformance/_benchmark.py          | 10 +++++-----
 pyperformance/_utils.py              |  9 +--------
 pyperformance/cli.py                 |  3 ++-
 pyperformance/run.py                 | 11 +++++------
 pyperformance/tests/__init__.py      |  6 +-----
 pyperformance/tests/test_commands.py |  3 ---
 7 files changed, 24 insertions(+), 40 deletions(-)

diff --git a/doc/usage.rst b/doc/usage.rst
index 8f56c2a0..ae753af7 100644
--- a/doc/usage.rst
+++ b/doc/usage.rst
@@ -101,9 +101,8 @@ Usage::
 
     pyperformance run [-h] [-r] [-f] [--debug-single-value] [-v] [-m]
                       [--affinity CPU_LIST] [-o FILENAME]
-                      [--append FILENAME] [--min-time MIN_TIME]
-                      [--same-loops SAME_LOOPS] [--timeout TIMEOUT]
-                      [--manifest MANIFEST] [-b BM_LIST]
+                      [--append FILENAME] [--manifest MANIFEST]
+                      [--timeout TIMEOUT] [-b BM_LIST]
                       [--inherit-environ VAR_LIST] [-p PYTHON]
 
 options::
@@ -125,17 +124,12 @@ options::
                         baseline_python, not changed_python.
   --append FILENAME     Add runs to an existing file, or create it if it
                         doesn't exist
-  --min-time MIN_TIME   Minimum duration in seconds of a single value, used
-                        to calibrate the number of loops
-  --same-loops SAME_LOOPS
-                        Use the same number of loops as a previous run
-                        (i.e., don't recalibrate). Should be a path to a
-                        .json file from a previous run.
-  --timeout TIMEOUT     Timeout for a benchmark run (default: disabled)
+  --timeout TIMEOUT     Specify a timeout in seconds for a single
+                        benchmark run (default: disabled)
   --manifest MANIFEST   benchmark manifest file to use
   -b BM_LIST, --benchmarks BM_LIST
-                        Comma-separated list of benchmarks or groups to run.
-                        Can contain both positive and negative arguments:
+                        Comma-separated list of benchmarks to run. Can
+                        contain both positive and negative arguments:
                         --benchmarks=run_this,also_this,-not_this. If there
                         are no positive arguments, we'll run all benchmarks
                         except the negative arguments.
@@ -148,6 +142,10 @@ options::
 
   -p PYTHON, --python PYTHON
                         Python executable (default: use running Python)
+  --same-loops SAME_LOOPS
+                        Use the same number of loops as a previous run
+                        (i.e., don't recalibrate). Should be a path to a
+                        .json file from a previous run.
 
 show
 ----
diff --git a/pyperformance/_benchmark.py b/pyperformance/_benchmark.py
index 76d0138f..5ec6fe07 100644
--- a/pyperformance/_benchmark.py
+++ b/pyperformance/_benchmark.py
@@ -177,7 +177,6 @@ def python(self):
     def run(self, python, runid=None, pyperf_opts=None, *,
             venv=None,
             verbose=False,
-            timeout=None,
             ):
         if venv and python == sys.executable:
             python = venv.python
@@ -194,7 +193,6 @@ def run(self, python, runid=None, pyperf_opts=None, *,
             extra_opts=self.extra_opts,
             pyperf_opts=pyperf_opts,
             verbose=verbose,
-            timeout=timeout,
         )
         return bench
 
@@ -207,7 +205,6 @@ def _run_perf_script(python, runscript, runid, *,
                      extra_opts=None,
                      pyperf_opts=None,
                      verbose=False,
-                     timeout=None,
                      ):
     if not runscript:
         raise ValueError('missing runscript')
@@ -230,14 +227,17 @@ def _run_perf_script(python, runscript, runid, *,
         argv,
         env=env,
         capture='stderr' if hide_stderr else None,
-        timeout=timeout,
     )
     if ec != 0:
         if hide_stderr:
             sys.stderr.flush()
             sys.stderr.write(stderr)
             sys.stderr.flush()
-        raise RuntimeError("Benchmark died")
+        # pyperf returns exit code 124 if the benchmark execution times out
+        if ec == 124:
+            raise TimeoutError("Benchmark timed out")
+        else:
+            raise RuntimeError("Benchmark died")
 
     return pyperf.BenchmarkSuite.load(tmp)
 
diff --git a/pyperformance/_utils.py b/pyperformance/_utils.py
index 019ac8c5..908adc26 100644
--- a/pyperformance/_utils.py
+++ b/pyperformance/_utils.py
@@ -89,7 +89,7 @@ def safe_rmtree(path):
 MS_WINDOWS = (sys.platform == 'win32')
 
 
-def run_cmd(argv, *, env=None, capture=None, verbose=True, timeout=None):
+def run_cmd(argv, *, env=None, capture=None, verbose=True):
     try:
         cmdstr = ' '.join(shlex.quote(a) for a in argv)
     except TypeError:
@@ -130,9 +130,6 @@ def run_cmd(argv, *, env=None, capture=None, verbose=True, timeout=None):
     if verbose:
         print('#', cmdstr)
 
-    if timeout:
-        kw.update(timeout=timeout)
-
     # Explicitly flush standard streams, required if streams are buffered
     # (not TTY) to write lines in the expected order
     sys.stdout.flush()
@@ -140,10 +137,6 @@ def run_cmd(argv, *, env=None, capture=None, verbose=True, timeout=None):
 
     try:
         proc = subprocess.run(argv, **kw)
-    except subprocess.TimeoutExpired as exc:
-        if verbose:
-            print('command timed out (%s)' % exc)
-        raise
     except OSError as exc:
         if exc.errno == errno.ENOENT:
             if verbose:
diff --git a/pyperformance/cli.py b/pyperformance/cli.py
index 475071b8..3348f62e 100644
--- a/pyperformance/cli.py
+++ b/pyperformance/cli.py
@@ -90,7 +90,8 @@ def parse_args():
                           "(i.e., don't recalibrate). Should be a path to a "
                           ".json file from a previous run.")
     cmd.add_argument("--timeout",
-                     help="Timeout for a benchmark run (default: disabled)",
+                     help="Specify a timeout in seconds for a single "
+                          "benchmark run (default: disabled)",
                      type=check_positive)
     filter_opts(cmd)
 
diff --git a/pyperformance/run.py b/pyperformance/run.py
index de8a7f7d..67ab5d89 100644
--- a/pyperformance/run.py
+++ b/pyperformance/run.py
@@ -1,7 +1,6 @@
 from collections import namedtuple
 import hashlib
 import json
-import subprocess
 import sys
 import time
 import traceback
@@ -174,12 +173,10 @@ def add_bench(dest_suite, obj):
                 pyperf_opts,
                 venv=bench_venv,
                 verbose=options.verbose,
-                timeout=options.timeout,
             )
-        except subprocess.TimeoutExpired as exc:
-            timeout = round(exc.timeout)
-            print("ERROR: Benchmark %s timed out after %s seconds" % (name, timeout))
-            errors.append((name, "Timed out after %s seconds" % timeout))
+        except TimeoutError as exc:
+            print("ERROR: Benchmark %s timed out" % name)
+            errors.append((name, exc))
         except RuntimeError as exc:
             print("ERROR: Benchmark %s failed: %s" % (name, exc))
             traceback.print_exc()
@@ -243,5 +240,7 @@ def get_pyperf_opts(options):
         opts.append('--inherit-environ=%s' % ','.join(options.inherit_environ))
     if options.min_time:
         opts.append('--min-time=%s' % options.min_time)
+    if options.timeout:
+        opts.append('--timeout=%s' % options.timeout)
 
     return opts
diff --git a/pyperformance/tests/__init__.py b/pyperformance/tests/__init__.py
index 442c7ff5..bc66b317 100644
--- a/pyperformance/tests/__init__.py
+++ b/pyperformance/tests/__init__.py
@@ -16,7 +16,7 @@
 DEV_SCRIPT = os.path.join(REPO_ROOT, 'dev.py')
 
 
-def run_cmd(cmd, *args, capture=None, onfail='exit', verbose=True, timeout=None):
+def run_cmd(cmd, *args, capture=None, onfail='exit', verbose=True):
     # XXX Optionally write the output to a file.
     argv = (cmd,) + args
     if not all(a and isinstance(a, str) for a in argv):
@@ -39,10 +39,6 @@ def run_cmd(cmd, *args, capture=None, onfail='exit', verbose=True, timeout=None)
 
     if verbose:
         print(f"(tests) Execute: {argv_str}", flush=True)
-
-    if timeout:
-        kwargs['timeout'] = 60
-
     proc = subprocess.run(argv, **kwargs)
     exitcode = proc.returncode
 
diff --git a/pyperformance/tests/test_commands.py b/pyperformance/tests/test_commands.py
index 5972e85e..7c311cf8 100644
--- a/pyperformance/tests/test_commands.py
+++ b/pyperformance/tests/test_commands.py
@@ -63,14 +63,12 @@ def run_pyperformance(self, cmd, *args,
                           exitcode=0,
                           capture='both',
                           verbose=True,
-                          timeout=None,
                           ):
         ec, stdout, stderr = self.run_module(
             'pyperformance', cmd, *args,
             capture=capture,
             onfail=None,
             verbose=verbose,
-            timeout=timeout,
         )
         if exitcode is True:
             self.assertGreater(ec, 0, repr(stdout))
@@ -156,7 +154,6 @@ def test_run_and_show(self):
             '--debug-single-value',
             '-o', filename,
             capture=None,
-            timeout=None,
         )
 
         # Display slowest benchmarks

From 9147430420d87f9ccb6f930992dd5995e566bd58 Mon Sep 17 00:00:00 2001
From: Diego Russo
Date: Tue, 1 Oct 2024 17:43:12 +0100
Subject: [PATCH 3/3] Pin pyperf to 2.8.0

Update test_commands to reflect a change in how pyperf reports data.
---
 pyperformance/requirements/requirements.txt |  2 +-
 pyperformance/tests/test_commands.py        | 12 ++++++------
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/pyperformance/requirements/requirements.txt b/pyperformance/requirements/requirements.txt
index 80be7f29..4419cca8 100644
--- a/pyperformance/requirements/requirements.txt
+++ b/pyperformance/requirements/requirements.txt
@@ -10,5 +10,5 @@ psutil==5.9.5
     # via
     #   -r requirements.in
     #   pyperf
-pyperf==2.7.0
+pyperf==2.8.0
     # via -r requirements.in
diff --git a/pyperformance/tests/test_commands.py b/pyperformance/tests/test_commands.py
index 7c311cf8..870a58bc 100644
--- a/pyperformance/tests/test_commands.py
+++ b/pyperformance/tests/test_commands.py
@@ -399,7 +399,7 @@ def test_compare_single_value(self):
             Performance version: 0.2
 
             ### call_simple ###
-            7896.0 kB -> 7900.0 kB: 1.00x larger
+            7896.0 KiB -> 7900.0 KiB: 1.00x larger
         ''').lstrip())
 
     def test_compare_csv(self):
@@ -458,11 +458,11 @@ def test_compare_table_single_value(self):
 
             Performance version: 0.2
 
-            +-------------+-----------+-----------+--------------+------------------------------------------+
-            | Benchmark   | mem1.json | mem2.json | Change       | Significance                             |
-            +=============+===========+===========+==============+==========================================+
-            | call_simple | 7896.0 kB | 7900.0 kB | 1.00x larger | (benchmark only contains a single value) |
-            +-------------+-----------+-----------+--------------+------------------------------------------+
+            +-------------+------------+------------+--------------+------------------------------------------+
+            | Benchmark   | mem1.json  | mem2.json  | Change       | Significance                             |
+            +=============+============+============+==============+==========================================+
+            | call_simple | 7896.0 KiB | 7900.0 KiB | 1.00x larger | (benchmark only contains a single value) |
+            +-------------+------------+------------+--------------+------------------------------------------+
         ''').lstrip())
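
Not part of the patch series itself: a minimal, runnable sketch of how the
--timeout plumbing fits together after PATCH 2/3. cli.py validates the value
with check_positive, run.py forwards it to pyperf via get_pyperf_opts(), and
_benchmark.py turns pyperf's exit code 124 into TimeoutError, which run.py
records per benchmark instead of aborting the whole run. The exit code,
benchmark name and errors list below are illustrative stand-ins, not code
copied from pyperformance:

    import argparse

    def check_positive(value):
        # Same validation as the --timeout type hook added in pyperformance/cli.py.
        value = int(value)
        if value <= 0:
            raise argparse.ArgumentTypeError("Argument must be a positive integer.")
        return value

    parser = argparse.ArgumentParser(prog="pyperformance run")
    parser.add_argument("--timeout", type=check_positive,
                        help="Specify a timeout in seconds for a single "
                             "benchmark run (default: disabled)")
    options = parser.parse_args(["--timeout", "300"])

    # run.py (get_pyperf_opts) forwards the validated value to pyperf.
    pyperf_opts = []
    if options.timeout:
        pyperf_opts.append('--timeout=%s' % options.timeout)
    print(pyperf_opts)  # ['--timeout=300']

    # _benchmark.py maps pyperf's timeout exit code (124) to TimeoutError and
    # run.py records it per benchmark rather than failing the whole run.
    ec = 124                    # illustrative exit code of a timed-out pyperf run
    name = "example_benchmark"  # illustrative benchmark name
    errors = []
    try:
        if ec == 124:
            raise TimeoutError("Benchmark timed out")
    except TimeoutError as exc:
        print("ERROR: Benchmark %s timed out" % name)
        errors.append((name, exc))
    print(errors)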