diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 8f1a8748..61c835c4 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -2,9 +2,7 @@ name: test on: push: - branches: [main] pull_request: - branches: [main] schedule: # Midnight UTC: - cron: "0 0 * * *" @@ -16,20 +14,33 @@ jobs: runs-on: ${{ matrix.os }} permissions: pull-requests: write - name: ${{ matrix.os }} - ${{ matrix.python }} + name: ${{ matrix.os }} - ${{ matrix.python }} ${{ matrix.build }} if: ${{ github.event_name != 'schedule' || (github.repository == 'python/pyperformance' && github.event_name == 'schedule') }} strategy: fail-fast: false matrix: # Test all supported versions on Ubuntu: - os: [ubuntu-latest] - python: ["3.8", "3.9", "3.10", "3.11", "3.12"] + os: [ubuntu-latest, ubuntu-24.04-arm] + python: ["3.9", "3.10", "3.11", "3.12", "3.13"] experimental: [false] + build: [''] include: # As the experimental task for the dev version. - os: ubuntu-latest - python: "3.13-dev" + python: "3.13" experimental: true + build: 'free-threading' + - os: ubuntu-24.04-arm + python: "3.13" + experimental: true + build: 'free-threading' + - os: ubuntu-latest + python: "3.14-dev" + experimental: true + - os: ubuntu-latest + python: "3.14-dev" + experimental: true + build: 'free-threading' # Also test PyPy, macOS, and Windows: - os: ubuntu-latest python: pypy-3.10 @@ -38,23 +49,24 @@ jobs: python: pypy-3.9 experimental: false - os: macos-latest - python: "3.12" + python: "3.13" experimental: true - os: windows-latest - python: "3.12" - experimental: true + python: "3.13" + experimental: false steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Set up Python ${{ matrix.python }} - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 if: "!endsWith(matrix.python, '-dev')" with: python-version: ${{ matrix.python }} - name: Set up Python ${{ matrix.python }} using deadsnakes - uses: deadsnakes/action@v3.0.0 + uses: deadsnakes/action@v3.2.0 
if: "endsWith(matrix.python, '-dev')" with: python-version: ${{ matrix.python }} + nogil: ${{ matrix.build == 'free-threading' }} - name: Install # pyperformance must be installed: # pyperformance/tests/test_compare.py imports it @@ -62,7 +74,9 @@ jobs: python -m pip install --upgrade pip setuptools python -m pip install -e . - name: Display Python version - run: python -c "import sys; print(sys.version)" + run: | + python -VV + python -c 'import sysconfig; print("Free threading?", "Yes" if sysconfig.get_config_var("Py_GIL_DISABLED") else "No")' - name: Run Tests id: pyperformance run: python -u -m pyperformance.tests diff --git a/.github/workflows/mypy.yml b/.github/workflows/mypy.yml index d9fb4a66..61159e4e 100644 --- a/.github/workflows/mypy.yml +++ b/.github/workflows/mypy.yml @@ -1,10 +1,6 @@ name: mypy -on: - push: - branches: [main] - pull_request: - workflow_dispatch: +on: [push, pull_request, workflow_dispatch] permissions: contents: read @@ -18,8 +14,8 @@ jobs: name: Check code with mypy runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 - - uses: actions/setup-python@v4 + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 with: cache: "pip" cache-dependency-path: "pyproject.toml" diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index 315e95bf..35106833 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -15,9 +15,9 @@ jobs: id-token: write steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Set up Python - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: '3.x' cache: pip diff --git a/doc/benchmark.conf.sample b/doc/benchmark.conf.sample index b1224460..42208e68 100644 --- a/doc/benchmark.conf.sample +++ b/doc/benchmark.conf.sample @@ -37,6 +37,20 @@ lto = True # Profiled Guided Optimization (PGO)? pgo = True +# Build the experimental just-in-time (JIT) compiler? 
+# Possible values are: +# - no: (default) do not build the JIT or the micro-op interpreter. +# The new PYTHON_JIT environment variable has no effect. +# - yes: build the JIT and enable it by default. PYTHON_JIT=0 can be used to +# disable it at runtime. +# - yes-off: build the JIT, but do not enable it by default. PYTHON_JIT=1 can +# be used to enable it at runtime. +# - interpreter: do not build the JIT, but do build and enable the micro-op +# interpreter. This is useful for those of us who find ourselves developing +# or debugging micro-ops (but don’t want to deal with the JIT). +# PYTHON_JIT=0 can be used to disable the micro-op interpreter at runtime. +jit = no + # The space-separated list of libraries that are package-only, # i.e., locally installed but not on header and library paths. # For each such library, determine the install path and add an diff --git a/doc/changelog.rst b/doc/changelog.rst index 3611e3c5..ee5f3487 100644 --- a/doc/changelog.rst +++ b/doc/changelog.rst @@ -1,6 +1,10 @@ Changelog ========= +* Bump dask[distributed] to 2024.10.1 for Windows compatibility +* Bump greenlet to 3.1.0 for compatibility with 3.13 +* Bump tornado to 6.2.0 + Version 1.11.0 (2024-03-09) -------------- * Add a --same-loops option to the run command to use the exact same number of diff --git a/doc/usage.rst b/doc/usage.rst index 34706144..95e9c25d 100644 --- a/doc/usage.rst +++ b/doc/usage.rst @@ -51,11 +51,11 @@ Run benchmarks Commands to compare Python 3.6 and Python 3.7 performance:: pyperformance run --python=python3.6 -o py36.json - pyperformance run --python=python3.7 -o py38.json - pyperformance compare py36.json py38.json + pyperformance run --python=python3.7 -o py37.json + pyperformance compare py36.json py37.json Note: ``python3 -m pyperformance ...`` syntax works as well (ex: ``python3 -m -pyperformance run -o py38.json``), but requires to install pyperformance on each +pyperformance run -o py37.json``), but requires to install pyperformance on each 
tested Python version. JSON files are produced by the pyperf module and so can be analyzed using pyperf @@ -71,7 +71,7 @@ commands:: It's also possible to use pyperf to compare results of two JSON files:: - python3 -m pyperf compare_to py36.json py38.json --table + python3 -m pyperf compare_to py36.json py37.json --table Basic commands -------------- @@ -102,8 +102,9 @@ Usage:: pyperformance run [-h] [-r] [-f] [--debug-single-value] [-v] [-m] [--affinity CPU_LIST] [-o FILENAME] [--append FILENAME] [--manifest MANIFEST] - [-b BM_LIST] [--inherit-environ VAR_LIST] - [-p PYTHON] + [--timeout TIMEOUT] [-b BM_LIST] + [--inherit-environ VAR_LIST] [-p PYTHON] + [--hook HOOK] options:: @@ -124,6 +125,8 @@ options:: baseline_python, not changed_python. --append FILENAME Add runs to an existing file, or create it if it doesn't exist + --timeout TIMEOUT Specify a timeout in seconds for a single + benchmark run (default: disabled) --manifest MANIFEST benchmark manifest file to use -b BM_LIST, --benchmarks BM_LIST Comma-separated list of benchmarks to run. Can @@ -144,6 +147,9 @@ options:: Use the same number of loops as a previous run (i.e., don't recalibrate). Should be a path to a .json file from a previous run. + --hook HOOK + Apply the given pyperf hook when running the + benchmarks. show ---- diff --git a/pyperformance/__init__.py b/pyperformance/__init__.py index b4efa911..2273312a 100644 --- a/pyperformance/__init__.py +++ b/pyperformance/__init__.py @@ -1,5 +1,7 @@ +import json import os.path import sys +from importlib.metadata import distribution VERSION = (1, 11, 0) @@ -20,7 +22,7 @@ def is_installed(): def is_dev(): parent = os.path.dirname(PKG_ROOT) - return os.path.exists(os.path.join(parent, 'setup.py')) + return os.path.exists(os.path.join(parent, 'pyproject.toml')) def _is_venv(): @@ -33,14 +35,8 @@ def _is_devel_install(): # pip install -e will do a "devel" install. # This means it creates a link back to the checkout instead # of copying the files. 
def check_positive(value):
    """Convert a command-line value to a strictly positive integer.

    Used as an argparse ``type=`` callable (see the ``--timeout`` option).

    Args:
        value: The raw argument; anything accepted by ``int()``.

    Returns:
        The value converted to ``int``.

    Raises:
        ValueError, TypeError: Propagated from ``int()`` when the value
            cannot be converted.
        argparse.ArgumentTypeError: If the converted integer is zero or
            negative.
    """
    value = int(value)
    if value <= 0:
        # Fixed word order: the message previously read "must a be positive".
        raise argparse.ArgumentTypeError("Argument must be a positive integer.")
    return value
allow_no_benchmarks=False): def parse_args(): parser = argparse.ArgumentParser( + prog='pyperformance', description=("Compares the performance of baseline_python with" " changed_python and prints a report.")) + parser.add_argument('-V', '--version', action='version', + version=f'%(prog)s {__version__}') subparsers = parser.add_subparsers(dest='action') cmds = [] @@ -79,6 +91,16 @@ def parse_args(): help="Use the same number of loops as a previous run " "(i.e., don't recalibrate). Should be a path to a " ".json file from a previous run.") + cmd.add_argument("--timeout", + help="Specify a timeout in seconds for a single " + "benchmark run (default: disabled)", + type=check_positive) + hook_names = list(_hooks.get_hook_names()) + cmd.add_argument("--hook", + action="append", + choices=hook_names, + metavar=f"{', '.join(x for x in hook_names if not x.startswith('_'))}", + help="Apply the given pyperf hook(s) when running each benchmark") filter_opts(cmd) # show diff --git a/pyperformance/commands.py b/pyperformance/commands.py index ade1cb12..7cfa4033 100644 --- a/pyperformance/commands.py +++ b/pyperformance/commands.py @@ -191,8 +191,8 @@ def cmd_run(options, benchmarks): if errors: print("%s benchmarks failed:" % len(errors)) - for name in errors: - print("- %s" % name) + for name, reason in errors: + print("- %s (%s)" % (name, reason)) print() sys.exit(1) diff --git a/pyperformance/compile.py b/pyperformance/compile.py index 8f26aded..d7ccc3f8 100644 --- a/pyperformance/compile.py +++ b/pyperformance/compile.py @@ -291,6 +291,8 @@ def compile(self): config_args.append('--with-pydebug') elif self.conf.lto: config_args.append('--with-lto') + if self.conf.jit: + config_args.append(f'--enable-experimental-jit={self.conf.jit}') if self.conf.pkg_only: config_args.extend(self.get_package_only_flags()) if self.conf.debug: @@ -801,6 +803,7 @@ def getint(section, key, default=None): conf.directory = getfile('compile', 'bench_dir') conf.lto = getboolean('compile', 'lto', 
True) conf.pgo = getboolean('compile', 'pgo', True) + conf.jit = getstr('compile', 'jit', '') conf.install = getboolean('compile', 'install', True) conf.pkg_only = getstr('compile', 'pkg_only', '').split() try: diff --git a/pyperformance/data-files/benchmarks/MANIFEST b/pyperformance/data-files/benchmarks/MANIFEST index 3210b97f..2d2e5bf5 100644 --- a/pyperformance/data-files/benchmarks/MANIFEST +++ b/pyperformance/data-files/benchmarks/MANIFEST @@ -2,6 +2,8 @@ name metafile 2to3 +argparse +argparse_subparsers async_generators async_tree async_tree_cpu_io_mixed @@ -22,6 +24,7 @@ async_tree_eager_memoization_tg asyncio_tcp asyncio_tcp_ssl asyncio_websockets +bpe_tokeniser concurrent_imap coroutines coverage @@ -53,6 +56,9 @@ logging mako mdp meteor_contest +networkx +networkx_connected_components +networkx_k_core nbody nqueens pathlib @@ -74,12 +80,13 @@ richards richards_super scimark spectral_norm +sphinx sqlalchemy_declarative sqlalchemy_imperative -sqlglot -sqlglot_parse -sqlglot_transpile -sqlglot_optimize +sqlglot_v2 +sqlglot_v2_parse +sqlglot_v2_transpile +sqlglot_v2_optimize sqlite_synth sympy telco @@ -93,14 +100,11 @@ unpickle_pure_python xml_etree -#[groups] -#asyncio -#startup -#regex -#serialize -#apps -#math -#template - - [group default] +[group asyncio] +[group startup] +[group regex] +[group serialize] +[group apps] +[group math] +[group template] diff --git a/pyperformance/data-files/benchmarks/bm_argparse/bm_argparse_subparsers.toml b/pyperformance/data-files/benchmarks/bm_argparse/bm_argparse_subparsers.toml new file mode 100644 index 00000000..38044444 --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_argparse/bm_argparse_subparsers.toml @@ -0,0 +1,10 @@ +[project] +name = "pyperformance_bm_argparse" +requires-python = ">=3.8" +dependencies = ["pyperf"] +urls = {repository = "https://github.com/python/pyperformance"} +dynamic = ["version"] + +[tool.pyperformance] +name = "argparse_subparsers" +extra_opts = ["subparsers"] diff --git 
a/pyperformance/data-files/benchmarks/bm_argparse/pyproject.toml b/pyperformance/data-files/benchmarks/bm_argparse/pyproject.toml new file mode 100644 index 00000000..0e1040ef --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_argparse/pyproject.toml @@ -0,0 +1,10 @@ +[project] +name = "pyperformance_bm_argparse" +requires-python = ">=3.8" +dependencies = ["pyperf"] +urls = {repository = "https://github.com/python/pyperformance"} +dynamic = ["version"] + +[tool.pyperformance] +name = "argparse_many_optionals" +extra_opts = ["many_optionals"] diff --git a/pyperformance/data-files/benchmarks/bm_argparse/run_benchmark.py b/pyperformance/data-files/benchmarks/bm_argparse/run_benchmark.py new file mode 100644 index 00000000..edab780e --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_argparse/run_benchmark.py @@ -0,0 +1,125 @@ +""" +Benchmark argparse programs with: +1) multiple subparsers, each with their own subcommands, and then parse a series of command-line arguments. +2) a large number of optional arguments, and then parse a series of command-line arguments. 
def generate_arguments(i: int) -> list:
    """Build an argv-style list with two positionals and ``i`` option pairs.

    Args:
        i: Number of ``--optionN valueN`` pairs to append.

    Returns:
        ``["input.txt", "output.txt", "--option0", "value0", ...]``.
    """
    arguments = ["input.txt", "output.txt"]
    # Use a distinct loop variable so the parameter ``i`` is not shadowed.
    for index in range(i):
        arguments.extend([f"--option{index}", f"value{index}"])
    return arguments


def bm_many_optionals() -> None:
    """Benchmark a parser that declares a large number of optional arguments.

    Builds a parser with two positionals and 1000 ``--optionN`` arguments,
    then parses command lines supplying 500 and 1000 of those options.
    """
    parser = argparse.ArgumentParser()

    parser.add_argument("input_file", type=str, help="The input file")
    parser.add_argument("output_file", type=str, help="The output file")

    for i in range(1000):
        parser.add_argument(f"--option{i}", type=str, help=f"Optional argument {i}")

    argument_lists = [
        generate_arguments(500),
        generate_arguments(1000),
    ]

    for args in argument_lists:
        parser.parse_args(args)


def bm_subparsers() -> None:
    """Benchmark a parser made of several subparsers with their own options.

    Builds a small version-control style CLI (``add``, ``commit``, ``push``)
    that uses argument groups and mutually exclusive groups, then parses a
    fixed series of command lines against it.
    """
    parser = argparse.ArgumentParser(description="A version control system CLI")

    parser.add_argument("--version", action="version", version="1.0")

    subparsers = parser.add_subparsers(dest="command", required=True)

    add_parser = subparsers.add_parser("add", help="Add a file to the repository")
    add_parser.add_argument("files", nargs="+", help="List of files to add to staging")

    commit_parser = subparsers.add_parser(
        "commit", help="Commit changes to the repository"
    )
    commit_parser.add_argument("-m", "--message", required=True, help="Commit message")

    commit_group = commit_parser.add_mutually_exclusive_group(required=False)
    commit_group.add_argument(
        "--amend", action="store_true", help="Amend the last commit"
    )
    commit_group.add_argument(
        "--no-edit", action="store_true", help="Reuse the last commit message"
    )

    push_parser = subparsers.add_parser(
        "push", help="Push changes to remote repository"
    )

    network_group = push_parser.add_argument_group("Network options")
    network_group.add_argument("--dryrun", action="store_true", help="Simulate changes")
    network_group.add_argument(
        "--timeout", type=int, default=30, help="Timeout in seconds"
    )

    auth_group = push_parser.add_argument_group("Authentication options")
    auth_group.add_argument(
        "--username", required=True, help="Username for authentication"
    )
    auth_group.add_argument(
        "--password", required=True, help="Password for authentication"
    )

    global_group = parser.add_mutually_exclusive_group()
    global_group.add_argument("--verbose", action="store_true", help="Verbose output")
    global_group.add_argument("--quiet", action="store_true", help="Quiet output")

    argument_lists = [
        ["--verbose", "add", "file1.txt", "file2.txt"],
        ["add", "file1.txt", "file2.txt"],
        ["commit", "-m", "Initial commit"],
        ["commit", "-m", "Add new feature", "--amend"],
        [
            "push",
            "--dryrun",
            "--timeout",
            "60",
            "--username",
            "user",
            "--password",
            "pass",
        ],
    ]

    for arguments in argument_lists:
        parser.parse_args(arguments)


# Maps the benchmark name given on the command line to its workload.
BENCHMARKS = {
    "many_optionals": bm_many_optionals,
    "subparsers": bm_subparsers,
}


def add_cmdline_args(cmd, args):
    """Append the selected benchmark name to ``cmd``.

    Passed to ``pyperf.Runner`` as its ``add_cmdline_args`` callback.
    """
    cmd.append(args.benchmark)


def add_parser_args(parser):
    """Register the positional ``benchmark`` argument on ``parser``."""
    parser.add_argument("benchmark", choices=BENCHMARKS, help="Which benchmark to run.")


if __name__ == "__main__":
    runner = pyperf.Runner(add_cmdline_args=add_cmdline_args)
    runner.metadata["description"] = "Argparse benchmark"
    add_parser_args(runner.argparser)
    args = runner.parse_args()

    runner.bench_func(args.benchmark, BENCHMARKS[args.benchmark])
b/pyperformance/data-files/benchmarks/bm_async_tree/run_benchmark.py index 131e59c1..2b1f3d5c 100644 --- a/pyperformance/data-files/benchmarks/bm_async_tree/run_benchmark.py +++ b/pyperformance/data-files/benchmarks/bm_async_tree/run_benchmark.py @@ -159,7 +159,7 @@ def add_parser_args(parser): Determines which benchmark to run. Options: 1) "none": No actual async work in the async tree. 2) "io": All leaf nodes simulate async IO workload (async sleep 50ms). -3) "memoization": All leaf nodes simulate async IO workload with 90% of +3) "memoization": All leaf nodes simulate async IO workload with 90%% of the data memoized 4) "cpu_io_mixed": Half of the leaf nodes simulate CPU-bound workload and the other half simulate the same workload as the diff --git a/pyperformance/data-files/benchmarks/bm_asyncio_tcp/pyproject.toml b/pyperformance/data-files/benchmarks/bm_asyncio_tcp/pyproject.toml index 2e5f1e47..a23db887 100644 --- a/pyperformance/data-files/benchmarks/bm_asyncio_tcp/pyproject.toml +++ b/pyperformance/data-files/benchmarks/bm_asyncio_tcp/pyproject.toml @@ -7,3 +7,4 @@ dynamic = ["version"] [tool.pyperformance] name = "asyncio_tcp" +tags = "asyncio" diff --git a/pyperformance/data-files/benchmarks/bm_asyncio_websockets/pyproject.toml b/pyperformance/data-files/benchmarks/bm_asyncio_websockets/pyproject.toml index e985b0e1..b0773b0c 100644 --- a/pyperformance/data-files/benchmarks/bm_asyncio_websockets/pyproject.toml +++ b/pyperformance/data-files/benchmarks/bm_asyncio_websockets/pyproject.toml @@ -7,3 +7,4 @@ dynamic = ["version"] [tool.pyperformance] name = "asyncio_websockets" +tags = "asyncio" \ No newline at end of file diff --git a/pyperformance/data-files/benchmarks/bm_asyncio_websockets/run_benchmark.py b/pyperformance/data-files/benchmarks/bm_asyncio_websockets/run_benchmark.py index 1f167f6f..12d2df45 100644 --- a/pyperformance/data-files/benchmarks/bm_asyncio_websockets/run_benchmark.py +++ 
async def send(ws):
    """Send one ``DATA`` payload over ``ws``, tolerating a clean close.

    A ``ConnectionClosedOK`` raised by the send is swallowed; any other
    exception propagates to the caller.
    """
    try:
        await ws.send(DATA)
    except websockets.exceptions.ConnectionClosedOK:
        # NOTE(review): presumably the server side may close the connection
        # while sends are still queued — confirm against ``handler``.
        return None
I try in vain to be persuaded that the pole is the seat of frost and desolation; it ever presents itself to my imagination as the region of beauty and delight. There, Margaret, the sun is for ever visible, its broad disk just skirting the horizon and diffusing a perpetual splendour. There—for with your leave, my sister, I will put some trust in preceding navigators—there snow and frost are banished; and, sailing over a calm sea, we may be wafted to a land surpassing in wonders and in beauty every region hitherto discovered on the habitable globe. Its productions and features may be without example, as the phenomena of the heavenly bodies undoubtedly are in those undiscovered solitudes. What may not be expected in a country of eternal light? I may there discover the wondrous power which attracts the needle and may regulate a thousand celestial observations that require only this voyage to render their seeming eccentricities consistent for ever. I shall satiate my ardent curiosity with the sight of a part of the world never before visited, and may tread a land never before imprinted by the foot of man. These are my enticements, and they are sufficient to conquer all fear of danger or death and to induce me to commence this laborious voyage with the joy a child feels when he embarks in a little boat, with his holiday mates, on an expedition of discovery up his native river. But supposing all these conjectures to be false, you cannot contest the inestimable benefit which I shall confer on all mankind, to the last generation, by discovering a passage near the pole to those countries, to reach which at present so many months are requisite; or by ascertaining the secret of the magnet, which, if at all possible, can only be effected by an undertaking such as mine. 
+ +These reflections have dispelled the agitation with which I began my letter, and I feel my heart glow with an enthusiasm which elevates me to heaven, for nothing contributes so much to tranquillise the mind as a steady purpose—a point on which the soul may fix its intellectual eye. This expedition has been the favourite dream of my early years. I have read with ardour the accounts of the various voyages which have been made in the prospect of arriving at the North Pacific Ocean through the seas which surround the pole. You may remember that a history of all the voyages made for purposes of discovery composed the whole of our good Uncle Thomas’ library. My education was neglected, yet I was passionately fond of reading. These volumes were my study day and night, and my familiarity with them increased that regret which I had felt, as a child, on learning that my father’s dying injunction had forbidden my uncle to allow me to embark in a seafaring life. + +These visions faded when I perused, for the first time, those poets whose effusions entranced my soul and lifted it to heaven. I also became a poet and for one year lived in a paradise of my own creation; I imagined that I also might obtain a niche in the temple where the names of Homer and Shakespeare are consecrated. You are well acquainted with my failure and how heavily I bore the disappointment. But just at that time I inherited the fortune of my cousin, and my thoughts were turned into the channel of their earlier bent. + +Six years have passed since I resolved on my present undertaking. I can, even now, remember the hour from which I dedicated myself to this great enterprise. I commenced by inuring my body to hardship. 
I accompanied the whale-fishers on several expeditions to the North Sea; I voluntarily endured cold, famine, thirst, and want of sleep; I often worked harder than the common sailors during the day and devoted my nights to the study of mathematics, the theory of medicine, and those branches of physical science from which a naval adventurer might derive the greatest practical advantage. Twice I actually hired myself as an under-mate in a Greenland whaler, and acquitted myself to admiration. I must own I felt a little proud when my captain offered me the second dignity in the vessel and entreated me to remain with the greatest earnestness, so valuable did he consider my services. + +And now, dear Margaret, do I not deserve to accomplish some great purpose? My life might have been passed in ease and luxury, but I preferred glory to every enticement that wealth placed in my path. Oh, that some encouraging voice would answer in the affirmative! My courage and my resolution is firm; but my hopes fluctuate, and my spirits are often depressed. I am about to proceed on a long and difficult voyage, the emergencies of which will demand all my fortitude: I am required not only to raise the spirits of others, but sometimes to sustain my own, when theirs are failing. + +This is the most favourable period for travelling in Russia. They fly quickly over the snow in their sledges; the motion is pleasant, and, in my opinion, far more agreeable than that of an English stagecoach. The cold is not excessive, if you are wrapped in furs—a dress which I have already adopted, for there is a great difference between walking the deck and remaining seated motionless for hours, when no exercise prevents the blood from actually freezing in your veins. I have no ambition to lose my life on the post-road between St. Petersburgh and Archangel. 
+ +I shall depart for the latter town in a fortnight or three weeks; and my intention is to hire a ship there, which can easily be done by paying the insurance for the owner, and to engage as many sailors as I think necessary among those who are accustomed to the whale-fishing. I do not intend to sail until the month of June; and when shall I return? Ah, dear sister, how can I answer this question? If I succeed, many, many months, perhaps years, will pass before you and I may meet. If I fail, you will see me again soon, or never. + +Farewell, my dear, excellent Margaret. Heaven shower down blessings on you, and save me, that I may again and again testify my gratitude for all your love and kindness. + +Your affectionate brother, +R. Walton + +Letter 2 +To Mrs. Saville, England. + +Archangel, 28th March, 17—. + +How slowly the time passes here, encompassed as I am by frost and snow! Yet a second step is taken towards my enterprise. I have hired a vessel and am occupied in collecting my sailors; those whom I have already engaged appear to be men on whom I can depend and are certainly possessed of dauntless courage. + +But I have one want which I have never yet been able to satisfy, and the absence of the object of which I now feel as a most severe evil, I have no friend, Margaret: when I am glowing with the enthusiasm of success, there will be none to participate my joy; if I am assailed by disappointment, no one will endeavour to sustain me in dejection. I shall commit my thoughts to paper, it is true; but that is a poor medium for the communication of feeling. I desire the company of a man who could sympathise with me, whose eyes would reply to mine. You may deem me romantic, my dear sister, but I bitterly feel the want of a friend. I have no one near me, gentle yet courageous, possessed of a cultivated as well as of a capacious mind, whose tastes are like my own, to approve or amend my plans. How would such a friend repair the faults of your poor brother! 
I am too ardent in execution and too impatient of difficulties. But it is a still greater evil to me that I am self-educated: for the first fourteen years of my life I ran wild on a common and read nothing but our Uncle Thomas’ books of voyages. At that age I became acquainted with the celebrated poets of our own country; but it was only when it had ceased to be in my power to derive its most important benefits from such a conviction that I perceived the necessity of becoming acquainted with more languages than that of my native country. Now I am twenty-eight and am in reality more illiterate than many schoolboys of fifteen. It is true that I have thought more and that my daydreams are more extended and magnificent, but they want (as the painters call it) keeping; and I greatly need a friend who would have sense enough not to despise me as romantic, and affection enough for me to endeavour to regulate my mind. + +Well, these are useless complaints; I shall certainly find no friend on the wide ocean, nor even here in Archangel, among merchants and seamen. Yet some feelings, unallied to the dross of human nature, beat even in these rugged bosoms. My lieutenant, for instance, is a man of wonderful courage and enterprise; he is madly desirous of glory, or rather, to word my phrase more characteristically, of advancement in his profession. He is an Englishman, and in the midst of national and professional prejudices, unsoftened by cultivation, retains some of the noblest endowments of humanity. I first became acquainted with him on board a whale vessel; finding that he was unemployed in this city, I easily engaged him to assist in my enterprise. + +The master is a person of an excellent disposition and is remarkable in the ship for his gentleness and the mildness of his discipline. This circumstance, added to his well-known integrity and dauntless courage, made me very desirous to engage him. 
A youth passed in solitude, my best years spent under your gentle and feminine fosterage, has so refined the groundwork of my character that I cannot overcome an intense distaste to the usual brutality exercised on board ship: I have never believed it to be necessary, and when I heard of a mariner equally noted for his kindliness of heart and the respect and obedience paid to him by his crew, I felt myself peculiarly fortunate in being able to secure his services. I heard of him first in rather a romantic manner, from a lady who owes to him the happiness of her life. This, briefly, is his story. Some years ago he loved a young Russian lady of moderate fortune, and having amassed a considerable sum in prize-money, the father of the girl consented to the match. He saw his mistress once before the destined ceremony; but she was bathed in tears, and throwing herself at his feet, entreated him to spare her, confessing at the same time that she loved another, but that he was poor, and that her father would never consent to the union. My generous friend reassured the suppliant, and on being informed of the name of her lover, instantly abandoned his pursuit. He had already bought a farm with his money, on which he had designed to pass the remainder of his life; but he bestowed the whole on his rival, together with the remains of his prize-money to purchase stock, and then himself solicited the young woman’s father to consent to her marriage with her lover. But the old man decidedly refused, thinking himself bound in honour to my friend, who, when he found the father inexorable, quitted his country, nor returned until he heard that his former mistress was married according to her inclinations. “What a noble fellow!” you will exclaim. 
He is so; but then he is wholly uneducated: he is as silent as a Turk, and a kind of ignorant carelessness attends him, which, while it renders his conduct the more astonishing, detracts from the interest and sympathy which otherwise he would command. + +Yet do not suppose, because I complain a little or because I can conceive a consolation for my toils which I may never know, that I am wavering in my resolutions. Those are as fixed as fate, and my voyage is only now delayed until the weather shall permit my embarkation. The winter has been dreadfully severe, but the spring promises well, and it is considered as a remarkably early season, so that perhaps I may sail sooner than I expected. I shall do nothing rashly: you know me sufficiently to confide in my prudence and considerateness whenever the safety of others is committed to my care. + +I cannot describe to you my sensations on the near prospect of my undertaking. It is impossible to communicate to you a conception of the trembling sensation, half pleasurable and half fearful, with which I am preparing to depart. I am going to unexplored regions, to “the land of mist and snow,” but I shall kill no albatross; therefore do not be alarmed for my safety or if I should come back to you as worn and woeful as the “Ancient Mariner.” You will smile at my allusion, but I will disclose a secret. I have often attributed my attachment to, my passionate enthusiasm for, the dangerous mysteries of ocean to that production of the most imaginative of modern poets. There is something at work in my soul which I do not understand. I am practically industrious—painstaking, a workman to execute with perseverance and labour—but besides this there is a love for the marvellous, a belief in the marvellous, intertwined in all my projects, which hurries me out of the common pathways of men, even to the wild sea and unvisited regions I am about to explore. + +But to return to dearer considerations. 
Shall I meet you again, after having traversed immense seas, and returned by the most southern cape of Africa or America? I dare not expect such success, yet I cannot bear to look on the reverse of the picture. Continue for the present to write to me by every opportunity: I may receive your letters on some occasions when I need them most to support my spirits. I love you very tenderly. Remember me with affection, should you never hear from me again. + +Your affectionate brother, +Robert Walton + +Letter 3 +To Mrs. Saville, England. + +July 7th, 17—. + +My dear Sister, + +I write a few lines in haste to say that I am safe—and well advanced on my voyage. This letter will reach England by a merchantman now on its homeward voyage from Archangel; more fortunate than I, who may not see my native land, perhaps, for many years. I am, however, in good spirits: my men are bold and apparently firm of purpose, nor do the floating sheets of ice that continually pass us, indicating the dangers of the region towards which we are advancing, appear to dismay them. We have already reached a very high latitude; but it is the height of summer, and although not so warm as in England, the southern gales, which blow us speedily towards those shores which I so ardently desire to attain, breathe a degree of renovating warmth which I had not expected. + +No incidents have hitherto befallen us that would make a figure in a letter. One or two stiff gales and the springing of a leak are accidents which experienced navigators scarcely remember to record, and I shall be well content if nothing worse happen to us during our voyage. + +Adieu, my dear Margaret. Be assured that for my own sake, as well as yours, I will not rashly encounter danger. I will be cool, persevering, and prudent. + +But success shall crown my endeavours. Wherefore not? Thus far I have gone, tracing a secure way over the pathless seas, the very stars themselves being witnesses and testimonies of my triumph. 
Why not still proceed over the untamed yet obedient element? What can stop the determined heart and resolved will of man? + +My swelling heart involuntarily pours itself out thus. But I must finish. Heaven bless my beloved sister! + +R.W. + +Letter 4 +To Mrs. Saville, England. + +August 5th, 17—. + +So strange an accident has happened to us that I cannot forbear recording it, although it is very probable that you will see me before these papers can come into your possession. + +Last Monday (July 31st) we were nearly surrounded by ice, which closed in the ship on all sides, scarcely leaving her the sea-room in which she floated. Our situation was somewhat dangerous, especially as we were compassed round by a very thick fog. We accordingly lay to, hoping that some change would take place in the atmosphere and weather. + +About two o’clock the mist cleared away, and we beheld, stretched out in every direction, vast and irregular plains of ice, which seemed to have no end. Some of my comrades groaned, and my own mind began to grow watchful with anxious thoughts, when a strange sight suddenly attracted our attention and diverted our solicitude from our own situation. We perceived a low carriage, fixed on a sledge and drawn by dogs, pass on towards the north, at the distance of half a mile; a being which had the shape of a man, but apparently of gigantic stature, sat in the sledge and guided the dogs. We watched the rapid progress of the traveller with our telescopes until he was lost among the distant inequalities of the ice. + +This appearance excited our unqualified wonder. We were, as we believed, many hundred miles from any land; but this apparition seemed to denote that it was not, in reality, so distant as we had supposed. Shut in, however, by ice, it was impossible to follow his track, which we had observed with the greatest attention. + +About two hours after this occurrence we heard the ground sea, and before night the ice broke and freed our ship. 
We, however, lay to until the morning, fearing to encounter in the dark those large loose masses which float about after the breaking up of the ice. I profited of this time to rest for a few hours. + +In the morning, however, as soon as it was light, I went upon deck and found all the sailors busy on one side of the vessel, apparently talking to someone in the sea. It was, in fact, a sledge, like that we had seen before, which had drifted towards us in the night on a large fragment of ice. Only one dog remained alive; but there was a human being within it whom the sailors were persuading to enter the vessel. He was not, as the other traveller seemed to be, a savage inhabitant of some undiscovered island, but a European. When I appeared on deck the master said, “Here is our captain, and he will not allow you to perish on the open sea.” + +On perceiving me, the stranger addressed me in English, although with a foreign accent. “Before I come on board your vessel,” said he, “will you have the kindness to inform me whither you are bound?” + +You may conceive my astonishment on hearing such a question addressed to me from a man on the brink of destruction and to whom I should have supposed that my vessel would have been a resource which he would not have exchanged for the most precious wealth the earth can afford. I replied, however, that we were on a voyage of discovery towards the northern pole. + +Upon hearing this he appeared satisfied and consented to come on board. Good God! Margaret, if you had seen the man who thus capitulated for his safety, your surprise would have been boundless. His limbs were nearly frozen, and his body dreadfully emaciated by fatigue and suffering. I never saw a man in so wretched a condition. We attempted to carry him into the cabin, but as soon as he had quitted the fresh air he fainted. We accordingly brought him back to the deck and restored him to animation by rubbing him with brandy and forcing him to swallow a small quantity. 
As soon as he showed signs of life we wrapped him up in blankets and placed him near the chimney of the kitchen stove. By slow degrees he recovered and ate a little soup, which restored him wonderfully. + +Two days passed in this manner before he was able to speak, and I often feared that his sufferings had deprived him of understanding. When he had in some measure recovered, I removed him to my own cabin and attended on him as much as my duty would permit. I never saw a more interesting creature: his eyes have generally an expression of wildness, and even madness, but there are moments when, if anyone performs an act of kindness towards him or does him any the most trifling service, his whole countenance is lighted up, as it were, with a beam of benevolence and sweetness that I never saw equalled. But he is generally melancholy and despairing, and sometimes he gnashes his teeth, as if impatient of the weight of woes that oppresses him. + +When my guest was a little recovered I had great trouble to keep off the men, who wished to ask him a thousand questions; but I would not allow him to be tormented by their idle curiosity, in a state of body and mind whose restoration evidently depended upon entire repose. Once, however, the lieutenant asked why he had come so far upon the ice in so strange a vehicle. + +His countenance instantly assumed an aspect of the deepest gloom, and he replied, “To seek one who fled from me.” + +“And did the man whom you pursued travel in the same fashion?” + +“Yes.” + +“Then I fancy we have seen him, for the day before we picked you up we saw some dogs drawing a sledge, with a man in it, across the ice.” + +This aroused the stranger’s attention, and he asked a multitude of questions concerning the route which the dæmon, as he called him, had pursued. 
Soon after, when he was alone with me, he said, “I have, doubtless, excited your curiosity, as well as that of these good people; but you are too considerate to make inquiries.” + +“Certainly; it would indeed be very impertinent and inhuman in me to trouble you with any inquisitiveness of mine.” + +“And yet you rescued me from a strange and perilous situation; you have benevolently restored me to life.” + +Soon after this he inquired if I thought that the breaking up of the ice had destroyed the other sledge. I replied that I could not answer with any degree of certainty, for the ice had not broken until near midnight, and the traveller might have arrived at a place of safety before that time; but of this I could not judge. + +From this time a new spirit of life animated the decaying frame of the stranger. He manifested the greatest eagerness to be upon deck to watch for the sledge which had before appeared; but I have persuaded him to remain in the cabin, for he is far too weak to sustain the rawness of the atmosphere. I have promised that someone should watch for him and give him instant notice if any new object should appear in sight. + +Such is my journal of what relates to this strange occurrence up to the present day. The stranger has gradually improved in health but is very silent and appears uneasy when anyone except myself enters his cabin. Yet his manners are so conciliating and gentle that the sailors are all interested in him, although they have had very little communication with him. For my own part, I begin to love him as a brother, and his constant and deep grief fills me with sympathy and compassion. He must have been a noble creature in his better days, being even now in wreck so attractive and amiable. + +I said in one of my letters, my dear Margaret, that I should find no friend on the wide ocean; yet I have found a man who, before his spirit had been broken by misery, I should have been happy to have possessed as the brother of my heart. 
+ +I shall continue my journal concerning the stranger at intervals, should I have any fresh incidents to record. + +August 13th, 17—. + +My affection for my guest increases every day. He excites at once my admiration and my pity to an astonishing degree. How can I see so noble a creature destroyed by misery without feeling the most poignant grief? He is so gentle, yet so wise; his mind is so cultivated, and when he speaks, although his words are culled with the choicest art, yet they flow with rapidity and unparalleled eloquence. + +He is now much recovered from his illness and is continually on the deck, apparently watching for the sledge that preceded his own. Yet, although unhappy, he is not so utterly occupied by his own misery but that he interests himself deeply in the projects of others. He has frequently conversed with me on mine, which I have communicated to him without disguise. He entered attentively into all my arguments in favour of my eventual success and into every minute detail of the measures I had taken to secure it. I was easily led by the sympathy which he evinced to use the language of my heart, to give utterance to the burning ardour of my soul and to say, with all the fervour that warmed me, how gladly I would sacrifice my fortune, my existence, my every hope, to the furtherance of my enterprise. One man’s life or death were but a small price to pay for the acquirement of the knowledge which I sought, for the dominion I should acquire and transmit over the elemental foes of our race. As I spoke, a dark gloom spread over my listener’s countenance. At first I perceived that he tried to suppress his emotion; he placed his hands before his eyes, and my voice quivered and failed me as I beheld tears trickle fast from between his fingers; a groan burst from his heaving breast. I paused; at length he spoke, in broken accents: “Unhappy man! Do you share my madness? Have you drunk also of the intoxicating draught? 
Hear me; let me reveal my tale, and you will dash the cup from your lips!” + +Such words, you may imagine, strongly excited my curiosity; but the paroxysm of grief that had seized the stranger overcame his weakened powers, and many hours of repose and tranquil conversation were necessary to restore his composure. + +Having conquered the violence of his feelings, he appeared to despise himself for being the slave of passion; and quelling the dark tyranny of despair, he led me again to converse concerning myself personally. He asked me the history of my earlier years. The tale was quickly told, but it awakened various trains of reflection. I spoke of my desire of finding a friend, of my thirst for a more intimate sympathy with a fellow mind than had ever fallen to my lot, and expressed my conviction that a man could boast of little happiness who did not enjoy this blessing. + +“I agree with you,” replied the stranger; “we are unfashioned creatures, but half made up, if one wiser, better, dearer than ourselves—such a friend ought to be—do not lend his aid to perfectionate our weak and faulty natures. I once had a friend, the most noble of human creatures, and am entitled, therefore, to judge respecting friendship. You have hope, and the world before you, and have no cause for despair. But I—I have lost everything and cannot begin life anew.” + +As he said this his countenance became expressive of a calm, settled grief that touched me to the heart. But he was silent and presently retired to his cabin. + +Even broken in spirit as he is, no one can feel more deeply than he does the beauties of nature. The starry sky, the sea, and every sight afforded by these wonderful regions seem still to have the power of elevating his soul from earth. 
Such a man has a double existence: he may suffer misery and be overwhelmed by disappointments, yet when he has retired into himself, he will be like a celestial spirit that has a halo around him, within whose circle no grief or folly ventures. + +Will you smile at the enthusiasm I express concerning this divine wanderer? You would not if you saw him. You have been tutored and refined by books and retirement from the world, and you are therefore somewhat fastidious; but this only renders you the more fit to appreciate the extraordinary merits of this wonderful man. Sometimes I have endeavoured to discover what quality it is which he possesses that elevates him so immeasurably above any other person I ever knew. I believe it to be an intuitive discernment, a quick but never-failing power of judgment, a penetration into the causes of things, unequalled for clearness and precision; add to this a facility of expression and a voice whose varied intonations are soul-subduing music. + +August 19th, 17—. + +Yesterday the stranger said to me, “You may easily perceive, Captain Walton, that I have suffered great and unparalleled misfortunes. I had determined at one time that the memory of these evils should die with me, but you have won me to alter my determination. You seek for knowledge and wisdom, as I once did; and I ardently hope that the gratification of your wishes may not be a serpent to sting you, as mine has been. I do not know that the relation of my disasters will be useful to you; yet, when I reflect that you are pursuing the same course, exposing yourself to the same dangers which have rendered me what I am, I imagine that you may deduce an apt moral from my tale, one that may direct you if you succeed in your undertaking and console you in case of failure. Prepare to hear of occurrences which are usually deemed marvellous. 
Were we among the tamer scenes of nature I might fear to encounter your unbelief, perhaps your ridicule; but many things will appear possible in these wild and mysterious regions which would provoke the laughter of those unacquainted with the ever-varied powers of nature; nor can I doubt but that my tale conveys in its series internal evidence of the truth of the events of which it is composed.” + +You may easily imagine that I was much gratified by the offered communication, yet I could not endure that he should renew his grief by a recital of his misfortunes. I felt the greatest eagerness to hear the promised narrative, partly from curiosity and partly from a strong desire to ameliorate his fate if it were in my power. I expressed these feelings in my answer. + +“I thank you,” he replied, “for your sympathy, but it is useless; my fate is nearly fulfilled. I wait but for one event, and then I shall repose in peace. I understand your feeling,” continued he, perceiving that I wished to interrupt him; “but you are mistaken, my friend, if thus you will allow me to name you; nothing can alter my destiny; listen to my history, and you will perceive how irrevocably it is determined.” + +He then told me that he would commence his narrative the next day when I should be at leisure. This promise drew from me the warmest thanks. I have resolved every night, when I am not imperatively occupied by my duties, to record, as nearly as possible in his own words, what he has related during the day. If I should be engaged, I will at least make notes. This manuscript will doubtless afford you the greatest pleasure; but to me, who know him, and who hear it from his own lips—with what interest and sympathy shall I read it in some future day! Even now, as I commence my task, his full-toned voice swells in my ears; his lustrous eyes dwell on me with all their melancholy sweetness; I see his thin hand raised in animation, while the lineaments of his face are irradiated by the soul within. 
Strange and harrowing must be his story, frightful the storm which embraced the gallant vessel on its course and wrecked it—thus! diff --git a/pyperformance/data-files/benchmarks/bm_bpe_tokeniser/pyproject.toml b/pyperformance/data-files/benchmarks/bm_bpe_tokeniser/pyproject.toml new file mode 100644 index 00000000..21fb5a26 --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_bpe_tokeniser/pyproject.toml @@ -0,0 +1,9 @@ +[project] +name = "pyperformance_bm_bpe_tokeniser" +requires-python = ">=3.7" +dependencies = ["pyperf"] +urls = {repository = "https://github.com/python/pyperformance"} +dynamic = ["version"] + +[tool.pyperformance] +name = "bpe_tokeniser" diff --git a/pyperformance/data-files/benchmarks/bm_bpe_tokeniser/run_benchmark.py b/pyperformance/data-files/benchmarks/bm_bpe_tokeniser/run_benchmark.py new file mode 100644 index 00000000..2a544ac7 --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_bpe_tokeniser/run_benchmark.py @@ -0,0 +1,157 @@ +""" +Benchmark a BPE tokeniser. + +Author: Shantanu Jain + +Based on code from tiktoken. 
+https://github.com/openai/tiktoken +""" +from __future__ import annotations + +import pyperf + + +import collections +import re +from pathlib import Path + + +class SimpleBytePairEncoding: + def __init__(self, *, pat_str: str, mergeable_ranks: dict[bytes, int]) -> None: + self.pat_str = pat_str + self.mergeable_ranks = mergeable_ranks + + self._decoder = {token: token_bytes for token_bytes, token in mergeable_ranks.items()} + self._pat = re.compile(pat_str) + + def encode(self, text: str) -> list[int]: + # Use the regex to split the text into (approximately) words + words = self._pat.findall(text) + tokens = [] + for word in words: + # Turn each word into tokens, using the byte pair encoding algorithm + word_bytes = word.encode("utf-8") + word_tokens = bpe_encode(self.mergeable_ranks, word_bytes) + tokens.extend(word_tokens) + return tokens + + def decode_bytes(self, tokens: list[int]) -> bytes: + return b"".join(self._decoder[token] for token in tokens) + + def decode(self, tokens: list[int]) -> str: + return self.decode_bytes(tokens).decode("utf-8", errors="replace") + + @staticmethod + def train(training_data: str, vocab_size: int, pat_str: str): + mergeable_ranks = bpe_train(data=training_data, vocab_size=vocab_size, pat_str=pat_str) + return SimpleBytePairEncoding(pat_str=pat_str, mergeable_ranks=mergeable_ranks) + + +def bpe_encode(mergeable_ranks: dict[bytes, int], input: bytes) -> list[int]: + # A simple, uncached, quadratic BPE + parts = [bytes([b]) for b in input] + while True: + # Iterate over all pairs and find the pair we want to merge the most + min_idx = None + min_rank = None + for i, pair in enumerate(zip(parts[:-1], parts[1:])): + rank = mergeable_ranks.get(pair[0] + pair[1]) + if rank is not None and (min_rank is None or rank < min_rank): + min_idx = i + min_rank = rank + + # If there were no pairs we could merge, we're done! 
+ if min_rank is None: + break + assert min_idx is not None + + # Otherwise, merge that pair and leave the rest unchanged. Then repeat. + parts = parts[:min_idx] + [parts[min_idx] + parts[min_idx + 1]] + parts[min_idx + 2 :] + + tokens = [mergeable_ranks[part] for part in parts] + return tokens + + +def bpe_train(data: str, vocab_size: int, pat_str: str) -> dict[bytes, int]: + # First, add tokens for each individual byte value + if vocab_size < 2**8: + raise ValueError("vocab_size must be at least 256, so we can encode all bytes") + ranks = {} + for i in range(2**8): + ranks[bytes([i])] = i + + # Splinter up our data into lists of bytes + # data = "Hello world" + # words = [ + # [b'H', b'e', b'l', b'l', b'o'], + # [b' ', b'w', b'o', b'r', b'l', b'd'] + # ] + words: list[list[bytes]] = [ + [bytes([b]) for b in word.encode("utf-8")] for word in re.findall(pat_str, data) + ] + + # Now, use our data to figure out which merges we should make + while len(ranks) < vocab_size: + # Find the most common pair. This will become our next token + stats = collections.Counter() + for piece in words: + for pair in zip(piece[:-1], piece[1:]): + stats[pair] += 1 + + most_common_pair = max(stats, key=lambda x: stats[x]) + token_bytes = most_common_pair[0] + most_common_pair[1] + token = len(ranks) + # Add the new token! + ranks[token_bytes] = token + + # Now merge that most common pair in all the words. That is, update our training data + # to reflect our decision to make that pair into a new token. + new_words = [] + for word in words: + new_word = [] + i = 0 + while i < len(word) - 1: + if (word[i], word[i + 1]) == most_common_pair: + # We found our pair! 
Merge it + new_word.append(token_bytes) + i += 2 + else: + new_word.append(word[i]) + i += 1 + if i == len(word) - 1: + new_word.append(word[i]) + new_words.append(new_word) + words = new_words + + return ranks + + +def train(data: str): + pattern = ( + r"""'s|'t|'re|'ve|'m|'ll|'d| ?[a-zA-Z]+| ?\d+| ?[^\sa-zA-Z\d]+|\s+(?!\S)|\s+""" + ) + enc = SimpleBytePairEncoding.train(data, vocab_size=1024, pat_str=pattern) + + tokens = enc.encode("hello world") + assert enc.decode(tokens) == "hello world" + + enc.encode(data) + + +def bench_bpe_tokeniser(loops: int) -> float: + DATA = Path(__file__).parent / "data" / "frankenstein_intro.txt" + with open(DATA, "r", encoding="utf8") as f: + data = f.read() + + range_it = range(loops) + + t0 = pyperf.perf_counter() + for _ in range_it: + train(data) + return pyperf.perf_counter() - t0 + + +if __name__ == "__main__": + runner = pyperf.Runner() + runner.metadata["description"] = "Benchmark a BPE tokeniser" + runner.bench_time_func("bpe_tokeniser", bench_bpe_tokeniser) diff --git a/pyperformance/data-files/benchmarks/bm_coroutines/pyproject.toml b/pyperformance/data-files/benchmarks/bm_coroutines/pyproject.toml index c5b10060..4510edfb 100644 --- a/pyperformance/data-files/benchmarks/bm_coroutines/pyproject.toml +++ b/pyperformance/data-files/benchmarks/bm_coroutines/pyproject.toml @@ -7,3 +7,4 @@ dynamic = ["version"] [tool.pyperformance] name = "coroutines" +tags = "asyncio" diff --git a/pyperformance/data-files/benchmarks/bm_dask/pyproject.toml b/pyperformance/data-files/benchmarks/bm_dask/pyproject.toml index 747fb2f2..4af6cd2e 100644 --- a/pyperformance/data-files/benchmarks/bm_dask/pyproject.toml +++ b/pyperformance/data-files/benchmarks/bm_dask/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "pyperformance_bm_dask" -requires-python = ">=3.8" +requires-python = ">=3.12" dependencies = ["pyperf"] urls = {repository = "https://github.com/python/pyperformance"} dynamic = ["version"] diff --git 
a/pyperformance/data-files/benchmarks/bm_dask/requirements.txt b/pyperformance/data-files/benchmarks/bm_dask/requirements.txt index 374cf38a..3b8ca8e1 100644 --- a/pyperformance/data-files/benchmarks/bm_dask/requirements.txt +++ b/pyperformance/data-files/benchmarks/bm_dask/requirements.txt @@ -1 +1 @@ -dask[distributed]==2022.2.0 +dask[distributed]==2024.10.0 diff --git a/pyperformance/data-files/benchmarks/bm_dask/run_benchmark.py b/pyperformance/data-files/benchmarks/bm_dask/run_benchmark.py index cf6610d2..2a684e41 100644 --- a/pyperformance/data-files/benchmarks/bm_dask/run_benchmark.py +++ b/pyperformance/data-files/benchmarks/bm_dask/run_benchmark.py @@ -5,6 +5,7 @@ """ from dask.distributed import Client, Worker, Scheduler, wait +from dask import distributed import pyperf diff --git a/pyperformance/data-files/benchmarks/bm_decimal_factorial/pyproject.toml b/pyperformance/data-files/benchmarks/bm_decimal_factorial/pyproject.toml new file mode 100644 index 00000000..a6ece1c5 --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_decimal_factorial/pyproject.toml @@ -0,0 +1,10 @@ +[project] +name = "pyperformance_bm_decimal_factorial" +requires-python = ">=3.8" +dependencies = ["pyperf"] +urls = {repository = "https://github.com/python/pyperformance"} +dynamic = ["version"] + +[tool.pyperformance] +name = "decimal_factorial" +tags = "decimal" diff --git a/pyperformance/data-files/benchmarks/bm_decimal_factorial/run_benchmark.py b/pyperformance/data-files/benchmarks/bm_decimal_factorial/run_benchmark.py new file mode 100644 index 00000000..dceba16b --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_decimal_factorial/run_benchmark.py @@ -0,0 +1,50 @@ +""" +Calculate `factorial` using the decimal module. + +- 2024-06-14: Michael Droettboom copied this from + Modules/_decimal/tests/bench.py in the CPython source and adapted to use + pyperf. +""" + +# Original copyright notice in CPython source: + +# +# Copyright (C) 2001-2012 Python Software Foundation. 
All Rights Reserved. +# Modified and extended by Stefan Krah. +# + + +import decimal + + +import pyperf + + +def factorial(n, m): + if n > m: + return factorial(m, n) + elif m == 0: + return 1 + elif n == m: + return n + else: + return factorial(n, (n + m) // 2) * factorial((n + m) // 2 + 1, m) + + +def bench_decimal_factorial(): + c = decimal.getcontext() + c.prec = decimal.MAX_PREC + c.Emax = decimal.MAX_EMAX + c.Emin = decimal.MIN_EMIN + + for n in [10000, 100000]: + # C version of decimal + _ = factorial(decimal.Decimal(n), 0) + + +if __name__ == "__main__": + runner = pyperf.Runner() + runner.metadata["description"] = "decimal_factorial benchmark" + + args = runner.parse_args() + runner.bench_func("decimal_factorial", bench_decimal_factorial) diff --git a/pyperformance/data-files/benchmarks/bm_decimal_pi/pyproject.toml b/pyperformance/data-files/benchmarks/bm_decimal_pi/pyproject.toml new file mode 100644 index 00000000..0cbb7e7f --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_decimal_pi/pyproject.toml @@ -0,0 +1,10 @@ +[project] +name = "pyperformance_bm_decimal_pi" +requires-python = ">=3.8" +dependencies = ["pyperf"] +urls = {repository = "https://github.com/python/pyperformance"} +dynamic = ["version"] + +[tool.pyperformance] +name = "decimal_pi" +tags = "decimal" diff --git a/pyperformance/data-files/benchmarks/bm_decimal_pi/run_benchmark.py b/pyperformance/data-files/benchmarks/bm_decimal_pi/run_benchmark.py new file mode 100644 index 00000000..afb7ae83 --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_decimal_pi/run_benchmark.py @@ -0,0 +1,50 @@ +""" +Calculate `pi` using the decimal module. + +The `pidigits` benchmark does a similar thing using regular (long) ints. + +- 2024-06-14: Michael Droettboom copied this from + Modules/_decimal/tests/bench.py in the CPython source and adapted to use + pyperf. +""" + +# Original copyright notice in CPython source: + +# +# Copyright (C) 2001-2012 Python Software Foundation. 
All Rights Reserved. +# Modified and extended by Stefan Krah. +# + + +import decimal + + +import pyperf + + +def pi_decimal(): + """decimal""" + D = decimal.Decimal + lasts, t, s, n, na, d, da = D(0), D(3), D(3), D(1), D(0), D(0), D(24) + while s != lasts: + lasts = s + n, na = n + na, na + 8 + d, da = d + da, da + 32 + t = (t * n) / d + s += t + return s + + +def bench_decimal_pi(): + for prec in [9, 19]: + decimal.getcontext().prec = prec + for _ in range(10000): + _ = pi_decimal() + + +if __name__ == "__main__": + runner = pyperf.Runner() + runner.metadata["description"] = "decimal_pi benchmark" + + args = runner.parse_args() + runner.bench_func("decimal_pi", bench_decimal_pi) diff --git a/pyperformance/data-files/benchmarks/bm_networkx/bm_networkx_connected_components.toml b/pyperformance/data-files/benchmarks/bm_networkx/bm_networkx_connected_components.toml new file mode 100644 index 00000000..fc574dd3 --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_networkx/bm_networkx_connected_components.toml @@ -0,0 +1,3 @@ +[tool.pyperformance] +name = "networkx_connected_components" +extra_opts = ["connected_components"] diff --git a/pyperformance/data-files/benchmarks/bm_networkx/bm_networkx_k_core.toml b/pyperformance/data-files/benchmarks/bm_networkx/bm_networkx_k_core.toml new file mode 100644 index 00000000..cdffa6ce --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_networkx/bm_networkx_k_core.toml @@ -0,0 +1,3 @@ +[tool.pyperformance] +name = "networkx_k_core" +extra_opts = ["k_core"] diff --git a/pyperformance/data-files/benchmarks/bm_networkx/data/amazon0302.txt.gz b/pyperformance/data-files/benchmarks/bm_networkx/data/amazon0302.txt.gz new file mode 100755 index 00000000..f8316509 Binary files /dev/null and b/pyperformance/data-files/benchmarks/bm_networkx/data/amazon0302.txt.gz differ diff --git a/pyperformance/data-files/benchmarks/bm_networkx/pyproject.toml b/pyperformance/data-files/benchmarks/bm_networkx/pyproject.toml new file mode 
100644 index 00000000..5ce2974c --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_networkx/pyproject.toml @@ -0,0 +1,13 @@ +[project] +name = "pyperformance_bm_networkx" +requires-python = ">=3.11" +dependencies = [ + "pyperf", + "networkx", +] +urls.repository = "https://github.com/python/pyperformance" +dynamic = ["version"] + +[tool.pyperformance] +name = "networkx_shortest_path" +extra_opts = ["shortest_path"] diff --git a/pyperformance/data-files/benchmarks/bm_networkx/requirements.txt b/pyperformance/data-files/benchmarks/bm_networkx/requirements.txt new file mode 100644 index 00000000..5e0aaf71 --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_networkx/requirements.txt @@ -0,0 +1 @@ +networkx==3.4.2 diff --git a/pyperformance/data-files/benchmarks/bm_networkx/run_benchmark.py b/pyperformance/data-files/benchmarks/bm_networkx/run_benchmark.py new file mode 100644 index 00000000..27ac7ed7 --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_networkx/run_benchmark.py @@ -0,0 +1,61 @@ +""" +Some graph algorithm benchmarks using networkx + +This uses the public domain Amazon data set from the SNAP benchmarks: + + https://snap.stanford.edu/data/amazon0302.html + +Choice of benchmarks inspired by Timothy Lin's work here: + + https://www.timlrx.com/blog/benchmark-of-popular-graph-network-packages +""" + +import collections +from pathlib import Path + +import networkx + +import pyperf + + +DATA_FILE = Path(__file__).parent / "data" / "amazon0302.txt.gz" + + +graph = networkx.read_adjlist(DATA_FILE) + + +def bench_shortest_path(): + collections.deque(networkx.shortest_path_length(graph, "0")) + + +def bench_connected_components(): + networkx.number_connected_components(graph) + + +def bench_k_core(): + networkx.k_core(graph) + + +BENCHMARKS = { + "shortest_path": bench_shortest_path, + "connected_components": bench_connected_components, + "k_core": bench_k_core, +} + + +def add_cmdline_args(cmd, args): + cmd.append(args.benchmark) + + +def 
add_parser_args(parser): + parser.add_argument("benchmark", choices=BENCHMARKS, help="Which benchmark to run.") + + +if __name__ == "__main__": + runner = pyperf.Runner(add_cmdline_args=add_cmdline_args) + runner.metadata["description"] = "NetworkX benchmark" + add_parser_args(runner.argparser) + args = runner.parse_args() + benchmark = args.benchmark + + runner.bench_func(args.benchmark, BENCHMARKS[args.benchmark]) diff --git a/pyperformance/data-files/benchmarks/bm_regex_v8/run_benchmark.py b/pyperformance/data-files/benchmarks/bm_regex_v8/run_benchmark.py index e31b6433..7d231632 100644 --- a/pyperformance/data-files/benchmarks/bm_regex_v8/run_benchmark.py +++ b/pyperformance/data-files/benchmarks/bm_regex_v8/run_benchmark.py @@ -280,7 +280,7 @@ def block0(): regexs[1].search(r'uggc://jjj.snprobbx.pbz/ybtva.cuc') for i in range(739): - regexs[2].sub(r'', 'QBZPbageby_cynprubyqre', subcount[2]) + regexs[2].sub(r'', 'QBZPbageby_cynprubyqre', count=subcount[2]) for i in range(598): regexs[1].search(r'uggc://jjj.snprobbx.pbz/') @@ -299,10 +299,10 @@ def block0(): regexs[4].search(r'/ZlFcnprUbzrcntr/Vaqrk-FvgrUbzr,10000000') for i in range(177): - regexs[5].sub(r'', 'vachggrkg', subcount[5]) + regexs[5].sub(r'', 'vachggrkg', count=subcount[5]) for i in range(170): - regexs[6].sub(r'', '528.9', subcount[6]) + regexs[6].sub(r'', '528.9', count=subcount[6]) regexs[7].search(r'528') for i in range(156): @@ -313,7 +313,7 @@ def block0(): regexs[0].search(r'xrlcerff') for i in range(139): - regexs[6].sub(r'', '521', subcount[6]) + regexs[6].sub(r'', '521', count=subcount[6]) # This has a different output to the V8 version. # It could just be a difference in the engines. 
@@ -322,31 +322,31 @@ def block0(): re.search(r'JroXvg\/(\S+)', strings[0]) for i in range(137): - regexs[10].sub(r'', 'qvi .so_zrah', subcount[10]) - re.sub(r'\[', '', 'qvi .so_zrah', 0) - regexs[11].sub(r'', 'qvi.so_zrah', subcount[11]) + regexs[10].sub(r'', 'qvi .so_zrah', count=subcount[10]) + re.sub(r'\[', '', 'qvi .so_zrah', count=0) + regexs[11].sub(r'', 'qvi.so_zrah', count=subcount[11]) for i in range(117): - regexs[2].sub(r'', 'uvqqra_ryrz', subcount[2]) + regexs[2].sub(r'', 'uvqqra_ryrz', count=subcount[2]) for i in range(95): re.search(r'(?:^|;)\s*sevraqfgre_ynat=([^;]*)', 'sevraqfgre_naba=nvq%3Qn6ss9p85n868ro9s059pn854735956o3%26ers%3Q%26df%3Q%26vpgl%3QHF') for i in range(93): - regexs[12].sub(r'', 'uggc://ubzr.zlfcnpr.pbz/vaqrk.psz', subcount[12]) + regexs[12].sub(r'', 'uggc://ubzr.zlfcnpr.pbz/vaqrk.psz', count=subcount[12]) regexs[13].search(r'uggc://ubzr.zlfcnpr.pbz/vaqrk.psz') for i in range(92): - re.sub(r'([a-zA-Z]|\s)+', '', strings[1], 1) + re.sub(r'([a-zA-Z]|\s)+', '', strings[1], count=1) for i in range(85): - regexs[14].sub(r'', 'svefg', subcount[14]) - regexs[15].sub(r'', 'svefg', subcount[15]) + regexs[14].sub(r'', 'svefg', count=subcount[14]) + regexs[15].sub(r'', 'svefg', count=subcount[15]) regexs[12].sub( - r'', 'uggc://cebsvyr.zlfcnpr.pbz/vaqrk.psz', subcount[12]) - regexs[14].sub(r'', 'ynfg', subcount[14]) - regexs[15].sub(r'', 'ynfg', subcount[15]) + r'', 'uggc://cebsvyr.zlfcnpr.pbz/vaqrk.psz', count=subcount[12]) + regexs[14].sub(r'', 'ynfg', count=subcount[14]) + regexs[15].sub(r'', 'ynfg', count=subcount[15]) regexs[16].search(r'qvfcynl') regexs[13].search(r'uggc://cebsvyr.zlfcnpr.pbz/vaqrk.psz') @@ -356,16 +356,16 @@ def block1(): regexs[8].search(r'VC=74.125.75.1') for i in range(78): - re.sub(r'(\s)+e', '', '9.0 e115', 1) - re.sub(r'.', '', 'k', 1) + re.sub(r'(\s)+e', '', '9.0 e115', count=1) + re.sub(r'.', '', 'k', count=1) # This prints a unicode escape where the V8 version prints the # unicode character. 
- regexs[17].sub(r'', strings[2], subcount[17]) + regexs[17].sub(r'', strings[2], count=subcount[17]) # This prints a unicode escape where the V8 version prints the # unicode character. - regexs[17].sub(r'', strings[3], subcount[17]) + regexs[17].sub(r'', strings[3], count=subcount[17]) regexs[8].search(r'144631658') regexs[8].search(r'Pbhagel=IIZ%3Q') @@ -383,12 +383,12 @@ def block1(): for i in range(77): regexs[12].sub( - r'', 'uggc://zrffntvat.zlfcnpr.pbz/vaqrk.psz', subcount[12]) + r'', 'uggc://zrffntvat.zlfcnpr.pbz/vaqrk.psz', count=subcount[12]) regexs[13].search(r'uggc://zrffntvat.zlfcnpr.pbz/vaqrk.psz') for i in range(73): regexs[18].sub( - r'', 'FrffvbaFgbentr=%7O%22GnoThvq%22%3N%7O%22thvq%22%3N1231367125017%7Q%7Q', subcount[18]) + r'', 'FrffvbaFgbentr=%7O%22GnoThvq%22%3N%7O%22thvq%22%3N1231367125017%7Q%7Q', count=subcount[18]) for i in range(72): regexs[1].search(strings[6]) @@ -397,12 +397,12 @@ def block1(): regexs[19].search(r'') for i in range(70): - regexs[11].sub(r'', '3.5.0.0', subcount[11]) - re.sub(r'd1', '', strings[7], 0) - re.sub(r'NQ_VQ', '', strings[8], 0) - re.sub(r'd2', '', strings[9], 0) + regexs[11].sub(r'', '3.5.0.0', count=subcount[11]) + re.sub(r'd1', '', strings[7], count=0) + re.sub(r'NQ_VQ', '', strings[8], count=0) + re.sub(r'd2', '', strings[9], count=0) re.sub( - r'_', '', 'NI%3Q1_CI%3Q1_PI%3Q1_EI%3Q1_HI%3Q1_HP%3Q1_IC%3Q0.0.0.0_IH%3Q0', 0) + r'_', '', 'NI%3Q1_CI%3Q1_PI%3Q1_EI%3Q1_HI%3Q1_HP%3Q1_IC%3Q0.0.0.0_IH%3Q0', count=0) regexs[20].split( r'svz_zlfcnpr_ubzrcntr_abgybttrqva,svz_zlfcnpr_aba_HTP,svz_zlfcnpr_havgrq-fgngrf') regexs[21].search(r'ybnqvat') @@ -420,17 +420,17 @@ def block1(): for i in range(44): regexs[12].sub( - r'', 'uggc://sevraqf.zlfcnpr.pbz/vaqrk.psz', subcount[12]) + r'', 'uggc://sevraqf.zlfcnpr.pbz/vaqrk.psz', count=subcount[12]) regexs[13].search(r'uggc://sevraqf.zlfcnpr.pbz/vaqrk.psz') def block2(): for i in range(40): - regexs[14].sub(r'', 'fryrpgrq', subcount[14]) - regexs[15].sub(r'', 'fryrpgrq', 
subcount[15]) + regexs[14].sub(r'', 'fryrpgrq', count=subcount[14]) + regexs[15].sub(r'', 'fryrpgrq', count=subcount[15]) for i in range(39): - re.sub(r'\buvqqra_ryrz\b', '', 'vachggrkg uvqqra_ryrz', 0) + re.sub(r'\buvqqra_ryrz\b', '', 'vachggrkg uvqqra_ryrz', count=0) regexs[3].search(r'vachggrkg ') regexs[3].search(r'vachggrkg') regexs[22].search(r'HVYvaxOhggba') @@ -446,10 +446,10 @@ def block2(): r'FrffvbaQQS2=111soqs57qo8o8480qo18sor2011r3n591q7s6s37r120904') for i in range(35): - regexs[14].sub(r'', 'puvyq p1 svefg', subcount[14]) - regexs[15].sub(r'', 'puvyq p1 svefg', subcount[15]) - regexs[14].sub(r'', 'sylbhg pybfrq', subcount[14]) - regexs[15].sub(r'', 'sylbhg pybfrq', subcount[15]) + regexs[14].sub(r'', 'puvyq p1 svefg', count=subcount[14]) + regexs[15].sub(r'', 'puvyq p1 svefg', count=subcount[15]) + regexs[14].sub(r'', 'sylbhg pybfrq', count=subcount[14]) + regexs[15].sub(r'', 'sylbhg pybfrq', count=subcount[15]) for i in range(34): regexs[19].search(r'gno2') @@ -465,7 +465,7 @@ def block2(): re.search(r'puebzr', strings[0], re.IGNORECASE) for i in range(31): - regexs[23].sub(r'', 'uggc://jjj.snprobbx.pbz/', subcount[23]) + regexs[23].sub(r'', 'uggc://jjj.snprobbx.pbz/', count=subcount[23]) regexs[8].search(r'SbeprqRkcvengvba=633669358527244818') regexs[8].search(r'VC=66.249.85.130') regexs[8].search( @@ -474,45 +474,45 @@ def block2(): regexs[24].search(r'uggc://jjj.snprobbx.pbz/') for i in range(30): - regexs[6].sub(r'', '419', subcount[6]) + regexs[6].sub(r'', '419', count=subcount[6]) re.search(r'(?:^|\s+)gvzrfgnzc(?:\s+|$)', 'gvzrfgnzc') regexs[7].search(r'419') for i in range(29): - regexs[23].sub(r'', 'uggc://jjj.snprobbx.pbz/ybtva.cuc', subcount[23]) + regexs[23].sub(r'', 'uggc://jjj.snprobbx.pbz/ybtva.cuc', count=subcount[23]) for i in range(28): - regexs[25].sub(r'', 'Funer guvf tnqtrg', subcount[25]) - regexs[12].sub(r'', 'Funer guvf tnqtrg', subcount[12]) + regexs[25].sub(r'', 'Funer guvf tnqtrg', count=subcount[25]) + regexs[12].sub(r'', 
'Funer guvf tnqtrg', count=subcount[12]) regexs[26].search(r'uggc://jjj.tbbtyr.pbz/vt/qverpgbel') def block3(): for i in range(27): - re.sub(r'[A-Za-z]', '', 'e115', 0) + re.sub(r'[A-Za-z]', '', 'e115', count=0) for i in range(23): - regexs[27].sub(r'', 'qvfcynl', subcount[27]) - regexs[27].sub(r'', 'cbfvgvba', subcount[27]) + regexs[27].sub(r'', 'qvfcynl', count=subcount[27]) + regexs[27].sub(r'', 'cbfvgvba', count=subcount[27]) for i in range(22): - regexs[14].sub(r'', 'unaqyr', subcount[14]) - regexs[15].sub(r'', 'unaqyr', subcount[15]) - regexs[14].sub(r'', 'yvar', subcount[14]) - regexs[15].sub(r'', 'yvar', subcount[15]) - regexs[14].sub(r'', 'cnerag puebzr6 fvatyr1 gno', subcount[14]) - regexs[15].sub(r'', 'cnerag puebzr6 fvatyr1 gno', subcount[15]) - regexs[14].sub(r'', 'fyvqre', subcount[14]) - regexs[15].sub(r'', 'fyvqre', subcount[15]) + regexs[14].sub(r'', 'unaqyr', count=subcount[14]) + regexs[15].sub(r'', 'unaqyr', count=subcount[15]) + regexs[14].sub(r'', 'yvar', count=subcount[14]) + regexs[15].sub(r'', 'yvar', count=subcount[15]) + regexs[14].sub(r'', 'cnerag puebzr6 fvatyr1 gno', count=subcount[14]) + regexs[15].sub(r'', 'cnerag puebzr6 fvatyr1 gno', count=subcount[15]) + regexs[14].sub(r'', 'fyvqre', count=subcount[14]) + regexs[15].sub(r'', 'fyvqre', count=subcount[15]) regexs[28].search(r'') for i in range(21): - regexs[12].sub(r'', 'uggc://jjj.zlfcnpr.pbz/', subcount[12]) + regexs[12].sub(r'', 'uggc://jjj.zlfcnpr.pbz/', count=subcount[12]) regexs[13].search(r'uggc://jjj.zlfcnpr.pbz/') for i in range(20): - regexs[29].sub(r'', 'cntrivrj', subcount[29]) - regexs[30].sub(r'', 'cntrivrj', subcount[30]) + regexs[29].sub(r'', 'cntrivrj', count=subcount[29]) + regexs[30].sub(r'', 'cntrivrj', count=subcount[30]) regexs[19].search(r'ynfg') regexs[19].search(r'ba svefg') regexs[8].search(r'VC=74.125.75.3') @@ -523,7 +523,7 @@ def block3(): for i in range(18): regexs[32].split(strings[10]) regexs[32].split(strings[11]) - regexs[33].sub(r'', strings[12], 
subcount[33]) + regexs[33].sub(r'', strings[12], count=subcount[33]) regexs[8].search(r'144631658.0.10.1231363570') regexs[8].search( r'144631658.1231363570.1.1.hgzpfe=(qverpg)|hgzppa=(qverpg)|hgzpzq=(abar)') @@ -544,39 +544,39 @@ def block3(): re.match(r'bcren', strings[0], re.IGNORECASE) regexs[32].split(strings[15]) regexs[32].split(strings[16]) - regexs[14].sub(r'', 'ohggba', subcount[14]) - regexs[15].sub(r'', 'ohggba', subcount[15]) - regexs[14].sub(r'', 'puvyq p1 svefg sylbhg pybfrq', subcount[14]) - regexs[15].sub(r'', 'puvyq p1 svefg sylbhg pybfrq', subcount[15]) - regexs[14].sub(r'', 'pvgvrf', subcount[14]) - regexs[15].sub(r'', 'pvgvrf', subcount[15]) - regexs[14].sub(r'', 'pybfrq', subcount[14]) - regexs[15].sub(r'', 'pybfrq', subcount[15]) - regexs[14].sub(r'', 'qry', subcount[14]) - regexs[15].sub(r'', 'qry', subcount[15]) - regexs[14].sub(r'', 'uqy_zba', subcount[14]) - regexs[15].sub(r'', 'uqy_zba', subcount[15]) - regexs[33].sub(r'', strings[17], subcount[33]) - re.sub(r'%3P', '', strings[18], 0) - re.sub(r'%3R', '', strings[18], 0) - re.sub(r'%3q', '', strings[18], 0) - regexs[35].sub(r'', strings[18], subcount[35]) - regexs[14].sub(r'', 'yvaxyvfg16', subcount[14]) - regexs[15].sub(r'', 'yvaxyvfg16', subcount[15]) - regexs[14].sub(r'', 'zvahf', subcount[14]) - regexs[15].sub(r'', 'zvahf', subcount[15]) - regexs[14].sub(r'', 'bcra', subcount[14]) - regexs[15].sub(r'', 'bcra', subcount[15]) - regexs[14].sub(r'', 'cnerag puebzr5 fvatyr1 ps NU', subcount[14]) - regexs[15].sub(r'', 'cnerag puebzr5 fvatyr1 ps NU', subcount[15]) - regexs[14].sub(r'', 'cynlre', subcount[14]) - regexs[15].sub(r'', 'cynlre', subcount[15]) - regexs[14].sub(r'', 'cyhf', subcount[14]) - regexs[15].sub(r'', 'cyhf', subcount[15]) - regexs[14].sub(r'', 'cb_uqy', subcount[14]) - regexs[15].sub(r'', 'cb_uqy', subcount[15]) - regexs[14].sub(r'', 'hyJVzt', subcount[14]) - regexs[15].sub(r'', 'hyJVzt', subcount[15]) + regexs[14].sub(r'', 'ohggba', count=subcount[14]) + 
regexs[15].sub(r'', 'ohggba', count=subcount[15]) + regexs[14].sub(r'', 'puvyq p1 svefg sylbhg pybfrq', count=subcount[14]) + regexs[15].sub(r'', 'puvyq p1 svefg sylbhg pybfrq', count=subcount[15]) + regexs[14].sub(r'', 'pvgvrf', count=subcount[14]) + regexs[15].sub(r'', 'pvgvrf', count=subcount[15]) + regexs[14].sub(r'', 'pybfrq', count=subcount[14]) + regexs[15].sub(r'', 'pybfrq', count=subcount[15]) + regexs[14].sub(r'', 'qry', count=subcount[14]) + regexs[15].sub(r'', 'qry', count=subcount[15]) + regexs[14].sub(r'', 'uqy_zba', count=subcount[14]) + regexs[15].sub(r'', 'uqy_zba', count=subcount[15]) + regexs[33].sub(r'', strings[17], count=subcount[33]) + re.sub(r'%3P', '', strings[18], count=0) + re.sub(r'%3R', '', strings[18], count=0) + re.sub(r'%3q', '', strings[18], count=0) + regexs[35].sub(r'', strings[18], count=subcount[35]) + regexs[14].sub(r'', 'yvaxyvfg16', count=subcount[14]) + regexs[15].sub(r'', 'yvaxyvfg16', count=subcount[15]) + regexs[14].sub(r'', 'zvahf', count=subcount[14]) + regexs[15].sub(r'', 'zvahf', count=subcount[15]) + regexs[14].sub(r'', 'bcra', count=subcount[14]) + regexs[15].sub(r'', 'bcra', count=subcount[15]) + regexs[14].sub(r'', 'cnerag puebzr5 fvatyr1 ps NU', count=subcount[14]) + regexs[15].sub(r'', 'cnerag puebzr5 fvatyr1 ps NU', count=subcount[15]) + regexs[14].sub(r'', 'cynlre', count=subcount[14]) + regexs[15].sub(r'', 'cynlre', count=subcount[15]) + regexs[14].sub(r'', 'cyhf', count=subcount[14]) + regexs[15].sub(r'', 'cyhf', count=subcount[15]) + regexs[14].sub(r'', 'cb_uqy', count=subcount[14]) + regexs[15].sub(r'', 'cb_uqy', count=subcount[15]) + regexs[14].sub(r'', 'hyJVzt', count=subcount[14]) + regexs[15].sub(r'', 'hyJVzt', count=subcount[15]) regexs[8].search(r'144631658.0.10.1231363638') regexs[8].search( r'144631658.1231363638.1.1.hgzpfe=(qverpg)|hgzppa=(qverpg)|hgzpzq=(abar)') @@ -602,7 +602,7 @@ def block3(): def block4(): for i in range(16): - re.sub(r'\*', '', '', 0) + re.sub(r'\*', '', '', count=0) 
re.search(r'\bnpgvir\b', 'npgvir') re.search(r'sversbk', strings[0], re.IGNORECASE) regexs[36].search(r'glcr') @@ -613,10 +613,10 @@ def block4(): regexs[32].split(strings[21]) regexs[32].split(strings[22]) regexs[12].sub( - r'', 'uggc://ohyyrgvaf.zlfcnpr.pbz/vaqrk.psz', subcount[12]) - regexs[33].sub(r'', strings[23], subcount[33]) - regexs[37].sub(r'', 'yv', subcount[37]) - regexs[18].sub(r'', 'yv', subcount[18]) + r'', 'uggc://ohyyrgvaf.zlfcnpr.pbz/vaqrk.psz', count=subcount[12]) + regexs[33].sub(r'', strings[23], count=subcount[33]) + regexs[37].sub(r'', 'yv', count=subcount[37]) + regexs[18].sub(r'', 'yv', count=subcount[18]) regexs[8].search(r'144631658.0.10.1231367822') regexs[8].search( r'144631658.1231367822.1.1.hgzpfe=(qverpg)|hgzppa=(qverpg)|hgzpzq=(abar)') @@ -643,22 +643,22 @@ def block4(): regexs[38].search(r'yv') for i in range(14): - regexs[18].sub(r'', '', subcount[18]) - re.sub(r'(\s+e|\s+o[0-9]+)', '', '9.0 e115', 1) - re.sub(r'<', '', 'Funer guvf tnqtrg', 0) - re.sub(r'>', '', 'Funer guvf tnqtrg', 0) - regexs[39].sub(r'', 'Funer guvf tnqtrg', subcount[39]) + regexs[18].sub(r'', '', count=subcount[18]) + re.sub(r'(\s+e|\s+o[0-9]+)', '', '9.0 e115', count=1) + re.sub(r'<', '', 'Funer guvf tnqtrg', count=0) + re.sub(r'>', '', 'Funer guvf tnqtrg', count=0) + regexs[39].sub(r'', 'Funer guvf tnqtrg', count=subcount[39]) regexs[12].sub( - r'', 'uggc://cebsvyrrqvg.zlfcnpr.pbz/vaqrk.psz', subcount[12]) - regexs[40].sub(r'', 'grnfre', subcount[40]) - regexs[41].sub(r'', 'grnfre', subcount[41]) - regexs[42].sub(r'', 'grnfre', subcount[42]) - regexs[43].sub(r'', 'grnfre', subcount[43]) - regexs[44].sub(r'', 'grnfre', subcount[44]) - regexs[45].sub(r'', 'grnfre', subcount[45]) - regexs[46].sub(r'', 'grnfre', subcount[46]) - regexs[47].sub(r'', 'grnfre', subcount[47]) - regexs[48].sub(r'', 'grnfre', subcount[48]) + r'', 'uggc://cebsvyrrqvg.zlfcnpr.pbz/vaqrk.psz', count=subcount[12]) + regexs[40].sub(r'', 'grnfre', count=subcount[40]) + regexs[41].sub(r'', 
'grnfre', count=subcount[41]) + regexs[42].sub(r'', 'grnfre', count=subcount[42]) + regexs[43].sub(r'', 'grnfre', count=subcount[43]) + regexs[44].sub(r'', 'grnfre', count=subcount[44]) + regexs[45].sub(r'', 'grnfre', count=subcount[45]) + regexs[46].sub(r'', 'grnfre', count=subcount[46]) + regexs[47].sub(r'', 'grnfre', count=subcount[47]) + regexs[48].sub(r'', 'grnfre', count=subcount[48]) regexs[16].search(r'znetva-gbc') regexs[16].search(r'cbfvgvba') regexs[19].search(r'gno1') @@ -674,84 +674,84 @@ def block4(): def block5(): for i in range(13): - regexs[14].sub(r'', 'purpx', subcount[14]) - regexs[15].sub(r'', 'purpx', subcount[15]) - regexs[14].sub(r'', 'pvgl', subcount[14]) - regexs[15].sub(r'', 'pvgl', subcount[15]) - regexs[14].sub(r'', 'qrpe fyvqrgrkg', subcount[14]) - regexs[15].sub(r'', 'qrpe fyvqrgrkg', subcount[15]) - regexs[14].sub(r'', 'svefg fryrpgrq', subcount[14]) - regexs[15].sub(r'', 'svefg fryrpgrq', subcount[15]) - regexs[14].sub(r'', 'uqy_rag', subcount[14]) - regexs[15].sub(r'', 'uqy_rag', subcount[15]) - regexs[14].sub(r'', 'vape fyvqrgrkg', subcount[14]) - regexs[15].sub(r'', 'vape fyvqrgrkg', subcount[15]) - regexs[5].sub(r'', 'vachggrkg QBZPbageby_cynprubyqre', subcount[5]) + regexs[14].sub(r'', 'purpx', count=subcount[14]) + regexs[15].sub(r'', 'purpx', count=subcount[15]) + regexs[14].sub(r'', 'pvgl', count=subcount[14]) + regexs[15].sub(r'', 'pvgl', count=subcount[15]) + regexs[14].sub(r'', 'qrpe fyvqrgrkg', count=subcount[14]) + regexs[15].sub(r'', 'qrpe fyvqrgrkg', count=subcount[15]) + regexs[14].sub(r'', 'svefg fryrpgrq', count=subcount[14]) + regexs[15].sub(r'', 'svefg fryrpgrq', count=subcount[15]) + regexs[14].sub(r'', 'uqy_rag', count=subcount[14]) + regexs[15].sub(r'', 'uqy_rag', count=subcount[15]) + regexs[14].sub(r'', 'vape fyvqrgrkg', count=subcount[14]) + regexs[15].sub(r'', 'vape fyvqrgrkg', count=subcount[15]) + regexs[5].sub(r'', 'vachggrkg QBZPbageby_cynprubyqre', count=subcount[5]) regexs[14].sub( - r'', 'cnerag 
puebzr6 fvatyr1 gno fryrpgrq', subcount[14]) + r'', 'cnerag puebzr6 fvatyr1 gno fryrpgrq', count=subcount[14]) regexs[15].sub( - r'', 'cnerag puebzr6 fvatyr1 gno fryrpgrq', subcount[15]) - regexs[14].sub(r'', 'cb_guz', subcount[14]) - regexs[15].sub(r'', 'cb_guz', subcount[15]) - regexs[14].sub(r'', 'fhozvg', subcount[14]) - regexs[15].sub(r'', 'fhozvg', subcount[15]) + r'', 'cnerag puebzr6 fvatyr1 gno fryrpgrq', count=subcount[15]) + regexs[14].sub(r'', 'cb_guz', count=subcount[14]) + regexs[15].sub(r'', 'cb_guz', count=subcount[15]) + regexs[14].sub(r'', 'fhozvg', count=subcount[14]) + regexs[15].sub(r'', 'fhozvg', count=subcount[15]) regexs[50].search(r'') re.search(r'NccyrJroXvg\/([^\s]*)', strings[0]) re.search(r'XUGZY', strings[0]) for i in range(12): re.sub(r'(\$\{cebg\})|(\$cebg\b)', '', - '${cebg}://${ubfg}${cngu}/${dz}', 0) - regexs[40].sub(r'', '1', subcount[40]) - regexs[10].sub(r'', '1', subcount[10]) - regexs[51].sub(r'', '1', subcount[51]) - regexs[52].sub(r'', '1', subcount[52]) - regexs[53].sub(r'', '1', subcount[53]) - regexs[39].sub(r'', '1', subcount[39]) - regexs[54].sub(r'', '1', subcount[54]) - re.sub(r'^(.*)\..*$', '', '9.0 e115', 1) - re.sub(r'^.*e(.*)$', '', '9.0 e115', 1) - regexs[55].sub(r'', '', subcount[55]) + '${cebg}://${ubfg}${cngu}/${dz}', count=0) + regexs[40].sub(r'', '1', count=subcount[40]) + regexs[10].sub(r'', '1', count=subcount[10]) + regexs[51].sub(r'', '1', count=subcount[51]) + regexs[52].sub(r'', '1', count=subcount[52]) + regexs[53].sub(r'', '1', count=subcount[53]) + regexs[39].sub(r'', '1', count=subcount[39]) + regexs[54].sub(r'', '1', count=subcount[54]) + re.sub(r'^(.*)\..*$', '', '9.0 e115', count=1) + re.sub(r'^.*e(.*)$', '', '9.0 e115', count=1) + regexs[55].sub(r'', '', count=subcount[55]) regexs[55].sub( - r'', '', subcount[55]) - re.sub(r'^.*\s+(\S+\s+\S+$)', '', strings[1], 1) - regexs[30].sub(r'', 'tzk%2Subzrcntr%2Sfgneg%2Sqr%2S', subcount[30]) - regexs[30].sub(r'', 'tzk', subcount[30]) + r'', '', 
count=subcount[55]) + re.sub(r'^.*\s+(\S+\s+\S+$)', '', strings[1], count=1) + regexs[30].sub(r'', 'tzk%2Subzrcntr%2Sfgneg%2Sqr%2S', count=subcount[30]) + regexs[30].sub(r'', 'tzk', count=subcount[30]) re.sub(r'(\$\{ubfg\})|(\$ubfg\b)', '', - 'uggc://${ubfg}${cngu}/${dz}', 0) + 'uggc://${ubfg}${cngu}/${dz}', count=0) regexs[56].sub( - r'', 'uggc://nqpyvrag.hvzfrei.arg${cngu}/${dz}', subcount[56]) + r'', 'uggc://nqpyvrag.hvzfrei.arg${cngu}/${dz}', count=subcount[56]) re.sub(r'(\$\{dz\})|(\$dz\b)', '', - 'uggc://nqpyvrag.hvzfrei.arg/wf.at/${dz}', 0) - regexs[29].sub(r'', 'frpgvba', subcount[29]) - regexs[30].sub(r'', 'frpgvba', subcount[30]) - regexs[29].sub(r'', 'fvgr', subcount[29]) - regexs[30].sub(r'', 'fvgr', subcount[30]) - regexs[29].sub(r'', 'fcrpvny', subcount[29]) - regexs[30].sub(r'', 'fcrpvny', subcount[30]) + 'uggc://nqpyvrag.hvzfrei.arg/wf.at/${dz}', count=0) + regexs[29].sub(r'', 'frpgvba', count=subcount[29]) + regexs[30].sub(r'', 'frpgvba', count=subcount[30]) + regexs[29].sub(r'', 'fvgr', count=subcount[29]) + regexs[30].sub(r'', 'fvgr', count=subcount[30]) + regexs[29].sub(r'', 'fcrpvny', count=subcount[29]) + regexs[30].sub(r'', 'fcrpvny', count=subcount[30]) regexs[36].search(r'anzr') re.search(r'e', '9.0 e115') def block6(): for i in range(11): - re.sub(r'(?i)##yv0##', '', strings[27], 0) - regexs[57].sub(r'', strings[27], subcount[57]) - regexs[58].sub(r'', strings[28], subcount[58]) - regexs[59].sub(r'', strings[29], subcount[59]) - re.sub(r'(?i)##\/o##', '', strings[30], 0) - re.sub(r'(?i)##\/v##', '', strings[30], 0) - re.sub(r'(?i)##\/h##', '', strings[30], 0) - re.sub(r'(?i)##o##', '', strings[30], 0) - re.sub(r'(?i)##oe##', '', strings[30], 0) - re.sub(r'(?i)##v##', '', strings[30], 0) - re.sub(r'(?i)##h##', '', strings[30], 0) - re.sub(r'(?i)##n##', '', strings[31], 0) - re.sub(r'(?i)##\/n##', '', strings[32], 0) + re.sub(r'(?i)##yv0##', '', strings[27], count=0) + regexs[57].sub(r'', strings[27], count=subcount[57]) + 
regexs[58].sub(r'', strings[28], count=subcount[58]) + regexs[59].sub(r'', strings[29], count=subcount[59]) + re.sub(r'(?i)##\/o##', '', strings[30], count=0) + re.sub(r'(?i)##\/v##', '', strings[30], count=0) + re.sub(r'(?i)##\/h##', '', strings[30], count=0) + re.sub(r'(?i)##o##', '', strings[30], count=0) + re.sub(r'(?i)##oe##', '', strings[30], count=0) + re.sub(r'(?i)##v##', '', strings[30], count=0) + re.sub(r'(?i)##h##', '', strings[30], count=0) + re.sub(r'(?i)##n##', '', strings[31], count=0) + re.sub(r'(?i)##\/n##', '', strings[32], count=0) # This prints a unicode escape where the V8 version # prints the unicode character. - re.sub(r'#~#argjbexybtb#~#', '', strings[33], 0) + re.sub(r'#~#argjbexybtb#~#', '', strings[33], count=0) re.search(r' Zbovyr\/', strings[0]) re.search(r'##yv1##', strings[27], re.IGNORECASE) @@ -784,29 +784,29 @@ def block6(): re.search(r'AbxvnA[^\/]*', strings[0]) for i in range(10): - re.sub(r'(?:^|\s+)bss(?:\s+|$)', '', ' bss', 0) - re.sub(r'(\$\{0\})|(\$0\b)', '', strings[34], 0) - re.sub(r'(\$\{1\})|(\$1\b)', '', strings[34], 0) - re.sub(r'(\$\{pbzcyrgr\})|(\$pbzcyrgr\b)', '', strings[34], 0) - re.sub(r'(\$\{sentzrag\})|(\$sentzrag\b)', '', strings[34], 0) - re.sub(r'(\$\{ubfgcbeg\})|(\$ubfgcbeg\b)', '', strings[34], 0) - regexs[56].sub(r'', strings[34], subcount[56]) - re.sub(r'(\$\{cebgbpby\})|(\$cebgbpby\b)', '', strings[34], 0) - re.sub(r'(\$\{dhrel\})|(\$dhrel\b)', '', strings[34], 0) - regexs[29].sub(r'', 'nqfvmr', subcount[29]) - regexs[30].sub(r'', 'nqfvmr', subcount[30]) - re.sub(r'(\$\{2\})|(\$2\b)', '', 'uggc://${2}${3}${4}${5}', 0) + re.sub(r'(?:^|\s+)bss(?:\s+|$)', '', ' bss', count=0) + re.sub(r'(\$\{0\})|(\$0\b)', '', strings[34], count=0) + re.sub(r'(\$\{1\})|(\$1\b)', '', strings[34], count=0) + re.sub(r'(\$\{pbzcyrgr\})|(\$pbzcyrgr\b)', '', strings[34], count=0) + re.sub(r'(\$\{sentzrag\})|(\$sentzrag\b)', '', strings[34], count=0) + re.sub(r'(\$\{ubfgcbeg\})|(\$ubfgcbeg\b)', '', strings[34], count=0) + 
regexs[56].sub(r'', strings[34], count=subcount[56]) + re.sub(r'(\$\{cebgbpby\})|(\$cebgbpby\b)', '', strings[34], count=0) + re.sub(r'(\$\{dhrel\})|(\$dhrel\b)', '', strings[34], count=0) + regexs[29].sub(r'', 'nqfvmr', count=subcount[29]) + regexs[30].sub(r'', 'nqfvmr', count=subcount[30]) + re.sub(r'(\$\{2\})|(\$2\b)', '', 'uggc://${2}${3}${4}${5}', count=0) re.sub(r'(\$\{3\})|(\$3\b)', '', - 'uggc://wf.hv-cbegny.qr${3}${4}${5}', 0) - regexs[40].sub(r'', 'arjf', subcount[40]) - regexs[41].sub(r'', 'arjf', subcount[41]) - regexs[42].sub(r'', 'arjf', subcount[42]) - regexs[43].sub(r'', 'arjf', subcount[43]) - regexs[44].sub(r'', 'arjf', subcount[44]) - regexs[45].sub(r'', 'arjf', subcount[45]) - regexs[46].sub(r'', 'arjf', subcount[46]) - regexs[47].sub(r'', 'arjf', subcount[47]) - regexs[48].sub(r'', 'arjf', subcount[48]) + 'uggc://wf.hv-cbegny.qr${3}${4}${5}', count=0) + regexs[40].sub(r'', 'arjf', count=subcount[40]) + regexs[41].sub(r'', 'arjf', count=subcount[41]) + regexs[42].sub(r'', 'arjf', count=subcount[42]) + regexs[43].sub(r'', 'arjf', count=subcount[43]) + regexs[44].sub(r'', 'arjf', count=subcount[44]) + regexs[45].sub(r'', 'arjf', count=subcount[45]) + regexs[46].sub(r'', 'arjf', count=subcount[46]) + regexs[47].sub(r'', 'arjf', count=subcount[47]) + regexs[48].sub(r'', 'arjf', count=subcount[48]) re.search(r' PC=i=(\d+)&oe=(.)', strings[35]) regexs[60].search(r' ') regexs[60].search(r' bss') @@ -827,49 +827,49 @@ def block6(): def block7(): for i in range(9): - regexs[40].sub(r'', '0', subcount[40]) - regexs[10].sub(r'', '0', subcount[10]) - regexs[51].sub(r'', '0', subcount[51]) - regexs[52].sub(r'', '0', subcount[52]) - regexs[53].sub(r'', '0', subcount[53]) - regexs[39].sub(r'', '0', subcount[39]) - regexs[54].sub(r'', '0', subcount[54]) - regexs[40].sub(r'', 'Lrf', subcount[40]) - regexs[10].sub(r'', 'Lrf', subcount[10]) - regexs[51].sub(r'', 'Lrf', subcount[51]) - regexs[52].sub(r'', 'Lrf', subcount[52]) - regexs[53].sub(r'', 'Lrf', 
subcount[53]) - regexs[39].sub(r'', 'Lrf', subcount[39]) - regexs[54].sub(r'', 'Lrf', subcount[54]) + regexs[40].sub(r'', '0', count=subcount[40]) + regexs[10].sub(r'', '0', count=subcount[10]) + regexs[51].sub(r'', '0', count=subcount[51]) + regexs[52].sub(r'', '0', count=subcount[52]) + regexs[53].sub(r'', '0', count=subcount[53]) + regexs[39].sub(r'', '0', count=subcount[39]) + regexs[54].sub(r'', '0', count=subcount[54]) + regexs[40].sub(r'', 'Lrf', count=subcount[40]) + regexs[10].sub(r'', 'Lrf', count=subcount[10]) + regexs[51].sub(r'', 'Lrf', count=subcount[51]) + regexs[52].sub(r'', 'Lrf', count=subcount[52]) + regexs[53].sub(r'', 'Lrf', count=subcount[53]) + regexs[39].sub(r'', 'Lrf', count=subcount[39]) + regexs[54].sub(r'', 'Lrf', count=subcount[54]) for i in range(8): - regexs[63].sub(r'', 'Pybfr {0}', subcount[63]) - regexs[63].sub(r'', 'Bcra {0}', subcount[63]) + regexs[63].sub(r'', 'Pybfr {0}', count=subcount[63]) + regexs[63].sub(r'', 'Bcra {0}', count=subcount[63]) regexs[32].split(strings[36]) regexs[32].split(strings[37]) - regexs[14].sub(r'', 'puvyq p1 svefg gnournqref', subcount[14]) - regexs[15].sub(r'', 'puvyq p1 svefg gnournqref', subcount[15]) - regexs[14].sub(r'', 'uqy_fcb', subcount[14]) - regexs[15].sub(r'', 'uqy_fcb', subcount[15]) - regexs[14].sub(r'', 'uvag', subcount[14]) - regexs[15].sub(r'', 'uvag', subcount[15]) - regexs[33].sub(r'', strings[38], subcount[33]) - regexs[14].sub(r'', 'yvfg', subcount[14]) - regexs[15].sub(r'', 'yvfg', subcount[15]) - regexs[30].sub(r'', 'at_bhgre', subcount[30]) - regexs[14].sub(r'', 'cnerag puebzr5 qbhoyr2 NU', subcount[14]) - regexs[15].sub(r'', 'cnerag puebzr5 qbhoyr2 NU', subcount[15]) + regexs[14].sub(r'', 'puvyq p1 svefg gnournqref', count=subcount[14]) + regexs[15].sub(r'', 'puvyq p1 svefg gnournqref', count=subcount[15]) + regexs[14].sub(r'', 'uqy_fcb', count=subcount[14]) + regexs[15].sub(r'', 'uqy_fcb', count=subcount[15]) + regexs[14].sub(r'', 'uvag', count=subcount[14]) + 
regexs[15].sub(r'', 'uvag', count=subcount[15]) + regexs[33].sub(r'', strings[38], count=subcount[33]) + regexs[14].sub(r'', 'yvfg', count=subcount[14]) + regexs[15].sub(r'', 'yvfg', count=subcount[15]) + regexs[30].sub(r'', 'at_bhgre', count=subcount[30]) + regexs[14].sub(r'', 'cnerag puebzr5 qbhoyr2 NU', count=subcount[14]) + regexs[15].sub(r'', 'cnerag puebzr5 qbhoyr2 NU', count=subcount[15]) regexs[14].sub( - r'', 'cnerag puebzr5 dhnq5 ps NU osyvax zbarl', subcount[14]) + r'', 'cnerag puebzr5 dhnq5 ps NU osyvax zbarl', count=subcount[14]) regexs[15].sub( - r'', 'cnerag puebzr5 dhnq5 ps NU osyvax zbarl', subcount[15]) - regexs[14].sub(r'', 'cnerag puebzr6 fvatyr1', subcount[14]) - regexs[15].sub(r'', 'cnerag puebzr6 fvatyr1', subcount[15]) - regexs[14].sub(r'', 'cb_qrs', subcount[14]) - regexs[15].sub(r'', 'cb_qrs', subcount[15]) - regexs[14].sub(r'', 'gnopbagrag', subcount[14]) - regexs[15].sub(r'', 'gnopbagrag', subcount[15]) - regexs[30].sub(r'', 'iv_svefg_gvzr', subcount[30]) + r'', 'cnerag puebzr5 dhnq5 ps NU osyvax zbarl', count=subcount[15]) + regexs[14].sub(r'', 'cnerag puebzr6 fvatyr1', count=subcount[14]) + regexs[15].sub(r'', 'cnerag puebzr6 fvatyr1', count=subcount[15]) + regexs[14].sub(r'', 'cb_qrs', count=subcount[14]) + regexs[15].sub(r'', 'cb_qrs', count=subcount[15]) + regexs[14].sub(r'', 'gnopbagrag', count=subcount[14]) + regexs[15].sub(r'', 'gnopbagrag', count=subcount[15]) + regexs[30].sub(r'', 'iv_svefg_gvzr', count=subcount[30]) re.search(r'(^|.)(ronl|qri-ehf3.wbg)(|fgberf|zbgbef|yvirnhpgvbaf|jvxv|rkcerff|punggre).(pbz(|.nh|.pa|.ux|.zl|.ft|.oe|.zk)|pb(.hx|.xe|.am)|pn|qr|se|vg|ay|or|ng|pu|vr|va|rf|cy|cu|fr)$', 'cntrf.ronl.pbz', re.IGNORECASE) regexs[8].search(r'144631658.0.10.1231364074') regexs[8].search( @@ -896,28 +896,28 @@ def block7(): def block8(): for i in range(7): re.match(r'\d+', strings[1]) - regexs[64].sub(r'', 'nsgre', subcount[64]) - regexs[64].sub(r'', 'orsber', subcount[64]) - regexs[64].sub(r'', 'obggbz', subcount[64]) - 
regexs[65].sub(r'', 'ohvygva_jrngure.kzy', subcount[65]) - regexs[37].sub(r'', 'ohggba', subcount[37]) - regexs[18].sub(r'', 'ohggba', subcount[18]) - regexs[65].sub(r'', 'qngrgvzr.kzy', subcount[65]) + regexs[64].sub(r'', 'nsgre', count=subcount[64]) + regexs[64].sub(r'', 'orsber', count=subcount[64]) + regexs[64].sub(r'', 'obggbz', count=subcount[64]) + regexs[65].sub(r'', 'ohvygva_jrngure.kzy', count=subcount[65]) + regexs[37].sub(r'', 'ohggba', count=subcount[37]) + regexs[18].sub(r'', 'ohggba', count=subcount[18]) + regexs[65].sub(r'', 'qngrgvzr.kzy', count=subcount[65]) regexs[65].sub( - r'', 'uggc://eff.paa.pbz/eff/paa_gbcfgbevrf.eff', subcount[65]) - regexs[37].sub(r'', 'vachg', subcount[37]) - regexs[18].sub(r'', 'vachg', subcount[18]) - regexs[64].sub(r'', 'vafvqr', subcount[64]) - regexs[27].sub(r'', 'cbvagre', subcount[27]) - re.sub(r'[A-Z]', '', 'cbfvgvba', 0) - regexs[27].sub(r'', 'gbc', subcount[27]) - regexs[64].sub(r'', 'gbc', subcount[64]) - regexs[37].sub(r'', 'hy', subcount[37]) - regexs[18].sub(r'', 'hy', subcount[18]) - regexs[37].sub(r'', strings[26], subcount[37]) - regexs[18].sub(r'', strings[26], subcount[18]) - regexs[65].sub(r'', 'lbhghor_vtbbtyr/i2/lbhghor.kzy', subcount[65]) - regexs[27].sub(r'', 'm-vaqrk', subcount[27]) + r'', 'uggc://eff.paa.pbz/eff/paa_gbcfgbevrf.eff', count=subcount[65]) + regexs[37].sub(r'', 'vachg', count=subcount[37]) + regexs[18].sub(r'', 'vachg', count=subcount[18]) + regexs[64].sub(r'', 'vafvqr', count=subcount[64]) + regexs[27].sub(r'', 'cbvagre', count=subcount[27]) + re.sub(r'[A-Z]', '', 'cbfvgvba', count=0) + regexs[27].sub(r'', 'gbc', count=subcount[27]) + regexs[64].sub(r'', 'gbc', count=subcount[64]) + regexs[37].sub(r'', 'hy', count=subcount[37]) + regexs[18].sub(r'', 'hy', count=subcount[18]) + regexs[37].sub(r'', strings[26], count=subcount[37]) + regexs[18].sub(r'', strings[26], count=subcount[18]) + regexs[65].sub(r'', 'lbhghor_vtbbtyr/i2/lbhghor.kzy', count=subcount[65]) + regexs[27].sub(r'', 
'm-vaqrk', count=subcount[27]) re.search(r'#([\w-]+)', strings[26]) regexs[16].search(r'urvtug') regexs[16].search(r'znetvaGbc') @@ -943,27 +943,27 @@ def block8(): re.search(r'eton?\([\d\s,]+\)', 'fgngvp') for i in range(6): - re.sub(r'\r', '', '', 0) - regexs[40].sub(r'', '/', subcount[40]) - regexs[10].sub(r'', '/', subcount[10]) - regexs[51].sub(r'', '/', subcount[51]) - regexs[52].sub(r'', '/', subcount[52]) - regexs[53].sub(r'', '/', subcount[53]) - regexs[39].sub(r'', '/', subcount[39]) - regexs[54].sub(r'', '/', subcount[54]) + re.sub(r'\r', '', '', count=0) + regexs[40].sub(r'', '/', count=subcount[40]) + regexs[10].sub(r'', '/', count=subcount[10]) + regexs[51].sub(r'', '/', count=subcount[51]) + regexs[52].sub(r'', '/', count=subcount[52]) + regexs[53].sub(r'', '/', count=subcount[53]) + regexs[39].sub(r'', '/', count=subcount[39]) + regexs[54].sub(r'', '/', count=subcount[54]) regexs[63].sub( - r'', 'uggc://zfacbegny.112.2b7.arg/o/ff/zfacbegnyubzr/1/U.7-cqi-2/{0}?[NDO]&{1}&{2}&[NDR]', subcount[63]) - regexs[12].sub(r'', strings[41], subcount[12]) - regexs[23].sub(r'', 'uggc://jjj.snprobbx.pbz/fepu.cuc', subcount[23]) - regexs[40].sub(r'', 'freivpr', subcount[40]) - regexs[41].sub(r'', 'freivpr', subcount[41]) - regexs[42].sub(r'', 'freivpr', subcount[42]) - regexs[43].sub(r'', 'freivpr', subcount[43]) - regexs[44].sub(r'', 'freivpr', subcount[44]) - regexs[45].sub(r'', 'freivpr', subcount[45]) - regexs[46].sub(r'', 'freivpr', subcount[46]) - regexs[47].sub(r'', 'freivpr', subcount[47]) - regexs[48].sub(r'', 'freivpr', subcount[48]) + r'', 'uggc://zfacbegny.112.2b7.arg/o/ff/zfacbegnyubzr/1/U.7-cqi-2/{0}?[NDO]&{1}&{2}&[NDR]', count=subcount[63]) + regexs[12].sub(r'', strings[41], count=subcount[12]) + regexs[23].sub(r'', 'uggc://jjj.snprobbx.pbz/fepu.cuc', count=subcount[23]) + regexs[40].sub(r'', 'freivpr', count=subcount[40]) + regexs[41].sub(r'', 'freivpr', count=subcount[41]) + regexs[42].sub(r'', 'freivpr', count=subcount[42]) + regexs[43].sub(r'', 
'freivpr', count=subcount[43]) + regexs[44].sub(r'', 'freivpr', count=subcount[44]) + regexs[45].sub(r'', 'freivpr', count=subcount[45]) + regexs[46].sub(r'', 'freivpr', count=subcount[46]) + regexs[47].sub(r'', 'freivpr', count=subcount[47]) + regexs[48].sub(r'', 'freivpr', count=subcount[48]) re.search(r'((ZFVR\s+([6-9]|\d\d)\.))', strings[0]) regexs[66].search(r'') regexs[50].search(r'fryrpgrq') @@ -983,11 +983,11 @@ def block9(): regexs[32].split(strings[43]) regexs[20].split( r'svz_zlfcnpr_hfre-ivrj-pbzzragf,svz_zlfcnpr_havgrq-fgngrf') - regexs[33].sub(r'', strings[44], subcount[33]) + regexs[33].sub(r'', strings[44], count=subcount[33]) regexs[67].sub( - r'', 'zrah_arj zrah_arj_gbttyr zrah_gbttyr', subcount[67]) + r'', 'zrah_arj zrah_arj_gbttyr zrah_gbttyr', count=subcount[67]) regexs[67].sub( - r'', 'zrah_byq zrah_byq_gbttyr zrah_gbttyr', subcount[67]) + r'', 'zrah_byq zrah_byq_gbttyr zrah_gbttyr', count=subcount[67]) regexs[8].search(r'102n9o0o9pq60132qn0337rr867p75953502q2s27s2s5r98') regexs[8].search(r'144631658.0.10.1231364380') regexs[8].search( @@ -1008,97 +1008,97 @@ def block9(): r'__hgzm=144631658.1231364380.1.1.hgzpfe=(qverpg)|hgzppa=(qverpg)|hgzpzq=(abar)') for i in range(4): - regexs[14].sub(r'', ' yvfg1', subcount[14]) - regexs[15].sub(r'', ' yvfg1', subcount[15]) - regexs[14].sub(r'', ' yvfg2', subcount[14]) - regexs[15].sub(r'', ' yvfg2', subcount[15]) - regexs[14].sub(r'', ' frneputebhc1', subcount[14]) - regexs[15].sub(r'', ' frneputebhc1', subcount[15]) - regexs[68].sub(r'', strings[47], subcount[68]) - regexs[18].sub(r'', strings[47], subcount[18]) - re.sub(r'&', '', '', 0) - regexs[35].sub(r'', '', subcount[35]) - regexs[63].sub(r'', '(..-{0})(|(d+)|)', subcount[63]) - regexs[18].sub(r'', strings[48], subcount[18]) + regexs[14].sub(r'', ' yvfg1', count=subcount[14]) + regexs[15].sub(r'', ' yvfg1', count=subcount[15]) + regexs[14].sub(r'', ' yvfg2', count=subcount[14]) + regexs[15].sub(r'', ' yvfg2', count=subcount[15]) + 
regexs[14].sub(r'', ' frneputebhc1', count=subcount[14]) + regexs[15].sub(r'', ' frneputebhc1', count=subcount[15]) + regexs[68].sub(r'', strings[47], count=subcount[68]) + regexs[18].sub(r'', strings[47], count=subcount[18]) + re.sub(r'&', '', '', count=0) + regexs[35].sub(r'', '', count=subcount[35]) + regexs[63].sub(r'', '(..-{0})(|(d+)|)', count=subcount[63]) + regexs[18].sub(r'', strings[48], count=subcount[18]) regexs[56].sub( - r'', '//vzt.jro.qr/vij/FC/${cngu}/${anzr}/${inyhr}?gf=${abj}', subcount[56]) + r'', '//vzt.jro.qr/vij/FC/${cngu}/${anzr}/${inyhr}?gf=${abj}', count=subcount[56]) re.sub(r'(\$\{anzr\})|(\$anzr\b)', '', - '//vzt.jro.qr/vij/FC/tzk_uc/${anzr}/${inyhr}?gf=${abj}', 0) + '//vzt.jro.qr/vij/FC/tzk_uc/${anzr}/${inyhr}?gf=${abj}', count=0) regexs[69].sub( - r'', 'Jvaqbjf Yvir Ubgznvy{1}', subcount[69]) + r'', 'Jvaqbjf Yvir Ubgznvy{1}', count=subcount[69]) regexs[63].sub( - r'', '{0}{1}', subcount[63]) + r'', '{0}{1}', count=subcount[63]) regexs[69].sub( - r'', '{1}', subcount[69]) + r'', '{1}', count=subcount[69]) regexs[63].sub( - r'', '{1}', subcount[63]) - regexs[15].sub(r'', 'Vzntrf', subcount[15]) - regexs[15].sub(r'', 'ZFA', subcount[15]) - regexs[15].sub(r'', 'Zncf', subcount[15]) - regexs[39].sub(r'', 'Zbq-Vasb-Vasb-WninFpevcgUvag', subcount[39]) - regexs[15].sub(r'', 'Arjf', subcount[15]) + r'', '{1}', count=subcount[63]) + regexs[15].sub(r'', 'Vzntrf', count=subcount[15]) + regexs[15].sub(r'', 'ZFA', count=subcount[15]) + regexs[15].sub(r'', 'Zncf', count=subcount[15]) + regexs[39].sub(r'', 'Zbq-Vasb-Vasb-WninFpevcgUvag', count=subcount[39]) + regexs[15].sub(r'', 'Arjf', count=subcount[15]) regexs[32].split(strings[49]) regexs[32].split(strings[50]) - regexs[15].sub(r'', 'Ivqrb', subcount[15]) - regexs[15].sub(r'', 'Jro', subcount[15]) - regexs[39].sub(r'', 'n', subcount[39]) + regexs[15].sub(r'', 'Ivqrb', count=subcount[15]) + regexs[15].sub(r'', 'Jro', count=subcount[15]) + regexs[39].sub(r'', 'n', count=subcount[39]) 
regexs[70].split(r'nwnkFgneg') regexs[70].split(r'nwnkFgbc') - regexs[14].sub(r'', 'ovaq', subcount[14]) - regexs[15].sub(r'', 'ovaq', subcount[15]) + regexs[14].sub(r'', 'ovaq', count=subcount[14]) + regexs[15].sub(r'', 'ovaq', count=subcount[15]) regexs[63].sub( - r'', 'oevatf lbh zber. Zber fcnpr (5TO), zber frphevgl, fgvyy serr.', subcount[63]) - regexs[14].sub(r'', 'puvyq p1 svefg qrpx', subcount[14]) - regexs[15].sub(r'', 'puvyq p1 svefg qrpx', subcount[15]) - regexs[14].sub(r'', 'puvyq p1 svefg qbhoyr2', subcount[14]) - regexs[15].sub(r'', 'puvyq p1 svefg qbhoyr2', subcount[15]) - regexs[14].sub(r'', 'puvyq p2 ynfg', subcount[14]) - regexs[15].sub(r'', 'puvyq p2 ynfg', subcount[15]) - regexs[14].sub(r'', 'puvyq p2', subcount[14]) - regexs[15].sub(r'', 'puvyq p2', subcount[15]) - regexs[14].sub(r'', 'puvyq p3', subcount[14]) - regexs[15].sub(r'', 'puvyq p3', subcount[15]) - regexs[14].sub(r'', 'puvyq p4 ynfg', subcount[14]) - regexs[15].sub(r'', 'puvyq p4 ynfg', subcount[15]) - regexs[14].sub(r'', 'pbclevtug', subcount[14]) - regexs[15].sub(r'', 'pbclevtug', subcount[15]) - regexs[14].sub(r'', 'qZFAZR_1', subcount[14]) - regexs[15].sub(r'', 'qZFAZR_1', subcount[15]) - regexs[14].sub(r'', 'qbhoyr2 ps', subcount[14]) - regexs[15].sub(r'', 'qbhoyr2 ps', subcount[15]) - regexs[14].sub(r'', 'qbhoyr2', subcount[14]) - regexs[15].sub(r'', 'qbhoyr2', subcount[15]) - regexs[14].sub(r'', 'uqy_arj', subcount[14]) - regexs[15].sub(r'', 'uqy_arj', subcount[15]) - regexs[30].sub(r'', 'uc_fubccvatobk', subcount[30]) - regexs[29].sub(r'', 'ugzy%2Rvq', subcount[29]) - regexs[30].sub(r'', 'ugzy%2Rvq', subcount[30]) - regexs[33].sub(r'', strings[51], subcount[33]) + r'', 'oevatf lbh zber. 
Zber fcnpr (5TO), zber frphevgl, fgvyy serr.', count=subcount[63]) + regexs[14].sub(r'', 'puvyq p1 svefg qrpx', count=subcount[14]) + regexs[15].sub(r'', 'puvyq p1 svefg qrpx', count=subcount[15]) + regexs[14].sub(r'', 'puvyq p1 svefg qbhoyr2', count=subcount[14]) + regexs[15].sub(r'', 'puvyq p1 svefg qbhoyr2', count=subcount[15]) + regexs[14].sub(r'', 'puvyq p2 ynfg', count=subcount[14]) + regexs[15].sub(r'', 'puvyq p2 ynfg', count=subcount[15]) + regexs[14].sub(r'', 'puvyq p2', count=subcount[14]) + regexs[15].sub(r'', 'puvyq p2', count=subcount[15]) + regexs[14].sub(r'', 'puvyq p3', count=subcount[14]) + regexs[15].sub(r'', 'puvyq p3', count=subcount[15]) + regexs[14].sub(r'', 'puvyq p4 ynfg', count=subcount[14]) + regexs[15].sub(r'', 'puvyq p4 ynfg', count=subcount[15]) + regexs[14].sub(r'', 'pbclevtug', count=subcount[14]) + regexs[15].sub(r'', 'pbclevtug', count=subcount[15]) + regexs[14].sub(r'', 'qZFAZR_1', count=subcount[14]) + regexs[15].sub(r'', 'qZFAZR_1', count=subcount[15]) + regexs[14].sub(r'', 'qbhoyr2 ps', count=subcount[14]) + regexs[15].sub(r'', 'qbhoyr2 ps', count=subcount[15]) + regexs[14].sub(r'', 'qbhoyr2', count=subcount[14]) + regexs[15].sub(r'', 'qbhoyr2', count=subcount[15]) + regexs[14].sub(r'', 'uqy_arj', count=subcount[14]) + regexs[15].sub(r'', 'uqy_arj', count=subcount[15]) + regexs[30].sub(r'', 'uc_fubccvatobk', count=subcount[30]) + regexs[29].sub(r'', 'ugzy%2Rvq', count=subcount[29]) + regexs[30].sub(r'', 'ugzy%2Rvq', count=subcount[30]) + regexs[33].sub(r'', strings[51], count=subcount[33]) regexs[71].sub( - r'', 'uggc://wf.hv-cbegny.qr/tzk/ubzr/wf/20080602/cebgbglcr.wf${4}${5}', subcount[71]) + r'', 'uggc://wf.hv-cbegny.qr/tzk/ubzr/wf/20080602/cebgbglcr.wf${4}${5}', count=subcount[71]) regexs[72].sub( - r'', 'uggc://wf.hv-cbegny.qr/tzk/ubzr/wf/20080602/cebgbglcr.wf${5}', subcount[72]) - regexs[73].sub(r'', strings[52], subcount[73]) + r'', 'uggc://wf.hv-cbegny.qr/tzk/ubzr/wf/20080602/cebgbglcr.wf${5}', count=subcount[72]) + 
regexs[73].sub(r'', strings[52], count=subcount[73]) regexs[69].sub( - r'', 'uggc://zfacbegny.112.2b7.arg/o/ff/zfacbegnyubzr/1/U.7-cqi-2/f55332979829981?[NDO]&{1}&{2}&[NDR]', subcount[69]) - regexs[14].sub(r'', 'vztZFSG', subcount[14]) - regexs[15].sub(r'', 'vztZFSG', subcount[15]) - regexs[14].sub(r'', 'zfasbbg1 ps', subcount[14]) - regexs[15].sub(r'', 'zfasbbg1 ps', subcount[15]) - regexs[14].sub(r'', strings[53], subcount[14]) - regexs[15].sub(r'', strings[53], subcount[15]) + r'', 'uggc://zfacbegny.112.2b7.arg/o/ff/zfacbegnyubzr/1/U.7-cqi-2/f55332979829981?[NDO]&{1}&{2}&[NDR]', count=subcount[69]) + regexs[14].sub(r'', 'vztZFSG', count=subcount[14]) + regexs[15].sub(r'', 'vztZFSG', count=subcount[15]) + regexs[14].sub(r'', 'zfasbbg1 ps', count=subcount[14]) + regexs[15].sub(r'', 'zfasbbg1 ps', count=subcount[15]) + regexs[14].sub(r'', strings[53], count=subcount[14]) + regexs[15].sub(r'', strings[53], count=subcount[15]) regexs[14].sub( - r'', 'cnerag puebzr6 fvatyr1 gno fryrpgrq ovaq', subcount[14]) + r'', 'cnerag puebzr6 fvatyr1 gno fryrpgrq ovaq', count=subcount[14]) regexs[15].sub( - r'', 'cnerag puebzr6 fvatyr1 gno fryrpgrq ovaq', subcount[15]) - regexs[14].sub(r'', 'cevznel', subcount[14]) - regexs[15].sub(r'', 'cevznel', subcount[15]) - regexs[30].sub(r'', 'erpgnatyr', subcount[30]) - regexs[14].sub(r'', 'frpbaqnel', subcount[14]) - regexs[15].sub(r'', 'frpbaqnel', subcount[15]) + r'', 'cnerag puebzr6 fvatyr1 gno fryrpgrq ovaq', count=subcount[15]) + regexs[14].sub(r'', 'cevznel', count=subcount[14]) + regexs[15].sub(r'', 'cevznel', count=subcount[15]) + regexs[30].sub(r'', 'erpgnatyr', count=subcount[30]) + regexs[14].sub(r'', 'frpbaqnel', count=subcount[14]) + regexs[15].sub(r'', 'frpbaqnel', count=subcount[15]) regexs[70].split(r'haybnq') - regexs[63].sub(r'', '{0}{1}1', subcount[63]) - regexs[69].sub(r'', '|{1}1', subcount[69]) + regexs[63].sub(r'', '{0}{1}1', count=subcount[63]) + regexs[69].sub(r'', '|{1}1', count=subcount[69]) 
re.search(r'(..-HF)(\|(\d+)|)', 'xb-xe,ra-va,gu-gu', re.IGNORECASE) regexs[4].search(r'/ZlFcnprNccf/NccPnainf,45000012') regexs[8].search(r'144631658.0.10.1231367708') @@ -1161,128 +1161,128 @@ def block9(): def block10(): for i in range(3): - regexs[39].sub(r'', '%3Szxg=ra-HF', subcount[39]) - regexs[40].sub(r'', '-8', subcount[40]) - regexs[10].sub(r'', '-8', subcount[10]) - regexs[51].sub(r'', '-8', subcount[51]) - regexs[52].sub(r'', '-8', subcount[52]) - regexs[53].sub(r'', '-8', subcount[53]) - regexs[39].sub(r'', '-8', subcount[39]) - regexs[54].sub(r'', '-8', subcount[54]) - regexs[40].sub(r'', '1.5', subcount[40]) - regexs[10].sub(r'', '1.5', subcount[10]) - regexs[51].sub(r'', '1.5', subcount[51]) - regexs[52].sub(r'', '1.5', subcount[52]) - regexs[53].sub(r'', '1.5', subcount[53]) - regexs[39].sub(r'', '1.5', subcount[39]) - regexs[54].sub(r'', '1.5', subcount[54]) - regexs[40].sub(r'', '1024k768', subcount[40]) - regexs[10].sub(r'', '1024k768', subcount[10]) - regexs[51].sub(r'', '1024k768', subcount[51]) - regexs[52].sub(r'', '1024k768', subcount[52]) - regexs[53].sub(r'', '1024k768', subcount[53]) - regexs[39].sub(r'', '1024k768', subcount[39]) - regexs[54].sub(r'', '1024k768', subcount[54]) - regexs[40].sub(r'', strings[64], subcount[40]) - regexs[10].sub(r'', strings[64], subcount[10]) - regexs[51].sub(r'', strings[64], subcount[51]) - regexs[52].sub(r'', strings[64], subcount[52]) - regexs[53].sub(r'', strings[64], subcount[53]) - regexs[39].sub(r'', strings[64], subcount[39]) - regexs[54].sub(r'', strings[64], subcount[54]) - regexs[40].sub(r'', '14', subcount[40]) - regexs[10].sub(r'', '14', subcount[10]) - regexs[51].sub(r'', '14', subcount[51]) - regexs[52].sub(r'', '14', subcount[52]) - regexs[53].sub(r'', '14', subcount[53]) - regexs[39].sub(r'', '14', subcount[39]) - regexs[54].sub(r'', '14', subcount[54]) - regexs[40].sub(r'', '24', subcount[40]) - regexs[10].sub(r'', '24', subcount[10]) - regexs[51].sub(r'', '24', subcount[51]) - 
regexs[52].sub(r'', '24', subcount[52]) - regexs[53].sub(r'', '24', subcount[53]) - regexs[39].sub(r'', '24', subcount[39]) - regexs[54].sub(r'', '24', subcount[54]) - regexs[40].sub(r'', strings[65], subcount[40]) - regexs[10].sub(r'', strings[65], subcount[10]) - regexs[51].sub(r'', strings[65], subcount[51]) - regexs[52].sub(r'', strings[65], subcount[52]) - regexs[53].sub(r'', strings[65], subcount[53]) - regexs[39].sub(r'', strings[65], subcount[39]) - regexs[54].sub(r'', strings[65], subcount[54]) - regexs[40].sub(r'', strings[66], subcount[40]) - regexs[10].sub(r'', strings[66], subcount[10]) - regexs[51].sub(r'', strings[66], subcount[51]) - regexs[52].sub(r'', strings[66], subcount[52]) - regexs[53].sub(r'', strings[66], subcount[53]) - regexs[39].sub(r'', strings[66], subcount[39]) - regexs[54].sub(r'', strings[66], subcount[54]) - regexs[40].sub(r'', '9.0', subcount[40]) - regexs[10].sub(r'', '9.0', subcount[10]) - regexs[51].sub(r'', '9.0', subcount[51]) - regexs[52].sub(r'', '9.0', subcount[52]) - regexs[53].sub(r'', '9.0', subcount[53]) - regexs[39].sub(r'', '9.0', subcount[39]) - regexs[54].sub(r'', '9.0', subcount[54]) - regexs[40].sub(r'', '994k634', subcount[40]) - regexs[10].sub(r'', '994k634', subcount[10]) - regexs[51].sub(r'', '994k634', subcount[51]) - regexs[52].sub(r'', '994k634', subcount[52]) - regexs[53].sub(r'', '994k634', subcount[53]) - regexs[39].sub(r'', '994k634', subcount[39]) - regexs[54].sub(r'', '994k634', subcount[54]) - regexs[40].sub(r'', '?zxg=ra-HF', subcount[40]) - regexs[10].sub(r'', '?zxg=ra-HF', subcount[10]) - regexs[51].sub(r'', '?zxg=ra-HF', subcount[51]) - regexs[52].sub(r'', '?zxg=ra-HF', subcount[52]) - regexs[53].sub(r'', '?zxg=ra-HF', subcount[53]) - regexs[54].sub(r'', '?zxg=ra-HF', subcount[54]) - regexs[25].sub(r'', 'PAA.pbz', subcount[25]) - regexs[12].sub(r'', 'PAA.pbz', subcount[12]) - regexs[39].sub(r'', 'PAA.pbz', subcount[39]) - regexs[25].sub(r'', 'Qngr & Gvzr', subcount[25]) - regexs[12].sub(r'', 
'Qngr & Gvzr', subcount[12]) - regexs[39].sub(r'', 'Qngr & Gvzr', subcount[39]) - regexs[40].sub(r'', 'Frnepu Zvpebfbsg.pbz', subcount[40]) - regexs[54].sub(r'', 'Frnepu Zvpebfbsg.pbz', subcount[54]) - regexs[10].sub(r'', strings[67], subcount[10]) - regexs[51].sub(r'', strings[67], subcount[51]) - regexs[52].sub(r'', strings[67], subcount[52]) - regexs[53].sub(r'', strings[67], subcount[53]) - regexs[39].sub(r'', strings[67], subcount[39]) + regexs[39].sub(r'', '%3Szxg=ra-HF', count=subcount[39]) + regexs[40].sub(r'', '-8', count=subcount[40]) + regexs[10].sub(r'', '-8', count=subcount[10]) + regexs[51].sub(r'', '-8', count=subcount[51]) + regexs[52].sub(r'', '-8', count=subcount[52]) + regexs[53].sub(r'', '-8', count=subcount[53]) + regexs[39].sub(r'', '-8', count=subcount[39]) + regexs[54].sub(r'', '-8', count=subcount[54]) + regexs[40].sub(r'', '1.5', count=subcount[40]) + regexs[10].sub(r'', '1.5', count=subcount[10]) + regexs[51].sub(r'', '1.5', count=subcount[51]) + regexs[52].sub(r'', '1.5', count=subcount[52]) + regexs[53].sub(r'', '1.5', count=subcount[53]) + regexs[39].sub(r'', '1.5', count=subcount[39]) + regexs[54].sub(r'', '1.5', count=subcount[54]) + regexs[40].sub(r'', '1024k768', count=subcount[40]) + regexs[10].sub(r'', '1024k768', count=subcount[10]) + regexs[51].sub(r'', '1024k768', count=subcount[51]) + regexs[52].sub(r'', '1024k768', count=subcount[52]) + regexs[53].sub(r'', '1024k768', count=subcount[53]) + regexs[39].sub(r'', '1024k768', count=subcount[39]) + regexs[54].sub(r'', '1024k768', count=subcount[54]) + regexs[40].sub(r'', strings[64], count=subcount[40]) + regexs[10].sub(r'', strings[64], count=subcount[10]) + regexs[51].sub(r'', strings[64], count=subcount[51]) + regexs[52].sub(r'', strings[64], count=subcount[52]) + regexs[53].sub(r'', strings[64], count=subcount[53]) + regexs[39].sub(r'', strings[64], count=subcount[39]) + regexs[54].sub(r'', strings[64], count=subcount[54]) + regexs[40].sub(r'', '14', count=subcount[40]) + 
regexs[10].sub(r'', '14', count=subcount[10]) + regexs[51].sub(r'', '14', count=subcount[51]) + regexs[52].sub(r'', '14', count=subcount[52]) + regexs[53].sub(r'', '14', count=subcount[53]) + regexs[39].sub(r'', '14', count=subcount[39]) + regexs[54].sub(r'', '14', count=subcount[54]) + regexs[40].sub(r'', '24', count=subcount[40]) + regexs[10].sub(r'', '24', count=subcount[10]) + regexs[51].sub(r'', '24', count=subcount[51]) + regexs[52].sub(r'', '24', count=subcount[52]) + regexs[53].sub(r'', '24', count=subcount[53]) + regexs[39].sub(r'', '24', count=subcount[39]) + regexs[54].sub(r'', '24', count=subcount[54]) + regexs[40].sub(r'', strings[65], count=subcount[40]) + regexs[10].sub(r'', strings[65], count=subcount[10]) + regexs[51].sub(r'', strings[65], count=subcount[51]) + regexs[52].sub(r'', strings[65], count=subcount[52]) + regexs[53].sub(r'', strings[65], count=subcount[53]) + regexs[39].sub(r'', strings[65], count=subcount[39]) + regexs[54].sub(r'', strings[65], count=subcount[54]) + regexs[40].sub(r'', strings[66], count=subcount[40]) + regexs[10].sub(r'', strings[66], count=subcount[10]) + regexs[51].sub(r'', strings[66], count=subcount[51]) + regexs[52].sub(r'', strings[66], count=subcount[52]) + regexs[53].sub(r'', strings[66], count=subcount[53]) + regexs[39].sub(r'', strings[66], count=subcount[39]) + regexs[54].sub(r'', strings[66], count=subcount[54]) + regexs[40].sub(r'', '9.0', count=subcount[40]) + regexs[10].sub(r'', '9.0', count=subcount[10]) + regexs[51].sub(r'', '9.0', count=subcount[51]) + regexs[52].sub(r'', '9.0', count=subcount[52]) + regexs[53].sub(r'', '9.0', count=subcount[53]) + regexs[39].sub(r'', '9.0', count=subcount[39]) + regexs[54].sub(r'', '9.0', count=subcount[54]) + regexs[40].sub(r'', '994k634', count=subcount[40]) + regexs[10].sub(r'', '994k634', count=subcount[10]) + regexs[51].sub(r'', '994k634', count=subcount[51]) + regexs[52].sub(r'', '994k634', count=subcount[52]) + regexs[53].sub(r'', '994k634', count=subcount[53]) 
+ regexs[39].sub(r'', '994k634', count=subcount[39]) + regexs[54].sub(r'', '994k634', count=subcount[54]) + regexs[40].sub(r'', '?zxg=ra-HF', count=subcount[40]) + regexs[10].sub(r'', '?zxg=ra-HF', count=subcount[10]) + regexs[51].sub(r'', '?zxg=ra-HF', count=subcount[51]) + regexs[52].sub(r'', '?zxg=ra-HF', count=subcount[52]) + regexs[53].sub(r'', '?zxg=ra-HF', count=subcount[53]) + regexs[54].sub(r'', '?zxg=ra-HF', count=subcount[54]) + regexs[25].sub(r'', 'PAA.pbz', count=subcount[25]) + regexs[12].sub(r'', 'PAA.pbz', count=subcount[12]) + regexs[39].sub(r'', 'PAA.pbz', count=subcount[39]) + regexs[25].sub(r'', 'Qngr & Gvzr', count=subcount[25]) + regexs[12].sub(r'', 'Qngr & Gvzr', count=subcount[12]) + regexs[39].sub(r'', 'Qngr & Gvzr', count=subcount[39]) + regexs[40].sub(r'', 'Frnepu Zvpebfbsg.pbz', count=subcount[40]) + regexs[54].sub(r'', 'Frnepu Zvpebfbsg.pbz', count=subcount[54]) + regexs[10].sub(r'', strings[67], count=subcount[10]) + regexs[51].sub(r'', strings[67], count=subcount[51]) + regexs[52].sub(r'', strings[67], count=subcount[52]) + regexs[53].sub(r'', strings[67], count=subcount[53]) + regexs[39].sub(r'', strings[67], count=subcount[39]) regexs[32].split(strings[68]) regexs[32].split(strings[69]) - regexs[52].sub(r'', strings[70], subcount[52]) - regexs[53].sub(r'', strings[70], subcount[53]) - regexs[39].sub(r'', strings[70], subcount[39]) - regexs[40].sub(r'', strings[71], subcount[40]) - regexs[10].sub(r'', strings[71], subcount[10]) - regexs[51].sub(r'', strings[71], subcount[51]) - regexs[54].sub(r'', strings[71], subcount[54]) - regexs[25].sub(r'', 'Jrngure', subcount[25]) - regexs[12].sub(r'', 'Jrngure', subcount[12]) - regexs[39].sub(r'', 'Jrngure', subcount[39]) - regexs[25].sub(r'', 'LbhGhor', subcount[25]) - regexs[12].sub(r'', 'LbhGhor', subcount[12]) - regexs[39].sub(r'', 'LbhGhor', subcount[39]) - regexs[33].sub(r'', strings[72], subcount[33]) - re.sub(r'^erzbgr_vsenzr_', '', 'erzbgr_vsenzr_1', 1) - regexs[40].sub(r'', 
strings[73], subcount[40]) - regexs[10].sub(r'', strings[73], subcount[10]) - regexs[51].sub(r'', strings[73], subcount[51]) - regexs[52].sub(r'', strings[73], subcount[52]) - regexs[53].sub(r'', strings[73], subcount[53]) - regexs[39].sub(r'', strings[73], subcount[39]) - regexs[54].sub(r'', strings[73], subcount[54]) - regexs[40].sub(r'', strings[74], subcount[40]) - regexs[10].sub(r'', strings[74], subcount[10]) - regexs[51].sub(r'', strings[74], subcount[51]) - regexs[52].sub(r'', strings[74], subcount[52]) - regexs[53].sub(r'', strings[74], subcount[53]) - regexs[39].sub(r'', strings[74], subcount[39]) - regexs[54].sub(r'', strings[74], subcount[54]) - re.sub(r'\-', '', 'lhv-h', 0) + regexs[52].sub(r'', strings[70], count=subcount[52]) + regexs[53].sub(r'', strings[70], count=subcount[53]) + regexs[39].sub(r'', strings[70], count=subcount[39]) + regexs[40].sub(r'', strings[71], count=subcount[40]) + regexs[10].sub(r'', strings[71], count=subcount[10]) + regexs[51].sub(r'', strings[71], count=subcount[51]) + regexs[54].sub(r'', strings[71], count=subcount[54]) + regexs[25].sub(r'', 'Jrngure', count=subcount[25]) + regexs[12].sub(r'', 'Jrngure', count=subcount[12]) + regexs[39].sub(r'', 'Jrngure', count=subcount[39]) + regexs[25].sub(r'', 'LbhGhor', count=subcount[25]) + regexs[12].sub(r'', 'LbhGhor', count=subcount[12]) + regexs[39].sub(r'', 'LbhGhor', count=subcount[39]) + regexs[33].sub(r'', strings[72], count=subcount[33]) + re.sub(r'^erzbgr_vsenzr_', '', 'erzbgr_vsenzr_1', count=1) + regexs[40].sub(r'', strings[73], count=subcount[40]) + regexs[10].sub(r'', strings[73], count=subcount[10]) + regexs[51].sub(r'', strings[73], count=subcount[51]) + regexs[52].sub(r'', strings[73], count=subcount[52]) + regexs[53].sub(r'', strings[73], count=subcount[53]) + regexs[39].sub(r'', strings[73], count=subcount[39]) + regexs[54].sub(r'', strings[73], count=subcount[54]) + regexs[40].sub(r'', strings[74], count=subcount[40]) + regexs[10].sub(r'', strings[74], 
count=subcount[10]) + regexs[51].sub(r'', strings[74], count=subcount[51]) + regexs[52].sub(r'', strings[74], count=subcount[52]) + regexs[53].sub(r'', strings[74], count=subcount[53]) + regexs[39].sub(r'', strings[74], count=subcount[39]) + regexs[54].sub(r'', strings[74], count=subcount[54]) + re.sub(r'\-', '', 'lhv-h', count=0) regexs[9].search(r'p') regexs[9].search(r'qz p') regexs[9].search(r'zbqynory') @@ -1315,207 +1315,207 @@ def block10(): def block11(): for i in range(2): - regexs[18].sub(r'', ' .pybfr', subcount[18]) - regexs[18].sub(r'', ' n.svryqOgaPnapry', subcount[18]) - regexs[18].sub(r'', ' qg', subcount[18]) - regexs[68].sub(r'', strings[77], subcount[68]) - regexs[18].sub(r'', strings[77], subcount[18]) - regexs[39].sub(r'', '', subcount[39]) - re.sub(r'^', '', '', 1) + regexs[18].sub(r'', ' .pybfr', count=subcount[18]) + regexs[18].sub(r'', ' n.svryqOgaPnapry', count=subcount[18]) + regexs[18].sub(r'', ' qg', count=subcount[18]) + regexs[68].sub(r'', strings[77], count=subcount[68]) + regexs[18].sub(r'', strings[77], count=subcount[18]) + regexs[39].sub(r'', '', count=subcount[39]) + re.sub(r'^', '', '', count=1) regexs[86].split(r'') - regexs[39].sub(r'', '*', subcount[39]) - regexs[68].sub(r'', '*', subcount[68]) - regexs[18].sub(r'', '*', subcount[18]) - regexs[68].sub(r'', '.pybfr', subcount[68]) - regexs[18].sub(r'', '.pybfr', subcount[18]) + regexs[39].sub(r'', '*', count=subcount[39]) + regexs[68].sub(r'', '*', count=subcount[68]) + regexs[18].sub(r'', '*', count=subcount[18]) + regexs[68].sub(r'', '.pybfr', count=subcount[68]) + regexs[18].sub(r'', '.pybfr', count=subcount[18]) regexs[87].sub( - r'', '//vzt.jro.qr/vij/FC/tzk_uc/fperra/${inyhr}?gf=${abj}', subcount[87]) + r'', '//vzt.jro.qr/vij/FC/tzk_uc/fperra/${inyhr}?gf=${abj}', count=subcount[87]) regexs[88].sub( - r'', '//vzt.jro.qr/vij/FC/tzk_uc/fperra/1024?gf=${abj}', subcount[88]) + r'', '//vzt.jro.qr/vij/FC/tzk_uc/fperra/1024?gf=${abj}', count=subcount[88]) regexs[87].sub( - r'', 
'//vzt.jro.qr/vij/FC/tzk_uc/jvafvmr/${inyhr}?gf=${abj}', subcount[87]) + r'', '//vzt.jro.qr/vij/FC/tzk_uc/jvafvmr/${inyhr}?gf=${abj}', count=subcount[87]) regexs[88].sub( - r'', '//vzt.jro.qr/vij/FC/tzk_uc/jvafvmr/992/608?gf=${abj}', subcount[88]) - regexs[30].sub(r'', '300k120', subcount[30]) - regexs[30].sub(r'', '300k250', subcount[30]) - regexs[30].sub(r'', '310k120', subcount[30]) - regexs[30].sub(r'', '310k170', subcount[30]) - regexs[30].sub(r'', '310k250', subcount[30]) - re.sub(r'^.*\.(.*)\s.*$', '', '9.0 e115', 1) - regexs[2].sub(r'', 'Nppbeqvba', subcount[2]) - regexs[89].sub(r'', 'Nxghryy\x0a', subcount[89]) - regexs[90].sub(r'', 'Nxghryy\x0a', subcount[90]) - regexs[2].sub(r'', 'Nccyvpngvba', subcount[2]) - regexs[89].sub(r'', 'Oyvpxchaxg\x0a', subcount[89]) - regexs[90].sub(r'', 'Oyvpxchaxg\x0a', subcount[90]) - regexs[89].sub(r'', 'Svanamra\x0a', subcount[89]) - regexs[90].sub(r'', 'Svanamra\x0a', subcount[90]) - regexs[89].sub(r'', 'Tnzrf\x0a', subcount[89]) - regexs[90].sub(r'', 'Tnzrf\x0a', subcount[90]) - regexs[89].sub(r'', 'Ubebfxbc\x0a', subcount[89]) - regexs[90].sub(r'', 'Ubebfxbc\x0a', subcount[90]) - regexs[89].sub(r'', 'Xvab\x0a', subcount[89]) - regexs[90].sub(r'', 'Xvab\x0a', subcount[90]) - regexs[2].sub(r'', 'Zbqhyrf', subcount[2]) - regexs[89].sub(r'', 'Zhfvx\x0a', subcount[89]) - regexs[90].sub(r'', 'Zhfvx\x0a', subcount[90]) - regexs[89].sub(r'', 'Anpuevpugra\x0a', subcount[89]) - regexs[90].sub(r'', 'Anpuevpugra\x0a', subcount[90]) - regexs[2].sub(r'', 'Cuk', subcount[2]) + r'', '//vzt.jro.qr/vij/FC/tzk_uc/jvafvmr/992/608?gf=${abj}', count=subcount[88]) + regexs[30].sub(r'', '300k120', count=subcount[30]) + regexs[30].sub(r'', '300k250', count=subcount[30]) + regexs[30].sub(r'', '310k120', count=subcount[30]) + regexs[30].sub(r'', '310k170', count=subcount[30]) + regexs[30].sub(r'', '310k250', count=subcount[30]) + re.sub(r'^.*\.(.*)\s.*$', '', '9.0 e115', count=1) + regexs[2].sub(r'', 'Nppbeqvba', count=subcount[2]) + 
regexs[89].sub(r'', 'Nxghryy\x0a', count=subcount[89]) + regexs[90].sub(r'', 'Nxghryy\x0a', count=subcount[90]) + regexs[2].sub(r'', 'Nccyvpngvba', count=subcount[2]) + regexs[89].sub(r'', 'Oyvpxchaxg\x0a', count=subcount[89]) + regexs[90].sub(r'', 'Oyvpxchaxg\x0a', count=subcount[90]) + regexs[89].sub(r'', 'Svanamra\x0a', count=subcount[89]) + regexs[90].sub(r'', 'Svanamra\x0a', count=subcount[90]) + regexs[89].sub(r'', 'Tnzrf\x0a', count=subcount[89]) + regexs[90].sub(r'', 'Tnzrf\x0a', count=subcount[90]) + regexs[89].sub(r'', 'Ubebfxbc\x0a', count=subcount[89]) + regexs[90].sub(r'', 'Ubebfxbc\x0a', count=subcount[90]) + regexs[89].sub(r'', 'Xvab\x0a', count=subcount[89]) + regexs[90].sub(r'', 'Xvab\x0a', count=subcount[90]) + regexs[2].sub(r'', 'Zbqhyrf', count=subcount[2]) + regexs[89].sub(r'', 'Zhfvx\x0a', count=subcount[89]) + regexs[90].sub(r'', 'Zhfvx\x0a', count=subcount[90]) + regexs[89].sub(r'', 'Anpuevpugra\x0a', count=subcount[89]) + regexs[90].sub(r'', 'Anpuevpugra\x0a', count=subcount[90]) + regexs[2].sub(r'', 'Cuk', count=subcount[2]) regexs[70].split(r'ErdhrfgSvavfu') regexs[70].split(r'ErdhrfgSvavfu.NWNK.Cuk') - regexs[89].sub(r'', 'Ebhgr\x0a', subcount[89]) - regexs[90].sub(r'', 'Ebhgr\x0a', subcount[90]) + regexs[89].sub(r'', 'Ebhgr\x0a', count=subcount[89]) + regexs[90].sub(r'', 'Ebhgr\x0a', count=subcount[90]) regexs[32].split(strings[78]) regexs[32].split(strings[79]) regexs[32].split(strings[80]) regexs[32].split(strings[81]) - regexs[89].sub(r'', 'Fcbeg\x0a', subcount[89]) - regexs[90].sub(r'', 'Fcbeg\x0a', subcount[90]) - regexs[89].sub(r'', 'GI-Fcbg\x0a', subcount[89]) - regexs[90].sub(r'', 'GI-Fcbg\x0a', subcount[90]) - regexs[89].sub(r'', 'Gbhe\x0a', subcount[89]) - regexs[90].sub(r'', 'Gbhe\x0a', subcount[90]) - regexs[89].sub(r'', 'Hagreunyghat\x0a', subcount[89]) - regexs[90].sub(r'', 'Hagreunyghat\x0a', subcount[90]) - regexs[89].sub(r'', 'Ivqrb\x0a', subcount[89]) - regexs[90].sub(r'', 'Ivqrb\x0a', subcount[90]) - 
regexs[89].sub(r'', 'Jrggre\x0a', subcount[89]) - regexs[90].sub(r'', 'Jrggre\x0a', subcount[90]) - regexs[68].sub(r'', strings[82], subcount[68]) - regexs[18].sub(r'', strings[82], subcount[18]) - regexs[68].sub(r'', strings[83], subcount[68]) - regexs[18].sub(r'', strings[83], subcount[18]) - regexs[68].sub(r'', strings[84], subcount[68]) - regexs[18].sub(r'', strings[84], subcount[18]) - regexs[30].sub(r'', 'nqiFreivprObk', subcount[30]) - regexs[30].sub(r'', 'nqiFubccvatObk', subcount[30]) - regexs[39].sub(r'', 'nwnk', subcount[39]) - regexs[40].sub(r'', 'nxghryy', subcount[40]) - regexs[41].sub(r'', 'nxghryy', subcount[41]) - regexs[42].sub(r'', 'nxghryy', subcount[42]) - regexs[43].sub(r'', 'nxghryy', subcount[43]) - regexs[44].sub(r'', 'nxghryy', subcount[44]) - regexs[45].sub(r'', 'nxghryy', subcount[45]) - regexs[46].sub(r'', 'nxghryy', subcount[46]) - regexs[47].sub(r'', 'nxghryy', subcount[47]) - regexs[48].sub(r'', 'nxghryy', subcount[48]) - regexs[40].sub(r'', strings[85], subcount[40]) - regexs[41].sub(r'', strings[85], subcount[41]) - regexs[42].sub(r'', strings[85], subcount[42]) - regexs[43].sub(r'', strings[85], subcount[43]) - regexs[44].sub(r'', strings[85], subcount[44]) - regexs[45].sub(r'', strings[85], subcount[45]) - regexs[46].sub(r'', strings[85], subcount[46]) - regexs[47].sub(r'', strings[85], subcount[47]) - regexs[48].sub(r'', strings[85], subcount[48]) - regexs[29].sub(r'', 'pngrtbel', subcount[29]) - regexs[30].sub(r'', 'pngrtbel', subcount[30]) - regexs[39].sub(r'', 'pybfr', subcount[39]) - regexs[39].sub(r'', 'qvi', subcount[39]) - regexs[68].sub(r'', strings[86], subcount[68]) - regexs[18].sub(r'', strings[86], subcount[18]) - regexs[39].sub(r'', 'qg', subcount[39]) - regexs[68].sub(r'', 'qg', subcount[68]) - regexs[18].sub(r'', 'qg', subcount[18]) - regexs[39].sub(r'', 'rzorq', subcount[39]) - regexs[68].sub(r'', 'rzorq', subcount[68]) - regexs[18].sub(r'', 'rzorq', subcount[18]) - regexs[39].sub(r'', 'svryqOga', subcount[39]) - 
regexs[39].sub(r'', 'svryqOgaPnapry', subcount[39]) + regexs[89].sub(r'', 'Fcbeg\x0a', count=subcount[89]) + regexs[90].sub(r'', 'Fcbeg\x0a', count=subcount[90]) + regexs[89].sub(r'', 'GI-Fcbg\x0a', count=subcount[89]) + regexs[90].sub(r'', 'GI-Fcbg\x0a', count=subcount[90]) + regexs[89].sub(r'', 'Gbhe\x0a', count=subcount[89]) + regexs[90].sub(r'', 'Gbhe\x0a', count=subcount[90]) + regexs[89].sub(r'', 'Hagreunyghat\x0a', count=subcount[89]) + regexs[90].sub(r'', 'Hagreunyghat\x0a', count=subcount[90]) + regexs[89].sub(r'', 'Ivqrb\x0a', count=subcount[89]) + regexs[90].sub(r'', 'Ivqrb\x0a', count=subcount[90]) + regexs[89].sub(r'', 'Jrggre\x0a', count=subcount[89]) + regexs[90].sub(r'', 'Jrggre\x0a', count=subcount[90]) + regexs[68].sub(r'', strings[82], count=subcount[68]) + regexs[18].sub(r'', strings[82], count=subcount[18]) + regexs[68].sub(r'', strings[83], count=subcount[68]) + regexs[18].sub(r'', strings[83], count=subcount[18]) + regexs[68].sub(r'', strings[84], count=subcount[68]) + regexs[18].sub(r'', strings[84], count=subcount[18]) + regexs[30].sub(r'', 'nqiFreivprObk', count=subcount[30]) + regexs[30].sub(r'', 'nqiFubccvatObk', count=subcount[30]) + regexs[39].sub(r'', 'nwnk', count=subcount[39]) + regexs[40].sub(r'', 'nxghryy', count=subcount[40]) + regexs[41].sub(r'', 'nxghryy', count=subcount[41]) + regexs[42].sub(r'', 'nxghryy', count=subcount[42]) + regexs[43].sub(r'', 'nxghryy', count=subcount[43]) + regexs[44].sub(r'', 'nxghryy', count=subcount[44]) + regexs[45].sub(r'', 'nxghryy', count=subcount[45]) + regexs[46].sub(r'', 'nxghryy', count=subcount[46]) + regexs[47].sub(r'', 'nxghryy', count=subcount[47]) + regexs[48].sub(r'', 'nxghryy', count=subcount[48]) + regexs[40].sub(r'', strings[85], count=subcount[40]) + regexs[41].sub(r'', strings[85], count=subcount[41]) + regexs[42].sub(r'', strings[85], count=subcount[42]) + regexs[43].sub(r'', strings[85], count=subcount[43]) + regexs[44].sub(r'', strings[85], count=subcount[44]) + 
regexs[45].sub(r'', strings[85], count=subcount[45]) + regexs[46].sub(r'', strings[85], count=subcount[46]) + regexs[47].sub(r'', strings[85], count=subcount[47]) + regexs[48].sub(r'', strings[85], count=subcount[48]) + regexs[29].sub(r'', 'pngrtbel', count=subcount[29]) + regexs[30].sub(r'', 'pngrtbel', count=subcount[30]) + regexs[39].sub(r'', 'pybfr', count=subcount[39]) + regexs[39].sub(r'', 'qvi', count=subcount[39]) + regexs[68].sub(r'', strings[86], count=subcount[68]) + regexs[18].sub(r'', strings[86], count=subcount[18]) + regexs[39].sub(r'', 'qg', count=subcount[39]) + regexs[68].sub(r'', 'qg', count=subcount[68]) + regexs[18].sub(r'', 'qg', count=subcount[18]) + regexs[39].sub(r'', 'rzorq', count=subcount[39]) + regexs[68].sub(r'', 'rzorq', count=subcount[68]) + regexs[18].sub(r'', 'rzorq', count=subcount[18]) + regexs[39].sub(r'', 'svryqOga', count=subcount[39]) + regexs[39].sub(r'', 'svryqOgaPnapry', count=subcount[39]) regexs[20].split(r'svz_zlfcnpr_nccf-pnainf,svz_zlfcnpr_havgrq-fgngrf') - regexs[40].sub(r'', 'svanamra', subcount[40]) - regexs[41].sub(r'', 'svanamra', subcount[41]) - regexs[42].sub(r'', 'svanamra', subcount[42]) - regexs[43].sub(r'', 'svanamra', subcount[43]) - regexs[44].sub(r'', 'svanamra', subcount[44]) - regexs[45].sub(r'', 'svanamra', subcount[45]) - regexs[46].sub(r'', 'svanamra', subcount[46]) - regexs[47].sub(r'', 'svanamra', subcount[47]) - regexs[48].sub(r'', 'svanamra', subcount[48]) + regexs[40].sub(r'', 'svanamra', count=subcount[40]) + regexs[41].sub(r'', 'svanamra', count=subcount[41]) + regexs[42].sub(r'', 'svanamra', count=subcount[42]) + regexs[43].sub(r'', 'svanamra', count=subcount[43]) + regexs[44].sub(r'', 'svanamra', count=subcount[44]) + regexs[45].sub(r'', 'svanamra', count=subcount[45]) + regexs[46].sub(r'', 'svanamra', count=subcount[46]) + regexs[47].sub(r'', 'svanamra', count=subcount[47]) + regexs[48].sub(r'', 'svanamra', count=subcount[48]) regexs[70].split(r'sbphf') regexs[70].split(r'sbphf.gno 
sbphfva.gno') regexs[70].split(r'sbphfva') - regexs[39].sub(r'', 'sbez', subcount[39]) - regexs[68].sub(r'', 'sbez.nwnk', subcount[68]) - regexs[18].sub(r'', 'sbez.nwnk', subcount[18]) - regexs[40].sub(r'', 'tnzrf', subcount[40]) - regexs[41].sub(r'', 'tnzrf', subcount[41]) - regexs[42].sub(r'', 'tnzrf', subcount[42]) - regexs[43].sub(r'', 'tnzrf', subcount[43]) - regexs[44].sub(r'', 'tnzrf', subcount[44]) - regexs[45].sub(r'', 'tnzrf', subcount[45]) - regexs[46].sub(r'', 'tnzrf', subcount[46]) - regexs[47].sub(r'', 'tnzrf', subcount[47]) - regexs[48].sub(r'', 'tnzrf', subcount[48]) - regexs[30].sub(r'', 'ubzrcntr', subcount[30]) - regexs[40].sub(r'', 'ubebfxbc', subcount[40]) - regexs[41].sub(r'', 'ubebfxbc', subcount[41]) - regexs[42].sub(r'', 'ubebfxbc', subcount[42]) - regexs[43].sub(r'', 'ubebfxbc', subcount[43]) - regexs[44].sub(r'', 'ubebfxbc', subcount[44]) - regexs[45].sub(r'', 'ubebfxbc', subcount[45]) - regexs[46].sub(r'', 'ubebfxbc', subcount[46]) - regexs[47].sub(r'', 'ubebfxbc', subcount[47]) - regexs[48].sub(r'', 'ubebfxbc', subcount[48]) - regexs[30].sub(r'', 'uc_cebzbobk_ugzy%2Puc_cebzbobk_vzt', subcount[30]) - regexs[30].sub(r'', 'uc_erpgnatyr', subcount[30]) - regexs[33].sub(r'', strings[87], subcount[33]) - regexs[33].sub(r'', strings[88], subcount[33]) + regexs[39].sub(r'', 'sbez', count=subcount[39]) + regexs[68].sub(r'', 'sbez.nwnk', count=subcount[68]) + regexs[18].sub(r'', 'sbez.nwnk', count=subcount[18]) + regexs[40].sub(r'', 'tnzrf', count=subcount[40]) + regexs[41].sub(r'', 'tnzrf', count=subcount[41]) + regexs[42].sub(r'', 'tnzrf', count=subcount[42]) + regexs[43].sub(r'', 'tnzrf', count=subcount[43]) + regexs[44].sub(r'', 'tnzrf', count=subcount[44]) + regexs[45].sub(r'', 'tnzrf', count=subcount[45]) + regexs[46].sub(r'', 'tnzrf', count=subcount[46]) + regexs[47].sub(r'', 'tnzrf', count=subcount[47]) + regexs[48].sub(r'', 'tnzrf', count=subcount[48]) + regexs[30].sub(r'', 'ubzrcntr', count=subcount[30]) + regexs[40].sub(r'', 
'ubebfxbc', count=subcount[40]) + regexs[41].sub(r'', 'ubebfxbc', count=subcount[41]) + regexs[42].sub(r'', 'ubebfxbc', count=subcount[42]) + regexs[43].sub(r'', 'ubebfxbc', count=subcount[43]) + regexs[44].sub(r'', 'ubebfxbc', count=subcount[44]) + regexs[45].sub(r'', 'ubebfxbc', count=subcount[45]) + regexs[46].sub(r'', 'ubebfxbc', count=subcount[46]) + regexs[47].sub(r'', 'ubebfxbc', count=subcount[47]) + regexs[48].sub(r'', 'ubebfxbc', count=subcount[48]) + regexs[30].sub(r'', 'uc_cebzbobk_ugzy%2Puc_cebzbobk_vzt', count=subcount[30]) + regexs[30].sub(r'', 'uc_erpgnatyr', count=subcount[30]) + regexs[33].sub(r'', strings[87], count=subcount[33]) + regexs[33].sub(r'', strings[88], count=subcount[33]) regexs[71].sub( - r'', 'uggc://wf.hv-cbegny.qr/tzk/ubzr/wf/20080602/onfr.wf${4}${5}', subcount[71]) + r'', 'uggc://wf.hv-cbegny.qr/tzk/ubzr/wf/20080602/onfr.wf${4}${5}', count=subcount[71]) regexs[72].sub( - r'', 'uggc://wf.hv-cbegny.qr/tzk/ubzr/wf/20080602/onfr.wf${5}', subcount[72]) + r'', 'uggc://wf.hv-cbegny.qr/tzk/ubzr/wf/20080602/onfr.wf${5}', count=subcount[72]) regexs[71].sub( - r'', 'uggc://wf.hv-cbegny.qr/tzk/ubzr/wf/20080602/qlaYvo.wf${4}${5}', subcount[71]) + r'', 'uggc://wf.hv-cbegny.qr/tzk/ubzr/wf/20080602/qlaYvo.wf${4}${5}', count=subcount[71]) regexs[72].sub( - r'', 'uggc://wf.hv-cbegny.qr/tzk/ubzr/wf/20080602/qlaYvo.wf${5}', subcount[72]) + r'', 'uggc://wf.hv-cbegny.qr/tzk/ubzr/wf/20080602/qlaYvo.wf${5}', count=subcount[72]) regexs[71].sub( - r'', 'uggc://wf.hv-cbegny.qr/tzk/ubzr/wf/20080602/rssrpgYvo.wf${4}${5}', subcount[71]) + r'', 'uggc://wf.hv-cbegny.qr/tzk/ubzr/wf/20080602/rssrpgYvo.wf${4}${5}', count=subcount[71]) regexs[72].sub( - r'', 'uggc://wf.hv-cbegny.qr/tzk/ubzr/wf/20080602/rssrpgYvo.wf${5}', subcount[72]) - regexs[73].sub(r'', strings[89], subcount[73]) + r'', 'uggc://wf.hv-cbegny.qr/tzk/ubzr/wf/20080602/rssrpgYvo.wf${5}', count=subcount[72]) + regexs[73].sub(r'', strings[89], count=subcount[73]) regexs[69].sub( - r'', 
'uggc://zfacbegny.112.2b7.arg/o/ff/zfacbegnyubzr/1/U.7-cqi-2/f55023338617756?[NDO]&{1}&{2}&[NDR]', subcount[69]) - regexs[23].sub(r'', strings[6], subcount[23]) - regexs[40].sub(r'', 'xvab', subcount[40]) - regexs[41].sub(r'', 'xvab', subcount[41]) - regexs[42].sub(r'', 'xvab', subcount[42]) - regexs[43].sub(r'', 'xvab', subcount[43]) - regexs[44].sub(r'', 'xvab', subcount[44]) - regexs[45].sub(r'', 'xvab', subcount[45]) - regexs[46].sub(r'', 'xvab', subcount[46]) - regexs[47].sub(r'', 'xvab', subcount[47]) - regexs[48].sub(r'', 'xvab', subcount[48]) + r'', 'uggc://zfacbegny.112.2b7.arg/o/ff/zfacbegnyubzr/1/U.7-cqi-2/f55023338617756?[NDO]&{1}&{2}&[NDR]', count=subcount[69]) + regexs[23].sub(r'', strings[6], count=subcount[23]) + regexs[40].sub(r'', 'xvab', count=subcount[40]) + regexs[41].sub(r'', 'xvab', count=subcount[41]) + regexs[42].sub(r'', 'xvab', count=subcount[42]) + regexs[43].sub(r'', 'xvab', count=subcount[43]) + regexs[44].sub(r'', 'xvab', count=subcount[44]) + regexs[45].sub(r'', 'xvab', count=subcount[45]) + regexs[46].sub(r'', 'xvab', count=subcount[46]) + regexs[47].sub(r'', 'xvab', count=subcount[47]) + regexs[48].sub(r'', 'xvab', count=subcount[48]) regexs[70].split(r'ybnq') regexs[18].sub( - r'', 'zrqvnzbqgno lhv-anifrg lhv-anifrg-gbc', subcount[18]) - regexs[39].sub(r'', 'zrgn', subcount[39]) - regexs[68].sub(r'', strings[90], subcount[68]) - regexs[18].sub(r'', strings[90], subcount[18]) + r'', 'zrqvnzbqgno lhv-anifrg lhv-anifrg-gbc', count=subcount[18]) + regexs[39].sub(r'', 'zrgn', count=subcount[39]) + regexs[68].sub(r'', strings[90], count=subcount[68]) + regexs[18].sub(r'', strings[90], count=subcount[18]) regexs[70].split(r'zbhfrzbir') regexs[70].split(r'zbhfrzbir.gno') - re.sub(r'^.*jroxvg\/(\d+(\.\d+)?).*$', '', strings[63], 1) - regexs[40].sub(r'', 'zhfvx', subcount[40]) - regexs[41].sub(r'', 'zhfvx', subcount[41]) - regexs[42].sub(r'', 'zhfvx', subcount[42]) - regexs[43].sub(r'', 'zhfvx', subcount[43]) - regexs[44].sub(r'', 'zhfvx', 
subcount[44]) - regexs[45].sub(r'', 'zhfvx', subcount[45]) - regexs[46].sub(r'', 'zhfvx', subcount[46]) - regexs[47].sub(r'', 'zhfvx', subcount[47]) - regexs[48].sub(r'', 'zhfvx', subcount[48]) - regexs[52].sub(r'', 'zlfcnpr_nccf_pnainf', subcount[52]) - regexs[40].sub(r'', strings[91], subcount[40]) - regexs[41].sub(r'', strings[91], subcount[41]) - regexs[42].sub(r'', strings[91], subcount[42]) - regexs[43].sub(r'', strings[91], subcount[43]) - regexs[44].sub(r'', strings[91], subcount[44]) - regexs[45].sub(r'', strings[91], subcount[45]) - regexs[46].sub(r'', strings[91], subcount[46]) - regexs[47].sub(r'', strings[91], subcount[47]) - regexs[48].sub(r'', strings[91], subcount[48]) - regexs[39].sub(r'', 'anzr', subcount[39]) + re.sub(r'^.*jroxvg\/(\d+(\.\d+)?).*$', '', strings[63], count=1) + regexs[40].sub(r'', 'zhfvx', count=subcount[40]) + regexs[41].sub(r'', 'zhfvx', count=subcount[41]) + regexs[42].sub(r'', 'zhfvx', count=subcount[42]) + regexs[43].sub(r'', 'zhfvx', count=subcount[43]) + regexs[44].sub(r'', 'zhfvx', count=subcount[44]) + regexs[45].sub(r'', 'zhfvx', count=subcount[45]) + regexs[46].sub(r'', 'zhfvx', count=subcount[46]) + regexs[47].sub(r'', 'zhfvx', count=subcount[47]) + regexs[48].sub(r'', 'zhfvx', count=subcount[48]) + regexs[52].sub(r'', 'zlfcnpr_nccf_pnainf', count=subcount[52]) + regexs[40].sub(r'', strings[91], count=subcount[40]) + regexs[41].sub(r'', strings[91], count=subcount[41]) + regexs[42].sub(r'', strings[91], count=subcount[42]) + regexs[43].sub(r'', strings[91], count=subcount[43]) + regexs[44].sub(r'', strings[91], count=subcount[44]) + regexs[45].sub(r'', strings[91], count=subcount[45]) + regexs[46].sub(r'', strings[91], count=subcount[46]) + regexs[47].sub(r'', strings[91], count=subcount[47]) + regexs[48].sub(r'', strings[91], count=subcount[48]) + regexs[39].sub(r'', 'anzr', count=subcount[39]) # This prints something different to the V8 version # The V8 version is escaping different things in the string that @@ 
-1523,84 +1523,84 @@ def block11(): # # V8 treats /\S/ like / + escaped S + / # Python treats it like / + \ + S + / - re.sub(r'\b\w+\b', '', strings[92], 0) - - regexs[39].sub(r'', 'bow-nppbeqvba', subcount[39]) - regexs[39].sub(r'', 'bowrpg', subcount[39]) - regexs[68].sub(r'', 'bowrpg', subcount[68]) - regexs[18].sub(r'', 'bowrpg', subcount[18]) - regexs[29].sub(r'', 'cnenzf%2Rfglyrf', subcount[29]) - regexs[30].sub(r'', 'cnenzf%2Rfglyrf', subcount[30]) - regexs[30].sub(r'', 'cbchc', subcount[30]) - regexs[40].sub(r'', 'ebhgr', subcount[40]) - regexs[41].sub(r'', 'ebhgr', subcount[41]) - regexs[42].sub(r'', 'ebhgr', subcount[42]) - regexs[43].sub(r'', 'ebhgr', subcount[43]) - regexs[44].sub(r'', 'ebhgr', subcount[44]) - regexs[45].sub(r'', 'ebhgr', subcount[45]) - regexs[46].sub(r'', 'ebhgr', subcount[46]) - regexs[47].sub(r'', 'ebhgr', subcount[47]) - regexs[48].sub(r'', 'ebhgr', subcount[48]) - regexs[30].sub(r'', 'freivprobk_uc', subcount[30]) - regexs[30].sub(r'', 'fubccvatobk_uc', subcount[30]) - regexs[39].sub(r'', 'fubhgobk', subcount[39]) - regexs[40].sub(r'', 'fcbeg', subcount[40]) - regexs[41].sub(r'', 'fcbeg', subcount[41]) - regexs[42].sub(r'', 'fcbeg', subcount[42]) - regexs[43].sub(r'', 'fcbeg', subcount[43]) - regexs[44].sub(r'', 'fcbeg', subcount[44]) - regexs[45].sub(r'', 'fcbeg', subcount[45]) - regexs[46].sub(r'', 'fcbeg', subcount[46]) - regexs[47].sub(r'', 'fcbeg', subcount[47]) - regexs[48].sub(r'', 'fcbeg', subcount[48]) - regexs[40].sub(r'', 'gbhe', subcount[40]) - regexs[41].sub(r'', 'gbhe', subcount[41]) - regexs[42].sub(r'', 'gbhe', subcount[42]) - regexs[43].sub(r'', 'gbhe', subcount[43]) - regexs[44].sub(r'', 'gbhe', subcount[44]) - regexs[45].sub(r'', 'gbhe', subcount[45]) - regexs[46].sub(r'', 'gbhe', subcount[46]) - regexs[47].sub(r'', 'gbhe', subcount[47]) - regexs[48].sub(r'', 'gbhe', subcount[48]) - regexs[40].sub(r'', 'gi-fcbg', subcount[40]) - regexs[41].sub(r'', 'gi-fcbg', subcount[41]) - regexs[42].sub(r'', 'gi-fcbg', 
subcount[42]) - regexs[43].sub(r'', 'gi-fcbg', subcount[43]) - regexs[44].sub(r'', 'gi-fcbg', subcount[44]) - regexs[45].sub(r'', 'gi-fcbg', subcount[45]) - regexs[46].sub(r'', 'gi-fcbg', subcount[46]) - regexs[47].sub(r'', 'gi-fcbg', subcount[47]) - regexs[48].sub(r'', 'gi-fcbg', subcount[48]) - regexs[39].sub(r'', 'glcr', subcount[39]) - re.sub(r'\/', '', 'haqrsvarq', 0) - regexs[40].sub(r'', strings[93], subcount[40]) - regexs[41].sub(r'', strings[93], subcount[41]) - regexs[42].sub(r'', strings[93], subcount[42]) - regexs[43].sub(r'', strings[93], subcount[43]) - regexs[44].sub(r'', strings[93], subcount[44]) - regexs[45].sub(r'', strings[93], subcount[45]) - regexs[46].sub(r'', strings[93], subcount[46]) - regexs[47].sub(r'', strings[93], subcount[47]) - regexs[48].sub(r'', strings[93], subcount[48]) - regexs[40].sub(r'', 'ivqrb', subcount[40]) - regexs[41].sub(r'', 'ivqrb', subcount[41]) - regexs[42].sub(r'', 'ivqrb', subcount[42]) - regexs[43].sub(r'', 'ivqrb', subcount[43]) - regexs[44].sub(r'', 'ivqrb', subcount[44]) - regexs[45].sub(r'', 'ivqrb', subcount[45]) - regexs[46].sub(r'', 'ivqrb', subcount[46]) - regexs[47].sub(r'', 'ivqrb', subcount[47]) - regexs[48].sub(r'', 'ivqrb', subcount[48]) + re.sub(r'\b\w+\b', '', strings[92], count=0) + + regexs[39].sub(r'', 'bow-nppbeqvba', count=subcount[39]) + regexs[39].sub(r'', 'bowrpg', count=subcount[39]) + regexs[68].sub(r'', 'bowrpg', count=subcount[68]) + regexs[18].sub(r'', 'bowrpg', count=subcount[18]) + regexs[29].sub(r'', 'cnenzf%2Rfglyrf', count=subcount[29]) + regexs[30].sub(r'', 'cnenzf%2Rfglyrf', count=subcount[30]) + regexs[30].sub(r'', 'cbchc', count=subcount[30]) + regexs[40].sub(r'', 'ebhgr', count=subcount[40]) + regexs[41].sub(r'', 'ebhgr', count=subcount[41]) + regexs[42].sub(r'', 'ebhgr', count=subcount[42]) + regexs[43].sub(r'', 'ebhgr', count=subcount[43]) + regexs[44].sub(r'', 'ebhgr', count=subcount[44]) + regexs[45].sub(r'', 'ebhgr', count=subcount[45]) + regexs[46].sub(r'', 'ebhgr', 
count=subcount[46]) + regexs[47].sub(r'', 'ebhgr', count=subcount[47]) + regexs[48].sub(r'', 'ebhgr', count=subcount[48]) + regexs[30].sub(r'', 'freivprobk_uc', count=subcount[30]) + regexs[30].sub(r'', 'fubccvatobk_uc', count=subcount[30]) + regexs[39].sub(r'', 'fubhgobk', count=subcount[39]) + regexs[40].sub(r'', 'fcbeg', count=subcount[40]) + regexs[41].sub(r'', 'fcbeg', count=subcount[41]) + regexs[42].sub(r'', 'fcbeg', count=subcount[42]) + regexs[43].sub(r'', 'fcbeg', count=subcount[43]) + regexs[44].sub(r'', 'fcbeg', count=subcount[44]) + regexs[45].sub(r'', 'fcbeg', count=subcount[45]) + regexs[46].sub(r'', 'fcbeg', count=subcount[46]) + regexs[47].sub(r'', 'fcbeg', count=subcount[47]) + regexs[48].sub(r'', 'fcbeg', count=subcount[48]) + regexs[40].sub(r'', 'gbhe', count=subcount[40]) + regexs[41].sub(r'', 'gbhe', count=subcount[41]) + regexs[42].sub(r'', 'gbhe', count=subcount[42]) + regexs[43].sub(r'', 'gbhe', count=subcount[43]) + regexs[44].sub(r'', 'gbhe', count=subcount[44]) + regexs[45].sub(r'', 'gbhe', count=subcount[45]) + regexs[46].sub(r'', 'gbhe', count=subcount[46]) + regexs[47].sub(r'', 'gbhe', count=subcount[47]) + regexs[48].sub(r'', 'gbhe', count=subcount[48]) + regexs[40].sub(r'', 'gi-fcbg', count=subcount[40]) + regexs[41].sub(r'', 'gi-fcbg', count=subcount[41]) + regexs[42].sub(r'', 'gi-fcbg', count=subcount[42]) + regexs[43].sub(r'', 'gi-fcbg', count=subcount[43]) + regexs[44].sub(r'', 'gi-fcbg', count=subcount[44]) + regexs[45].sub(r'', 'gi-fcbg', count=subcount[45]) + regexs[46].sub(r'', 'gi-fcbg', count=subcount[46]) + regexs[47].sub(r'', 'gi-fcbg', count=subcount[47]) + regexs[48].sub(r'', 'gi-fcbg', count=subcount[48]) + regexs[39].sub(r'', 'glcr', count=subcount[39]) + re.sub(r'\/', '', 'haqrsvarq', count=0) + regexs[40].sub(r'', strings[93], count=subcount[40]) + regexs[41].sub(r'', strings[93], count=subcount[41]) + regexs[42].sub(r'', strings[93], count=subcount[42]) + regexs[43].sub(r'', strings[93], count=subcount[43]) + 
regexs[44].sub(r'', strings[93], count=subcount[44]) + regexs[45].sub(r'', strings[93], count=subcount[45]) + regexs[46].sub(r'', strings[93], count=subcount[46]) + regexs[47].sub(r'', strings[93], count=subcount[47]) + regexs[48].sub(r'', strings[93], count=subcount[48]) + regexs[40].sub(r'', 'ivqrb', count=subcount[40]) + regexs[41].sub(r'', 'ivqrb', count=subcount[41]) + regexs[42].sub(r'', 'ivqrb', count=subcount[42]) + regexs[43].sub(r'', 'ivqrb', count=subcount[43]) + regexs[44].sub(r'', 'ivqrb', count=subcount[44]) + regexs[45].sub(r'', 'ivqrb', count=subcount[45]) + regexs[46].sub(r'', 'ivqrb', count=subcount[46]) + regexs[47].sub(r'', 'ivqrb', count=subcount[47]) + regexs[48].sub(r'', 'ivqrb', count=subcount[48]) regexs[86].split(r'ivfvgf=1') - regexs[40].sub(r'', 'jrggre', subcount[40]) - regexs[41].sub(r'', 'jrggre', subcount[41]) - regexs[42].sub(r'', 'jrggre', subcount[42]) - regexs[43].sub(r'', 'jrggre', subcount[43]) - regexs[44].sub(r'', 'jrggre', subcount[44]) - regexs[45].sub(r'', 'jrggre', subcount[45]) - regexs[46].sub(r'', 'jrggre', subcount[46]) - regexs[47].sub(r'', 'jrggre', subcount[47]) - regexs[48].sub(r'', 'jrggre', subcount[48]) + regexs[40].sub(r'', 'jrggre', count=subcount[40]) + regexs[41].sub(r'', 'jrggre', count=subcount[41]) + regexs[42].sub(r'', 'jrggre', count=subcount[42]) + regexs[43].sub(r'', 'jrggre', count=subcount[43]) + regexs[44].sub(r'', 'jrggre', count=subcount[44]) + regexs[45].sub(r'', 'jrggre', count=subcount[45]) + regexs[46].sub(r'', 'jrggre', count=subcount[46]) + regexs[47].sub(r'', 'jrggre', count=subcount[47]) + regexs[48].sub(r'', 'jrggre', count=subcount[48]) re.search(r'#[a-z0-9]+$', 'uggc://jjj.fpuhryreim.arg/Qrsnhyg', re.IGNORECASE) regexs[66].search(r'fryrpgrq') diff --git a/pyperformance/data-files/benchmarks/bm_sphinx/data/Doc/about.rst b/pyperformance/data-files/benchmarks/bm_sphinx/data/Doc/about.rst new file mode 100644 index 00000000..5e6160ff --- /dev/null +++ 
b/pyperformance/data-files/benchmarks/bm_sphinx/data/Doc/about.rst @@ -0,0 +1,38 @@ +===================== +About these documents +===================== + + +These documents are generated from `reStructuredText`_ sources by `Sphinx`_, a +document processor specifically written for the Python documentation. + +.. _reStructuredText: https://docutils.sourceforge.io/rst.html +.. _Sphinx: https://www.sphinx-doc.org/ + +.. In the online version of these documents, you can submit comments and suggest + changes directly on the documentation pages. + +Development of the documentation and its toolchain is an entirely volunteer +effort, just like Python itself. If you want to contribute, please take a +look at the :ref:`reporting-bugs` page for information on how to do so. New +volunteers are always welcome! + +Many thanks go to: + +* Fred L. Drake, Jr., the creator of the original Python documentation toolset + and writer of much of the content; +* the `Docutils <https://docutils.sourceforge.io/>`_ project for creating + reStructuredText and the Docutils suite; +* Fredrik Lundh for his Alternative Python Reference project from which Sphinx + got many good ideas. + + +Contributors to the Python Documentation +---------------------------------------- + +Many people have contributed to the Python language, the Python standard +library, and the Python documentation. See :source:`Misc/ACKS` in the Python +source distribution for a partial list of contributors. + +It is only with the input and contributions of the Python community +that Python has such wonderful documentation -- Thank You! diff --git a/pyperformance/data-files/benchmarks/bm_sphinx/data/Doc/bugs.rst b/pyperformance/data-files/benchmarks/bm_sphinx/data/Doc/bugs.rst new file mode 100644 index 00000000..9aff2f0f --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_sphinx/data/Doc/bugs.rst @@ -0,0 +1,112 @@ +..
_reporting-bugs: + +***************** +Dealing with Bugs +***************** + +Python is a mature programming language which has established a reputation for +stability. In order to maintain this reputation, the developers would like to +know of any deficiencies you find in Python. + +It can be sometimes faster to fix bugs yourself and contribute patches to +Python as it streamlines the process and involves less people. Learn how to +:ref:`contribute <contributing-to-python>`. + +Documentation bugs +================== + +If you find a bug in this documentation or would like to propose an improvement, +please submit a bug report on the :ref:`tracker <using-the-tracker>`. If you +have a suggestion on how to fix it, include that as well. + +You can also open a discussion item on our +`Documentation Discourse forum <https://discuss.python.org/c/documentation/26>`_. + +If you find a bug in the theme (HTML / CSS / JavaScript) of the +documentation, please submit a bug report on the `python-doc-theme bug +tracker <https://github.com/python/python-docs-theme>`_. + +If you're short on time, you can also email documentation bug reports to +docs@python.org (behavioral bugs can be sent to python-list@python.org). +'docs@' is a mailing list run by volunteers; your request will be noticed, +though it may take a while to be processed. + +.. seealso:: + + `Documentation bugs`_ + A list of documentation bugs that have been submitted to the Python issue tracker. + + `Issue Tracking <https://devguide.python.org/tracker/>`_ + Overview of the process involved in reporting an improvement on the tracker. + + `Helping with Documentation <https://devguide.python.org/docquality/#helping-with-documentation>`_ + Comprehensive guide for individuals that are interested in contributing to Python documentation. + + `Documentation Translations <https://devguide.python.org/documentation/translating/>`_ + A list of GitHub pages for documentation translation and their primary contacts. + + +.. _using-the-tracker: + +Using the Python issue tracker +============================== + +Issue reports for Python itself should be submitted via the GitHub issues +tracker (https://github.com/python/cpython/issues).
+The GitHub issues tracker offers a web form which allows pertinent information +to be entered and submitted to the developers. + +The first step in filing a report is to determine whether the problem has +already been reported. The advantage in doing so, aside from saving the +developers' time, is that you learn what has been done to fix it; it may be that +the problem has already been fixed for the next release, or additional +information is needed (in which case you are welcome to provide it if you can!). +To do this, search the tracker using the search box at the top of the page. + +If the problem you're reporting is not already in the list, log in to GitHub. +If you don't already have a GitHub account, create a new account using the +"Sign up" link. +It is not possible to submit a bug report anonymously. + +Being now logged in, you can submit an issue. +Click on the "New issue" button in the top bar to report a new issue. + +The submission form has two fields, "Title" and "Comment". + +For the "Title" field, enter a *very* short description of the problem; +fewer than ten words is good. + +In the "Comment" field, describe the problem in detail, including what you +expected to happen and what did happen. Be sure to include whether any +extension modules were involved, and what hardware and software platform you +were using (including version information as appropriate). + +Each issue report will be reviewed by a developer who will determine what needs to +be done to correct the problem. You will receive an update each time an action is +taken on the issue. + + +.. seealso:: + + `How to Report Bugs Effectively <https://www.chiark.greenend.org.uk/~sgtatham/bugs.html>`_ + Article which goes into some detail about how to create a useful bug report. + This describes what kind of information is useful and why it is useful. + + `Bug Writing Guidelines <https://bugzilla.mozilla.org/page.cgi?id=bug-writing.html>`_ + Information about writing a good bug report. Some of this is specific to the + Mozilla project, but describes general good practices. + +..
_contributing-to-python: + +Getting started contributing to Python yourself +=============================================== + +Beyond just reporting bugs that you find, you are also welcome to submit +patches to fix them. You can find more information on how to get started +patching Python in the `Python Developer's Guide`_. If you have questions, +the `core-mentorship mailing list`_ is a friendly place to get answers to +any and all questions pertaining to the process of fixing issues in Python. + +.. _Documentation bugs: https://github.com/python/cpython/issues?q=is%3Aissue+is%3Aopen+label%3Adocs +.. _Python Developer's Guide: https://devguide.python.org/ +.. _core-mentorship mailing list: https://mail.python.org/mailman3/lists/core-mentorship.python.org/ diff --git a/pyperformance/data-files/benchmarks/bm_sphinx/data/Doc/conf.py b/pyperformance/data-files/benchmarks/bm_sphinx/data/Doc/conf.py new file mode 100644 index 00000000..8e505836 --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_sphinx/data/Doc/conf.py @@ -0,0 +1,56 @@ +import os +import sys + +sys.path.append(os.path.abspath('tools/extensions')) + +extensions = [ + 'pyspecific', + 'sphinx.ext.extlinks', +] + +manpages_url = 'https://manpages.debian.org/{path}' + +# General substitutions. +project = 'Python' +copyright = f"2001, Python Software Foundation" + +version = release = sys.version.split(" ", 1)[0] + +rst_epilog = f""" +.. |python_version_literal| replace:: ``Python {version}`` +.. |python_x_dot_y_literal| replace:: ``python{version}`` +.. |usr_local_bin_python_x_dot_y_literal| replace:: ``/usr/local/bin/python{version}`` +""" + +# There are two options for replacing |today|: either, you set today to some +# non-false value, then it is used: +today = '' +# Else, today_fmt is used as the format for a strftime call. +today_fmt = '%B %d, %Y' + +# By default, highlight as Python 3. 
+highlight_language = 'python3' + +# Minimum version of sphinx required +needs_sphinx = '6.2.1' + +# Create table of contents entries for domain objects (e.g. functions, classes, +# attributes, etc.). Default is True. +toc_object_entries = False + +# Disable Docutils smartquotes for several translations +smartquotes_excludes = { + 'languages': ['ja', 'fr', 'zh_TW', 'zh_CN'], + 'builders': ['man', 'text'], +} + +# Avoid a warning with Sphinx >= 4.0 +root_doc = 'contents' + +extlinks = { + "cve": ("https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-%s", "CVE-%s"), + "cwe": ("https://cwe.mitre.org/data/definitions/%s.html", "CWE-%s"), + "pypi": ("https://pypi.org/project/%s/", "%s"), + "source": ('https://github.com/python/cpython/tree/3.13/%s', "%s"), +} +extlinks_detect_hardcoded_links = True diff --git a/pyperformance/data-files/benchmarks/bm_sphinx/data/Doc/constraints.txt b/pyperformance/data-files/benchmarks/bm_sphinx/data/Doc/constraints.txt new file mode 100644 index 00000000..26ac1862 --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_sphinx/data/Doc/constraints.txt @@ -0,0 +1,26 @@ +# We have upper bounds on our transitive dependencies here +# To avoid new releases unexpectedly breaking our build. +# This file can be updated on an ad-hoc basis, +# though it will probably have to be updated +# whenever Doc/requirements.txt is updated. 
+ +# Direct dependencies of Sphinx +babel<3 +colorama<0.5 +imagesize<2 +Jinja2<4 +packaging<25 +Pygments<3 +requests<3 +snowballstemmer<3 +# keep lower-bounds until Sphinx 8.1 is released +# https://github.com/sphinx-doc/sphinx/pull/12756 +sphinxcontrib-applehelp>=1.0.7,<3 +sphinxcontrib-devhelp>=1.0.6,<3 +sphinxcontrib-htmlhelp>=2.0.6,<3 +sphinxcontrib-jsmath>=1.0.1,<2 +sphinxcontrib-qthelp>=1.0.6,<3 +sphinxcontrib-serializinghtml>=1.1.9,<3 + +# Direct dependencies of Jinja2 (Jinja is a dependency of Sphinx, see above) +MarkupSafe<3 diff --git a/pyperformance/data-files/benchmarks/bm_sphinx/data/Doc/contents.rst b/pyperformance/data-files/benchmarks/bm_sphinx/data/Doc/contents.rst new file mode 100644 index 00000000..b57f4b09 --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_sphinx/data/Doc/contents.rst @@ -0,0 +1,23 @@ +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + Python Documentation contents +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +.. toctree:: + + whatsnew/index.rst + tutorial/index.rst + using/index.rst + reference/index.rst + library/index.rst + extending/index.rst + c-api/index.rst + installing/index.rst + howto/index.rst + faq/index.rst + deprecations/index.rst + glossary.rst + + about.rst + bugs.rst + copyright.rst + license.rst diff --git a/pyperformance/data-files/benchmarks/bm_sphinx/data/Doc/copyright.rst b/pyperformance/data-files/benchmarks/bm_sphinx/data/Doc/copyright.rst new file mode 100644 index 00000000..8629ed1f --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_sphinx/data/Doc/copyright.rst @@ -0,0 +1,19 @@ +********* +Copyright +********* + +Python and this documentation is: + +Copyright © 2001-2024 Python Software Foundation. All rights reserved. + +Copyright © 2000 BeOpen.com. All rights reserved. + +Copyright © 1995-2000 Corporation for National Research Initiatives. All rights +reserved. + +Copyright © 1991-1995 Stichting Mathematisch Centrum. All rights reserved. 
+ +------- + +See :ref:`history-and-license` for complete license and permissions information. + diff --git a/pyperformance/data-files/benchmarks/bm_sphinx/data/Doc/glossary.rst b/pyperformance/data-files/benchmarks/bm_sphinx/data/Doc/glossary.rst new file mode 100644 index 00000000..97cee075 --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_sphinx/data/Doc/glossary.rst @@ -0,0 +1,1313 @@ +.. _glossary: + +******** +Glossary +******** + +.. if you add new entries, keep the alphabetical sorting! + +.. glossary:: + + ``>>>`` + The default Python prompt of the :term:`interactive` shell. Often + seen for code examples which can be executed interactively in the + interpreter. + + ``...`` + Can refer to: + + * The default Python prompt of the :term:`interactive` shell when entering the + code for an indented code block, when within a pair of matching left and + right delimiters (parentheses, square brackets, curly braces or triple + quotes), or after specifying a decorator. + + * The :const:`Ellipsis` built-in constant. + + abstract base class + Abstract base classes complement :term:`duck-typing` by + providing a way to define interfaces when other techniques like + :func:`hasattr` would be clumsy or subtly wrong (for example with + :ref:`magic methods <special-lookup>`). ABCs introduce virtual + subclasses, which are classes that don't inherit from a class but are + still recognized by :func:`isinstance` and :func:`issubclass`; see the + :mod:`abc` module documentation. Python comes with many built-in ABCs for + data structures (in the :mod:`collections.abc` module), numbers (in the + :mod:`numbers` module), streams (in the :mod:`io` module), import finders + and loaders (in the :mod:`importlib.abc` module). You can create your own + ABCs with the :mod:`abc` module. + + annotation + A label associated with a variable, a class + attribute or a function parameter or return value, + used by convention as a :term:`type hint`.
+ + Annotations of local variables cannot be accessed at runtime, but + annotations of global variables, class attributes, and functions + are stored in the :attr:`__annotations__` + special attribute of modules, classes, and functions, + respectively. + + See :term:`variable annotation`, :term:`function annotation`, :pep:`484` + and :pep:`526`, which describe this functionality. + Also see :ref:`annotations-howto` + for best practices on working with annotations. + + argument + A value passed to a :term:`function` (or :term:`method`) when calling the + function. There are two kinds of argument: + + * :dfn:`keyword argument`: an argument preceded by an identifier (e.g. + ``name=``) in a function call or passed as a value in a dictionary + preceded by ``**``. For example, ``3`` and ``5`` are both keyword + arguments in the following calls to :func:`complex`:: + + complex(real=3, imag=5) + complex(**{'real': 3, 'imag': 5}) + + * :dfn:`positional argument`: an argument that is not a keyword argument. + Positional arguments can appear at the beginning of an argument list + and/or be passed as elements of an :term:`iterable` preceded by ``*``. + For example, ``3`` and ``5`` are both positional arguments in the + following calls:: + + complex(3, 5) + complex(*(3, 5)) + + Arguments are assigned to the named local variables in a function body. + See the :ref:`calls` section for the rules governing this assignment. + Syntactically, any expression can be used to represent an argument; the + evaluated value is assigned to the local variable. + + See also the :term:`parameter` glossary entry, the FAQ question on + :ref:`the difference between arguments and parameters + <faq-argument-vs-parameter>`, and :pep:`362`. + + asynchronous context manager + An object which controls the environment seen in an + :keyword:`async with` statement by defining :meth:`~object.__aenter__` and + :meth:`~object.__aexit__` methods. Introduced by :pep:`492`.
+ + asynchronous generator + A function which returns an :term:`asynchronous generator iterator`. It + looks like a coroutine function defined with :keyword:`async def` except + that it contains :keyword:`yield` expressions for producing a series of + values usable in an :keyword:`async for` loop. + + Usually refers to an asynchronous generator function, but may refer to an + *asynchronous generator iterator* in some contexts. In cases where the + intended meaning isn't clear, using the full terms avoids ambiguity. + + An asynchronous generator function may contain :keyword:`await` + expressions as well as :keyword:`async for`, and :keyword:`async with` + statements. + + asynchronous generator iterator + An object created by an :term:`asynchronous generator` function. + + This is an :term:`asynchronous iterator` which when called using the + :meth:`~object.__anext__` method returns an awaitable object which will execute + the body of the asynchronous generator function until the next + :keyword:`yield` expression. + + Each :keyword:`yield` temporarily suspends processing, remembering the + execution state (including local variables and pending + try-statements). When the *asynchronous generator iterator* effectively + resumes with another awaitable returned by :meth:`~object.__anext__`, it + picks up where it left off. See :pep:`492` and :pep:`525`. + + asynchronous iterable + An object that can be used in an :keyword:`async for` statement. + Must return an :term:`asynchronous iterator` from its + :meth:`~object.__aiter__` method. Introduced by :pep:`492`. + + asynchronous iterator + An object that implements the :meth:`~object.__aiter__` and :meth:`~object.__anext__` + methods. :meth:`~object.__anext__` must return an :term:`awaitable` object. + :keyword:`async for` resolves the awaitables returned by an asynchronous + iterator's :meth:`~object.__anext__` method until it raises a + :exc:`StopAsyncIteration` exception. Introduced by :pep:`492`.
+ + attribute + A value associated with an object which is usually referenced by name + using dotted expressions. + For example, if an object *o* has an attribute + *a* it would be referenced as *o.a*. + + It is possible to give an object an attribute whose name is not an + identifier as defined by :ref:`identifiers`, for example using + :func:`setattr`, if the object allows it. + Such an attribute will not be accessible using a dotted expression, + and would instead need to be retrieved with :func:`getattr`. + + awaitable + An object that can be used in an :keyword:`await` expression. Can be + a :term:`coroutine` or an object with an :meth:`~object.__await__` method. + See also :pep:`492`. + + BDFL + Benevolent Dictator For Life, a.k.a. `Guido van Rossum + <https://gvanrossum.github.io/>`_, Python's creator. + + binary file + A :term:`file object` able to read and write + :term:`bytes-like objects <bytes-like object>`. + Examples of binary files are files opened in binary mode (``'rb'``, + ``'wb'`` or ``'rb+'``), :data:`sys.stdin.buffer <sys.stdin>`, + :data:`sys.stdout.buffer <sys.stdout>`, and instances of + :class:`io.BytesIO` and :class:`gzip.GzipFile`. + + See also :term:`text file` for a file object able to read and write + :class:`str` objects. + + borrowed reference + In Python's C API, a borrowed reference is a reference to an object, + where the code using the object does not own the reference. + It becomes a dangling + pointer if the object is destroyed. For example, a garbage collection can + remove the last :term:`strong reference` to the object and so destroy it. + + Calling :c:func:`Py_INCREF` on the :term:`borrowed reference` is + recommended to convert it to a :term:`strong reference` in-place, except + when the object cannot be destroyed before the last usage of the borrowed + reference. The :c:func:`Py_NewRef` function can be used to create a new + :term:`strong reference`. + + bytes-like object + An object that supports the :ref:`bufferobjects` and can + export a C-:term:`contiguous` buffer.
This includes all :class:`bytes`, + :class:`bytearray`, and :class:`array.array` objects, as well as many + common :class:`memoryview` objects. Bytes-like objects can + be used for various operations that work with binary data; these include + compression, saving to a binary file, and sending over a socket. + + Some operations need the binary data to be mutable. The documentation + often refers to these as "read-write bytes-like objects". Example + mutable buffer objects include :class:`bytearray` and a + :class:`memoryview` of a :class:`bytearray`. + Other operations require the binary data to be stored in + immutable objects ("read-only bytes-like objects"); examples + of these include :class:`bytes` and a :class:`memoryview` + of a :class:`bytes` object. + + bytecode + Python source code is compiled into bytecode, the internal representation + of a Python program in the CPython interpreter. The bytecode is also + cached in ``.pyc`` files so that executing the same file is + faster the second time (recompilation from source to bytecode can be + avoided). This "intermediate language" is said to run on a + :term:`virtual machine` that executes the machine code corresponding to + each bytecode. Do note that bytecodes are not expected to work between + different Python virtual machines, nor to be stable between Python + releases. + + A list of bytecode instructions can be found in the documentation for + :ref:`the dis module `. + + callable + A callable is an object that can be called, possibly with a set + of arguments (see :term:`argument`), with the following syntax:: + + callable(argument1, argument2, argumentN) + + A :term:`function`, and by extension a :term:`method`, is a callable. + An instance of a class that implements the :meth:`~object.__call__` + method is also a callable. + + callback + A subroutine function which is passed as an argument to be executed at + some point in the future. + + class + A template for creating user-defined objects. 
Class definitions + normally contain method definitions which operate on instances of the + class. + + class variable + A variable defined in a class and intended to be modified only at + class level (i.e., not in an instance of the class). + + complex number + An extension of the familiar real number system in which all numbers are + expressed as a sum of a real part and an imaginary part. Imaginary + numbers are real multiples of the imaginary unit (the square root of + ``-1``), often written ``i`` in mathematics or ``j`` in + engineering. Python has built-in support for complex numbers, which are + written with this latter notation; the imaginary part is written with a + ``j`` suffix, e.g., ``3+1j``. To get access to complex equivalents of the + :mod:`math` module, use :mod:`cmath`. Use of complex numbers is a fairly + advanced mathematical feature. If you're not aware of a need for them, + it's almost certain you can safely ignore them. + + context manager + An object which controls the environment seen in a :keyword:`with` + statement by defining :meth:`~object.__enter__` and :meth:`~object.__exit__` methods. + See :pep:`343`. + + context variable + A variable which can have different values depending on its context. + This is similar to Thread-Local Storage in which each execution + thread may have a different value for a variable. However, with context + variables, there may be several contexts in one execution thread and the + main usage for context variables is to keep track of variables in + concurrent asynchronous tasks. + See :mod:`contextvars`. + + contiguous + .. index:: C-contiguous, Fortran contiguous + + A buffer is considered contiguous exactly if it is either + *C-contiguous* or *Fortran contiguous*. Zero-dimensional buffers are + C and Fortran contiguous. In one-dimensional arrays, the items + must be laid out in memory next to each other, in order of + increasing indexes starting from zero. 
In multidimensional + C-contiguous arrays, the last index varies the fastest when + visiting items in order of memory address. However, in + Fortran contiguous arrays, the first index varies the fastest. + + coroutine + Coroutines are a more generalized form of subroutines. Subroutines are + entered at one point and exited at another point. Coroutines can be + entered, exited, and resumed at many different points. They can be + implemented with the :keyword:`async def` statement. See also + :pep:`492`. + + coroutine function + A function which returns a :term:`coroutine` object. A coroutine + function may be defined with the :keyword:`async def` statement, + and may contain :keyword:`await`, :keyword:`async for`, and + :keyword:`async with` keywords. These were introduced + by :pep:`492`. + + CPython + The canonical implementation of the Python programming language, as + distributed on `python.org `_. The term "CPython" + is used when necessary to distinguish this implementation from others + such as Jython or IronPython. + + decorator + A function returning another function, usually applied as a function + transformation using the ``@wrapper`` syntax. Common examples for + decorators are :func:`classmethod` and :func:`staticmethod`. + + The decorator syntax is merely syntactic sugar, the following two + function definitions are semantically equivalent:: + + def f(arg): + ... + f = staticmethod(f) + + @staticmethod + def f(arg): + ... + + The same concept exists for classes, but is less commonly used there. See + the documentation for :ref:`function definitions ` and + :ref:`class definitions ` for more about decorators. + + descriptor + Any object which defines the methods :meth:`~object.__get__`, + :meth:`~object.__set__`, or :meth:`~object.__delete__`. + When a class attribute is a descriptor, its special + binding behavior is triggered upon attribute lookup. 
Normally, using + *a.b* to get, set or delete an attribute looks up the object named *b* in + the class dictionary for *a*, but if *b* is a descriptor, the respective + descriptor method gets called. Understanding descriptors is a key to a + deep understanding of Python because they are the basis for many features + including functions, methods, properties, class methods, static methods, + and reference to super classes. + + For more information about descriptors' methods, see :ref:`descriptors` + or the :ref:`Descriptor How To Guide `. + + dictionary + An associative array, where arbitrary keys are mapped to values. The + keys can be any object with :meth:`~object.__hash__` and + :meth:`~object.__eq__` methods. + Called a hash in Perl. + + dictionary comprehension + A compact way to process all or part of the elements in an iterable and + return a dictionary with the results. ``results = {n: n ** 2 for n in + range(10)}`` generates a dictionary containing key ``n`` mapped to + value ``n ** 2``. See :ref:`comprehensions`. + + dictionary view + The objects returned from :meth:`dict.keys`, :meth:`dict.values`, and + :meth:`dict.items` are called dictionary views. They provide a dynamic + view on the dictionary’s entries, which means that when the dictionary + changes, the view reflects these changes. To force the + dictionary view to become a full list use ``list(dictview)``. See + :ref:`dict-views`. + + docstring + A string literal which appears as the first expression in a class, + function or module. While ignored when the suite is executed, it is + recognized by the compiler and put into the :attr:`~definition.__doc__` attribute + of the enclosing class, function or module. Since it is available via + introspection, it is the canonical place for documentation of the + object. 
+ + duck-typing + A programming style which does not look at an object's type to determine + if it has the right interface; instead, the method or attribute is simply + called or used ("If it looks like a duck and quacks like a duck, it + must be a duck.") By emphasizing interfaces rather than specific types, + well-designed code improves its flexibility by allowing polymorphic + substitution. Duck-typing avoids tests using :func:`type` or + :func:`isinstance`. (Note, however, that duck-typing can be complemented + with :term:`abstract base classes <abstract base class>`.) Instead, it + typically employs :func:`hasattr` tests or :term:`EAFP` programming. + + EAFP + Easier to ask for forgiveness than permission. This common Python coding + style assumes the existence of valid keys or attributes and catches + exceptions if the assumption proves false. This clean and fast style is + characterized by the presence of many :keyword:`try` and :keyword:`except` + statements. The technique contrasts with the :term:`LBYL` style + common to many other languages such as C. + + expression + A piece of syntax which can be evaluated to some value. In other words, + an expression is an accumulation of expression elements like literals, + names, attribute access, operators or function calls which all return a + value. In contrast to many other languages, not all language constructs + are expressions. There are also :term:`statement`\s which cannot be used + as expressions, such as :keyword:`while`. Assignments are also statements, + not expressions. + + extension module + A module written in C or C++, using Python's C API to interact with the + core and with user code. + + f-string + String literals prefixed with ``'f'`` or ``'F'`` are commonly called + "f-strings" which is short for + :ref:`formatted string literals <f-strings>`. See also :pep:`498`. + + file object + An object exposing a file-oriented API (with methods such as + :meth:`!read` or :meth:`!write`) to an underlying resource.
Depending + on the way it was created, a file object can mediate access to a real + on-disk file or to another type of storage or communication device + (for example standard input/output, in-memory buffers, sockets, pipes, + etc.). File objects are also called :dfn:`file-like objects` or + :dfn:`streams`. + + There are actually three categories of file objects: raw + :term:`binary files <binary file>`, buffered + :term:`binary files <binary file>` and :term:`text files <text file>`. + Their interfaces are defined in the :mod:`io` module. The canonical + way to create a file object is by using the :func:`open` function. + + file-like object + A synonym for :term:`file object`. + + filesystem encoding and error handler + Encoding and error handler used by Python to decode bytes from the + operating system and encode Unicode to the operating system. + + The filesystem encoding must guarantee to successfully decode all bytes + below 128. If the file system encoding fails to provide this guarantee, + API functions can raise :exc:`UnicodeError`. + + The :func:`sys.getfilesystemencoding` and + :func:`sys.getfilesystemencodeerrors` functions can be used to get the + filesystem encoding and error handler. + + The :term:`filesystem encoding and error handler` are configured at + Python startup by the :c:func:`PyConfig_Read` function: see + :c:member:`~PyConfig.filesystem_encoding` and + :c:member:`~PyConfig.filesystem_errors` members of :c:type:`PyConfig`. + + See also the :term:`locale encoding`. + + finder + An object that tries to find the :term:`loader` for a module that is + being imported. + + There are two types of finder: :term:`meta path finders + <meta path finder>` for use with :data:`sys.meta_path`, and :term:`path + entry finders <path entry finder>` for use with :data:`sys.path_hooks`. + + See :ref:`importsystem` and :mod:`importlib` for much more detail. + + floor division + Mathematical division that rounds down to nearest integer. The floor + division operator is ``//``.
For example, the expression ``11 // 4`` + evaluates to ``2`` in contrast to the ``2.75`` returned by float true + division. Note that ``(-11) // 4`` is ``-3`` because that is ``-2.75`` + rounded *downward*. See :pep:`238`. + + free threading + A threading model where multiple threads can run Python bytecode + simultaneously within the same interpreter. This is in contrast to + the :term:`global interpreter lock` which allows only one thread to + execute Python bytecode at a time. See :pep:`703`. + + function + A series of statements which returns some value to a caller. It can also + be passed zero or more :term:`arguments <argument>` which may be used in + the execution of the body. See also :term:`parameter`, :term:`method`, + and the :ref:`function` section. + + function annotation + An :term:`annotation` of a function parameter or return value. + + Function annotations are usually used for + :term:`type hints <type hint>`: for example, this function is expected to take two + :class:`int` arguments and is also expected to have an :class:`int` + return value:: + + def sum_two_numbers(a: int, b: int) -> int: + return a + b + + Function annotation syntax is explained in section :ref:`function`. + + See :term:`variable annotation` and :pep:`484`, + which describe this functionality. + Also see :ref:`annotations-howto` + for best practices on working with annotations. + + __future__ + A :ref:`future statement <future>`, ``from __future__ import <feature>``, + directs the compiler to compile the current module using syntax or + semantics that will become standard in a future release of Python. + The :mod:`__future__` module documents the possible values of + *feature*.
By importing this module and evaluating its variables, + you can see when a new feature was first added to the language and + when it will (or did) become the default:: + + >>> import __future__ + >>> __future__.division + _Feature((2, 2, 0, 'alpha', 2), (3, 0, 0, 'alpha', 0), 8192) + + garbage collection + The process of freeing memory when it is not used anymore. Python + performs garbage collection via reference counting and a cyclic garbage + collector that is able to detect and break reference cycles. The + garbage collector can be controlled using the :mod:`gc` module. + + .. index:: single: generator + + generator + A function which returns a :term:`generator iterator`. It looks like a + normal function except that it contains :keyword:`yield` expressions + for producing a series of values usable in a for-loop or that can be + retrieved one at a time with the :func:`next` function. + + Usually refers to a generator function, but may refer to a + *generator iterator* in some contexts. In cases where the intended + meaning isn't clear, using the full terms avoids ambiguity. + + generator iterator + An object created by a :term:`generator` function. + + Each :keyword:`yield` temporarily suspends processing, remembering the + execution state (including local variables and pending + try-statements). When the *generator iterator* resumes, it picks up where + it left off (in contrast to functions which start fresh on every + invocation). + + .. index:: single: generator expression + + generator expression + An :term:`expression` that returns an :term:`iterator`. It looks like a normal expression + followed by a :keyword:`!for` clause defining a loop variable, range, + and an optional :keyword:`!if` clause. The combined expression + generates values for an enclosing function:: + + >>> sum(i*i for i in range(10)) # sum of squares 0, 1, 4, ...
81 + 285 + + generic function + A function composed of multiple functions implementing the same operation + for different types. Which implementation should be used during a call is + determined by the dispatch algorithm. + + See also the :term:`single dispatch` glossary entry, the + :func:`functools.singledispatch` decorator, and :pep:`443`. + + generic type + A :term:`type` that can be parameterized; typically a + :ref:`container class<sequence-types>` such as :class:`list` or + :class:`dict`. Used for :term:`type hints <type hint>` and + :term:`annotations <annotation>`. + + For more details, see :ref:`generic alias types<types-genericalias>`, + :pep:`483`, :pep:`484`, :pep:`585`, and the :mod:`typing` module. + + GIL + See :term:`global interpreter lock`. + + global interpreter lock + The mechanism used by the :term:`CPython` interpreter to assure that + only one thread executes Python :term:`bytecode` at a time. + This simplifies the CPython implementation by making the object model + (including critical built-in types such as :class:`dict`) implicitly + safe against concurrent access. Locking the entire interpreter + makes it easier for the interpreter to be multi-threaded, at the + expense of much of the parallelism afforded by multi-processor + machines. + + However, some extension modules, either standard or third-party, + are designed so as to release the GIL when doing computationally intensive + tasks such as compression or hashing. Also, the GIL is always released + when doing I/O. + + As of Python 3.13, the GIL can be disabled using the :option:`--disable-gil` + build configuration. After building Python with this option, code must be + run with :option:`-X gil=0 <-X>` or after setting the :envvar:`PYTHON_GIL=0 <PYTHON_GIL>` + environment variable. This feature enables improved performance for + multi-threaded applications and makes it easier to use multi-core CPUs + efficiently. For more details, see :pep:`703`.
+ + hash-based pyc + A bytecode cache file that uses the hash rather than the last-modified + time of the corresponding source file to determine its validity. See + :ref:`pyc-invalidation`. + + hashable + An object is *hashable* if it has a hash value which never changes during + its lifetime (it needs a :meth:`~object.__hash__` method), and can be + compared to other objects (it needs an :meth:`~object.__eq__` method). + Hashable objects which + compare equal must have the same hash value. + + Hashability makes an object usable as a dictionary key and a set member, + because these data structures use the hash value internally. + + Most of Python's immutable built-in objects are hashable; mutable + containers (such as lists or dictionaries) are not; immutable + containers (such as tuples and frozensets) are only hashable if + their elements are hashable. Objects which are + instances of user-defined classes are hashable by default. They all + compare unequal (except with themselves), and their hash value is derived + from their :func:`id`. + + IDLE + An Integrated Development and Learning Environment for Python. + :ref:`idle` is a basic editor and interpreter environment + which ships with the standard distribution of Python. + + immortal + *Immortal objects* are a CPython implementation detail introduced + in :pep:`683`. + + If an object is immortal, its :term:`reference count` is never modified, + and therefore it is never deallocated while the interpreter is running. + For example, :const:`True` and :const:`None` are immortal in CPython. + + immutable + An object with a fixed value. Immutable objects include numbers, strings and + tuples. Such an object cannot be altered. A new object has to + be created if a different value has to be stored. They play an important + role in places where a constant hash value is needed, for example as a key + in a dictionary. 
+ + import path + A list of locations (or :term:`path entries `) that are + searched by the :term:`path based finder` for modules to import. During + import, this list of locations usually comes from :data:`sys.path`, but + for subpackages it may also come from the parent package's ``__path__`` + attribute. + + importing + The process by which Python code in one module is made available to + Python code in another module. + + importer + An object that both finds and loads a module; both a + :term:`finder` and :term:`loader` object. + + interactive + Python has an interactive interpreter which means you can enter + statements and expressions at the interpreter prompt, immediately + execute them and see their results. Just launch ``python`` with no + arguments (possibly by selecting it from your computer's main + menu). It is a very powerful way to test out new ideas or inspect + modules and packages (remember ``help(x)``). For more on interactive + mode, see :ref:`tut-interac`. + + interpreted + Python is an interpreted language, as opposed to a compiled one, + though the distinction can be blurry because of the presence of the + bytecode compiler. This means that source files can be run directly + without explicitly creating an executable which is then run. + Interpreted languages typically have a shorter development/debug cycle + than compiled ones, though their programs generally also run more + slowly. See also :term:`interactive`. + + interpreter shutdown + When asked to shut down, the Python interpreter enters a special phase + where it gradually releases all allocated resources, such as modules + and various critical internal structures. It also makes several calls + to the :term:`garbage collector `. This can trigger + the execution of code in user-defined destructors or weakref callbacks. 
+ Code executed during the shutdown phase can encounter various + exceptions as the resources it relies on may not function anymore + (common examples are library modules or the warnings machinery). + + The main reason for interpreter shutdown is that the ``__main__`` module + or the script being run has finished executing. + + iterable + An object capable of returning its members one at a time. Examples of + iterables include all sequence types (such as :class:`list`, :class:`str`, + and :class:`tuple`) and some non-sequence types like :class:`dict`, + :term:`file objects `, and objects of any classes you define + with an :meth:`~iterator.__iter__` method or with a + :meth:`~object.__getitem__` method + that implements :term:`sequence` semantics. + + Iterables can be + used in a :keyword:`for` loop and in many other places where a sequence is + needed (:func:`zip`, :func:`map`, ...). When an iterable object is passed + as an argument to the built-in function :func:`iter`, it returns an + iterator for the object. This iterator is good for one pass over the set + of values. When using iterables, it is usually not necessary to call + :func:`iter` or deal with iterator objects yourself. The :keyword:`for` + statement does that automatically for you, creating a temporary unnamed + variable to hold the iterator for the duration of the loop. See also + :term:`iterator`, :term:`sequence`, and :term:`generator`. + + iterator + An object representing a stream of data. Repeated calls to the iterator's + :meth:`~iterator.__next__` method (or passing it to the built-in function + :func:`next`) return successive items in the stream. When no more data + are available a :exc:`StopIteration` exception is raised instead. At this + point, the iterator object is exhausted and any further calls to its + :meth:`!__next__` method just raise :exc:`StopIteration` again. 
Iterators + are required to have an :meth:`~iterator.__iter__` method that returns the iterator + object itself so every iterator is also iterable and may be used in most + places where other iterables are accepted. One notable exception is code + which attempts multiple iteration passes. A container object (such as a + :class:`list`) produces a fresh new iterator each time you pass it to the + :func:`iter` function or use it in a :keyword:`for` loop. Attempting this + with an iterator will just return the same exhausted iterator object used + in the previous iteration pass, making it appear like an empty container. + + More information can be found in :ref:`typeiter`. + + .. impl-detail:: + + CPython does not consistently apply the requirement that an iterator + define :meth:`~iterator.__iter__`. + And also please note that the free-threading CPython does not guarantee + the thread-safety of iterator operations. + + + key function + A key function or collation function is a callable that returns a value + used for sorting or ordering. For example, :func:`locale.strxfrm` is + used to produce a sort key that is aware of locale specific sort + conventions. + + A number of tools in Python accept key functions to control how elements + are ordered or grouped. They include :func:`min`, :func:`max`, + :func:`sorted`, :meth:`list.sort`, :func:`heapq.merge`, + :func:`heapq.nsmallest`, :func:`heapq.nlargest`, and + :func:`itertools.groupby`. + + There are several ways to create a key function. For example, the + :meth:`str.lower` method can serve as a key function for case insensitive + sorts. Alternatively, a key function can be built from a + :keyword:`lambda` expression such as ``lambda r: (r[0], r[2])``. Also, + :func:`operator.attrgetter`, :func:`operator.itemgetter`, and + :func:`operator.methodcaller` are three key function constructors. See the :ref:`Sorting HOW TO + <sortinghowto>` for examples of how to create and use key functions. + + keyword argument + See :term:`argument`.
+ + lambda + An anonymous inline function consisting of a single :term:`expression` + which is evaluated when the function is called. The syntax to create + a lambda function is ``lambda [parameters]: expression``. + + LBYL + Look before you leap. This coding style explicitly tests for + pre-conditions before making calls or lookups. This style contrasts with + the :term:`EAFP` approach and is characterized by the presence of many + :keyword:`if` statements. + + In a multi-threaded environment, the LBYL approach can risk introducing a + race condition between "the looking" and "the leaping". For example, the + code, ``if key in mapping: return mapping[key]`` can fail if another + thread removes *key* from *mapping* after the test, but before the lookup. + This issue can be solved with locks or by using the EAFP approach. + + list + A built-in Python :term:`sequence`. Despite its name it is more akin + to an array in other languages than to a linked list since access to + elements is *O*\ (1). + + list comprehension + A compact way to process all or part of the elements in a sequence and + return a list with the results. ``result = ['{:#04x}'.format(x) for x in + range(256) if x % 2 == 0]`` generates a list of strings containing + even hex numbers (0x..) in the range from 0 to 255. The :keyword:`if` + clause is optional. If omitted, all elements in ``range(256)`` are + processed. + + loader + An object that loads a module. It must define a method named + :meth:`load_module`. A loader is typically returned by a + :term:`finder`. See :pep:`302` for details and + :class:`importlib.abc.Loader` for an :term:`abstract base class`. + + locale encoding + On Unix, it is the encoding of the LC_CTYPE locale. It can be set with + :func:`locale.setlocale(locale.LC_CTYPE, new_locale) <locale.setlocale>`. + + On Windows, it is the ANSI code page (ex: ``"cp1252"``). + + On Android and VxWorks, Python uses ``"utf-8"`` as the locale encoding.
+ + :func:`locale.getencoding` can be used to get the locale encoding. + + See also the :term:`filesystem encoding and error handler`. + + magic method + .. index:: pair: magic; method + + An informal synonym for :term:`special method`. + + mapping + A container object that supports arbitrary key lookups and implements the + methods specified in the :class:`collections.abc.Mapping` or + :class:`collections.abc.MutableMapping` + :ref:`abstract base classes `. Examples + include :class:`dict`, :class:`collections.defaultdict`, + :class:`collections.OrderedDict` and :class:`collections.Counter`. + + meta path finder + A :term:`finder` returned by a search of :data:`sys.meta_path`. Meta path + finders are related to, but different from :term:`path entry finders + `. + + See :class:`importlib.abc.MetaPathFinder` for the methods that meta path + finders implement. + + metaclass + The class of a class. Class definitions create a class name, a class + dictionary, and a list of base classes. The metaclass is responsible for + taking those three arguments and creating the class. Most object oriented + programming languages provide a default implementation. What makes Python + special is that it is possible to create custom metaclasses. Most users + never need this tool, but when the need arises, metaclasses can provide + powerful, elegant solutions. They have been used for logging attribute + access, adding thread-safety, tracking object creation, implementing + singletons, and many other tasks. + + More information can be found in :ref:`metaclasses`. + + method + A function which is defined inside a class body. If called as an attribute + of an instance of that class, the method will get the instance object as + its first :term:`argument` (which is usually called ``self``). + See :term:`function` and :term:`nested scope`. + + method resolution order + Method Resolution Order is the order in which base classes are searched + for a member during lookup. 
See :ref:`python_2.3_mro` for details of the + algorithm used by the Python interpreter since the 2.3 release. + + module + An object that serves as an organizational unit of Python code. Modules + have a namespace containing arbitrary Python objects. Modules are loaded + into Python by the process of :term:`importing`. + + See also :term:`package`. + + module spec + A namespace containing the import-related information used to load a + module. An instance of :class:`importlib.machinery.ModuleSpec`. + + MRO + See :term:`method resolution order`. + + mutable + Mutable objects can change their value but keep their :func:`id`. See + also :term:`immutable`. + + named tuple + The term "named tuple" applies to any type or class that inherits from + tuple and whose indexable elements are also accessible using named + attributes. The type or class may have other features as well. + + Several built-in types are named tuples, including the values returned + by :func:`time.localtime` and :func:`os.stat`. Another example is + :data:`sys.float_info`:: + + >>> sys.float_info[1] # indexed access + 1024 + >>> sys.float_info.max_exp # named field access + 1024 + >>> isinstance(sys.float_info, tuple) # kind of tuple + True + + Some named tuples are built-in types (such as the above examples). + Alternatively, a named tuple can be created from a regular class + definition that inherits from :class:`tuple` and that defines named + fields. Such a class can be written by hand, or it can be created by + inheriting :class:`typing.NamedTuple`, or with the factory function + :func:`collections.namedtuple`. The latter techniques also add some + extra methods that may not be found in hand-written or built-in named + tuples. + + namespace + The place where a variable is stored. Namespaces are implemented as + dictionaries. There are the local, global and built-in namespaces as well + as nested namespaces in objects (in methods). Namespaces support + modularity by preventing naming conflicts. 
For instance, the functions + :func:`builtins.open <.open>` and :func:`os.open` are distinguished by + their namespaces. Namespaces also aid readability and maintainability by + making it clear which module implements a function. For instance, writing + :func:`random.seed` or :func:`itertools.islice` makes it clear that those + functions are implemented by the :mod:`random` and :mod:`itertools` + modules, respectively. + + namespace package + A :pep:`420` :term:`package` which serves only as a container for + subpackages. Namespace packages may have no physical representation, + and specifically are not like a :term:`regular package` because they + have no ``__init__.py`` file. + + See also :term:`module`. + + nested scope + The ability to refer to a variable in an enclosing definition. For + instance, a function defined inside another function can refer to + variables in the outer function. Note that nested scopes by default work + only for reference and not for assignment. Local variables both read and + write in the innermost scope. Likewise, global variables read and write + to the global namespace. The :keyword:`nonlocal` allows writing to outer + scopes. + + new-style class + Old name for the flavor of classes now used for all class objects. In + earlier Python versions, only new-style classes could use Python's newer, + versatile features like :attr:`~object.__slots__`, descriptors, + properties, :meth:`~object.__getattribute__`, class methods, and static + methods. + + object + Any data with state (attributes or value) and defined behavior + (methods). Also the ultimate base class of any :term:`new-style + class`. + + optimized scope + A scope where target local variable names are reliably known to the + compiler when the code is compiled, allowing optimization of read and + write access to these names. The local namespaces for functions, + generators, coroutines, comprehensions, and generator expressions are + optimized in this fashion. 
Note: most interpreter optimizations are + applied to all scopes, only those relying on a known set of local + and nonlocal variable names are restricted to optimized scopes. + + package + A Python :term:`module` which can contain submodules or recursively, + subpackages. Technically, a package is a Python module with a + ``__path__`` attribute. + + See also :term:`regular package` and :term:`namespace package`. + + parameter + A named entity in a :term:`function` (or method) definition that + specifies an :term:`argument` (or in some cases, arguments) that the + function can accept. There are five kinds of parameter: + + * :dfn:`positional-or-keyword`: specifies an argument that can be passed + either :term:`positionally <argument>` or as a :term:`keyword argument + <argument>`. This is the default kind of parameter, for example *foo* + and *bar* in the following:: + + def func(foo, bar=None): ... + + .. _positional-only_parameter: + + * :dfn:`positional-only`: specifies an argument that can be supplied only + by position. Positional-only parameters can be defined by including a + ``/`` character in the parameter list of the function definition after + them, for example *posonly1* and *posonly2* in the following:: + + def func(posonly1, posonly2, /, positional_or_keyword): ... + + .. _keyword-only_parameter: + + * :dfn:`keyword-only`: specifies an argument that can be supplied only + by keyword. Keyword-only parameters can be defined by including a + single var-positional parameter or bare ``*`` in the parameter list + of the function definition before them, for example *kw_only1* and + *kw_only2* in the following:: + + def func(arg, *, kw_only1, kw_only2): ... + + * :dfn:`var-positional`: specifies that an arbitrary sequence of + positional arguments can be provided (in addition to any positional + arguments already accepted by other parameters).
Such a parameter can + be defined by prepending the parameter name with ``*``, for example + *args* in the following:: + + def func(*args, **kwargs): ... + + * :dfn:`var-keyword`: specifies that arbitrarily many keyword arguments + can be provided (in addition to any keyword arguments already accepted + by other parameters). Such a parameter can be defined by prepending + the parameter name with ``**``, for example *kwargs* in the example + above. + + Parameters can specify both optional and required arguments, as well as + default values for some optional arguments. + + See also the :term:`argument` glossary entry, the FAQ question on + :ref:`the difference between arguments and parameters + <faq-argument-vs-parameter>`, the :class:`inspect.Parameter` class, the + :ref:`function` section, and :pep:`362`. + + path entry + A single location on the :term:`import path` which the :term:`path + based finder` consults to find modules for importing. + + path entry finder + A :term:`finder` returned by a callable on :data:`sys.path_hooks` + (i.e. a :term:`path entry hook`) which knows how to locate modules given + a :term:`path entry`. + + See :class:`importlib.abc.PathEntryFinder` for the methods that path entry + finders implement. + + path entry hook + A callable on the :data:`sys.path_hooks` list which returns a :term:`path + entry finder` if it knows how to find modules on a specific :term:`path + entry`. + + path based finder + One of the default :term:`meta path finders <meta path finder>` which + searches an :term:`import path` for modules. + + path-like object + An object representing a file system path. A path-like object is either + a :class:`str` or :class:`bytes` object representing a path, or an object + implementing the :class:`os.PathLike` protocol.
An object that supports + the :class:`os.PathLike` protocol can be converted to a :class:`str` or + :class:`bytes` file system path by calling the :func:`os.fspath` function; + :func:`os.fsdecode` and :func:`os.fsencode` can be used to guarantee a + :class:`str` or :class:`bytes` result instead, respectively. Introduced + by :pep:`519`. + + PEP + Python Enhancement Proposal. A PEP is a design document + providing information to the Python community, or describing a new + feature for Python or its processes or environment. PEPs should + provide a concise technical specification and a rationale for proposed + features. + + PEPs are intended to be the primary mechanisms for proposing major new + features, for collecting community input on an issue, and for documenting + the design decisions that have gone into Python. The PEP author is + responsible for building consensus within the community and documenting + dissenting opinions. + + See :pep:`1`. + + portion + A set of files in a single directory (possibly stored in a zip file) + that contribute to a namespace package, as defined in :pep:`420`. + + positional argument + See :term:`argument`. + + provisional API + A provisional API is one which has been deliberately excluded from + the standard library's backwards compatibility guarantees. While major + changes to such interfaces are not expected, as long as they are marked + provisional, backwards incompatible changes (up to and including removal + of the interface) may occur if deemed necessary by core developers. Such + changes will not be made gratuitously -- they will occur only if serious + fundamental flaws are uncovered that were missed prior to the inclusion + of the API. + + Even for provisional APIs, backwards incompatible changes are seen as + a "solution of last resort" - every attempt will still be made to find + a backwards compatible resolution to any identified problems. 
+ + This process allows the standard library to continue to evolve over + time, without locking in problematic design errors for extended periods + of time. See :pep:`411` for more details. + + provisional package + See :term:`provisional API`. + + Python 3000 + Nickname for the Python 3.x release line (coined long ago when the + release of version 3 was something in the distant future.) This is also + abbreviated "Py3k". + + Pythonic + An idea or piece of code which closely follows the most common idioms + of the Python language, rather than implementing code using concepts + common to other languages. For example, a common idiom in Python is + to loop over all elements of an iterable using a :keyword:`for` + statement. Many other languages don't have this type of construct, so + people unfamiliar with Python sometimes use a numerical counter instead:: + + for i in range(len(food)): + print(food[i]) + + As opposed to the cleaner, Pythonic method:: + + for piece in food: + print(piece) + + qualified name + A dotted name showing the "path" from a module's global scope to a + class, function or method defined in that module, as defined in + :pep:`3155`. For top-level functions and classes, the qualified name + is the same as the object's name:: + + >>> class C: + ... class D: + ... def meth(self): + ... pass + ... + >>> C.__qualname__ + 'C' + >>> C.D.__qualname__ + 'C.D' + >>> C.D.meth.__qualname__ + 'C.D.meth' + + When used to refer to modules, the *fully qualified name* means the + entire dotted path to the module, including any parent packages, + e.g. ``email.mime.text``:: + + >>> import email.mime.text + >>> email.mime.text.__name__ + 'email.mime.text' + + reference count + The number of references to an object. When the reference count of an + object drops to zero, it is deallocated. Some objects are + :term:`immortal` and have reference counts that are never modified, and + therefore the objects are never deallocated. 
Reference counting is + generally not visible to Python code, but it is a key element of the + :term:`CPython` implementation. Programmers can call the + :func:`sys.getrefcount` function to return the + reference count for a particular object. + + regular package + A traditional :term:`package`, such as a directory containing an + ``__init__.py`` file. + + See also :term:`namespace package`. + + REPL + An acronym for the "read–eval–print loop", another name for the + :term:`interactive` interpreter shell. + + __slots__ + A declaration inside a class that saves memory by pre-declaring space for + instance attributes and eliminating instance dictionaries. Though + popular, the technique is somewhat tricky to get right and is best + reserved for rare cases where there are large numbers of instances in a + memory-critical application. + + sequence + An :term:`iterable` which supports efficient element access using integer + indices via the :meth:`~object.__getitem__` special method and defines a + :meth:`~object.__len__` method that returns the length of the sequence. + Some built-in sequence types are :class:`list`, :class:`str`, + :class:`tuple`, and :class:`bytes`. Note that :class:`dict` also + supports :meth:`~object.__getitem__` and :meth:`!__len__`, but is considered a + mapping rather than a sequence because the lookups use arbitrary + :term:`hashable` keys rather than integers. + + The :class:`collections.abc.Sequence` abstract base class + defines a much richer interface that goes beyond just + :meth:`~object.__getitem__` and :meth:`~object.__len__`, adding + :meth:`!count`, :meth:`!index`, :meth:`~object.__contains__`, and + :meth:`~object.__reversed__`. Types that implement this expanded + interface can be registered explicitly using + :func:`~abc.ABCMeta.register`. For more documentation on sequence + methods generally, see + :ref:`Common Sequence Operations <typesseq-common>`.
+ + set comprehension + A compact way to process all or part of the elements in an iterable and + return a set with the results. ``results = {c for c in 'abracadabra' if + c not in 'abc'}`` generates the set of strings ``{'r', 'd'}``. See + :ref:`comprehensions`. + + single dispatch + A form of :term:`generic function` dispatch where the implementation is + chosen based on the type of a single argument. + + slice + An object usually containing a portion of a :term:`sequence`. A slice is + created using the subscript notation, ``[]`` with colons between numbers + when several are given, such as in ``variable_name[1:3:5]``. The bracket + (subscript) notation uses :class:`slice` objects internally. + + soft deprecated + A soft deprecation can be used when using an API which should no longer + be used to write new code, but it remains safe to continue using it in + existing code. The API remains documented and tested, but will not be + developed further (no enhancement). + + The main difference between a "soft" and a (regular) "hard" deprecation + is that the soft deprecation does not imply scheduling the removal of the + deprecated API. + + Another difference is that a soft deprecation does not issue a warning. + + See `PEP 387: Soft Deprecation + <https://peps.python.org/pep-0387/#soft-deprecation>`_. + + special method + .. index:: pair: special; method + + A method that is called implicitly by Python to execute a certain + operation on a type, such as addition. Such methods have names starting + and ending with double underscores. Special methods are documented in + :ref:`specialnames`. + + statement + A statement is part of a suite (a "block" of code). A statement is either + an :term:`expression` or one of several constructs with a keyword, such + as :keyword:`if`, :keyword:`while` or :keyword:`for`. + + static type checker + An external tool that reads Python code and analyzes it, looking for + issues such as incorrect types. See also :term:`type hints <type hint>` + and the :mod:`typing` module.
+ + strong reference + In Python's C API, a strong reference is a reference to an object + which is owned by the code holding the reference. The strong + reference is taken by calling :c:func:`Py_INCREF` when the + reference is created and released with :c:func:`Py_DECREF` + when the reference is deleted. + + The :c:func:`Py_NewRef` function can be used to create a strong reference + to an object. Usually, the :c:func:`Py_DECREF` function must be called on + the strong reference before exiting the scope of the strong reference, to + avoid leaking one reference. + + See also :term:`borrowed reference`. + + text encoding + A string in Python is a sequence of Unicode code points (in range + ``U+0000``--``U+10FFFF``). To store or transfer a string, it needs to be + serialized as a sequence of bytes. + + Serializing a string into a sequence of bytes is known as "encoding", and + recreating the string from the sequence of bytes is known as "decoding". + + There are a variety of different text serialization + :ref:`codecs <standard-encodings>`, which are collectively referred to as + "text encodings". + + text file + A :term:`file object` able to read and write :class:`str` objects. + Often, a text file actually accesses a byte-oriented datastream + and handles the :term:`text encoding` automatically. + Examples of text files are files opened in text mode (``'r'`` or ``'w'``), + :data:`sys.stdin`, :data:`sys.stdout`, and instances of + :class:`io.StringIO`. + + See also :term:`binary file` for a file object able to read and write + :term:`bytes-like objects <bytes-like object>`. + + triple-quoted string + A string which is bound by three instances of either a quotation mark + (") or an apostrophe ('). While they don't provide any functionality + not available with single-quoted strings, they are useful for a number + of reasons.
They allow you to include unescaped single and double + quotes within a string and they can span multiple lines without the + use of the continuation character, making them especially useful when + writing docstrings. + + type + The type of a Python object determines what kind of object it is; every + object has a type. An object's type is accessible as its + :attr:`~object.__class__` attribute or can be retrieved with + ``type(obj)``. + + type alias + A synonym for a type, created by assigning the type to an identifier. + + Type aliases are useful for simplifying :term:`type hints <type hint>`. + For example:: + + def remove_gray_shades( + colors: list[tuple[int, int, int]]) -> list[tuple[int, int, int]]: + pass + + could be made more readable like this:: + + Color = tuple[int, int, int] + + def remove_gray_shades(colors: list[Color]) -> list[Color]: + pass + + See :mod:`typing` and :pep:`484`, which describe this functionality. + + type hint + An :term:`annotation` that specifies the expected type for a variable, a class + attribute, or a function parameter or return value. + + Type hints are optional and are not enforced by Python but + they are useful to :term:`static type checkers <static type checker>`. + They can also aid IDEs with code completion and refactoring. + + Type hints of global variables, class attributes, and functions, + but not local variables, can be accessed using + :func:`typing.get_type_hints`. + + See :mod:`typing` and :pep:`484`, which describe this functionality. + + universal newlines + A manner of interpreting text streams in which all of the following are + recognized as ending a line: the Unix end-of-line convention ``'\n'``, + the Windows convention ``'\r\n'``, and the old Macintosh convention + ``'\r'``. See :pep:`278` and :pep:`3116`, as well as + :func:`bytes.splitlines` for an additional use. + + variable annotation + An :term:`annotation` of a variable or a class attribute.
+ + When annotating a variable or a class attribute, assignment is optional:: + + class C: + field: 'annotation' + + Variable annotations are usually used for + :term:`type hints <type hint>`: for example this variable is expected to take + :class:`int` values:: + + count: int = 0 + + Variable annotation syntax is explained in section :ref:`annassign`. + + See :term:`function annotation`, :pep:`484` + and :pep:`526`, which describe this functionality. + Also see :ref:`annotations-howto` + for best practices on working with annotations. + + virtual environment + A cooperatively isolated runtime environment that allows Python users + and applications to install and upgrade Python distribution packages + without interfering with the behaviour of other Python applications + running on the same system. + + See also :mod:`venv`. + + virtual machine + A computer defined entirely in software. Python's virtual machine + executes the :term:`bytecode` emitted by the bytecode compiler. + + Zen of Python + Listing of Python design principles and philosophies that are helpful in + understanding and using the language. The listing can be found by typing + "``import this``" at the interactive prompt. diff --git a/pyperformance/data-files/benchmarks/bm_sphinx/data/Doc/howto/annotations.rst b/pyperformance/data-files/benchmarks/bm_sphinx/data/Doc/howto/annotations.rst new file mode 100644 index 00000000..174078b8 --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_sphinx/data/Doc/howto/annotations.rst @@ -0,0 +1,233 @@ +.. _annotations-howto: + +************************** +Annotations Best Practices +************************** + +:author: Larry Hastings + +.. topic:: Abstract + + This document is designed to encapsulate the best practices + for working with annotations dicts. If you write Python code + that examines ``__annotations__`` on Python objects, we + encourage you to follow the guidelines described below.
+ + The document is organized into four sections: + best practices for accessing the annotations of an object + in Python versions 3.10 and newer, + best practices for accessing the annotations of an object + in Python versions 3.9 and older, + other best practices + for ``__annotations__`` that apply to any Python version, + and + quirks of ``__annotations__``. + + Note that this document is specifically about working with + ``__annotations__``, not uses *for* annotations. + If you're looking for information on how to use "type hints" + in your code, please see the :mod:`typing` module. + + +Accessing The Annotations Dict Of An Object In Python 3.10 And Newer +==================================================================== + +Python 3.10 adds a new function to the standard library: +:func:`inspect.get_annotations`. In Python versions 3.10 +and newer, calling this function is the best practice for +accessing the annotations dict of any object that supports +annotations. This function can also "un-stringize" +stringized annotations for you. + +If for some reason :func:`inspect.get_annotations` isn't +viable for your use case, you may access the +``__annotations__`` data member manually. Best practice +for this changed in Python 3.10 as well: as of Python 3.10, +``o.__annotations__`` is guaranteed to *always* work +on Python functions, classes, and modules. If you're +certain the object you're examining is one of these three +*specific* objects, you may simply use ``o.__annotations__`` +to get at the object's annotations dict. + +However, other types of callables--for example, +callables created by :func:`functools.partial`--may +not have an ``__annotations__`` attribute defined. When +accessing the ``__annotations__`` of a possibly unknown +object, best practice in Python versions 3.10 and +newer is to call :func:`getattr` with three arguments, +for example ``getattr(o, '__annotations__', None)``. 
+ +Before Python 3.10, accessing ``__annotations__`` on a class that +defines no annotations but that has a parent class with +annotations would return the parent's ``__annotations__``. +In Python 3.10 and newer, the child class's annotations +will be an empty dict instead. + + +Accessing The Annotations Dict Of An Object In Python 3.9 And Older +=================================================================== + +In Python 3.9 and older, accessing the annotations dict +of an object is much more complicated than in newer versions. +The problem is a design flaw in these older versions of Python, +specifically to do with class annotations. + +Best practice for accessing the annotations dict of other +objects--functions, other callables, and modules--is the same +as best practice for 3.10, assuming you aren't calling +:func:`inspect.get_annotations`: you should use three-argument +:func:`getattr` to access the object's ``__annotations__`` +attribute. + +Unfortunately, this isn't best practice for classes. The problem +is that, since ``__annotations__`` is optional on classes, and +because classes can inherit attributes from their base classes, +accessing the ``__annotations__`` attribute of a class may +inadvertently return the annotations dict of a *base class.* +As an example:: + + class Base: + a: int = 3 + b: str = 'abc' + + class Derived(Base): + pass + + print(Derived.__annotations__) + +This will print the annotations dict from ``Base``, not +``Derived``. + +Your code will have to have a separate code path if the object +you're examining is a class (``isinstance(o, type)``). +In that case, best practice relies on an implementation detail +of Python 3.9 and before: if a class has annotations defined, +they are stored in the class's :attr:`~type.__dict__` dictionary. Since +the class may or may not have annotations defined, best practice +is to call the :meth:`~dict.get` method on the class dict. 
+ +To put it all together, here is some sample code that safely +accesses the ``__annotations__`` attribute on an arbitrary +object in Python 3.9 and before:: + + if isinstance(o, type): + ann = o.__dict__.get('__annotations__', None) + else: + ann = getattr(o, '__annotations__', None) + +After running this code, ``ann`` should be either a +dictionary or ``None``. You're encouraged to double-check +the type of ``ann`` using :func:`isinstance` before further +examination. + +Note that some exotic or malformed type objects may not have +a :attr:`~type.__dict__` attribute, so for extra safety you may also wish +to use :func:`getattr` to access :attr:`!__dict__`. + + +Manually Un-Stringizing Stringized Annotations +============================================== + +In situations where some annotations may be "stringized", +and you wish to evaluate those strings to produce the +Python values they represent, it really is best to +call :func:`inspect.get_annotations` to do this work +for you. + +If you're using Python 3.9 or older, or if for some reason +you can't use :func:`inspect.get_annotations`, you'll need +to duplicate its logic. You're encouraged to examine the +implementation of :func:`inspect.get_annotations` in the +current Python version and follow a similar approach. + +In a nutshell, if you wish to evaluate a stringized annotation +on an arbitrary object ``o``: + +* If ``o`` is a module, use ``o.__dict__`` as the + ``globals`` when calling :func:`eval`. +* If ``o`` is a class, use ``sys.modules[o.__module__].__dict__`` + as the ``globals``, and ``dict(vars(o))`` as the ``locals``, + when calling :func:`eval`. +* If ``o`` is a wrapped callable using :func:`functools.update_wrapper`, + :func:`functools.wraps`, or :func:`functools.partial`, iteratively + unwrap it by accessing either ``o.__wrapped__`` or ``o.func`` as + appropriate, until you have found the root unwrapped function. 
+* If ``o`` is a callable (but not a class), use + :attr:`o.__globals__ <function.__globals__>` as the globals when calling + :func:`eval`. + +However, not all string values used as annotations can +be successfully turned into Python values by :func:`eval`. +String values could theoretically contain any valid string, +and in practice there are valid use cases for type hints that +require annotating with string values that specifically +*can't* be evaluated. For example: + +* :pep:`604` union types using ``|``, before support for this + was added to Python 3.10. +* Definitions that aren't needed at runtime, only imported + when :const:`typing.TYPE_CHECKING` is true. + +If :func:`eval` attempts to evaluate such values, it will +fail and raise an exception. So, when designing a library +API that works with annotations, it's recommended to only +attempt to evaluate string values when explicitly requested +to by the caller. + + +Best Practices For ``__annotations__`` In Any Python Version +============================================================ + +* You should avoid assigning to the ``__annotations__`` member + of objects directly. Let Python manage setting ``__annotations__``. + +* If you do assign directly to the ``__annotations__`` member + of an object, you should always set it to a ``dict`` object. + +* If you directly access the ``__annotations__`` member + of an object, you should ensure that it's a + dictionary before attempting to examine its contents. + +* You should avoid modifying ``__annotations__`` dicts. + +* You should avoid deleting the ``__annotations__`` attribute + of an object. + + +``__annotations__`` Quirks +========================== + +In all versions of Python 3, function +objects lazy-create an annotations dict if no annotations +are defined on that object.
You can delete the ``__annotations__`` +attribute using ``del fn.__annotations__``, but if you then +access ``fn.__annotations__`` the object will create a new empty dict +that it will store and return as its annotations. Deleting the +annotations on a function before it has lazily created its annotations +dict will throw an ``AttributeError``; using ``del fn.__annotations__`` +twice in a row is guaranteed to always throw an ``AttributeError``. + +Everything in the above paragraph also applies to class and module +objects in Python 3.10 and newer. + +In all versions of Python 3, you can set ``__annotations__`` +on a function object to ``None``. However, subsequently +accessing the annotations on that object using ``fn.__annotations__`` +will lazy-create an empty dictionary as per the first paragraph of +this section. This is *not* true of modules and classes, in any Python +version; those objects permit setting ``__annotations__`` to any +Python value, and will retain whatever value is set. + +If Python stringizes your annotations for you +(using ``from __future__ import annotations``), and you +specify a string as an annotation, the string will +itself be quoted. In effect the annotation is quoted +*twice.* For example:: + + from __future__ import annotations + def foo(a: "str"): pass + + print(foo.__annotations__) + +This prints ``{'a': "'str'"}``. This shouldn't really be considered +a "quirk"; it's mentioned here simply because it might be surprising. diff --git a/pyperformance/data-files/benchmarks/bm_sphinx/data/Doc/howto/argparse.rst b/pyperformance/data-files/benchmarks/bm_sphinx/data/Doc/howto/argparse.rst new file mode 100644 index 00000000..30d9ac70 --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_sphinx/data/Doc/howto/argparse.rst @@ -0,0 +1,850 @@ +.. _argparse-tutorial: + +***************** +Argparse Tutorial +***************** + +:author: Tshepang Mbambo + +.. 
currentmodule:: argparse + +This tutorial is intended to be a gentle introduction to :mod:`argparse`, the +recommended command-line parsing module in the Python standard library. + +.. note:: + + There are two other modules that fulfill the same task, namely + :mod:`getopt` (an equivalent for ``getopt()`` from the C + language) and the deprecated :mod:`optparse`. + Note also that :mod:`argparse` is based on :mod:`optparse`, + and therefore very similar in terms of usage. + + +Concepts +======== + +Let's show the sort of functionality that we are going to explore in this +introductory tutorial by making use of the :command:`ls` command: + +.. code-block:: shell-session + + $ ls + cpython devguide prog.py pypy rm-unused-function.patch + $ ls pypy + ctypes_configure demo dotviewer include lib_pypy lib-python ... + $ ls -l + total 20 + drwxr-xr-x 19 wena wena 4096 Feb 18 18:51 cpython + drwxr-xr-x 4 wena wena 4096 Feb 8 12:04 devguide + -rwxr-xr-x 1 wena wena 535 Feb 19 00:05 prog.py + drwxr-xr-x 14 wena wena 4096 Feb 7 00:59 pypy + -rw-r--r-- 1 wena wena 741 Feb 18 01:01 rm-unused-function.patch + $ ls --help + Usage: ls [OPTION]... [FILE]... + List information about the FILEs (the current directory by default). + Sort entries alphabetically if none of -cftuvSUX nor --sort is specified. + ... + +A few concepts we can learn from the four commands: + +* The :command:`ls` command is useful when run without any options at all. It defaults + to displaying the contents of the current directory. + +* If we want beyond what it provides by default, we tell it a bit more. In + this case, we want it to display a different directory, ``pypy``. + What we did is specify what is known as a positional argument. It's named so + because the program should know what to do with the value, solely based on + where it appears on the command line. This concept is more relevant + to a command like :command:`cp`, whose most basic usage is ``cp SRC DEST``. 
+ The first position is *what you want copied,* and the second + position is *where you want it copied to*. + +* Now, say we want to change behaviour of the program. In our example, + we display more info for each file instead of just showing the file names. + The ``-l`` in that case is known as an optional argument. + +* That's a snippet of the help text. It's very useful in that you can + come across a program you have never used before, and can figure out + how it works simply by reading its help text. + + +The basics +========== + +Let us start with a very simple example which does (almost) nothing:: + + import argparse + parser = argparse.ArgumentParser() + parser.parse_args() + +Following is a result of running the code: + +.. code-block:: shell-session + + $ python prog.py + $ python prog.py --help + usage: prog.py [-h] + + options: + -h, --help show this help message and exit + $ python prog.py --verbose + usage: prog.py [-h] + prog.py: error: unrecognized arguments: --verbose + $ python prog.py foo + usage: prog.py [-h] + prog.py: error: unrecognized arguments: foo + +Here is what is happening: + +* Running the script without any options results in nothing displayed to + stdout. Not so useful. + +* The second one starts to display the usefulness of the :mod:`argparse` + module. We have done almost nothing, but already we get a nice help message. + +* The ``--help`` option, which can also be shortened to ``-h``, is the only + option we get for free (i.e. no need to specify it). Specifying anything + else results in an error. But even then, we do get a useful usage message, + also for free. + + +Introducing Positional arguments +================================ + +An example:: + + import argparse + parser = argparse.ArgumentParser() + parser.add_argument("echo") + args = parser.parse_args() + print(args.echo) + +And running the code: + +.. 
code-block:: shell-session + + $ python prog.py + usage: prog.py [-h] echo + prog.py: error: the following arguments are required: echo + $ python prog.py --help + usage: prog.py [-h] echo + + positional arguments: + echo + + options: + -h, --help show this help message and exit + $ python prog.py foo + foo + +Here is what's happening: + +* We've added the :meth:`~ArgumentParser.add_argument` method, which is what we use to specify + which command-line options the program is willing to accept. In this case, + I've named it ``echo`` so that it's in line with its function. + +* Calling our program now requires us to specify an option. + +* The :meth:`~ArgumentParser.parse_args` method actually returns some data from the + options specified, in this case, ``echo``. + +* The variable is some form of 'magic' that :mod:`argparse` performs for free + (i.e. no need to specify which variable that value is stored in). + You will also notice that its name matches the string argument given + to the method, ``echo``. + +Note however that, although the help display looks nice and all, it currently +is not as helpful as it can be. For example we see that we got ``echo`` as a +positional argument, but we don't know what it does, other than by guessing or +by reading the source code. So, let's make it a bit more useful:: + + import argparse + parser = argparse.ArgumentParser() + parser.add_argument("echo", help="echo the string you use here") + args = parser.parse_args() + print(args.echo) + +And we get: + +.. 
code-block:: shell-session + + $ python prog.py -h + usage: prog.py [-h] echo + + positional arguments: + echo echo the string you use here + + options: + -h, --help show this help message and exit + +Now, how about doing something even more useful:: + + import argparse + parser = argparse.ArgumentParser() + parser.add_argument("square", help="display a square of a given number") + args = parser.parse_args() + print(args.square**2) + +Following is a result of running the code: + +.. code-block:: shell-session + + $ python prog.py 4 + Traceback (most recent call last): + File "prog.py", line 5, in <module> + print(args.square**2) + TypeError: unsupported operand type(s) for ** or pow(): 'str' and 'int' + +That didn't go so well. That's because :mod:`argparse` treats the options we +give it as strings, unless we tell it otherwise. So, let's tell +:mod:`argparse` to treat that input as an integer:: + + import argparse + parser = argparse.ArgumentParser() + parser.add_argument("square", help="display a square of a given number", + type=int) + args = parser.parse_args() + print(args.square**2) + +Following is a result of running the code: + +.. code-block:: shell-session + + $ python prog.py 4 + 16 + $ python prog.py four + usage: prog.py [-h] square + prog.py: error: argument square: invalid int value: 'four' + +That went well. The program now even helpfully quits on illegal input +before proceeding. + + +Introducing Optional arguments +============================== + +So far we have been playing with positional arguments. Let us +have a look at how to add optional ones:: + + import argparse + parser = argparse.ArgumentParser() + parser.add_argument("--verbosity", help="increase output verbosity") + args = parser.parse_args() + if args.verbosity: + print("verbosity turned on") + +And the output: + +.. 
code-block:: shell-session + + $ python prog.py --verbosity 1 + verbosity turned on + $ python prog.py + $ python prog.py --help + usage: prog.py [-h] [--verbosity VERBOSITY] + + options: + -h, --help show this help message and exit + --verbosity VERBOSITY + increase output verbosity + $ python prog.py --verbosity + usage: prog.py [-h] [--verbosity VERBOSITY] + prog.py: error: argument --verbosity: expected one argument + +Here is what is happening: + +* The program is written so as to display something when ``--verbosity`` is + specified and display nothing when not. + +* To show that the option is actually optional, there is no error when running + the program without it. Note that by default, if an optional argument isn't + used, the relevant variable, in this case ``args.verbosity``, is + given ``None`` as a value, which is the reason it fails the truth + test of the :keyword:`if` statement. + +* The help message is a bit different. + +* When using the ``--verbosity`` option, one must also specify some value, + any value. + +The above example accepts arbitrary values for ``--verbosity``, but for +our simple program, only two values are actually useful, ``True`` or ``False``. +Let's modify the code accordingly:: + + import argparse + parser = argparse.ArgumentParser() + parser.add_argument("--verbose", help="increase output verbosity", + action="store_true") + args = parser.parse_args() + if args.verbose: + print("verbosity turned on") + +And the output: + +.. code-block:: shell-session + + $ python prog.py --verbose + verbosity turned on + $ python prog.py --verbose 1 + usage: prog.py [-h] [--verbose] + prog.py: error: unrecognized arguments: 1 + $ python prog.py --help + usage: prog.py [-h] [--verbose] + + options: + -h, --help show this help message and exit + --verbose increase output verbosity + +Here is what is happening: + +* The option is now more of a flag than something that requires a value.
+ We even changed the name of the option to match that idea. + Note that we now specify a new keyword, ``action``, and give it the value + ``"store_true"``. This means that, if the option is specified, + assign the value ``True`` to ``args.verbose``. + Not specifying it implies ``False``. + +* It complains when you specify a value, in true spirit of what flags + actually are. + +* Notice the different help text. + + +Short options +------------- + +If you are familiar with command line usage, +you will notice that I haven't yet touched on the topic of short +versions of the options. It's quite simple:: + + import argparse + parser = argparse.ArgumentParser() + parser.add_argument("-v", "--verbose", help="increase output verbosity", + action="store_true") + args = parser.parse_args() + if args.verbose: + print("verbosity turned on") + +And here goes: + +.. code-block:: shell-session + + $ python prog.py -v + verbosity turned on + $ python prog.py --help + usage: prog.py [-h] [-v] + + options: + -h, --help show this help message and exit + -v, --verbose increase output verbosity + +Note that the new ability is also reflected in the help text. + + +Combining Positional and Optional arguments +=========================================== + +Our program keeps growing in complexity:: + + import argparse + parser = argparse.ArgumentParser() + parser.add_argument("square", type=int, + help="display a square of a given number") + parser.add_argument("-v", "--verbose", action="store_true", + help="increase output verbosity") + args = parser.parse_args() + answer = args.square**2 + if args.verbose: + print(f"the square of {args.square} equals {answer}") + else: + print(answer) + +And now the output: + +.. 
code-block:: shell-session + + $ python prog.py + usage: prog.py [-h] [-v] square + prog.py: error: the following arguments are required: square + $ python prog.py 4 + 16 + $ python prog.py 4 --verbose + the square of 4 equals 16 + $ python prog.py --verbose 4 + the square of 4 equals 16 + +* We've brought back a positional argument, hence the complaint. + +* Note that the order does not matter. + +How about we give this program of ours back the ability to have +multiple verbosity values, and actually get to use them:: + + import argparse + parser = argparse.ArgumentParser() + parser.add_argument("square", type=int, + help="display a square of a given number") + parser.add_argument("-v", "--verbosity", type=int, + help="increase output verbosity") + args = parser.parse_args() + answer = args.square**2 + if args.verbosity == 2: + print(f"the square of {args.square} equals {answer}") + elif args.verbosity == 1: + print(f"{args.square}^2 == {answer}") + else: + print(answer) + +And the output: + +.. code-block:: shell-session + + $ python prog.py 4 + 16 + $ python prog.py 4 -v + usage: prog.py [-h] [-v VERBOSITY] square + prog.py: error: argument -v/--verbosity: expected one argument + $ python prog.py 4 -v 1 + 4^2 == 16 + $ python prog.py 4 -v 2 + the square of 4 equals 16 + $ python prog.py 4 -v 3 + 16 + +These all look good except the last one, which exposes a bug in our program. +Let's fix it by restricting the values the ``--verbosity`` option can accept:: + + import argparse + parser = argparse.ArgumentParser() + parser.add_argument("square", type=int, + help="display a square of a given number") + parser.add_argument("-v", "--verbosity", type=int, choices=[0, 1, 2], + help="increase output verbosity") + args = parser.parse_args() + answer = args.square**2 + if args.verbosity == 2: + print(f"the square of {args.square} equals {answer}") + elif args.verbosity == 1: + print(f"{args.square}^2 == {answer}") + else: + print(answer) + +And the output: + +.. 
code-block:: shell-session + + $ python prog.py 4 -v 3 + usage: prog.py [-h] [-v {0,1,2}] square + prog.py: error: argument -v/--verbosity: invalid choice: 3 (choose from 0, 1, 2) + $ python prog.py 4 -h + usage: prog.py [-h] [-v {0,1,2}] square + + positional arguments: + square display a square of a given number + + options: + -h, --help show this help message and exit + -v, --verbosity {0,1,2} + increase output verbosity + +Note that the change also reflects both in the error message as well as the +help string. + +Now, let's use a different approach of playing with verbosity, which is pretty +common. It also matches the way the CPython executable handles its own +verbosity argument (check the output of ``python --help``):: + + import argparse + parser = argparse.ArgumentParser() + parser.add_argument("square", type=int, + help="display the square of a given number") + parser.add_argument("-v", "--verbosity", action="count", + help="increase output verbosity") + args = parser.parse_args() + answer = args.square**2 + if args.verbosity == 2: + print(f"the square of {args.square} equals {answer}") + elif args.verbosity == 1: + print(f"{args.square}^2 == {answer}") + else: + print(answer) + +We have introduced another action, "count", +to count the number of occurrences of specific options. + + +.. 
code-block:: shell-session + + $ python prog.py 4 + 16 + $ python prog.py 4 -v + 4^2 == 16 + $ python prog.py 4 -vv + the square of 4 equals 16 + $ python prog.py 4 --verbosity --verbosity + the square of 4 equals 16 + $ python prog.py 4 -v 1 + usage: prog.py [-h] [-v] square + prog.py: error: unrecognized arguments: 1 + $ python prog.py 4 -h + usage: prog.py [-h] [-v] square + + positional arguments: + square display a square of a given number + + options: + -h, --help show this help message and exit + -v, --verbosity increase output verbosity + $ python prog.py 4 -vvv + 16 + +* Yes, it's now more of a flag (similar to ``action="store_true"`` in the + previous version of our script). That should explain the complaint. + +* It also behaves similarly to the "store_true" action. + +* Now here's a demonstration of what the "count" action gives. You've probably + seen this sort of usage before. + +* And if you don't specify the ``-v`` flag, that flag is considered to have + ``None`` value. + +* As should be expected, specifying the long form of the flag, we should get + the same output. + +* Sadly, our help output isn't very informative on the new ability our script + has acquired, but that can always be fixed by improving the documentation for + our script (e.g. via the ``help`` keyword argument). + +* That last output exposes a bug in our program. + + +Let's fix it:: + + import argparse + parser = argparse.ArgumentParser() + parser.add_argument("square", type=int, + help="display a square of a given number") + parser.add_argument("-v", "--verbosity", action="count", + help="increase output verbosity") + args = parser.parse_args() + answer = args.square**2 + + # bugfix: replace == with >= + if args.verbosity >= 2: + print(f"the square of {args.square} equals {answer}") + elif args.verbosity >= 1: + print(f"{args.square}^2 == {answer}") + else: + print(answer) + +And this is what it gives: + +.. 
code-block:: shell-session + + $ python prog.py 4 -vvv + the square of 4 equals 16 + $ python prog.py 4 -vvvv + the square of 4 equals 16 + $ python prog.py 4 + Traceback (most recent call last): + File "prog.py", line 11, in <module> + if args.verbosity >= 2: + TypeError: '>=' not supported between instances of 'NoneType' and 'int' + + +* The first output went well, and fixes the bug we had before. + That is, we want any value >= 2 to be as verbose as possible. + +* The third output is not so good. + +Let's fix that bug:: + + import argparse + parser = argparse.ArgumentParser() + parser.add_argument("square", type=int, + help="display a square of a given number") + parser.add_argument("-v", "--verbosity", action="count", default=0, + help="increase output verbosity") + args = parser.parse_args() + answer = args.square**2 + if args.verbosity >= 2: + print(f"the square of {args.square} equals {answer}") + elif args.verbosity >= 1: + print(f"{args.square}^2 == {answer}") + else: + print(answer) + +We've just introduced yet another keyword, ``default``. +We've set it to ``0`` in order to make it comparable to the other int values. +Remember that by default, +if an optional argument isn't specified, +it gets the ``None`` value, and that cannot be compared to an int value +(hence the :exc:`TypeError` exception). + +And: + +.. code-block:: shell-session + + $ python prog.py 4 + 16 + +You can go quite far just with what we've learned so far, +and we have only scratched the surface. +The :mod:`argparse` module is very powerful, +and we'll explore a bit more of it before we end this tutorial. 
+ + +Getting a little more advanced +============================== + +What if we wanted to expand our tiny program to perform other powers, +not just squares:: + + import argparse + parser = argparse.ArgumentParser() + parser.add_argument("x", type=int, help="the base") + parser.add_argument("y", type=int, help="the exponent") + parser.add_argument("-v", "--verbosity", action="count", default=0) + args = parser.parse_args() + answer = args.x**args.y + if args.verbosity >= 2: + print(f"{args.x} to the power {args.y} equals {answer}") + elif args.verbosity >= 1: + print(f"{args.x}^{args.y} == {answer}") + else: + print(answer) + +Output: + +.. code-block:: shell-session + + $ python prog.py + usage: prog.py [-h] [-v] x y + prog.py: error: the following arguments are required: x, y + $ python prog.py -h + usage: prog.py [-h] [-v] x y + + positional arguments: + x the base + y the exponent + + options: + -h, --help show this help message and exit + -v, --verbosity + $ python prog.py 4 2 -v + 4^2 == 16 + + +Notice that so far we've been using verbosity level to *change* the text +that gets displayed. The following example instead uses verbosity level +to display *more* text instead:: + + import argparse + parser = argparse.ArgumentParser() + parser.add_argument("x", type=int, help="the base") + parser.add_argument("y", type=int, help="the exponent") + parser.add_argument("-v", "--verbosity", action="count", default=0) + args = parser.parse_args() + answer = args.x**args.y + if args.verbosity >= 2: + print(f"Running '{__file__}'") + if args.verbosity >= 1: + print(f"{args.x}^{args.y} == ", end="") + print(answer) + +Output: + +.. code-block:: shell-session + + $ python prog.py 4 2 + 16 + $ python prog.py 4 2 -v + 4^2 == 16 + $ python prog.py 4 2 -vv + Running 'prog.py' + 4^2 == 16 + + +.. 
_specifying-ambiguous-arguments: + +Specifying ambiguous arguments +------------------------------ + +When there is ambiguity in deciding whether an argument is positional or belongs to an +option, ``--`` can be used to tell :meth:`~ArgumentParser.parse_args` that +everything after that is a positional argument:: + + >>> parser = argparse.ArgumentParser(prog='PROG') + >>> parser.add_argument('-n', nargs='+') + >>> parser.add_argument('args', nargs='*') + + >>> # ambiguous, so parse_args assumes it's an option + >>> parser.parse_args(['-f']) + usage: PROG [-h] [-n N [N ...]] [args ...] + PROG: error: unrecognized arguments: -f + + >>> parser.parse_args(['--', '-f']) + Namespace(args=['-f'], n=None) + + >>> # ambiguous, so the -n option greedily accepts arguments + >>> parser.parse_args(['-n', '1', '2', '3']) + Namespace(args=[], n=['1', '2', '3']) + + >>> parser.parse_args(['-n', '1', '--', '2', '3']) + Namespace(args=['2', '3'], n=['1']) + + +Conflicting options +------------------- + +So far, we have been working with two methods of an +:class:`argparse.ArgumentParser` instance. Let's introduce a third one, +:meth:`~ArgumentParser.add_mutually_exclusive_group`. It allows us to specify options that +conflict with each other. 
Let's also change the rest of the program so that +the new functionality makes more sense: +we'll introduce the ``--quiet`` option, +which will be the opposite of the ``--verbose`` one:: + + import argparse + + parser = argparse.ArgumentParser() + group = parser.add_mutually_exclusive_group() + group.add_argument("-v", "--verbose", action="store_true") + group.add_argument("-q", "--quiet", action="store_true") + parser.add_argument("x", type=int, help="the base") + parser.add_argument("y", type=int, help="the exponent") + args = parser.parse_args() + answer = args.x**args.y + + if args.quiet: + print(answer) + elif args.verbose: + print(f"{args.x} to the power {args.y} equals {answer}") + else: + print(f"{args.x}^{args.y} == {answer}") + +Our program is now simpler, and we've lost some functionality for the sake of +demonstration. Anyways, here's the output: + +.. code-block:: shell-session + + $ python prog.py 4 2 + 4^2 == 16 + $ python prog.py 4 2 -q + 16 + $ python prog.py 4 2 -v + 4 to the power 2 equals 16 + $ python prog.py 4 2 -vq + usage: prog.py [-h] [-v | -q] x y + prog.py: error: argument -q/--quiet: not allowed with argument -v/--verbose + $ python prog.py 4 2 -v --quiet + usage: prog.py [-h] [-v | -q] x y + prog.py: error: argument -q/--quiet: not allowed with argument -v/--verbose + +That should be easy to follow. I've added that last output so you can see the +sort of flexibility you get, i.e. mixing long form options with short form +ones. 
+ +Before we conclude, you probably want to tell your users the main purpose of +your program, just in case they don't know:: + + import argparse + + parser = argparse.ArgumentParser(description="calculate X to the power of Y") + group = parser.add_mutually_exclusive_group() + group.add_argument("-v", "--verbose", action="store_true") + group.add_argument("-q", "--quiet", action="store_true") + parser.add_argument("x", type=int, help="the base") + parser.add_argument("y", type=int, help="the exponent") + args = parser.parse_args() + answer = args.x**args.y + + if args.quiet: + print(answer) + elif args.verbose: + print(f"{args.x} to the power {args.y} equals {answer}") + else: + print(f"{args.x}^{args.y} == {answer}") + +Note the slight difference in the usage text. Note the ``[-v | -q]``, +which tells us that we can either use ``-v`` or ``-q``, +but not both at the same time: + +.. code-block:: shell-session + + $ python prog.py --help + usage: prog.py [-h] [-v | -q] x y + + calculate X to the power of Y + + positional arguments: + x the base + y the exponent + + options: + -h, --help show this help message and exit + -v, --verbose + -q, --quiet + + +How to translate the argparse output +==================================== + +The output of the :mod:`argparse` module, such as its help text and error +messages, is all made translatable using the :mod:`gettext` module. This +allows applications to easily localize messages produced by +:mod:`argparse`. See also :ref:`i18n-howto`. + +For instance, in this :mod:`argparse` output: + +.. code-block:: shell-session + + $ python prog.py --help + usage: prog.py [-h] [-v | -q] x y + + calculate X to the power of Y + + positional arguments: + x the base + y the exponent + + options: + -h, --help show this help message and exit + -v, --verbose + -q, --quiet + +The strings ``usage:``, ``positional arguments:``, ``options:`` and +``show this help message and exit`` are all translatable. 
+ +In order to translate these strings, they must first be extracted +into a ``.po`` file. For example, using `Babel <https://babel.pocoo.org/>`__, +run this command: + +.. code-block:: shell-session + + $ pybabel extract -o messages.po /usr/lib/python3.12/argparse.py + +This command will extract all translatable strings from the :mod:`argparse` +module and output them into a file named ``messages.po``. This command assumes +that your Python installation is in ``/usr/lib``. + +You can find out the location of the :mod:`argparse` module on your system +using this script:: + + import argparse + print(argparse.__file__) + +Once the messages in the ``.po`` file are translated and the translations are +installed using :mod:`gettext`, :mod:`argparse` will be able to display the +translated messages. + +To translate your own strings in the :mod:`argparse` output, use :mod:`gettext`. + +Conclusion +========== + +The :mod:`argparse` module offers a lot more than shown here. +Its docs are quite detailed and thorough, and full of examples. +Having gone through this tutorial, you should easily digest them +without feeling overwhelmed. diff --git a/pyperformance/data-files/benchmarks/bm_sphinx/data/Doc/howto/clinic.rst b/pyperformance/data-files/benchmarks/bm_sphinx/data/Doc/howto/clinic.rst new file mode 100644 index 00000000..06097724 --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_sphinx/data/Doc/howto/clinic.rst @@ -0,0 +1,14 @@ +:orphan: + +.. This page is retained solely for existing links to /howto/clinic.html. + Direct readers to the devguide. + +********************** +Argument Clinic How-To +********************** + + +.. note:: + + The Argument Clinic How-To has been moved to the `Python Developer's Guide + <https://devguide.python.org/development-tools/clinic/>`__. 
diff --git a/pyperformance/data-files/benchmarks/bm_sphinx/data/Doc/howto/cporting.rst b/pyperformance/data-files/benchmarks/bm_sphinx/data/Doc/howto/cporting.rst new file mode 100644 index 00000000..7773620b --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_sphinx/data/Doc/howto/cporting.rst @@ -0,0 +1,26 @@ +.. highlight:: c + +.. _cporting-howto: + +************************************* +Porting Extension Modules to Python 3 +************************************* + +We recommend the following resources for porting extension modules to Python 3: + +* The `Migrating C extensions`_ chapter from + *Supporting Python 3: An in-depth guide*, a book on moving from Python 2 + to Python 3 in general, guides the reader through porting an extension + module. +* The `Porting guide`_ from the *py3c* project provides opinionated + suggestions with supporting code. +* The `Cython`_ and `CFFI`_ libraries offer abstractions over + Python's C API. + Extensions generally need to be re-written to use one of them, + but the library then handles differences between various Python + versions and implementations. + +.. _Migrating C extensions: http://python3porting.com/cextensions.html +.. _Porting guide: https://py3c.readthedocs.io/en/latest/guide.html +.. _Cython: https://cython.org/ +.. _CFFI: https://cffi.readthedocs.io/en/latest/ diff --git a/pyperformance/data-files/benchmarks/bm_sphinx/data/Doc/howto/curses.rst b/pyperformance/data-files/benchmarks/bm_sphinx/data/Doc/howto/curses.rst new file mode 100644 index 00000000..f9ad81e3 --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_sphinx/data/Doc/howto/curses.rst @@ -0,0 +1,547 @@ +.. _curses-howto: + +********************************** + Curses Programming with Python +********************************** + +.. currentmodule:: curses + +:Author: A.M. Kuchling, Eric S. Raymond +:Release: 2.04 + + +.. 
topic:: Abstract + + This document describes how to use the :mod:`curses` extension + module to control text-mode displays. + + +What is curses? +=============== + +The curses library supplies a terminal-independent screen-painting and +keyboard-handling facility for text-based terminals; such terminals +include VT100s, the Linux console, and the simulated terminal provided +by various programs. Display terminals support various control codes +to perform common operations such as moving the cursor, scrolling the +screen, and erasing areas. Different terminals use widely differing +codes, and often have their own minor quirks. + +In a world of graphical displays, one might ask "why bother"? It's +true that character-cell display terminals are an obsolete technology, +but there are niches in which being able to do fancy things with them +is still valuable. One niche is on small-footprint or embedded +Unixes that don't run an X server. Another is tools such as OS +installers and kernel configurators that may have to run before any +graphical support is available. + +The curses library provides fairly basic functionality, providing the +programmer with an abstraction of a display containing multiple +non-overlapping windows of text. The contents of a window can be +changed in various ways---adding text, erasing it, changing its +appearance---and the curses library will figure out what control codes +need to be sent to the terminal to produce the right output. curses +doesn't provide many user-interface concepts such as buttons, checkboxes, +or dialogs; if you need such features, consider a user interface library such as +:pypi:`Urwid`. + +The curses library was originally written for BSD Unix; the later System V +versions of Unix from AT&T added many enhancements and new functions. BSD curses +is no longer maintained, having been replaced by ncurses, which is an +open-source implementation of the AT&T interface. 
If you're using an +open-source Unix such as Linux or FreeBSD, your system almost certainly uses +ncurses. Since most current commercial Unix versions are based on System V +code, all the functions described here will probably be available. The older +versions of curses carried by some proprietary Unixes may not support +everything, though. + +The Windows version of Python doesn't include the :mod:`curses` +module. A ported version called :pypi:`UniCurses` is available. + + +The Python curses module +------------------------ + +The Python module is a fairly simple wrapper over the C functions provided by +curses; if you're already familiar with curses programming in C, it's really +easy to transfer that knowledge to Python. The biggest difference is that the +Python interface makes things simpler by merging different C functions such as +:c:func:`!addstr`, :c:func:`!mvaddstr`, and :c:func:`!mvwaddstr` into a single +:meth:`~curses.window.addstr` method. You'll see this covered in more +detail later. + +This HOWTO is an introduction to writing text-mode programs with curses +and Python. It doesn't attempt to be a complete guide to the curses API; for +that, see the Python library guide's section on ncurses, and the C manual pages +for ncurses. It will, however, give you the basic ideas. + + +Starting and ending a curses application +======================================== + +Before doing anything, curses must be initialized. This is done by +calling the :func:`~curses.initscr` function, which will determine the +terminal type, send any required setup codes to the terminal, and +create various internal data structures. If successful, +:func:`!initscr` returns a window object representing the entire +screen; this is usually called ``stdscr`` after the name of the +corresponding C variable. 
:: + + import curses + stdscr = curses.initscr() + +Usually curses applications turn off automatic echoing of keys to the +screen, in order to be able to read keys and only display them under +certain circumstances. This requires calling the +:func:`~curses.noecho` function. :: + + curses.noecho() + +Applications will also commonly need to react to keys instantly, +without requiring the Enter key to be pressed; this is called cbreak +mode, as opposed to the usual buffered input mode. :: + + curses.cbreak() + +Terminals usually return special keys, such as the cursor keys or navigation +keys such as Page Up and Home, as a multibyte escape sequence. While you could +write your application to expect such sequences and process them accordingly, +curses can do it for you, returning a special value such as +:const:`curses.KEY_LEFT`. To get curses to do the job, you'll have to enable +keypad mode. :: + + stdscr.keypad(True) + +Terminating a curses application is much easier than starting one. You'll need +to call:: + + curses.nocbreak() + stdscr.keypad(False) + curses.echo() + +to reverse the curses-friendly terminal settings. Then call the +:func:`~curses.endwin` function to restore the terminal to its original +operating mode. :: + + curses.endwin() + +A common problem when debugging a curses application is to get your terminal +messed up when the application dies without restoring the terminal to its +previous state. In Python this commonly happens when your code is buggy and +raises an uncaught exception. Keys are no longer echoed to the screen when +you type them, for example, which makes using the shell difficult. + +In Python you can avoid these complications and make debugging much easier by +importing the :func:`curses.wrapper` function and using it like this:: + + from curses import wrapper + + def main(stdscr): + # Clear screen + stdscr.clear() + + # This raises ZeroDivisionError when i == 10. 
+ for i in range(0, 11): + v = i-10 + stdscr.addstr(i, 0, '10 divided by {} is {}'.format(v, 10/v)) + + stdscr.refresh() + stdscr.getkey() + + wrapper(main) + +The :func:`~curses.wrapper` function takes a callable object and does the +initializations described above, also initializing colors if color +support is present. :func:`!wrapper` then runs your provided callable. +Once the callable returns, :func:`!wrapper` will restore the original +state of the terminal. The callable is called inside a +:keyword:`try`...\ :keyword:`except` that catches exceptions, restores +the state of the terminal, and then re-raises the exception. Therefore +your terminal won't be left in a funny state on exception and you'll be +able to read the exception's message and traceback. + + +Windows and Pads +================ + +Windows are the basic abstraction in curses. A window object represents a +rectangular area of the screen, and supports methods to display text, +erase it, allow the user to input strings, and so forth. + +The ``stdscr`` object returned by the :func:`~curses.initscr` function is a +window object that covers the entire screen. Many programs may need +only this single window, but you might wish to divide the screen into +smaller windows, in order to redraw or clear them separately. The +:func:`~curses.newwin` function creates a new window of a given size, +returning the new window object. :: + + begin_x = 20; begin_y = 7 + height = 5; width = 40 + win = curses.newwin(height, width, begin_y, begin_x) + +Note that the coordinate system used in curses is unusual. +Coordinates are always passed in the order *y,x*, and the top-left +corner of a window is coordinate (0,0). This breaks the normal +convention for handling coordinates where the *x* coordinate comes +first. This is an unfortunate difference from most other computer +applications, but it's been part of curses since it was first written, +and it's too late to change things now. 
+ +Your application can determine the size of the screen by using the +:data:`curses.LINES` and :data:`curses.COLS` variables to obtain the *y* and +*x* sizes. Legal coordinates will then extend from ``(0,0)`` to +``(curses.LINES - 1, curses.COLS - 1)``. + +When you call a method to display or erase text, the effect doesn't +immediately show up on the display. Instead you must call the +:meth:`~curses.window.refresh` method of window objects to update the +screen. + +This is because curses was originally written with slow 300-baud +terminal connections in mind; with these terminals, minimizing the +time required to redraw the screen was very important. Instead curses +accumulates changes to the screen and displays them in the most +efficient manner when you call :meth:`!refresh`. For example, if your +program displays some text in a window and then clears the window, +there's no need to send the original text because it's never +visible. + +In practice, explicitly telling curses to redraw a window doesn't +really complicate programming with curses much. Most programs go into a flurry +of activity, and then pause waiting for a keypress or some other action on the +part of the user. All you have to do is to be sure that the screen has been +redrawn before pausing to wait for user input, by first calling +:meth:`!stdscr.refresh` or the :meth:`!refresh` method of some other relevant +window. + +A pad is a special case of a window; it can be larger than the actual display +screen, and only a portion of the pad is displayed at a time. Creating a pad +requires the pad's height and width, while refreshing a pad requires giving the +coordinates of the on-screen area where a subsection of the pad will be +displayed.
:: + + pad = curses.newpad(100, 100) + # These loops fill the pad with letters; addch() is + # explained in the next section + for y in range(0, 99): + for x in range(0, 99): + pad.addch(y,x, ord('a') + (x*x+y*y) % 26) + + # Displays a section of the pad in the middle of the screen. + # (0,0) : coordinate of upper-left corner of pad area to display. + # (5,5) : coordinate of upper-left corner of window area to be filled + # with pad content. + # (20, 75) : coordinate of lower-right corner of window area to be + # : filled with pad content. + pad.refresh( 0,0, 5,5, 20,75) + +The :meth:`!refresh` call displays a section of the pad in the rectangle +extending from coordinate (5,5) to coordinate (20,75) on the screen; the upper +left corner of the displayed section is coordinate (0,0) on the pad. Beyond +that difference, pads are exactly like ordinary windows and support the same +methods. + +If you have multiple windows and pads on screen there is a more +efficient way to update the screen and prevent annoying screen flicker +as each part of the screen gets updated. :meth:`!refresh` actually +does two things: + +1) Calls the :meth:`~curses.window.noutrefresh` method of each window + to update an underlying data structure representing the desired + state of the screen. +2) Calls the :func:`~curses.doupdate` function to change the + physical screen to match the desired state recorded in the data structure. + +Instead you can call :meth:`!noutrefresh` on a number of windows to +update the data structure, and then call :func:`!doupdate` to update +the screen. + + +Displaying Text +=============== + +From a C programmer's point of view, curses may sometimes look like a +twisty maze of functions, all subtly different. For example, +:c:func:`!addstr` displays a string at the current cursor location in +the ``stdscr`` window, while :c:func:`!mvaddstr` moves to a given y,x +coordinate first before displaying the string.
:c:func:`!waddstr` is just +like :c:func:`!addstr`, but allows specifying a window to use instead of +using ``stdscr`` by default. :c:func:`!mvwaddstr` allows specifying both +a window and a coordinate. + +Fortunately the Python interface hides all these details. ``stdscr`` +is a window object like any other, and methods such as +:meth:`~curses.window.addstr` accept multiple argument forms. Usually there +are four different forms. + ++---------------------------------+-----------------------------------------------+ +| Form | Description | ++=================================+===============================================+ +| *str* or *ch* | Display the string *str* or character *ch* at | +| | the current position | ++---------------------------------+-----------------------------------------------+ +| *str* or *ch*, *attr* | Display the string *str* or character *ch*, | +| | using attribute *attr* at the current | +| | position | ++---------------------------------+-----------------------------------------------+ +| *y*, *x*, *str* or *ch* | Move to position *y,x* within the window, and | +| | display *str* or *ch* | ++---------------------------------+-----------------------------------------------+ +| *y*, *x*, *str* or *ch*, *attr* | Move to position *y,x* within the window, and | +| | display *str* or *ch*, using attribute *attr* | ++---------------------------------+-----------------------------------------------+ + +Attributes allow displaying text in highlighted forms such as boldface, +underline, reverse code, or in color. They'll be explained in more detail in +the next subsection. + + +The :meth:`~curses.window.addstr` method takes a Python string or +bytestring as the value to be displayed. The contents of bytestrings +are sent to the terminal as-is. Strings are encoded to bytes using +the value of the window's :attr:`~window.encoding` attribute; this defaults to +the default system encoding as returned by :func:`locale.getencoding`. 
+ +The :meth:`~curses.window.addch` methods take a character, which can be +either a string of length 1, a bytestring of length 1, or an integer. + +Constants are provided for extension characters; these constants are +integers greater than 255. For example, :const:`ACS_PLMINUS` is a +/- +symbol, and :const:`ACS_ULCORNER` is the upper left corner of a box +(handy for drawing borders). You can also use the appropriate Unicode +character. + +Windows remember where the cursor was left after the last operation, so if you +leave out the *y,x* coordinates, the string or character will be displayed +wherever the last operation left off. You can also move the cursor with the +``move(y,x)`` method. Because some terminals always display a flashing cursor, +you may want to ensure that the cursor is positioned in some location where it +won't be distracting; it can be confusing to have the cursor blinking at some +apparently random location. + +If your application doesn't need a blinking cursor at all, you can +call ``curs_set(False)`` to make it invisible. For compatibility +with older curses versions, there's a ``leaveok(bool)`` function +that's a synonym for :func:`~curses.curs_set`. When *bool* is true, the +curses library will attempt to suppress the flashing cursor, and you +won't need to worry about leaving it in odd locations. + + +Attributes and Color +-------------------- + +Characters can be displayed in different ways. Status lines in a text-based +application are commonly shown in reverse video, or a text viewer may need to +highlight certain words. curses supports this by allowing you to specify an +attribute for each cell on the screen. + +An attribute is an integer, each bit representing a different +attribute. You can try to display text with multiple attribute bits +set, but curses doesn't guarantee that all the possible combinations +are available, or that they're all visually distinct. 
That depends on +the ability of the terminal being used, so it's safest to stick to the +most commonly available attributes, listed here. + ++----------------------+--------------------------------------+ +| Attribute | Description | ++======================+======================================+ +| :const:`A_BLINK` | Blinking text | ++----------------------+--------------------------------------+ +| :const:`A_BOLD` | Extra bright or bold text | ++----------------------+--------------------------------------+ +| :const:`A_DIM` | Half bright text | ++----------------------+--------------------------------------+ +| :const:`A_REVERSE` | Reverse-video text | ++----------------------+--------------------------------------+ +| :const:`A_STANDOUT` | The best highlighting mode available | ++----------------------+--------------------------------------+ +| :const:`A_UNDERLINE` | Underlined text | ++----------------------+--------------------------------------+ + +So, to display a reverse-video status line on the top line of the screen, you +could code:: + + stdscr.addstr(0, 0, "Current mode: Typing mode", + curses.A_REVERSE) + stdscr.refresh() + +The curses library also supports color on those terminals that provide it. The +most common such terminal is probably the Linux console, followed by color +xterms. + +To use color, you must call the :func:`~curses.start_color` function soon +after calling :func:`~curses.initscr`, to initialize the default color set +(the :func:`curses.wrapper` function does this automatically). Once that's +done, the :func:`~curses.has_colors` function returns TRUE if the terminal +in use can +actually display color. (Note: curses uses the American spelling 'color', +instead of the Canadian/British spelling 'colour'. If you're used to the +British spelling, you'll have to resign yourself to misspelling it for the sake +of these functions.) 
+ +The curses library maintains a finite number of color pairs, containing a +foreground (or text) color and a background color. You can get the attribute +value corresponding to a color pair with the :func:`~curses.color_pair` +function; this can be bitwise-OR'ed with other attributes such as +:const:`A_REVERSE`, but again, such combinations are not guaranteed to work +on all terminals. + +An example, which displays a line of text using color pair 1:: + + stdscr.addstr("Pretty text", curses.color_pair(1)) + stdscr.refresh() + +As I said before, a color pair consists of a foreground and background color. +The ``init_pair(n, f, b)`` function changes the definition of color pair *n*, to +foreground color f and background color b. Color pair 0 is hard-wired to white +on black, and cannot be changed. + +Colors are numbered, and :func:`~curses.start_color` initializes 8 basic +colors when it activates color mode. They are: 0:black, 1:red, +2:green, 3:yellow, 4:blue, 5:magenta, 6:cyan, and 7:white. The :mod:`curses` +module defines named constants for each of these colors: +:const:`curses.COLOR_BLACK`, :const:`curses.COLOR_RED`, and so forth. + +Let's put all this together. To change color pair 1 to red text on a white +background, you would call:: + + curses.init_pair(1, curses.COLOR_RED, curses.COLOR_WHITE) + +When you change a color pair, any text already displayed using that color pair +will change to the new colors. You can also display new text in this color +with:: + + stdscr.addstr(0,0, "RED ALERT!", curses.color_pair(1)) + +Very fancy terminals can change the definitions of the actual colors to a given +RGB value. This lets you change color 1, which is usually red, to purple or +blue or any other color you like. Unfortunately, the Linux console doesn't +support this, so I'm unable to try it out, and can't provide any examples. You +can check if your terminal can do this by calling +:func:`~curses.can_change_color`, which returns ``True`` if the capability is +there.
If you're lucky enough to have such a talented terminal, consult your +system's man pages for more information. + + +User Input +========== + +The C curses library offers only very simple input mechanisms. Python's +:mod:`curses` module adds a basic text-input widget. (Other libraries +such as :pypi:`Urwid` have more extensive collections of widgets.) + +There are two methods for getting input from a window: + +* :meth:`~curses.window.getch` refreshes the screen and then waits for + the user to hit a key, displaying the key if :func:`~curses.echo` has been + called earlier. You can optionally specify a coordinate to which + the cursor should be moved before pausing. + +* :meth:`~curses.window.getkey` does the same thing but converts the + integer to a string. Individual characters are returned as + 1-character strings, and special keys such as function keys return + longer strings containing a key name such as ``KEY_UP`` or ``^G``. + +It's possible to not wait for the user using the +:meth:`~curses.window.nodelay` window method. After ``nodelay(True)``, +:meth:`!getch` and :meth:`!getkey` for the window become +non-blocking. To signal that no input is ready, :meth:`!getch` returns +``curses.ERR`` (a value of -1) and :meth:`!getkey` raises an exception. +There's also a :func:`~curses.halfdelay` function, which can be used to (in +effect) set a timer on each :meth:`!getch`; if no input becomes +available within a specified delay (measured in tenths of a second), +curses raises an exception. + +The :meth:`!getch` method returns an integer; if it's between 0 and 255, it +represents the ASCII code of the key pressed. Values greater than 255 are +special keys such as Page Up, Home, or the cursor keys. You can compare the +value returned to constants such as :const:`curses.KEY_PPAGE`, +:const:`curses.KEY_HOME`, or :const:`curses.KEY_LEFT`. 
The main loop of +your program may look something like this:: + + while True: + c = stdscr.getch() + if c == ord('p'): + PrintDocument() + elif c == ord('q'): + break # Exit the while loop + elif c == curses.KEY_HOME: + x = y = 0 + +The :mod:`curses.ascii` module supplies ASCII class membership functions that +take either integer or 1-character string arguments; these may be useful in +writing more readable tests for such loops. It also supplies +conversion functions that take either integer or 1-character-string arguments +and return the same type. For example, :func:`curses.ascii.ctrl` returns the +control character corresponding to its argument. + +There's also a method to retrieve an entire string, +:meth:`~curses.window.getstr`. It isn't used very often, because its +functionality is quite limited; the only editing keys available are +the backspace key and the Enter key, which terminates the string. It +can optionally be limited to a fixed number of characters. :: + + curses.echo() # Enable echoing of characters + + # Get a 15-character string, with the cursor on the top line + s = stdscr.getstr(0,0, 15) + +The :mod:`curses.textpad` module supplies a text box that supports an +Emacs-like set of keybindings. Various methods of the +:class:`~curses.textpad.Textbox` class support editing with input +validation and gathering the edit results either with or without +trailing spaces. Here's an example:: + + import curses + from curses.textpad import Textbox, rectangle + + def main(stdscr): + stdscr.addstr(0, 0, "Enter IM message: (hit Ctrl-G to send)") + + editwin = curses.newwin(5,30, 2,1) + rectangle(stdscr, 1,0, 1+5+1, 1+30+1) + stdscr.refresh() + + box = Textbox(editwin) + + # Let the user edit until Ctrl-G is struck. + box.edit() + + # Get resulting contents + message = box.gather() + +See the library documentation on :mod:`curses.textpad` for more details. 
+ + +For More Information +==================== + +This HOWTO doesn't cover some advanced topics, such as reading the +contents of the screen or capturing mouse events from an xterm +instance, but the Python library page for the :mod:`curses` module is now +reasonably complete. You should browse it next. + +If you're in doubt about the detailed behavior of the curses +functions, consult the manual pages for your curses implementation, +whether it's ncurses or a proprietary Unix vendor's. The manual pages +will document any quirks, and provide complete lists of all the +functions, attributes, and :ref:`ACS_\* ` characters available to +you. + +Because the curses API is so large, some functions aren't supported in +the Python interface. Often this isn't because they're difficult to +implement, but because no one has needed them yet. Also, Python +doesn't yet support the menu library associated with ncurses. +Patches adding support for these would be welcome; see +`the Python Developer's Guide `_ to +learn more about submitting patches to Python. + +* `Writing Programs with NCURSES `_: + a lengthy tutorial for C programmers. +* `The ncurses man page `_ +* `The ncurses FAQ `_ +* `"Use curses... don't swear" `_: + video of a PyCon 2013 talk on controlling terminals using curses or Urwid. +* `"Console Applications with Urwid" `_: + video of a PyCon CA 2012 talk demonstrating some applications written using + Urwid. diff --git a/pyperformance/data-files/benchmarks/bm_sphinx/data/Doc/howto/descriptor.rst b/pyperformance/data-files/benchmarks/bm_sphinx/data/Doc/howto/descriptor.rst new file mode 100644 index 00000000..c60cd638 --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_sphinx/data/Doc/howto/descriptor.rst @@ -0,0 +1,1794 @@ +.. _descriptorhowto: + +================ +Descriptor Guide +================ + +:Author: Raymond Hettinger +:Contact: + +.. Contents:: + + +:term:`Descriptors ` let objects customize attribute lookup, +storage, and deletion. 
+ +This guide has four major sections: + +1) The "primer" gives a basic overview, moving gently from simple examples, + adding one feature at a time. Start here if you're new to descriptors. + +2) The second section shows a complete, practical descriptor example. If you + already know the basics, start there. + +3) The third section provides a more technical tutorial that goes into the + detailed mechanics of how descriptors work. Most people don't need this + level of detail. + +4) The last section has pure Python equivalents for built-in descriptors that + are written in C. Read this if you're curious about how functions turn + into bound methods or about the implementation of common tools like + :func:`classmethod`, :func:`staticmethod`, :func:`property`, and + :term:`__slots__`. + + +Primer +^^^^^^ + +In this primer, we start with the most basic possible example and then we'll +add new capabilities one by one. + + +Simple example: A descriptor that returns a constant +---------------------------------------------------- + +The :class:`Ten` class is a descriptor whose :meth:`__get__` method always +returns the constant ``10``: + +.. testcode:: + + class Ten: + def __get__(self, obj, objtype=None): + return 10 + +To use the descriptor, it must be stored as a class variable in another class: + +.. testcode:: + + class A: + x = 5 # Regular class attribute + y = Ten() # Descriptor instance + +An interactive session shows the difference between normal attribute lookup +and descriptor lookup: + +.. doctest:: + + >>> a = A() # Make an instance of class A + >>> a.x # Normal attribute lookup + 5 + >>> a.y # Descriptor lookup + 10 + +In the ``a.x`` attribute lookup, the dot operator finds ``'x': 5`` +in the class dictionary. In the ``a.y`` lookup, the dot operator +finds a descriptor instance, recognized by its ``__get__`` method. +Calling that method returns ``10``. + +Note that the value ``10`` is not stored in either the class dictionary or the +instance dictionary. 
Instead, the value ``10`` is computed on demand. + +This example shows how a simple descriptor works, but it isn't very useful. +For retrieving constants, normal attribute lookup would be better. + +In the next section, we'll create something more useful, a dynamic lookup. + + +Dynamic lookups +--------------- + +Interesting descriptors typically run computations instead of returning +constants: + +.. testcode:: + + import os + + class DirectorySize: + + def __get__(self, obj, objtype=None): + return len(os.listdir(obj.dirname)) + + class Directory: + + size = DirectorySize() # Descriptor instance + + def __init__(self, dirname): + self.dirname = dirname # Regular instance attribute + +An interactive session shows that the lookup is dynamic — it computes +different, updated answers each time:: + + >>> s = Directory('songs') + >>> g = Directory('games') + >>> s.size # The songs directory has twenty files + 20 + >>> g.size # The games directory has three files + 3 + >>> os.remove('games/chess') # Delete a game + >>> g.size # File count is automatically updated + 2 + +Besides showing how descriptors can run computations, this example also +reveals the purpose of the parameters to :meth:`__get__`. The *self* +parameter is *size*, an instance of *DirectorySize*. The *obj* parameter is +either *g* or *s*, an instance of *Directory*. It is the *obj* parameter that +lets the :meth:`__get__` method learn the target directory. The *objtype* +parameter is the class *Directory*. + + +Managed attributes +------------------ + +A popular use for descriptors is managing access to instance data. The +descriptor is assigned to a public attribute in the class dictionary while the +actual data is stored as a private attribute in the instance dictionary. The +descriptor's :meth:`__get__` and :meth:`__set__` methods are triggered when +the public attribute is accessed. + +In the following example, *age* is the public attribute and *_age* is the +private attribute. 
When the public attribute is accessed, the descriptor logs +the lookup or update: + +.. testcode:: + + import logging + + logging.basicConfig(level=logging.INFO) + + class LoggedAgeAccess: + + def __get__(self, obj, objtype=None): + value = obj._age + logging.info('Accessing %r giving %r', 'age', value) + return value + + def __set__(self, obj, value): + logging.info('Updating %r to %r', 'age', value) + obj._age = value + + class Person: + + age = LoggedAgeAccess() # Descriptor instance + + def __init__(self, name, age): + self.name = name # Regular instance attribute + self.age = age # Calls __set__() + + def birthday(self): + self.age += 1 # Calls both __get__() and __set__() + + +An interactive session shows that all access to the managed attribute *age* is +logged, but that the regular attribute *name* is not logged: + +.. testcode:: + :hide: + + import logging, sys + logging.basicConfig(level=logging.INFO, stream=sys.stdout, force=True) + +.. doctest:: + + >>> mary = Person('Mary M', 30) # The initial age update is logged + INFO:root:Updating 'age' to 30 + >>> dave = Person('David D', 40) + INFO:root:Updating 'age' to 40 + + >>> vars(mary) # The actual data is in a private attribute + {'name': 'Mary M', '_age': 30} + >>> vars(dave) + {'name': 'David D', '_age': 40} + + >>> mary.age # Access the data and log the lookup + INFO:root:Accessing 'age' giving 30 + 30 + >>> mary.birthday() # Updates are logged as well + INFO:root:Accessing 'age' giving 30 + INFO:root:Updating 'age' to 31 + + >>> dave.name # Regular attribute lookup isn't logged + 'David D' + >>> dave.age # Only the managed attribute is logged + INFO:root:Accessing 'age' giving 40 + 40 + +One major issue with this example is that the private name *_age* is hardwired in +the *LoggedAgeAccess* class. That means that each instance can only have one +logged attribute and that its name is unchangeable. In the next example, +we'll fix that problem. 
+ + +Customized names +---------------- + +When a class uses descriptors, it can inform each descriptor about which +variable name was used. + +In this example, the :class:`Person` class has two descriptor instances, +*name* and *age*. When the :class:`Person` class is defined, it makes a +callback to :meth:`__set_name__` in *LoggedAccess* so that the field names can +be recorded, giving each descriptor its own *public_name* and *private_name*: + +.. testcode:: + + import logging + + logging.basicConfig(level=logging.INFO) + + class LoggedAccess: + + def __set_name__(self, owner, name): + self.public_name = name + self.private_name = '_' + name + + def __get__(self, obj, objtype=None): + value = getattr(obj, self.private_name) + logging.info('Accessing %r giving %r', self.public_name, value) + return value + + def __set__(self, obj, value): + logging.info('Updating %r to %r', self.public_name, value) + setattr(obj, self.private_name, value) + + class Person: + + name = LoggedAccess() # First descriptor instance + age = LoggedAccess() # Second descriptor instance + + def __init__(self, name, age): + self.name = name # Calls the first descriptor + self.age = age # Calls the second descriptor + + def birthday(self): + self.age += 1 + +An interactive session shows that the :class:`Person` class has called +:meth:`__set_name__` so that the field names would be recorded. Here +we call :func:`vars` to look up the descriptor without triggering it: + +.. doctest:: + + >>> vars(vars(Person)['name']) + {'public_name': 'name', 'private_name': '_name'} + >>> vars(vars(Person)['age']) + {'public_name': 'age', 'private_name': '_age'} + +The new class now logs access to both *name* and *age*: + +.. testcode:: + :hide: + + import logging, sys + logging.basicConfig(level=logging.INFO, stream=sys.stdout, force=True) + +.. 
doctest:: + + >>> pete = Person('Peter P', 10) + INFO:root:Updating 'name' to 'Peter P' + INFO:root:Updating 'age' to 10 + >>> kate = Person('Catherine C', 20) + INFO:root:Updating 'name' to 'Catherine C' + INFO:root:Updating 'age' to 20 + +The two *Person* instances contain only the private names: + +.. doctest:: + + >>> vars(pete) + {'_name': 'Peter P', '_age': 10} + >>> vars(kate) + {'_name': 'Catherine C', '_age': 20} + + +Closing thoughts +---------------- + +A :term:`descriptor` is what we call any object that defines :meth:`__get__`, +:meth:`__set__`, or :meth:`__delete__`. + +Optionally, descriptors can have a :meth:`__set_name__` method. This is only +used in cases where a descriptor needs to know either the class where it was +created or the name of class variable it was assigned to. (This method, if +present, is called even if the class is not a descriptor.) + +Descriptors get invoked by the dot operator during attribute lookup. If a +descriptor is accessed indirectly with ``vars(some_class)[descriptor_name]``, +the descriptor instance is returned without invoking it. + +Descriptors only work when used as class variables. When put in instances, +they have no effect. + +The main motivation for descriptors is to provide a hook allowing objects +stored in class variables to control what happens during attribute lookup. + +Traditionally, the calling class controls what happens during lookup. +Descriptors invert that relationship and allow the data being looked-up to +have a say in the matter. + +Descriptors are used throughout the language. It is how functions turn into +bound methods. Common tools like :func:`classmethod`, :func:`staticmethod`, +:func:`property`, and :func:`functools.cached_property` are all implemented as +descriptors. + + +Complete Practical Example +^^^^^^^^^^^^^^^^^^^^^^^^^^ + +In this example, we create a practical and powerful tool for locating +notoriously hard to find data corruption bugs. 
+ + +Validator class +--------------- + +A validator is a descriptor for managed attribute access. Prior to storing +any data, it verifies that the new value meets various type and range +restrictions. If those restrictions aren't met, it raises an exception to +prevent data corruption at its source. + +This :class:`Validator` class is both an :term:`abstract base class` and a +managed attribute descriptor: + +.. testcode:: + + from abc import ABC, abstractmethod + + class Validator(ABC): + + def __set_name__(self, owner, name): + self.private_name = '_' + name + + def __get__(self, obj, objtype=None): + return getattr(obj, self.private_name) + + def __set__(self, obj, value): + self.validate(value) + setattr(obj, self.private_name, value) + + @abstractmethod + def validate(self, value): + pass + +Custom validators need to inherit from :class:`Validator` and must supply a +:meth:`validate` method to test various restrictions as needed. + + +Custom validators +----------------- + +Here are three practical data validation utilities: + +1) :class:`OneOf` verifies that a value is one of a restricted set of options. + +2) :class:`Number` verifies that a value is either an :class:`int` or + :class:`float`. Optionally, it verifies that a value is between a given + minimum or maximum. + +3) :class:`String` verifies that a value is a :class:`str`. Optionally, it + validates a given minimum or maximum length. It can validate a + user-defined `predicate + `_ as well. + +.. 
testcode:: + + class OneOf(Validator): + + def __init__(self, *options): + self.options = set(options) + + def validate(self, value): + if value not in self.options: + raise ValueError( + f'Expected {value!r} to be one of {self.options!r}' + ) + + class Number(Validator): + + def __init__(self, minvalue=None, maxvalue=None): + self.minvalue = minvalue + self.maxvalue = maxvalue + + def validate(self, value): + if not isinstance(value, (int, float)): + raise TypeError(f'Expected {value!r} to be an int or float') + if self.minvalue is not None and value < self.minvalue: + raise ValueError( + f'Expected {value!r} to be at least {self.minvalue!r}' + ) + if self.maxvalue is not None and value > self.maxvalue: + raise ValueError( + f'Expected {value!r} to be no more than {self.maxvalue!r}' + ) + + class String(Validator): + + def __init__(self, minsize=None, maxsize=None, predicate=None): + self.minsize = minsize + self.maxsize = maxsize + self.predicate = predicate + + def validate(self, value): + if not isinstance(value, str): + raise TypeError(f'Expected {value!r} to be an str') + if self.minsize is not None and len(value) < self.minsize: + raise ValueError( + f'Expected {value!r} to be no smaller than {self.minsize!r}' + ) + if self.maxsize is not None and len(value) > self.maxsize: + raise ValueError( + f'Expected {value!r} to be no bigger than {self.maxsize!r}' + ) + if self.predicate is not None and not self.predicate(value): + raise ValueError( + f'Expected {self.predicate} to be true for {value!r}' + ) + + +Practical application +--------------------- + +Here's how the data validators can be used in a real class: + +.. 
testcode:: + + class Component: + + name = String(minsize=3, maxsize=10, predicate=str.isupper) + kind = OneOf('wood', 'metal', 'plastic') + quantity = Number(minvalue=0) + + def __init__(self, name, kind, quantity): + self.name = name + self.kind = kind + self.quantity = quantity + +The descriptors prevent invalid instances from being created: + +.. doctest:: + + >>> Component('Widget', 'metal', 5) # Blocked: 'Widget' is not all uppercase + Traceback (most recent call last): + ... + ValueError: Expected <method 'isupper' of 'str' objects> to be true for 'Widget' + + >>> Component('WIDGET', 'metle', 5) # Blocked: 'metle' is misspelled + Traceback (most recent call last): + ... + ValueError: Expected 'metle' to be one of {'metal', 'plastic', 'wood'} + + >>> Component('WIDGET', 'metal', -5) # Blocked: -5 is negative + Traceback (most recent call last): + ... + ValueError: Expected -5 to be at least 0 + + >>> Component('WIDGET', 'metal', 'V') # Blocked: 'V' isn't a number + Traceback (most recent call last): + ... + TypeError: Expected 'V' to be an int or float + + >>> c = Component('WIDGET', 'metal', 5) # Allowed: The inputs are valid + + +Technical Tutorial +^^^^^^^^^^^^^^^^^^ + +What follows is a more technical tutorial for the mechanics and details of how +descriptors work. + + +Abstract +-------- + +Defines descriptors, summarizes the protocol, and shows how descriptors are +called. Provides an example showing how object relational mappings work. + +Learning about descriptors not only provides access to a larger toolset, it +creates a deeper understanding of how Python works. + + +Definition and introduction +--------------------------- + +In general, a descriptor is an attribute value that has one of the methods in +the descriptor protocol. Those methods are :meth:`__get__`, :meth:`__set__`, +and :meth:`__delete__`. If any of those methods are defined for an +attribute, it is said to be a :term:`descriptor`.
+ +The default behavior for attribute access is to get, set, or delete the +attribute from an object's dictionary. For instance, ``a.x`` has a lookup chain +starting with ``a.__dict__['x']``, then ``type(a).__dict__['x']``, and +continuing through the method resolution order of ``type(a)``. If the +looked-up value is an object defining one of the descriptor methods, then Python +may override the default behavior and invoke the descriptor method instead. +Where this occurs in the precedence chain depends on which descriptor methods +were defined. + +Descriptors are a powerful, general purpose protocol. They are the mechanism +behind properties, methods, static methods, class methods, and +:func:`super`. They are used throughout Python itself. Descriptors +simplify the underlying C code and offer a flexible set of new tools for +everyday Python programs. + + +Descriptor protocol +------------------- + +``descr.__get__(self, obj, type=None)`` + +``descr.__set__(self, obj, value)`` + +``descr.__delete__(self, obj)`` + +That is all there is to it. Define any of these methods and an object is +considered a descriptor and can override default behavior upon being looked up +as an attribute. + +If an object defines :meth:`__set__` or :meth:`__delete__`, it is considered +a data descriptor. Descriptors that only define :meth:`__get__` are called +non-data descriptors (they are often used for methods but other uses are +possible). + +Data and non-data descriptors differ in how overrides are calculated with +respect to entries in an instance's dictionary. If an instance's dictionary +has an entry with the same name as a data descriptor, the data descriptor +takes precedence. If an instance's dictionary has an entry with the same +name as a non-data descriptor, the dictionary entry takes precedence. + +To make a read-only data descriptor, define both :meth:`__get__` and +:meth:`__set__` with the :meth:`__set__` raising an :exc:`AttributeError` when +called. 
Defining the :meth:`__set__` method with an exception raising +placeholder is enough to make it a data descriptor. + + +Overview of descriptor invocation +--------------------------------- + +A descriptor can be called directly with ``desc.__get__(obj)`` or +``desc.__get__(None, cls)``. + +But it is more common for a descriptor to be invoked automatically from +attribute access. + +The expression ``obj.x`` looks up the attribute ``x`` in the chain of +namespaces for ``obj``. If the search finds a descriptor outside of the +instance :attr:`~object.__dict__`, its :meth:`~object.__get__` method is +invoked according to the precedence rules listed below. + +The details of invocation depend on whether ``obj`` is an object, class, or +instance of super. + + +Invocation from an instance +--------------------------- + +Instance lookup scans through a chain of namespaces giving data descriptors +the highest priority, followed by instance variables, then non-data +descriptors, then class variables, and lastly :meth:`__getattr__` if it is +provided. + +If a descriptor is found for ``a.x``, then it is invoked with: +``desc.__get__(a, type(a))``. + +The logic for a dotted lookup is in :meth:`object.__getattribute__`. Here is +a pure Python equivalent: + +.. 
testcode:: + + def find_name_in_mro(cls, name, default): + "Emulate _PyType_Lookup() in Objects/typeobject.c" + for base in cls.__mro__: + if name in vars(base): + return vars(base)[name] + return default + + def object_getattribute(obj, name): + "Emulate PyObject_GenericGetAttr() in Objects/object.c" + null = object() + objtype = type(obj) + cls_var = find_name_in_mro(objtype, name, null) + descr_get = getattr(type(cls_var), '__get__', null) + if descr_get is not null: + if (hasattr(type(cls_var), '__set__') + or hasattr(type(cls_var), '__delete__')): + return descr_get(cls_var, obj, objtype) # data descriptor + if hasattr(obj, '__dict__') and name in vars(obj): + return vars(obj)[name] # instance variable + if descr_get is not null: + return descr_get(cls_var, obj, objtype) # non-data descriptor + if cls_var is not null: + return cls_var # class variable + raise AttributeError(name) + + +.. testcode:: + :hide: + + # Test the fidelity of object_getattribute() by comparing it with the + # normal object.__getattribute__(). The former will be accessed by + # square brackets and the latter by the dot operator. 
+ + class Object: + + def __getitem__(obj, name): + try: + return object_getattribute(obj, name) + except AttributeError: + if not hasattr(type(obj), '__getattr__'): + raise + return type(obj).__getattr__(obj, name) # __getattr__ + + class DualOperator(Object): + + x = 10 + + def __init__(self, z): + self.z = z + + @property + def p2(self): + return 2 * self.x + + @property + def p3(self): + return 3 * self.x + + def m5(self, y): + return 5 * y + + def m7(self, y): + return 7 * y + + def __getattr__(self, name): + return ('getattr_hook', self, name) + + class DualOperatorWithSlots: + + __getitem__ = Object.__getitem__ + + __slots__ = ['z'] + + x = 15 + + def __init__(self, z): + self.z = z + + @property + def p2(self): + return 2 * self.x + + def m5(self, y): + return 5 * y + + def __getattr__(self, name): + return ('getattr_hook', self, name) + + class D1: + def __get__(self, obj, objtype=None): + return type(self), obj, objtype + + class U1: + x = D1() + + class U2(U1): + pass + +.. doctest:: + :hide: + + >>> a = DualOperator(11) + >>> vars(a).update(p3 = '_p3', m7 = '_m7') + >>> a.x == a['x'] == 10 + True + >>> a.z == a['z'] == 11 + True + >>> a.p2 == a['p2'] == 20 + True + >>> a.p3 == a['p3'] == 30 + True + >>> a.m5(100) == a['m5'](100) == 500 + True + >>> a.m7 == a['m7'] == '_m7' + True + >>> a.g == a['g'] == ('getattr_hook', a, 'g') + True + + >>> b = DualOperatorWithSlots(22) + >>> b.x == b['x'] == 15 + True + >>> b.z == b['z'] == 22 + True + >>> b.p2 == b['p2'] == 30 + True + >>> b.m5(200) == b['m5'](200) == 1000 + True + >>> b.g == b['g'] == ('getattr_hook', b, 'g') + True + + >>> u2 = U2() + >>> object_getattribute(u2, 'x') == u2.x == (D1, u2, U2) + True + +Note, there is no :meth:`__getattr__` hook in the :meth:`__getattribute__` +code. That is why calling :meth:`__getattribute__` directly or with +``super().__getattribute__`` will bypass :meth:`__getattr__` entirely.
+ +Instead, it is the dot operator and the :func:`getattr` function that are +responsible for invoking :meth:`__getattr__` whenever :meth:`__getattribute__` +raises an :exc:`AttributeError`. Their logic is encapsulated in a helper +function: + +.. testcode:: + + def getattr_hook(obj, name): + "Emulate slot_tp_getattr_hook() in Objects/typeobject.c" + try: + return obj.__getattribute__(name) + except AttributeError: + if not hasattr(type(obj), '__getattr__'): + raise + return type(obj).__getattr__(obj, name) # __getattr__ + +.. doctest:: + :hide: + + + >>> class ClassWithGetAttr: + ... x = 123 + ... def __getattr__(self, attr): + ... return attr.upper() + ... + >>> cw = ClassWithGetAttr() + >>> cw.y = 456 + >>> getattr_hook(cw, 'x') + 123 + >>> getattr_hook(cw, 'y') + 456 + >>> getattr_hook(cw, 'z') + 'Z' + + >>> class ClassWithoutGetAttr: + ... x = 123 + ... + >>> cwo = ClassWithoutGetAttr() + >>> cwo.y = 456 + >>> getattr_hook(cwo, 'x') + 123 + >>> getattr_hook(cwo, 'y') + 456 + >>> getattr_hook(cwo, 'z') + Traceback (most recent call last): + ... + AttributeError: 'ClassWithoutGetAttr' object has no attribute 'z' + + +Invocation from a class +----------------------- + +The logic for a dotted lookup such as ``A.x`` is in +:meth:`type.__getattribute__`. The steps are similar to those for +:meth:`object.__getattribute__` but the instance dictionary lookup is replaced +by a search through the class's :term:`method resolution order`. + +If a descriptor is found, it is invoked with ``desc.__get__(None, A)``. + +The full C implementation can be found in :c:func:`!type_getattro` and +:c:func:`!_PyType_Lookup` in :source:`Objects/typeobject.c`. + + +Invocation from super +--------------------- + +The logic for super's dotted lookup is in the :meth:`__getattribute__` method for +the object returned by :func:`super`.
+ +A dotted lookup such as ``super(A, obj).m`` searches ``obj.__class__.__mro__`` +for the base class ``B`` immediately following ``A`` and then returns +``B.__dict__['m'].__get__(obj, A)``. If not a descriptor, ``m`` is returned +unchanged. + +The full C implementation can be found in :c:func:`!super_getattro` in +:source:`Objects/typeobject.c`. A pure Python equivalent can be found in +`Guido's Tutorial +`_. + + +Summary of invocation logic +--------------------------- + +The mechanism for descriptors is embedded in the :meth:`__getattribute__` +methods for :class:`object`, :class:`type`, and :func:`super`. + +The important points to remember are: + +* Descriptors are invoked by the :meth:`__getattribute__` method. + +* Classes inherit this machinery from :class:`object`, :class:`type`, or + :func:`super`. + +* Overriding :meth:`__getattribute__` prevents automatic descriptor calls + because all the descriptor logic is in that method. + +* :meth:`object.__getattribute__` and :meth:`type.__getattribute__` make + different calls to :meth:`__get__`. The first includes the instance and may + include the class. The second puts in ``None`` for the instance and always + includes the class. + +* Data descriptors always override instance dictionaries. + +* Non-data descriptors may be overridden by instance dictionaries. + + +Automatic name notification +--------------------------- + +Sometimes it is desirable for a descriptor to know what class variable name it +was assigned to. When a new class is created, the :class:`type` metaclass +scans the dictionary of the new class. If any of the entries are descriptors +and if they define :meth:`__set_name__`, that method is called with two +arguments. The *owner* is the class where the descriptor is used, and the +*name* is the class variable the descriptor was assigned to. + +The implementation details are in :c:func:`!type_new` and +:c:func:`!set_names` in :source:`Objects/typeobject.c`. 
+ +Since the update logic is in :meth:`type.__new__`, notifications only take +place at the time of class creation. If descriptors are added to the class +afterwards, :meth:`__set_name__` will need to be called manually. + + +ORM example +----------- + +The following code is a simplified skeleton showing how data descriptors could +be used to implement an `object relational mapping +`_. + +The essential idea is that the data is stored in an external database. The +Python instances only hold keys to the database's tables. Descriptors take +care of lookups or updates: + +.. testcode:: + + class Field: + + def __set_name__(self, owner, name): + self.fetch = f'SELECT {name} FROM {owner.table} WHERE {owner.key}=?;' + self.store = f'UPDATE {owner.table} SET {name}=? WHERE {owner.key}=?;' + + def __get__(self, obj, objtype=None): + return conn.execute(self.fetch, [obj.key]).fetchone()[0] + + def __set__(self, obj, value): + conn.execute(self.store, [value, obj.key]) + conn.commit() + +We can use the :class:`Field` class to define `models +`_ that describe the schema for +each table in a database: + +.. testcode:: + + class Movie: + table = 'Movies' # Table name + key = 'title' # Primary key + director = Field() + year = Field() + + def __init__(self, key): + self.key = key + + class Song: + table = 'Music' + key = 'title' + artist = Field() + year = Field() + genre = Field() + + def __init__(self, key): + self.key = key + +To use the models, first connect to the database:: + + >>> import sqlite3 + >>> conn = sqlite3.connect('entertainment.db') + +An interactive session shows how data is retrieved from the database and how +it can be updated: + +.. 
testsetup:: + + song_data = [ + ('Country Roads', 'John Denver', 1972), + ('Me and Bobby McGee', 'Janis Joplin', 1971), + ('Coal Miners Daughter', 'Loretta Lynn', 1970), + ] + + movie_data = [ + ('Star Wars', 'George Lucas', 1977), + ('Jaws', 'Steven Spielberg', 1975), + ('Aliens', 'James Cameron', 1986), + ] + + import sqlite3 + + conn = sqlite3.connect(':memory:') + conn.execute('CREATE TABLE Music (title text, artist text, year integer);') + conn.execute('CREATE INDEX MusicNdx ON Music (title);') + conn.executemany('INSERT INTO Music VALUES (?, ?, ?);', song_data) + conn.execute('CREATE TABLE Movies (title text, director text, year integer);') + conn.execute('CREATE INDEX MovieNdx ON Movies (title);') + conn.executemany('INSERT INTO Movies VALUES (?, ?, ?);', movie_data) + conn.commit() + +.. doctest:: + + >>> Movie('Star Wars').director + 'George Lucas' + >>> jaws = Movie('Jaws') + >>> f'Released in {jaws.year} by {jaws.director}' + 'Released in 1975 by Steven Spielberg' + + >>> Song('Country Roads').artist + 'John Denver' + + >>> Movie('Star Wars').director = 'J.J. Abrams' + >>> Movie('Star Wars').director + 'J.J. Abrams' + +.. testcleanup:: + + conn.close() + + +Pure Python Equivalents +^^^^^^^^^^^^^^^^^^^^^^^ + +The descriptor protocol is simple and offers exciting possibilities. Several +use cases are so common that they have been prepackaged into built-in tools. +Properties, bound methods, static methods, class methods, and \_\_slots\_\_ are +all based on the descriptor protocol. + + +Properties +---------- + +Calling :func:`property` is a succinct way of building a data descriptor that +triggers a function call upon access to an attribute. Its signature is:: + + property(fget=None, fset=None, fdel=None, doc=None) -> property + +The documentation shows a typical use to define a managed attribute ``x``: + +.. 
testcode:: + + class C: + def getx(self): return self.__x + def setx(self, value): self.__x = value + def delx(self): del self.__x + x = property(getx, setx, delx, "I'm the 'x' property.") + +.. doctest:: + :hide: + + >>> C.x.__doc__ + "I'm the 'x' property." + >>> c.x = 2.71828 + >>> c.x + 2.71828 + >>> del c.x + >>> c.x + Traceback (most recent call last): + ... + AttributeError: 'C' object has no attribute '_C__x' + +To see how :func:`property` is implemented in terms of the descriptor protocol, +here is a pure Python equivalent that implements most of the core functionality: + +.. testcode:: + + class Property: + "Emulate PyProperty_Type() in Objects/descrobject.c" + + def __init__(self, fget=None, fset=None, fdel=None, doc=None): + self.fget = fget + self.fset = fset + self.fdel = fdel + if doc is None and fget is not None: + doc = fget.__doc__ + self.__doc__ = doc + + def __set_name__(self, owner, name): + self.__name__ = name + + def __get__(self, obj, objtype=None): + if obj is None: + return self + if self.fget is None: + raise AttributeError + return self.fget(obj) + + def __set__(self, obj, value): + if self.fset is None: + raise AttributeError + self.fset(obj, value) + + def __delete__(self, obj): + if self.fdel is None: + raise AttributeError + self.fdel(obj) + + def getter(self, fget): + return type(self)(fget, self.fset, self.fdel, self.__doc__) + + def setter(self, fset): + return type(self)(self.fget, fset, self.fdel, self.__doc__) + + def deleter(self, fdel): + return type(self)(self.fget, self.fset, fdel, self.__doc__) + +.. 
testcode:: + :hide: + + # Verify the Property() emulation + + class CC: + def getx(self): + return self.__x + def setx(self, value): + self.__x = value + def delx(self): + del self.__x + x = Property(getx, setx, delx, "I'm the 'x' property.") + no_getter = Property(None, setx, delx, "I'm the 'x' property.") + no_setter = Property(getx, None, delx, "I'm the 'x' property.") + no_deleter = Property(getx, setx, None, "I'm the 'x' property.") + no_doc = Property(getx, setx, delx, None) + + + # Now do it again but use the decorator style + + class CCC: + @Property + def x(self): + return self.__x + @x.setter + def x(self, value): + self.__x = value + @x.deleter + def x(self): + del self.__x + + +.. doctest:: + :hide: + + >>> cc = CC() + >>> hasattr(cc, 'x') + False + >>> cc.x = 33 + >>> cc.x + 33 + >>> del cc.x + >>> hasattr(cc, 'x') + False + + >>> ccc = CCC() + >>> hasattr(ccc, 'x') + False + >>> ccc.x = 333 + >>> ccc.x == 333 + True + >>> del ccc.x + >>> hasattr(ccc, 'x') + False + + >>> cc = CC() + >>> cc.x = 33 + >>> try: + ... cc.no_getter + ... except AttributeError as e: + ... type(e).__name__ + ... + 'AttributeError' + + >>> try: + ... cc.no_setter = 33 + ... except AttributeError as e: + ... type(e).__name__ + ... + 'AttributeError' + + >>> try: + ... del cc.no_deleter + ... except AttributeError as e: + ... type(e).__name__ + ... + 'AttributeError' + + >>> CC.no_doc.__doc__ is None + True + +The :func:`property` builtin helps whenever a user interface has granted +attribute access and then subsequent changes require the intervention of a +method. + +For instance, a spreadsheet class may grant access to a cell value through +``Cell('b10').value``. Subsequent improvements to the program require the cell +to be recalculated on every access; however, the programmer does not want to +affect existing client code accessing the attribute directly. The solution is +to wrap access to the value attribute in a property data descriptor: + +.. 
testcode:: + + class Cell: + ... + + @property + def value(self): + "Recalculate the cell before returning value" + self.recalc() + return self._value + +Either the built-in :func:`property` or our :func:`Property` equivalent would +work in this example. + + +Functions and methods +--------------------- + +Python's object oriented features are built upon a function based environment. +Using non-data descriptors, the two are merged seamlessly. + +Functions stored in class dictionaries get turned into methods when invoked. +Methods only differ from regular functions in that the object instance is +prepended to the other arguments. By convention, the instance is called +*self* but could be called *this* or any other variable name. + +Methods can be created manually with :class:`types.MethodType` which is +roughly equivalent to: + +.. testcode:: + + class MethodType: + "Emulate PyMethod_Type in Objects/classobject.c" + + def __init__(self, func, obj): + self.__func__ = func + self.__self__ = obj + + def __call__(self, *args, **kwargs): + func = self.__func__ + obj = self.__self__ + return func(obj, *args, **kwargs) + + def __getattribute__(self, name): + "Emulate method_getset() in Objects/classobject.c" + if name == '__doc__': + return self.__func__.__doc__ + return object.__getattribute__(self, name) + + def __getattr__(self, name): + "Emulate method_getattro() in Objects/classobject.c" + return getattr(self.__func__, name) + + def __get__(self, obj, objtype=None): + "Emulate method_descr_get() in Objects/classobject.c" + return self + +To support automatic creation of methods, functions include the +:meth:`__get__` method for binding methods during attribute access. This +means that functions are non-data descriptors that return bound methods +during dotted lookup from an instance. Here's how it works: + +.. testcode:: + + class Function: + ... 
+ + def __get__(self, obj, objtype=None): + "Simulate func_descr_get() in Objects/funcobject.c" + if obj is None: + return self + return MethodType(self, obj) + +Running the following class in the interpreter shows how the function +descriptor works in practice: + +.. testcode:: + + class D: + def f(self): + return self + + class D2: + pass + +.. doctest:: + :hide: + + >>> d = D() + >>> d2 = D2() + >>> d2.f = d.f.__get__(d2, D2) + >>> d2.f() is d + True + +The function has a :term:`qualified name` attribute to support introspection: + +.. doctest:: + + >>> D.f.__qualname__ + 'D.f' + +Accessing the function through the class dictionary does not invoke +:meth:`__get__`. Instead, it just returns the underlying function object:: + + >>> D.__dict__['f'] + <function D.f at 0x00C45070> + +Dotted access from a class calls :meth:`__get__` which just returns the +underlying function unchanged:: + + >>> D.f + <function D.f at 0x00C45070> + +The interesting behavior occurs during dotted access from an instance. The +dotted lookup calls :meth:`__get__` which returns a bound method object:: + + >>> d = D() + >>> d.f + <bound method D.f of <__main__.D object at 0x00B18C90>> + +Internally, the bound method stores the underlying function and the bound +instance:: + + >>> d.f.__func__ + <function D.f at 0x00C45070> + + >>> d.f.__self__ + <__main__.D object at 0x00B18C90> + +If you have ever wondered where *self* comes from in regular methods or where +*cls* comes from in class methods, this is it! + + +Kinds of methods +---------------- + +Non-data descriptors provide a simple mechanism for variations on the usual +patterns of binding functions into methods. + +To recap, functions have a :meth:`__get__` method so that they can be converted +to a method when accessed as attributes. The non-data descriptor transforms an +``obj.f(*args)`` call into ``f(obj, *args)``. Calling ``cls.f(*args)`` +becomes ``f(*args)``.
+ +This chart summarizes the binding and its two most useful variants: + + +-----------------+----------------------+------------------+ + | Transformation | Called from an | Called from a | + | | object | class | + +=================+======================+==================+ + | function | f(obj, \*args) | f(\*args) | + +-----------------+----------------------+------------------+ + | staticmethod | f(\*args) | f(\*args) | + +-----------------+----------------------+------------------+ + | classmethod | f(type(obj), \*args) | f(cls, \*args) | + +-----------------+----------------------+------------------+ + + +Static methods +-------------- + +Static methods return the underlying function without changes. Calling either +``c.f`` or ``C.f`` is the equivalent of a direct lookup into +``object.__getattribute__(c, "f")`` or ``object.__getattribute__(C, "f")``. As a +result, the function becomes identically accessible from either an object or a +class. + +Good candidates for static methods are methods that do not reference the +``self`` variable. + +For instance, a statistics package may include a container class for +experimental data. The class provides normal methods for computing the average, +mean, median, and other descriptive statistics that depend on the data. However, +there may be useful functions which are conceptually related but do not depend +on the data. For instance, ``erf(x)`` is a handy conversion routine that comes up +in statistical work but does not directly depend on a particular dataset. +It can be called either from an object or the class: ``s.erf(1.5) --> 0.9661`` +or ``Sample.erf(1.5) --> 0.9661``. + +Since static methods return the underlying function with no changes, the +example calls are unexciting: + +.. testcode:: + + class E: + @staticmethod + def f(x): + return x * 10 + +.. 
doctest:: + + >>> E.f(3) + 30 + >>> E().f(3) + 30 + +Using the non-data descriptor protocol, a pure Python version of +:func:`staticmethod` would look like this: + +.. testcode:: + + import functools + + class StaticMethod: + "Emulate PyStaticMethod_Type() in Objects/funcobject.c" + + def __init__(self, f): + self.f = f + functools.update_wrapper(self, f) + + def __get__(self, obj, objtype=None): + return self.f + + def __call__(self, *args, **kwds): + return self.f(*args, **kwds) + +The :func:`functools.update_wrapper` call adds a ``__wrapped__`` attribute +that refers to the underlying function. Also it carries forward +the attributes necessary to make the wrapper look like the wrapped +function: :attr:`~function.__name__`, :attr:`~function.__qualname__`, +:attr:`~function.__doc__`, and :attr:`~function.__annotations__`. + +.. testcode:: + :hide: + + class E_sim: + @StaticMethod + def f(x: int) -> str: + "Simple function example" + return "!" * x + + wrapped_ord = StaticMethod(ord) + +.. doctest:: + :hide: + + >>> E_sim.f(3) + '!!!' + >>> E_sim().f(3) + '!!!' + + >>> sm = vars(E_sim)['f'] + >>> type(sm).__name__ + 'StaticMethod' + >>> f = E_sim.f + >>> type(f).__name__ + 'function' + >>> sm.__name__ + 'f' + >>> f.__name__ + 'f' + >>> sm.__qualname__ + 'E_sim.f' + >>> f.__qualname__ + 'E_sim.f' + >>> sm.__doc__ + 'Simple function example' + >>> f.__doc__ + 'Simple function example' + >>> sm.__annotations__ + {'x': , 'return': } + >>> f.__annotations__ + {'x': , 'return': } + >>> sm.__module__ == f.__module__ + True + >>> sm(3) + '!!!' + >>> f(3) + '!!!' 
+ + >>> wrapped_ord('A') + 65 + >>> wrapped_ord.__module__ == ord.__module__ + True + >>> wrapped_ord.__wrapped__ == ord + True + >>> wrapped_ord.__name__ == ord.__name__ + True + >>> wrapped_ord.__qualname__ == ord.__qualname__ + True + >>> wrapped_ord.__doc__ == ord.__doc__ + True + + +Class methods +------------- + +Unlike static methods, class methods prepend the class reference to the +argument list before calling the function. This format is the same +for whether the caller is an object or a class: + +.. testcode:: + + class F: + @classmethod + def f(cls, x): + return cls.__name__, x + +.. doctest:: + + >>> F.f(3) + ('F', 3) + >>> F().f(3) + ('F', 3) + +This behavior is useful whenever the method only needs to have a class +reference and does not rely on data stored in a specific instance. One use for +class methods is to create alternate class constructors. For example, the +classmethod :func:`dict.fromkeys` creates a new dictionary from a list of +keys. The pure Python equivalent is: + +.. testcode:: + + class Dict(dict): + @classmethod + def fromkeys(cls, iterable, value=None): + "Emulate dict_fromkeys() in Objects/dictobject.c" + d = cls() + for key in iterable: + d[key] = value + return d + +Now a new dictionary of unique keys can be constructed like this: + +.. doctest:: + + >>> d = Dict.fromkeys('abracadabra') + >>> type(d) is Dict + True + >>> d + {'a': None, 'b': None, 'r': None, 'c': None, 'd': None} + +Using the non-data descriptor protocol, a pure Python version of +:func:`classmethod` would look like this: + +.. testcode:: + + import functools + + class ClassMethod: + "Emulate PyClassMethod_Type() in Objects/funcobject.c" + + def __init__(self, f): + self.f = f + functools.update_wrapper(self, f) + + def __get__(self, obj, cls=None): + if cls is None: + cls = type(obj) + return MethodType(self.f, cls) + +.. 
testcode:: + :hide: + + # Verify the emulation works + class T: + @ClassMethod + def cm(cls, x: int, y: str) -> tuple[str, int, str]: + "Class method that returns a tuple" + return (cls.__name__, x, y) + + +.. doctest:: + :hide: + + >>> T.cm(11, 22) + ('T', 11, 22) + + # Also call it from an instance + >>> t = T() + >>> t.cm(11, 22) + ('T', 11, 22) + + # Verify that T uses our emulation + >>> type(vars(T)['cm']).__name__ + 'ClassMethod' + + # Verify that update_wrapper() correctly copied attributes + >>> T.cm.__name__ + 'cm' + >>> T.cm.__qualname__ + 'T.cm' + >>> T.cm.__doc__ + 'Class method that returns a tuple' + >>> T.cm.__annotations__ + {'x': , 'y': , 'return': tuple[str, int, str]} + + # Verify that __wrapped__ was added and works correctly + >>> f = vars(T)['cm'].__wrapped__ + >>> type(f).__name__ + 'function' + >>> f.__name__ + 'cm' + >>> f(T, 11, 22) + ('T', 11, 22) + + +The :func:`functools.update_wrapper` call in ``ClassMethod`` adds a +``__wrapped__`` attribute that refers to the underlying function. Also +it carries forward the attributes necessary to make the wrapper look +like the wrapped function: :attr:`~function.__name__`, +:attr:`~function.__qualname__`, :attr:`~function.__doc__`, +and :attr:`~function.__annotations__`. + + +Member objects and __slots__ +---------------------------- + +When a class defines ``__slots__``, it replaces instance dictionaries with a +fixed-length array of slot values. From a user point of view that has +several effects: + +1. Provides immediate detection of bugs due to misspelled attribute +assignments. Only attribute names specified in ``__slots__`` are allowed: + +.. testcode:: + + class Vehicle: + __slots__ = ('id_number', 'make', 'model') + +.. doctest:: + + >>> auto = Vehicle() + >>> auto.id_nubmer = 'VYE483814LQEX' + Traceback (most recent call last): + ... + AttributeError: 'Vehicle' object has no attribute 'id_nubmer' + +2. 
Helps create immutable objects where descriptors manage access to private +attributes stored in ``__slots__``: + +.. testcode:: + + class Immutable: + + __slots__ = ('_dept', '_name') # Replace the instance dictionary + + def __init__(self, dept, name): + self._dept = dept # Store to private attribute + self._name = name # Store to private attribute + + @property # Read-only descriptor + def dept(self): + return self._dept + + @property + def name(self): # Read-only descriptor + return self._name + +.. doctest:: + + >>> mark = Immutable('Botany', 'Mark Watney') + >>> mark.dept + 'Botany' + >>> mark.dept = 'Space Pirate' + Traceback (most recent call last): + ... + AttributeError: property 'dept' of 'Immutable' object has no setter + >>> mark.location = 'Mars' + Traceback (most recent call last): + ... + AttributeError: 'Immutable' object has no attribute 'location' + +3. Saves memory. On a 64-bit Linux build, an instance with two attributes +takes 48 bytes with ``__slots__`` and 152 bytes without. This `flyweight +design pattern `_ likely only +matters when a large number of instances are going to be created. + +4. Improves speed. Reading instance variables is 35% faster with +``__slots__`` (as measured with Python 3.10 on an Apple M1 processor). + +5. Blocks tools like :func:`functools.cached_property` which require an +instance dictionary to function correctly: + +.. testcode:: + + from functools import cached_property + + class CP: + __slots__ = () # Eliminates the instance dict + + @cached_property # Requires an instance dict + def pi(self): + return 4 * sum((-1.0)**n / (2.0*n + 1.0) + for n in reversed(range(100_000))) + +.. doctest:: + + >>> CP().pi + Traceback (most recent call last): + ... + TypeError: No '__dict__' attribute on 'CP' instance to cache 'pi' property. + +It is not possible to create an exact drop-in pure Python version of +``__slots__`` because it requires direct access to C structures and control +over object memory allocation. 
However, we can build a mostly faithful +simulation where the actual C structure for slots is emulated by a private +``_slotvalues`` list. Reads and writes to that private structure are managed +by member descriptors: + +.. testcode:: + + null = object() + + class Member: + + def __init__(self, name, clsname, offset): + 'Emulate PyMemberDef in Include/structmember.h' + # Also see descr_new() in Objects/descrobject.c + self.name = name + self.clsname = clsname + self.offset = offset + + def __get__(self, obj, objtype=None): + 'Emulate member_get() in Objects/descrobject.c' + # Also see PyMember_GetOne() in Python/structmember.c + if obj is None: + return self + value = obj._slotvalues[self.offset] + if value is null: + raise AttributeError(self.name) + return value + + def __set__(self, obj, value): + 'Emulate member_set() in Objects/descrobject.c' + obj._slotvalues[self.offset] = value + + def __delete__(self, obj): + 'Emulate member_delete() in Objects/descrobject.c' + value = obj._slotvalues[self.offset] + if value is null: + raise AttributeError(self.name) + obj._slotvalues[self.offset] = null + + def __repr__(self): + 'Emulate member_repr() in Objects/descrobject.c' + return f'' + +The :meth:`type.__new__` method takes care of adding member objects to class +variables: + +.. testcode:: + + class Type(type): + 'Simulate how the type metaclass adds member objects for slots' + + def __new__(mcls, clsname, bases, mapping, **kwargs): + 'Emulate type_new() in Objects/typeobject.c' + # type_new() calls PyTypeReady() which calls add_methods() + slot_names = mapping.get('slot_names', []) + for offset, name in enumerate(slot_names): + mapping[name] = Member(name, clsname, offset) + return type.__new__(mcls, clsname, bases, mapping, **kwargs) + +The :meth:`object.__new__` method takes care of creating instances that have +slots instead of an instance dictionary. Here is a rough simulation in pure +Python: + +.. 
testcode:: + + class Object: + 'Simulate how object.__new__() allocates memory for __slots__' + + def __new__(cls, *args, **kwargs): + 'Emulate object_new() in Objects/typeobject.c' + inst = super().__new__(cls) + if hasattr(cls, 'slot_names'): + empty_slots = [null] * len(cls.slot_names) + object.__setattr__(inst, '_slotvalues', empty_slots) + return inst + + def __setattr__(self, name, value): + 'Emulate _PyObject_GenericSetAttrWithDict() in Objects/object.c' + cls = type(self) + if hasattr(cls, 'slot_names') and name not in cls.slot_names: + raise AttributeError( + f'{cls.__name__!r} object has no attribute {name!r}' + ) + super().__setattr__(name, value) + + def __delattr__(self, name): + 'Emulate _PyObject_GenericSetAttrWithDict() in Objects/object.c' + cls = type(self) + if hasattr(cls, 'slot_names') and name not in cls.slot_names: + raise AttributeError( + f'{cls.__name__!r} object has no attribute {name!r}' + ) + super().__delattr__(name) + +To use the simulation in a real class, just inherit from :class:`Object` and +set the :term:`metaclass` to :class:`Type`: + +.. testcode:: + + class H(Object, metaclass=Type): + 'Instance variables stored in slots' + + slot_names = ['x', 'y'] + + def __init__(self, x, y): + self.x = x + self.y = y + +At this point, the metaclass has loaded member objects for *x* and *y*:: + + >>> from pprint import pp + >>> pp(dict(vars(H))) + {'__module__': '__main__', + '__doc__': 'Instance variables stored in slots', + 'slot_names': ['x', 'y'], + '__init__': <function H.__init__ at 0x7fb5d302f9d0>, + 'x': <Member 'x' of 'H'>, + 'y': <Member 'y' of 'H'>} + +.. doctest:: + :hide: + + # We test this separately because the preceding section is not + # doctestable due to the hex memory address for the __init__ function + >>> isinstance(vars(H)['x'], Member) + True + >>> isinstance(vars(H)['y'], Member) + True + +When instances are created, they have a ``_slotvalues`` list where the +attributes are stored: + +.. 
doctest:: + + >>> h = H(10, 20) + >>> vars(h) + {'_slotvalues': [10, 20]} + >>> h.x = 55 + >>> vars(h) + {'_slotvalues': [55, 20]} + +Misspelled or unassigned attributes will raise an exception: + +.. doctest:: + + >>> h.xz + Traceback (most recent call last): + ... + AttributeError: 'H' object has no attribute 'xz' + +.. doctest:: + :hide: + + # Examples for deleted attributes are not shown because this section + # is already a bit lengthy. We still test that code here. + >>> del h.x + >>> hasattr(h, 'x') + False + + # Also test the code for uninitialized slots + >>> class HU(Object, metaclass=Type): + ... slot_names = ['x', 'y'] + ... + >>> hu = HU() + >>> hasattr(hu, 'x') + False + >>> hasattr(hu, 'y') + False diff --git a/pyperformance/data-files/benchmarks/bm_sphinx/data/Doc/howto/enum.rst b/pyperformance/data-files/benchmarks/bm_sphinx/data/Doc/howto/enum.rst new file mode 100644 index 00000000..66929b41 --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_sphinx/data/Doc/howto/enum.rst @@ -0,0 +1,1561 @@ +.. _enum-howto: + +========== +Enum HOWTO +========== + +.. _enum-basic-tutorial: + +.. currentmodule:: enum + +An :class:`Enum` is a set of symbolic names bound to unique values. They are +similar to global variables, but they offer a more useful :func:`repr`, +grouping, type-safety, and a few other features. + +They are most useful when you have a variable that can take one of a limited +selection of values. For example, the days of the week:: + + >>> from enum import Enum + >>> class Weekday(Enum): + ... MONDAY = 1 + ... TUESDAY = 2 + ... WEDNESDAY = 3 + ... THURSDAY = 4 + ... FRIDAY = 5 + ... SATURDAY = 6 + ... SUNDAY = 7 + +Or perhaps the RGB primary colors:: + + >>> from enum import Enum + >>> class Color(Enum): + ... RED = 1 + ... GREEN = 2 + ... BLUE = 3 + +As you can see, creating an :class:`Enum` is as simple as writing a class that +inherits from :class:`Enum` itself. + +.. 
note:: Case of Enum Members + + Because Enums are used to represent constants, and to help avoid issues + with name clashes between mixin-class methods/attributes and enum names, + we strongly recommend using UPPER_CASE names for members, and will be using + that style in our examples. + +Depending on the nature of the enum a member's value may or may not be +important, but either way that value can be used to get the corresponding +member:: + + >>> Weekday(3) + <Weekday.WEDNESDAY: 3> + +As you can see, the ``repr()`` of a member shows the enum name, the member name, +and the value. The ``str()`` of a member shows only the enum name and member +name:: + + >>> print(Weekday.THURSDAY) + Weekday.THURSDAY + +The *type* of an enumeration member is the enum it belongs to:: + + >>> type(Weekday.MONDAY) + <enum 'Weekday'> + >>> isinstance(Weekday.FRIDAY, Weekday) + True + +Enum members have an attribute that contains just their :attr:`name`:: + + >>> print(Weekday.TUESDAY.name) + TUESDAY + +Likewise, they have an attribute for their :attr:`value`:: + + + >>> Weekday.WEDNESDAY.value + 3 + +Unlike many languages that treat enumerations solely as name/value pairs, +Python Enums can have behavior added. For example, :class:`datetime.date` +has two methods for returning the weekday: :meth:`weekday` and :meth:`isoweekday`. +The difference is that one of them counts from 0-6 and the other from 1-7. +Rather than keep track of that ourselves we can add a method to the :class:`Weekday` +enum to extract the day from the :class:`date` instance and return the matching +enum member:: + + @classmethod + def from_date(cls, date): + return cls(date.isoweekday()) + +The complete :class:`Weekday` enum now looks like this:: + + >>> class Weekday(Enum): + ... MONDAY = 1 + ... TUESDAY = 2 + ... WEDNESDAY = 3 + ... THURSDAY = 4 + ... FRIDAY = 5 + ... SATURDAY = 6 + ... SUNDAY = 7 + ... # + ... @classmethod + ... def from_date(cls, date): + ... return cls(date.isoweekday()) + +Now we can find out what today is! 
Observe:: + + >>> from datetime import date + >>> Weekday.from_date(date.today()) # doctest: +SKIP + <Weekday.TUESDAY: 2> + +Of course, if you're reading this on some other day, you'll see that day instead. + +This :class:`Weekday` enum is great if our variable only needs one day, but +what if we need several? Maybe we're writing a function to plot chores during +a week, and don't want to use a :class:`list` -- we could use a different type +of :class:`Enum`:: + + >>> from enum import Flag + >>> class Weekday(Flag): + ... MONDAY = 1 + ... TUESDAY = 2 + ... WEDNESDAY = 4 + ... THURSDAY = 8 + ... FRIDAY = 16 + ... SATURDAY = 32 + ... SUNDAY = 64 + +We've changed two things: we've inherited from :class:`Flag`, and the values are +all powers of 2. + +Just like the original :class:`Weekday` enum above, we can have a single selection:: + + >>> first_week_day = Weekday.MONDAY + >>> first_week_day + <Weekday.MONDAY: 1> + +But :class:`Flag` also allows us to combine several members into a single +variable:: + + >>> weekend = Weekday.SATURDAY | Weekday.SUNDAY + >>> weekend + <Weekday.SATURDAY|SUNDAY: 96> + +You can even iterate over a :class:`Flag` variable:: + + >>> for day in weekend: + ... print(day) + Weekday.SATURDAY + Weekday.SUNDAY + +Okay, let's get some chores set up:: + + >>> chores_for_ethan = { + ... 'feed the cat': Weekday.MONDAY | Weekday.WEDNESDAY | Weekday.FRIDAY, + ... 'do the dishes': Weekday.TUESDAY | Weekday.THURSDAY, + ... 'answer SO questions': Weekday.SATURDAY, + ... } + +And a function to display the chores for a given day:: + + >>> def show_chores(chores, day): + ... for chore, days in chores.items(): + ... if day in days: + ... print(chore) + ... + >>> show_chores(chores_for_ethan, Weekday.SATURDAY) + answer SO questions + +In cases where the actual values of the members do not matter, you can save +yourself some work and use :func:`auto` for the values:: + + >>> from enum import auto + >>> class Weekday(Flag): + ... MONDAY = auto() + ... TUESDAY = auto() + ... WEDNESDAY = auto() + ... THURSDAY = auto() + ... 
FRIDAY = auto() + ... SATURDAY = auto() + ... SUNDAY = auto() + ... WEEKEND = SATURDAY | SUNDAY + + +.. _enum-advanced-tutorial: + + +Programmatic access to enumeration members and their attributes +--------------------------------------------------------------- + +Sometimes it's useful to access members in enumerations programmatically (i.e. +situations where ``Color.RED`` won't do because the exact color is not known +at program-writing time). ``Enum`` allows such access:: + + >>> Color(1) + + >>> Color(3) + + +If you want to access enum members by *name*, use item access:: + + >>> Color['RED'] + + >>> Color['GREEN'] + + +If you have an enum member and need its :attr:`name` or :attr:`value`:: + + >>> member = Color.RED + >>> member.name + 'RED' + >>> member.value + 1 + + +Duplicating enum members and values +----------------------------------- + +Having two enum members with the same name is invalid:: + + >>> class Shape(Enum): + ... SQUARE = 2 + ... SQUARE = 3 + ... + Traceback (most recent call last): + ... + TypeError: 'SQUARE' already defined as 2 + +However, an enum member can have other names associated with it. Given two +entries ``A`` and ``B`` with the same value (and ``A`` defined first), ``B`` +is an alias for the member ``A``. By-value lookup of the value of ``A`` will +return the member ``A``. By-name lookup of ``A`` will return the member ``A``. +By-name lookup of ``B`` will also return the member ``A``:: + + >>> class Shape(Enum): + ... SQUARE = 2 + ... DIAMOND = 1 + ... CIRCLE = 3 + ... ALIAS_FOR_SQUARE = 2 + ... + >>> Shape.SQUARE + + >>> Shape.ALIAS_FOR_SQUARE + + >>> Shape(2) + + +.. note:: + + Attempting to create a member with the same name as an already + defined attribute (another member, a method, etc.) or attempting to create + an attribute with the same name as a member is not allowed. 
+ + +Ensuring unique enumeration values +---------------------------------- + +By default, enumerations allow multiple names as aliases for the same value. +When this behavior isn't desired, you can use the :func:`unique` decorator:: + + >>> from enum import Enum, unique + >>> @unique + ... class Mistake(Enum): + ... ONE = 1 + ... TWO = 2 + ... THREE = 3 + ... FOUR = 3 + ... + Traceback (most recent call last): + ... + ValueError: duplicate values found in : FOUR -> THREE + + +Using automatic values +---------------------- + +If the exact value is unimportant you can use :class:`auto`:: + + >>> from enum import Enum, auto + >>> class Color(Enum): + ... RED = auto() + ... BLUE = auto() + ... GREEN = auto() + ... + >>> [member.value for member in Color] + [1, 2, 3] + +The values are chosen by :func:`_generate_next_value_`, which can be +overridden:: + + >>> class AutoName(Enum): + ... @staticmethod + ... def _generate_next_value_(name, start, count, last_values): + ... return name + ... + >>> class Ordinal(AutoName): + ... NORTH = auto() + ... SOUTH = auto() + ... EAST = auto() + ... WEST = auto() + ... + >>> [member.value for member in Ordinal] + ['NORTH', 'SOUTH', 'EAST', 'WEST'] + +.. note:: + + The :meth:`_generate_next_value_` method must be defined before any members. + +Iteration +--------- + +Iterating over the members of an enum does not provide the aliases:: + + >>> list(Shape) + [, , ] + >>> list(Weekday) + [, , , , , , ] + +Note that the aliases ``Shape.ALIAS_FOR_SQUARE`` and ``Weekday.WEEKEND`` aren't shown. + +The special attribute ``__members__`` is a read-only ordered mapping of names +to members. It includes all names defined in the enumeration, including the +aliases:: + + >>> for name, member in Shape.__members__.items(): + ... name, member + ... + ('SQUARE', ) + ('DIAMOND', ) + ('CIRCLE', ) + ('ALIAS_FOR_SQUARE', ) + +The ``__members__`` attribute can be used for detailed programmatic access to +the enumeration members. 
For example, finding all the aliases:: + + >>> [name for name, member in Shape.__members__.items() if member.name != name] + ['ALIAS_FOR_SQUARE'] + +.. note:: + + Aliases for flags include values with multiple flags set, such as ``3``, + and no flags set, i.e. ``0``. + + +Comparisons +----------- + +Enumeration members are compared by identity:: + + >>> Color.RED is Color.RED + True + >>> Color.RED is Color.BLUE + False + >>> Color.RED is not Color.BLUE + True + +Ordered comparisons between enumeration values are *not* supported. Enum +members are not integers (but see `IntEnum`_ below):: + + >>> Color.RED < Color.BLUE + Traceback (most recent call last): + File "", line 1, in + TypeError: '<' not supported between instances of 'Color' and 'Color' + +Equality comparisons are defined though:: + + >>> Color.BLUE == Color.RED + False + >>> Color.BLUE != Color.RED + True + >>> Color.BLUE == Color.BLUE + True + +Comparisons against non-enumeration values will always compare not equal +(again, :class:`IntEnum` was explicitly designed to behave differently, see +below):: + + >>> Color.BLUE == 2 + False + +.. warning:: + + It is possible to reload modules -- if a reloaded module contains + enums, they will be recreated, and the new members may not + compare identical/equal to the original members. + +Allowed members and attributes of enumerations +---------------------------------------------- + +Most of the examples above use integers for enumeration values. Using integers +is short and handy (and provided by default by the `Functional API`_), but not +strictly enforced. In the vast majority of use-cases, one doesn't care what +the actual value of an enumeration is. But if the value *is* important, +enumerations can have arbitrary values. + +Enumerations are Python classes, and can have methods and special methods as +usual. If we have this enumeration:: + + >>> class Mood(Enum): + ... FUNKY = 1 + ... HAPPY = 3 + ... + ... def describe(self): + ... 
# self is the member here + ... return self.name, self.value + ... + ... def __str__(self): + ... return 'my custom str! {0}'.format(self.value) + ... + ... @classmethod + ... def favorite_mood(cls): + ... # cls here is the enumeration + ... return cls.HAPPY + ... + +Then:: + + >>> Mood.favorite_mood() + + >>> Mood.HAPPY.describe() + ('HAPPY', 3) + >>> str(Mood.FUNKY) + 'my custom str! 1' + +The rules for what is allowed are as follows: names that start and end with +a single underscore are reserved by enum and cannot be used; all other +attributes defined within an enumeration will become members of this +enumeration, with the exception of special methods (:meth:`__str__`, +:meth:`__add__`, etc.), descriptors (methods are also descriptors), and +variable names listed in :attr:`_ignore_`. + +Note: if your enumeration defines :meth:`__new__` and/or :meth:`__init__`, +any value(s) given to the enum member will be passed into those methods. +See `Planet`_ for an example. + +.. note:: + + The :meth:`__new__` method, if defined, is used during creation of the Enum + members; it is then replaced by Enum's :meth:`__new__` which is used after + class creation for lookup of existing members. See :ref:`new-vs-init` for + more details. + + +Restricted Enum subclassing +--------------------------- + +A new :class:`Enum` class must have one base enum class, up to one concrete +data type, and as many :class:`object`-based mixin classes as needed. The +order of these base classes is:: + + class EnumName([mix-in, ...,] [data-type,] base-enum): + pass + +Also, subclassing an enumeration is allowed only if the enumeration does not define +any members. So this is forbidden:: + + >>> class MoreColor(Color): + ... PINK = 17 + ... + Traceback (most recent call last): + ... + TypeError: cannot extend + +But this is allowed:: + + >>> class Foo(Enum): + ... def some_behavior(self): + ... pass + ... + >>> class Bar(Foo): + ... HAPPY = 1 + ... SAD = 2 + ... 
+ +Allowing subclassing of enums that define members would lead to a violation of +some important invariants of types and instances. On the other hand, it makes +sense to allow sharing some common behavior between a group of enumerations. +(See `OrderedEnum`_ for an example.) + + +.. _enum-dataclass-support: + +Dataclass support +----------------- + +When inheriting from a :class:`~dataclasses.dataclass`, +the :meth:`~Enum.__repr__` omits the inherited class' name. For example:: + + >>> from dataclasses import dataclass, field + >>> @dataclass + ... class CreatureDataMixin: + ... size: str + ... legs: int + ... tail: bool = field(repr=False, default=True) + ... + >>> class Creature(CreatureDataMixin, Enum): + ... BEETLE = 'small', 6 + ... DOG = 'medium', 4 + ... + >>> Creature.DOG + + +Use the :func:`~dataclasses.dataclass` argument ``repr=False`` +to use the standard :func:`repr`. + +.. versionchanged:: 3.12 + Only the dataclass fields are shown in the value area, not the dataclass' + name. + +.. note:: + + Adding :func:`~dataclasses.dataclass` decorator to :class:`Enum` + and its subclasses is not supported. It will not raise any errors, + but it will produce very strange results at runtime, such as members + being equal to each other:: + + >>> @dataclass # don't do this: it does not make any sense + ... class Color(Enum): + ... RED = 1 + ... BLUE = 2 + ... + >>> Color.RED is Color.BLUE + False + >>> Color.RED == Color.BLUE # problem is here: they should not be equal + True + + +Pickling +-------- + +Enumerations can be pickled and unpickled:: + + >>> from test.test_enum import Fruit + >>> from pickle import dumps, loads + >>> Fruit.TOMATO is loads(dumps(Fruit.TOMATO)) + True + +The usual restrictions for pickling apply: picklable enums must be defined in +the top level of a module, since unpickling requires them to be importable +from that module. + +.. note:: + + With pickle protocol version 4 it is possible to easily pickle enums + nested in other classes. 
+ +It is possible to modify how enum members are pickled/unpickled by defining +:meth:`__reduce_ex__` in the enumeration class. The default method is by-value, +but enums with complicated values may want to use by-name:: + + >>> import enum + >>> class MyEnum(enum.Enum): + ... __reduce_ex__ = enum.pickle_by_enum_name + +.. note:: + + Using by-name for flags is not recommended, as unnamed aliases will + not unpickle. + + +Functional API +-------------- + +The :class:`Enum` class is callable, providing the following functional API:: + + >>> Animal = Enum('Animal', 'ANT BEE CAT DOG') + >>> Animal + + >>> Animal.ANT + + >>> list(Animal) + [, , , ] + +The semantics of this API resemble :class:`~collections.namedtuple`. The first +argument of the call to :class:`Enum` is the name of the enumeration. + +The second argument is the *source* of enumeration member names. It can be a +whitespace-separated string of names, a sequence of names, a sequence of +2-tuples with key/value pairs, or a mapping (e.g. dictionary) of names to +values. The last two options enable assigning arbitrary values to +enumerations; the others auto-assign increasing integers starting with 1 (use +the ``start`` parameter to specify a different starting value). A +new class derived from :class:`Enum` is returned. In other words, the above +assignment to :class:`Animal` is equivalent to:: + + >>> class Animal(Enum): + ... ANT = 1 + ... BEE = 2 + ... CAT = 3 + ... DOG = 4 + ... + +The reason for defaulting to ``1`` as the starting number and not ``0`` is +that ``0`` is ``False`` in a boolean sense, but by default enum members all +evaluate to ``True``. + +Pickling enums created with the functional API can be tricky as frame stack +implementation details are used to try and figure out which module the +enumeration is being created in (e.g. it will fail if you use a utility +function in a separate module, and also may not work on IronPython or Jython). 
+The solution is to specify the module name explicitly as follows:: + + >>> Animal = Enum('Animal', 'ANT BEE CAT DOG', module=__name__) + +.. warning:: + + If ``module`` is not supplied, and Enum cannot determine what it is, + the new Enum members cannot be unpickled; to keep errors closer to + the source, pickling will be disabled. + +The new pickle protocol 4 also, in some circumstances, relies on +:attr:`~type.__qualname__` being set to the location where pickle will be able +to find the class. For example, if the class was made available in class +SomeData in the global scope:: + + >>> Animal = Enum('Animal', 'ANT BEE CAT DOG', qualname='SomeData.Animal') + +The complete signature is:: + + Enum( + value='NewEnumName', + names=<...>, + *, + module='...', + qualname='...', + type=<mixed-in class>, + start=1, + ) + +* *value*: What the new enum class will record as its name. + +* *names*: The enum members. This can be a whitespace- or comma-separated string + (values will start at 1 unless otherwise specified):: + + 'RED GREEN BLUE' | 'RED,GREEN,BLUE' | 'RED, GREEN, BLUE' + + or an iterable of names:: + + ['RED', 'GREEN', 'BLUE'] + + or an iterable of (name, value) pairs:: + + [('CYAN', 4), ('MAGENTA', 5), ('YELLOW', 6)] + + or a mapping:: + + {'CHARTREUSE': 7, 'SEA_GREEN': 11, 'ROSEMARY': 42} + +* *module*: name of module where new enum class can be found. + +* *qualname*: where in module new enum class can be found. + +* *type*: type to mix in to new enum class. + +* *start*: number to start counting at if only names are passed in. + +.. versionchanged:: 3.5 + The *start* parameter was added. + + +Derived Enumerations +-------------------- + +IntEnum +^^^^^^^ + +The first variation of :class:`Enum` that is provided is also a subclass of +:class:`int`. Members of an :class:`IntEnum` can be compared to integers; +by extension, integer enumerations of different types can also be compared +to each other:: + + >>> from enum import IntEnum + >>> class Shape(IntEnum): + ... 
CIRCLE = 1 + ... SQUARE = 2 + ... + >>> class Request(IntEnum): + ... POST = 1 + ... GET = 2 + ... + >>> Shape == 1 + False + >>> Shape.CIRCLE == 1 + True + >>> Shape.CIRCLE == Request.POST + True + +However, they still can't be compared to standard :class:`Enum` enumerations:: + + >>> class Shape(IntEnum): + ... CIRCLE = 1 + ... SQUARE = 2 + ... + >>> class Color(Enum): + ... RED = 1 + ... GREEN = 2 + ... + >>> Shape.CIRCLE == Color.RED + False + +:class:`IntEnum` values behave like integers in other ways you'd expect:: + + >>> int(Shape.CIRCLE) + 1 + >>> ['a', 'b', 'c'][Shape.CIRCLE] + 'b' + >>> [i for i in range(Shape.SQUARE)] + [0, 1] + + +StrEnum +^^^^^^^ + +The second variation of :class:`Enum` that is provided is also a subclass of +:class:`str`. Members of a :class:`StrEnum` can be compared to strings; +by extension, string enumerations of different types can also be compared +to each other. + +.. versionadded:: 3.11 + + +IntFlag +^^^^^^^ + +The next variation of :class:`Enum` provided, :class:`IntFlag`, is also based +on :class:`int`. The difference being :class:`IntFlag` members can be combined +using the bitwise operators (&, \|, ^, ~) and the result is still an +:class:`IntFlag` member, if possible. Like :class:`IntEnum`, :class:`IntFlag` +members are also integers and can be used wherever an :class:`int` is used. + +.. note:: + + Any operation on an :class:`IntFlag` member besides the bit-wise operations will + lose the :class:`IntFlag` membership. + + Bit-wise operations that result in invalid :class:`IntFlag` values will lose the + :class:`IntFlag` membership. See :class:`FlagBoundary` for + details. + +.. versionadded:: 3.6 +.. versionchanged:: 3.11 + +Sample :class:`IntFlag` class:: + + >>> from enum import IntFlag + >>> class Perm(IntFlag): + ... R = 4 + ... W = 2 + ... X = 1 + ... 
+ >>> Perm.R | Perm.W + <Perm.R|W: 6> + >>> Perm.R + Perm.W + 6 + >>> RW = Perm.R | Perm.W + >>> Perm.R in RW + True + +It is also possible to name the combinations:: + + >>> class Perm(IntFlag): + ... R = 4 + ... W = 2 + ... X = 1 + ... RWX = 7 + ... + >>> Perm.RWX + <Perm.RWX: 7> + >>> ~Perm.RWX + <Perm: 0> + >>> Perm(7) + <Perm.RWX: 7> + +.. note:: + + Named combinations are considered aliases. Aliases do not show up during + iteration, but can be returned from by-value lookups. + +.. versionchanged:: 3.11 + +Another important difference between :class:`IntFlag` and :class:`Enum` is that +if no flags are set (the value is 0), its boolean evaluation is :data:`False`:: + + >>> Perm.R & Perm.X + <Perm: 0> + >>> bool(Perm.R & Perm.X) + False + +Because :class:`IntFlag` members are also subclasses of :class:`int` they can +be combined with them (but may lose :class:`IntFlag` membership):: + + >>> Perm.X | 4 + <Perm.R|X: 5> + + >>> Perm.X + 8 + 9 + +.. note:: + + The negation operator, ``~``, always returns an :class:`IntFlag` member with a + positive value:: + + >>> (~Perm.X).value == (Perm.R|Perm.W).value == 6 + True + +:class:`IntFlag` members can also be iterated over:: + + >>> list(RW) + [<Perm.R: 4>, <Perm.W: 2>] + +.. versionadded:: 3.11 + + +Flag +^^^^ + +The last variation is :class:`Flag`. Like :class:`IntFlag`, :class:`Flag` +members can be combined using the bitwise operators (&, \|, ^, ~). Unlike +:class:`IntFlag`, they cannot be combined with, nor compared against, any +other :class:`Flag` enumeration, nor :class:`int`. While it is possible to +specify the values directly it is recommended to use :class:`auto` as the +value and let :class:`Flag` select an appropriate value. + +.. versionadded:: 3.6 + +Like :class:`IntFlag`, if a combination of :class:`Flag` members results in no +flags being set, the boolean evaluation is :data:`False`:: + + >>> from enum import Flag, auto + >>> class Color(Flag): + ... RED = auto() + ... BLUE = auto() + ... GREEN = auto() + ... 
+ >>> Color.RED & Color.GREEN + + >>> bool(Color.RED & Color.GREEN) + False + +Individual flags should have values that are powers of two (1, 2, 4, 8, ...), +while combinations of flags will not:: + + >>> class Color(Flag): + ... RED = auto() + ... BLUE = auto() + ... GREEN = auto() + ... WHITE = RED | BLUE | GREEN + ... + >>> Color.WHITE + + +Giving a name to the "no flags set" condition does not change its boolean +value:: + + >>> class Color(Flag): + ... BLACK = 0 + ... RED = auto() + ... BLUE = auto() + ... GREEN = auto() + ... + >>> Color.BLACK + + >>> bool(Color.BLACK) + False + +:class:`Flag` members can also be iterated over:: + + >>> purple = Color.RED | Color.BLUE + >>> list(purple) + [, ] + +.. versionadded:: 3.11 + +.. note:: + + For the majority of new code, :class:`Enum` and :class:`Flag` are strongly + recommended, since :class:`IntEnum` and :class:`IntFlag` break some + semantic promises of an enumeration (by being comparable to integers, and + thus by transitivity to other unrelated enumerations). :class:`IntEnum` + and :class:`IntFlag` should be used only in cases where :class:`Enum` and + :class:`Flag` will not do; for example, when integer constants are replaced + with enumerations, or for interoperability with other systems. + + +Others +^^^^^^ + +While :class:`IntEnum` is part of the :mod:`enum` module, it would be very +simple to implement independently:: + + class IntEnum(int, ReprEnum): # or Enum instead of ReprEnum + pass + +This demonstrates how similar derived enumerations can be defined; for example +a :class:`FloatEnum` that mixes in :class:`float` instead of :class:`int`. + +Some rules: + +1. When subclassing :class:`Enum`, mix-in types must appear before the + :class:`Enum` class itself in the sequence of bases, as in the :class:`IntEnum` + example above. +2. Mix-in types must be subclassable. 
For example, :class:`bool` and + :class:`range` are not subclassable and will throw an error during Enum + creation if used as the mix-in type. +3. While :class:`Enum` can have members of any type, once you mix in an + additional type, all the members must have values of that type, e.g. + :class:`int` above. This restriction does not apply to mix-ins which only + add methods and don't specify another type. +4. When another data type is mixed in, the :attr:`value` attribute is *not the + same* as the enum member itself, although it is equivalent and will compare + equal. +5. A ``data type`` is a mixin that defines :meth:`__new__`, or a + :class:`~dataclasses.dataclass` +6. %-style formatting: ``%s`` and ``%r`` call the :class:`Enum` class's + :meth:`__str__` and :meth:`__repr__` respectively; other codes (such as + ``%i`` or ``%h`` for IntEnum) treat the enum member as its mixed-in type. +7. :ref:`Formatted string literals `, :meth:`str.format`, + and :func:`format` will use the enum's :meth:`__str__` method. + +.. note:: + + Because :class:`IntEnum`, :class:`IntFlag`, and :class:`StrEnum` are + designed to be drop-in replacements for existing constants, their + :meth:`__str__` method has been reset to their data types' + :meth:`__str__` method. + +.. _new-vs-init: + +When to use :meth:`__new__` vs. :meth:`__init__` +------------------------------------------------ + +:meth:`__new__` must be used whenever you want to customize the actual value of +the :class:`Enum` member. Any other modifications may go in either +:meth:`__new__` or :meth:`__init__`, with :meth:`__init__` being preferred. + +For example, if you want to pass several items to the constructor, but only +want one of them to be the value:: + + >>> class Coordinate(bytes, Enum): + ... """ + ... Coordinate with binary codes that can be indexed by the int code. + ... """ + ... def __new__(cls, value, label, unit): + ... obj = bytes.__new__(cls, [value]) + ... obj._value_ = value + ... 
obj.label = label + ... obj.unit = unit + ... return obj + ... PX = (0, 'P.X', 'km') + ... PY = (1, 'P.Y', 'km') + ... VX = (2, 'V.X', 'km/s') + ... VY = (3, 'V.Y', 'km/s') + ... + + >>> print(Coordinate['PY']) + Coordinate.PY + + >>> print(Coordinate(3)) + Coordinate.VY + +.. warning:: + + *Do not* call ``super().__new__()``, as the lookup-only ``__new__`` is the one + that is found; instead, use the data type directly. + + +Finer Points +^^^^^^^^^^^^ + +Supported ``__dunder__`` names +"""""""""""""""""""""""""""""" + +:attr:`__members__` is a read-only ordered mapping of ``member_name``:``member`` +items. It is only available on the class. + +:meth:`__new__`, if specified, must create and return the enum members; it is +also a very good idea to set the member's :attr:`_value_` appropriately. Once +all the members are created it is no longer used. + + +Supported ``_sunder_`` names +"""""""""""""""""""""""""""" + +- :attr:`~Enum._name_` -- name of the member +- :attr:`~Enum._value_` -- value of the member; can be set in ``__new__`` +- :meth:`~Enum._missing_` -- a lookup function used when a value is not found; + may be overridden +- :attr:`~Enum._ignore_` -- a list of names, either as a :class:`list` or a + :class:`str`, that will not be transformed into members, and will be removed + from the final class +- :meth:`~Enum._generate_next_value_` -- used to get an appropriate value for + an enum member; may be overridden +- :meth:`~Enum._add_alias_` -- adds a new name as an alias to an existing + member. +- :meth:`~Enum._add_value_alias_` -- adds a new value as an alias to an + existing member. See `MultiValueEnum`_ for an example. + + .. note:: + + For standard :class:`Enum` classes the next value chosen is the highest + value seen incremented by one. + + For :class:`Flag` classes the next value chosen will be the next highest + power-of-two. + + .. versionchanged:: 3.13 + Prior versions would use the last seen value instead of the highest value. + +.. 
versionadded:: 3.6 ``_missing_``, ``_order_``, ``_generate_next_value_`` +.. versionadded:: 3.7 ``_ignore_`` +.. versionadded:: 3.13 ``_add_alias_``, ``_add_value_alias_`` + +To help keep Python 2 / Python 3 code in sync an :attr:`_order_` attribute can +be provided. It will be checked against the actual order of the enumeration +and raise an error if the two do not match:: + + >>> class Color(Enum): + ... _order_ = 'RED GREEN BLUE' + ... RED = 1 + ... BLUE = 3 + ... GREEN = 2 + ... + Traceback (most recent call last): + ... + TypeError: member order does not match _order_: + ['RED', 'BLUE', 'GREEN'] + ['RED', 'GREEN', 'BLUE'] + +.. note:: + + In Python 2 code the :attr:`_order_` attribute is necessary as definition + order is lost before it can be recorded. + + +_Private__names +""""""""""""""" + +:ref:`Private names <private-name-mangling>` are not converted to enum members, +but remain normal attributes. + +.. versionchanged:: 3.11 + + +``Enum`` member type +"""""""""""""""""""" + +Enum members are instances of their enum class, and are normally accessed as +``EnumClass.member``. In certain situations, such as writing custom enum +behavior, being able to access one member directly from another is useful, +and is supported; however, in order to avoid name clashes between member names +and attributes/methods from mixed-in classes, upper-case names are strongly +recommended. + +.. versionchanged:: 3.5 + + +Creating members that are mixed with other data types +""""""""""""""""""""""""""""""""""""""""""""""""""""" + +When subclassing other data types, such as :class:`int` or :class:`str`, with +an :class:`Enum`, all values after the ``=`` are passed to that data type's +constructor. For example:: + + >>> class MyEnum(IntEnum): # help(int) -> int(x, base=10) -> integer + ... example = '11', 16 # so x='11' and base=16 + ... + >>> MyEnum.example.value # and int('11', 16) is... 
+ 17 + + +Boolean value of ``Enum`` classes and members +""""""""""""""""""""""""""""""""""""""""""""" + +Enum classes that are mixed with non-:class:`Enum` types (such as +:class:`int`, :class:`str`, etc.) are evaluated according to the mixed-in +type's rules; otherwise, all members evaluate as :data:`True`. To make your +own enum's boolean evaluation depend on the member's value add the following to +your class:: + + def __bool__(self): + return bool(self.value) + +Plain :class:`Enum` classes always evaluate as :data:`True`. + + +``Enum`` classes with methods +""""""""""""""""""""""""""""" + +If you give your enum subclass extra methods, like the `Planet`_ +class below, those methods will show up in a :func:`dir` of the member, +but not of the class:: + + >>> dir(Planet) # doctest: +SKIP + ['EARTH', 'JUPITER', 'MARS', 'MERCURY', 'NEPTUNE', 'SATURN', 'URANUS', 'VENUS', '__class__', '__doc__', '__members__', '__module__'] + >>> dir(Planet.EARTH) # doctest: +SKIP + ['__class__', '__doc__', '__module__', 'mass', 'name', 'radius', 'surface_gravity', 'value'] + + +Combining members of ``Flag`` +""""""""""""""""""""""""""""" + +Iterating over a combination of :class:`Flag` members will only return the members that +are comprised of a single bit:: + + >>> class Color(Flag): + ... RED = auto() + ... GREEN = auto() + ... BLUE = auto() + ... MAGENTA = RED | BLUE + ... YELLOW = RED | GREEN + ... CYAN = GREEN | BLUE + ... + >>> Color(3) # named combination + + >>> Color(7) # not named combination + + + +``Flag`` and ``IntFlag`` minutia +"""""""""""""""""""""""""""""""" + +Using the following snippet for our examples:: + + >>> class Color(IntFlag): + ... BLACK = 0 + ... RED = 1 + ... GREEN = 2 + ... BLUE = 4 + ... PURPLE = RED | BLUE + ... WHITE = RED | GREEN | BLUE + ... 
+ +the following are true: + +- single-bit flags are canonical +- multi-bit and zero-bit flags are aliases +- only canonical flags are returned during iteration:: + + >>> list(Color.WHITE) + [, , ] + +- negating a flag or flag set returns a new flag/flag set with the + corresponding positive integer value:: + + >>> Color.BLUE + + + >>> ~Color.BLUE + + +- names of pseudo-flags are constructed from their members' names:: + + >>> (Color.RED | Color.GREEN).name + 'RED|GREEN' + + >>> class Perm(IntFlag): + ... R = 4 + ... W = 2 + ... X = 1 + ... + >>> (Perm.R & Perm.W).name is None # effectively Perm(0) + True + +- multi-bit flags, aka aliases, can be returned from operations:: + + >>> Color.RED | Color.BLUE + + + >>> Color(7) # or Color(-1) + + + >>> Color(0) + + +- membership / containment checking: zero-valued flags are always considered + to be contained:: + + >>> Color.BLACK in Color.WHITE + True + + otherwise, only if all bits of one flag are in the other flag will True + be returned:: + + >>> Color.PURPLE in Color.WHITE + True + + >>> Color.GREEN in Color.PURPLE + False + +There is a new boundary mechanism that controls how out-of-range / invalid +bits are handled: ``STRICT``, ``CONFORM``, ``EJECT``, and ``KEEP``: + +* STRICT --> raises an exception when presented with invalid values +* CONFORM --> discards any invalid bits +* EJECT --> lose Flag status and become a normal int with the given value +* KEEP --> keep the extra bits + + - keeps Flag status and extra bits + - extra bits do not show up in iteration + - extra bits do show up in repr() and str() + +The default for Flag is ``STRICT``, the default for ``IntFlag`` is ``EJECT``, +and the default for ``_convert_`` is ``KEEP`` (see ``ssl.Options`` for an +example of when ``KEEP`` is needed). + + +.. _enum-class-differences: + +How are Enums and Flags different? 
+---------------------------------- + +Enums have a custom metaclass that affects many aspects of both derived :class:`Enum` +classes and their instances (members). + + +Enum Classes +^^^^^^^^^^^^ + +The :class:`EnumType` metaclass is responsible for providing the +:meth:`__contains__`, :meth:`__dir__`, :meth:`__iter__` and other methods that +allow one to do things with an :class:`Enum` class that fail on a typical +class, such as ``list(Color)`` or ``some_enum_var in Color``. :class:`EnumType` is +responsible for ensuring that various other methods on the final :class:`Enum` +class are correct (such as :meth:`__new__`, :meth:`__getnewargs__`, +:meth:`__str__` and :meth:`__repr__`). + +Flag Classes +^^^^^^^^^^^^ + +Flags have an expanded view of aliasing: to be canonical, the value of a flag +needs to be a power-of-two value, and not a duplicate name. So, in addition to the +:class:`Enum` definition of alias, a flag with no value (a.k.a. ``0``) or with more than one +power-of-two value (e.g. ``3``) is considered an alias. + +Enum Members (aka instances) +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The most interesting thing about enum members is that they are singletons. +:class:`EnumType` creates them all while it is creating the enum class itself, +and then puts a custom :meth:`__new__` in place to ensure that no new ones are +ever instantiated by returning only the existing member instances. + +Flag Members +^^^^^^^^^^^^ + +Flag members can be iterated over just like the :class:`Flag` class, and only the +canonical members will be returned. For example:: + + >>> list(Color) + [<Color.RED: 1>, <Color.GREEN: 2>, <Color.BLUE: 4>] + +(Note that ``BLACK``, ``PURPLE``, and ``WHITE`` do not show up.) + +Inverting a flag member returns the corresponding positive value, +rather than a negative value --- for example:: + + >>> ~Color.RED + <Color.GREEN|BLUE: 6> + +Flag members have a length corresponding to the number of power-of-two values +they contain. For example:: + + >>> len(Color.PURPLE) + 2 + + +.. 
_enum-cookbook: + +Enum Cookbook +------------- + + +While :class:`Enum`, :class:`IntEnum`, :class:`StrEnum`, :class:`Flag`, and +:class:`IntFlag` are expected to cover the majority of use-cases, they cannot +cover them all. Here are recipes for some different types of enumerations +that can be used directly, or as examples for creating one's own. + + +Omitting values +^^^^^^^^^^^^^^^ + +In many use-cases, one doesn't care what the actual value of an enumeration +is. There are several ways to define this type of simple enumeration: + +- use instances of :class:`auto` for the value +- use instances of :class:`object` as the value +- use a descriptive string as the value +- use a tuple as the value and a custom :meth:`__new__` to replace the + tuple with an :class:`int` value + +Using any of these methods signifies to the user that these values are not +important, and also enables one to add, remove, or reorder members without +having to renumber the remaining members. + + +Using :class:`auto` +""""""""""""""""""" + +Using :class:`auto` would look like:: + + >>> class Color(Enum): + ... RED = auto() + ... BLUE = auto() + ... GREEN = auto() + ... + >>> Color.GREEN + <Color.GREEN: 3> + + +Using :class:`object` +""""""""""""""""""""" + +Using :class:`object` would look like:: + + >>> class Color(Enum): + ... RED = object() + ... GREEN = object() + ... BLUE = object() + ... + >>> Color.GREEN # doctest: +SKIP + <Color.GREEN: <object object at 0x...>> + +This is also a good example of why you might want to write your own +:meth:`__repr__`:: + + >>> class Color(Enum): + ... RED = object() + ... GREEN = object() + ... BLUE = object() + ... def __repr__(self): + ... return "<%s.%s>" % (self.__class__.__name__, self._name_) + ... + >>> Color.GREEN + <Color.GREEN> + + + +Using a descriptive string +"""""""""""""""""""""""""" + +Using a string as the value would look like:: + + >>> class Color(Enum): + ... RED = 'stop' + ... GREEN = 'go' + ... BLUE = 'too fast!' + ... 
+ >>> Color.GREEN + <Color.GREEN: 'go'> + + +Using a custom :meth:`__new__` +"""""""""""""""""""""""""""""" + +Using an auto-numbering :meth:`__new__` would look like:: + + >>> class AutoNumber(Enum): + ... def __new__(cls): + ... value = len(cls.__members__) + 1 + ... obj = object.__new__(cls) + ... obj._value_ = value + ... return obj + ... + >>> class Color(AutoNumber): + ... RED = () + ... GREEN = () + ... BLUE = () + ... + >>> Color.GREEN + <Color.GREEN: 2> + +To make a more general purpose ``AutoNumber``, add ``*args`` to the signature:: + + >>> class AutoNumber(Enum): + ... def __new__(cls, *args): # this is the only change from above + ... value = len(cls.__members__) + 1 + ... obj = object.__new__(cls) + ... obj._value_ = value + ... return obj + ... + +Then when you inherit from ``AutoNumber`` you can write your own ``__init__`` +to handle any extra arguments:: + + >>> class Swatch(AutoNumber): + ... def __init__(self, pantone='unknown'): + ... self.pantone = pantone + ... AUBURN = '3497' + ... SEA_GREEN = '1246' + ... BLEACHED_CORAL = () # New color, no Pantone code yet! + ... + >>> Swatch.SEA_GREEN + <Swatch.SEA_GREEN: 2> + >>> Swatch.SEA_GREEN.pantone + '1246' + >>> Swatch.BLEACHED_CORAL.pantone + 'unknown' + +.. note:: + + The :meth:`__new__` method, if defined, is used during creation of the Enum + members; it is then replaced by Enum's :meth:`__new__` which is used after + class creation for lookup of existing members. + +.. warning:: + + *Do not* call ``super().__new__()``, as the lookup-only ``__new__`` is the one + that is found; instead, use the data type directly -- e.g.:: + + obj = int.__new__(cls, value) + + +OrderedEnum +^^^^^^^^^^^ + +An ordered enumeration that is not based on :class:`IntEnum` and so maintains +the normal :class:`Enum` invariants (such as not being comparable to other +enumerations):: + + >>> class OrderedEnum(Enum): + ... def __ge__(self, other): + ... if self.__class__ is other.__class__: + ... return self.value >= other.value + ... return NotImplemented + ... 
def __gt__(self, other): + ... if self.__class__ is other.__class__: + ... return self.value > other.value + ... return NotImplemented + ... def __le__(self, other): + ... if self.__class__ is other.__class__: + ... return self.value <= other.value + ... return NotImplemented + ... def __lt__(self, other): + ... if self.__class__ is other.__class__: + ... return self.value < other.value + ... return NotImplemented + ... + >>> class Grade(OrderedEnum): + ... A = 5 + ... B = 4 + ... C = 3 + ... D = 2 + ... F = 1 + ... + >>> Grade.C < Grade.A + True + + +DuplicateFreeEnum +^^^^^^^^^^^^^^^^^ + +Raises an error if a duplicate member value is found instead of creating an +alias:: + + >>> class DuplicateFreeEnum(Enum): + ... def __init__(self, *args): + ... cls = self.__class__ + ... if any(self.value == e.value for e in cls): + ... a = self.name + ... e = cls(self.value).name + ... raise ValueError( + ... "aliases not allowed in DuplicateFreeEnum: %r --> %r" + ... % (a, e)) + ... + >>> class Color(DuplicateFreeEnum): + ... RED = 1 + ... GREEN = 2 + ... BLUE = 3 + ... GRENE = 2 + ... + Traceback (most recent call last): + ... + ValueError: aliases not allowed in DuplicateFreeEnum: 'GRENE' --> 'GREEN' + +.. note:: + + This is a useful example for subclassing Enum to add or change other + behaviors as well as disallowing aliases. If the only desired change is + disallowing aliases, the :func:`unique` decorator can be used instead. + + +MultiValueEnum +^^^^^^^^^^^^^^^^^ + +Supports having more than one value per member:: + + >>> class MultiValueEnum(Enum): + ... def __new__(cls, value, *values): + ... self = object.__new__(cls) + ... self._value_ = value + ... for v in values: + ... self._add_value_alias_(v) + ... return self + ... + >>> class DType(MultiValueEnum): + ... float32 = 'f', 8 + ... double64 = 'd', 9 + ... 
+ >>> DType('f') + <DType.float32: 'f'> + >>> DType(9) + <DType.double64: 'd'> + + +Planet +^^^^^^ + +If :meth:`__new__` or :meth:`__init__` is defined, the value of the enum member +will be passed to those methods:: + + >>> class Planet(Enum): + ... MERCURY = (3.303e+23, 2.4397e6) + ... VENUS = (4.869e+24, 6.0518e6) + ... EARTH = (5.976e+24, 6.37814e6) + ... MARS = (6.421e+23, 3.3972e6) + ... JUPITER = (1.9e+27, 7.1492e7) + ... SATURN = (5.688e+26, 6.0268e7) + ... URANUS = (8.686e+25, 2.5559e7) + ... NEPTUNE = (1.024e+26, 2.4746e7) + ... def __init__(self, mass, radius): + ... self.mass = mass # in kilograms + ... self.radius = radius # in meters + ... @property + ... def surface_gravity(self): + ... # universal gravitational constant (m3 kg-1 s-2) + ... G = 6.67300E-11 + ... return G * self.mass / (self.radius * self.radius) + ... + >>> Planet.EARTH.value + (5.976e+24, 6378140.0) + >>> Planet.EARTH.surface_gravity + 9.802652743337129 + +.. _enum-time-period: + +TimePeriod +^^^^^^^^^^ + +An example to show the :attr:`_ignore_` attribute in use:: + + >>> from datetime import timedelta + >>> class Period(timedelta, Enum): + ... "different lengths of time" + ... _ignore_ = 'Period i' + ... Period = vars() + ... for i in range(367): + ... Period['day_%d' % i] = i + ... + >>> list(Period)[:2] + [<Period.day_0: datetime.timedelta(0)>, <Period.day_1: datetime.timedelta(days=1)>] + >>> list(Period)[-2:] + [<Period.day_365: datetime.timedelta(days=365)>, <Period.day_366: datetime.timedelta(days=366)>] + + +.. _enumtype-examples: + +Subclassing EnumType +-------------------- + +While most enum needs can be met by customizing :class:`Enum` subclasses, +either with class decorators or custom functions, :class:`EnumType` can be +subclassed to provide a different Enum experience. diff --git a/pyperformance/data-files/benchmarks/bm_sphinx/data/Doc/howto/free-threading-extensions.rst b/pyperformance/data-files/benchmarks/bm_sphinx/data/Doc/howto/free-threading-extensions.rst new file mode 100644 index 00000000..6abe93d7 --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_sphinx/data/Doc/howto/free-threading-extensions.rst @@ -0,0 +1,280 @@ +.. highlight:: c + +.. 
_freethreading-extensions-howto: + +****************************************** +C API Extension Support for Free Threading +****************************************** + +Starting with the 3.13 release, CPython has experimental support for running +with the :term:`global interpreter lock` (GIL) disabled in a configuration +called :term:`free threading`. This document describes how to adapt C API +extensions to support free threading. + + +Identifying the Free-Threaded Build in C +======================================== + +The CPython C API exposes the ``Py_GIL_DISABLED`` macro: in the free-threaded +build it's defined to ``1``, and in the regular build it's not defined. +You can use it to enable code that only runs under the free-threaded build:: + + #ifdef Py_GIL_DISABLED + /* code that only runs in the free-threaded build */ + #endif + +Module Initialization +===================== + +Extension modules need to explicitly indicate that they support running with +the GIL disabled; otherwise importing the extension will raise a warning and +enable the GIL at runtime. + +There are two ways to indicate that an extension module supports running with +the GIL disabled depending on whether the extension uses multi-phase or +single-phase initialization. + +Multi-Phase Initialization +.......................... + +Extensions that use multi-phase initialization (i.e., +:c:func:`PyModuleDef_Init`) should add a :c:data:`Py_mod_gil` slot in the +module definition. If your extension supports older versions of CPython, +you should guard the slot with a :c:data:`PY_VERSION_HEX` check. + +:: + + static struct PyModuleDef_Slot module_slots[] = { + ... + #if PY_VERSION_HEX >= 0x030D0000 + {Py_mod_gil, Py_MOD_GIL_NOT_USED}, + #endif + {0, NULL} + }; + + static struct PyModuleDef moduledef = { + PyModuleDef_HEAD_INIT, + .m_slots = module_slots, + ... + }; + + +Single-Phase Initialization +........................... 
+ +Extensions that use single-phase initialization (i.e., +:c:func:`PyModule_Create`) should call :c:func:`PyUnstable_Module_SetGIL` to +indicate that they support running with the GIL disabled. The function is +only defined in the free-threaded build, so you should guard the call with +``#ifdef Py_GIL_DISABLED`` to avoid compilation errors in the regular build. + +:: + + static struct PyModuleDef moduledef = { + PyModuleDef_HEAD_INIT, + ... + }; + + PyMODINIT_FUNC + PyInit_mymodule(void) + { + PyObject *m = PyModule_Create(&moduledef); + if (m == NULL) { + return NULL; + } + #ifdef Py_GIL_DISABLED + PyUnstable_Module_SetGIL(m, Py_MOD_GIL_NOT_USED); + #endif + return m; + } + + +General API Guidelines +====================== + +Most of the C API is thread-safe, but there are some exceptions. + +* **Struct Fields**: Accessing fields in Python C API objects or structs + directly is not thread-safe if the field may be concurrently modified. +* **Macros**: Accessor macros like :c:macro:`PyList_GET_ITEM` and + :c:macro:`PyList_SET_ITEM` do not perform any error checking or locking. + These macros are not thread-safe if the container object may be modified + concurrently. +* **Borrowed References**: C API functions that return + :term:`borrowed references <borrowed reference>` may not be thread-safe if + the containing object is modified concurrently. See the section on + :ref:`borrowed references <borrowed-references>` for more information. + + +Container Thread Safety +....................... + +Containers like :c:struct:`PyListObject`, +:c:struct:`PyDictObject`, and :c:struct:`PySetObject` perform internal locking +in the free-threaded build. For example, the :c:func:`PyList_Append` will +lock the list before appending an item. + +.. _PyDict_Next: + +``PyDict_Next`` +''''''''''''''' + +A notable exception is :c:func:`PyDict_Next`, which does not lock the +dictionary. 
You should use :c:macro:`Py_BEGIN_CRITICAL_SECTION` to protect +the dictionary while iterating over it if the dictionary may be concurrently +modified:: + + Py_BEGIN_CRITICAL_SECTION(dict); + PyObject *key, *value; + Py_ssize_t pos = 0; + while (PyDict_Next(dict, &pos, &key, &value)) { + ... + } + Py_END_CRITICAL_SECTION(); + + +Borrowed References +=================== + +.. _borrowed-references: + +Some C API functions return :term:`borrowed references <borrowed reference>`. +These APIs are not thread-safe if the containing object is modified +concurrently. For example, it's not safe to use :c:func:`PyList_GetItem` +if the list may be modified concurrently. + +The following table lists some borrowed reference APIs and their replacements +that return :term:`strong references <strong reference>`. + ++-----------------------------------+-----------------------------------+ +| Borrowed reference API | Strong reference API | ++===================================+===================================+ +| :c:func:`PyList_GetItem` | :c:func:`PyList_GetItemRef` | ++-----------------------------------+-----------------------------------+ +| :c:func:`PyDict_GetItem` | :c:func:`PyDict_GetItemRef` | ++-----------------------------------+-----------------------------------+ +| :c:func:`PyDict_GetItemWithError` | :c:func:`PyDict_GetItemRef` | ++-----------------------------------+-----------------------------------+ +| :c:func:`PyDict_GetItemString` | :c:func:`PyDict_GetItemStringRef` | ++-----------------------------------+-----------------------------------+ +| :c:func:`PyDict_SetDefault` | :c:func:`PyDict_SetDefaultRef` | ++-----------------------------------+-----------------------------------+ +| :c:func:`PyDict_Next` | none (see :ref:`PyDict_Next`) | ++-----------------------------------+-----------------------------------+ +| :c:func:`PyWeakref_GetObject` | :c:func:`PyWeakref_GetRef` | ++-----------------------------------+-----------------------------------+ +| :c:func:`PyWeakref_GET_OBJECT` | 
:c:func:`PyWeakref_GetRef` | ++-----------------------------------+-----------------------------------+ +| :c:func:`PyImport_AddModule` | :c:func:`PyImport_AddModuleRef` | ++-----------------------------------+-----------------------------------+ + +Not all APIs that return borrowed references are problematic. For +example, :c:func:`PyTuple_GetItem` is safe because tuples are immutable. +Similarly, not all uses of the above APIs are problematic. For example, +:c:func:`PyDict_GetItem` is often used for parsing keyword argument +dictionaries in function calls; those keyword argument dictionaries are +effectively private (not accessible by other threads), so using borrowed +references in that context is safe. + +Some of these functions were added in Python 3.13. You can use the +`pythoncapi-compat <https://github.com/python/pythoncapi-compat>`_ package +to provide implementations of these functions for older Python versions. + + +.. _free-threaded-memory-allocation: + +Memory Allocation APIs +====================== + +Python's memory management C API provides functions in three different +:ref:`allocation domains <allocator-domains>`: "raw", "mem", and "object". +For thread-safety, the free-threaded build requires that only Python objects +are allocated using the object domain, and that all Python objects are +allocated using that domain. This differs from the prior Python versions, +where this was only a best practice and not a hard requirement. + +.. note:: + + Search for uses of :c:func:`PyObject_Malloc` in your + extension and check that the allocated memory is used for Python objects. + Use :c:func:`PyMem_Malloc` to allocate buffers instead of + :c:func:`PyObject_Malloc`. 
+ + +Thread State and GIL APIs +========================= + +Python provides a set of functions and macros to manage thread state and the +GIL, such as: + +* :c:func:`PyGILState_Ensure` and :c:func:`PyGILState_Release` +* :c:func:`PyEval_SaveThread` and :c:func:`PyEval_RestoreThread` +* :c:macro:`Py_BEGIN_ALLOW_THREADS` and :c:macro:`Py_END_ALLOW_THREADS` + +These functions should still be used in the free-threaded build to manage +thread state even when the :term:`GIL` is disabled. For example, if you +create a thread outside of Python, you must call :c:func:`PyGILState_Ensure` +before calling into the Python API to ensure that the thread has a valid +Python thread state. + +You should continue to call :c:func:`PyEval_SaveThread` or +:c:macro:`Py_BEGIN_ALLOW_THREADS` around blocking operations, such as I/O or +lock acquisitions, to allow other threads to run the +:term:`cyclic garbage collector <garbage collection>`. + + +Protecting Internal Extension State +=================================== + +Your extension may have internal state that was previously protected by the +GIL. You may need to add locking to protect this state. The approach will +depend on your extension, but some common patterns include: + +* **Caches**: global caches are a common source of shared state. Consider + using a lock to protect the cache or disabling it in the free-threaded build + if the cache is not critical for performance. +* **Global State**: global state may need to be protected by a lock or moved + to thread local storage. C11 and C++11 provide the ``thread_local`` or + ``_Thread_local`` for + `thread-local storage <https://en.cppreference.com/w/c/language/storage_duration>`_. + + +Building Extensions for the Free-Threaded Build +=============================================== + +C API extensions need to be built specifically for the free-threaded build. +The wheels, shared libraries, and binaries are indicated by a ``t`` suffix. + +* `pypa/manylinux <https://github.com/pypa/manylinux>`_ supports the + free-threaded build, with the ``t`` suffix, such as ``python3.13t``. 
+* `pypa/cibuildwheel <https://github.com/pypa/cibuildwheel>`_ supports the + free-threaded build if you set + `CIBW_FREE_THREADED_SUPPORT <https://cibuildwheel.pypa.io/en/stable/options/#free-threaded-support>`_. + +Limited C API and Stable ABI +............................ + +The free-threaded build does not currently support the +:ref:`Limited C API <limited-c-api>` or the stable ABI. If you use +`setuptools <https://setuptools.pypa.io/en/latest/setuptools.html>`_ to build +your extension and currently set ``py_limited_api=True`` you can use +``py_limited_api=not sysconfig.get_config_var("Py_GIL_DISABLED")`` to opt out +of the limited API when building with the free-threaded build. + +.. note:: + You will need to build separate wheels specifically for the free-threaded + build. If you currently use the stable ABI, you can continue to build a + single wheel for multiple non-free-threaded Python versions. + + +Windows +....... + +Due to a limitation of the official Windows installer, you will need to +manually define ``Py_GIL_DISABLED=1`` when building extensions from source. + +.. seealso:: + + `Porting Extension Modules to Support Free-Threading + <https://py-free-threading.github.io/porting/>`_: + A community-maintained porting guide for extension authors. diff --git a/pyperformance/data-files/benchmarks/bm_sphinx/data/Doc/howto/functional.rst b/pyperformance/data-files/benchmarks/bm_sphinx/data/Doc/howto/functional.rst new file mode 100644 index 00000000..1f0608fb --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_sphinx/data/Doc/howto/functional.rst @@ -0,0 +1,1272 @@ +.. _functional-howto: + +******************************** + Functional Programming HOWTO +******************************** + +:Author: A. M. Kuchling +:Release: 0.32 + +In this document, we'll take a tour of Python's features suitable for +implementing programs in a functional style. After an introduction to the +concepts of functional programming, we'll look at language features such as +:term:`iterator`\s and :term:`generator`\s and relevant library modules such as +:mod:`itertools` and :mod:`functools`. 
+ + +Introduction +============ + +This section explains the basic concept of functional programming; if +you're just interested in learning about Python language features, +skip to the next section on :ref:`functional-howto-iterators`. + +Programming languages support decomposing problems in several different ways: + +* Most programming languages are **procedural**: programs are lists of + instructions that tell the computer what to do with the program's input. C, + Pascal, and even Unix shells are procedural languages. + +* In **declarative** languages, you write a specification that describes the + problem to be solved, and the language implementation figures out how to + perform the computation efficiently. SQL is the declarative language you're + most likely to be familiar with; a SQL query describes the data set you want + to retrieve, and the SQL engine decides whether to scan tables or use indexes, + which subclauses should be performed first, etc. + +* **Object-oriented** programs manipulate collections of objects. Objects have + internal state and support methods that query or modify this internal state in + some way. Smalltalk and Java are object-oriented languages. C++ and Python + are languages that support object-oriented programming, but don't force the + use of object-oriented features. + +* **Functional** programming decomposes a problem into a set of functions. + Ideally, functions only take inputs and produce outputs, and don't have any + internal state that affects the output produced for a given input. Well-known + functional languages include the ML family (Standard ML, OCaml, and other + variants) and Haskell. + +The designers of some computer languages choose to emphasize one +particular approach to programming. This often makes it difficult to +write programs that use a different approach. Other languages are +multi-paradigm languages that support several different approaches. 
+Lisp, C++, and Python are multi-paradigm; you can write programs or +libraries that are largely procedural, object-oriented, or functional +in all of these languages. In a large program, different sections +might be written using different approaches; the GUI might be +object-oriented while the processing logic is procedural or +functional, for example. + +In a functional program, input flows through a set of functions. Each function +operates on its input and produces some output. Functional style discourages +functions with side effects that modify internal state or make other changes +that aren't visible in the function's return value. Functions that have no side +effects at all are called **purely functional**. Avoiding side effects means +not using data structures that get updated as a program runs; every function's +output must only depend on its input. + +Some languages are very strict about purity and don't even have assignment +statements such as ``a=3`` or ``c = a + b``, but it's difficult to avoid all +side effects, such as printing to the screen or writing to a disk file. Another +example is a call to the :func:`print` or :func:`time.sleep` function, neither +of which returns a useful value. Both are called only for their side effects +of sending some text to the screen or pausing execution for a second. + +Python programs written in functional style usually won't go to the extreme of +avoiding all I/O or all assignments; instead, they'll provide a +functional-appearing interface but will use non-functional features internally. +For example, the implementation of a function will still use assignments to +local variables, but won't modify global variables or have other side effects. + +Functional programming can be considered the opposite of object-oriented +programming. 
Objects are little capsules containing some internal state along +with a collection of method calls that let you modify this state, and programs +consist of making the right set of state changes. Functional programming wants +to avoid state changes as much as possible and works with data flowing between +functions. In Python you might combine the two approaches by writing functions +that take and return instances representing objects in your application (e-mail +messages, transactions, etc.). + +Functional design may seem like an odd constraint to work under. Why should you +avoid objects and side effects? There are theoretical and practical advantages +to the functional style: + +* Formal provability. +* Modularity. +* Composability. +* Ease of debugging and testing. + + +Formal provability +------------------ + +A theoretical benefit is that it's easier to construct a mathematical proof that +a functional program is correct. + +For a long time researchers have been interested in finding ways to +mathematically prove programs correct. This is different from testing a program +on numerous inputs and concluding that its output is usually correct, or reading +a program's source code and concluding that the code looks right; the goal is +instead a rigorous proof that a program produces the right result for all +possible inputs. + +The technique used to prove programs correct is to write down **invariants**, +properties of the input data and of the program's variables that are always +true. For each line of code, you then show that if invariants X and Y are true +**before** the line is executed, the slightly different invariants X' and Y' are +true **after** the line is executed. This continues until you reach the end of +the program, at which point the invariants should match the desired conditions +on the program's output. 
+ +Functional programming's avoidance of assignments arose because assignments are +difficult to handle with this technique; assignments can break invariants that +were true before the assignment without producing any new invariants that can be +propagated onward. + +Unfortunately, proving programs correct is largely impractical and not relevant +to Python software. Even trivial programs require proofs that are several pages +long; the proof of correctness for a moderately complicated program would be +enormous, and few or none of the programs you use daily (the Python interpreter, +your XML parser, your web browser) could be proven correct. Even if you wrote +down or generated a proof, there would then be the question of verifying the +proof; maybe there's an error in it, and you wrongly believe you've proved the +program correct. + + +Modularity +---------- + +A more practical benefit of functional programming is that it forces you to +break apart your problem into small pieces. Programs are more modular as a +result. It's easier to specify and write a small function that does one thing +than a large function that performs a complicated transformation. Small +functions are also easier to read and to check for errors. + + +Ease of debugging and testing +----------------------------- + +Testing and debugging a functional-style program is easier. + +Debugging is simplified because functions are generally small and clearly +specified. When a program doesn't work, each function is an interface point +where you can check that the data are correct. You can look at the intermediate +inputs and outputs to quickly isolate the function that's responsible for a bug. + +Testing is easier because each function is a potential subject for a unit test. +Functions don't depend on system state that needs to be replicated before +running a test; instead you only have to synthesize the right input and then +check that the output matches expectations. 
+ + +Composability +------------- + +As you work on a functional-style program, you'll write a number of functions +with varying inputs and outputs. Some of these functions will be unavoidably +specialized to a particular application, but others will be useful in a wide +variety of programs. For example, a function that takes a directory path and +returns all the XML files in the directory, or a function that takes a filename +and returns its contents, can be applied to many different situations. + +Over time you'll form a personal library of utilities. Often you'll assemble +new programs by arranging existing functions in a new configuration and writing +a few functions specialized for the current task. + + +.. _functional-howto-iterators: + +Iterators +========= + +I'll start by looking at a Python language feature that's an important +foundation for writing functional-style programs: iterators. + +An iterator is an object representing a stream of data; this object returns the +data one element at a time. A Python iterator must support a method called +:meth:`~iterator.__next__` that takes no arguments and always returns the next +element of the stream. If there are no more elements in the stream, +:meth:`~iterator.__next__` must raise the :exc:`StopIteration` exception. +Iterators don't have to be finite, though; it's perfectly reasonable to write +an iterator that produces an infinite stream of data. + +The built-in :func:`iter` function takes an arbitrary object and tries to return +an iterator that will return the object's contents or elements, raising +:exc:`TypeError` if the object doesn't support iteration. Several of Python's +built-in data types support iteration, the most common being lists and +dictionaries. An object is called :term:`iterable` if you can get an iterator +for it. 
+ +You can experiment with the iteration interface manually: + + >>> L = [1, 2, 3] + >>> it = iter(L) + >>> it #doctest: +ELLIPSIS + <...iterator object at ...> + >>> it.__next__() # same as next(it) + 1 + >>> next(it) + 2 + >>> next(it) + 3 + >>> next(it) + Traceback (most recent call last): + File "<stdin>", line 1, in <module> + StopIteration + >>> + +Python expects iterable objects in several different contexts, the most +important being the :keyword:`for` statement. In the statement ``for X in Y``, +Y must be an iterator or some object for which :func:`iter` can create an +iterator. These two statements are equivalent:: + + + for i in iter(obj): + print(i) + + for i in obj: + print(i) + +Iterators can be materialized as lists or tuples by using the :func:`list` or +:func:`tuple` constructor functions: + + >>> L = [1, 2, 3] + >>> iterator = iter(L) + >>> t = tuple(iterator) + >>> t + (1, 2, 3) + +Sequence unpacking also supports iterators: if you know an iterator will return +N elements, you can unpack them into an N-tuple: + + >>> L = [1, 2, 3] + >>> iterator = iter(L) + >>> a, b, c = iterator + >>> a, b, c + (1, 2, 3) + +Built-in functions such as :func:`max` and :func:`min` can take a single +iterator argument and will return the largest or smallest element. The ``"in"`` +and ``"not in"`` operators also support iterators: ``X in iterator`` is true if +X is found in the stream returned by the iterator. You'll run into obvious +problems if the iterator is infinite; :func:`max`, :func:`min` +will never return, and if the element X never appears in the stream, the +``"in"`` and ``"not in"`` operators won't return either. + +Note that you can only go forward in an iterator; there's no way to get the +previous element, reset the iterator, or make a copy of it. Iterator objects +can optionally provide these additional capabilities, but the iterator protocol +only specifies the :meth:`~iterator.__next__` method. 
Functions may therefore +consume all of the iterator's output, and if you need to do something different +with the same stream, you'll have to create a new iterator. + + + +Data Types That Support Iterators +--------------------------------- + +We've already seen how lists and tuples support iterators. In fact, any Python +sequence type, such as strings, will automatically support creation of an +iterator. + +Calling :func:`iter` on a dictionary returns an iterator that will loop over the +dictionary's keys:: + + >>> m = {'Jan': 1, 'Feb': 2, 'Mar': 3, 'Apr': 4, 'May': 5, 'Jun': 6, + ... 'Jul': 7, 'Aug': 8, 'Sep': 9, 'Oct': 10, 'Nov': 11, 'Dec': 12} + >>> for key in m: + ... print(key, m[key]) + Jan 1 + Feb 2 + Mar 3 + Apr 4 + May 5 + Jun 6 + Jul 7 + Aug 8 + Sep 9 + Oct 10 + Nov 11 + Dec 12 + +Note that starting with Python 3.7, dictionary iteration order is guaranteed +to be the same as the insertion order. In earlier versions, the behaviour was +unspecified and could vary between implementations. + +Applying :func:`iter` to a dictionary always loops over the keys, but +dictionaries have methods that return other iterators. If you want to iterate +over values or key/value pairs, you can explicitly call the +:meth:`~dict.values` or :meth:`~dict.items` methods to get an appropriate +iterator. + +The :func:`dict` constructor can accept an iterator that returns a finite stream +of ``(key, value)`` tuples: + + >>> L = [('Italy', 'Rome'), ('France', 'Paris'), ('US', 'Washington DC')] + >>> dict(iter(L)) + {'Italy': 'Rome', 'France': 'Paris', 'US': 'Washington DC'} + +Files also support iteration by calling the :meth:`~io.TextIOBase.readline` +method until there are no more lines in the file. This means you can read each +line of a file like this:: + + for line in file: + # do something for each line + ... + +Sets can take their contents from an iterable and let you iterate over the set's +elements:: + + >>> S = {2, 3, 5, 7, 11, 13} + >>> for i in S: + ... 
print(i) + 2 + 3 + 5 + 7 + 11 + 13 + + + +Generator expressions and list comprehensions +============================================= + +Two common operations on an iterator's output are 1) performing some operation +for every element, 2) selecting a subset of elements that meet some condition. +For example, given a list of strings, you might want to strip off trailing +whitespace from each line or extract all the strings containing a given +substring. + +List comprehensions and generator expressions (short form: "listcomps" and +"genexps") are a concise notation for such operations, borrowed from the +functional programming language Haskell (https://www.haskell.org/). You can strip +all the whitespace from a stream of strings with the following code:: + + >>> line_list = [' line 1\n', 'line 2 \n', ' \n', ''] + + >>> # Generator expression -- returns iterator + >>> stripped_iter = (line.strip() for line in line_list) + + >>> # List comprehension -- returns list + >>> stripped_list = [line.strip() for line in line_list] + +You can select only certain elements by adding an ``"if"`` condition:: + + >>> stripped_list = [line.strip() for line in line_list + ... if line != ""] + +With a list comprehension, you get back a Python list; ``stripped_list`` is a +list containing the resulting lines, not an iterator. Generator expressions +return an iterator that computes the values as necessary, not needing to +materialize all the values at once. This means that list comprehensions aren't +useful if you're working with iterators that return an infinite stream or a very +large amount of data. Generator expressions are preferable in these situations. + +Generator expressions are surrounded by parentheses ("()") and list +comprehensions are surrounded by square brackets ("[]"). Generator expressions +have the form:: + + ( expression for expr in sequence1 + if condition1 + for expr2 in sequence2 + if condition2 + for expr3 in sequence3 + ... 
+ if condition3 + for exprN in sequenceN + if conditionN ) + +Again, for a list comprehension only the outside brackets are different (square +brackets instead of parentheses). + +The elements of the generated output will be the successive values of +``expression``. The ``if`` clauses are all optional; if present, ``expression`` +is only evaluated and added to the result when ``condition`` is true. + +Generator expressions always have to be written inside parentheses, but the +parentheses signalling a function call also count. If you want to create an +iterator that will be immediately passed to a function you can write:: + + obj_total = sum(obj.count for obj in list_all_objects()) + +The ``for...in`` clauses contain the sequences to be iterated over. The +sequences do not have to be the same length, because they are iterated over from +left to right, **not** in parallel. For each element in ``sequence1``, +``sequence2`` is looped over from the beginning. ``sequence3`` is then looped +over for each resulting pair of elements from ``sequence1`` and ``sequence2``. + +To put it another way, a list comprehension or generator expression is +equivalent to the following Python code:: + + for expr1 in sequence1: + if not (condition1): + continue # Skip this element + for expr2 in sequence2: + if not (condition2): + continue # Skip this element + ... + for exprN in sequenceN: + if not (conditionN): + continue # Skip this element + + # Output the value of + # the expression. + +This means that when there are multiple ``for...in`` clauses but no ``if`` +clauses, the length of the resulting output will be equal to the product of the +lengths of all the sequences. 
If you have two lists of length 3, the output +list is 9 elements long: + + >>> seq1 = 'abc' + >>> seq2 = (1, 2, 3) + >>> [(x, y) for x in seq1 for y in seq2] #doctest: +NORMALIZE_WHITESPACE + [('a', 1), ('a', 2), ('a', 3), + ('b', 1), ('b', 2), ('b', 3), + ('c', 1), ('c', 2), ('c', 3)] + +To avoid introducing an ambiguity into Python's grammar, if ``expression`` is +creating a tuple, it must be surrounded with parentheses. The first list +comprehension below is a syntax error, while the second one is correct:: + + # Syntax error + [x, y for x in seq1 for y in seq2] + # Correct + [(x, y) for x in seq1 for y in seq2] + + +Generators +========== + +Generators are a special class of functions that simplify the task of writing +iterators. Regular functions compute a value and return it, but generators +return an iterator that returns a stream of values. + +You're doubtless familiar with how regular function calls work in Python or C. +When you call a function, it gets a private namespace where its local variables +are created. When the function reaches a ``return`` statement, the local +variables are destroyed and the value is returned to the caller. A later call +to the same function creates a new private namespace and a fresh set of local +variables. But, what if the local variables weren't thrown away on exiting a +function? What if you could later resume the function where it left off? This +is what generators provide; they can be thought of as resumable functions. + +Here's the simplest example of a generator function: + + >>> def generate_ints(N): + ... for i in range(N): + ... yield i + +Any function containing a :keyword:`yield` keyword is a generator function; +this is detected by Python's :term:`bytecode` compiler which compiles the +function specially as a result. + +When you call a generator function, it doesn't return a single value; instead it +returns a generator object that supports the iterator protocol. 
On executing +the ``yield`` expression, the generator outputs the value of ``i``, similar to a +``return`` statement. The big difference between ``yield`` and a ``return`` +statement is that on reaching a ``yield`` the generator's state of execution is +suspended and local variables are preserved. On the next call to the +generator's :meth:`~generator.__next__` method, the function will resume +executing. + +Here's a sample usage of the ``generate_ints()`` generator: + + >>> gen = generate_ints(3) + >>> gen #doctest: +ELLIPSIS + <generator object generate_ints at ...> + >>> next(gen) + 0 + >>> next(gen) + 1 + >>> next(gen) + 2 + >>> next(gen) + Traceback (most recent call last): + File "stdin", line 1, in <module> + File "stdin", line 2, in generate_ints + StopIteration + +You could equally write ``for i in generate_ints(5)``, or ``a, b, c = +generate_ints(3)``. + +Inside a generator function, ``return value`` causes ``StopIteration(value)`` +to be raised from the :meth:`~generator.__next__` method. Once this happens, or +the bottom of the function is reached, the procession of values ends and the +generator cannot yield any further values. + +You could achieve the effect of generators manually by writing your own class +and storing all the local variables of the generator as instance variables. For +example, returning a list of integers could be done by setting ``self.count`` to +0, and having the :meth:`~iterator.__next__` method increment ``self.count`` and +return it. +However, for a moderately complicated generator, writing a corresponding class +can be much messier. + +The test suite included with Python's library, +:source:`Lib/test/test_generators.py`, contains +a number of more interesting examples. Here's one generator that implements an +in-order traversal of a tree using generators recursively. :: + + # A recursive generator that generates Tree leaves in in-order.
+ def inorder(t): + if t: + for x in inorder(t.left): + yield x + + yield t.label + + for x in inorder(t.right): + yield x + +Two other examples in ``test_generators.py`` produce solutions for the N-Queens +problem (placing N queens on an NxN chess board so that no queen threatens +another) and the Knight's Tour (finding a route that takes a knight to every +square of an NxN chessboard without visiting any square twice). + + + +Passing values into a generator +------------------------------- + +In Python 2.4 and earlier, generators only produced output. Once a generator's +code was invoked to create an iterator, there was no way to pass any new +information into the function when its execution is resumed. You could hack +together this ability by making the generator look at a global variable or by +passing in some mutable object that callers then modify, but these approaches +are messy. + +In Python 2.5 there's a simple way to pass values into a generator. +:keyword:`yield` became an expression, returning a value that can be assigned to +a variable or otherwise operated on:: + + val = (yield i) + +I recommend that you **always** put parentheses around a ``yield`` expression +when you're doing something with the returned value, as in the above example. +The parentheses aren't always necessary, but it's easier to always add them +instead of having to remember when they're needed. + +(:pep:`342` explains the exact rules, which are that a ``yield``-expression must +always be parenthesized except when it occurs at the top-level expression on the +right-hand side of an assignment. This means you can write ``val = yield i`` +but have to use parentheses when there's an operation, as in ``val = (yield i) ++ 12``.) + +Values are sent into a generator by calling its :meth:`send(value) +<generator.send>` method. This method resumes the generator's code and the +``yield`` expression returns the specified value.
If the regular +:meth:`~generator.__next__` method is called, the ``yield`` returns ``None``. + +Here's a simple counter that increments by 1 and allows changing the value of +the internal counter. + +.. testcode:: + + def counter(maximum): + i = 0 + while i < maximum: + val = (yield i) + # If value provided, change counter + if val is not None: + i = val + else: + i += 1 + +And here's an example of changing the counter: + + >>> it = counter(10) #doctest: +SKIP + >>> next(it) #doctest: +SKIP + 0 + >>> next(it) #doctest: +SKIP + 1 + >>> it.send(8) #doctest: +SKIP + 8 + >>> next(it) #doctest: +SKIP + 9 + >>> next(it) #doctest: +SKIP + Traceback (most recent call last): + File "t.py", line 15, in <module> + next(it) + StopIteration + +Because ``yield`` will often be returning ``None``, you should always check for +this case. Don't just use its value in expressions unless you're sure that the +:meth:`~generator.send` method will be the only method used to resume your +generator function. + +In addition to :meth:`~generator.send`, there are two other methods on +generators: + +* :meth:`throw(value) <generator.throw>` is used to + raise an exception inside the generator; the exception is raised by the + ``yield`` expression where the generator's execution is paused. + +* :meth:`~generator.close` raises a :exc:`GeneratorExit` exception inside the + generator to terminate the iteration. On receiving this exception, the + generator's code must either raise :exc:`GeneratorExit` or + :exc:`StopIteration`; catching the exception and doing anything else is + illegal and will trigger a :exc:`RuntimeError`. :meth:`~generator.close` + will also be called by Python's garbage collector when the generator is + garbage-collected. + + If you need to run cleanup code when a :exc:`GeneratorExit` occurs, I suggest + using a ``try: ... finally:`` suite instead of catching :exc:`GeneratorExit`.
+ +The cumulative effect of these changes is to turn generators from one-way +producers of information into both producers and consumers. + +Generators also become **coroutines**, a more generalized form of subroutines. +Subroutines are entered at one point and exited at another point (the top of the +function, and a ``return`` statement), but coroutines can be entered, exited, +and resumed at many different points (the ``yield`` statements). + + +Built-in functions +================== + +Let's look in more detail at built-in functions often used with iterators. + +Two of Python's built-in functions, :func:`map` and :func:`filter` duplicate the +features of generator expressions: + +:func:`map(f, iterA, iterB, ...) <map>` returns an iterator over the sequence + ``f(iterA[0], iterB[0]), f(iterA[1], iterB[1]), f(iterA[2], iterB[2]), ...``. + + >>> def upper(s): + ... return s.upper() + + >>> list(map(upper, ['sentence', 'fragment'])) + ['SENTENCE', 'FRAGMENT'] + >>> [upper(s) for s in ['sentence', 'fragment']] + ['SENTENCE', 'FRAGMENT'] + +You can of course achieve the same effect with a list comprehension. + +:func:`filter(predicate, iter) <filter>` returns an iterator over all the +sequence elements that meet a certain condition, and is similarly duplicated by +list comprehensions. A **predicate** is a function that returns the truth +value of some condition; for use with :func:`filter`, the predicate must take a +single value. + + >>> def is_even(x): + ... return (x % 2) == 0 + + >>> list(filter(is_even, range(10))) + [0, 2, 4, 6, 8] + + +This can also be written as a list comprehension: + + >>> list(x for x in range(10) if is_even(x)) + [0, 2, 4, 6, 8] + + +:func:`enumerate(iter, start=0) <enumerate>` counts off the elements in the +iterable returning 2-tuples containing the count (from *start*) and +each element. :: + + >>> for item in enumerate(['subject', 'verb', 'object']): + ...
print(item) + (0, 'subject') + (1, 'verb') + (2, 'object') + +:func:`enumerate` is often used when looping through a list and recording the +indexes at which certain conditions are met:: + + f = open('data.txt', 'r') + for i, line in enumerate(f): + if line.strip() == '': + print('Blank line at line #%i' % i) + +:func:`sorted(iterable, key=None, reverse=False) <sorted>` collects all the +elements of the iterable into a list, sorts the list, and returns the sorted +result. The *key* and *reverse* arguments are passed through to the +constructed list's :meth:`~list.sort` method. :: + + >>> import random + >>> # Generate 8 random numbers between [0, 10000) + >>> rand_list = random.sample(range(10000), 8) + >>> rand_list #doctest: +SKIP + [769, 7953, 9828, 6431, 8442, 9878, 6213, 2207] + >>> sorted(rand_list) #doctest: +SKIP + [769, 2207, 6213, 6431, 7953, 8442, 9828, 9878] + >>> sorted(rand_list, reverse=True) #doctest: +SKIP + [9878, 9828, 8442, 7953, 6431, 6213, 2207, 769] + +(For a more detailed discussion of sorting, see the :ref:`sortinghowto`.) + + +The :func:`any(iter) <any>` and :func:`all(iter) <all>` built-ins look at the +truth values of an iterable's contents. :func:`any` returns ``True`` if any element +in the iterable is a true value, and :func:`all` returns ``True`` if all of the +elements are true values: + + >>> any([0, 1, 0]) + True + >>> any([0, 0, 0]) + False + >>> any([1, 1, 1]) + True + >>> all([0, 1, 0]) + False + >>> all([0, 0, 0]) + False + >>> all([1, 1, 1]) + True + + +:func:`zip(iterA, iterB, ...) <zip>` takes one element from each iterable and +returns them in a tuple:: + + zip(['a', 'b', 'c'], (1, 2, 3)) => + ('a', 1), ('b', 2), ('c', 3) + +It doesn't construct an in-memory list and exhaust all the input iterators +before returning; instead tuples are constructed and returned only if they're +requested. (The technical term for this behaviour is `lazy evaluation +<https://en.wikipedia.org/wiki/Lazy_evaluation>`__.) + +This iterator is intended to be used with iterables that are all of the same +length.
If the iterables are of different lengths, the resulting stream will be +the same length as the shortest iterable. :: + + zip(['a', 'b'], (1, 2, 3)) => + ('a', 1), ('b', 2) + +You should avoid doing this, though, because an element may be taken from the +longer iterators and discarded. This means you can't go on to use the iterators +further because you risk skipping a discarded element. + + +The itertools module +==================== + +The :mod:`itertools` module contains a number of commonly used iterators as well +as functions for combining several iterators. This section will introduce the +module's contents by showing small examples. + +The module's functions fall into a few broad classes: + +* Functions that create a new iterator based on an existing iterator. +* Functions for treating an iterator's elements as function arguments. +* Functions for selecting portions of an iterator's output. +* A function for grouping an iterator's output. + +Creating new iterators +---------------------- + +:func:`itertools.count(start, step) <itertools.count>` returns an infinite +stream of evenly spaced values. You can optionally supply the starting number, +which defaults to 0, and the interval between numbers, which defaults to 1:: + + itertools.count() => + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, ... + itertools.count(10) => + 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, ... + itertools.count(10, 5) => + 10, 15, 20, 25, 30, 35, 40, 45, 50, 55, ... + +:func:`itertools.cycle(iter) <itertools.cycle>` saves a copy of the contents of +a provided iterable and returns a new iterator that returns its elements from +first to last. The new iterator will repeat these elements infinitely. :: + + itertools.cycle([1, 2, 3, 4, 5]) => + 1, 2, 3, 4, 5, 1, 2, 3, 4, 5, ... + +:func:`itertools.repeat(elem, [n]) <itertools.repeat>` returns the provided +element *n* times, or returns the element endlessly if *n* is not provided. :: + + itertools.repeat('abc') => + abc, abc, abc, abc, abc, abc, abc, abc, abc, abc, ...
+ itertools.repeat('abc', 5) => + abc, abc, abc, abc, abc + +:func:`itertools.chain(iterA, iterB, ...) <itertools.chain>` takes an arbitrary +number of iterables as input, and returns all the elements of the first +iterator, then all the elements of the second, and so on, until all of the +iterables have been exhausted. :: + + itertools.chain(['a', 'b', 'c'], (1, 2, 3)) => + a, b, c, 1, 2, 3 + +:func:`itertools.islice(iter, [start], stop, [step]) <itertools.islice>` returns +a stream that's a slice of the iterator. With a single *stop* argument, it +will return the first *stop* elements. If you supply a starting index, you'll +get *stop-start* elements, and if you supply a value for *step*, elements +will be skipped accordingly. Unlike Python's string and list slicing, you can't +use negative values for *start*, *stop*, or *step*. :: + + itertools.islice(range(10), 8) => + 0, 1, 2, 3, 4, 5, 6, 7 + itertools.islice(range(10), 2, 8) => + 2, 3, 4, 5, 6, 7 + itertools.islice(range(10), 2, 8, 2) => + 2, 4, 6 + +:func:`itertools.tee(iter, [n]) <itertools.tee>` replicates an iterator; it +returns *n* independent iterators that will all return the contents of the +source iterator. +If you don't supply a value for *n*, the default is 2. Replicating iterators +requires saving some of the contents of the source iterator, so this can consume +significant memory if the iterator is large and one of the new iterators is +consumed more than the others. :: + + itertools.tee( itertools.count() ) => + iterA, iterB + + where iterA -> + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, ... + + and iterB -> + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, ... + + +Calling functions on elements +----------------------------- + +The :mod:`operator` module contains a set of functions corresponding to Python's +operators. Some examples are :func:`operator.add(a, b) <operator.add>` (adds +two values), :func:`operator.ne(a, b) <operator.ne>` (same as ``a != b``), and +:func:`operator.attrgetter('id') <operator.attrgetter>` +(returns a callable that fetches the ``.id`` attribute).
+ +:func:`itertools.starmap(func, iter) <itertools.starmap>` assumes that the +iterable will return a stream of tuples, and calls *func* using these tuples as +the arguments:: + + itertools.starmap(os.path.join, + [('/bin', 'python'), ('/usr', 'bin', 'java'), + ('/usr', 'bin', 'perl'), ('/usr', 'bin', 'ruby')]) + => + /bin/python, /usr/bin/java, /usr/bin/perl, /usr/bin/ruby + + +Selecting elements +------------------ + +Another group of functions chooses a subset of an iterator's elements based on a +predicate. + +:func:`itertools.filterfalse(predicate, iter) <itertools.filterfalse>` is the +opposite of :func:`filter`, returning all elements for which the predicate +returns false:: + + itertools.filterfalse(is_even, itertools.count()) => + 1, 3, 5, 7, 9, 11, 13, 15, ... + +:func:`itertools.takewhile(predicate, iter) <itertools.takewhile>` returns +elements for as long as the predicate returns true. Once the predicate returns +false, the iterator will signal the end of its results. :: + + def less_than_10(x): + return x < 10 + + itertools.takewhile(less_than_10, itertools.count()) => + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 + + itertools.takewhile(is_even, itertools.count()) => + 0 + +:func:`itertools.dropwhile(predicate, iter) <itertools.dropwhile>` discards +elements while the predicate returns true, and then returns the rest of the +iterable's results. :: + + itertools.dropwhile(less_than_10, itertools.count()) => + 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, ... + + itertools.dropwhile(is_even, itertools.count()) => + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, ... + +:func:`itertools.compress(data, selectors) <itertools.compress>` takes two +iterators and returns only those elements of *data* for which the corresponding +element of *selectors* is true, stopping whenever either one is exhausted:: + + itertools.compress([1, 2, 3, 4, 5], [True, True, False, False, True]) => + 1, 2, 5 + + +Combinatoric functions +---------------------- + +The :func:`itertools.combinations(iterable, r) <itertools.combinations>` +returns an iterator giving all possible *r*-tuple combinations of the +elements contained in *iterable*.
:: + + itertools.combinations([1, 2, 3, 4, 5], 2) => + (1, 2), (1, 3), (1, 4), (1, 5), + (2, 3), (2, 4), (2, 5), + (3, 4), (3, 5), + (4, 5) + + itertools.combinations([1, 2, 3, 4, 5], 3) => + (1, 2, 3), (1, 2, 4), (1, 2, 5), (1, 3, 4), (1, 3, 5), (1, 4, 5), + (2, 3, 4), (2, 3, 5), (2, 4, 5), + (3, 4, 5) + +The elements within each tuple remain in the same order as +*iterable* returned them. For example, the number 1 is always before +2, 3, 4, or 5 in the examples above. A similar function, +:func:`itertools.permutations(iterable, r=None) <itertools.permutations>`, +removes this constraint on the order, returning all possible +arrangements of length *r*:: + + itertools.permutations([1, 2, 3, 4, 5], 2) => + (1, 2), (1, 3), (1, 4), (1, 5), + (2, 1), (2, 3), (2, 4), (2, 5), + (3, 1), (3, 2), (3, 4), (3, 5), + (4, 1), (4, 2), (4, 3), (4, 5), + (5, 1), (5, 2), (5, 3), (5, 4) + + itertools.permutations([1, 2, 3, 4, 5]) => + (1, 2, 3, 4, 5), (1, 2, 3, 5, 4), (1, 2, 4, 3, 5), + ... + (5, 4, 3, 2, 1) + +If you don't supply a value for *r* the length of the iterable is used, +meaning that all the elements are permuted. + +Note that these functions produce all of the possible combinations by +position and don't require that the contents of *iterable* are unique:: + + itertools.permutations('aba', 3) => + ('a', 'b', 'a'), ('a', 'a', 'b'), ('b', 'a', 'a'), + ('b', 'a', 'a'), ('a', 'a', 'b'), ('a', 'b', 'a') + +The identical tuple ``('a', 'a', 'b')`` occurs twice, but the two 'a' +strings came from different positions. + +The :func:`itertools.combinations_with_replacement(iterable, r) <itertools.combinations_with_replacement>` +function relaxes a different constraint: elements can be repeated +within a single tuple. Conceptually an element is selected for the +first position of each tuple and then is replaced before the second +element is selected.
:: + + itertools.combinations_with_replacement([1, 2, 3, 4, 5], 2) => + (1, 1), (1, 2), (1, 3), (1, 4), (1, 5), + (2, 2), (2, 3), (2, 4), (2, 5), + (3, 3), (3, 4), (3, 5), + (4, 4), (4, 5), + (5, 5) + + +Grouping elements +----------------- + +The last function I'll discuss, :func:`itertools.groupby(iter, key_func=None) +<itertools.groupby>`, is the most complicated. ``key_func(elem)`` is a function +that can compute a key value for each element returned by the iterable. If you +don't supply a key function, the key is simply each element itself. + +:func:`~itertools.groupby` collects all the consecutive elements from the +underlying iterable that have the same key value, and returns a stream of +2-tuples containing a key value and an iterator for the elements with that key. + +:: + + city_list = [('Decatur', 'AL'), ('Huntsville', 'AL'), ('Selma', 'AL'), + ('Anchorage', 'AK'), ('Nome', 'AK'), + ('Flagstaff', 'AZ'), ('Phoenix', 'AZ'), ('Tucson', 'AZ'), + ... + ] + + def get_state(city_state): + return city_state[1] + + itertools.groupby(city_list, get_state) => + ('AL', iterator-1), + ('AK', iterator-2), + ('AZ', iterator-3), ... + + where + iterator-1 => + ('Decatur', 'AL'), ('Huntsville', 'AL'), ('Selma', 'AL') + iterator-2 => + ('Anchorage', 'AK'), ('Nome', 'AK') + iterator-3 => + ('Flagstaff', 'AZ'), ('Phoenix', 'AZ'), ('Tucson', 'AZ') + +:func:`~itertools.groupby` assumes that the underlying iterable's contents will +already be sorted based on the key. Note that the returned iterators also use +the underlying iterable, so you have to consume the results of iterator-1 before +requesting iterator-2 and its corresponding key. + + +The functools module +==================== + +The :mod:`functools` module contains some higher-order functions. +A **higher-order function** takes one or more functions as input and returns a +new function. The most useful tool in this module is the +:func:`functools.partial` function.
+ +For programs written in a functional style, you'll sometimes want to construct +variants of existing functions that have some of the parameters filled in. +Consider a Python function ``f(a, b, c)``; you may wish to create a new function +``g(b, c)`` that's equivalent to ``f(1, b, c)``; you're filling in a value for +one of ``f()``'s parameters. This is called "partial function application". + +The constructor for :func:`~functools.partial` takes the arguments +``(function, arg1, arg2, ..., kwarg1=value1, kwarg2=value2)``. The resulting +object is callable, so you can just call it to invoke ``function`` with the +filled-in arguments. + +Here's a small but realistic example:: + + import functools + + def log(message, subsystem): + """Write the contents of 'message' to the specified subsystem.""" + print('%s: %s' % (subsystem, message)) + ... + + server_log = functools.partial(log, subsystem='server') + server_log('Unable to open socket') + +:func:`functools.reduce(func, iter, [initial_value]) <functools.reduce>` +cumulatively performs an operation on all the iterable's elements and, +therefore, can't be applied to infinite iterables. *func* must be a function +that takes two elements and returns a single value. :func:`functools.reduce` +takes the first two elements A and B returned by the iterator and calculates +``func(A, B)``. It then requests the third element, C, calculates +``func(func(A, B), C)``, combines this result with the fourth element returned, +and continues until the iterable is exhausted. If the iterable returns no +values at all, a :exc:`TypeError` exception is raised. If the initial value is +supplied, it's used as a starting point and ``func(initial_value, A)`` is the +first calculation. :: + + >>> import operator, functools + >>> functools.reduce(operator.concat, ['A', 'BB', 'C']) + 'ABBC' + >>> functools.reduce(operator.concat, []) + Traceback (most recent call last): + ...
+ TypeError: reduce() of empty sequence with no initial value + >>> functools.reduce(operator.mul, [1, 2, 3], 1) + 6 + >>> functools.reduce(operator.mul, [], 1) + 1 + +If you use :func:`operator.add` with :func:`functools.reduce`, you'll add up all the +elements of the iterable. This case is so common that there's a special +built-in called :func:`sum` to compute it: + + >>> import functools, operator + >>> functools.reduce(operator.add, [1, 2, 3, 4], 0) + 10 + >>> sum([1, 2, 3, 4]) + 10 + >>> sum([]) + 0 + +For many uses of :func:`functools.reduce`, though, it can be clearer to just +write the obvious :keyword:`for` loop:: + + import functools + # Instead of: + product = functools.reduce(operator.mul, [1, 2, 3], 1) + + # You can write: + product = 1 + for i in [1, 2, 3]: + product *= i + +A related function is :func:`itertools.accumulate(iterable, func=operator.add) +<itertools.accumulate>`. It performs the same calculation, but instead of +returning only the final result, :func:`~itertools.accumulate` returns an iterator +that also yields each partial result:: + + itertools.accumulate([1, 2, 3, 4, 5]) => + 1, 3, 6, 10, 15 + + itertools.accumulate([1, 2, 3, 4, 5], operator.mul) => + 1, 2, 6, 24, 120 + + +The operator module +------------------- + +The :mod:`operator` module was mentioned earlier. It contains a set of +functions corresponding to Python's operators. These functions are often useful +in functional-style code because they save you from writing trivial functions +that perform a single operation. + +Some of the functions in this module are: + +* Math operations: ``add()``, ``sub()``, ``mul()``, ``floordiv()``, ``abs()``, ... +* Logical operations: ``not_()``, ``truth()``. +* Bitwise operations: ``and_()``, ``or_()``, ``invert()``. +* Comparisons: ``eq()``, ``ne()``, ``lt()``, ``le()``, ``gt()``, and ``ge()``. +* Object identity: ``is_()``, ``is_not()``. + +Consult the operator module's documentation for a complete list.
+ + +Small functions and the lambda expression +========================================= + +When writing functional-style programs, you'll often need little functions that +act as predicates or that combine elements in some way. + +If there's a Python built-in or a module function that's suitable, you don't +need to define a new function at all:: + + stripped_lines = [line.strip() for line in lines] + existing_files = filter(os.path.exists, file_list) + +If the function you need doesn't exist, you need to write it. One way to write +small functions is to use the :keyword:`lambda` expression. ``lambda`` takes a +number of parameters and an expression combining these parameters, and creates +an anonymous function that returns the value of the expression:: + + adder = lambda x, y: x+y + + print_assign = lambda name, value: name + '=' + str(value) + +An alternative is to just use the ``def`` statement and define a function in the +usual way:: + + def adder(x, y): + return x + y + + def print_assign(name, value): + return name + '=' + str(value) + +Which alternative is preferable? That's a style question; my usual course is to +avoid using ``lambda``. + +One reason for my preference is that ``lambda`` is quite limited in the +functions it can define. The result has to be computable as a single +expression, which means you can't have multiway ``if... elif... else`` +comparisons or ``try... except`` statements. If you try to do too much in a +``lambda`` statement, you'll end up with an overly complicated expression that's +hard to read. Quick, what's the following code doing? :: + + import functools + total = functools.reduce(lambda a, b: (0, a[1] + b[1]), items)[1] + +You can figure it out, but it takes time to disentangle the expression to figure +out what's going on. 
Using a short nested ``def`` statement makes things a +little bit better:: + + import functools + def combine(a, b): + return 0, a[1] + b[1] + + total = functools.reduce(combine, items)[1] + +But it would be best of all if I had simply used a ``for`` loop:: + + total = 0 + for a, b in items: + total += b + +Or the :func:`sum` built-in and a generator expression:: + + total = sum(b for a, b in items) + +Many uses of :func:`functools.reduce` are clearer when written as ``for`` loops. + +Fredrik Lundh once suggested the following set of rules for refactoring uses of +``lambda``: + +1. Write a lambda function. +2. Write a comment explaining what the heck that lambda does. +3. Study the comment for a while, and think of a name that captures the essence + of the comment. +4. Convert the lambda to a def statement, using that name. +5. Remove the comment. + +I really like these rules, but you're free to disagree +about whether this lambda-free style is better. + + +Revision History and Acknowledgements +===================================== + +The author would like to thank the following people for offering suggestions, +corrections and assistance with various drafts of this article: Ian Bicking, +Nick Coghlan, Nick Efford, Raymond Hettinger, Jim Jewett, Mike Krell, Leandro +Lameiro, Jussi Salmela, Collin Winter, Blake Winton. + +Version 0.1: posted June 30 2006. + +Version 0.11: posted July 1 2006. Typo fixes. + +Version 0.2: posted July 10 2006. Merged genexp and listcomp sections into one. +Typo fixes. + +Version 0.21: Added more references suggested on the tutor mailing list. + +Version 0.30: Adds a section on the ``functional`` module written by Collin +Winter; adds short section on the operator module; a few other edits. + + +References +========== + +General +------- + +**Structure and Interpretation of Computer Programs**, by Harold Abelson and +Gerald Jay Sussman with Julie Sussman. The book can be found at +https://mitpress.mit.edu/sicp.
In this classic textbook of computer science, +chapters 2 and 3 discuss the use of sequences and streams to organize the data +flow inside a program. The book uses Scheme for its examples, but many of the +design approaches described in these chapters are applicable to functional-style +Python code. + +https://www.defmacro.org/ramblings/fp.html: A general introduction to functional +programming that uses Java examples and has a lengthy historical introduction. + +https://en.wikipedia.org/wiki/Functional_programming: General Wikipedia entry +describing functional programming. + +https://en.wikipedia.org/wiki/Coroutine: Entry for coroutines. + +https://en.wikipedia.org/wiki/Partial_application: Entry for the concept of partial function application. + +https://en.wikipedia.org/wiki/Currying: Entry for the concept of currying. + +Python-specific +--------------- + +https://gnosis.cx/TPiP/: The first chapter of David Mertz's book +:title-reference:`Text Processing in Python` discusses functional programming +for text processing, in the section titled "Utilizing Higher-Order Functions in +Text Processing". + +Mertz also wrote a 3-part series of articles on functional programming +for IBM's DeveloperWorks site; see +`part 1 <https://developer.ibm.com/articles/l-prog/>`__, +`part 2 <https://developer.ibm.com/tutorials/l-prog2/>`__, and +`part 3 <https://developer.ibm.com/tutorials/l-prog3/>`__. + + +Python documentation +-------------------- + +Documentation for the :mod:`itertools` module. + +Documentation for the :mod:`functools` module. + +Documentation for the :mod:`operator` module. + +:pep:`289`: "Generator Expressions" + +:pep:`342`: "Coroutines via Enhanced Generators" describes the new generator +features in Python 2.5. + +.. comment + + Handy little function for printing part of an iterator -- used + while writing this document.
+ + import itertools + def print_iter(it): + slice = itertools.islice(it, 10) + for elem in slice[:-1]: + sys.stdout.write(str(elem)) + sys.stdout.write(', ') + print(elem[-1]) diff --git a/pyperformance/data-files/benchmarks/bm_sphinx/data/Doc/howto/gdb_helpers.rst b/pyperformance/data-files/benchmarks/bm_sphinx/data/Doc/howto/gdb_helpers.rst new file mode 100644 index 00000000..53bbf7dd --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_sphinx/data/Doc/howto/gdb_helpers.rst @@ -0,0 +1,449 @@ +.. _gdb: + +========================================================= +Debugging C API extensions and CPython Internals with GDB +========================================================= + +.. highlight:: none + +This document explains how the Python GDB extension, ``python-gdb.py``, can +be used with the GDB debugger to debug CPython extensions and the +CPython interpreter itself. + +When debugging low-level problems such as crashes or deadlocks, a low-level +debugger, such as GDB, is useful to diagnose and correct the issue. +By default, GDB (or any of its front-ends) doesn't support high-level +information specific to the CPython interpreter. + +The ``python-gdb.py`` extension adds CPython interpreter information to GDB. +The extension helps introspect the stack of currently executing Python functions. +Given a Python object represented by a :c:expr:`PyObject *` pointer, +the extension surfaces the type and value of the object. + +Developers who are working on CPython extensions or tinkering with parts +of CPython that are written in C can use this document to learn how to use the +``python-gdb.py`` extension with GDB. + +.. note:: + + This document assumes that you are familiar with the basics of GDB and the + CPython C API. It consolidates guidance from the + `devguide <https://devguide.python.org>`_ and the + `Python wiki <https://wiki.python.org/moin/DebuggingWithGdb>`_. + + +Prerequisites +============= + +You need to have: + +- GDB 7 or later.
(For earlier versions of GDB, see ``Misc/gdbinit`` in the + sources of Python 3.11 or earlier.) +- GDB-compatible debugging information for Python and any extension you are + debugging. +- The ``python-gdb.py`` extension. + +The extension is built with Python, but might be distributed separately or +not at all. Below, we include tips for a few common systems as examples. +Note that even if the instructions match your system, they might be outdated. + + +Setup with Python built from source +----------------------------------- + +When you build CPython from source, debugging information should be available, +and the build should add a ``python-gdb.py`` file to the root directory of +your repository. + +To activate support, you must add the directory containing ``python-gdb.py`` +to GDB's "auto-load-safe-path". +If you haven't done this, recent versions of GDB will print out a warning +with instructions on how to do this. + +.. note:: + + If you do not see instructions for your version of GDB, put this in your + configuration file (``~/.gdbinit`` or ``~/.config/gdb/gdbinit``):: + + add-auto-load-safe-path /path/to/cpython + + You can also add multiple paths, separated by ``:``. + + +Setup for Python from a Linux distro +------------------------------------ + +Most Linux systems provide debug information for the system Python +in a package called ``python-debuginfo``, ``python-dbg`` or similar. +For example: + +- Fedora: + + .. code-block:: shell + + sudo dnf install gdb + sudo dnf debuginfo-install python3 + +- Ubuntu: + + .. code-block:: shell + + sudo apt install gdb python3-dbg + +On several recent Linux systems, GDB can download debugging symbols +automatically using *debuginfod*. +However, this will not install the ``python-gdb.py`` extension; +you generally do need to install the debug info package separately. 
+ + +Using the Debug build and Development mode +========================================== + +For easier debugging, you might want to: + +- Use a :ref:`debug build <debug-build>` of Python. (When building from source, + use ``configure --with-pydebug``. On Linux distros, install and run a package + like ``python-debug`` or ``python-dbg``, if available.) +- Use the runtime :ref:`development mode <devmode>` (``-X dev``). + +Both enable extra assertions and disable some optimizations. +Sometimes this hides the bug you are trying to find, but in most cases they +make the process easier. + + +Using the ``python-gdb`` extension +================================== + +When the extension is loaded, it provides two main features: +pretty printers for Python values, and additional commands. + +Pretty-printers +--------------- + +This is what a GDB backtrace looks like (truncated) when this extension is +enabled:: + + #0 0x000000000041a6b1 in PyObject_Malloc (nbytes=Cannot access memory at address 0x7fffff7fefe8 + ) at Objects/obmalloc.c:748 + #1 0x000000000041b7c0 in _PyObject_DebugMallocApi (id=111 'o', nbytes=24) at Objects/obmalloc.c:1445 + #2 0x000000000041b717 in _PyObject_DebugMalloc (nbytes=24) at Objects/obmalloc.c:1412 + #3 0x000000000044060a in _PyUnicode_New (length=11) at Objects/unicodeobject.c:346 + #4 0x00000000004466aa in PyUnicodeUCS2_DecodeUTF8Stateful (s=0x5c2b8d "__lltrace__", size=11, errors=0x0, consumed= + 0x0) at Objects/unicodeobject.c:2531 + #5 0x0000000000446647 in PyUnicodeUCS2_DecodeUTF8 (s=0x5c2b8d "__lltrace__", size=11, errors=0x0) + at Objects/unicodeobject.c:2495 + #6 0x0000000000440d1b in PyUnicodeUCS2_FromStringAndSize (u=0x5c2b8d "__lltrace__", size=11) + at Objects/unicodeobject.c:551 + #7 0x0000000000440d94 in PyUnicodeUCS2_FromString (u=0x5c2b8d "__lltrace__") at Objects/unicodeobject.c:569 + #8 0x0000000000584abd in PyDict_GetItemString (v= + {'Yuck': , '__builtins__': , '__file__': 'Lib/test/crashers/nasty_eq_vs_dict.py', '__package__': None, 'y': ,
'dict': {0: 0, 1: 1, 2: 2, 3: 3}, '__cached__': None, '__name__': '__main__', 'z': , '__doc__': None}, key= + 0x5c2b8d "__lltrace__") at Objects/dictobject.c:2171 + +Notice how the dictionary argument to ``PyDict_GetItemString`` is displayed +as its ``repr()``, rather than an opaque ``PyObject *`` pointer. + +The extension works by supplying a custom printing routine for values of type +``PyObject *``. If you need to access lower-level details of an object, then +cast the value to a pointer of the appropriate type. For example:: + + (gdb) p globals + $1 = {'__builtins__': , '__name__': + '__main__', 'ctypes': , '__doc__': None, + '__package__': None} + + (gdb) p *(PyDictObject*)globals + $2 = {ob_refcnt = 3, ob_type = 0x3dbdf85820, ma_fill = 5, ma_used = 5, + ma_mask = 7, ma_table = 0x63d0f8, ma_lookup = 0x3dbdc7ea70 + , ma_smalltable = {{me_hash = 7065186196740147912, + me_key = '__builtins__', me_value = }, + {me_hash = -368181376027291943, me_key = '__name__', + me_value ='__main__'}, {me_hash = 0, me_key = 0x0, me_value = 0x0}, + {me_hash = 0, me_key = 0x0, me_value = 0x0}, + {me_hash = -9177857982131165996, me_key = 'ctypes', + me_value = }, + {me_hash = -8518757509529533123, me_key = '__doc__', me_value = None}, + {me_hash = 0, me_key = 0x0, me_value = 0x0}, { + me_hash = 6614918939584953775, me_key = '__package__', me_value = None}}} + +Note that the pretty-printers do not actually call ``repr()``. +For basic types, they try to match its result closely. + +An area that can be confusing is that the custom printer for some types looks a +lot like GDB's built-in printer for standard types.
For example, the +pretty-printer for a Python ``int`` (:c:expr:`PyLongObject *`) +gives a representation that is not distinguishable from one of a +regular machine-level integer:: + + (gdb) p some_machine_integer + $3 = 42 + + (gdb) p some_python_integer + $4 = 42 + +The internal structure can be revealed with a cast to :c:expr:`PyLongObject *`:: + + (gdb) p *(PyLongObject*)some_python_integer + $5 = {ob_base = {ob_base = {ob_refcnt = 8, ob_type = 0x3dad39f5e0}, ob_size = 1}, + ob_digit = {42}} + +A similar confusion can arise with the ``str`` type, where the output looks a +lot like GDB's built-in printer for ``char *``:: + + (gdb) p ptr_to_python_str + $6 = '__builtins__' + +The pretty-printer for ``str`` instances defaults to using single-quotes (as +does Python's ``repr`` for strings) whereas the standard printer for ``char *`` +values uses double-quotes and contains a hexadecimal address:: + + (gdb) p ptr_to_char_star + $7 = 0x6d72c0 "hello world" + +Again, the implementation details can be revealed with a cast to +:c:expr:`PyUnicodeObject *`:: + + (gdb) p *(PyUnicodeObject*)$6 + $8 = {ob_base = {ob_refcnt = 33, ob_type = 0x3dad3a95a0}, length = 12, + str = 0x7ffff2128500, hash = 7065186196740147912, state = 1, defenc = 0x0} + +``py-list`` +----------- + + The extension adds a ``py-list`` command, which + lists the Python source code (if any) for the current frame in the selected + thread. The current line is marked with a ">":: + + (gdb) py-list + 901 if options.profile: + 902 options.profile = False + 903 profile_me() + 904 return + 905 + >906 u = UI() + 907 if not u.quit: + 908 try: + 909 gtk.main() + 910 except KeyboardInterrupt: + 911 # properly quit on a keyboard interrupt... + + Use ``py-list START`` to list at a different line number within the Python + source, and ``py-list START,END`` to list a specific range of lines within + the Python source.
+ +``py-up`` and ``py-down`` +------------------------- + + The ``py-up`` and ``py-down`` commands are analogous to GDB's regular ``up`` + and ``down`` commands, but try to move at the level of CPython frames, rather + than C frames. + + GDB is not always able to read the relevant frame information, depending on + the optimization level with which CPython was compiled. Internally, the + commands look for C frames that are executing the default frame evaluation + function (that is, the core bytecode interpreter loop within CPython) and + look up the value of the related ``PyFrameObject *``. + + They emit the frame number (at the C level) within the thread. + + For example:: + + (gdb) py-up + #37 Frame 0x9420b04, for file /usr/lib/python2.6/site-packages/ + gnome_sudoku/main.py, line 906, in start_game () + u = UI() + (gdb) py-up + #40 Frame 0x948e82c, for file /usr/lib/python2.6/site-packages/ + gnome_sudoku/gnome_sudoku.py, line 22, in start_game(main=) + main.start_game() + (gdb) py-up + Unable to find an older python frame + + so we're at the top of the Python stack. + + The frame numbers correspond to those displayed by GDB's standard + ``backtrace`` command. + The command skips C frames which are not executing Python code. 
+ + Going back down:: + + (gdb) py-down + #37 Frame 0x9420b04, for file /usr/lib/python2.6/site-packages/gnome_sudoku/main.py, line 906, in start_game () + u = UI() + (gdb) py-down + #34 (unable to read python frame information) + (gdb) py-down + #23 (unable to read python frame information) + (gdb) py-down + #19 (unable to read python frame information) + (gdb) py-down + #14 Frame 0x99262ac, for file /usr/lib/python2.6/site-packages/gnome_sudoku/game_selector.py, line 201, in run_swallowed_dialog (self=, puzzle=None, saved_games=[{'gsd.auto_fills': 0, 'tracking': {}, 'trackers': {}, 'notes': [], 'saved_at': 1270084485, 'game': '7 8 0 0 0 0 0 5 6 0 0 9 0 8 0 1 0 0 0 4 6 0 0 0 0 7 0 6 5 0 0 0 4 7 9 2 0 0 0 9 0 1 0 0 0 3 9 7 6 0 0 0 1 8 0 6 0 0 0 0 2 8 0 0 0 5 0 4 0 6 0 0 2 1 0 0 0 0 0 4 5\n7 8 0 0 0 0 0 5 6 0 0 9 0 8 0 1 0 0 0 4 6 0 0 0 0 7 0 6 5 1 8 3 4 7 9 2 0 0 0 9 0 1 0 0 0 3 9 7 6 0 0 0 1 8 0 6 0 0 0 0 2 8 0 0 0 5 0 4 0 6 0 0 2 1 0 0 0 0 0 4 5', 'gsd.impossible_hints': 0, 'timer.__absolute_start_time__': , 'gsd.hints': 0, 'timer.active_time': , 'timer.total_time': }], dialog=, saved_game_model=, sudoku_maker=, main_page=0) at remote 0x98fa6e4>, d=) + gtk.main() + (gdb) py-down + #8 (unable to read python frame information) + (gdb) py-down + Unable to find a newer python frame + + and we're at the bottom of the Python stack. + + Note that in Python 3.12 and newer, the same C stack frame can be used for + multiple Python stack frames. This means that ``py-up`` and ``py-down`` + may move multiple Python frames at once. 
For example:: + + (gdb) py-up + #6 Frame 0x7ffff7fb62b0, for file /tmp/rec.py, line 5, in recursive_function (n=0) + time.sleep(5) + #6 Frame 0x7ffff7fb6240, for file /tmp/rec.py, line 7, in recursive_function (n=1) + recursive_function(n-1) + #6 Frame 0x7ffff7fb61d0, for file /tmp/rec.py, line 7, in recursive_function (n=2) + recursive_function(n-1) + #6 Frame 0x7ffff7fb6160, for file /tmp/rec.py, line 7, in recursive_function (n=3) + recursive_function(n-1) + #6 Frame 0x7ffff7fb60f0, for file /tmp/rec.py, line 7, in recursive_function (n=4) + recursive_function(n-1) + #6 Frame 0x7ffff7fb6080, for file /tmp/rec.py, line 7, in recursive_function (n=5) + recursive_function(n-1) + #6 Frame 0x7ffff7fb6020, for file /tmp/rec.py, line 9, in () + recursive_function(5) + (gdb) py-up + Unable to find an older python frame + + +``py-bt`` +--------- + + The ``py-bt`` command attempts to display a Python-level backtrace of the + current thread. + + For example:: + + (gdb) py-bt + #8 (unable to read python frame information) + #11 Frame 0x9aead74, for file /usr/lib/python2.6/site-packages/gnome_sudoku/dialog_swallower.py, line 48, in run_dialog (self=, main_page=0) at remote 0x98fa6e4>, d=) + gtk.main() + #14 Frame 0x99262ac, for file /usr/lib/python2.6/site-packages/gnome_sudoku/game_selector.py, line 201, in run_swallowed_dialog (self=, puzzle=None, saved_games=[{'gsd.auto_fills': 0, 'tracking': {}, 'trackers': {}, 'notes': [], 'saved_at': 1270084485, 'game': '7 8 0 0 0 0 0 5 6 0 0 9 0 8 0 1 0 0 0 4 6 0 0 0 0 7 0 6 5 0 0 0 4 7 9 2 0 0 0 9 0 1 0 0 0 3 9 7 6 0 0 0 1 8 0 6 0 0 0 0 2 8 0 0 0 5 0 4 0 6 0 0 2 1 0 0 0 0 0 4 5\n7 8 0 0 0 0 0 5 6 0 0 9 0 8 0 1 0 0 0 4 6 0 0 0 0 7 0 6 5 1 8 3 4 7 9 2 0 0 0 9 0 1 0 0 0 3 9 7 6 0 0 0 1 8 0 6 0 0 0 0 2 8 0 0 0 5 0 4 0 6 0 0 2 1 0 0 0 0 0 4 5', 'gsd.impossible_hints': 0, 'timer.__absolute_start_time__': , 'gsd.hints': 0, 'timer.active_time': , 'timer.total_time': }], dialog=, saved_game_model=, sudoku_maker=) + main.start_game() + + The 
frame numbers correspond to those displayed by GDB's standard + ``backtrace`` command. + +``py-print`` +------------ + + The ``py-print`` command looks up a Python name and tries to print it. + It looks in locals within the current thread, then globals, then finally + builtins:: + + (gdb) py-print self + local 'self' = , + main_page=0) at remote 0x98fa6e4> + (gdb) py-print __name__ + global '__name__' = 'gnome_sudoku.dialog_swallower' + (gdb) py-print len + builtin 'len' = + (gdb) py-print scarlet_pimpernel + 'scarlet_pimpernel' not found + + If the current C frame corresponds to multiple Python frames, ``py-print`` + only considers the first one. + +``py-locals`` +------------- + + The ``py-locals`` command looks up all Python locals within the current + Python frame in the selected thread, and prints their representations:: + + (gdb) py-locals + self = , + main_page=0) at remote 0x98fa6e4> + d = + + If the current C frame corresponds to multiple Python frames, locals from + all of them will be shown:: + + (gdb) py-locals + Locals for recursive_function + n = 0 + Locals for recursive_function + n = 1 + Locals for recursive_function + n = 2 + Locals for recursive_function + n = 3 + Locals for recursive_function + n = 4 + Locals for recursive_function + n = 5 + Locals for + + +Use with GDB commands +===================== + +The extension commands complement GDB's built-in commands. 
+For example, you can use the frame numbers shown by ``py-bt`` with the ``frame`` +command to go to a specific frame within the selected thread, like this:: + + (gdb) py-bt + (output snipped) + #68 Frame 0xaa4560, for file Lib/test/regrtest.py, line 1548, in <module> () + main() + (gdb) frame 68 + #68 0x00000000004cd1e6 in PyEval_EvalFrameEx (f=Frame 0xaa4560, for file Lib/test/regrtest.py, line 1548, in <module> (), throwflag=0) at Python/ceval.c:2665 + 2665 x = call_function(&sp, oparg); + (gdb) py-list + 1543 # Run the tests in a context manager that temporary changes the CWD to a + 1544 # temporary and writable directory. If it's not possible to create or + 1545 # change the CWD, the original CWD will be used. The original CWD is + 1546 # available from test_support.SAVEDCWD. + 1547 with test_support.temp_cwd(TESTCWD, quiet=True): + >1548 main() + +The ``info threads`` command will give you a list of the threads within the +process, and you can use the ``thread`` command to select a different one:: + + (gdb) info threads + 105 Thread 0x7fffefa18710 (LWP 10260) sem_wait () at ../nptl/sysdeps/unix/sysv/linux/x86_64/sem_wait.S:86 + 104 Thread 0x7fffdf5fe710 (LWP 10259) sem_wait () at ../nptl/sysdeps/unix/sysv/linux/x86_64/sem_wait.S:86 + * 1 Thread 0x7ffff7fe2700 (LWP 10145) 0x00000038e46d73e3 in select () at ../sysdeps/unix/syscall-template.S:82 + +You can use ``thread apply all COMMAND`` (or ``t a a COMMAND`` for short) to run +a command on all threads.
With ``py-bt``, this lets you see what every +thread is doing at the Python level:: + + (gdb) t a a py-bt + + Thread 105 (Thread 0x7fffefa18710 (LWP 10260)): + #5 Frame 0x7fffd00019d0, for file /home/david/coding/python-svn/Lib/threading.py, line 155, in _acquire_restore (self=<_RLock(_Verbose__verbose=False, _RLock__owner=140737354016512, _RLock__block=, _RLock__count=1) at remote 0xd7ff40>, count_owner=(1, 140737213728528), count=1, owner=140737213728528) + self.__block.acquire() + #8 Frame 0x7fffac001640, for file /home/david/coding/python-svn/Lib/threading.py, line 269, in wait (self=<_Condition(_Condition__lock=<_RLock(_Verbose__verbose=False, _RLock__owner=140737354016512, _RLock__block=, _RLock__count=1) at remote 0xd7ff40>, acquire=, _is_owned=, _release_save=, release=, _acquire_restore=, _Verbose__verbose=False, _Condition__waiters=[]) at remote 0xd7fd10>, timeout=None, waiter=, saved_state=(1, 140737213728528)) + self._acquire_restore(saved_state) + #12 Frame 0x7fffb8001a10, for file /home/david/coding/python-svn/Lib/test/lock_tests.py, line 348, in f () + cond.wait() + #16 Frame 0x7fffb8001c40, for file /home/david/coding/python-svn/Lib/test/lock_tests.py, line 37, in task (tid=140737213728528) + f() + + Thread 104 (Thread 0x7fffdf5fe710 (LWP 10259)): + #5 Frame 0x7fffe4001580, for file /home/david/coding/python-svn/Lib/threading.py, line 155, in _acquire_restore (self=<_RLock(_Verbose__verbose=False, _RLock__owner=140737354016512, _RLock__block=, _RLock__count=1) at remote 0xd7ff40>, count_owner=(1, 140736940992272), count=1, owner=140736940992272) + self.__block.acquire() + #8 Frame 0x7fffc8002090, for file /home/david/coding/python-svn/Lib/threading.py, line 269, in wait (self=<_Condition(_Condition__lock=<_RLock(_Verbose__verbose=False, _RLock__owner=140737354016512, _RLock__block=, _RLock__count=1) at remote 0xd7ff40>, acquire=, _is_owned=, _release_save=, release=, _acquire_restore=, _Verbose__verbose=False, _Condition__waiters=[]) at remote 
0xd7fd10>, timeout=None, waiter=, saved_state=(1, 140736940992272)) + self._acquire_restore(saved_state) + #12 Frame 0x7fffac001c90, for file /home/david/coding/python-svn/Lib/test/lock_tests.py, line 348, in f () + cond.wait() + #16 Frame 0x7fffac0011c0, for file /home/david/coding/python-svn/Lib/test/lock_tests.py, line 37, in task (tid=140736940992272) + f() + + Thread 1 (Thread 0x7ffff7fe2700 (LWP 10145)): + #5 Frame 0xcb5380, for file /home/david/coding/python-svn/Lib/test/lock_tests.py, line 16, in _wait () + time.sleep(0.01) + #8 Frame 0x7fffd00024a0, for file /home/david/coding/python-svn/Lib/test/lock_tests.py, line 378, in _check_notify (self=, skipped=[], _mirrorOutput=False, testsRun=39, buffer=False, _original_stderr=, _stdout_buffer=, _stderr_buffer=, _moduleSetUpFailed=False, expectedFailures=[], errors=[], _previousTestClass=, unexpectedSuccesses=[], failures=[], shouldStop=False, failfast=False) at remote 0xc185a0>, _threads=(0,), _cleanups=[], _type_equality_funcs={: , : , : , : , `: + +.. code-block:: none + + checking for --with-dtrace... yes + +On macOS, you can list available DTrace probes by running a Python +process in the background and listing all probes made available by the +Python provider:: + + $ python3.6 -q & + $ sudo dtrace -l -P python$! # or: dtrace -l -m python3.6 + + ID PROVIDER MODULE FUNCTION NAME + 29564 python18035 python3.6 _PyEval_EvalFrameDefault function-entry + 29565 python18035 python3.6 dtrace_function_entry function-entry + 29566 python18035 python3.6 _PyEval_EvalFrameDefault function-return + 29567 python18035 python3.6 dtrace_function_return function-return + 29568 python18035 python3.6 collect gc-done + 29569 python18035 python3.6 collect gc-start + 29570 python18035 python3.6 _PyEval_EvalFrameDefault line + 29571 python18035 python3.6 maybe_dtrace_line line + +On Linux, you can verify if the SystemTap static markers are present in +the built binary by seeing if it contains a ".note.stapsdt" section. 
+ +:: + + $ readelf -S ./python | grep .note.stapsdt + [30] .note.stapsdt NOTE 0000000000000000 00308d78 + +If you've built Python as a shared library +(with the :option:`--enable-shared` configure option), you +need to look instead within the shared library. For example:: + + $ readelf -S libpython3.3dm.so.1.0 | grep .note.stapsdt + [29] .note.stapsdt NOTE 0000000000000000 00365b68 + +Sufficiently modern readelf can print the metadata:: + + $ readelf -n ./python + + Displaying notes found at file offset 0x00000254 with length 0x00000020: + Owner Data size Description + GNU 0x00000010 NT_GNU_ABI_TAG (ABI version tag) + OS: Linux, ABI: 2.6.32 + + Displaying notes found at file offset 0x00000274 with length 0x00000024: + Owner Data size Description + GNU 0x00000014 NT_GNU_BUILD_ID (unique build ID bitstring) + Build ID: df924a2b08a7e89f6e11251d4602022977af2670 + + Displaying notes found at file offset 0x002d6c30 with length 0x00000144: + Owner Data size Description + stapsdt 0x00000031 NT_STAPSDT (SystemTap probe descriptors) + Provider: python + Name: gc__start + Location: 0x00000000004371c3, Base: 0x0000000000630ce2, Semaphore: 0x00000000008d6bf6 + Arguments: -4@%ebx + stapsdt 0x00000030 NT_STAPSDT (SystemTap probe descriptors) + Provider: python + Name: gc__done + Location: 0x00000000004374e1, Base: 0x0000000000630ce2, Semaphore: 0x00000000008d6bf8 + Arguments: -8@%rax + stapsdt 0x00000045 NT_STAPSDT (SystemTap probe descriptors) + Provider: python + Name: function__entry + Location: 0x000000000053db6c, Base: 0x0000000000630ce2, Semaphore: 0x00000000008d6be8 + Arguments: 8@%rbp 8@%r12 -4@%eax + stapsdt 0x00000046 NT_STAPSDT (SystemTap probe descriptors) + Provider: python + Name: function__return + Location: 0x000000000053dba8, Base: 0x0000000000630ce2, Semaphore: 0x00000000008d6bea + Arguments: 8@%rbp 8@%r12 -4@%eax + +The above metadata contains information for SystemTap describing how it +can patch strategically placed machine code instructions to enable the 
+tracing hooks used by a SystemTap script. + + +Static DTrace probes +-------------------- + +The following example DTrace script can be used to show the call/return +hierarchy of a Python script, only tracing within the invocation of +a function called "start". In other words, import-time function +invocations are not going to be listed: + +.. code-block:: none + + self int indent; + + python$target:::function-entry + /copyinstr(arg1) == "start"/ + { + self->trace = 1; + } + + python$target:::function-entry + /self->trace/ + { + printf("%d\t%*s:", timestamp, 15, probename); + printf("%*s", self->indent, ""); + printf("%s:%s:%d\n", basename(copyinstr(arg0)), copyinstr(arg1), arg2); + self->indent++; + } + + python$target:::function-return + /self->trace/ + { + self->indent--; + printf("%d\t%*s:", timestamp, 15, probename); + printf("%*s", self->indent, ""); + printf("%s:%s:%d\n", basename(copyinstr(arg0)), copyinstr(arg1), arg2); + } + + python$target:::function-return + /copyinstr(arg1) == "start"/ + { + self->trace = 0; + } + +It can be invoked like this:: + + $ sudo dtrace -q -s call_stack.d -c "python3.6 script.py" + +The output looks like this: + +.. 
code-block:: none + + 156641360502280 function-entry:call_stack.py:start:23 + 156641360518804 function-entry: call_stack.py:function_1:1 + 156641360532797 function-entry: call_stack.py:function_3:9 + 156641360546807 function-return: call_stack.py:function_3:10 + 156641360563367 function-return: call_stack.py:function_1:2 + 156641360578365 function-entry: call_stack.py:function_2:5 + 156641360591757 function-entry: call_stack.py:function_1:1 + 156641360605556 function-entry: call_stack.py:function_3:9 + 156641360617482 function-return: call_stack.py:function_3:10 + 156641360629814 function-return: call_stack.py:function_1:2 + 156641360642285 function-return: call_stack.py:function_2:6 + 156641360656770 function-entry: call_stack.py:function_3:9 + 156641360669707 function-return: call_stack.py:function_3:10 + 156641360687853 function-entry: call_stack.py:function_4:13 + 156641360700719 function-return: call_stack.py:function_4:14 + 156641360719640 function-entry: call_stack.py:function_5:18 + 156641360732567 function-return: call_stack.py:function_5:21 + 156641360747370 function-return:call_stack.py:start:28 + + +Static SystemTap markers +------------------------ + +The low-level way to use the SystemTap integration is to use the static +markers directly. This requires you to explicitly state the binary file +containing them. + +For example, this SystemTap script can be used to show the call/return +hierarchy of a Python script: + +.. 
code-block:: none + + probe process("python").mark("function__entry") { + filename = user_string($arg1); + funcname = user_string($arg2); + lineno = $arg3; + + printf("%s => %s in %s:%d\n", + thread_indent(1), funcname, filename, lineno); + } + + probe process("python").mark("function__return") { + filename = user_string($arg1); + funcname = user_string($arg2); + lineno = $arg3; + + printf("%s <= %s in %s:%d\n", + thread_indent(-1), funcname, filename, lineno); + } + +It can be invoked like this:: + + $ stap \ + show-call-hierarchy.stp \ + -c "./python test.py" + +The output looks like this: + +.. code-block:: none + + 11408 python(8274): => __contains__ in Lib/_abcoll.py:362 + 11414 python(8274): => __getitem__ in Lib/os.py:425 + 11418 python(8274): => encode in Lib/os.py:490 + 11424 python(8274): <= encode in Lib/os.py:493 + 11428 python(8274): <= __getitem__ in Lib/os.py:426 + 11433 python(8274): <= __contains__ in Lib/_abcoll.py:366 + +where the columns are: + +- time in microseconds since start of script +- name of executable +- PID of process + +and the remainder indicates the call/return hierarchy as the script executes. + +For a :option:`--enable-shared` build of CPython, the markers are contained within the +libpython shared library, and the probe's dotted path needs to reflect this. For +example, this line from the above example: + +.. code-block:: none + + probe process("python").mark("function__entry") { + +should instead read: + +.. code-block:: none + + probe process("python").library("libpython3.6dm.so.1.0").mark("function__entry") { + +(assuming a :ref:`debug build <debug-build>` of CPython 3.6) + + +Available static markers +------------------------ + +.. object:: function__entry(str filename, str funcname, int lineno) + + This marker indicates that execution of a Python function has begun. + It is only triggered for pure-Python (bytecode) functions.
+ + The filename, function name, and line number are provided back to the + tracing script as positional arguments, which must be accessed using + ``$arg1``, ``$arg2``, ``$arg3``: + + * ``$arg1`` : ``(const char *)`` filename, accessible using ``user_string($arg1)`` + + * ``$arg2`` : ``(const char *)`` function name, accessible using + ``user_string($arg2)`` + + * ``$arg3`` : ``int`` line number + +.. object:: function__return(str filename, str funcname, int lineno) + + This marker is the converse of :c:func:`!function__entry`, and indicates that + execution of a Python function has ended (either via ``return``, or via an + exception). It is only triggered for pure-Python (bytecode) functions. + + The arguments are the same as for :c:func:`!function__entry` + +.. object:: line(str filename, str funcname, int lineno) + + This marker indicates a Python line is about to be executed. It is + the equivalent of line-by-line tracing with a Python profiler. It is + not triggered within C functions. + + The arguments are the same as for :c:func:`!function__entry`. + +.. object:: gc__start(int generation) + + Fires when the Python interpreter starts a garbage collection cycle. + ``arg0`` is the generation to scan, like :func:`gc.collect`. + +.. object:: gc__done(long collected) + + Fires when the Python interpreter finishes a garbage collection + cycle. ``arg0`` is the number of collected objects. + +.. object:: import__find__load__start(str modulename) + + Fires before :mod:`importlib` attempts to find and load the module. + ``arg0`` is the module name. + + .. versionadded:: 3.7 + +.. object:: import__find__load__done(str modulename, int found) + + Fires after :mod:`importlib`'s find_and_load function is called. + ``arg0`` is the module name, ``arg1`` indicates if module was + successfully loaded. + + .. versionadded:: 3.7 + + +.. object:: audit(str event, void *tuple) + + Fires when :func:`sys.audit` or :c:func:`PySys_Audit` is called. 
+ ``arg0`` is the event name as C string, ``arg1`` is a :c:type:`PyObject` + pointer to a tuple object. + + .. versionadded:: 3.8 + + +SystemTap Tapsets +----------------- + +The higher-level way to use the SystemTap integration is to use a "tapset": +SystemTap's equivalent of a library, which hides some of the lower-level +details of the static markers. + +Here is a tapset file, based on a non-shared build of CPython: + +.. code-block:: none + + /* + Provide a higher-level wrapping around the function__entry and + function__return markers: + \*/ + probe python.function.entry = process("python").mark("function__entry") + { + filename = user_string($arg1); + funcname = user_string($arg2); + lineno = $arg3; + frameptr = $arg4 + } + probe python.function.return = process("python").mark("function__return") + { + filename = user_string($arg1); + funcname = user_string($arg2); + lineno = $arg3; + frameptr = $arg4 + } + +If this file is installed in SystemTap's tapset directory (e.g. +``/usr/share/systemtap/tapset``), then these additional probepoints become +available: + +.. object:: python.function.entry(str filename, str funcname, int lineno, frameptr) + + This probe point indicates that execution of a Python function has begun. + It is only triggered for pure-Python (bytecode) functions. + +.. object:: python.function.return(str filename, str funcname, int lineno, frameptr) + + This probe point is the converse of ``python.function.entry``, and + indicates that execution of a Python function has ended (either via + ``return``, or via an exception). It is only triggered for pure-Python + (bytecode) functions. + + +Examples +-------- +This SystemTap script uses the tapset above to more cleanly implement the +example given above of tracing the Python function-call hierarchy, without +needing to directly name the static markers: + +.. 
code-block:: none + + probe python.function.entry + { + printf("%s => %s in %s:%d\n", + thread_indent(1), funcname, filename, lineno); + } + + probe python.function.return + { + printf("%s <= %s in %s:%d\n", + thread_indent(-1), funcname, filename, lineno); + } + + +The following script uses the tapset above to provide a top-like view of all +running CPython code, showing the top 20 most frequently entered bytecode +frames, each second, across the whole system: + +.. code-block:: none + + global fn_calls; + + probe python.function.entry + { + fn_calls[pid(), filename, funcname, lineno] += 1; + } + + probe timer.ms(1000) { + printf("\033[2J\033[1;1H") /* clear screen \*/ + printf("%6s %80s %6s %30s %6s\n", + "PID", "FILENAME", "LINE", "FUNCTION", "CALLS") + foreach ([pid, filename, funcname, lineno] in fn_calls- limit 20) { + printf("%6d %80s %6d %30s %6d\n", + pid, filename, lineno, funcname, + fn_calls[pid, filename, funcname, lineno]); + } + delete fn_calls; + } + diff --git a/pyperformance/data-files/benchmarks/bm_sphinx/data/Doc/howto/ipaddress.rst b/pyperformance/data-files/benchmarks/bm_sphinx/data/Doc/howto/ipaddress.rst new file mode 100644 index 00000000..e852db98 --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_sphinx/data/Doc/howto/ipaddress.rst @@ -0,0 +1,340 @@ +.. testsetup:: + + import ipaddress + +.. _ipaddress-howto: + +*************************************** +An introduction to the ipaddress module +*************************************** + +:author: Peter Moody +:author: Nick Coghlan + +.. topic:: Overview + + This document aims to provide a gentle introduction to the + :mod:`ipaddress` module. It is aimed primarily at users that aren't + already familiar with IP networking terminology, but may also be useful + to network engineers wanting an overview of how :mod:`ipaddress` + represents IP network addressing concepts. 
+ + +Creating Address/Network/Interface objects +========================================== + +Since :mod:`ipaddress` is a module for inspecting and manipulating IP addresses, +the first thing you'll want to do is create some objects. You can use +:mod:`ipaddress` to create objects from strings and integers. + + +A Note on IP Versions +--------------------- + +For readers that aren't particularly familiar with IP addressing, it's +important to know that the Internet Protocol (IP) is currently in the process +of moving from version 4 of the protocol to version 6. This transition is +occurring largely because version 4 of the protocol doesn't provide enough +addresses to handle the needs of the whole world, especially given the +increasing number of devices with direct connections to the internet. + +Explaining the details of the differences between the two versions of the +protocol is beyond the scope of this introduction, but readers need to at +least be aware that these two versions exist, and it will sometimes be +necessary to force the use of one version or the other. + + +IP Host Addresses +----------------- + +Addresses, often referred to as "host addresses" are the most basic unit +when working with IP addressing. The simplest way to create addresses is +to use the :func:`ipaddress.ip_address` factory function, which automatically +determines whether to create an IPv4 or IPv6 address based on the passed in +value: + + >>> ipaddress.ip_address('192.0.2.1') + IPv4Address('192.0.2.1') + >>> ipaddress.ip_address('2001:DB8::1') + IPv6Address('2001:db8::1') + +Addresses can also be created directly from integers. Values that will +fit within 32 bits are assumed to be IPv4 addresses:: + + >>> ipaddress.ip_address(3221225985) + IPv4Address('192.0.2.1') + >>> ipaddress.ip_address(42540766411282592856903984951653826561) + IPv6Address('2001:db8::1') + +To force the use of IPv4 or IPv6 addresses, the relevant classes can be +invoked directly. 
This is particularly useful to force creation of IPv6 +addresses for small integers:: + + >>> ipaddress.ip_address(1) + IPv4Address('0.0.0.1') + >>> ipaddress.IPv4Address(1) + IPv4Address('0.0.0.1') + >>> ipaddress.IPv6Address(1) + IPv6Address('::1') + + +Defining Networks +----------------- + +Host addresses are usually grouped together into IP networks, so +:mod:`ipaddress` provides a way to create, inspect and manipulate network +definitions. IP network objects are constructed from strings that define the +range of host addresses that are part of that network. The simplest form +for that information is a "network address/network prefix" pair, where the +prefix defines the number of leading bits that are compared to determine +whether or not an address is part of the network and the network address +defines the expected value of those bits. + +As for addresses, a factory function is provided that determines the correct +IP version automatically:: + + >>> ipaddress.ip_network('192.0.2.0/24') + IPv4Network('192.0.2.0/24') + >>> ipaddress.ip_network('2001:db8::0/96') + IPv6Network('2001:db8::/96') + +Network objects cannot have any host bits set. The practical effect of this +is that ``192.0.2.1/24`` does not describe a network. Such definitions are +referred to as interface objects since the ip-on-a-network notation is +commonly used to describe network interfaces of a computer on a given network +and are described further in the next section. + +By default, attempting to create a network object with host bits set will +result in :exc:`ValueError` being raised. To request that the +additional bits instead be coerced to zero, the flag ``strict=False`` can +be passed to the constructor:: + + >>> ipaddress.ip_network('192.0.2.1/24') + Traceback (most recent call last): + ... 
+ ValueError: 192.0.2.1/24 has host bits set + >>> ipaddress.ip_network('192.0.2.1/24', strict=False) + IPv4Network('192.0.2.0/24') + +While the string form offers significantly more flexibility, networks can +also be defined with integers, just like host addresses. In this case, the +network is considered to contain only the single address identified by the +integer, so the network prefix includes the entire network address:: + + >>> ipaddress.ip_network(3221225984) + IPv4Network('192.0.2.0/32') + >>> ipaddress.ip_network(42540766411282592856903984951653826560) + IPv6Network('2001:db8::/128') + +As with addresses, creation of a particular kind of network can be forced +by calling the class constructor directly instead of using the factory +function. + + +Host Interfaces +--------------- + +As mentioned just above, if you need to describe an address on a particular +network, neither the address nor the network classes are sufficient. +Notation like ``192.0.2.1/24`` is commonly used by network engineers and the +people who write tools for firewalls and routers as shorthand for "the host +``192.0.2.1`` on the network ``192.0.2.0/24``". Accordingly, :mod:`ipaddress` +provides a set of hybrid classes that associate an address with a particular +network. The interface for creation is identical to that for defining network +objects, except that the address portion isn't constrained to being a network +address. + + >>> ipaddress.ip_interface('192.0.2.1/24') + IPv4Interface('192.0.2.1/24') + >>> ipaddress.ip_interface('2001:db8::1/96') + IPv6Interface('2001:db8::1/96') + +Integer inputs are accepted (as with networks), and use of a particular IP +version can be forced by calling the relevant constructor directly. + + +Inspecting Address/Network/Interface Objects +============================================ + +You've gone to the trouble of creating an IPv(4|6)(Address|Network|Interface) +object, so you probably want to get information about it. 
:mod:`ipaddress` +tries to make doing this easy and intuitive. + +Extracting the IP version:: + + >>> addr4 = ipaddress.ip_address('192.0.2.1') + >>> addr6 = ipaddress.ip_address('2001:db8::1') + >>> addr6.version + 6 + >>> addr4.version + 4 + +Obtaining the network from an interface:: + + >>> host4 = ipaddress.ip_interface('192.0.2.1/24') + >>> host4.network + IPv4Network('192.0.2.0/24') + >>> host6 = ipaddress.ip_interface('2001:db8::1/96') + >>> host6.network + IPv6Network('2001:db8::/96') + +Finding out how many individual addresses are in a network:: + + >>> net4 = ipaddress.ip_network('192.0.2.0/24') + >>> net4.num_addresses + 256 + >>> net6 = ipaddress.ip_network('2001:db8::0/96') + >>> net6.num_addresses + 4294967296 + +Iterating through the "usable" addresses on a network:: + + >>> net4 = ipaddress.ip_network('192.0.2.0/24') + >>> for x in net4.hosts(): + ... print(x) # doctest: +ELLIPSIS + 192.0.2.1 + 192.0.2.2 + 192.0.2.3 + 192.0.2.4 + ... + 192.0.2.252 + 192.0.2.253 + 192.0.2.254 + + +Obtaining the netmask (i.e. 
set bits corresponding to the network prefix) or +the hostmask (any bits that are not part of the netmask): + + >>> net4 = ipaddress.ip_network('192.0.2.0/24') + >>> net4.netmask + IPv4Address('255.255.255.0') + >>> net4.hostmask + IPv4Address('0.0.0.255') + >>> net6 = ipaddress.ip_network('2001:db8::0/96') + >>> net6.netmask + IPv6Address('ffff:ffff:ffff:ffff:ffff:ffff::') + >>> net6.hostmask + IPv6Address('::ffff:ffff') + + +Exploding or compressing the address:: + + >>> addr6.exploded + '2001:0db8:0000:0000:0000:0000:0000:0001' + >>> addr6.compressed + '2001:db8::1' + >>> net6.exploded + '2001:0db8:0000:0000:0000:0000:0000:0000/96' + >>> net6.compressed + '2001:db8::/96' + +While IPv4 doesn't support explosion or compression, the associated objects +still provide the relevant properties so that version neutral code can +easily ensure the most concise or most verbose form is used for IPv6 +addresses while still correctly handling IPv4 addresses. + + +Networks as lists of Addresses +============================== + +It's sometimes useful to treat networks as lists. 
This means it is possible +to index them like this:: + + >>> net4[1] + IPv4Address('192.0.2.1') + >>> net4[-1] + IPv4Address('192.0.2.255') + >>> net6[1] + IPv6Address('2001:db8::1') + >>> net6[-1] + IPv6Address('2001:db8::ffff:ffff') + + +It also means that network objects lend themselves to using the list +membership test syntax like this:: + + if address in network: + # do something + +Containment testing is done efficiently based on the network prefix:: + + >>> addr4 = ipaddress.ip_address('192.0.2.1') + >>> addr4 in ipaddress.ip_network('192.0.2.0/24') + True + >>> addr4 in ipaddress.ip_network('192.0.3.0/24') + False + + +Comparisons +=========== + +:mod:`ipaddress` provides some simple, hopefully intuitive ways to compare +objects, where it makes sense:: + + >>> ipaddress.ip_address('192.0.2.1') < ipaddress.ip_address('192.0.2.2') + True + +A :exc:`TypeError` exception is raised if you try to compare objects of +different versions or different types. + + +Using IP Addresses with other modules +===================================== + +Other modules that use IP addresses (such as :mod:`socket`) usually won't +accept objects from this module directly. Instead, they must be coerced to +an integer or string that the other module will accept:: + + >>> addr4 = ipaddress.ip_address('192.0.2.1') + >>> str(addr4) + '192.0.2.1' + >>> int(addr4) + 3221225985 + + +Getting more detail when instance creation fails +================================================ + +When creating address/network/interface objects using the version-agnostic +factory functions, any errors will be reported as :exc:`ValueError` with +a generic error message that simply says the passed in value was not +recognized as an object of that type. The lack of a specific error is +because it's necessary to know whether the value is *supposed* to be IPv4 +or IPv6 in order to provide more detail on why it has been rejected. 
+ +To support use cases where it is useful to have access to this additional +detail, the individual class constructors actually raise the +:exc:`ValueError` subclasses :exc:`ipaddress.AddressValueError` and +:exc:`ipaddress.NetmaskValueError` to indicate exactly which part of +the definition failed to parse correctly. + +The error messages are significantly more detailed when using the +class constructors directly. For example:: + + >>> ipaddress.ip_address("192.168.0.256") + Traceback (most recent call last): + ... + ValueError: '192.168.0.256' does not appear to be an IPv4 or IPv6 address + >>> ipaddress.IPv4Address("192.168.0.256") + Traceback (most recent call last): + ... + ipaddress.AddressValueError: Octet 256 (> 255) not permitted in '192.168.0.256' + + >>> ipaddress.ip_network("192.168.0.1/64") + Traceback (most recent call last): + ... + ValueError: '192.168.0.1/64' does not appear to be an IPv4 or IPv6 network + >>> ipaddress.IPv4Network("192.168.0.1/64") + Traceback (most recent call last): + ... + ipaddress.NetmaskValueError: '64' is not a valid netmask + +However, both of the module specific exceptions have :exc:`ValueError` as their +parent class, so if you're not concerned with the particular type of error, +you can still write code like the following:: + + try: + network = ipaddress.IPv4Network(address) + except ValueError: + print('address/netmask is invalid for IPv4:', address) + diff --git a/pyperformance/data-files/benchmarks/bm_sphinx/data/Doc/howto/isolating-extensions.rst b/pyperformance/data-files/benchmarks/bm_sphinx/data/Doc/howto/isolating-extensions.rst new file mode 100644 index 00000000..a636e06b --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_sphinx/data/Doc/howto/isolating-extensions.rst @@ -0,0 +1,629 @@ +.. highlight:: c + +.. _isolating-extensions-howto: + +*************************** +Isolating Extension Modules +*************************** + +.. 
topic:: Abstract + + Traditionally, state belonging to Python extension modules was kept in C + ``static`` variables, which have process-wide scope. This document + describes problems of such per-process state and shows a safer way: + per-module state. + + The document also describes how to switch to per-module state where + possible. This transition involves allocating space for that state, potentially + switching from static types to heap types, and—perhaps most + importantly—accessing per-module state from code. + + +Who should read this +==================== + +This guide is written for maintainers of :ref:`C-API ` extensions +who would like to make that extension safer to use in applications where +Python itself is used as a library. + + +Background +========== + +An *interpreter* is the context in which Python code runs. It contains +configuration (e.g. the import path) and runtime state (e.g. the set of +imported modules). + +Python supports running multiple interpreters in one process. There are +two cases to think about—users may run interpreters: + +- in sequence, with several :c:func:`Py_InitializeEx`/:c:func:`Py_FinalizeEx` + cycles, and +- in parallel, managing "sub-interpreters" using + :c:func:`Py_NewInterpreter`/:c:func:`Py_EndInterpreter`. + +Both cases (and combinations of them) would be most useful when +embedding Python within a library. Libraries generally shouldn't make +assumptions about the application that uses them, which include +assuming a process-wide "main Python interpreter". + +Historically, Python extension modules don't handle this use case well. +Many extension modules (and even some stdlib modules) use *per-process* +global state, because C ``static`` variables are extremely easy to use. +Thus, data that should be specific to an interpreter ends up being shared +between interpreters. 
Unless the extension developer is careful, it is very +easy to introduce edge cases that lead to crashes when a module is loaded in +more than one interpreter in the same process. + +Unfortunately, *per-interpreter* state is not easy to achieve. Extension +authors tend to not keep multiple interpreters in mind when developing, +and it is currently cumbersome to test the behavior. + +Enter Per-Module State +---------------------- + +Instead of focusing on per-interpreter state, Python's C API is evolving +to better support the more granular *per-module* state. +This means that C-level data should be attached to a *module object*. +Each interpreter creates its own module object, keeping the data separate. +For testing the isolation, multiple module objects corresponding to a single +extension can even be loaded in a single interpreter. + +Per-module state provides an easy way to think about lifetime and +resource ownership: the extension module will initialize when a +module object is created, and clean up when it's freed. In this regard, +a module is just like any other :c:expr:`PyObject *`; there are no "on +interpreter shutdown" hooks to think—or forget—about. + +Note that there are use cases for different kinds of "globals": +per-process, per-interpreter, per-thread or per-task state. +With per-module state as the default, these are still possible, +but you should treat them as exceptional cases: +if you need them, you should give them additional care and testing. +(Note that this guide does not cover them.) + + +Isolated Module Objects +----------------------- + +The key point to keep in mind when developing an extension module is +that several module objects can be created from a single shared library. +For example: + +.. 
code-block:: pycon + + >>> import sys + >>> import binascii + >>> old_binascii = binascii + >>> del sys.modules['binascii'] + >>> import binascii # create a new module object + >>> old_binascii == binascii + False + +As a rule of thumb, the two modules should be completely independent. +All objects and state specific to the module should be encapsulated +within the module object, not shared with other module objects, and +cleaned up when the module object is deallocated. +Since this is just a rule of thumb, exceptions are possible +(see `Managing Global State`_), but they will need more +thought and attention to edge cases. + +While some modules could do with less stringent restrictions, isolated +modules make it easier to set clear expectations and guidelines that +work across a variety of use cases. + + +Surprising Edge Cases +--------------------- + +Note that isolated modules do create some surprising edge cases. Most +notably, each module object will typically not share its classes and +exceptions with other similar modules. Continuing from the +`example above `__, +note that ``old_binascii.Error`` and ``binascii.Error`` are +separate objects. In the following code, the exception is *not* caught: + +.. code-block:: pycon + + >>> old_binascii.Error == binascii.Error + False + >>> try: + ... old_binascii.unhexlify(b'qwertyuiop') + ... except binascii.Error: + ... print('boo') + ... + Traceback (most recent call last): + File "", line 2, in + binascii.Error: Non-hexadecimal digit found + +This is expected. Notice that pure-Python modules behave the same way: +it is a part of how Python works. + +The goal is to make extension modules safe at the C level, not to make +hacks behave intuitively. Mutating ``sys.modules`` "manually" counts +as a hack. 
+ + +Making Modules Safe with Multiple Interpreters +============================================== + + +Managing Global State +--------------------- + +Sometimes, the state associated with a Python module is not specific to that module, but +to the entire process (or something else "more global" than a module). +For example: + +- The ``readline`` module manages *the* terminal. +- A module running on a circuit board wants to control *the* on-board + LED. + +In these cases, the Python module should provide *access* to the global +state, rather than *own* it. If possible, write the module so that +multiple copies of it can access the state independently (along with +other libraries, whether for Python or other languages). If that is not +possible, consider explicit locking. + +If it is necessary to use process-global state, the simplest way to +avoid issues with multiple interpreters is to explicitly prevent a +module from being loaded more than once per process—see +`Opt-Out: Limiting to One Module Object per Process`_. + + +Managing Per-Module State +------------------------- + +To use per-module state, use +:ref:`multi-phase extension module initialization `. +This signals that your module supports multiple interpreters correctly. + +Set ``PyModuleDef.m_size`` to a positive number to request that many +bytes of storage local to the module. Usually, this will be set to the +size of some module-specific ``struct``, which can store all of the +module's C-level state. In particular, it is where you should put +pointers to classes (including exceptions, but excluding static types) +and settings (e.g. ``csv``'s :py:data:`~csv.field_size_limit`) +which the C code needs to function. + +.. note:: + Another option is to store state in the module's ``__dict__``, + but you must avoid crashing when users modify ``__dict__`` from + Python code. This usually means error- and type-checking at the C level, + which is easy to get wrong and hard to test sufficiently. 
+ + However, if module state is not needed in C code, storing it in + ``__dict__`` only is a good idea. + +If the module state includes ``PyObject`` pointers, the module object +must hold references to those objects and implement the module-level hooks +``m_traverse``, ``m_clear`` and ``m_free``. These work like +``tp_traverse``, ``tp_clear`` and ``tp_free`` of a class. Adding them will +require some work and make the code longer; this is the price for +modules which can be unloaded cleanly. + +An example of a module with per-module state is currently available as +`xxlimited `__; +example module initialization shown at the bottom of the file. + + +Opt-Out: Limiting to One Module Object per Process +-------------------------------------------------- + +A non-negative ``PyModuleDef.m_size`` signals that a module supports +multiple interpreters correctly. If this is not yet the case for your +module, you can explicitly make your module loadable only once per +process. For example:: + + static int loaded = 0; + + static int + exec_module(PyObject* module) + { + if (loaded) { + PyErr_SetString(PyExc_ImportError, + "cannot load module more than once per process"); + return -1; + } + loaded = 1; + // ... rest of initialization + } + + +Module State Access from Functions +---------------------------------- + +Accessing the state from module-level functions is straightforward. +Functions get the module object as their first argument; for extracting +the state, you can use ``PyModule_GetState``:: + + static PyObject * + func(PyObject *module, PyObject *args) + { + my_struct *state = (my_struct*)PyModule_GetState(module); + if (state == NULL) { + return NULL; + } + // ... rest of logic + } + +.. note:: + ``PyModule_GetState`` may return ``NULL`` without setting an + exception if there is no module state, i.e. ``PyModuleDef.m_size`` was + zero. In your own module, you're in control of ``m_size``, so this is + easy to prevent. 
+ + +Heap Types +========== + +Traditionally, types defined in C code are *static*; that is, +``static PyTypeObject`` structures defined directly in code and +initialized using ``PyType_Ready()``. + +Such types are necessarily shared across the process. Sharing them +between module objects requires paying attention to any state they own +or access. To limit the possible issues, static types are immutable at +the Python level: for example, you can't set ``str.myattribute = 123``. + +.. impl-detail:: + Sharing truly immutable objects between interpreters is fine, + as long as they don't provide access to mutable objects. + However, in CPython, every Python object has a mutable implementation + detail: the reference count. Changes to the refcount are guarded by the GIL. + Thus, code that shares any Python objects across interpreters implicitly + depends on CPython's current, process-wide GIL. + +Because they are immutable and process-global, static types cannot access +"their" module state. +If any method of such a type requires access to module state, +the type must be converted to a *heap-allocated type*, or *heap type* +for short. These correspond more closely to classes created by Python's +``class`` statement. + +For new modules, using heap types by default is a good rule of thumb. + + +Changing Static Types to Heap Types +----------------------------------- + +Static types can be converted to heap types, but note that +the heap type API was not designed for "lossless" conversion +from static types—that is, creating a type that works exactly like a given +static type. +So, when rewriting the class definition in a new API, +you are likely to unintentionally change a few details (e.g. pickleability +or inherited slots). +Always test the details that are important to you. + +Watch out for the following two points in particular (but note that this is not +a comprehensive list): + +* Unlike static types, heap type objects are mutable by default. 
+ Use the :c:macro:`Py_TPFLAGS_IMMUTABLETYPE` flag to prevent mutability. +* Heap types inherit :c:member:`~PyTypeObject.tp_new` by default, + so it may become possible to instantiate them from Python code. + You can prevent this with the :c:macro:`Py_TPFLAGS_DISALLOW_INSTANTIATION` flag. + + +Defining Heap Types +------------------- + +Heap types can be created by filling a :c:struct:`PyType_Spec` structure, a +description or "blueprint" of a class, and calling +:c:func:`PyType_FromModuleAndSpec` to construct a new class object. + +.. note:: + Other functions, like :c:func:`PyType_FromSpec`, can also create + heap types, but :c:func:`PyType_FromModuleAndSpec` associates the module + with the class, allowing access to the module state from methods. + +The class should generally be stored in *both* the module state (for +safe access from C) and the module's ``__dict__`` (for access from +Python code). + + +Garbage-Collection Protocol +--------------------------- + +Instances of heap types hold a reference to their type. +This ensures that the type isn't destroyed before all its instances are, +but may result in reference cycles that need to be broken by the +garbage collector. + +To avoid memory leaks, instances of heap types must implement the +garbage collection protocol. +That is, heap types should: + +- Have the :c:macro:`Py_TPFLAGS_HAVE_GC` flag. +- Define a traverse function using ``Py_tp_traverse``, which + visits the type (e.g. using ``Py_VISIT(Py_TYPE(self))``). + +Please refer to the documentation of +:c:macro:`Py_TPFLAGS_HAVE_GC` and :c:member:`~PyTypeObject.tp_traverse` +for additional considerations. + +The API for defining heap types grew organically, leaving it +somewhat awkward to use in its current state. +The following sections will guide you through common issues. + + +``tp_traverse`` in Python 3.8 and lower +....................................... + +The requirement to visit the type from ``tp_traverse`` was added in Python 3.9. 
+If you support Python 3.8 and lower, the traverse function must *not* +visit the type, so it must be more complicated:: + + static int my_traverse(PyObject *self, visitproc visit, void *arg) + { + if (Py_Version >= 0x03090000) { + Py_VISIT(Py_TYPE(self)); + } + return 0; + } + +Unfortunately, :c:data:`Py_Version` was only added in Python 3.11. +As a replacement, use: + +* :c:macro:`PY_VERSION_HEX`, if not using the stable ABI, or +* :py:data:`sys.version_info` (via :c:func:`PySys_GetObject` and + :c:func:`PyArg_ParseTuple`). + + +Delegating ``tp_traverse`` +.......................... + +If your traverse function delegates to the :c:member:`~PyTypeObject.tp_traverse` +of its base class (or another type), ensure that ``Py_TYPE(self)`` is visited +only once. +Note that only heap types are expected to visit the type in ``tp_traverse``. + +For example, if your traverse function includes:: + + base->tp_traverse(self, visit, arg) + +...and ``base`` may be a static type, then it should also include:: + + if (base->tp_flags & Py_TPFLAGS_HEAPTYPE) { + // a heap type's tp_traverse already visited Py_TYPE(self) + } else { + if (Py_Version >= 0x03090000) { + Py_VISIT(Py_TYPE(self)); + } + } + +It is not necessary to handle the type's reference count in +:c:member:`~PyTypeObject.tp_new` and :c:member:`~PyTypeObject.tp_clear`. + + +Defining ``tp_dealloc`` +....................... + +If your type has a custom :c:member:`~PyTypeObject.tp_dealloc` function, +it needs to: + +- call :c:func:`PyObject_GC_UnTrack` before any fields are invalidated, and +- decrement the reference count of the type. + +To keep the type valid while ``tp_free`` is called, the type's refcount needs +to be decremented *after* the instance is deallocated. For example:: + + static void my_dealloc(PyObject *self) + { + PyObject_GC_UnTrack(self); + ... 
+ PyTypeObject *type = Py_TYPE(self); + type->tp_free(self); + Py_DECREF(type); + } + +The default ``tp_dealloc`` function does this, so +if your type does *not* override +``tp_dealloc`` you don't need to add it. + + +Not overriding ``tp_free`` +.......................... + +The :c:member:`~PyTypeObject.tp_free` slot of a heap type must be set to +:c:func:`PyObject_GC_Del`. +This is the default; do not override it. + + +Avoiding ``PyObject_New`` +......................... + +GC-tracked objects need to be allocated using GC-aware functions. + +If you use :c:func:`PyObject_New` or :c:func:`PyObject_NewVar`: + +- Get and call type's :c:member:`~PyTypeObject.tp_alloc` slot, if possible. + That is, replace ``TYPE *o = PyObject_New(TYPE, typeobj)`` with:: + + TYPE *o = typeobj->tp_alloc(typeobj, 0); + + Replace ``o = PyObject_NewVar(TYPE, typeobj, size)`` with the same, + but use size instead of the 0. + +- If the above is not possible (e.g. inside a custom ``tp_alloc``), + call :c:func:`PyObject_GC_New` or :c:func:`PyObject_GC_NewVar`:: + + TYPE *o = PyObject_GC_New(TYPE, typeobj); + + TYPE *o = PyObject_GC_NewVar(TYPE, typeobj, size); + + +Module State Access from Classes +-------------------------------- + +If you have a type object defined with :c:func:`PyType_FromModuleAndSpec`, +you can call :c:func:`PyType_GetModule` to get the associated module, and then +:c:func:`PyModule_GetState` to get the module's state. + +To save some tedious error-handling boilerplate code, you can combine +these two steps with :c:func:`PyType_GetModuleState`, resulting in:: + + my_struct *state = (my_struct*)PyType_GetModuleState(type); + if (state == NULL) { + return NULL; + } + + +Module State Access from Regular Methods +---------------------------------------- + +Accessing the module-level state from methods of a class is somewhat more +complicated, but is possible thanks to API introduced in Python 3.9. 
+To get the state, you need to first get the *defining class*, and then +get the module state from it. + +The largest roadblock is getting *the class a method was defined in*, or +that method's "defining class" for short. The defining class can have a +reference to the module it is part of. + +Do not confuse the defining class with ``Py_TYPE(self)``. If the method +is called on a *subclass* of your type, ``Py_TYPE(self)`` will refer to +that subclass, which may be defined in a different module than yours. + +.. note:: + The following Python code can illustrate the concept. + ``Base.get_defining_class`` returns ``Base`` even + if ``type(self) == Sub``: + + .. code-block:: python + + class Base: + def get_type_of_self(self): + return type(self) + + def get_defining_class(self): + return __class__ + + class Sub(Base): + pass + +For a method to get its "defining class", it must use the +:ref:`METH_METHOD | METH_FASTCALL | METH_KEYWORDS ` +:c:type:`calling convention ` +and the corresponding :c:type:`PyCMethod` signature:: + + PyObject *PyCMethod( + PyObject *self, // object the method was called on + PyTypeObject *defining_class, // defining class + PyObject *const *args, // C array of arguments + Py_ssize_t nargs, // length of "args" + PyObject *kwnames) // NULL, or tuple of keyword argument names + +Once you have the defining class, call :c:func:`PyType_GetModuleState` to get +the state of its associated module. + +For example:: + + static PyObject * + example_method(PyObject *self, + PyTypeObject *defining_class, + PyObject *const *args, + Py_ssize_t nargs, + PyObject *kwnames) + { + my_struct *state = (my_struct*)PyType_GetModuleState(defining_class); + if (state == NULL) { + return NULL; + } + ... 
// rest of logic + } + + PyDoc_STRVAR(example_method_doc, "..."); + + static PyMethodDef my_methods[] = { + {"example_method", + (PyCFunction)(void(*)(void))example_method, + METH_METHOD|METH_FASTCALL|METH_KEYWORDS, + example_method_doc}, + {NULL}, + }; + + +Module State Access from Slot Methods, Getters and Setters +---------------------------------------------------------- + +.. note:: + + This is new in Python 3.11. + + .. After adding to limited API: + + If you use the :ref:`limited API <limited-c-api>`, + you must update ``Py_LIMITED_API`` to ``0x030b0000``, losing ABI + compatibility with earlier versions. + +Slot methods—the fast C equivalents for special methods, such as +:c:member:`~PyNumberMethods.nb_add` for :py:attr:`~object.__add__` or +:c:member:`~PyTypeObject.tp_new` for initialization—have a very simple API that +doesn't allow passing in the defining class, unlike with :c:type:`PyCMethod`. +The same goes for getters and setters defined with +:c:type:`PyGetSetDef`. + +To access the module state in these cases, use the +:c:func:`PyType_GetModuleByDef` function, and pass in the module definition. +Once you have the module, call :c:func:`PyModule_GetState` +to get the state:: + + PyObject *module = PyType_GetModuleByDef(Py_TYPE(self), &module_def); + my_struct *state = (my_struct*)PyModule_GetState(module); + if (state == NULL) { + return NULL; + } + +:c:func:`!PyType_GetModuleByDef` works by searching the +:term:`method resolution order` (i.e. all superclasses) for the first +superclass that has a corresponding module. + +.. note:: + + In very exotic cases (inheritance chains spanning multiple modules + created from the same definition), :c:func:`!PyType_GetModuleByDef` might not + return the module of the true defining class. However, it will always + return a module with the same definition, ensuring a compatible + C memory layout. + + +Lifetime of the Module State +---------------------------- + +When a module object is garbage-collected, its module state is freed.
+For each pointer to (a part of) the module state, you must hold a reference +to the module object. + +Usually this is not an issue, because types created with +:c:func:`PyType_FromModuleAndSpec`, and their instances, hold a reference +to the module. +However, you must be careful in reference counting when you reference +module state from other places, such as callbacks for external +libraries. + + +Open Issues +=========== + +Several issues around per-module state and heap types are still open. + +Discussions about improving the situation are best held on the `capi-sig +mailing list <https://mail.python.org/mailman3/lists/capi-sig.python.org/>`__. + + +Per-Class Scope +--------------- + +It is currently (as of Python 3.11) not possible to attach state to individual +*types* without relying on CPython implementation details (which may change +in the future—perhaps, ironically, to allow a proper solution for +per-class scope). + + +Lossless Conversion to Heap Types +--------------------------------- + +The heap type API was not designed for "lossless" conversion from static types; +that is, creating a type that works exactly like a given static type. diff --git a/pyperformance/data-files/benchmarks/bm_sphinx/data/Doc/license.rst b/pyperformance/data-files/benchmarks/bm_sphinx/data/Doc/license.rst new file mode 100644 index 00000000..674ac5f5 --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_sphinx/data/Doc/license.rst @@ -0,0 +1,1131 @@ +.. highlight:: none + +.. _history-and-license: + +******************* +History and License +******************* + + +History of the software +======================= + +Python was created in the early 1990s by Guido van Rossum at Stichting +Mathematisch Centrum (CWI, see https://www.cwi.nl/) in the Netherlands as a +successor of a language called ABC. Guido remains Python's principal author, +although it includes many contributions from others.
+ +In 1995, Guido continued his work on Python at the Corporation for National +Research Initiatives (CNRI, see https://www.cnri.reston.va.us/) in Reston, +Virginia where he released several versions of the software. + +In May 2000, Guido and the Python core development team moved to BeOpen.com to +form the BeOpen PythonLabs team. In October of the same year, the PythonLabs +team moved to Digital Creations (now Zope Corporation; see +https://www.zope.org/). In 2001, the Python Software Foundation (PSF, see +https://www.python.org/psf/) was formed, a non-profit organization created +specifically to own Python-related Intellectual Property. Zope Corporation is a +sponsoring member of the PSF. + +All Python releases are Open Source (see https://opensource.org/ for the Open +Source Definition). Historically, most, but not all, Python releases have also +been GPL-compatible; the table below summarizes the various releases. + ++----------------+--------------+------------+------------+-----------------+ +| Release | Derived from | Year | Owner | GPL compatible? 
| ++================+==============+============+============+=================+ +| 0.9.0 thru 1.2 | n/a | 1991-1995 | CWI | yes | ++----------------+--------------+------------+------------+-----------------+ +| 1.3 thru 1.5.2 | 1.2 | 1995-1999 | CNRI | yes | ++----------------+--------------+------------+------------+-----------------+ +| 1.6 | 1.5.2 | 2000 | CNRI | no | ++----------------+--------------+------------+------------+-----------------+ +| 2.0 | 1.6 | 2000 | BeOpen.com | no | ++----------------+--------------+------------+------------+-----------------+ +| 1.6.1 | 1.6 | 2001 | CNRI | no | ++----------------+--------------+------------+------------+-----------------+ +| 2.1 | 2.0+1.6.1 | 2001 | PSF | no | ++----------------+--------------+------------+------------+-----------------+ +| 2.0.1 | 2.0+1.6.1 | 2001 | PSF | yes | ++----------------+--------------+------------+------------+-----------------+ +| 2.1.1 | 2.1+2.0.1 | 2001 | PSF | yes | ++----------------+--------------+------------+------------+-----------------+ +| 2.1.2 | 2.1.1 | 2002 | PSF | yes | ++----------------+--------------+------------+------------+-----------------+ +| 2.1.3 | 2.1.2 | 2002 | PSF | yes | ++----------------+--------------+------------+------------+-----------------+ +| 2.2 and above | 2.1.1 | 2001-now | PSF | yes | ++----------------+--------------+------------+------------+-----------------+ + +.. note:: + + GPL-compatible doesn't mean that we're distributing Python under the GPL. All + Python licenses, unlike the GPL, let you distribute a modified version without + making your changes open source. The GPL-compatible licenses make it possible to + combine Python with other software that is released under the GPL; the others + don't. + +Thanks to the many outside volunteers who have worked under Guido's direction to +make these releases possible. 
+ + +Terms and conditions for accessing or otherwise using Python +============================================================ + +Python software and documentation are licensed under the +:ref:`PSF License Agreement <PSF-license>`. + +Starting with Python 3.8.6, examples, recipes, and other code in +the documentation are dual licensed under the PSF License Agreement +and the :ref:`Zero-Clause BSD license <BSD0>`. + +Some software incorporated into Python is under different licenses. +The licenses are listed with code falling under that license. +See :ref:`OtherLicenses` for an incomplete list of these licenses. + + +.. _PSF-license: + +PSF LICENSE AGREEMENT FOR PYTHON |release| +------------------------------------------ + +.. parsed-literal:: + + 1. This LICENSE AGREEMENT is between the Python Software Foundation ("PSF"), and + the Individual or Organization ("Licensee") accessing and otherwise using Python + |release| software in source or binary form and its associated documentation. + + 2. Subject to the terms and conditions of this License Agreement, PSF hereby + grants Licensee a nonexclusive, royalty-free, world-wide license to reproduce, + analyze, test, perform and/or display publicly, prepare derivative works, + distribute, and otherwise use Python |release| alone or in any derivative + version, provided, however, that PSF's License Agreement and PSF's notice of + copyright, i.e., "Copyright © 2001-2024 Python Software Foundation; All Rights + Reserved" are retained in Python |release| alone or in any derivative version + prepared by Licensee. + + 3. In the event Licensee prepares a derivative work that is based on or + incorporates Python |release| or any part thereof, and wants to make the + derivative work available to others as provided herein, then Licensee hereby + agrees to include in any such work a brief summary of the changes made to Python + |release|. + + 4. PSF is making Python |release| available to Licensee on an "AS IS" basis.
+ PSF MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR IMPLIED. BY WAY OF + EXAMPLE, BUT NOT LIMITATION, PSF MAKES NO AND DISCLAIMS ANY REPRESENTATION OR + WARRANTY OF MERCHANTABILITY OR FITNESS FOR ANY PARTICULAR PURPOSE OR THAT THE + USE OF PYTHON |release| WILL NOT INFRINGE ANY THIRD PARTY RIGHTS. + + 5. PSF SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYTHON |release| + FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS A RESULT OF + MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYTHON |release|, OR ANY DERIVATIVE + THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF. + + 6. This License Agreement will automatically terminate upon a material breach of + its terms and conditions. + + 7. Nothing in this License Agreement shall be deemed to create any relationship + of agency, partnership, or joint venture between PSF and Licensee. This License + Agreement does not grant permission to use PSF trademarks or trade name in a + trademark sense to endorse or promote products or services of Licensee, or any + third party. + + 8. By copying, installing or otherwise using Python |release|, Licensee agrees + to be bound by the terms and conditions of this License Agreement. + + +BEOPEN.COM LICENSE AGREEMENT FOR PYTHON 2.0 +------------------------------------------- + +BEOPEN PYTHON OPEN SOURCE LICENSE AGREEMENT VERSION 1 + +.. parsed-literal:: + + 1. This LICENSE AGREEMENT is between BeOpen.com ("BeOpen"), having an office at + 160 Saratoga Avenue, Santa Clara, CA 95051, and the Individual or Organization + ("Licensee") accessing and otherwise using this software in source or binary + form and its associated documentation ("the Software"). + + 2. 
Subject to the terms and conditions of this BeOpen Python License Agreement, + BeOpen hereby grants Licensee a non-exclusive, royalty-free, world-wide license + to reproduce, analyze, test, perform and/or display publicly, prepare derivative + works, distribute, and otherwise use the Software alone or in any derivative + version, provided, however, that the BeOpen Python License is retained in the + Software, alone or in any derivative version prepared by Licensee. + + 3. BeOpen is making the Software available to Licensee on an "AS IS" basis. + BEOPEN MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR IMPLIED. BY WAY OF + EXAMPLE, BUT NOT LIMITATION, BEOPEN MAKES NO AND DISCLAIMS ANY REPRESENTATION OR + WARRANTY OF MERCHANTABILITY OR FITNESS FOR ANY PARTICULAR PURPOSE OR THAT THE + USE OF THE SOFTWARE WILL NOT INFRINGE ANY THIRD PARTY RIGHTS. + + 4. BEOPEN SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF THE SOFTWARE FOR + ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS A RESULT OF USING, + MODIFYING OR DISTRIBUTING THE SOFTWARE, OR ANY DERIVATIVE THEREOF, EVEN IF + ADVISED OF THE POSSIBILITY THEREOF. + + 5. This License Agreement will automatically terminate upon a material breach of + its terms and conditions. + + 6. This License Agreement shall be governed by and interpreted in all respects + by the law of the State of California, excluding conflict of law provisions. + Nothing in this License Agreement shall be deemed to create any relationship of + agency, partnership, or joint venture between BeOpen and Licensee. This License + Agreement does not grant permission to use BeOpen trademarks or trade names in a + trademark sense to endorse or promote products or services of Licensee, or any + third party. As an exception, the "BeOpen Python" logos available at + http://www.pythonlabs.com/logos.html may be used according to the permissions + granted on that web page. + + 7. 
By copying, installing or otherwise using the software, Licensee agrees to be + bound by the terms and conditions of this License Agreement. + + +CNRI LICENSE AGREEMENT FOR PYTHON 1.6.1 +--------------------------------------- + +.. parsed-literal:: + + 1. This LICENSE AGREEMENT is between the Corporation for National Research + Initiatives, having an office at 1895 Preston White Drive, Reston, VA 20191 + ("CNRI"), and the Individual or Organization ("Licensee") accessing and + otherwise using Python 1.6.1 software in source or binary form and its + associated documentation. + + 2. Subject to the terms and conditions of this License Agreement, CNRI hereby + grants Licensee a nonexclusive, royalty-free, world-wide license to reproduce, + analyze, test, perform and/or display publicly, prepare derivative works, + distribute, and otherwise use Python 1.6.1 alone or in any derivative version, + provided, however, that CNRI's License Agreement and CNRI's notice of copyright, + i.e., "Copyright © 1995-2001 Corporation for National Research Initiatives; All + Rights Reserved" are retained in Python 1.6.1 alone or in any derivative version + prepared by Licensee. Alternately, in lieu of CNRI's License Agreement, + Licensee may substitute the following text (omitting the quotes): "Python 1.6.1 + is made available subject to the terms and conditions in CNRI's License + Agreement. This Agreement together with Python 1.6.1 may be located on the + internet using the following unique, persistent identifier (known as a handle): + 1895.22/1013. This Agreement may also be obtained from a proxy server on the + internet using the following URL: http://hdl.handle.net/1895.22/1013." + + 3. 
In the event Licensee prepares a derivative work that is based on or + incorporates Python 1.6.1 or any part thereof, and wants to make the derivative + work available to others as provided herein, then Licensee hereby agrees to + include in any such work a brief summary of the changes made to Python 1.6.1. + + 4. CNRI is making Python 1.6.1 available to Licensee on an "AS IS" basis. CNRI + MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR IMPLIED. BY WAY OF EXAMPLE, + BUT NOT LIMITATION, CNRI MAKES NO AND DISCLAIMS ANY REPRESENTATION OR WARRANTY + OF MERCHANTABILITY OR FITNESS FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF + PYTHON 1.6.1 WILL NOT INFRINGE ANY THIRD PARTY RIGHTS. + + 5. CNRI SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYTHON 1.6.1 FOR + ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS A RESULT OF + MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYTHON 1.6.1, OR ANY DERIVATIVE + THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF. + + 6. This License Agreement will automatically terminate upon a material breach of + its terms and conditions. + + 7. This License Agreement shall be governed by the federal intellectual property + law of the United States, including without limitation the federal copyright + law, and, to the extent such U.S. federal law does not apply, by the law of the + Commonwealth of Virginia, excluding Virginia's conflict of law provisions. + Notwithstanding the foregoing, with regard to derivative works based on Python + 1.6.1 that incorporate non-separable material that was previously distributed + under the GNU General Public License (GPL), the law of the Commonwealth of + Virginia shall govern this License Agreement only as to issues arising under or + with respect to Paragraphs 4, 5, and 7 of this License Agreement. Nothing in + this License Agreement shall be deemed to create any relationship of agency, + partnership, or joint venture between CNRI and Licensee. 
This License Agreement + does not grant permission to use CNRI trademarks or trade name in a trademark + sense to endorse or promote products or services of Licensee, or any third + party. + + 8. By clicking on the "ACCEPT" button where indicated, or by copying, installing + or otherwise using Python 1.6.1, Licensee agrees to be bound by the terms and + conditions of this License Agreement. + + +CWI LICENSE AGREEMENT FOR PYTHON 0.9.0 THROUGH 1.2 +-------------------------------------------------- + +.. parsed-literal:: + + Copyright © 1991 - 1995, Stichting Mathematisch Centrum Amsterdam, The + Netherlands. All rights reserved. + + Permission to use, copy, modify, and distribute this software and its + documentation for any purpose and without fee is hereby granted, provided that + the above copyright notice appear in all copies and that both that copyright + notice and this permission notice appear in supporting documentation, and that + the name of Stichting Mathematisch Centrum or CWI not be used in advertising or + publicity pertaining to distribution of the software without specific, written + prior permission. + + STICHTING MATHEMATISCH CENTRUM DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS + SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO + EVENT SHALL STICHTING MATHEMATISCH CENTRUM BE LIABLE FOR ANY SPECIAL, INDIRECT + OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, + DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS + ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS + SOFTWARE. + + +.. _BSD0: + +ZERO-CLAUSE BSD LICENSE FOR CODE IN THE PYTHON |release| DOCUMENTATION +---------------------------------------------------------------------- + +.. parsed-literal:: + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted. 
+ + THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH + REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY + AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, + INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM + LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR + OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR + PERFORMANCE OF THIS SOFTWARE. + + +.. _OtherLicenses: + +Licenses and Acknowledgements for Incorporated Software +======================================================= + +This section is an incomplete, but growing list of licenses and acknowledgements +for third-party software incorporated in the Python distribution. + + +Mersenne Twister +---------------- + +The :mod:`!_random` C extension underlying the :mod:`random` module +includes code based on a download from +http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/MT2002/emt19937ar.html. The following are +the verbatim comments from the original code:: + + A C-program for MT19937, with initialization improved 2002/1/26. + Coded by Takuji Nishimura and Makoto Matsumoto. + + Before using, initialize the state by using init_genrand(seed) + or init_by_array(init_key, key_length). + + Copyright (C) 1997 - 2002, Makoto Matsumoto and Takuji Nishimura, + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + 3. 
The names of its contributors may not be used to endorse or promote + products derived from this software without specific prior written + permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + + Any feedback is very welcome. + http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/emt.html + email: m-mat @ math.sci.hiroshima-u.ac.jp (remove space) + + +Sockets +------- + +The :mod:`socket` module uses the functions, :c:func:`!getaddrinfo`, and +:c:func:`!getnameinfo`, which are coded in separate source files from the WIDE +Project, https://www.wide.ad.jp/. :: + + Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + 3. 
Neither the name of the project nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND + ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE + FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + SUCH DAMAGE. + + +Asynchronous socket services +---------------------------- + +The :mod:`!test.support.asynchat` and :mod:`!test.support.asyncore` +modules contain the following notice:: + + Copyright 1996 by Sam Rushing + + All Rights Reserved + + Permission to use, copy, modify, and distribute this software and + its documentation for any purpose and without fee is hereby + granted, provided that the above copyright notice appear in all + copies and that both that copyright notice and this permission + notice appear in supporting documentation, and that the name of Sam + Rushing not be used in advertising or publicity pertaining to + distribution of the software without specific, written prior + permission. 
+ + SAM RUSHING DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, + INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN + NO EVENT SHALL SAM RUSHING BE LIABLE FOR ANY SPECIAL, INDIRECT OR + CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS + OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, + NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN + CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + + +Cookie management +----------------- + +The :mod:`http.cookies` module contains the following notice:: + + Copyright 2000 by Timothy O'Malley + + All Rights Reserved + + Permission to use, copy, modify, and distribute this software + and its documentation for any purpose and without fee is hereby + granted, provided that the above copyright notice appear in all + copies and that both that copyright notice and this permission + notice appear in supporting documentation, and that the name of + Timothy O'Malley not be used in advertising or publicity + pertaining to distribution of the software without specific, written + prior permission. + + Timothy O'Malley DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS + SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY + AND FITNESS, IN NO EVENT SHALL Timothy O'Malley BE LIABLE FOR + ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, + WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS + ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR + PERFORMANCE OF THIS SOFTWARE. + + +Execution tracing +----------------- + +The :mod:`trace` module contains the following notice:: + + portions copyright 2001, Autonomous Zones Industries, Inc., all rights... + err... reserved and offered to the public under the terms of the + Python 2.2 license. + Author: Zooko O'Whielacronx + http://zooko.com/ + mailto:zooko@zooko.com + + Copyright 2000, Mojam Media, Inc., all rights reserved. 
+ Author: Skip Montanaro + + Copyright 1999, Bioreason, Inc., all rights reserved. + Author: Andrew Dalke + + Copyright 1995-1997, Automatrix, Inc., all rights reserved. + Author: Skip Montanaro + + Copyright 1991-1995, Stichting Mathematisch Centrum, all rights reserved. + + + Permission to use, copy, modify, and distribute this Python software and + its associated documentation for any purpose without fee is hereby + granted, provided that the above copyright notice appears in all copies, + and that both that copyright notice and this permission notice appear in + supporting documentation, and that the name of neither Automatrix, + Bioreason or Mojam Media be used in advertising or publicity pertaining to + distribution of the software without specific, written prior permission. + + +UUencode and UUdecode functions +------------------------------- + +The ``uu`` codec contains the following notice:: + + Copyright 1994 by Lance Ellinghouse + Cathedral City, California Republic, United States of America. + All Rights Reserved + Permission to use, copy, modify, and distribute this software and its + documentation for any purpose and without fee is hereby granted, + provided that the above copyright notice appear in all copies and that + both that copyright notice and this permission notice appear in + supporting documentation, and that the name of Lance Ellinghouse + not be used in advertising or publicity pertaining to distribution + of the software without specific, written prior permission. + LANCE ELLINGHOUSE DISCLAIMS ALL WARRANTIES WITH REGARD TO + THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND + FITNESS, IN NO EVENT SHALL LANCE ELLINGHOUSE CENTRUM BE LIABLE + FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT + OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ + Modified by Jack Jansen, CWI, July 1995: + - Use binascii module to do the actual line-by-line conversion + between ascii and binary. This results in a 1000-fold speedup. The C + version is still 5 times faster, though. + - Arguments more compliant with Python standard + + +XML Remote Procedure Calls +-------------------------- + +The :mod:`xmlrpc.client` module contains the following notice:: + + The XML-RPC client interface is + + Copyright (c) 1999-2002 by Secret Labs AB + Copyright (c) 1999-2002 by Fredrik Lundh + + By obtaining, using, and/or copying this software and/or its + associated documentation, you agree that you have read, understood, + and will comply with the following terms and conditions: + + Permission to use, copy, modify, and distribute this software and + its associated documentation for any purpose and without fee is + hereby granted, provided that the above copyright notice appears in + all copies, and that both that copyright notice and this permission + notice appear in supporting documentation, and that the name of + Secret Labs AB or the author not be used in advertising or publicity + pertaining to distribution of the software without specific, written + prior permission. + + SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD + TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT- + ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR + BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY + DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, + WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS + ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE + OF THIS SOFTWARE. + + +test_epoll +---------- + +The :mod:`!test.test_epoll` module contains the following notice:: + + Copyright (c) 2001-2006 Twisted Matrix Laboratories. 
+ + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be + included in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +Select kqueue +------------- + +The :mod:`select` module contains the following notice for the kqueue +interface:: + + Copyright (c) 2000 Doug White, 2006 James Knight, 2007 Christian Heimes + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. 
+ + THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + SUCH DAMAGE. + + +SipHash24 +--------- + +The file :file:`Python/pyhash.c` contains Marek Majkowski's implementation of +Dan Bernstein's SipHash24 algorithm. It contains the following note:: + + + Copyright (c) 2013 Marek Majkowski + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software.
+ + + Original location: + https://github.com/majek/csiphash/ + + Solution inspired by code from: + Samuel Neves (supercop/crypto_auth/siphash24/little) + djb (supercop/crypto_auth/siphash24/little2) + Jean-Philippe Aumasson (https://131002.net/siphash/siphash24.c) + + +strtod and dtoa +--------------- + +The file :file:`Python/dtoa.c`, which supplies C functions dtoa and +strtod for conversion of C doubles to and from strings, is derived +from the file of the same name by David M. Gay, currently available +from https://web.archive.org/web/20220517033456/http://www.netlib.org/fp/dtoa.c. +The original file, as retrieved on March 16, 2009, contains the following +copyright and licensing notice:: + + /**************************************************************** + * + * The author of this software is David M. Gay. + * + * Copyright (c) 1991, 2000, 2001 by Lucent Technologies. + * + * Permission to use, copy, modify, and distribute this software for any + * purpose without fee is hereby granted, provided that this entire notice + * is included in all copies of any software which is or includes a copy + * or modification of this software and in all copies of the supporting + * documentation for such software. + * + * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED + * WARRANTY. IN PARTICULAR, NEITHER THE AUTHOR NOR LUCENT MAKES ANY + * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY + * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE. + * + ***************************************************************/ + + +OpenSSL +------- + +The modules :mod:`hashlib`, :mod:`posix` and :mod:`ssl` use +the OpenSSL library for added performance if made available by the +operating system. Additionally, the Windows and macOS installers for +Python may include a copy of the OpenSSL libraries, so we include a copy +of the OpenSSL license here. 
For the OpenSSL 3.0 release, +and later releases derived from that, the Apache License v2 applies:: + + + Apache License + Version 2.0, January 2004 + https://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). 
+ + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. 
Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative 
Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + +expat +----- + +The :mod:`pyexpat <pyexpat>` extension is built using an included copy of the expat +sources unless the build is configured ``--with-system-expat``:: + + Copyright (c) 1998, 1999, 2000 Thai Open Source Software Center Ltd + and Clark Cooper + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + +libffi +------ + +The :mod:`!_ctypes` C extension underlying the :mod:`ctypes` module +is built using an included copy of the libffi +sources unless the build is configured ``--with-system-libffi``:: + + Copyright (c) 1996-2008 Red Hat, Inc and others. 
+ + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + ``Software''), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + DEALINGS IN THE SOFTWARE. + + +zlib +---- + +The :mod:`zlib` extension is built using an included copy of the zlib +sources if the zlib version found on the system is too old to be +used for the build:: + + Copyright (C) 1995-2011 Jean-loup Gailly and Mark Adler + + This software is provided 'as-is', without any express or implied + warranty. In no event will the authors be held liable for any damages + arising from the use of this software. + + Permission is granted to anyone to use this software for any purpose, + including commercial applications, and to alter it and redistribute it + freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must not + claim that you wrote the original software. If you use this software + in a product, an acknowledgment in the product documentation would be + appreciated but is not required. + + 2. 
Altered source versions must be plainly marked as such, and must not be + misrepresented as being the original software. + + 3. This notice may not be removed or altered from any source distribution. + + Jean-loup Gailly Mark Adler + jloup@gzip.org madler@alumni.caltech.edu + + +cfuhash +------- + +The implementation of the hash table used by the :mod:`tracemalloc` is based +on the cfuhash project:: + + Copyright (c) 2005 Don Owens + All rights reserved. + + This code is released under the BSD license: + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials provided + with the distribution. + + * Neither the name of the author nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + OF THE POSSIBILITY OF SUCH DAMAGE. 
+ + +libmpdec +-------- + +The :mod:`!_decimal` C extension underlying the :mod:`decimal` module +is built using an included copy of the libmpdec +library unless the build is configured ``--with-system-libmpdec``:: + + Copyright (c) 2008-2020 Stefan Krah. All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS "AS IS" AND + ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + SUCH DAMAGE. + + +W3C C14N test suite +------------------- + +The C14N 2.0 test suite in the :mod:`test` package +(``Lib/test/xmltestdata/c14n-20/``) was retrieved from the W3C website at +https://www.w3.org/TR/xml-c14n2-testcases/ and is distributed under the +3-clause BSD license:: + + Copyright (c) 2013 W3C(R) (MIT, ERCIM, Keio, Beihang), + All Rights Reserved. 
+ + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + * Redistributions of works must retain the original copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the original copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the W3C nor the names of its contributors may be + used to endorse or promote products derived from this work without + specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +.. 
_mimalloc-license: + +mimalloc +-------- + +MIT License:: + + Copyright (c) 2018-2021 Microsoft Corporation, Daan Leijen + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in all + copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + SOFTWARE. + + +asyncio +---------- + +Parts of the :mod:`asyncio` module are incorporated from +`uvloop 0.16 <https://github.com/MagicStack/uvloop/tree/v0.16.0>`_, +which is distributed under the MIT license:: + + Copyright (c) 2015-2021 MagicStack Inc. 
http://magic.io + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be + included in all copies or substantial portions of the Software. 
+ + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + +Global Unbounded Sequences (GUS) +-------------------------------- + +The file :file:`Python/qsbr.c` is adapted from FreeBSD's "Global Unbounded +Sequences" safe memory reclamation scheme in +`subr_smr.c <https://github.com/freebsd/freebsd-src/blob/main/sys/kern/subr_smr.c>`_. +The file is distributed under the 2-Clause BSD License:: + + Copyright (c) 2019,2020 Jeffrey Roberson + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + 1. Redistributions of source code must retain the above copyright + notice unmodified, this list of conditions, and the following + disclaimer. + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+ IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/pyperformance/data-files/benchmarks/bm_sphinx/data/Doc/tools/extensions/pyspecific.py b/pyperformance/data-files/benchmarks/bm_sphinx/data/Doc/tools/extensions/pyspecific.py new file mode 100644 index 00000000..9b3256fa --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_sphinx/data/Doc/tools/extensions/pyspecific.py @@ -0,0 +1,212 @@ +import re + +from docutils import nodes +from docutils.parsers.rst import directives +from docutils.utils import unescape +from sphinx import addnodes +from sphinx.domains.changeset import VersionChange, versionlabels, versionlabel_classes +from sphinx.domains.python import PyFunction, PyMethod, PyModule +from sphinx.locale import _ as sphinx_gettext +from sphinx.util.docutils import SphinxDirective + + +ISSUE_URI = 'https://bugs.python.org/issue?@action=redirect&bpo=%s' +GH_ISSUE_URI = 'https://github.com/python/cpython/issues/%s' +# Used in conf.py and updated here by python/release-tools/run_release.py +SOURCE_URI = 'https://github.com/python/cpython/tree/3.13/%s' + +# monkey-patch reST parser to disable alphabetic and roman enumerated lists +from docutils.parsers.rst.states import Body +Body.enum.converters['loweralpha'] = \ + Body.enum.converters['upperalpha'] = \ + Body.enum.converters['lowerroman'] = \ + Body.enum.converters['upperroman'] = lambda x: None + +# monkey-patch the productionlist directive to allow hyphens in group names +# https://github.com/sphinx-doc/sphinx/issues/11854 +from sphinx.domains import 
std + +std.token_re = re.compile(r'`((~?[\w-]*:)?\w+)`') + +# backport :no-index: +PyModule.option_spec['no-index'] = directives.flag + + +# Support for marking up and linking to bugs.python.org issues + +def issue_role(typ, rawtext, text, lineno, inliner, options={}, content=[]): + issue = unescape(text) + # sanity check: there are no bpo issues within these two values + if 47261 < int(issue) < 400000: + msg = inliner.reporter.error(f'The BPO ID {text!r} seems too high -- ' + 'use :gh:`...` for GitHub IDs', line=lineno) + prb = inliner.problematic(rawtext, rawtext, msg) + return [prb], [msg] + text = 'bpo-' + issue + refnode = nodes.reference(text, text, refuri=ISSUE_URI % issue) + return [refnode], [] + + +# Support for marking up and linking to GitHub issues + +def gh_issue_role(typ, rawtext, text, lineno, inliner, options={}, content=[]): + issue = unescape(text) + # sanity check: all GitHub issues have ID >= 32426 + # even though some of them are also valid BPO IDs + if int(issue) < 32426: + msg = inliner.reporter.error(f'The GitHub ID {text!r} seems too low -- ' + 'use :issue:`...` for BPO IDs', line=lineno) + prb = inliner.problematic(rawtext, rawtext, msg) + return [prb], [msg] + text = 'gh-' + issue + refnode = nodes.reference(text, text, refuri=GH_ISSUE_URI % issue) + return [refnode], [] + + +# Support for marking up implementation details + +class ImplementationDetail(SphinxDirective): + + has_content = True + final_argument_whitespace = True + + # This text is copied to templates/dummy.html + label_text = sphinx_gettext('CPython implementation detail:') + + def run(self): + self.assert_has_content() + pnode = nodes.compound(classes=['impl-detail']) + content = self.content + add_text = nodes.strong(self.label_text, self.label_text) + self.state.nested_parse(content, self.content_offset, pnode) + content = nodes.inline(pnode[0].rawsource, translatable=True) + content.source = pnode[0].source + content.line = pnode[0].line + content += pnode[0].children 
+ pnode[0].replace_self(nodes.paragraph( + '', '', add_text, nodes.Text(' '), content, translatable=False)) + return [pnode] + + +# Support for documenting decorators + +class PyDecoratorMixin(object): + def handle_signature(self, sig, signode): + ret = super(PyDecoratorMixin, self).handle_signature(sig, signode) + signode.insert(0, addnodes.desc_addname('@', '@')) + return ret + + def needs_arglist(self): + return False + + +class PyDecoratorFunction(PyDecoratorMixin, PyFunction): + def run(self): + # a decorator function is a function after all + self.name = 'py:function' + return PyFunction.run(self) + + +# TODO: Use sphinx.domains.python.PyDecoratorMethod when possible +class PyDecoratorMethod(PyDecoratorMixin, PyMethod): + def run(self): + self.name = 'py:method' + return PyMethod.run(self) + + +class PyCoroutineMixin(object): + def handle_signature(self, sig, signode): + ret = super(PyCoroutineMixin, self).handle_signature(sig, signode) + signode.insert(0, addnodes.desc_annotation('coroutine ', 'coroutine ')) + return ret + + +class PyAwaitableMixin(object): + def handle_signature(self, sig, signode): + ret = super(PyAwaitableMixin, self).handle_signature(sig, signode) + signode.insert(0, addnodes.desc_annotation('awaitable ', 'awaitable ')) + return ret + + +class PyCoroutineFunction(PyCoroutineMixin, PyFunction): + def run(self): + self.name = 'py:function' + return PyFunction.run(self) + + +class PyCoroutineMethod(PyCoroutineMixin, PyMethod): + def run(self): + self.name = 'py:method' + return PyMethod.run(self) + + +class PyAwaitableFunction(PyAwaitableMixin, PyFunction): + def run(self): + self.name = 'py:function' + return PyFunction.run(self) + + +class PyAwaitableMethod(PyAwaitableMixin, PyMethod): + def run(self): + self.name = 'py:method' + return PyMethod.run(self) + + +class PyAbstractMethod(PyMethod): + + def handle_signature(self, sig, signode): + ret = super(PyAbstractMethod, self).handle_signature(sig, signode) + signode.insert(0, 
addnodes.desc_annotation('abstractmethod ', + 'abstractmethod ')) + return ret + + def run(self): + self.name = 'py:method' + return PyMethod.run(self) + + +# Support for documenting version of removal in deprecations + +class DeprecatedRemoved(VersionChange): + required_arguments = 2 + + _deprecated_label = sphinx_gettext('Deprecated since version %s, will be removed in version %s') + _removed_label = sphinx_gettext('Deprecated since version %s, removed in version %s') + + def run(self): + # Replace the first two arguments (deprecated version and removed version) + # with a single tuple of both versions. + version_deprecated = self.arguments[0] + version_removed = self.arguments.pop(1) + self.arguments[0] = version_deprecated, version_removed + + # Set the label based on if we have reached the removal version + current_version = tuple(map(int, self.config.version.split('.'))) + removed_version = tuple(map(int, version_removed.split('.'))) + if current_version < removed_version: + versionlabels[self.name] = self._deprecated_label + versionlabel_classes[self.name] = 'deprecated' + else: + versionlabels[self.name] = self._removed_label + versionlabel_classes[self.name] = 'removed' + try: + return super().run() + finally: + # reset versionlabels and versionlabel_classes + versionlabels[self.name] = '' + versionlabel_classes[self.name] = '' + + +def setup(app): + app.add_role('issue', issue_role) + app.add_role('gh', gh_issue_role) + app.add_directive('impl-detail', ImplementationDetail) + app.add_directive('deprecated-removed', DeprecatedRemoved) + app.add_directive_to_domain('py', 'decorator', PyDecoratorFunction) + app.add_directive_to_domain('py', 'decoratormethod', PyDecoratorMethod) + app.add_directive_to_domain('py', 'coroutinefunction', PyCoroutineFunction) + app.add_directive_to_domain('py', 'coroutinemethod', PyCoroutineMethod) + app.add_directive_to_domain('py', 'awaitablefunction', PyAwaitableFunction) + app.add_directive_to_domain('py', 'awaitablemethod', 
PyAwaitableMethod) + app.add_directive_to_domain('py', 'abstractmethod', PyAbstractMethod) + return {'version': '1.0', 'parallel_read_safe': True} diff --git a/pyperformance/data-files/benchmarks/bm_sphinx/pyproject.toml b/pyperformance/data-files/benchmarks/bm_sphinx/pyproject.toml new file mode 100644 index 00000000..743d6553 --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_sphinx/pyproject.toml @@ -0,0 +1,13 @@ +[project] +name = "pyperformance_bm_sphinx" +requires-python = ">=3.11" +dependencies = [ + "pyperf", + "sphinx", +] +urls.repository = "https://github.com/python/pyperformance" +dynamic = ["version"] + +[tool.pyperformance] +name = "sphinx" +tags = "apps" diff --git a/pyperformance/data-files/benchmarks/bm_sphinx/requirements.txt b/pyperformance/data-files/benchmarks/bm_sphinx/requirements.txt new file mode 100644 index 00000000..a866afda --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_sphinx/requirements.txt @@ -0,0 +1,2 @@ +sphinx==7.3.7 +python-docs-theme==2024.6 diff --git a/pyperformance/data-files/benchmarks/bm_sphinx/run_benchmark.py b/pyperformance/data-files/benchmarks/bm_sphinx/run_benchmark.py new file mode 100644 index 00000000..1ffc1932 --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_sphinx/run_benchmark.py @@ -0,0 +1,127 @@ +""" +Build a subset of Python's documentation using Sphinx +""" + +import io +import os +from pathlib import Path +import shutil + +import pyperf +from sphinx.cmd.build import main as sphinx_main + + +# Sphinx performs a lot of filesystem I/O when it operates. This can cause the +# results to be highly variable. Instead, we pre-load all of the source files +# and then monkeypatch "open" so that Sphinx is reading from in-memory +# `io.BytesIO` and `io.StringIO` objects. 
+ + +DOC_ROOT = (Path(__file__).parent / "data" / "Doc").resolve() + + +_orig_open = open + + +preloaded_files = {} + + +def read_all_files(): + for filename in DOC_ROOT.glob("**/*"): + if filename.is_file(): + preloaded_files[str(filename)] = filename.read_bytes() + + +def open( + file, + mode="r", + buffering=-1, + encoding=None, + errors=None, + newline=None, + closefd=True, + opener=None, +): + if isinstance(file, Path): + file = str(file) + + if isinstance(file, str): + if "r" in mode and file in preloaded_files: + if "b" in mode: + return io.BytesIO(preloaded_files[file]) + else: + return io.StringIO(preloaded_files[file].decode(encoding or "utf-8")) + elif "w" in mode and DOC_ROOT in Path(file).parents: + if "b" in mode: + newfile = io.BytesIO() + else: + newfile = io.StringIO() + preloaded_files[file] = newfile + return newfile + + return _orig_open( + file, + mode=mode, + buffering=buffering, + encoding=encoding, + errors=errors, + newline=newline, + closefd=closefd, + opener=opener, + ) + + +__builtins__.open = open + + +def replace(src, dst): + pass + + +os.replace = replace + + +def build_doc(doc_root): + # Make sure there is no caching going on + t0 = pyperf.perf_counter() + sphinx_main( + [ + "--builder", + "dummy", + "--doctree-dir", + str(doc_root / "build" / "doctrees"), + "--jobs", + "1", + "--silent", + "--fresh-env", + "--write-all", + str(doc_root), + str(doc_root / "build" / "html"), + ] + ) + return pyperf.perf_counter() - t0 + + +def bench_sphinx(loops, doc_root): + if (DOC_ROOT / "build").is_dir(): + shutil.rmtree(DOC_ROOT / "build") + read_all_files() + + runs_total = 0 + for _ in range(loops): + runs_total += build_doc(doc_root) + if (DOC_ROOT / "build").is_dir(): + shutil.rmtree(DOC_ROOT / "build") + + return runs_total + + +if __name__ == "__main__": + runner = pyperf.Runner() + + runner.metadata["description"] = ( + "Render documentation with Sphinx, like the CPython docs" + ) + args = runner.parse_args() + + 
runner.bench_time_func("sphinx", bench_sphinx, DOC_ROOT) diff --git a/pyperformance/data-files/benchmarks/bm_sqlalchemy_declarative/requirements.txt b/pyperformance/data-files/benchmarks/bm_sqlalchemy_declarative/requirements.txt index 9578d705..7854731c 100644 --- a/pyperformance/data-files/benchmarks/bm_sqlalchemy_declarative/requirements.txt +++ b/pyperformance/data-files/benchmarks/bm_sqlalchemy_declarative/requirements.txt @@ -1,2 +1,2 @@ -greenlet==3.0.0rc3 +greenlet==3.1.0 sqlalchemy==1.4.19 diff --git a/pyperformance/data-files/benchmarks/bm_sqlalchemy_imperative/requirements.txt b/pyperformance/data-files/benchmarks/bm_sqlalchemy_imperative/requirements.txt index 9578d705..7854731c 100644 --- a/pyperformance/data-files/benchmarks/bm_sqlalchemy_imperative/requirements.txt +++ b/pyperformance/data-files/benchmarks/bm_sqlalchemy_imperative/requirements.txt @@ -1,2 +1,2 @@ -greenlet==3.0.0rc3 +greenlet==3.1.0 sqlalchemy==1.4.19 diff --git a/pyperformance/data-files/benchmarks/bm_sqlglot/bm_sqlglot_optimize.toml b/pyperformance/data-files/benchmarks/bm_sqlglot_v2/bm_sqlglot_v2_optimize.toml similarity index 61% rename from pyperformance/data-files/benchmarks/bm_sqlglot/bm_sqlglot_optimize.toml rename to pyperformance/data-files/benchmarks/bm_sqlglot_v2/bm_sqlglot_v2_optimize.toml index 7f59f0b8..7c0d962b 100644 --- a/pyperformance/data-files/benchmarks/bm_sqlglot/bm_sqlglot_optimize.toml +++ b/pyperformance/data-files/benchmarks/bm_sqlglot_v2/bm_sqlglot_v2_optimize.toml @@ -1,3 +1,3 @@ [tool.pyperformance] -name = "sqlglot_optimize" +name = "sqlglot_v2_optimize" extra_opts = ["optimize"] diff --git a/pyperformance/data-files/benchmarks/bm_sqlglot/bm_sqlglot_parse.toml b/pyperformance/data-files/benchmarks/bm_sqlglot_v2/bm_sqlglot_v2_parse.toml similarity index 62% rename from pyperformance/data-files/benchmarks/bm_sqlglot/bm_sqlglot_parse.toml rename to pyperformance/data-files/benchmarks/bm_sqlglot_v2/bm_sqlglot_v2_parse.toml index b886688a..09008b11 100644 
--- a/pyperformance/data-files/benchmarks/bm_sqlglot/bm_sqlglot_parse.toml +++ b/pyperformance/data-files/benchmarks/bm_sqlglot_v2/bm_sqlglot_v2_parse.toml @@ -1,3 +1,3 @@ [tool.pyperformance] -name = "sqlglot_parse" +name = "sqlglot_v2_parse" extra_opts = ["parse"] diff --git a/pyperformance/data-files/benchmarks/bm_sqlglot/bm_sqlglot_transpile.toml b/pyperformance/data-files/benchmarks/bm_sqlglot_v2/bm_sqlglot_v2_transpile.toml similarity index 61% rename from pyperformance/data-files/benchmarks/bm_sqlglot/bm_sqlglot_transpile.toml rename to pyperformance/data-files/benchmarks/bm_sqlglot_v2/bm_sqlglot_v2_transpile.toml index 25a26a3f..e435f1cc 100644 --- a/pyperformance/data-files/benchmarks/bm_sqlglot/bm_sqlglot_transpile.toml +++ b/pyperformance/data-files/benchmarks/bm_sqlglot_v2/bm_sqlglot_v2_transpile.toml @@ -1,3 +1,3 @@ [tool.pyperformance] -name = "sqlglot_transpile" +name = "sqlglot_v2_transpile" extra_opts = ["transpile"] diff --git a/pyperformance/data-files/benchmarks/bm_sqlglot/pyproject.toml b/pyperformance/data-files/benchmarks/bm_sqlglot_v2/pyproject.toml similarity index 73% rename from pyperformance/data-files/benchmarks/bm_sqlglot/pyproject.toml rename to pyperformance/data-files/benchmarks/bm_sqlglot_v2/pyproject.toml index cb8656a2..ff04d6b9 100644 --- a/pyperformance/data-files/benchmarks/bm_sqlglot/pyproject.toml +++ b/pyperformance/data-files/benchmarks/bm_sqlglot_v2/pyproject.toml @@ -1,13 +1,13 @@ [project] -name = "pyperformance_bm_sqlglot" +name = "pyperformance_bm_sqlglot_v2" requires-python = ">=3.7" dependencies = [ "pyperf", - "sqlglot", + "sqlglot_v2", ] urls = {repository = "https://github.com/python/pyperformance"} dynamic = ["version"] [tool.pyperformance] -name = "sqlglot" +name = "sqlglot_v2" extra_opts = ["normalize"] diff --git a/pyperformance/data-files/benchmarks/bm_sqlglot/requirements.txt b/pyperformance/data-files/benchmarks/bm_sqlglot_v2/requirements.txt similarity index 100% rename from 
pyperformance/data-files/benchmarks/bm_sqlglot/requirements.txt rename to pyperformance/data-files/benchmarks/bm_sqlglot_v2/requirements.txt diff --git a/pyperformance/data-files/benchmarks/bm_sqlglot/run_benchmark.py b/pyperformance/data-files/benchmarks/bm_sqlglot_v2/run_benchmark.py similarity index 95% rename from pyperformance/data-files/benchmarks/bm_sqlglot/run_benchmark.py rename to pyperformance/data-files/benchmarks/bm_sqlglot_v2/run_benchmark.py index f8fbb79a..0777ad3c 100644 --- a/pyperformance/data-files/benchmarks/bm_sqlglot/run_benchmark.py +++ b/pyperformance/data-files/benchmarks/bm_sqlglot_v2/run_benchmark.py @@ -156,8 +156,8 @@ def bench_optimize(loops): def bench_normalize(loops): elapsed = 0 - conjunction = parse_one("(A AND B) OR (C AND D) OR (E AND F) OR (G AND H)") for _ in range(loops): + conjunction = parse_one("(A AND B) OR (C AND D) OR (E AND F) OR (G AND H)") t0 = pyperf.perf_counter() normalize.normalize(conjunction) elapsed += pyperf.perf_counter() - t0 @@ -186,9 +186,9 @@ def add_parser_args(parser): if __name__ == "__main__": runner = pyperf.Runner(add_cmdline_args=add_cmdline_args) - runner.metadata['description'] = "SQLGlot benchmark" + runner.metadata['description'] = "SQLGlot V2 benchmark" add_parser_args(runner.argparser) args = runner.parse_args() benchmark = args.benchmark - runner.bench_time_func(f"sqlglot_{benchmark}", BENCHMARKS[benchmark]) + runner.bench_time_func(f"sqlglot_v2_{benchmark}", BENCHMARKS[benchmark]) diff --git a/pyperformance/data-files/benchmarks/bm_tornado_http/requirements.txt b/pyperformance/data-files/benchmarks/bm_tornado_http/requirements.txt index ca2eb1c6..e9eefea6 100644 --- a/pyperformance/data-files/benchmarks/bm_tornado_http/requirements.txt +++ b/pyperformance/data-files/benchmarks/bm_tornado_http/requirements.txt @@ -1 +1 @@ -tornado==6.1 +tornado==6.2 diff --git a/pyperformance/requirements/requirements.txt b/pyperformance/requirements/requirements.txt index 7936dbb9..644a4965 100644 --- 
a/pyperformance/requirements/requirements.txt +++ b/pyperformance/requirements/requirements.txt @@ -10,5 +10,5 @@ psutil==5.9.5 # via # -r requirements.in # pyperf -pyperf==2.6.3 +pyperf==2.9.0 # via -r requirements.in diff --git a/pyperformance/run.py b/pyperformance/run.py index f572181c..a9f9a102 100644 --- a/pyperformance/run.py +++ b/pyperformance/run.py @@ -108,13 +108,13 @@ def run_benchmarks(should_run, python, options): else: benchmarks[bench] = (common, bench_runid) continue - venv = VenvForBenchmarks.ensure( - venv_root, - info, - upgrade='oncreate', - inherit_environ=options.inherit_environ, - ) try: + venv = VenvForBenchmarks.ensure( + venv_root, + info, + upgrade='oncreate', + inherit_environ=options.inherit_environ, + ) # XXX Do not override when there is a requirements collision. venv.ensure_reqs(bench) except _venv.RequirementsInstallationFailedError: @@ -129,7 +129,7 @@ def run_benchmarks(should_run, python, options): run_count = str(len(to_run)) errors = [] - pyperf_opts = get_pyperf_opts(options) + base_pyperf_opts = get_pyperf_opts(options) import pyperf for index, bench in enumerate(to_run): @@ -159,12 +159,14 @@ def add_bench(dest_suite, obj): return dest_suite if name in loops: - pyperf_opts.append(f"--loops={loops[name]}") + pyperf_opts = [*base_pyperf_opts, f"--loops={loops[name]}"] + else: + pyperf_opts = base_pyperf_opts bench_venv, bench_runid = benchmarks.get(bench) if bench_venv is None: print("ERROR: Benchmark %s failed: could not install requirements" % name) - errors.append(name) + errors.append((name, "Install requirements error")) continue try: result = bench.run( @@ -174,10 +176,17 @@ def add_bench(dest_suite, obj): venv=bench_venv, verbose=options.verbose, ) + except TimeoutError as exc: + print("ERROR: Benchmark %s timed out" % name) + errors.append((name, exc)) + except RuntimeError as exc: + print("ERROR: Benchmark %s failed: %s" % (name, exc)) + traceback.print_exc() + errors.append((name, exc)) except Exception as exc: 
print("ERROR: Benchmark %s failed: %s" % (name, exc)) traceback.print_exc() - errors.append(name) + errors.append((name, exc)) else: suite = add_bench(suite, result) @@ -233,5 +242,10 @@ def get_pyperf_opts(options): opts.append('--inherit-environ=%s' % ','.join(options.inherit_environ)) if options.min_time: opts.append('--min-time=%s' % options.min_time) + if options.timeout: + opts.append('--timeout=%s' % options.timeout) + if options.hook: + for hook in options.hook: + opts.append('--hook=%s' % hook) return opts diff --git a/pyperformance/tests/data/find-pyperformance.py b/pyperformance/tests/data/find-pyperformance.py index 2f6fa6c1..57c6e247 100644 --- a/pyperformance/tests/data/find-pyperformance.py +++ b/pyperformance/tests/data/find-pyperformance.py @@ -13,7 +13,7 @@ testsroot = os.path.dirname(datadir) pkgroot = os.path.dirname(testsroot) reporoot = os.path.realpath(os.path.dirname(pkgroot)) -marker = os.path.join(reporoot, 'setup.py') +marker = os.path.join(reporoot, 'pyproject.toml') if not os.path.exists(marker): sys.exit(f'ERROR: pyperformance is not an editable install ({reporoot})') actual = os.path.realpath(os.path.abspath(pyperformance.PKG_ROOT)) diff --git a/pyperformance/tests/test_commands.py b/pyperformance/tests/test_commands.py index 391398e2..42574f26 100644 --- a/pyperformance/tests/test_commands.py +++ b/pyperformance/tests/test_commands.py @@ -173,6 +173,25 @@ def test_run_test_benchmarks(self): capture=None, ) + def test_run_with_hook(self): + # We expect this to fail, since pystats requires a special build of Python + filename = self.resolve_tmp('bench-test-hook.json') + + stdout = self.run_pyperformance( + 'run', + '--manifest', os.path.join(tests.DATA_DIR, 'MANIFEST'), + '-b', 'all', + '-o', filename, + '--hook', 'pystats', + exitcode=1, + capture='combined' + ) + + self.assertIn( + "Can not collect pystats because python was not built with --enable-pystats", + stdout + ) + ################################### # compile @@ -228,6 
+247,7 @@ def create_compile_config(self, *revisions, bench_dir = {outdir} lto = {not fast} pgo = {not fast} + jit = no install = True [run_benchmark] @@ -398,7 +418,7 @@ def test_compare_single_value(self): Performance version: 0.2 ### call_simple ### - 7896.0 kB -> 7900.0 kB: 1.00x larger + 7896.0 KiB -> 7900.0 KiB: 1.00x larger ''').lstrip()) def test_compare_csv(self): @@ -457,11 +477,11 @@ def test_compare_table_single_value(self): Performance version: 0.2 - +-------------+-----------+-----------+--------------+------------------------------------------+ - | Benchmark | mem1.json | mem2.json | Change | Significance | - +=============+===========+===========+==============+==========================================+ - | call_simple | 7896.0 kB | 7900.0 kB | 1.00x larger | (benchmark only contains a single value) | - +-------------+-----------+-----------+--------------+------------------------------------------+ + +-------------+------------+------------+--------------+------------------------------------------+ + | Benchmark | mem1.json | mem2.json | Change | Significance | + +=============+============+============+==============+==========================================+ + | call_simple | 7896.0 KiB | 7900.0 KiB | 1.00x larger | (benchmark only contains a single value) | + +-------------+------------+------------+--------------+------------------------------------------+ ''').lstrip()) diff --git a/pyperformance/venv.py b/pyperformance/venv.py index 17bd1bee..41066357 100644 --- a/pyperformance/venv.py +++ b/pyperformance/venv.py @@ -227,7 +227,7 @@ def install_pyperformance(self): env=self._env, ) if ec != 0: - raise RequirementsInstallationFailedError(root_dir) + raise _venv.RequirementsInstallationFailedError(root_dir) else: version = pyperformance.__version__ self.ensure_reqs([f'pyperformance=={version}']) diff --git a/pyproject.toml b/pyproject.toml index e5ea4dd0..da88a7b9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -58,7 +58,7 @@ classifiers 
= [ 'Programming Language :: Python :: 3', 'Programming Language :: Python', ] -requires-python = ">=3.7" +requires-python = ">=3.9" dependencies = [ "pyperf", "tomli; python_version < '3.11'", @@ -85,7 +85,7 @@ find = {} # Scanning implicit namespaces is active by default version = {attr = "pyperformance.__version__"} [tool.mypy] -python_version = "3.7" +python_version = "3.9" pretty = true enable_error_code = "ignore-without-code" disallow_any_generics = true diff --git a/tox.ini b/tox.ini index f9030b3b..12ab9b65 100644 --- a/tox.ini +++ b/tox.ini @@ -13,7 +13,7 @@ basepython = python3 basepython = python3 deps= sphinx -whitelist_externals = make +allowlist_externals = make commands= make -C doc clean html