diff --git a/.coderabbit.yml b/.coderabbit.yml new file mode 100644 index 0000000000..6a96844e23 --- /dev/null +++ b/.coderabbit.yml @@ -0,0 +1,3 @@ +reviews: + path_filters: + - "!Lib/**" diff --git a/.cspell.dict/cpython.txt b/.cspell.dict/cpython.txt index 0ac387634d..48059cf4e4 100644 --- a/.cspell.dict/cpython.txt +++ b/.cspell.dict/cpython.txt @@ -6,36 +6,56 @@ badsyntax basetype boolop bxor +cached_tsver cellarg cellvar cellvars cmpop +denom dictoffset elts excepthandler +fileutils finalbody +formatfloat freevar freevars fromlist heaptype +HIGHRES IMMUTABLETYPE kwonlyarg kwonlyargs +lasti linearise maxdepth mult nkwargs +noraise +numer orelse +pathconfig patma posonlyarg posonlyargs prec +preinitialized +PYTHREAD_NAME +SA_ONSTACK stackdepth +stringlib +structseq +subparams +tok_oldval +tvars unaryop unparse unparser VARKEYWORDS varkwarg wbits +weakreflist withitem -withs \ No newline at end of file +withs +xstat +XXPRIME \ No newline at end of file diff --git a/.cspell.dict/python-more.txt b/.cspell.dict/python-more.txt index a482c880cc..8e1c012838 100644 --- a/.cspell.dict/python-more.txt +++ b/.cspell.dict/python-more.txt @@ -1,17 +1,37 @@ +abiflags abstractmethods +aenter +aexit aiter anext +appendleft +argcount arrayiterator arraytype asend +asyncgen athrow +backslashreplace +baserepl basicsize +bdfl +bigcharset +bignum +bivariant +breakpointhook cformat +chunksize classcell +closefd closesocket codepoint codepoints +codesize +contextvar cpython +cratio +dealloc +debugbuild decompressor defaultaction descr @@ -19,15 +39,29 @@ dictcomp dictitems dictkeys dictview +digestmod +dllhandle docstring docstrings dunder +endianness +endpos eventmask +excepthook +exceptiongroup +exitfuncs +extendleft +fastlocals fdel +fedcba fget fileencoding fillchar +fillvalue finallyhandler +firstiter +firstlineno +fnctl frombytes fromhex fromunicode @@ -35,58 +69,135 @@ fset fspath fstring fstrings +ftruncate genexpr getattro +getcodesize +getdefaultencoding +getfilesystemencodeerrors +getfilesystemencoding getformat +getframe +getframemodulename getnewargs +getpip +getrandom +getrecursionlimit +getrefcount +getsizeof +getswitchinterval getweakrefcount getweakrefs +getwindowsversion +gmtoff +groupdict +groupindex +hamt hostnames +idfunc idiv +idxs impls +indexgroup infj instancecheck instanceof +irepeat isabstractmethod +isbytes +iscased +isfinal +istext itemiterator itemsize iternext +keepends +keyfunc keyiterator kwarg kwargs +kwdefaults +kwonlyargcount +lastgroup +lastindex linearization linearize listcomp +longrange +lvalue mappingproxy +maskpri +maxdigits +MAXGROUPS +MAXREPEAT maxsplit +maxunicode memoryview memoryviewiterator metaclass metaclasses metatype +mformat mro mros +multiarch +namereplace nanj +nbytes +ncallbacks ndigits ndim +nldecoder +nlocals +NOARGS nonbytes +Nonprintable origname +ospath +pendingcr +phello +platlibdir +popleft posixsubprocess +posonly +posonlyargcount +prepending +profilefunc +pycache +pycodecs +pycs pyexpat -pytraverse +PYTHONBREAKPOINT PYTHONDEBUG +PYTHONDONTWRITEBYTECODE +PYTHONHASHSEED PYTHONHOME PYTHONINSPECT +PYTHONINTMAXSTRDIGITS +PYTHONNOUSERSITE PYTHONOPTIMIZE PYTHONPATH PYTHONPATH +PYTHONSAFEPATH +PYTHONUNBUFFERED PYTHONVERBOSE +PYTHONWARNDEFAULTENCODING PYTHONWARNINGS +pytraverse +PYVENV qualname +quotetabs radd rdiv rdivmod +readall +readbuffer reconstructor +refcnt +releaselevel +reverseitemiterator +reverseiterator +reversekeyiterator reversevalueiterator rfloordiv rlshift @@ -95,22 +206,60 @@ rpow rrshift rsub rtruediv +rvalue scproxy +seennl setattro setcomp +setrecursionlimit +setswitchinterval showwarnmsg -warnmsg +signum +slotnames +STACKLESS stacklevel +stacksize +startpos +subclassable subclasscheck subclasshook +suboffset +suboffsets +SUBPATTERN +sumprod +surrogateescape +surrogatepass +sysconf +sysconfigdata +sysvars +teedata +thisclass +titlecased +tkapp +tobytes +tolist +toreadonly +TPFLAGS +tracefunc +unimportable unionable unraisablehook +unsliceable +urandom valueiterator vararg varargs varnames warningregistry +warnmsg +warnoptions warnopts +weaklist weakproxy +weakrefs winver -xopts \ No newline at end of file +withdata +xmlcharrefreplace +xoptions +xopts +yieldfrom diff --git a/.cspell.dict/rust-more.txt b/.cspell.dict/rust-more.txt index f2177dd4c7..6f89fdfafe 100644 --- a/.cspell.dict/rust-more.txt +++ b/.cspell.dict/rust-more.txt @@ -1,47 +1,86 @@ ahash +arrayvec bidi biguint bindgen +bitand bitflags +bitor +bitxor bstr byteorder +byteset +caseless chrono consts +cranelift cstring +datelike +deserializer +deserializers +fdiv +flamescope flate2 fract +getres hasher +hexf +hexversion idents +illumos indexmap insta keccak lalrpop +lexopt libc +libcall +libloading libz longlong Manually maplit memmap +memmem metas modpow +msvc +muldiv nanos +nonoverlapping objclass peekable powc powf +powi prepended punct replacen +rmatch +rposition rsplitn rustc rustfmt +rustyline +seedable seekfrom +siphash +siphasher splitn subsec +thiserror +timelike timsort trai ulonglong unic unistd +unraw +unsync +wasip1 +wasip2 +wasmbind +wasmtime +widestring winapi -winsock \ No newline at end of file +winsock diff --git a/.cspell.json b/.cspell.json index 562b300ffa..6112c35d59 100644 --- a/.cspell.json +++ b/.cspell.json @@ -42,28 +42,43 @@ ], "ignorePaths": [ "**/__pycache__/**", + "target/**", "Lib/**" ], // words - list of words to be always considered correct "words": [ - // RustPython + "RUSTPYTHONPATH", + // RustPython terms + "aiterable", + "alnum", "baseclass", + "boxvec", "Bytecode", "cfgs", "codegen", + "coro", "dedentations", "dedents", "deduped", "downcasted", "dumpable", + "emscripten", + "excs", + "finalizer", "GetSet", + "groupref", "internable", + "jitted", + "jitting", + "lossily", "makeunicodedata", "miri", "notrace", + "openat", "pyarg", "pyarg", "pyargs", + "pyast", "PyAttr", "pyc", "PyClass", @@ -72,6 +87,8 @@ "PyFunction", "pygetset", "pyimpl", + "pylib", + "pymath", "pymember", "PyMethod", "PyModule", @@ -84,6 +101,7 @@ "PyResult", "pyslot", "PyStaticMethod", + "pystone", "pystr", "pystruct", "pystructseq", @@ -91,15 +109,33 @@ "reducelib", "richcompare", "RustPython", + "significand", "struc", + "summands", // plural of summand + "sysmodule", "tracebacks", "typealiases", - "Unconstructible", + "unconstructible", "unhashable", "uninit", "unraisable", + "unresizable", "wasi", "zelf", + // unix + "CLOEXEC", + "codeset", + "endgrent", + "gethrvtime", + "getrusage", + "nanosleep", + "sigaction", + "WRLCK", + // win32 + "birthtime", + "IFEXEC", + // "stat" + "FIRMLINK" ], // flagWords - list of words to be always considered incorrect "flagWords": [ diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile new file mode 100644 index 0000000000..339cdb69bb --- /dev/null +++ b/.devcontainer/Dockerfile @@ -0,0 +1,6 @@ +FROM mcr.microsoft.com/vscode/devcontainers/rust:1-bullseye + +# Install clang +RUN apt-get update \ + && apt-get install -y clang \ + && rm -rf /var/lib/apt/lists/* diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index d60eee2130..8838cf6a96 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -1,4 +1,25 @@ { - "image": "mcr.microsoft.com/devcontainers/base:jammy", - "onCreateCommand": "curl https://sh.rustup.rs -sSf | sh -s -- -y" -} \ No newline at end of file + "name": "Rust", + "build": { + "dockerfile": "Dockerfile" + }, + "runArgs": ["--cap-add=SYS_PTRACE", "--security-opt", "seccomp=unconfined"], + "customizations": { + "vscode": { + "settings": { + "lldb.executable": "/usr/bin/lldb", + // VS Code don't watch files under ./target + "files.watcherExclude": { + "**/target/**": true + }, + "extensions": [ + "rust-lang.rust-analyzer", + "tamasfe.even-better-toml", + "vadimcn.vscode-lldb", + "mutantdino.resourcemonitor" + ] + } + } + }, + "remoteUser": "vscode" +} diff --git a/.github/copilot-instructions.md b/.github/copilot-instructions.md new file mode 100644 index 0000000000..f2427688c8 --- /dev/null +++ b/.github/copilot-instructions.md @@ -0,0 +1,212 @@ +# GitHub Copilot Instructions for RustPython + +This document provides guidelines for working with GitHub Copilot when contributing to the RustPython project. + +## Project Overview + +RustPython is a Python 3 interpreter written in Rust, implementing Python 3.13.0+ compatibility. The project aims to provide: + +- A complete Python-3 environment entirely in Rust (not CPython bindings) +- A clean implementation without compatibility hacks +- Cross-platform support, including WebAssembly compilation +- The ability to embed Python scripting in Rust applications + +## Repository Structure + +- `src/` - Top-level code for the RustPython binary +- `vm/` - The Python virtual machine implementation + - `builtins/` - Python built-in types and functions + - `stdlib/` - Essential standard library modules implemented in Rust, required to run the Python core +- `compiler/` - Python compiler components + - `parser/` - Parser for converting Python source to AST + - `core/` - Bytecode representation in Rust structures + - `codegen/` - AST to bytecode compiler +- `Lib/` - CPython's standard library in Python (copied from CPython) +- `derive/` - Rust macros for RustPython +- `common/` - Common utilities +- `extra_tests/` - Integration tests and snippets +- `stdlib/` - Non-essential Python standard library modules implemented in Rust (useful but not required for core functionality) +- `wasm/` - WebAssembly support +- `jit/` - Experimental JIT compiler implementation +- `pylib/` - Python standard library packaging (do not modify this directory directly - its contents are generated automatically) + +## Important Development Notes + +### Running Python Code + +When testing Python code, always use RustPython instead of the standard `python` command: + +```bash +# Use this instead of python script.py +cargo run -- script.py + +# For interactive REPL +cargo run + +# With specific features +cargo run --features ssl + +# Release mode (recommended for better performance) +cargo run --release -- script.py +``` + +### Comparing with CPython + +When you need to compare behavior with CPython or run test suites: + +```bash +# Use python command to explicitly run CPython +python my_test_script.py + +# Run RustPython +cargo run -- my_test_script.py +``` + +### Working with the Lib Directory + +The `Lib/` directory contains Python standard library files copied from the CPython repository. Important notes: + +- These files should be edited very conservatively +- Modifications should be minimal and only to work around RustPython limitations +- Tests in `Lib/test` often use one of the following markers: + - Add a `# TODO: RUSTPYTHON` comment when modifications are made + - `unittest.skip("TODO: RustPython ")` + - `unittest.expectedFailure` with `# TODO: RUSTPYTHON ` comment + +### Testing + +```bash +# Run Rust unit tests +cargo test --workspace --exclude rustpython_wasm + +# Run Python snippets tests +cd extra_tests +pytest -v + +# Run the Python test module +cargo run --release -- -m test ${TEST_MODULE} +cargo run --release -- -m test test_unicode # to test test_unicode.py + +# Run the Python test module with specific function +cargo run --release -- -m test test_unicode -k test_unicode_escape +``` + +### Determining What to Implement + +Run `./whats_left.py` to get a list of unimplemented methods, which is helpful when looking for contribution opportunities. + +## Coding Guidelines + +### Rust Code + +- Follow the default rustfmt code style (`cargo fmt` to format) +- **IMPORTANT**: Always run clippy to lint code (`cargo clippy`) before completing tasks. Fix any warnings or lints that are introduced by your changes +- Follow Rust best practices for error handling and memory management +- Use the macro system (`pyclass`, `pymodule`, `pyfunction`, etc.) when implementing Python functionality in Rust + +### Python Code + +- **IMPORTANT**: In most cases, Python code should not be edited. Bug fixes should be made through Rust code modifications only +- Follow PEP 8 style for custom Python code +- Use ruff for linting Python code +- Minimize modifications to CPython standard library files + +## Integration Between Rust and Python + +The project provides several mechanisms for integration: + +- `pymodule` macro for creating Python modules in Rust +- `pyclass` macro for implementing Python classes in Rust +- `pyfunction` macro for exposing Rust functions to Python +- `PyObjectRef` and other types for working with Python objects in Rust + +## Common Patterns + +### Implementing a Python Module in Rust + +```rust +#[pymodule] +mod mymodule { + use rustpython_vm::prelude::*; + + #[pyfunction] + fn my_function(value: i32) -> i32 { + value * 2 + } + + #[pyattr] + #[pyclass(name = "MyClass")] + #[derive(Debug, PyPayload)] + struct MyClass { + value: usize, + } + + #[pyclass] + impl MyClass { + #[pymethod] + fn get_value(&self) -> usize { + self.value + } + } +} +``` + +### Adding a Python Module to the Interpreter + +```rust +vm.add_native_module( + "my_module_name".to_owned(), + Box::new(my_module::make_module), +); +``` + +## Building for Different Targets + +### WebAssembly + +```bash +# Build for WASM +cargo build --target wasm32-wasip1 --no-default-features --features freeze-stdlib,stdlib --release +``` + +### JIT Support + +```bash +# Enable JIT support +cargo run --features jit +``` + +### SSL Support + +```bash +# Enable SSL support +cargo run --features ssl +``` + +## Test Code Modification Rules + +**CRITICAL: Test code modification restrictions** +- NEVER comment out or delete any test code lines except for removing `@unittest.expectedFailure` decorators and upper TODO comments +- NEVER modify test assertions, test logic, or test data +- When a test cannot pass due to missing language features, keep it as expectedFailure and document the reason +- The only acceptable modifications to test files are: + 1. Removing `@unittest.expectedFailure` decorators and the upper TODO comments when tests actually pass + 2. Adding `@unittest.expectedFailure` decorators when tests cannot be fixed + +**Examples of FORBIDDEN modifications:** +- Commenting out test lines +- Changing test assertions +- Modifying test data or expected results +- Removing test logic + +**Correct approach when tests fail due to unsupported syntax:** +- Keep the test as `@unittest.expectedFailure` +- Document that it requires PEP 695 support +- Focus on tests that can be fixed through Rust code changes only + +## Documentation + +- Check the [architecture document](architecture/architecture.md) for a high-level overview +- Read the [development guide](DEVELOPMENT.md) for detailed setup instructions +- Generate documentation with `cargo doc --no-deps --all` +- Online documentation is available at [docs.rs/rustpython](https://docs.rs/rustpython/) \ No newline at end of file diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 586e00be26..fec9d4d838 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -16,7 +16,7 @@ concurrency: cancel-in-progress: true env: - CARGO_ARGS: --no-default-features --features stdlib,importlib,encodings,sqlite,ssl + CARGO_ARGS: --no-default-features --features stdlib,importlib,stdio,encodings,sqlite,ssl # Skip additional tests on Windows. They are checked on Linux and MacOS. # test_glob: many failing tests # test_io: many failing tests @@ -32,11 +32,6 @@ env: test_pathlib test_posixpath test_venv - # configparser: https://github.com/RustPython/RustPython/issues/4995#issuecomment-1582397417 - # socketserver: seems related to configparser crash. - MACOS_SKIPS: >- - test_configparser - test_socketserver # PLATFORM_INDEPENDENT_TESTS are tests that do not depend on the underlying OS. They are currently # only run on Linux to speed up the CI. PLATFORM_INDEPENDENT_TESTS: >- @@ -228,13 +223,13 @@ jobs: - name: Check compilation for freeBSD run: cargo check --target x86_64-unknown-freebsd - - name: Prepare repository for redox compilation - run: bash scripts/redox/uncomment-cargo.sh - - name: Check compilation for Redox - uses: coolreader18/redoxer-action@v1 - with: - command: check - args: --ignore-rust-version + # - name: Prepare repository for redox compilation + # run: bash scripts/redox/uncomment-cargo.sh + # - name: Check compilation for Redox + # uses: coolreader18/redoxer-action@v1 + # with: + # command: check + # args: --ignore-rust-version snippets_cpython: if: ${{ !contains(github.event.pull_request.labels.*.name, 'skip:ci') }} @@ -284,7 +279,7 @@ jobs: run: target/release/rustpython -m test -j 1 -u all --slowest --fail-env-changed -v -x ${{ env.PLATFORM_INDEPENDENT_TESTS }} - if: runner.os == 'macOS' name: run cpython platform-dependent tests (MacOS) - run: target/release/rustpython -m test -j 1 --slowest --fail-env-changed -v -x ${{ env.PLATFORM_INDEPENDENT_TESTS }} ${{ env.MACOS_SKIPS }} + run: target/release/rustpython -m test -j 1 --slowest --fail-env-changed -v -x ${{ env.PLATFORM_INDEPENDENT_TESTS }} - if: runner.os == 'Windows' name: run cpython platform-dependent tests (windows partial - fixme) run: @@ -313,13 +308,6 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - - name: install extra dictionaries - run: npm install @cspell/dict-en_us @cspell/dict-cpp @cspell/dict-python @cspell/dict-rust @cspell/dict-win32 @cspell/dict-shell - - name: spell checker - uses: streetsidesoftware/cspell-action@v6 - with: - files: '**/*.rs' - incremental_files_only: true - uses: dtolnay/rust-toolchain@stable with: components: rustfmt, clippy @@ -331,14 +319,26 @@ jobs: with: python-version: ${{ env.PYTHON_VERSION }} - name: install ruff - run: python -m pip install ruff==0.0.291 # astral-sh/ruff#7778 - - name: run python lint - run: ruff extra_tests wasm examples --exclude='./.*',./Lib,./vm/Lib,./benches/ --select=E9,F63,F7,F82 --show-source + run: python -m pip install ruff==0.11.8 + - name: Ensure docs generate no warnings + run: cargo doc + - name: run ruff check + run: ruff check --diff + - name: run ruff format + run: ruff format --check - name: install prettier run: yarn global add prettier && echo "$(yarn global bin)" >>$GITHUB_PATH - name: check wasm code with prettier # prettier doesn't handle ignore files very well: https://github.com/prettier/prettier/issues/8506 run: cd wasm && git ls-files -z | xargs -0 prettier --check -u + # Keep cspell check as the last step. This is optional test. + - name: install extra dictionaries + run: npm install @cspell/dict-en_us @cspell/dict-cpp @cspell/dict-python @cspell/dict-rust @cspell/dict-win32 @cspell/dict-shell + - name: spell checker + uses: streetsidesoftware/cspell-action@v7 + with: + files: '**/*.rs' + incremental_files_only: true miri: if: ${{ !contains(github.event.pull_request.labels.*.name, 'skip:ci') }} diff --git a/.github/workflows/cron-ci.yaml b/.github/workflows/cron-ci.yaml index 4e88d749fb..6389fee1cb 100644 --- a/.github/workflows/cron-ci.yaml +++ b/.github/workflows/cron-ci.yaml @@ -84,7 +84,7 @@ jobs: - name: Collect what is left data run: | chmod +x ./whats_left.py - ./whats_left.py > whats_left.temp + ./whats_left.py --features "ssl,sqlite" > whats_left.temp env: RUSTPYTHONPATH: ${{ github.workspace }}/Lib - name: Upload data to the website diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 3b0a797e0c..f6a1ad3209 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -80,20 +80,6 @@ jobs: run: cp target/${{ matrix.platform.target }}/release/rustpython.exe target/rustpython-release-${{ runner.os }}-${{ matrix.platform.target }}.exe if: runner.os == 'Windows' - - name: Install cargo-packager - run: cargo binstall cargo-packager - - - name: Generate MSI - if: runner.os == 'Windows' - run: cargo packager -f wix --release -o installer - - - name: Upload MSI - if: runner.os == 'Windows' - uses: actions/upload-artifact@v4 - with: - name: rustpython-installer-msi-${{ runner.os }}-${{ matrix.platform.target }} - path: installer/*.msi - - name: Upload Binary Artifacts uses: actions/upload-artifact@v4 with: diff --git a/.gitignore b/.gitignore index 485272adfb..cb7165aaca 100644 --- a/.gitignore +++ b/.gitignore @@ -2,11 +2,11 @@ /*/target **/*.rs.bk **/*.bytecode -__pycache__ +__pycache__/ **/*.pytest_cache .*sw* .repl_history.txt -.vscode +.vscode/ wasm-pack.log .idea/ .envrc diff --git a/Cargo.lock b/Cargo.lock index 577ef516bc..f059732550 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -16,15 +16,15 @@ checksum = "aae1277d39aeec15cb388266ecc24b11c80469deae6067e17a1a7aa9e5c1f234" [[package]] name = "ahash" -version = "0.8.11" +version = "0.8.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e89da841a80418a9b391ebaea17f5c112ffaaa96f621d2c285b5174da76b9011" +checksum = "5a15f179cd60c4584b8a8c596927aadc462e27f2ca70c04e0071964a73ba7a75" dependencies = [ "cfg-if", - "getrandom 0.2.15", + "getrandom 0.3.2", "once_cell", "version_check", - "zerocopy 0.7.35", + "zerocopy", ] [[package]] @@ -57,11 +57,67 @@ dependencies = [ "libc", ] +[[package]] +name = "anes" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299" + +[[package]] +name = "anstream" +version = "0.6.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8acc5369981196006228e28809f761875c0327210a891e941f4c683b3a99529b" +dependencies = [ + "anstyle", + "anstyle-parse", + "anstyle-query", + "anstyle-wincon", + "colorchoice", + "is_terminal_polyfill", + "utf8parse", +] + +[[package]] +name = "anstyle" +version = "1.0.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "55cc3b69f167a1ef2e161439aa98aed94e6028e5f9a59be9a6ffb47aef1651f9" + +[[package]] +name = "anstyle-parse" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b2d16507662817a6a20a9ea92df6652ee4f94f914589377d69f3b21bc5798a9" +dependencies = [ + "utf8parse", +] + +[[package]] +name = "anstyle-query" +version = "1.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "79947af37f4177cfead1110013d678905c37501914fba0efea834c3fe9a8d60c" +dependencies = [ + "windows-sys 0.59.0", +] + +[[package]] +name = "anstyle-wincon" +version = "3.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ca3534e77181a9cc07539ad51f2141fe32f6c3ffd4df76db8ad92346b003ae4e" +dependencies = [ + "anstyle", + "once_cell", + "windows-sys 0.59.0", +] + [[package]] name = "anyhow" -version = "1.0.96" +version = "1.0.98" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6b964d184e89d9b6b67dd2715bc8e74cf3107fb2b529990c90cf517326150bf4" +checksum = "e16d2d3311acee920a9eb8d33b8cbc1787ce4a264e85f964c2404b969bdcd487" [[package]] name = "approx" @@ -93,17 +149,6 @@ dependencies = [ "bytemuck", ] -[[package]] -name = "atty" -version = "0.2.14" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" -dependencies = [ - "hermit-abi 0.1.19", - "libc", - "winapi", -] - [[package]] name = "autocfg" version = "1.4.0" @@ -112,41 +157,28 @@ checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26" [[package]] name = "base64" -version = "0.13.1" +version = "0.22.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9e1b586273c5702936fe7b7d6896644d8be71e6314cfe09d3167c95f712589e8" - -[[package]] -name = "bind_syn" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7d6608ba072b4bc847774fac76963956592b5cdfa3751afcefa252fb61cb85b9" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.98", -] +checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" [[package]] name = "bindgen" -version = "0.64.0" +version = "0.71.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c4243e6031260db77ede97ad86c27e501d646a27ab57b59a574f725d98ab1fb4" +checksum = "5f58bf3d7db68cfbac37cfc485a8d711e87e064c3d0fe0435b92f7a407f9d6b3" dependencies = [ - "bitflags 1.3.2", + "bitflags 2.9.0", "cexpr", "clang-sys", - "lazy_static 1.5.0", - "lazycell", + "itertools 0.13.0", "log", - "peeking_take_while", + "prettyplease", "proc-macro2", "quote", "regex", - "rustc-hash 1.1.0", + "rustc-hash", "shlex", - "syn 1.0.109", - "which 4.4.2", + "syn 2.0.101", ] [[package]] @@ -157,9 +189,9 @@ checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" [[package]] name = "bitflags" -version = "2.8.0" +version = "2.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8f68f53c83ab957f72c32642f3868eec03eb974d1fb82e453128456482613d36" +checksum = "5c8214115b7bf84099f1309324e63141d4c5d7cc26862f97a0a857dbefe165bd" [[package]] name = "blake2" @@ -181,9 +213,9 @@ dependencies = [ [[package]] name = "bstr" -version = "1.11.3" +version = "1.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "531a9155a481e2ee699d4f98f43c0ca4ff8ee1bfd55c31e9e98fb29d2b176fe0" +checksum = "234113d19d0d7d613b40e86fb654acf958910802bcceab913a4f9e7cda03b1a4" dependencies = [ "memchr", "regex-automata", @@ -201,34 +233,27 @@ dependencies = [ [[package]] name = "bytemuck" -version = "1.21.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ef657dfab802224e671f5818e9a4935f9b1957ed18e58292690cc39e7a4092a3" - -[[package]] -name = "byteorder" -version = "1.5.0" +version = "1.23.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" +checksum = "9134a6ef01ce4b366b50689c94f82c14bc72bc5d0386829828a2e2752ef7958c" [[package]] name = "bzip2" -version = "0.4.4" +version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bdb116a6ef3f6c3698828873ad02c3014b3c85cadb88496095628e3ef1e347f8" +checksum = "49ecfb22d906f800d4fe833b6282cf4dc1c298f5057ca0b5445e5c209735ca47" dependencies = [ "bzip2-sys", - "libc", + "libbz2-rs-sys", ] [[package]] name = "bzip2-sys" -version = "0.1.12+1.0.8" +version = "0.1.13+1.0.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "72ebc2f1a417f01e1da30ef264ee86ae31d2dcd2d603ea283d3c244a883ca2a9" +checksum = "225bff33b2141874fe80d71e07d6eec4f85c5c216453dd96388240f96e1acc14" dependencies = [ "cc", - "libc", "pkg-config", ] @@ -258,35 +283,13 @@ dependencies = [ [[package]] name = "cc" -version = "1.2.14" +version = "1.2.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0c3d1b2e905a3a7b00a6141adb0e4c0bb941d11caf55349d863942a1cc44e3c9" +checksum = "8691782945451c1c383942c4874dbe63814f61cb57ef773cda2972682b7bb3c0" dependencies = [ "shlex", ] -[[package]] -name = "cex" -version = "0.5.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8b0114a3f232423fadbbdbb692688e3e68c3b58b4b063ac3a7d0190d561080da" -dependencies = [ - "cex_derive", - "enumx", -] - -[[package]] -name = "cex_derive" -version = "0.5.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b5048cd656d7d2e739960fa33d9f95693005792dc8aad0af8b8f0b7d76c938d" -dependencies = [ - "indexmap 1.9.3", - "proc-macro2", - "quote", - "syn 1.0.109", -] - [[package]] name = "cexpr" version = "0.6.0" @@ -310,16 +313,43 @@ checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724" [[package]] name = "chrono" -version = "0.4.39" +version = "0.4.41" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7e36cc9d416881d2e24f9a963be5fb1cd90966419ac844274161d10488b3e825" +checksum = "c469d952047f47f91b68d1cba3f10d63c11d73e4636f24f08daf0278abf01c4d" dependencies = [ "android-tzdata", "iana-time-zone", "js-sys", "num-traits", "wasm-bindgen", - "windows-targets 0.52.6", + "windows-link", +] + +[[package]] +name = "ciborium" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42e69ffd6f0917f5c029256a24d0161db17cea3997d185db0d35926308770f0e" +dependencies = [ + "ciborium-io", + "ciborium-ll", + "serde", +] + +[[package]] +name = "ciborium-io" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05afea1e0a06c9be33d539b876f1ce3692f4afea2cb41f740e7743225ed1c757" + +[[package]] +name = "ciborium-ll" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57663b653d948a338bfb3eeba9bb2fd5fcfaecb9e199e87e1eda4d9e8b240fd9" +dependencies = [ + "ciborium-io", + "half", ] [[package]] @@ -335,28 +365,29 @@ dependencies = [ [[package]] name = "clap" -version = "2.34.0" +version = "4.5.38" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a0610544180c38b88101fecf2dd634b174a62eef6946f84dfc6a7127512b381c" +checksum = "ed93b9805f8ba930df42c2590f05453d5ec36cbb85d018868a5b24d31f6ac000" dependencies = [ - "bitflags 1.3.2", - "textwrap 0.11.0", - "unicode-width 0.1.14", + "clap_builder", ] [[package]] -name = "clib" -version = "0.2.4" +name = "clap_builder" +version = "4.5.38" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fda1a698cd341f055d3ae1fdbfb1cc441e7f9bacce795356ecc685e69134e957" +checksum = "379026ff283facf611b0ea629334361c4211d1b12ee01024eec1591133b04120" dependencies = [ - "anyhow", - "bindgen", - "inwelling", - "pkg-config", - "toml", + "anstyle", + "clap_lex", ] +[[package]] +name = "clap_lex" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f46ad14479a25103f283c0f10005961cf086d8dc42205bb44c46ac563475dca6" + [[package]] name = "clipboard-win" version = "5.4.0" @@ -366,6 +397,12 @@ dependencies = [ "error-code", ] +[[package]] +name = "colorchoice" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b63caa9aa9397e2d9480a9b13673856c78d8ac123288526c37d7839f2a86990" + [[package]] name = "compact_str" version = "0.8.1" @@ -382,9 +419,9 @@ dependencies = [ [[package]] name = "console" -version = "0.15.10" +version = "0.15.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ea3c6ecd8059b57859df5c69830340ed3c41d30e3da0c1cbed90a96ac853041b" +checksum = "054ccb5b10f9f2cbf51eb355ca1d05c2d279ce1804688d0db74b4733a5aeafd8" dependencies = [ "encode_unicode", "libc", @@ -402,6 +439,12 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "constant_time_eq" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d52eff69cd5e647efe296129160853a42795992097e8af39800e1060caeea9b" + [[package]] name = "core-foundation" version = "0.9.4" @@ -429,9 +472,9 @@ dependencies = [ [[package]] name = "cranelift" -version = "0.118.0" +version = "0.119.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e482b051275b415cf7627bb6b26e9902ce6aec058b443266c2a1e7a0de148960" +checksum = "6d07c374d4da962eca0833c1d14621d5b4e32e68c8ca185b046a3b6b924ad334" dependencies = [ "cranelift-codegen", "cranelift-frontend", @@ -440,39 +483,42 @@ dependencies = [ [[package]] name = "cranelift-assembler-x64" -version = "0.118.0" +version = "0.119.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3e4b56ebe316895d3fa37775d0a87b0c889cc933f5c8b253dbcc7c7bcb7fe7e4" +checksum = "263cc79b8a23c29720eb596d251698f604546b48c34d0d84f8fd2761e5bf8888" dependencies = [ "cranelift-assembler-x64-meta", ] [[package]] name = "cranelift-assembler-x64-meta" -version = "0.118.0" +version = "0.119.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "95cabbc01dfbd7dcd6c329ca44f0212910309c221797ac736a67a5bc8857fe1b" +checksum = "5b4a113455f8c0e13e3b3222a9c38d6940b958ff22573108be083495c72820e1" +dependencies = [ + "cranelift-srcgen", +] [[package]] name = "cranelift-bforest" -version = "0.118.0" +version = "0.119.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "76ffe46df300a45f1dc6f609dc808ce963f0e3a2e971682c479a2d13e3b9b8ef" +checksum = "58f96dca41c5acf5d4312c1d04b3391e21a312f8d64ce31a2723a3bb8edd5d4d" dependencies = [ "cranelift-entity", ] [[package]] name = "cranelift-bitset" -version = "0.118.0" +version = "0.119.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b265bed7c51e1921fdae6419791d31af77d33662ee56d7b0fa0704dc8d231cab" +checksum = "7d821ed698dd83d9c012447eb63a5406c1e9c23732a2f674fb5b5015afd42202" [[package]] name = "cranelift-codegen" -version = "0.118.0" +version = "0.119.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e606230a7e3a6897d603761baee0d19f88d077f17b996bb5089488a29ae96e41" +checksum = "06c52fdec4322cb8d5545a648047819aaeaa04e630f88d3a609c0d3c1a00e9a0" dependencies = [ "bumpalo", "cranelift-assembler-x64", @@ -484,72 +530,73 @@ dependencies = [ "cranelift-entity", "cranelift-isle", "gimli", - "hashbrown 0.15.2", + "hashbrown", "log", "regalloc2", - "rustc-hash 2.1.1", + "rustc-hash", "serde", "smallvec", - "target-lexicon 0.13.2", + "target-lexicon", ] [[package]] name = "cranelift-codegen-meta" -version = "0.118.0" +version = "0.119.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a63bffafc23bc60969ad528e138788495999d935f0adcfd6543cb151ca8637d" +checksum = "af2c215e0c9afa8069aafb71d22aa0e0dde1048d9a5c3c72a83cacf9b61fcf4a" dependencies = [ - "cranelift-assembler-x64", + "cranelift-assembler-x64-meta", "cranelift-codegen-shared", + "cranelift-srcgen", ] [[package]] name = "cranelift-codegen-shared" -version = "0.118.0" +version = "0.119.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "af50281b67324b58e843170a6a5943cf6d387c06f7eeacc9f5696e4ab7ae7d7e" +checksum = "97524b2446fc26a78142132d813679dda19f620048ebc9a9fbb0ac9f2d320dcb" [[package]] name = "cranelift-control" -version = "0.118.0" +version = "0.119.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8c20c1b38d1abfbcebb0032e497e71156c0e3b8dcb3f0a92b9863b7bcaec290c" +checksum = "8e32e900aee81f9e3cc493405ef667a7812cb5c79b5fc6b669e0a2795bda4b22" dependencies = [ "arbitrary", ] [[package]] name = "cranelift-entity" -version = "0.118.0" +version = "0.119.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0c2c67d95507c51b4a1ff3f3555fe4bfec36b9e13c1b684ccc602736f5d5f4a2" +checksum = "d16a2e28e0fa6b9108d76879d60fe1cc95ba90e1bcf52bac96496371044484ee" dependencies = [ "cranelift-bitset", ] [[package]] name = "cranelift-frontend" -version = "0.118.0" +version = "0.119.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4e002691cc69c38b54fc7ec93e5be5b744f627d027031d991cc845d1d512d0ce" +checksum = "328181a9083d99762d85954a16065d2560394a862b8dc10239f39668df528b95" dependencies = [ "cranelift-codegen", "log", "smallvec", - "target-lexicon 0.13.2", + "target-lexicon", ] [[package]] name = "cranelift-isle" -version = "0.118.0" +version = "0.119.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e93588ed1796cbcb0e2ad160403509e2c5d330d80dd6e0014ac6774c7ebac496" +checksum = "e916f36f183e377e9a3ed71769f2721df88b72648831e95bb9fa6b0cd9b1c709" [[package]] name = "cranelift-jit" -version = "0.118.0" +version = "0.119.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "17f6682f0b193d6b7873cc8e7ed67e8776a8a26f50eeabf88534e9be618b9a03" +checksum = "d6bb584ac927f1076d552504b0075b833b9d61e2e9178ba55df6b2d966b4375d" dependencies = [ "anyhow", "cranelift-codegen", @@ -560,16 +607,16 @@ dependencies = [ "libc", "log", "region", - "target-lexicon 0.13.2", + "target-lexicon", "wasmtime-jit-icache-coherence", "windows-sys 0.59.0", ] [[package]] name = "cranelift-module" -version = "0.118.0" +version = "0.119.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ff19784c6de05116e63e6a34791012bd927b2a4eac56233039c46f1b6a4edac8" +checksum = "40c18ccb8e4861cf49cec79998af73b772a2b47212d12d3d63bf57cc4293a1e3" dependencies = [ "anyhow", "cranelift-codegen", @@ -578,15 +625,21 @@ dependencies = [ [[package]] name = "cranelift-native" -version = "0.118.0" +version = "0.119.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e5b09bdd6407bf5d89661b80cf926ce731c9e8cc184bf49102267a2369a8358e" +checksum = "fc852cf04128877047dc2027aa1b85c64f681dc3a6a37ff45dcbfa26e4d52d2f" dependencies = [ "cranelift-codegen", "libc", - "target-lexicon 0.13.2", + "target-lexicon", ] +[[package]] +name = "cranelift-srcgen" +version = "0.119.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "47e1a86340a16e74b4285cc86ac69458fa1c8e7aaff313da4a89d10efd3535ee" + [[package]] name = "crc32fast" version = "1.4.2" @@ -598,24 +651,24 @@ dependencies = [ [[package]] name = "criterion" -version = "0.3.6" +version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b01d6de93b2b6c65e17c634a26653a29d107b3c98c607c765bf38d041531cd8f" +checksum = "f2b12d017a929603d80db1831cd3a24082f8137ce19c69e6447f54f5fc8d692f" dependencies = [ - "atty", + "anes", "cast", + "ciborium", "clap", "criterion-plot", - "csv", + "is-terminal", "itertools 0.10.5", - "lazy_static 1.5.0", "num-traits", + "once_cell", "oorandom", "plotters", "rayon", "regex", "serde", - "serde_cbor", "serde_derive", "serde_json", "tinytemplate", @@ -624,9 +677,9 @@ dependencies = [ [[package]] name = "criterion-plot" -version = "0.4.5" +version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2673cc8207403546f45f5fd319a974b1e6983ad1a3ee7e6041650013be041876" +checksum = "6b50826342786a51a89e2da3a28f1c32b06e387201bc2d19791f622c673706b1" dependencies = [ "cast", "itertools 0.10.5", @@ -673,18 +726,6 @@ dependencies = [ "typenum", ] -[[package]] -name = "csv" -version = "1.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "acdc4883a9c96732e4733212c01447ebd805833b7275a73ca3ee080fd77afdaf" -dependencies = [ - "csv-core", - "itoa", - "ryu", - "serde", -] - [[package]] name = "csv-core" version = "0.1.12" @@ -694,27 +735,6 @@ dependencies = [ "memchr", ] -[[package]] -name = "derive_more" -version = "1.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4a9b99b9cbbe49445b21764dc0625032a89b145a2642e67603e1c936f5458d05" -dependencies = [ - "derive_more-impl", -] - -[[package]] -name = "derive_more-impl" -version = "1.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cb7330aeadfbe296029522e6c40f315320aba36fc43a5b3632f3795348f3bd22" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.98", - "unicode-xid", -] - [[package]] name = "digest" version = "0.10.7" @@ -761,15 +781,15 @@ dependencies = [ [[package]] name = "dyn-clone" -version = "1.0.18" +version = "1.0.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "feeef44e73baff3a26d371801df019877a9866a8c493d315ab00177843314f35" +checksum = "1c7a8fb8a9fbf66c1f703fe16184d10ca0ee9d23be5b4436400408ba54a95005" [[package]] name = "either" -version = "1.13.0" +version = "1.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0" +checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" [[package]] name = "encode_unicode" @@ -784,35 +804,32 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c34f04666d835ff5d62e058c3995147c06f42fe86ff053337632bca83e42702d" [[package]] -name = "enumx" -version = "0.4.3" +name = "env_filter" +version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "32875abeb14f7fe2c2b8ad15e58f41701f455d124d0a03bc88132d5face2663f" +checksum = "186e05a59d4c50738528153b83b0b0194d3a29507dfec16eccd4b342903397d0" dependencies = [ - "enumx_derive", + "log", + "regex", ] [[package]] -name = "enumx_derive" -version = "0.4.2" +name = "env_home" +version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fa5d66efdd1eab6ea85ba31bdb58bed1e4ce218c1361061384ece88f40ebeb49" -dependencies = [ - "indexmap 1.9.3", - "proc-macro2", - "quote", - "syn 1.0.109", -] +checksum = "c7f84e12ccf0a7ddc17a6c41c93326024c42920d7ee630d04950e6926645c0fe" [[package]] name = "env_logger" -version = "0.9.3" +version = "0.11.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a12e6657c4c97ebab115a42dcee77225f7f482cdd841cf7088c657a42e9e00e7" +checksum = "13c863f0904021b108aa8b2f55046443e6b1ebde8fd4a15c399893aae4fa069f" dependencies = [ - "atty", + "anstream", + "anstyle", + "env_filter", + "jiff", "log", - "termcolor", ] [[package]] @@ -823,9 +840,9 @@ checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" [[package]] name = "errno" -version = "0.3.10" +version = "0.3.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "33d852cb9b869c2a9b3df2f71a3074817f01e1844f839a144f5fcef059a4eb5d" +checksum = "976dd42dc7e85965fe702eb8164f21f450704bdde31faefd6471dba214cb594e" dependencies = [ "libc", "windows-sys 0.59.0", @@ -833,9 +850,9 @@ dependencies = [ [[package]] name = "error-code" -version = "3.3.1" +version = "3.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a5d9305ccc6942a704f4335694ecd3de2ea531b114ac2d51f5f843750787a92f" +checksum = "dea2df4cf52843e0452895c455a1a2cfbb842a1e7329671acf418fdc53ed4c59" [[package]] name = "exitcode" @@ -851,13 +868,13 @@ checksum = "2acce4a10f12dc2fb14a218589d4f1f62ef011b2d0cc4b3cb1bba8e94da14649" [[package]] name = "fd-lock" -version = "4.0.2" +version = "4.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7e5768da2206272c81ef0b5e951a41862938a6070da63bcea197899942d3b947" +checksum = "0ce92ff622d6dadf7349484f42c93271a0d49b7cc4d466a936405bacbe10aa78" dependencies = [ "cfg-if", "rustix", - "windows-sys 0.52.0", + "windows-sys 0.59.0", ] [[package]] @@ -866,7 +883,7 @@ version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1fc2706461e1ee94f55cab2ed2e3d34ae9536cfa830358ef80acff1a3dacab30" dependencies = [ - "lazy_static 0.2.11", + "lazy_static", "serde", "serde_derive", "serde_json", @@ -891,16 +908,16 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8168cbad48fdda10be94de9c6319f9e8ac5d3cf0a1abda1864269dfcca3d302a" dependencies = [ "flame", - "indexmap 2.7.1", + "indexmap", "serde", "serde_json", ] [[package]] name = "flate2" -version = "1.1.0" +version = "1.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "11faaf5a5236997af9848be0bef4db95824b1d534ebc64d0f0c6cf3e67bd38dc" +checksum = "7ced92e76e966ca2fd84c8f7aa01a4aea65b0eb6648d72f7c8f3e2764a67fece" dependencies = [ "crc32fast", "libz-rs-sys", @@ -946,9 +963,9 @@ dependencies = [ [[package]] name = "gethostname" -version = "1.0.0" +version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4fd4b8790c0792e3b11895efdf5f289ebe8b59107a6624f1cce68f24ff8c7035" +checksum = "fc257fdb4038301ce4b9cd1b3b51704509692bb3ff716a410cbd07925d9dae55" dependencies = [ "rustix", "windows-targets 0.52.6", @@ -965,29 +982,27 @@ dependencies = [ [[package]] name = "getrandom" -version = "0.2.15" +version = "0.2.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c4567c8db10ae91089c99af84c68c38da3ec2f087c3f82960bcdbf3656b6f4d7" +checksum = "335ff9f135e4384c8150d6f27c6daed433577f86b4750418338c01a1a2528592" dependencies = [ "cfg-if", - "js-sys", "libc", "wasi 0.11.0+wasi-snapshot-preview1", - "wasm-bindgen", ] [[package]] name = "getrandom" -version = "0.3.1" +version = "0.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "43a49c392881ce6d5c3b8cb70f98717b7c07aabbdff06687b9030dbfbe2725f8" +checksum = "73fea8450eea4bac3940448fb7ae50d91f034f941199fcd9d909a5a07aa455f0" dependencies = [ "cfg-if", "js-sys", "libc", - "wasi 0.13.3+wasi-0.2.2", + "r-efi", + "wasi 0.14.2+wasi-0.2.4", "wasm-bindgen", - "windows-targets 0.52.6", ] [[package]] @@ -997,7 +1012,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "07e28edb80900c19c28f1072f2e8aeca7fa06b23cd4169cefe1af5aa3260783f" dependencies = [ "fallible-iterator", - "indexmap 2.7.1", + "indexmap", "stable_deref_trait", ] @@ -1009,15 +1024,9 @@ checksum = "a8d1add55171497b4705a648c6b583acafb01d58050a51727785f0b2c8e0a2b2" [[package]] name = "half" -version = "1.8.3" +version = "2.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1b43ede17f21864e81be2fa654110bf1e793774238d86ef8555c37e6519c0403" - -[[package]] -name = "half" -version = "2.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6dd08c532ae367adf81c312a4580bc67f1d0fe8bc9c460520283f4c0ff277888" +checksum = "459196ed295495a68f7d7fe1d84f6c4b7ff0e21fe3017b2f283c6fac3ad803c9" dependencies = [ "cfg-if", "crunchy", @@ -1025,57 +1034,30 @@ dependencies = [ [[package]] name = "hashbrown" -version = "0.12.3" +version = "0.15.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" - -[[package]] -name = "hashbrown" -version = "0.15.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bf151400ff0baff5465007dd2f3e717f3fe502074ca563069ce3a6629d07b289" +checksum = "84b26c544d002229e640969970a2e74021aadf6e2f96372b9c58eff97de08eb3" dependencies = [ "foldhash", ] -[[package]] -name = "heck" -version = "0.3.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6d621efb26863f0e9924c6ac577e8275e5e6b77455db64ffa6c65c904e9e132c" -dependencies = [ - "unicode-segmentation", -] - [[package]] name = "heck" version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" -[[package]] -name = "heredom" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a7a4d76fa670b51cfb56e908ad8bfd44a14fee853ea764790e46634d3fcdf4d" -dependencies = [ - "tuplex", -] - [[package]] name = "hermit-abi" -version = "0.1.19" +version = "0.3.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33" -dependencies = [ - "libc", -] +checksum = "d231dfb89cfffdbc30e7fc41579ed6066ad03abda9e567ccafae602b97ec5024" [[package]] name = "hermit-abi" -version = "0.3.9" +version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d231dfb89cfffdbc30e7fc41579ed6066ad03abda9e567ccafae602b97ec5024" +checksum = "f154ce46856750ed433c8649605bf7ed2de3bc35fd9d2a9f30cddd873c80cb08" [[package]] name = "hex" @@ -1100,16 +1082,17 @@ dependencies = [ [[package]] name = "iana-time-zone" -version = "0.1.61" +version = "0.1.63" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "235e081f3925a06703c2d0117ea8b91f042756fd6e7a6e5d901e8ca1a996b220" +checksum = "b0c919e5debc312ad217002b8048a17b7d83f80703865bbfcfebb0458b0b27d8" dependencies = [ "android_system_properties", "core-foundation-sys", "iana-time-zone-haiku", "js-sys", + "log", "wasm-bindgen", - "windows-core", + "windows-core 0.61.0", ] [[package]] @@ -1123,65 +1106,60 @@ dependencies = [ [[package]] name = "indexmap" -version = "1.9.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99" -dependencies = [ - "autocfg", - "hashbrown 0.12.3", -] - -[[package]] -name = "indexmap" -version = "2.7.1" +version = "2.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8c9c992b02b5b4c94ea26e32fe5bccb7aa7d9f390ab5c1221ff895bc7ea8b652" +checksum = "cea70ddb795996207ad57735b50c5982d8844f38ba9ee5f1aedcfb708a2aa11e" dependencies = [ "equivalent", - "hashbrown 0.15.2", + "hashbrown", ] [[package]] name = "indoc" -version = "2.0.5" +version = "2.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b248f5224d1d606005e02c97f5aa4e88eeb230488bcc03bc9ca4d7991399f2b5" +checksum = "f4c7245a08504955605670dbf141fceab975f15ca21570696aebe9d2e71576bd" [[package]] name = "insta" -version = "1.42.1" +version = "1.43.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "71c1b125e30d93896b365e156c33dadfffab45ee8400afcbba4752f59de08a86" +checksum = "154934ea70c58054b556dd430b99a98c2a7ff5309ac9891597e339b5c28f4371" dependencies = [ "console", - "linked-hash-map", "once_cell", - "pin-project", "similar", ] -[[package]] -name = "inwelling" -version = "0.5.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2f6292c68ffca1fa94ca8f95ca5ad2885d79d96377f1d37ced6a47cd26cfaf8c" -dependencies = [ - "toml", - "walkdir", -] - [[package]] name = "is-macro" version = "0.3.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1d57a3e447e24c22647738e4607f1df1e0ec6f72e16182c4cd199f647cdfb0e4" dependencies = [ - "heck 0.5.0", + "heck", "proc-macro2", "quote", - "syn 2.0.98", + "syn 2.0.101", ] +[[package]] +name = "is-terminal" +version = "0.4.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e04d7f318608d35d4b61ddd75cbdaee86b023ebe2bd5a66ee0915f0bf93095a9" +dependencies = [ + "hermit-abi 0.5.1", + "libc", + "windows-sys 0.59.0", +] + +[[package]] +name = "is_terminal_polyfill" +version = "1.70.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf" + [[package]] name = "itertools" version = "0.10.5" @@ -1193,9 +1171,9 @@ dependencies = [ [[package]] name = "itertools" -version = "0.11.0" +version = "0.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b1c173a5686ce8bfa551b3563d0c2170bf24ca44da99c7ca4bfdab5418c3fe57" +checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186" dependencies = [ "either", ] @@ -1211,9 +1189,33 @@ dependencies = [ [[package]] name = "itoa" -version = "1.0.14" +version = "1.0.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" + +[[package]] +name = "jiff" +version = "0.2.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f02000660d30638906021176af16b17498bd0d12813dbfe7b276d8bc7f3c0806" +dependencies = [ + "jiff-static", + "log", + "portable-atomic", + "portable-atomic-util", + "serde", +] + +[[package]] +name = "jiff-static" +version = "0.2.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d75a2a4b1b190afb6f5425f10f6a8f959d2ea0b9c2b1d79553551850539e4674" +checksum = "f3c30758ddd7188629c6713fc45d1188af4f44c90582311d0c8d8c9907f60c48" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.101", +] [[package]] name = "js-sys" @@ -1244,30 +1246,12 @@ dependencies = [ "cpufeatures", ] -[[package]] -name = "lambert_w" -version = "1.0.17" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "45bf98425154bfe790a47b72ac452914f6df9ebfb202bc59e089e29db00258cf" - [[package]] name = "lazy_static" version = "0.2.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "76f033c7ad61445c5b347c7382dd1237847eb1bce590fe50365dcb33d546be73" -[[package]] -name = "lazy_static" -version = "1.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" - -[[package]] -name = "lazycell" -version = "1.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55" - [[package]] name = "lexical-parse-float" version = "1.0.5" @@ -1300,21 +1284,27 @@ dependencies = [ [[package]] name = "lexopt" -version = "0.3.0" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9fa0e2a1fcbe2f6be6c42e342259976206b383122fc152e872795338b5a3f3a7" + +[[package]] +name = "libbz2-rs-sys" +version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "baff4b617f7df3d896f97fe922b64817f6cd9a756bb81d40f8883f2f66dcb401" +checksum = "0864a00c8d019e36216b69c2c4ce50b83b7bd966add3cf5ba554ec44f8bebcf5" [[package]] name = "libc" -version = "0.2.169" +version = "0.2.172" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b5aba8db14291edd000dfcc4d620c7ebfb122c613afb886ca8803fa4e128a20a" +checksum = "d750af042f7ef4f724306de029d18836c26c1765a54a6a3f094cbd23a7267ffa" [[package]] name = "libffi" -version = "3.2.0" +version = "4.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ce826c243048e3d5cec441799724de52e2d42f820468431fc3fceee2341871e2" +checksum = "ebfd30a67b482a08116e753d0656cb626548cf4242543e5cc005be7639d99838" dependencies = [ "libc", "libffi-sys", @@ -1322,9 +1312,9 @@ dependencies = [ [[package]] name = "libffi-sys" -version = "2.3.0" +version = "3.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f36115160c57e8529781b4183c2bb51fdc1f6d6d1ed345591d84be7703befb3c" +checksum = "f003aa318c9f0ee69eb0ada7c78f5c9d2fedd2ceb274173b5c7ff475eee584a3" dependencies = [ "cc", ] @@ -1336,14 +1326,14 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fc2f4eb4bc735547cfed7c0a4922cbd04a4655978c09b54f1f7b228750664c34" dependencies = [ "cfg-if", - "windows-targets 0.48.5", + "windows-targets 0.52.6", ] [[package]] name = "libm" -version = "0.2.11" +version = "0.2.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8355be11b20d696c8f18f6cc018c4e372165b1fa8126cef092399c9951984ffa" +checksum = "f9fbbcab51052fe104eb5e5d351cf728d30a5be1fe14d9be8a3b097481fb97de" [[package]] name = "libredox" @@ -1351,7 +1341,7 @@ version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c0ff37bd590ca25063e35af745c343cb7a0271906fb7b37e4813e8f79f00268d" dependencies = [ - "bitflags 2.8.0", + "bitflags 2.9.0", "libc", ] @@ -1368,24 +1358,18 @@ dependencies = [ [[package]] name = "libz-rs-sys" -version = "0.4.2" +version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "902bc563b5d65ad9bba616b490842ef0651066a1a1dc3ce1087113ffcb873c8d" +checksum = "6489ca9bd760fe9642d7644e827b0c9add07df89857b0416ee15c1cc1a3b8c5a" dependencies = [ "zlib-rs", ] -[[package]] -name = "linked-hash-map" -version = "0.5.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0717cef1bc8b636c6e1c1bbdefc09e6322da8a9321966e8928ef80d20f7f770f" - [[package]] name = "linux-raw-sys" -version = "0.4.15" +version = "0.9.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d26c52dbd32dccf2d10cac7725f8eae5296885fb5703b261f7d0a0739ec807ab" +checksum = "cd945864f07fe9f5371a27ad7b52a172b4b499999f1d97574c9fa68373937e12" [[package]] name = "lock_api" @@ -1399,9 +1383,9 @@ dependencies = [ [[package]] name = "log" -version = "0.4.25" +version = "0.4.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "04cbf5b083de1c7e0222a7a51dbfdba1cbe1c6ab0b15e29fff3f6c077fd9cd9f" +checksum = "13dc2df351e3202783a1fe0d44375f7295ffb4049267b0f3018346dc122a1d94" [[package]] name = "lz4_flex" @@ -1412,6 +1396,17 @@ dependencies = [ "twox-hash", ] +[[package]] +name = "lzma-sys" +version = "0.1.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5fda04ab3764e6cde78b9974eec4f779acaba7c4e84b36eca3cf77c581b85d27" +dependencies = [ + "cc", + "libc", + "pkg-config", +] + [[package]] name = "mac_address" version = "1.1.8" @@ -1433,23 +1428,22 @@ dependencies = [ [[package]] name = "malachite-base" -version = "0.5.1" +version = "0.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5063891d2cec8fd20cabccbd3fc277bf8d5666f481fb3f79d999559b39a62713" +checksum = "554bcf7f816ff3c1eae8f2b95c4375156884c79988596a6d01b7b070710fa9e5" dependencies = [ - "hashbrown 0.15.2", - "itertools 0.11.0", + "hashbrown", + "itertools 0.14.0", "libm", "ryu", ] [[package]] name = "malachite-bigint" -version = "0.5.1" +version = "0.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7b1b1fec8b370139968919a5b77071c94b282eaba3da1cf179ae5299060d4e75" +checksum = "df1acde414186498b2a6a1e271f8ce5d65eaa5c492e95271121f30718fe2f925" dependencies = [ - "derive_more", "malachite-base", "malachite-nz", "num-integer", @@ -1459,22 +1453,22 @@ dependencies = [ [[package]] name = "malachite-nz" -version = "0.5.1" +version = "0.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "175263cd5b846c552b9afb9d4b03ca465b4ad10717d789cad7dac24441c4fcbe" +checksum = "f43d406336c42a59e07813b57efd651db00118af84c640a221d666964b2ec71f" dependencies = [ - "itertools 0.11.0", + "itertools 0.14.0", "libm", "malachite-base", ] [[package]] name = "malachite-q" -version = "0.5.1" +version = "0.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5261ba8feb1ad20cddab3d625af28206c663c08014b2e5c5f9bd54b0f230234f" +checksum = "25911a58ea0426e0b7bb1dffc8324e82711c82abff868b8523ae69d8a47e8062" dependencies = [ - "itertools 0.11.0", + "itertools 0.14.0", "malachite-base", "malachite-nz", ] @@ -1533,9 +1527,9 @@ checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" [[package]] name = "miniz_oxide" -version = "0.8.4" +version = "0.8.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b3b1c9bd4fe1f0f8b387f6eb9eb3b4a1aa26185e5750efb9140301703f62cd1b" +checksum = "3be647b768db090acb35d5ec5db2b0e1f1de11133ca123b9eacf5137868f892a" dependencies = [ "adler2", ] @@ -1549,12 +1543,6 @@ dependencies = [ "rand_core 0.9.3", ] -[[package]] -name = "mutf8" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "97b444426a4c188e9ad33560853ebd52309ab72811f536a9e6f37907fd12cf45" - [[package]] name = "nibble_vec" version = "0.1.0" @@ -1570,7 +1558,7 @@ version = "0.29.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "71e2746dc3a24dd78b3cfcb7be93368c6de9963d30f43a6a73998a9cf4b17b46" dependencies = [ - "bitflags 2.8.0", + "bitflags 2.9.0", "cfg-if", "cfg_aliases", "libc", @@ -1639,31 +1627,30 @@ version = "0.7.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "af1844ef2428cc3e1cb900be36181049ef3d3193c63e43026cfe202983b27a56" dependencies = [ - "proc-macro-crate", "proc-macro2", "quote", - "syn 2.0.98", + "syn 2.0.101", ] [[package]] name = "once_cell" -version = "1.20.3" +version = "1.21.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "945462a4b81e43c4e3ba96bd7b49d834c6f61198356aa858733bc4acf3cbe62e" +checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" [[package]] name = "oorandom" -version = "11.1.4" +version = "11.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b410bbe7e14ab526a0e86877eb47c6996a2bd7746f027ba551028c925390e4e9" +checksum = "d6790f58c7ff633d8771f42965289203411a5e5c68388703c06e14f24770b41e" [[package]] name = "openssl" -version = "0.10.71" +version = "0.10.72" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5e14130c6a98cd258fdcb0fb6d744152343ff729cbfcb28c656a9d12b999fbcd" +checksum = "fedfea7d58a1f73118430a55da6a286e7b044961736ce96a16a17068ea25e5da" dependencies = [ - "bitflags 2.8.0", + "bitflags 2.9.0", "cfg-if", "foreign-types", "libc", @@ -1680,7 +1667,7 @@ checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c" dependencies = [ "proc-macro2", "quote", - "syn 2.0.98", + "syn 2.0.101", ] [[package]] @@ -1691,18 +1678,18 @@ checksum = "d05e27ee213611ffe7d6348b942e8f942b37114c00cc03cec254295a4a17852e" [[package]] name = "openssl-src" -version = "300.4.2+3.4.1" +version = "300.5.0+3.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "168ce4e058f975fe43e89d9ccf78ca668601887ae736090aacc23ae353c298e2" +checksum = "e8ce546f549326b0e6052b649198487d91320875da901e7bd11a06d1ee3f9c2f" dependencies = [ "cc", ] [[package]] name = "openssl-sys" -version = "0.9.106" +version = "0.9.108" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8bb61ea9811cc39e3c2069f40b8b8e2e70d8569b361f879786cc7ed48b777cdd" +checksum = "e145e1651e858e820e4860f7b9c5e169bc1d8ce1c86043be79fa7b7634821847" dependencies = [ "cc", "libc", @@ -1745,7 +1732,7 @@ checksum = "1e401f977ab385c9e4e3ab30627d6f26d00e2c73eef317493c4ec6d468726cf8" dependencies = [ "cfg-if", "libc", - "redox_syscall 0.5.8", + "redox_syscall 0.5.12", "smallvec", "windows-targets 0.52.6", ] @@ -1756,12 +1743,6 @@ version = "1.0.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" -[[package]] -name = "peeking_take_while" -version = "0.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "19b17cddbe7ec3f8bc800887bab5e717348c95ea2ca0b1bf0837fb964dc67099" - [[package]] name = "phf" version = "0.11.3" @@ -1800,31 +1781,11 @@ dependencies = [ "siphasher", ] -[[package]] -name = "pin-project" -version = "1.1.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dfe2e71e1471fe07709406bf725f710b02927c9c54b2b5b2ec0e8087d97c327d" -dependencies = [ - "pin-project-internal", -] - -[[package]] -name = "pin-project-internal" -version = "1.1.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f6e859e6e5bd50440ab63c47e3ebabc90f26251f7c73c3d3e837b74a1cc3fa67" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.98", -] - [[package]] name = "pkg-config" -version = "0.3.31" +version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "953ec861398dccce10c670dfeaf3ec4911ca479e9c02154b3a215178c5f566f2" +checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c" [[package]] name = "plotters" @@ -1862,57 +1823,66 @@ checksum = "52a40bc70c2c58040d2d8b167ba9a5ff59fc9dab7ad44771cfde3dcfde7a09c6" dependencies = [ "proc-macro2", "quote", - "syn 2.0.98", + "syn 2.0.101", ] [[package]] name = "portable-atomic" -version = "1.10.0" +version = "1.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "350e9b48cbc6b0e028b0473b114454c6316e57336ee184ceab6e53f72c178b3e" + +[[package]] +name = "portable-atomic-util" +version = "0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "280dc24453071f1b63954171985a0b0d30058d287960968b9b2aca264c8d4ee6" +checksum = "d8a2f0d8d040d7848a709caf78912debcc3f33ee4b3cac47d73d1e1069e83507" +dependencies = [ + "portable-atomic", +] [[package]] name = "ppv-lite86" -version = "0.2.20" +version = "0.2.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "77957b295656769bb8ad2b6a6b09d897d94f05c41b069aede1fcdaa675eaea04" +checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9" dependencies = [ - "zerocopy 0.7.35", + "zerocopy", ] [[package]] -name = "proc-macro-crate" -version = "3.3.0" +name = "prettyplease" +version = "0.2.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "edce586971a4dfaa28950c6f18ed55e0406c1ab88bbce2c6f6293a7aaba73d35" +checksum = "664ec5419c51e34154eec046ebcba56312d5a2fc3b09a06da188e1ad21afadf6" dependencies = [ - "toml_edit 0.22.24", + "proc-macro2", + "syn 2.0.101", ] [[package]] name = "proc-macro2" -version = "1.0.93" +version = "1.0.95" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "60946a68e5f9d28b0dc1c21bb8a97ee7d018a8b322fa57838ba31cc878e22d99" +checksum = "02b3e5e68a3a1a02aad3ec490a98007cbc13c37cbe84a3cd7b8e406d76e7f778" dependencies = [ "unicode-ident", ] [[package]] -name = "puruspe" -version = "0.4.0" +name = "pymath" +version = "0.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d76c522e44709f541a403db419a7e34d6fbbc8e6b208589ae29a030cddeefd96" +checksum = "5b66ab66a8610ce209d8b36cd0fecc3a15c494f715e0cb26f0586057f293abc9" dependencies = [ - "lambert_w", - "num-complex", + "libc", ] [[package]] name = "pyo3" -version = "0.22.6" +version = "0.24.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f402062616ab18202ae8319da13fa4279883a2b8a9d9f83f20dbade813ce1884" +checksum = "e5203598f366b11a02b13aa20cab591229ff0a89fd121a308a5df751d5fc9219" dependencies = [ "cfg-if", "indoc", @@ -1928,19 +1898,19 @@ dependencies = [ [[package]] name = "pyo3-build-config" -version = "0.22.6" +version = "0.24.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b14b5775b5ff446dd1056212d778012cbe8a0fbffd368029fd9e25b514479c38" +checksum = "99636d423fa2ca130fa5acde3059308006d46f98caac629418e53f7ebb1e9999" dependencies = [ "once_cell", - "target-lexicon 0.12.16", + "target-lexicon", ] [[package]] name = "pyo3-ffi" -version = "0.22.6" +version = "0.24.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9ab5bcf04a2cdcbb50c7d6105de943f543f9ed92af55818fd17b660390fc8636" +checksum = "78f9cf92ba9c409279bc3305b5409d90db2d2c22392d443a87df3a1adad59e33" dependencies = [ "libc", "pyo3-build-config", @@ -1948,43 +1918,51 @@ dependencies = [ [[package]] name = "pyo3-macros" -version = "0.22.6" +version = "0.24.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0fd24d897903a9e6d80b968368a34e1525aeb719d568dba8b3d4bfa5dc67d453" +checksum = "0b999cb1a6ce21f9a6b147dcf1be9ffedf02e0043aec74dc390f3007047cecd9" dependencies = [ "proc-macro2", "pyo3-macros-backend", "quote", - "syn 2.0.98", + "syn 2.0.101", ] [[package]] name = "pyo3-macros-backend" -version = "0.22.6" +version = "0.24.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "36c011a03ba1e50152b4b394b479826cad97e7a21eb52df179cd91ac411cbfbe" +checksum = "822ece1c7e1012745607d5cf0bcb2874769f0f7cb34c4cde03b9358eb9ef911a" dependencies = [ - "heck 0.5.0", + "heck", "proc-macro2", "pyo3-build-config", "quote", - "syn 2.0.98", + "syn 2.0.101", ] [[package]] name = "quote" -version = "1.0.38" +version = "1.0.40" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0e4dccaaaf89514f546c693ddc140f729f958c247918a13380cccc6078391acc" +checksum = "1885c039570dc00dcb4ff087a89e185fd56bae234ddc7f056a945bf36467248d" dependencies = [ "proc-macro2", ] [[package]] -name = "radium" -version = "0.7.0" +name = "r-efi" +version = "5.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dc33ff2d4973d518d823d61aa239014831e521c75da58e3df4840d3f47749d09" +checksum = "74765f6d916ee2faa39bc8e68e4f3ed8949b48cccdac59983d287a7cb71ce9c5" + +[[package]] +name = "radium" +version = "1.1.0" +source = "git+https://github.com/youknowone/ferrilab?branch=fix-nightly#4a301c3a223e096626a2773d1a1eed1fc4e21140" +dependencies = [ + "cfg-if", +] [[package]] name = "radix_trie" @@ -2009,13 +1987,12 @@ dependencies = [ [[package]] name = "rand" -version = "0.9.0" +version = "0.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3779b94aeb87e8bd4e834cee3650289ee9e0d5677f976ecdb6d219e5f4f6cd94" +checksum = "9fbfd9d094a40bf3ae768db9361049ace4c0e04a4fd6b359518bd7b73a73dd97" dependencies = [ "rand_chacha 0.9.0", "rand_core 0.9.3", - "zerocopy 0.8.20", ] [[package]] @@ -2044,7 +2021,7 @@ version = "0.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" dependencies = [ - "getrandom 0.2.15", + "getrandom 0.2.16", ] [[package]] @@ -2053,7 +2030,7 @@ version = "0.9.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "99d9a13982dcf210057a8a78572b2217b667c3beacbf3a0d8b454f6f82837d38" dependencies = [ - "getrandom 0.3.1", + "getrandom 0.3.2", ] [[package]] @@ -2084,11 +2061,11 @@ checksum = "41cc0f7e4d5d4544e8861606a285bb08d3e70712ccc7d2b84d7c0ccfaf4b05ce" [[package]] name = "redox_syscall" -version = "0.5.8" +version = "0.5.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "03a862b389f93e68874fbf580b9de08dd02facb9a788ebadaf4a3fd33cf58834" +checksum = "928fca9cf2aa042393a8325b9ead81d2f0df4cb12e1e24cef072922ccd99c5af" dependencies = [ - "bitflags 2.8.0", + "bitflags 2.9.0", ] [[package]] @@ -2097,22 +2074,22 @@ version = "0.4.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ba009ff324d1fc1b900bd1fdb31564febe58a8ccc8a6fdbb93b543d33b13ca43" dependencies = [ - "getrandom 0.2.15", + "getrandom 0.2.16", "libredox", "thiserror 1.0.69", ] [[package]] name = "regalloc2" -version = "0.11.1" +version = "0.11.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "145c1c267e14f20fb0f88aa76a1c5ffec42d592c1d28b3cd9148ae35916158d3" +checksum = "dc06e6b318142614e4a48bc725abbf08ff166694835c43c9dae5a9009704639a" dependencies = [ "allocator-api2", "bumpalo", - "hashbrown 0.15.2", + "hashbrown", "log", - "rustc-hash 2.1.1", + "rustc-hash", "smallvec", ] @@ -2175,7 +2152,7 @@ dependencies = [ "pmutil", "proc-macro2", "quote", - "syn 2.0.98", + "syn 2.0.101", ] [[package]] @@ -2184,7 +2161,7 @@ version = "0.0.0" source = "git+https://github.com/astral-sh/ruff.git?tag=0.11.0#2cd25ef6410fb5fca96af1578728a3d828d2d53a" dependencies = [ "aho-corasick", - "bitflags 2.8.0", + "bitflags 2.9.0", "compact_str", "is-macro", "itertools 0.14.0", @@ -2192,7 +2169,7 @@ dependencies = [ "ruff_python_trivia", "ruff_source_file", "ruff_text_size", - "rustc-hash 2.1.1", + "rustc-hash", ] [[package]] @@ -2200,14 +2177,14 @@ name = "ruff_python_parser" version = "0.0.0" source = "git+https://github.com/astral-sh/ruff.git?tag=0.11.0#2cd25ef6410fb5fca96af1578728a3d828d2d53a" dependencies = [ - "bitflags 2.8.0", + "bitflags 2.9.0", "bstr", "compact_str", "memchr", "ruff_python_ast", "ruff_python_trivia", "ruff_text_size", - "rustc-hash 2.1.1", + "rustc-hash", "static_assertions", "unicode-ident", "unicode-normalization", @@ -2239,12 +2216,6 @@ name = "ruff_text_size" version = "0.0.0" source = "git+https://github.com/astral-sh/ruff.git?tag=0.11.0#2cd25ef6410fb5fca96af1578728a3d828d2d53a" -[[package]] -name = "rustc-hash" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" - [[package]] name = "rustc-hash" version = "2.1.1" @@ -2253,11 +2224,11 @@ checksum = "357703d41365b4b27c590e3ed91eabb1b663f07c4c084095e60cbed4362dff0d" [[package]] name = "rustix" -version = "0.38.44" +version = "1.0.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fdb5bc1ae2baa591800df16c9ca78619bf65c0488b41b96ccec5d11220d8c154" +checksum = "c71e83d6afe7ff64890ec6b71d6a69bb8a610ab78ce364b3352876bb4c801266" dependencies = [ - "bitflags 2.8.0", + "bitflags 2.9.0", "errno", "libc", "linux-raw-sys", @@ -2284,6 +2255,7 @@ dependencies = [ "rustpython-stdlib", "rustpython-vm", "rustyline", + "winresource", ] [[package]] @@ -2291,8 +2263,8 @@ name = "rustpython-codegen" version = "0.4.0" dependencies = [ "ahash", - "bitflags 2.8.0", - "indexmap 2.7.1", + "bitflags 2.9.0", + "indexmap", "insta", "itertools 0.14.0", "log", @@ -2301,13 +2273,14 @@ dependencies = [ "num-complex", "num-traits", "ruff_python_ast", + "ruff_python_parser", "ruff_source_file", "ruff_text_size", "rustpython-compiler-core", "rustpython-compiler-source", "rustpython-literal", "rustpython-wtf8", - "thiserror 2.0.11", + "thiserror 2.0.12", "unicode_names2", ] @@ -2316,10 +2289,10 @@ name = "rustpython-common" version = "0.4.0" dependencies = [ "ascii", - "bitflags 2.8.0", + "bitflags 2.9.0", "bstr", "cfg-if", - "getrandom 0.3.1", + "getrandom 0.3.2", "itertools 0.14.0", "libc", "lock_api", @@ -2335,7 +2308,6 @@ dependencies = [ "rustpython-wtf8", "siphasher", "unicode_names2", - "volatile", "widestring", "windows-sys 0.59.0", ] @@ -2344,7 +2316,7 @@ dependencies = [ name = "rustpython-compiler" version = "0.4.0" dependencies = [ - "rand 0.9.0", + "rand 0.9.1", "ruff_python_ast", "ruff_python_parser", "ruff_source_file", @@ -2352,14 +2324,14 @@ dependencies = [ "rustpython-codegen", "rustpython-compiler-core", "rustpython-compiler-source", - "thiserror 2.0.11", + "thiserror 2.0.12", ] [[package]] name = "rustpython-compiler-core" version = "0.4.0" dependencies = [ - "bitflags 2.8.0", + "bitflags 2.9.0", "itertools 0.14.0", "lz4_flex", "malachite-bigint", @@ -2384,7 +2356,7 @@ dependencies = [ "proc-macro2", "rustpython-compiler", "rustpython-derive-impl", - "syn 2.0.98", + "syn 2.0.101", ] [[package]] @@ -2397,9 +2369,9 @@ dependencies = [ "quote", "rustpython-compiler-core", "rustpython-doc", - "syn 2.0.98", + "syn 2.0.101", "syn-ext", - "textwrap 0.16.1", + "textwrap", ] [[package]] @@ -2422,7 +2394,7 @@ dependencies = [ "num-traits", "rustpython-compiler-core", "rustpython-derive", - "thiserror 2.0.11", + "thiserror 2.0.12", ] [[package]] @@ -2433,7 +2405,7 @@ dependencies = [ "is-macro", "lexical-parse-float", "num-traits", - "rand 0.9.0", + "rand 0.9.1", "rustpython-wtf8", "unic-ucd-category", ] @@ -2451,7 +2423,7 @@ dependencies = [ name = "rustpython-sre_engine" version = "0.4.0" dependencies = [ - "bitflags 2.8.0", + "bitflags 2.9.0", "criterion", "num_enum", "optional", @@ -2479,12 +2451,13 @@ dependencies = [ "foreign-types-shared", "gethostname", "hex", - "indexmap 2.7.1", + "indexmap", "itertools 0.14.0", "junction", "libc", "libsqlite3-sys", "libz-rs-sys", + "lzma-sys", "mac_address", "malachite-bigint", "md-5", @@ -2502,7 +2475,7 @@ dependencies = [ "page_size", "parking_lot", "paste", - "puruspe", + "pymath", "rand_core 0.9.3", "rustix", "rustpython-common", @@ -2514,9 +2487,9 @@ dependencies = [ "sha3", "socket2", "system-configuration", - "tcl", + "tcl-sys", "termios", - "tk", + "tk-sys", "ucd", "unic-char-property", "unic-normal", @@ -2524,12 +2497,14 @@ dependencies = [ "unic-ucd-bidi", "unic-ucd-category", "unic-ucd-ident", + "unicode-bidi-mirroring", "unicode-casing", "unicode_names2", "uuid", "widestring", "windows-sys 0.59.0", "xml-rs", + "xz2", ] [[package]] @@ -2538,21 +2513,22 @@ version = "0.4.0" dependencies = [ "ahash", "ascii", - "bitflags 2.8.0", + "bitflags 2.9.0", "bstr", "caseless", "cfg-if", "chrono", + "constant_time_eq", "crossbeam-utils", "errno", "exitcode", "flame", "flamer", - "getrandom 0.3.1", + "getrandom 0.3.2", "glob", - "half 2.4.1", + "half", "hex", - "indexmap 2.7.1", + "indexmap", "is-macro", "itertools 0.14.0", "junction", @@ -2592,9 +2568,9 @@ dependencies = [ "schannel", "serde", "static_assertions", - "strum 0.27.1", - "strum_macros 0.27.1", - "thiserror 2.0.11", + "strum", + "strum_macros", + "thiserror 2.0.12", "thread_local", "timsort", "uname", @@ -2604,7 +2580,7 @@ dependencies = [ "unicode-casing", "unicode_names2", "wasm-bindgen", - "which 6.0.3", + "which", "widestring", "windows", "windows-sys 0.59.0", @@ -2626,7 +2602,6 @@ name = "rustpython_wasm" version = "0.4.0" dependencies = [ "console_error_panic_hook", - "getrandom 0.2.15", "js-sys", "ruff_python_parser", "rustpython-common", @@ -2642,9 +2617,9 @@ dependencies = [ [[package]] name = "rustversion" -version = "1.0.19" +version = "1.0.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f7c45b9784283f1b2e7fb61b42047c2fd678ef0960d4f6f1eba131594cc369d4" +checksum = "eded382c5f5f786b989652c49544c4877d9f015cc22e145a5ea8ea66c2921cd2" [[package]] name = "rustyline" @@ -2652,7 +2627,7 @@ version = "15.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2ee1e066dc922e513bda599c6ccb5f3bb2b0ea5870a579448f2622993f0a9a2f" dependencies = [ - "bitflags 2.8.0", + "bitflags 2.9.0", "cfg-if", "clipboard-win", "fd-lock", @@ -2670,9 +2645,9 @@ dependencies = [ [[package]] name = "ryu" -version = "1.0.19" +version = "1.0.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6ea1a2d0a644769cc99faa24c3ad26b379b786fe7c36fd3c546254801650e6dd" +checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f" [[package]] name = "same-file" @@ -2700,9 +2675,9 @@ checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" [[package]] name = "serde" -version = "1.0.218" +version = "1.0.219" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e8dfc9d19bdbf6d17e22319da49161d5d0108e4188e8b680aef6299eed22df60" +checksum = "5f0e2c6ed6606019b4e29e69dbaba95b11854410e5347d525002456dbbb786b6" dependencies = [ "serde_derive", ] @@ -2719,32 +2694,22 @@ dependencies = [ "wasm-bindgen", ] -[[package]] -name = "serde_cbor" -version = "0.11.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2bef2ebfde456fb76bbcf9f59315333decc4fda0b2b44b420243c11e0f5ec1f5" -dependencies = [ - "half 1.8.3", - "serde", -] - [[package]] name = "serde_derive" -version = "1.0.218" +version = "1.0.219" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f09503e191f4e797cb8aac08e9a4a4695c5edf6a2e70e376d961ddd5c969f82b" +checksum = "5b0276cf7f2c73365f7157c8123c21cd9a50fbbd844757af28ca1f5925fc2a00" dependencies = [ "proc-macro2", "quote", - "syn 2.0.98", + "syn 2.0.101", ] [[package]] name = "serde_json" -version = "1.0.139" +version = "1.0.140" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "44f86c3acccc9c65b153fe1b85a3be07fe5515274ec9f0653b4a0875731c72a6" +checksum = "20068b6e96dc6c9bd23e01df8827e6c7e1f2fddd43c21810382803c136b99373" dependencies = [ "itoa", "memchr", @@ -2774,9 +2739,9 @@ dependencies = [ [[package]] name = "sha2" -version = "0.10.8" +version = "0.10.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "793db75ad2bcafc3ffa7c68b215fee268f537982cd901d132f89c6343f3a3dc8" +checksum = "a7507d819769d01a365ab707794a4084392c824f54a7a6a7862f8c3d0892b283" dependencies = [ "cfg-if", "cpufeatures", @@ -2793,6 +2758,14 @@ dependencies = [ "keccak", ] +[[package]] +name = "shared-build" +version = "0.2.0" +source = "git+https://github.com/arihant2math/tkinter.git?tag=v0.2.0#198fc35b1f18f4eda401f97a641908f321b1403a" +dependencies = [ + "bindgen", +] + [[package]] name = "shlex" version = "1.3.0" @@ -2813,15 +2786,15 @@ checksum = "56199f7ddabf13fe5074ce809e7d3f42b42ae711800501b5b16ea82ad029c39d" [[package]] name = "smallvec" -version = "1.14.0" +version = "1.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7fcf8323ef1faaee30a44a340193b1ac6814fd9b7b4e88e9d4519a3e4abe1cfd" +checksum = "8917285742e9f3e1683f0a9c4e6b57960b7314d0b08d30d1ecd426713ee2eee9" [[package]] name = "socket2" -version = "0.5.8" +version = "0.5.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c970269d99b64e60ec3bd6ad27270092a5394c4e309314b18ae3fe575695fbe8" +checksum = "4f5fd57c80058a56cf5c777ab8a126398ece8e442983605d280a44ce79d0edef" dependencies = [ "libc", "windows-sys 0.52.0", @@ -2839,41 +2812,23 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" -[[package]] -name = "strum" -version = "0.19.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b89a286a7e3b5720b9a477b23253bc50debac207c8d21505f8e70b36792f11b5" - [[package]] name = "strum" version = "0.27.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f64def088c51c9510a8579e3c5d67c65349dcf755e5479ad3d010aa6454e2c32" -[[package]] -name = "strum_macros" -version = "0.19.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e61bb0be289045cb80bfce000512e32d09f8337e54c186725da381377ad1f8d5" -dependencies = [ - "heck 0.3.3", - "proc-macro2", - "quote", - "syn 1.0.109", -] - [[package]] name = "strum_macros" version = "0.27.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c77a8c5abcaf0f9ce05d62342b7d298c346515365c36b673df4ebe3ced01fde8" dependencies = [ - "heck 0.5.0", + "heck", "proc-macro2", "quote", "rustversion", - "syn 2.0.98", + "syn 2.0.101", ] [[package]] @@ -2895,9 +2850,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.98" +version = "2.0.101" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "36147f1a48ae0ec2b5b3bc5b537d267457555a10dc06f3dbc8cb11ba3006d3b1" +checksum = "8ce2b7fc941b3a24138a0a7cf8e858bfc6a992e7978a068a5c760deb0ed43caf" dependencies = [ "proc-macro2", "quote", @@ -2912,36 +2867,30 @@ checksum = "b126de4ef6c2a628a68609dd00733766c3b015894698a438ebdf374933fc31d1" dependencies = [ "proc-macro2", "quote", - "syn 2.0.98", + "syn 2.0.101", ] [[package]] name = "system-configuration" -version = "0.5.1" +version = "0.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ba3a3adc5c275d719af8cb4272ea1c4a6d668a777f37e115f6d11ddbc1c8e0e7" +checksum = "3c879d448e9d986b661742763247d3693ed13609438cf3d006f51f5368a5ba6b" dependencies = [ - "bitflags 1.3.2", + "bitflags 2.9.0", "core-foundation", "system-configuration-sys", ] [[package]] name = "system-configuration-sys" -version = "0.5.0" +version = "0.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a75fb188eb626b924683e3b95e3a48e63551fcfb51949de2f06a9d91dbee93c9" +checksum = "8e1d1b10ced5ca923a1fcb8d03e96b8d3268065d724548c0211415ff6ac6bac4" dependencies = [ "core-foundation-sys", "libc", ] -[[package]] -name = "target-lexicon" -version = "0.12.16" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "61c41af27dd6d1e27b1b16b489db798443478cef1f06a660c96db617ba5de3b1" - [[package]] name = "target-lexicon" version = "0.13.2" @@ -2949,42 +2898,12 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e502f78cdbb8ba4718f566c418c52bc729126ffd16baee5baa718cf25dd5a69a" [[package]] -name = "tcl" -version = "0.1.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4e0d0928e8b4dca8ebd485f687f725bb34e454c7a28c1d353bf7d1b8060581bf" -dependencies = [ - "cex", - "clib", - "enumx", - "inwelling", - "mutf8", - "serde", - "serde_derive", - "tcl_derive", - "tuplex", -] - -[[package]] -name = "tcl_derive" -version = "0.1.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "625d95e672231bbf31dead6861b0ad72bcb71a2891b26b0c4924cd1cc9687b93" -dependencies = [ - "bind_syn", - "proc-macro2", - "quote", - "syn 2.0.98", - "uuid", -] - -[[package]] -name = "termcolor" -version = "1.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "06794f8f6c5c898b3275aebefa6b8a1cb24cd2c6c79397ab15774837a0bc5755" +name = "tcl-sys" +version = "0.2.0" +source = "git+https://github.com/arihant2math/tkinter.git?tag=v0.2.0#198fc35b1f18f4eda401f97a641908f321b1403a" dependencies = [ - "winapi-util", + "pkg-config", + "shared-build", ] [[package]] @@ -2998,18 +2917,9 @@ dependencies = [ [[package]] name = "textwrap" -version = "0.11.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d326610f408c7a4eb6f51c37c330e496b08506c9457c9d34287ecc38809fb060" -dependencies = [ - "unicode-width 0.1.14", -] - -[[package]] -name = "textwrap" -version = "0.16.1" +version = "0.16.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "23d434d3f8967a09480fb04132ebe0a3e088c173e6d0ee7897abbdf4eab0f8b9" +checksum = "c13547615a44dc9c452a8a534638acdf07120d4b6847c8178705da06306a3057" [[package]] name = "thiserror" @@ -3022,11 +2932,11 @@ dependencies = [ [[package]] name = "thiserror" -version = "2.0.11" +version = "2.0.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d452f284b73e6d76dd36758a0c8684b1d5be31f92b89d07fd5822175732206fc" +checksum = "567b8a2dae586314f7be2a752ec7474332959c6460e02bde30d702a66d488708" dependencies = [ - "thiserror-impl 2.0.11", + "thiserror-impl 2.0.12", ] [[package]] @@ -3037,18 +2947,18 @@ checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" dependencies = [ "proc-macro2", "quote", - "syn 2.0.98", + "syn 2.0.101", ] [[package]] name = "thiserror-impl" -version = "2.0.11" +version = "2.0.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26afc1baea8a989337eeb52b6e72a039780ce45c3edfcc9c5b9d112feeb173c2" +checksum = "7f7cf42b4507d8ea322120659672cf1b9dbb93f8f2d4ecfd6e51350ff5b17a1d" dependencies = [ "proc-macro2", "quote", - "syn 2.0.98", + "syn 2.0.101", ] [[package]] @@ -3090,9 +3000,9 @@ dependencies = [ [[package]] name = "tinyvec" -version = "1.8.1" +version = "1.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "022db8904dfa342efe721985167e9fcd16c29b226db4397ed752a761cfce81e8" +checksum = "09b3661f17e86524eccd4371ab0429194e0d7c008abb45f7a7495b1719463c71" dependencies = [ "tinyvec_macros", ] @@ -3104,77 +3014,54 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" [[package]] -name = "tk" -version = "0.1.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c6fbe29c813c9eee5e0d4d996a4a615f6538220f0ad181269a413f21c13eb077" +name = "tk-sys" +version = "0.2.0" +source = "git+https://github.com/arihant2math/tkinter.git?tag=v0.2.0#198fc35b1f18f4eda401f97a641908f321b1403a" dependencies = [ - "bitflags 1.3.2", - "cex", - "clib", - "enumx", - "heredom", - "inwelling", - "num_enum", - "once_cell", - "serde", - "strum 0.19.5", - "strum_macros 0.19.4", - "tcl", - "tcl_derive", - "tuplex", + "pkg-config", + "shared-build", ] [[package]] name = "toml" -version = "0.7.8" +version = "0.8.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dd79e69d3b627db300ff956027cc6c3798cef26d22526befdfcd12feeb6d2257" +checksum = "05ae329d1f08c4d17a59bed7ff5b5a769d062e64a62d34a3261b219e62cd5aae" dependencies = [ "serde", "serde_spanned", "toml_datetime", - "toml_edit 0.19.15", + "toml_edit", ] [[package]] name = "toml_datetime" -version = "0.6.8" +version = "0.6.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0dd7358ecb8fc2f8d014bf86f6f638ce72ba252a2c3a2572f2a795f1d23efb41" +checksum = "3da5db5a963e24bc68be8b17b6fa82814bb22ee8660f192bb182771d498f09a3" dependencies = [ "serde", ] [[package]] name = "toml_edit" -version = "0.19.15" +version = "0.22.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1b5bb770da30e5cbfde35a2d7b9b8a2c4b8ef89548a7a6aeab5c9a576e3e7421" +checksum = "310068873db2c5b3e7659d2cc35d21855dbafa50d1ce336397c666e3cb08137e" dependencies = [ - "indexmap 2.7.1", + "indexmap", "serde", "serde_spanned", "toml_datetime", - "winnow 0.5.40", + "toml_write", + "winnow", ] [[package]] -name = "toml_edit" -version = "0.22.24" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "17b4795ff5edd201c7cd6dca065ae59972ce77d1b80fa0a84d94950ece7d1474" -dependencies = [ - "indexmap 2.7.1", - "toml_datetime", - "winnow 0.7.4", -] - -[[package]] -name = "tuplex" -version = "0.1.2" +name = "toml_write" +version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "676ac81d5454c4dcf37955d34fa8626ede3490f744b86ca14a7b90168d2a08aa" +checksum = "bfb942dfe1d8e29a7ee7fcbde5bd2b9a25fb89aa70caea2eba3bee836ff41076" [[package]] name = "twox-hash" @@ -3312,6 +3199,12 @@ dependencies = [ "unic-common", ] +[[package]] +name = "unicode-bidi-mirroring" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "23cb788ffebc92c5948d0e997106233eeb1d8b9512f93f41651f52b6c5f5af86" + [[package]] name = "unicode-casing" version = "0.1.0" @@ -3320,9 +3213,9 @@ checksum = "623f59e6af2a98bdafeb93fa277ac8e1e40440973001ca15cf4ae1541cd16d56" [[package]] name = "unicode-ident" -version = "1.0.17" +version = "1.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "00e2473a93778eb0bad35909dff6a10d28e63f792f16ed15e404fca9d5eeedbe" +checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512" [[package]] name = "unicode-normalization" @@ -3351,12 +3244,6 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1fc81956842c57dac11422a97c3b8195a1ff727f06e85c84ed2e8aa277c9a0fd" -[[package]] -name = "unicode-xid" -version = "0.2.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853" - [[package]] name = "unicode_names2" version = "1.3.0" @@ -3381,9 +3268,9 @@ dependencies = [ [[package]] name = "unindent" -version = "0.2.3" +version = "0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c7de7d73e1754487cb58364ee906a499937a0dfabd86bcb980fa99ec8c8fa2ce" +checksum = "7264e107f553ccae879d21fbea1d6724ac785e8c3bfc762137959b5802826ef3" [[package]] name = "utf8parse" @@ -3393,12 +3280,11 @@ checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" [[package]] name = "uuid" -version = "1.13.2" +version = "1.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8c1f41ffb7cf259f1ecc2876861a17e7142e63ead296f671f81f6ae85903e0d6" +checksum = "458f7a779bf54acc9f347480ac654f68407d3aab21269a6e3c9f922acd9e2da9" dependencies = [ "atomic", - "getrandom 0.3.1", ] [[package]] @@ -3413,12 +3299,6 @@ version = "0.9.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" -[[package]] -name = "volatile" -version = "0.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f8e76fae08f03f96e166d2dfda232190638c10e0383841252416f9cfe2ae60e6" - [[package]] name = "walkdir" version = "2.5.0" @@ -3437,9 +3317,9 @@ checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" [[package]] name = "wasi" -version = "0.13.3+wasi-0.2.2" +version = "0.14.2+wasi-0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26816d2e1a4a36a2940b96c5296ce403917633dff8f3440e9b236ed6f6bacad2" +checksum = "9683f9a5a998d873c0d21fcbe3c083009670149a8fab228644b8bd36b2c48cb3" dependencies = [ "wit-bindgen-rt", ] @@ -3466,7 +3346,7 @@ dependencies = [ "log", "proc-macro2", "quote", - "syn 2.0.98", + "syn 2.0.101", "wasm-bindgen-shared", ] @@ -3501,7 +3381,7 @@ checksum = "8ae87ea40c9f689fc23f209965b6fb8a99ad69aeeb0231408be24920604395de" dependencies = [ "proc-macro2", "quote", - "syn 2.0.98", + "syn 2.0.101", "wasm-bindgen-backend", "wasm-bindgen-shared", ] @@ -3517,9 +3397,9 @@ dependencies = [ [[package]] name = "wasmtime-jit-icache-coherence" -version = "31.0.0" +version = "32.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a54f6c6c7e9d7eeee32dfcc10db7f29d505ee7dd28d00593ea241d5f70698e64" +checksum = "eb399eaabd7594f695e1159d236bf40ef55babcb3af97f97c027864ed2104db6" dependencies = [ "anyhow", "cfg-if", @@ -3539,33 +3419,21 @@ dependencies = [ [[package]] name = "which" -version = "4.4.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "87ba24419a2078cd2b0f2ede2691b6c66d8e47836da3b6db8265ebad47afbfc7" -dependencies = [ - "either", - "home", - "once_cell", - "rustix", -] - -[[package]] -name = "which" -version = "6.0.3" +version = "7.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b4ee928febd44d98f2f459a4a79bd4d928591333a494a10a868418ac1b39cf1f" +checksum = "24d643ce3fd3e5b54854602a080f34fb10ab75e0b813ee32d00ca2b44fa74762" dependencies = [ "either", - "home", + "env_home", "rustix", "winsafe", ] [[package]] name = "widestring" -version = "1.1.0" +version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7219d36b6eac893fa81e84ebe06485e7dcbb616177469b142df14f1f4deb1311" +checksum = "dd7cf3379ca1aac9eea11fba24fd7e315d621f8dfe35c8d7d2be8b793726e07d" [[package]] name = "winapi" @@ -3604,7 +3472,7 @@ version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e48a53791691ab099e5e2ad123536d0fff50652600abaf43bbf952894110d0be" dependencies = [ - "windows-core", + "windows-core 0.52.0", "windows-targets 0.52.6", ] @@ -3617,6 +3485,65 @@ dependencies = [ "windows-targets 0.52.6", ] +[[package]] +name = "windows-core" +version = "0.61.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4763c1de310c86d75a878046489e2e5ba02c649d185f21c67d4cf8a56d098980" +dependencies = [ + "windows-implement", + "windows-interface", + "windows-link", + "windows-result", + "windows-strings", +] + +[[package]] +name = "windows-implement" +version = "0.60.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a47fddd13af08290e67f4acabf4b459f647552718f683a7b415d290ac744a836" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.101", +] + +[[package]] +name = "windows-interface" +version = "0.59.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bd9211b69f8dcdfa817bfd14bf1c97c9188afa36f4750130fcdf3f400eca9fa8" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.101", +] + +[[package]] +name = "windows-link" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "76840935b766e1b0a05c0066835fb9ec80071d4c09a16f6bd5f7e655e3c14c38" + +[[package]] +name = "windows-result" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c64fd11a4fd95df68efcfee5f44a294fe71b8bc6a91993e2791938abcc712252" +dependencies = [ + "windows-link", +] + +[[package]] +name = "windows-strings" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a2ba9642430ee452d5a7aa78d72907ebe8cfda358e8cb7918a2050581322f97" +dependencies = [ + "windows-link", +] + [[package]] name = "windows-sys" version = "0.48.0" @@ -3767,18 +3694,9 @@ checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" [[package]] name = "winnow" -version = "0.5.40" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f593a95398737aeed53e489c785df13f3618e41dbcd6718c6addbf1395aa6876" -dependencies = [ - "memchr", -] - -[[package]] -name = "winnow" -version = "0.7.4" +version = "0.7.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0e97b544156e9bebe1a0ffbc03484fc1ffe3100cbce3ffb17eac35f7cdd7ab36" +checksum = "c06928c8748d81b05c9be96aad92e1b6ff01833332f281e8cfca3be4b35fc9ec" dependencies = [ "memchr", ] @@ -3793,6 +3711,16 @@ dependencies = [ "windows-sys 0.59.0", ] +[[package]] +name = "winresource" +version = "0.1.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba4a67c78ee5782c0c1cb41bebc7e12c6e79644daa1650ebbc1de5d5b08593f7" +dependencies = [ + "toml", + "version_check", +] + [[package]] name = "winsafe" version = "0.0.19" @@ -3801,62 +3729,50 @@ checksum = "d135d17ab770252ad95e9a872d365cf3090e3be864a34ab46f48555993efc904" [[package]] name = "wit-bindgen-rt" -version = "0.33.0" +version = "0.39.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3268f3d866458b787f390cf61f4bbb563b922d091359f9608842999eaee3943c" +checksum = "6f42320e61fe2cfd34354ecb597f86f413484a798ba44a8ca1165c58d42da6c1" dependencies = [ - "bitflags 2.8.0", + "bitflags 2.9.0", ] [[package]] name = "xml-rs" -version = "0.8.25" +version = "0.8.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c5b940ebc25896e71dd073bad2dbaa2abfe97b0a391415e22ad1326d9c54e3c4" +checksum = "a62ce76d9b56901b19a74f19431b0d8b3bc7ca4ad685a746dfd78ca8f4fc6bda" [[package]] -name = "zerocopy" -version = "0.7.35" +name = "xz2" +version = "0.1.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1b9b4fd18abc82b8136838da5d50bae7bdea537c574d8dc1a34ed098d6c166f0" +checksum = "388c44dc09d76f1536602ead6d325eb532f5c122f17782bd57fb47baeeb767e2" dependencies = [ - "byteorder", - "zerocopy-derive 0.7.35", + "lzma-sys", ] [[package]] name = "zerocopy" -version = "0.8.20" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dde3bb8c68a8f3f1ed4ac9221aad6b10cece3e60a8e2ea54a6a2dec806d0084c" -dependencies = [ - "zerocopy-derive 0.8.20", -] - -[[package]] -name = "zerocopy-derive" -version = "0.7.35" +version = "0.8.25" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e" +checksum = "a1702d9583232ddb9174e01bb7c15a2ab8fb1bc6f227aa1233858c351a3ba0cb" dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.98", + "zerocopy-derive", ] [[package]] name = "zerocopy-derive" -version = "0.8.20" +version = "0.8.25" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eea57037071898bf96a6da35fd626f4f27e9cee3ead2a6c703cf09d472b2e700" +checksum = "28a6e20d751156648aa063f3800b706ee209a32c0b4d9f24be3d980b01be55ef" dependencies = [ "proc-macro2", "quote", - "syn 2.0.98", + "syn 2.0.101", ] [[package]] name = "zlib-rs" -version = "0.4.2" +version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8b20717f0917c908dc63de2e44e97f1e6b126ca58d0e391cee86d504eb8fbd05" +checksum = "868b928d7949e09af2f6086dfc1e01936064cc7a819253bce650d4e2a2d63ba8" diff --git a/Cargo.toml b/Cargo.toml index 271c11e782..1fdc77d261 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -10,20 +10,23 @@ repository.workspace = true license.workspace = true [features] -default = ["threading", "stdlib", "importlib"] +default = ["threading", "stdlib", "stdio", "importlib"] importlib = ["rustpython-vm/importlib"] encodings = ["rustpython-vm/encodings"] +stdio = ["rustpython-vm/stdio"] stdlib = ["rustpython-stdlib", "rustpython-pylib", "encodings"] flame-it = ["rustpython-vm/flame-it", "flame", "flamescope"] freeze-stdlib = ["stdlib", "rustpython-vm/freeze-stdlib", "rustpython-pylib?/freeze-stdlib"] jit = ["rustpython-vm/jit"] threading = ["rustpython-vm/threading", "rustpython-stdlib/threading"] -bz2 = ["stdlib", "rustpython-stdlib/bz2"] sqlite = ["rustpython-stdlib/sqlite"] ssl = ["rustpython-stdlib/ssl"] ssl-vendor = ["ssl", "rustpython-stdlib/ssl-vendor"] tkinter = ["rustpython-stdlib/tkinter"] +[build-dependencies] +winresource = "0.1" + [dependencies] rustpython-compiler = { workspace = true } rustpython-pylib = { workspace = true, optional = true } @@ -36,8 +39,8 @@ log = { workspace = true } flame = { workspace = true, optional = true } lexopt = "0.3" -dirs = { package = "dirs-next", version = "2.0.0" } -env_logger = { version = "0.9.0", default-features = false, features = ["atty", "termcolor"] } +dirs = { package = "dirs-next", version = "2.0" } +env_logger = "0.11" flamescope = { version = "0.1.2", optional = true } [target.'cfg(windows)'.dependencies] @@ -48,7 +51,7 @@ rustyline = { workspace = true } [dev-dependencies] criterion = { workspace = true } -pyo3 = { version = "0.22", features = ["auto-initialize"] } +pyo3 = { version = "0.24", features = ["auto-initialize"] } [[bench]] name = "execution" @@ -79,6 +82,7 @@ opt-level = 3 lto = "thin" [patch.crates-io] +radium = { version = "1.1.0", git = "https://github.com/youknowone/ferrilab", branch = "fix-nightly" } # REDOX START, Uncomment when you want to compile/check with redoxer # REDOX END @@ -155,24 +159,25 @@ bitflags = "2.4.2" bstr = "1" cfg-if = "1.0" chrono = "0.4.39" -criterion = { version = "0.3.5", features = ["html_reports"] } +constant_time_eq = "0.4" +criterion = { version = "0.5", features = ["html_reports"] } crossbeam-utils = "0.8.21" flame = "0.2.2" getrandom = { version = "0.3", features = ["std"] } glob = "0.3" hex = "0.4.3" indexmap = { version = "2.2.6", features = ["std"] } -insta = "1.38.0" +insta = "1.42" itertools = "0.14.0" is-macro = "0.3.7" junction = "1.2.0" libc = "0.2.169" -libffi = "3.2" -log = "0.4.25" +libffi = "4.1" +log = "0.4.27" nix = { version = "0.29", features = ["fs", "user", "process", "term", "time", "signal", "ioctl", "socket", "sched", "zerocopy", "dir", "hostname", "net", "poll"] } -malachite-bigint = "0.5" -malachite-q = "0.5" -malachite-base = "0.5" +malachite-bigint = "0.6" +malachite-q = "0.6" +malachite-base = "0.6" memchr = "2.7.4" num-complex = "0.4.6" num-integer = "0.1.46" @@ -183,10 +188,12 @@ once_cell = "1.20.3" parking_lot = "0.12.3" paste = "1.0.15" proc-macro2 = "1.0.93" +pymath = "0.0.2" quote = "1.0.38" +radium = "1.1" rand = "0.9" rand_core = { version = "0.9", features = ["os_rng"] } -rustix = { version = "0.38", features = ["event"] } +rustix = { version = "1.0", features = ["event"] } rustyline = "15.0.0" serde = { version = "1.0.133", default-features = false } schannel = "0.1.27" @@ -204,6 +211,7 @@ unic-ucd-bidi = "0.9.0" unic-ucd-category = "0.9.0" unic-ucd-ident = "0.9.0" unicode_names2 = "1.3.0" +unicode-bidi-mirroring = "0.2" widestring = "1.1.0" windows-sys = "0.59.0" wasm-bindgen = "0.2.100" diff --git a/Lib/_collections_abc.py b/Lib/_collections_abc.py index 601107d2d8..de624f2e54 100644 --- a/Lib/_collections_abc.py +++ b/Lib/_collections_abc.py @@ -85,6 +85,10 @@ def _f(): pass dict_items = type({}.items()) ## misc ## mappingproxy = type(type.__dict__) +def _get_framelocalsproxy(): + return type(sys._getframe().f_locals) +framelocalsproxy = _get_framelocalsproxy() +del _get_framelocalsproxy generator = type((lambda: (yield))()) ## coroutine ## async def _coro(): pass @@ -508,6 +512,10 @@ def __getitem__(self, item): new_args = (t_args, t_result) return _CallableGenericAlias(Callable, tuple(new_args)) + # TODO: RUSTPYTHON patch for common call + def __or__(self, other): + super().__or__(other) + def _is_param_expr(obj): """Checks if obj matches either a list of types, ``...``, ``ParamSpec`` or ``_ConcatenateGenericAlias`` from typing.py @@ -836,6 +844,7 @@ def __eq__(self, other): __reversed__ = None Mapping.register(mappingproxy) +Mapping.register(framelocalsproxy) class MappingView(Sized): @@ -973,7 +982,7 @@ def clear(self): def update(self, other=(), /, **kwds): ''' D.update([E, ]**F) -> None. Update D from mapping/iterable E and F. - If E present and has a .keys() method, does: for k in E: D[k] = E[k] + If E present and has a .keys() method, does: for k in E.keys(): D[k] = E[k] If E present and lacks .keys() method, does: for (k, v) in E: D[k] = v In either case, this is followed by: for k, v in F.items(): D[k] = v ''' diff --git a/Lib/_pyrepl/__init__.py b/Lib/_pyrepl/__init__.py new file mode 100644 index 0000000000..1693cbd0b9 --- /dev/null +++ b/Lib/_pyrepl/__init__.py @@ -0,0 +1,19 @@ +# Copyright 2000-2008 Michael Hudson-Doyle +# Armin Rigo +# +# All Rights Reserved +# +# +# Permission to use, copy, modify, and distribute this software and +# its documentation for any purpose is hereby granted without fee, +# provided that the above copyright notice appear in all copies and +# that both that copyright notice and this permission notice appear in +# supporting documentation. +# +# THE AUTHOR MICHAEL HUDSON DISCLAIMS ALL WARRANTIES WITH REGARD TO +# THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY +# AND FITNESS, IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, +# INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER +# RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF +# CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN +# CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. diff --git a/Lib/_pyrepl/__main__.py b/Lib/_pyrepl/__main__.py new file mode 100644 index 0000000000..3fa992eee8 --- /dev/null +++ b/Lib/_pyrepl/__main__.py @@ -0,0 +1,6 @@ +# Important: don't add things to this module, as they will end up in the REPL's +# default globals. Use _pyrepl.main instead. + +if __name__ == "__main__": + from .main import interactive_console as __pyrepl_interactive_console + __pyrepl_interactive_console() diff --git a/Lib/_pyrepl/_minimal_curses.py b/Lib/_pyrepl/_minimal_curses.py new file mode 100644 index 0000000000..d884f880f5 --- /dev/null +++ b/Lib/_pyrepl/_minimal_curses.py @@ -0,0 +1,68 @@ +"""Minimal '_curses' module, the low-level interface for curses module +which is not meant to be used directly. + +Based on ctypes. It's too incomplete to be really called '_curses', so +to use it, you have to import it and stick it in sys.modules['_curses'] +manually. + +Note that there is also a built-in module _minimal_curses which will +hide this one if compiled in. +""" + +import ctypes +import ctypes.util + + +class error(Exception): + pass + + +def _find_clib() -> str: + trylibs = ["ncursesw", "ncurses", "curses"] + + for lib in trylibs: + path = ctypes.util.find_library(lib) + if path: + return path + raise ModuleNotFoundError("curses library not found", name="_pyrepl._minimal_curses") + + +_clibpath = _find_clib() +clib = ctypes.cdll.LoadLibrary(_clibpath) + +clib.setupterm.argtypes = [ctypes.c_char_p, ctypes.c_int, ctypes.POINTER(ctypes.c_int)] +clib.setupterm.restype = ctypes.c_int + +clib.tigetstr.argtypes = [ctypes.c_char_p] +clib.tigetstr.restype = ctypes.c_ssize_t + +clib.tparm.argtypes = [ctypes.c_char_p] + 9 * [ctypes.c_int] # type: ignore[operator] +clib.tparm.restype = ctypes.c_char_p + +OK = 0 +ERR = -1 + +# ____________________________________________________________ + + +def setupterm(termstr, fd): + err = ctypes.c_int(0) + result = clib.setupterm(termstr, fd, ctypes.byref(err)) + if result == ERR: + raise error("setupterm() failed (err=%d)" % err.value) + + +def tigetstr(cap): + if not isinstance(cap, bytes): + cap = cap.encode("ascii") + result = clib.tigetstr(cap) + if result == ERR: + return None + return ctypes.cast(result, ctypes.c_char_p).value + + +def tparm(str, i1=0, i2=0, i3=0, i4=0, i5=0, i6=0, i7=0, i8=0, i9=0): + result = clib.tparm(str, i1, i2, i3, i4, i5, i6, i7, i8, i9) + if result is None: + raise error("tparm() returned NULL") + return result diff --git a/Lib/_pyrepl/_threading_handler.py b/Lib/_pyrepl/_threading_handler.py new file mode 100644 index 0000000000..82f5e8650a --- /dev/null +++ b/Lib/_pyrepl/_threading_handler.py @@ -0,0 +1,74 @@ +from __future__ import annotations + +from dataclasses import dataclass, field +import traceback + + +TYPE_CHECKING = False +if TYPE_CHECKING: + from threading import Thread + from types import TracebackType + from typing import Protocol + + class ExceptHookArgs(Protocol): + @property + def exc_type(self) -> type[BaseException]: ... + @property + def exc_value(self) -> BaseException | None: ... + @property + def exc_traceback(self) -> TracebackType | None: ... + @property + def thread(self) -> Thread | None: ... + + class ShowExceptions(Protocol): + def __call__(self) -> int: ... + def add(self, s: str) -> None: ... + + from .reader import Reader + + +def install_threading_hook(reader: Reader) -> None: + import threading + + @dataclass + class ExceptHookHandler: + lock: threading.Lock = field(default_factory=threading.Lock) + messages: list[str] = field(default_factory=list) + + def show(self) -> int: + count = 0 + with self.lock: + if not self.messages: + return 0 + reader.restore() + for tb in self.messages: + count += 1 + if tb: + print(tb) + self.messages.clear() + reader.scheduled_commands.append("ctrl-c") + reader.prepare() + return count + + def add(self, s: str) -> None: + with self.lock: + self.messages.append(s) + + def exception(self, args: ExceptHookArgs) -> None: + lines = traceback.format_exception( + args.exc_type, + args.exc_value, + args.exc_traceback, + colorize=reader.can_colorize, + ) # type: ignore[call-overload] + pre = f"\nException in {args.thread.name}:\n" if args.thread else "\n" + tb = pre + "".join(lines) + self.add(tb) + + def __call__(self) -> int: + return self.show() + + + handler = ExceptHookHandler() + reader.threading_hook = handler + threading.excepthook = handler.exception diff --git a/Lib/_pyrepl/commands.py b/Lib/_pyrepl/commands.py new file mode 100644 index 0000000000..503ca1da32 --- /dev/null +++ b/Lib/_pyrepl/commands.py @@ -0,0 +1,489 @@ +# Copyright 2000-2010 Michael Hudson-Doyle +# Antonio Cuni +# Armin Rigo +# +# All Rights Reserved +# +# +# Permission to use, copy, modify, and distribute this software and +# its documentation for any purpose is hereby granted without fee, +# provided that the above copyright notice appear in all copies and +# that both that copyright notice and this permission notice appear in +# supporting documentation. +# +# THE AUTHOR MICHAEL HUDSON DISCLAIMS ALL WARRANTIES WITH REGARD TO +# THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY +# AND FITNESS, IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, +# INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER +# RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF +# CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN +# CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + +from __future__ import annotations +import os + +# Categories of actions: +# killing +# yanking +# motion +# editing +# history +# finishing +# [completion] + + +# types +if False: + from .historical_reader import HistoricalReader + + +class Command: + finish: bool = False + kills_digit_arg: bool = True + + def __init__( + self, reader: HistoricalReader, event_name: str, event: list[str] + ) -> None: + # Reader should really be "any reader" but there's too much usage of + # HistoricalReader methods and fields in the code below for us to + # refactor at the moment. + + self.reader = reader + self.event = event + self.event_name = event_name + + def do(self) -> None: + pass + + +class KillCommand(Command): + def kill_range(self, start: int, end: int) -> None: + if start == end: + return + r = self.reader + b = r.buffer + text = b[start:end] + del b[start:end] + if is_kill(r.last_command): + if start < r.pos: + r.kill_ring[-1] = text + r.kill_ring[-1] + else: + r.kill_ring[-1] = r.kill_ring[-1] + text + else: + r.kill_ring.append(text) + r.pos = start + r.dirty = True + + +class YankCommand(Command): + pass + + +class MotionCommand(Command): + pass + + +class EditCommand(Command): + pass + + +class FinishCommand(Command): + finish = True + pass + + +def is_kill(command: type[Command] | None) -> bool: + return command is not None and issubclass(command, KillCommand) + + +def is_yank(command: type[Command] | None) -> bool: + return command is not None and issubclass(command, YankCommand) + + +# etc + + +class digit_arg(Command): + kills_digit_arg = False + + def do(self) -> None: + r = self.reader + c = self.event[-1] + if c == "-": + if r.arg is not None: + r.arg = -r.arg + else: + r.arg = -1 + else: + d = int(c) + if r.arg is None: + r.arg = d + else: + if r.arg < 0: + r.arg = 10 * r.arg - d + else: + r.arg = 10 * r.arg + d + r.dirty = True + + +class clear_screen(Command): + def do(self) -> None: + r = self.reader + r.console.clear() + r.dirty = True + + +class refresh(Command): + def do(self) -> None: + self.reader.dirty = True + + +class repaint(Command): + def do(self) -> None: + self.reader.dirty = True + self.reader.console.repaint() + + +class kill_line(KillCommand): + def do(self) -> None: + r = self.reader + b = r.buffer + eol = r.eol() + for c in b[r.pos : eol]: + if not c.isspace(): + self.kill_range(r.pos, eol) + return + else: + self.kill_range(r.pos, eol + 1) + + +class unix_line_discard(KillCommand): + def do(self) -> None: + r = self.reader + self.kill_range(r.bol(), r.pos) + + +class unix_word_rubout(KillCommand): + def do(self) -> None: + r = self.reader + for i in range(r.get_arg()): + self.kill_range(r.bow(), r.pos) + + +class kill_word(KillCommand): + def do(self) -> None: + r = self.reader + for i in range(r.get_arg()): + self.kill_range(r.pos, r.eow()) + + +class backward_kill_word(KillCommand): + def do(self) -> None: + r = self.reader + for i in range(r.get_arg()): + self.kill_range(r.bow(), r.pos) + + +class yank(YankCommand): + def do(self) -> None: + r = self.reader + if not r.kill_ring: + r.error("nothing to yank") + return + r.insert(r.kill_ring[-1]) + + +class yank_pop(YankCommand): + def do(self) -> None: + r = self.reader + b = r.buffer + if not r.kill_ring: + r.error("nothing to yank") + return + if not is_yank(r.last_command): + r.error("previous command was not a yank") + return + repl = len(r.kill_ring[-1]) + r.kill_ring.insert(0, r.kill_ring.pop()) + t = r.kill_ring[-1] + b[r.pos - repl : r.pos] = t + r.pos = r.pos - repl + len(t) + r.dirty = True + + +class interrupt(FinishCommand): + def do(self) -> None: + import signal + + self.reader.console.finish() + self.reader.finish() + os.kill(os.getpid(), signal.SIGINT) + + +class ctrl_c(Command): + def do(self) -> None: + self.reader.console.finish() + self.reader.finish() + raise KeyboardInterrupt + + +class suspend(Command): + def do(self) -> None: + import signal + + r = self.reader + p = r.pos + r.console.finish() + os.kill(os.getpid(), signal.SIGSTOP) + ## this should probably be done + ## in a handler for SIGCONT? + r.console.prepare() + r.pos = p + # r.posxy = 0, 0 # XXX this is invalid + r.dirty = True + r.console.screen = [] + + +class up(MotionCommand): + def do(self) -> None: + r = self.reader + for _ in range(r.get_arg()): + x, y = r.pos2xy() + new_y = y - 1 + + if r.bol() == 0: + if r.historyi > 0: + r.select_item(r.historyi - 1) + return + r.pos = 0 + r.error("start of buffer") + return + + if ( + x + > ( + new_x := r.max_column(new_y) + ) # we're past the end of the previous line + or x == r.max_column(y) + and any( + not i.isspace() for i in r.buffer[r.bol() :] + ) # move between eols + ): + x = new_x + + r.setpos_from_xy(x, new_y) + + +class down(MotionCommand): + def do(self) -> None: + r = self.reader + b = r.buffer + for _ in range(r.get_arg()): + x, y = r.pos2xy() + new_y = y + 1 + + if r.eol() == len(b): + if r.historyi < len(r.history): + r.select_item(r.historyi + 1) + r.pos = r.eol(0) + return + r.pos = len(b) + r.error("end of buffer") + return + + if ( + x + > ( + new_x := r.max_column(new_y) + ) # we're past the end of the previous line + or x == r.max_column(y) + and any( + not i.isspace() for i in r.buffer[r.bol() :] + ) # move between eols + ): + x = new_x + + r.setpos_from_xy(x, new_y) + + +class left(MotionCommand): + def do(self) -> None: + r = self.reader + for _ in range(r.get_arg()): + p = r.pos - 1 + if p >= 0: + r.pos = p + else: + self.reader.error("start of buffer") + + +class right(MotionCommand): + def do(self) -> None: + r = self.reader + b = r.buffer + for _ in range(r.get_arg()): + p = r.pos + 1 + if p <= len(b): + r.pos = p + else: + self.reader.error("end of buffer") + + +class beginning_of_line(MotionCommand): + def do(self) -> None: + self.reader.pos = self.reader.bol() + + +class end_of_line(MotionCommand): + def do(self) -> None: + self.reader.pos = self.reader.eol() + + +class home(MotionCommand): + def do(self) -> None: + self.reader.pos = 0 + + +class end(MotionCommand): + def do(self) -> None: + self.reader.pos = len(self.reader.buffer) + + +class forward_word(MotionCommand): + def do(self) -> None: + r = self.reader + for i in range(r.get_arg()): + r.pos = r.eow() + + +class backward_word(MotionCommand): + def do(self) -> None: + r = self.reader + for i in range(r.get_arg()): + r.pos = r.bow() + + +class self_insert(EditCommand): + def do(self) -> None: + r = self.reader + text = self.event * r.get_arg() + r.insert(text) + + +class insert_nl(EditCommand): + def do(self) -> None: + r = self.reader + r.insert("\n" * r.get_arg()) + + +class transpose_characters(EditCommand): + def do(self) -> None: + r = self.reader + b = r.buffer + s = r.pos - 1 + if s < 0: + r.error("cannot transpose at start of buffer") + else: + if s == len(b): + s -= 1 + t = min(s + r.get_arg(), len(b) - 1) + c = b[s] + del b[s] + b.insert(t, c) + r.pos = t + r.dirty = True + + +class backspace(EditCommand): + def do(self) -> None: + r = self.reader + b = r.buffer + for i in range(r.get_arg()): + if r.pos > 0: + r.pos -= 1 + del b[r.pos] + r.dirty = True + else: + self.reader.error("can't backspace at start") + + +class delete(EditCommand): + def do(self) -> None: + r = self.reader + b = r.buffer + if ( + r.pos == 0 + and len(b) == 0 # this is something of a hack + and self.event[-1] == "\004" + ): + r.update_screen() + r.console.finish() + raise EOFError + for i in range(r.get_arg()): + if r.pos != len(b): + del b[r.pos] + r.dirty = True + else: + self.reader.error("end of buffer") + + +class accept(FinishCommand): + def do(self) -> None: + pass + + +class help(Command): + def do(self) -> None: + import _sitebuiltins + + with self.reader.suspend(): + self.reader.msg = _sitebuiltins._Helper()() # type: ignore[assignment, call-arg] + + +class invalid_key(Command): + def do(self) -> None: + pending = self.reader.console.getpending() + s = "".join(self.event) + pending.data + self.reader.error("`%r' not bound" % s) + + +class invalid_command(Command): + def do(self) -> None: + s = self.event_name + self.reader.error("command `%s' not known" % s) + + +class show_history(Command): + def do(self) -> None: + from .pager import get_pager + from site import gethistoryfile # type: ignore[attr-defined] + + history = os.linesep.join(self.reader.history[:]) + self.reader.console.restore() + pager = get_pager() + pager(history, gethistoryfile()) + self.reader.console.prepare() + + # We need to copy over the state so that it's consistent between + # console and reader, and console does not overwrite/append stuff + self.reader.console.screen = self.reader.screen.copy() + self.reader.console.posxy = self.reader.cxy + + +class paste_mode(Command): + + def do(self) -> None: + self.reader.paste_mode = not self.reader.paste_mode + self.reader.dirty = True + + +class enable_bracketed_paste(Command): + def do(self) -> None: + self.reader.paste_mode = True + self.reader.in_bracketed_paste = True + +class disable_bracketed_paste(Command): + def do(self) -> None: + self.reader.paste_mode = False + self.reader.in_bracketed_paste = False + self.reader.dirty = True diff --git a/Lib/_pyrepl/completing_reader.py b/Lib/_pyrepl/completing_reader.py new file mode 100644 index 0000000000..9a005281da --- /dev/null +++ b/Lib/_pyrepl/completing_reader.py @@ -0,0 +1,295 @@ +# Copyright 2000-2010 Michael Hudson-Doyle +# Antonio Cuni +# +# All Rights Reserved +# +# +# Permission to use, copy, modify, and distribute this software and +# its documentation for any purpose is hereby granted without fee, +# provided that the above copyright notice appear in all copies and +# that both that copyright notice and this permission notice appear in +# supporting documentation. +# +# THE AUTHOR MICHAEL HUDSON DISCLAIMS ALL WARRANTIES WITH REGARD TO +# THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY +# AND FITNESS, IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, +# INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER +# RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF +# CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN +# CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + +from __future__ import annotations + +from dataclasses import dataclass, field + +import re +from . import commands, console, reader +from .reader import Reader + + +# types +Command = commands.Command +if False: + from .types import KeySpec, CommandName + + +def prefix(wordlist: list[str], j: int = 0) -> str: + d = {} + i = j + try: + while 1: + for word in wordlist: + d[word[i]] = 1 + if len(d) > 1: + return wordlist[0][j:i] + i += 1 + d = {} + except IndexError: + return wordlist[0][j:i] + return "" + + +STRIPCOLOR_REGEX = re.compile(r"\x1B\[([0-9]{1,3}(;[0-9]{1,2})?)?[m|K]") + +def stripcolor(s: str) -> str: + return STRIPCOLOR_REGEX.sub('', s) + + +def real_len(s: str) -> int: + return len(stripcolor(s)) + + +def left_align(s: str, maxlen: int) -> str: + stripped = stripcolor(s) + if len(stripped) > maxlen: + # too bad, we remove the color + return stripped[:maxlen] + padding = maxlen - len(stripped) + return s + ' '*padding + + +def build_menu( + cons: console.Console, + wordlist: list[str], + start: int, + use_brackets: bool, + sort_in_column: bool, +) -> tuple[list[str], int]: + if use_brackets: + item = "[ %s ]" + padding = 4 + else: + item = "%s " + padding = 2 + maxlen = min(max(map(real_len, wordlist)), cons.width - padding) + cols = int(cons.width / (maxlen + padding)) + rows = int((len(wordlist) - 1)/cols + 1) + + if sort_in_column: + # sort_in_column=False (default) sort_in_column=True + # A B C A D G + # D E F B E + # G C F + # + # "fill" the table with empty words, so we always have the same amout + # of rows for each column + missing = cols*rows - len(wordlist) + wordlist = wordlist + ['']*missing + indexes = [(i % cols) * rows + i // cols for i in range(len(wordlist))] + wordlist = [wordlist[i] for i in indexes] + menu = [] + i = start + for r in range(rows): + row = [] + for col in range(cols): + row.append(item % left_align(wordlist[i], maxlen)) + i += 1 + if i >= len(wordlist): + break + menu.append(''.join(row)) + if i >= len(wordlist): + i = 0 + break + if r + 5 > cons.height: + menu.append(" %d more... " % (len(wordlist) - i)) + break + return menu, i + +# this gets somewhat user interface-y, and as a result the logic gets +# very convoluted. +# +# To summarise the summary of the summary:- people are a problem. +# -- The Hitch-Hikers Guide to the Galaxy, Episode 12 + +#### Desired behaviour of the completions commands. +# the considerations are: +# (1) how many completions are possible +# (2) whether the last command was a completion +# (3) if we can assume that the completer is going to return the same set of +# completions: this is controlled by the ``assume_immutable_completions`` +# variable on the reader, which is True by default to match the historical +# behaviour of pyrepl, but e.g. False in the ReadlineAlikeReader to match +# more closely readline's semantics (this is needed e.g. by +# fancycompleter) +# +# if there's no possible completion, beep at the user and point this out. +# this is easy. +# +# if there's only one possible completion, stick it in. if the last thing +# user did was a completion, point out that he isn't getting anywhere, but +# only if the ``assume_immutable_completions`` is True. +# +# now it gets complicated. +# +# for the first press of a completion key: +# if there's a common prefix, stick it in. + +# irrespective of whether anything got stuck in, if the word is now +# complete, show the "complete but not unique" message + +# if there's no common prefix and if the word is not now complete, +# beep. + +# common prefix -> yes no +# word complete \/ +# yes "cbnu" "cbnu" +# no - beep + +# for the second bang on the completion key +# there will necessarily be no common prefix +# show a menu of the choices. + +# for subsequent bangs, rotate the menu around (if there are sufficient +# choices). + + +class complete(commands.Command): + def do(self) -> None: + r: CompletingReader + r = self.reader # type: ignore[assignment] + last_is_completer = r.last_command_is(self.__class__) + immutable_completions = r.assume_immutable_completions + completions_unchangable = last_is_completer and immutable_completions + stem = r.get_stem() + if not completions_unchangable: + r.cmpltn_menu_choices = r.get_completions(stem) + + completions = r.cmpltn_menu_choices + if not completions: + r.error("no matches") + elif len(completions) == 1: + if completions_unchangable and len(completions[0]) == len(stem): + r.msg = "[ sole completion ]" + r.dirty = True + r.insert(completions[0][len(stem):]) + else: + p = prefix(completions, len(stem)) + if p: + r.insert(p) + if last_is_completer: + r.cmpltn_menu_visible = True + r.cmpltn_message_visible = False + r.cmpltn_menu, r.cmpltn_menu_end = build_menu( + r.console, completions, r.cmpltn_menu_end, + r.use_brackets, r.sort_in_column) + r.dirty = True + elif not r.cmpltn_menu_visible: + r.cmpltn_message_visible = True + if stem + p in completions: + r.msg = "[ complete but not unique ]" + r.dirty = True + else: + r.msg = "[ not unique ]" + r.dirty = True + + +class self_insert(commands.self_insert): + def do(self) -> None: + r: CompletingReader + r = self.reader # type: ignore[assignment] + + commands.self_insert.do(self) + if r.cmpltn_menu_visible: + stem = r.get_stem() + if len(stem) < 1: + r.cmpltn_reset() + else: + completions = [w for w in r.cmpltn_menu_choices + if w.startswith(stem)] + if completions: + r.cmpltn_menu, r.cmpltn_menu_end = build_menu( + r.console, completions, 0, + r.use_brackets, r.sort_in_column) + else: + r.cmpltn_reset() + + +@dataclass +class CompletingReader(Reader): + """Adds completion support""" + + ### Class variables + # see the comment for the complete command + assume_immutable_completions = True + use_brackets = True # display completions inside [] + sort_in_column = False + + ### Instance variables + cmpltn_menu: list[str] = field(init=False) + cmpltn_menu_visible: bool = field(init=False) + cmpltn_message_visible: bool = field(init=False) + cmpltn_menu_end: int = field(init=False) + cmpltn_menu_choices: list[str] = field(init=False) + + def __post_init__(self) -> None: + super().__post_init__() + self.cmpltn_reset() + for c in (complete, self_insert): + self.commands[c.__name__] = c + self.commands[c.__name__.replace('_', '-')] = c + + def collect_keymap(self) -> tuple[tuple[KeySpec, CommandName], ...]: + return super().collect_keymap() + ( + (r'\t', 'complete'),) + + def after_command(self, cmd: Command) -> None: + super().after_command(cmd) + if not isinstance(cmd, (complete, self_insert)): + self.cmpltn_reset() + + def calc_screen(self) -> list[str]: + screen = super().calc_screen() + if self.cmpltn_menu_visible: + # We display the completions menu below the current prompt + ly = self.lxy[1] + 1 + screen[ly:ly] = self.cmpltn_menu + # If we're not in the middle of multiline edit, don't append to screeninfo + # since that screws up the position calculation in pos2xy function. + # This is a hack to prevent the cursor jumping + # into the completions menu when pressing left or down arrow. + if self.pos != len(self.buffer): + self.screeninfo[ly:ly] = [(0, [])]*len(self.cmpltn_menu) + return screen + + def finish(self) -> None: + super().finish() + self.cmpltn_reset() + + def cmpltn_reset(self) -> None: + self.cmpltn_menu = [] + self.cmpltn_menu_visible = False + self.cmpltn_message_visible = False + self.cmpltn_menu_end = 0 + self.cmpltn_menu_choices = [] + + def get_stem(self) -> str: + st = self.syntax_table + SW = reader.SYNTAX_WORD + b = self.buffer + p = self.pos - 1 + while p >= 0 and st.get(b[p], SW) == SW: + p -= 1 + return ''.join(b[p+1:self.pos]) + + def get_completions(self, stem: str) -> list[str]: + return [] diff --git a/Lib/_pyrepl/console.py b/Lib/_pyrepl/console.py new file mode 100644 index 0000000000..0d78890b4f --- /dev/null +++ b/Lib/_pyrepl/console.py @@ -0,0 +1,213 @@ +# Copyright 2000-2004 Michael Hudson-Doyle +# +# All Rights Reserved +# +# +# Permission to use, copy, modify, and distribute this software and +# its documentation for any purpose is hereby granted without fee, +# provided that the above copyright notice appear in all copies and +# that both that copyright notice and this permission notice appear in +# supporting documentation. +# +# THE AUTHOR MICHAEL HUDSON DISCLAIMS ALL WARRANTIES WITH REGARD TO +# THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY +# AND FITNESS, IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, +# INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER +# RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF +# CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN +# CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + +from __future__ import annotations + +import _colorize # type: ignore[import-not-found] + +from abc import ABC, abstractmethod +import ast +import code +from dataclasses import dataclass, field +import os.path +import sys + + +TYPE_CHECKING = False + +if TYPE_CHECKING: + from typing import IO + from typing import Callable + + +@dataclass +class Event: + evt: str + data: str + raw: bytes = b"" + + +@dataclass +class Console(ABC): + posxy: tuple[int, int] + screen: list[str] = field(default_factory=list) + height: int = 25 + width: int = 80 + + def __init__( + self, + f_in: IO[bytes] | int = 0, + f_out: IO[bytes] | int = 1, + term: str = "", + encoding: str = "", + ): + self.encoding = encoding or sys.getdefaultencoding() + + if isinstance(f_in, int): + self.input_fd = f_in + else: + self.input_fd = f_in.fileno() + + if isinstance(f_out, int): + self.output_fd = f_out + else: + self.output_fd = f_out.fileno() + + @abstractmethod + def refresh(self, screen: list[str], xy: tuple[int, int]) -> None: ... + + @abstractmethod + def prepare(self) -> None: ... + + @abstractmethod + def restore(self) -> None: ... + + @abstractmethod + def move_cursor(self, x: int, y: int) -> None: ... + + @abstractmethod + def set_cursor_vis(self, visible: bool) -> None: ... + + @abstractmethod + def getheightwidth(self) -> tuple[int, int]: + """Return (height, width) where height and width are the height + and width of the terminal window in characters.""" + ... + + @abstractmethod + def get_event(self, block: bool = True) -> Event | None: + """Return an Event instance. Returns None if |block| is false + and there is no event pending, otherwise waits for the + completion of an event.""" + ... + + @abstractmethod + def push_char(self, char: int | bytes) -> None: + """ + Push a character to the console event queue. + """ + ... + + @abstractmethod + def beep(self) -> None: ... + + @abstractmethod + def clear(self) -> None: + """Wipe the screen""" + ... + + @abstractmethod + def finish(self) -> None: + """Move the cursor to the end of the display and otherwise get + ready for end. XXX could be merged with restore? Hmm.""" + ... + + @abstractmethod + def flushoutput(self) -> None: + """Flush all output to the screen (assuming there's some + buffering going on somewhere).""" + ... + + @abstractmethod + def forgetinput(self) -> None: + """Forget all pending, but not yet processed input.""" + ... + + @abstractmethod + def getpending(self) -> Event: + """Return the characters that have been typed but not yet + processed.""" + ... + + @abstractmethod + def wait(self, timeout: float | None) -> bool: + """Wait for an event. The return value is True if an event is + available, False if the timeout has been reached. If timeout is + None, wait forever. The timeout is in milliseconds.""" + ... + + @property + def input_hook(self) -> Callable[[], int] | None: + """Returns the current input hook.""" + ... + + @abstractmethod + def repaint(self) -> None: ... + + +class InteractiveColoredConsole(code.InteractiveConsole): + def __init__( + self, + locals: dict[str, object] | None = None, + filename: str = "", + *, + local_exit: bool = False, + ) -> None: + super().__init__(locals=locals, filename=filename, local_exit=local_exit) # type: ignore[call-arg] + self.can_colorize = _colorize.can_colorize() + + def showsyntaxerror(self, filename=None, **kwargs): + super().showsyntaxerror(filename=filename, **kwargs) + + def _excepthook(self, typ, value, tb): + import traceback + lines = traceback.format_exception( + typ, value, tb, + colorize=self.can_colorize, + limit=traceback.BUILTIN_EXCEPTION_LIMIT) + self.write(''.join(lines)) + + def runsource(self, source, filename="", symbol="single"): + try: + tree = self.compile.compiler( + source, + filename, + "exec", + ast.PyCF_ONLY_AST, + incomplete_input=False, + ) + except (SyntaxError, OverflowError, ValueError): + self.showsyntaxerror(filename, source=source) + return False + if tree.body: + *_, last_stmt = tree.body + for stmt in tree.body: + wrapper = ast.Interactive if stmt is last_stmt else ast.Module + the_symbol = symbol if stmt is last_stmt else "exec" + item = wrapper([stmt]) + try: + code = self.compile.compiler(item, filename, the_symbol) + except SyntaxError as e: + if e.args[0] == "'await' outside function": + python = os.path.basename(sys.executable) + e.add_note( + f"Try the asyncio REPL ({python} -m asyncio) to use" + f" top-level 'await' and run background asyncio tasks." + ) + self.showsyntaxerror(filename, source=source) + return False + except (OverflowError, ValueError): + self.showsyntaxerror(filename, source=source) + return False + + if code is None: + return True + + self.runcode(code) + return False diff --git a/Lib/_pyrepl/curses.py b/Lib/_pyrepl/curses.py new file mode 100644 index 0000000000..3a624d9f68 --- /dev/null +++ b/Lib/_pyrepl/curses.py @@ -0,0 +1,33 @@ +# Copyright 2000-2010 Michael Hudson-Doyle +# Armin Rigo +# +# All Rights Reserved +# +# +# Permission to use, copy, modify, and distribute this software and +# its documentation for any purpose is hereby granted without fee, +# provided that the above copyright notice appear in all copies and +# that both that copyright notice and this permission notice appear in +# supporting documentation. +# +# THE AUTHOR MICHAEL HUDSON DISCLAIMS ALL WARRANTIES WITH REGARD TO +# THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY +# AND FITNESS, IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, +# INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER +# RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF +# CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN +# CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + + +try: + import _curses +except ImportError: + try: + import curses as _curses # type: ignore[no-redef] + except ImportError: + from . import _minimal_curses as _curses # type: ignore[no-redef] + +setupterm = _curses.setupterm +tigetstr = _curses.tigetstr +tparm = _curses.tparm +error = _curses.error diff --git a/Lib/_pyrepl/fancy_termios.py b/Lib/_pyrepl/fancy_termios.py new file mode 100644 index 0000000000..0468b9a267 --- /dev/null +++ b/Lib/_pyrepl/fancy_termios.py @@ -0,0 +1,76 @@ +# Copyright 2000-2004 Michael Hudson-Doyle +# +# All Rights Reserved +# +# +# Permission to use, copy, modify, and distribute this software and +# its documentation for any purpose is hereby granted without fee, +# provided that the above copyright notice appear in all copies and +# that both that copyright notice and this permission notice appear in +# supporting documentation. +# +# THE AUTHOR MICHAEL HUDSON DISCLAIMS ALL WARRANTIES WITH REGARD TO +# THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY +# AND FITNESS, IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, +# INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER +# RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF +# CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN +# CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + +import termios + + +class TermState: + def __init__(self, tuples): + ( + self.iflag, + self.oflag, + self.cflag, + self.lflag, + self.ispeed, + self.ospeed, + self.cc, + ) = tuples + + def as_list(self): + return [ + self.iflag, + self.oflag, + self.cflag, + self.lflag, + self.ispeed, + self.ospeed, + # Always return a copy of the control characters list to ensure + # there are not any additional references to self.cc + self.cc[:], + ] + + def copy(self): + return self.__class__(self.as_list()) + + +def tcgetattr(fd): + return TermState(termios.tcgetattr(fd)) + + +def tcsetattr(fd, when, attrs): + termios.tcsetattr(fd, when, attrs.as_list()) + + +class Term(TermState): + TS__init__ = TermState.__init__ + + def __init__(self, fd=0): + self.TS__init__(termios.tcgetattr(fd)) + self.fd = fd + self.stack = [] + + def save(self): + self.stack.append(self.as_list()) + + def set(self, when=termios.TCSANOW): + termios.tcsetattr(self.fd, when, self.as_list()) + + def restore(self): + self.TS__init__(self.stack.pop()) + self.set() diff --git a/Lib/_pyrepl/historical_reader.py b/Lib/_pyrepl/historical_reader.py new file mode 100644 index 0000000000..c4b95fa2e8 --- /dev/null +++ b/Lib/_pyrepl/historical_reader.py @@ -0,0 +1,419 @@ +# Copyright 2000-2004 Michael Hudson-Doyle +# +# All Rights Reserved +# +# +# Permission to use, copy, modify, and distribute this software and +# its documentation for any purpose is hereby granted without fee, +# provided that the above copyright notice appear in all copies and +# that both that copyright notice and this permission notice appear in +# supporting documentation. +# +# THE AUTHOR MICHAEL HUDSON DISCLAIMS ALL WARRANTIES WITH REGARD TO +# THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY +# AND FITNESS, IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, +# INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER +# RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF +# CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN +# CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + +from __future__ import annotations + +from contextlib import contextmanager +from dataclasses import dataclass, field + +from . import commands, input +from .reader import Reader + + +if False: + from .types import SimpleContextManager, KeySpec, CommandName + + +isearch_keymap: tuple[tuple[KeySpec, CommandName], ...] = tuple( + [("\\%03o" % c, "isearch-end") for c in range(256) if chr(c) != "\\"] + + [(c, "isearch-add-character") for c in map(chr, range(32, 127)) if c != "\\"] + + [ + ("\\%03o" % c, "isearch-add-character") + for c in range(256) + if chr(c).isalpha() and chr(c) != "\\" + ] + + [ + ("\\\\", "self-insert"), + (r"\C-r", "isearch-backwards"), + (r"\C-s", "isearch-forwards"), + (r"\C-c", "isearch-cancel"), + (r"\C-g", "isearch-cancel"), + (r"\", "isearch-backspace"), + ] +) + +ISEARCH_DIRECTION_NONE = "" +ISEARCH_DIRECTION_BACKWARDS = "r" +ISEARCH_DIRECTION_FORWARDS = "f" + + +class next_history(commands.Command): + def do(self) -> None: + r = self.reader + if r.historyi == len(r.history): + r.error("end of history list") + return + r.select_item(r.historyi + 1) + + +class previous_history(commands.Command): + def do(self) -> None: + r = self.reader + if r.historyi == 0: + r.error("start of history list") + return + r.select_item(r.historyi - 1) + + +class history_search_backward(commands.Command): + def do(self) -> None: + r = self.reader + r.search_next(forwards=False) + + +class history_search_forward(commands.Command): + def do(self) -> None: + r = self.reader + r.search_next(forwards=True) + + +class restore_history(commands.Command): + def do(self) -> None: + r = self.reader + if r.historyi != len(r.history): + if r.get_unicode() != r.history[r.historyi]: + r.buffer = list(r.history[r.historyi]) + r.pos = len(r.buffer) + r.dirty = True + + +class first_history(commands.Command): + def do(self) -> None: + self.reader.select_item(0) + + +class last_history(commands.Command): + def do(self) -> None: + self.reader.select_item(len(self.reader.history)) + + +class operate_and_get_next(commands.FinishCommand): + def do(self) -> None: + self.reader.next_history = self.reader.historyi + 1 + + +class yank_arg(commands.Command): + def do(self) -> None: + r = self.reader + if r.last_command is self.__class__: + r.yank_arg_i += 1 + else: + r.yank_arg_i = 0 + if r.historyi < r.yank_arg_i: + r.error("beginning of history list") + return + a = r.get_arg(-1) + # XXX how to split? + words = r.get_item(r.historyi - r.yank_arg_i - 1).split() + if a < -len(words) or a >= len(words): + r.error("no such arg") + return + w = words[a] + b = r.buffer + if r.yank_arg_i > 0: + o = len(r.yank_arg_yanked) + else: + o = 0 + b[r.pos - o : r.pos] = list(w) + r.yank_arg_yanked = w + r.pos += len(w) - o + r.dirty = True + + +class forward_history_isearch(commands.Command): + def do(self) -> None: + r = self.reader + r.isearch_direction = ISEARCH_DIRECTION_FORWARDS + r.isearch_start = r.historyi, r.pos + r.isearch_term = "" + r.dirty = True + r.push_input_trans(r.isearch_trans) + + +class reverse_history_isearch(commands.Command): + def do(self) -> None: + r = self.reader + r.isearch_direction = ISEARCH_DIRECTION_BACKWARDS + r.dirty = True + r.isearch_term = "" + r.push_input_trans(r.isearch_trans) + r.isearch_start = r.historyi, r.pos + + +class isearch_cancel(commands.Command): + def do(self) -> None: + r = self.reader + r.isearch_direction = ISEARCH_DIRECTION_NONE + r.pop_input_trans() + r.select_item(r.isearch_start[0]) + r.pos = r.isearch_start[1] + r.dirty = True + + +class isearch_add_character(commands.Command): + def do(self) -> None: + r = self.reader + b = r.buffer + r.isearch_term += self.event[-1] + r.dirty = True + p = r.pos + len(r.isearch_term) - 1 + if b[p : p + 1] != [r.isearch_term[-1]]: + r.isearch_next() + + +class isearch_backspace(commands.Command): + def do(self) -> None: + r = self.reader + if len(r.isearch_term) > 0: + r.isearch_term = r.isearch_term[:-1] + r.dirty = True + else: + r.error("nothing to rubout") + + +class isearch_forwards(commands.Command): + def do(self) -> None: + r = self.reader + r.isearch_direction = ISEARCH_DIRECTION_FORWARDS + r.isearch_next() + + +class isearch_backwards(commands.Command): + def do(self) -> None: + r = self.reader + r.isearch_direction = ISEARCH_DIRECTION_BACKWARDS + r.isearch_next() + + +class isearch_end(commands.Command): + def do(self) -> None: + r = self.reader + r.isearch_direction = ISEARCH_DIRECTION_NONE + r.console.forgetinput() + r.pop_input_trans() + r.dirty = True + + +@dataclass +class HistoricalReader(Reader): + """Adds history support (with incremental history searching) to the + Reader class. + """ + + history: list[str] = field(default_factory=list) + historyi: int = 0 + next_history: int | None = None + transient_history: dict[int, str] = field(default_factory=dict) + isearch_term: str = "" + isearch_direction: str = ISEARCH_DIRECTION_NONE + isearch_start: tuple[int, int] = field(init=False) + isearch_trans: input.KeymapTranslator = field(init=False) + yank_arg_i: int = 0 + yank_arg_yanked: str = "" + + def __post_init__(self) -> None: + super().__post_init__() + for c in [ + next_history, + previous_history, + restore_history, + first_history, + last_history, + yank_arg, + forward_history_isearch, + reverse_history_isearch, + isearch_end, + isearch_add_character, + isearch_cancel, + isearch_add_character, + isearch_backspace, + isearch_forwards, + isearch_backwards, + operate_and_get_next, + history_search_backward, + history_search_forward, + ]: + self.commands[c.__name__] = c + self.commands[c.__name__.replace("_", "-")] = c + self.isearch_start = self.historyi, self.pos + self.isearch_trans = input.KeymapTranslator( + isearch_keymap, invalid_cls=isearch_end, character_cls=isearch_add_character + ) + + def collect_keymap(self) -> tuple[tuple[KeySpec, CommandName], ...]: + return super().collect_keymap() + ( + (r"\C-n", "next-history"), + (r"\C-p", "previous-history"), + (r"\C-o", "operate-and-get-next"), + (r"\C-r", "reverse-history-isearch"), + (r"\C-s", "forward-history-isearch"), + (r"\M-r", "restore-history"), + (r"\M-.", "yank-arg"), + (r"\", "history-search-forward"), + (r"\x1b[6~", "history-search-forward"), + (r"\", "history-search-backward"), + (r"\x1b[5~", "history-search-backward"), + ) + + def select_item(self, i: int) -> None: + self.transient_history[self.historyi] = self.get_unicode() + buf = self.transient_history.get(i) + if buf is None: + buf = self.history[i].rstrip() + self.buffer = list(buf) + self.historyi = i + self.pos = len(self.buffer) + self.dirty = True + self.last_refresh_cache.invalidated = True + + def get_item(self, i: int) -> str: + if i != len(self.history): + return self.transient_history.get(i, self.history[i]) + else: + return self.transient_history.get(i, self.get_unicode()) + + @contextmanager + def suspend(self) -> SimpleContextManager: + with super().suspend(), self.suspend_history(): + yield + + @contextmanager + def suspend_history(self) -> SimpleContextManager: + try: + old_history = self.history[:] + del self.history[:] + yield + finally: + self.history[:] = old_history + + def prepare(self) -> None: + super().prepare() + try: + self.transient_history = {} + if self.next_history is not None and self.next_history < len(self.history): + self.historyi = self.next_history + self.buffer[:] = list(self.history[self.next_history]) + self.pos = len(self.buffer) + self.transient_history[len(self.history)] = "" + else: + self.historyi = len(self.history) + self.next_history = None + except: + self.restore() + raise + + def get_prompt(self, lineno: int, cursor_on_line: bool) -> str: + if cursor_on_line and self.isearch_direction != ISEARCH_DIRECTION_NONE: + d = "rf"[self.isearch_direction == ISEARCH_DIRECTION_FORWARDS] + return "(%s-search `%s') " % (d, self.isearch_term) + else: + return super().get_prompt(lineno, cursor_on_line) + + def search_next(self, *, forwards: bool) -> None: + """Search history for the current line contents up to the cursor. + + Selects the first item found. If nothing is under the cursor, any next + item in history is selected. + """ + pos = self.pos + s = self.get_unicode() + history_index = self.historyi + + # In multiline contexts, we're only interested in the current line. + nl_index = s.rfind('\n', 0, pos) + prefix = s[nl_index + 1:pos] + pos = len(prefix) + + match_prefix = len(prefix) + len_item = 0 + if history_index < len(self.history): + len_item = len(self.get_item(history_index)) + if len_item and pos == len_item: + match_prefix = False + elif not pos: + match_prefix = False + + while 1: + if forwards: + out_of_bounds = history_index >= len(self.history) - 1 + else: + out_of_bounds = history_index == 0 + if out_of_bounds: + if forwards and not match_prefix: + self.pos = 0 + self.buffer = [] + self.dirty = True + else: + self.error("not found") + return + + history_index += 1 if forwards else -1 + s = self.get_item(history_index) + + if not match_prefix: + self.select_item(history_index) + return + + len_acc = 0 + for i, line in enumerate(s.splitlines(keepends=True)): + if line.startswith(prefix): + self.select_item(history_index) + self.pos = pos + len_acc + return + len_acc += len(line) + + def isearch_next(self) -> None: + st = self.isearch_term + p = self.pos + i = self.historyi + s = self.get_unicode() + forwards = self.isearch_direction == ISEARCH_DIRECTION_FORWARDS + while 1: + if forwards: + p = s.find(st, p + 1) + else: + p = s.rfind(st, 0, p + len(st) - 1) + if p != -1: + self.select_item(i) + self.pos = p + return + elif (forwards and i >= len(self.history) - 1) or (not forwards and i == 0): + self.error("not found") + return + else: + if forwards: + i += 1 + s = self.get_item(i) + p = -1 + else: + i -= 1 + s = self.get_item(i) + p = len(s) + + def finish(self) -> None: + super().finish() + ret = self.get_unicode() + for i, t in self.transient_history.items(): + if i < len(self.history) and i != self.historyi: + self.history[i] = t + if ret and should_auto_add_history: + self.history.append(ret) + + +should_auto_add_history = True diff --git a/Lib/_pyrepl/input.py b/Lib/_pyrepl/input.py new file mode 100644 index 0000000000..21c24eb5cd --- /dev/null +++ b/Lib/_pyrepl/input.py @@ -0,0 +1,114 @@ +# Copyright 2000-2004 Michael Hudson-Doyle +# +# All Rights Reserved +# +# +# Permission to use, copy, modify, and distribute this software and +# its documentation for any purpose is hereby granted without fee, +# provided that the above copyright notice appear in all copies and +# that both that copyright notice and this permission notice appear in +# supporting documentation. +# +# THE AUTHOR MICHAEL HUDSON DISCLAIMS ALL WARRANTIES WITH REGARD TO +# THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY +# AND FITNESS, IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, +# INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER +# RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF +# CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN +# CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + +# (naming modules after builtin functions is not such a hot idea...) + +# an KeyTrans instance translates Event objects into Command objects + +# hmm, at what level do we want [C-i] and [tab] to be equivalent? +# [meta-a] and [esc a]? obviously, these are going to be equivalent +# for the UnixConsole, but should they be for PygameConsole? + +# it would in any situation seem to be a bad idea to bind, say, [tab] +# and [C-i] to *different* things... but should binding one bind the +# other? + +# executive, temporary decision: [tab] and [C-i] are distinct, but +# [meta-key] is identified with [esc key]. We demand that any console +# class does quite a lot towards emulating a unix terminal. + +from __future__ import annotations + +from abc import ABC, abstractmethod +import unicodedata +from collections import deque + + +# types +if False: + from .types import EventTuple + + +class InputTranslator(ABC): + @abstractmethod + def push(self, evt: EventTuple) -> None: + pass + + @abstractmethod + def get(self) -> EventTuple | None: + return None + + @abstractmethod + def empty(self) -> bool: + return True + + +class KeymapTranslator(InputTranslator): + def __init__(self, keymap, verbose=False, invalid_cls=None, character_cls=None): + self.verbose = verbose + from .keymap import compile_keymap, parse_keys + + self.keymap = keymap + self.invalid_cls = invalid_cls + self.character_cls = character_cls + d = {} + for keyspec, command in keymap: + keyseq = tuple(parse_keys(keyspec)) + d[keyseq] = command + if self.verbose: + print(d) + self.k = self.ck = compile_keymap(d, ()) + self.results = deque() + self.stack = [] + + def push(self, evt): + if self.verbose: + print("pushed", evt.data, end="") + key = evt.data + d = self.k.get(key) + if isinstance(d, dict): + if self.verbose: + print("transition") + self.stack.append(key) + self.k = d + else: + if d is None: + if self.verbose: + print("invalid") + if self.stack or len(key) > 1 or unicodedata.category(key) == "C": + self.results.append((self.invalid_cls, self.stack + [key])) + else: + # small optimization: + self.k[key] = self.character_cls + self.results.append((self.character_cls, [key])) + else: + if self.verbose: + print("matched", d) + self.results.append((d, self.stack + [key])) + self.stack = [] + self.k = self.ck + + def get(self): + if self.results: + return self.results.popleft() + else: + return None + + def empty(self) -> bool: + return not self.results diff --git a/Lib/_pyrepl/keymap.py b/Lib/_pyrepl/keymap.py new file mode 100644 index 0000000000..2fb03d1952 --- /dev/null +++ b/Lib/_pyrepl/keymap.py @@ -0,0 +1,213 @@ +# Copyright 2000-2008 Michael Hudson-Doyle +# Armin Rigo +# +# All Rights Reserved +# +# +# Permission to use, copy, modify, and distribute this software and +# its documentation for any purpose is hereby granted without fee, +# provided that the above copyright notice appear in all copies and +# that both that copyright notice and this permission notice appear in +# supporting documentation. +# +# THE AUTHOR MICHAEL HUDSON DISCLAIMS ALL WARRANTIES WITH REGARD TO +# THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY +# AND FITNESS, IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, +# INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER +# RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF +# CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN +# CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + +""" +Keymap contains functions for parsing keyspecs and turning keyspecs into +appropriate sequences. + +A keyspec is a string representing a sequence of key presses that can +be bound to a command. All characters other than the backslash represent +themselves. In the traditional manner, a backslash introduces an escape +sequence. + +pyrepl uses its own keyspec format that is meant to be a strict superset of +readline's KEYSEQ format. This means that if a spec is found that readline +accepts that this doesn't, it should be logged as a bug. Note that this means +we're using the `\\C-o' style of readline's keyspec, not the `Control-o' sort. + +The extension to readline is that the sequence \\ denotes the +sequence of characters produced by hitting KEY. + +Examples: +`a' - what you get when you hit the `a' key +`\\EOA' - Escape - O - A (up, on my terminal) +`\\' - the up arrow key +`\\' - ditto (keynames are case-insensitive) +`\\C-o', `\\c-o' - control-o +`\\M-.' - meta-period +`\\E.' - ditto (that's how meta works for pyrepl) +`\\', `\\', `\\t', `\\011', '\\x09', '\\X09', '\\C-i', '\\C-I' + - all of these are the tab character. +""" + +_escapes = { + "\\": "\\", + "'": "'", + '"': '"', + "a": "\a", + "b": "\b", + "e": "\033", + "f": "\f", + "n": "\n", + "r": "\r", + "t": "\t", + "v": "\v", +} + +_keynames = { + "backspace": "backspace", + "delete": "delete", + "down": "down", + "end": "end", + "enter": "\r", + "escape": "\033", + "f1": "f1", + "f2": "f2", + "f3": "f3", + "f4": "f4", + "f5": "f5", + "f6": "f6", + "f7": "f7", + "f8": "f8", + "f9": "f9", + "f10": "f10", + "f11": "f11", + "f12": "f12", + "f13": "f13", + "f14": "f14", + "f15": "f15", + "f16": "f16", + "f17": "f17", + "f18": "f18", + "f19": "f19", + "f20": "f20", + "home": "home", + "insert": "insert", + "left": "left", + "page down": "page down", + "page up": "page up", + "return": "\r", + "right": "right", + "space": " ", + "tab": "\t", + "up": "up", +} + + +class KeySpecError(Exception): + pass + + +def parse_keys(keys: str) -> list[str]: + """Parse keys in keyspec format to a sequence of keys.""" + s = 0 + r: list[str] = [] + while s < len(keys): + k, s = _parse_single_key_sequence(keys, s) + r.extend(k) + return r + + +def _parse_single_key_sequence(key: str, s: int) -> tuple[list[str], int]: + ctrl = 0 + meta = 0 + ret = "" + while not ret and s < len(key): + if key[s] == "\\": + c = key[s + 1].lower() + if c in _escapes: + ret = _escapes[c] + s += 2 + elif c == "c": + if key[s + 2] != "-": + raise KeySpecError( + "\\C must be followed by `-' (char %d of %s)" + % (s + 2, repr(key)) + ) + if ctrl: + raise KeySpecError( + "doubled \\C- (char %d of %s)" % (s + 1, repr(key)) + ) + ctrl = 1 + s += 3 + elif c == "m": + if key[s + 2] != "-": + raise KeySpecError( + "\\M must be followed by `-' (char %d of %s)" + % (s + 2, repr(key)) + ) + if meta: + raise KeySpecError( + "doubled \\M- (char %d of %s)" % (s + 1, repr(key)) + ) + meta = 1 + s += 3 + elif c.isdigit(): + n = key[s + 1 : s + 4] + ret = chr(int(n, 8)) + s += 4 + elif c == "x": + n = key[s + 2 : s + 4] + ret = chr(int(n, 16)) + s += 4 + elif c == "<": + t = key.find(">", s) + if t == -1: + raise KeySpecError( + "unterminated \\< starting at char %d of %s" + % (s + 1, repr(key)) + ) + ret = key[s + 2 : t].lower() + if ret not in _keynames: + raise KeySpecError( + "unrecognised keyname `%s' at char %d of %s" + % (ret, s + 2, repr(key)) + ) + ret = _keynames[ret] + s = t + 1 + else: + raise KeySpecError( + "unknown backslash escape %s at char %d of %s" + % (repr(c), s + 2, repr(key)) + ) + else: + ret = key[s] + s += 1 + if ctrl: + if len(ret) == 1: + ret = chr(ord(ret) & 0x1F) # curses.ascii.ctrl() + elif ret in {"left", "right"}: + ret = f"ctrl {ret}" + else: + raise KeySpecError("\\C- followed by invalid key") + + result = [ret], s + if meta: + result[0].insert(0, "\033") + return result + + +def compile_keymap(keymap, empty=b""): + r = {} + for key, value in keymap.items(): + if isinstance(key, bytes): + first = key[:1] + else: + first = key[0] + r.setdefault(first, {})[key[1:]] = value + for key, value in r.items(): + if empty in value: + if len(value) != 1: + raise KeySpecError("key definitions for %s clash" % (value.values(),)) + else: + r[key] = value[empty] + else: + r[key] = compile_keymap(value, empty) + return r diff --git a/Lib/_pyrepl/main.py b/Lib/_pyrepl/main.py new file mode 100644 index 0000000000..a6f824dcc4 --- /dev/null +++ b/Lib/_pyrepl/main.py @@ -0,0 +1,59 @@ +import errno +import os +import sys + + +CAN_USE_PYREPL: bool +FAIL_REASON: str +try: + if sys.platform == "win32" and sys.getwindowsversion().build < 10586: + raise RuntimeError("Windows 10 TH2 or later required") + if not os.isatty(sys.stdin.fileno()): + raise OSError(errno.ENOTTY, "tty required", "stdin") + from .simple_interact import check + if err := check(): + raise RuntimeError(err) +except Exception as e: + CAN_USE_PYREPL = False + FAIL_REASON = f"warning: can't use pyrepl: {e}" +else: + CAN_USE_PYREPL = True + FAIL_REASON = "" + + +def interactive_console(mainmodule=None, quiet=False, pythonstartup=False): + if not CAN_USE_PYREPL: + if not os.getenv('PYTHON_BASIC_REPL') and FAIL_REASON: + from .trace import trace + trace(FAIL_REASON) + print(FAIL_REASON, file=sys.stderr) + return sys._baserepl() + + if mainmodule: + namespace = mainmodule.__dict__ + else: + import __main__ + namespace = __main__.__dict__ + namespace.pop("__pyrepl_interactive_console", None) + + # sys._baserepl() above does this internally, we do it here + startup_path = os.getenv("PYTHONSTARTUP") + if pythonstartup and startup_path: + sys.audit("cpython.run_startup", startup_path) + + import tokenize + with tokenize.open(startup_path) as f: + startup_code = compile(f.read(), startup_path, "exec") + exec(startup_code, namespace) + + # set sys.{ps1,ps2} just before invoking the interactive interpreter. This + # mimics what CPython does in pythonrun.c + if not hasattr(sys, "ps1"): + sys.ps1 = ">>> " + if not hasattr(sys, "ps2"): + sys.ps2 = "... " + + from .console import InteractiveColoredConsole + from .simple_interact import run_multiline_interactive_console + console = InteractiveColoredConsole(namespace, filename="") + run_multiline_interactive_console(console) diff --git a/Lib/_pyrepl/mypy.ini b/Lib/_pyrepl/mypy.ini new file mode 100644 index 0000000000..395f5945ab --- /dev/null +++ b/Lib/_pyrepl/mypy.ini @@ -0,0 +1,24 @@ +# Config file for running mypy on _pyrepl. +# Run mypy by invoking `mypy --config-file Lib/_pyrepl/mypy.ini` +# on the command-line from the repo root + +[mypy] +files = Lib/_pyrepl +explicit_package_bases = True +python_version = 3.12 +platform = linux +pretty = True + +# Enable most stricter settings +enable_error_code = ignore-without-code,redundant-expr +strict = True + +# Various stricter settings that we can't yet enable +# Try to enable these in the following order: +disallow_untyped_calls = False +disallow_untyped_defs = False +check_untyped_defs = False + +# Various internal modules that typeshed deliberately doesn't have stubs for: +[mypy-_abc.*,_opcode.*,_overlapped.*,_testcapi.*,_testinternalcapi.*,test.*] +ignore_missing_imports = True diff --git a/Lib/_pyrepl/pager.py b/Lib/_pyrepl/pager.py new file mode 100644 index 0000000000..1fddc63e3e --- /dev/null +++ b/Lib/_pyrepl/pager.py @@ -0,0 +1,175 @@ +from __future__ import annotations + +import io +import os +import re +import sys + + +# types +if False: + from typing import Protocol + class Pager(Protocol): + def __call__(self, text: str, title: str = "") -> None: + ... + + +def get_pager() -> Pager: + """Decide what method to use for paging through text.""" + if not hasattr(sys.stdin, "isatty"): + return plain_pager + if not hasattr(sys.stdout, "isatty"): + return plain_pager + if not sys.stdin.isatty() or not sys.stdout.isatty(): + return plain_pager + if sys.platform == "emscripten": + return plain_pager + use_pager = os.environ.get('MANPAGER') or os.environ.get('PAGER') + if use_pager: + if sys.platform == 'win32': # pipes completely broken in Windows + return lambda text, title='': tempfile_pager(plain(text), use_pager) + elif os.environ.get('TERM') in ('dumb', 'emacs'): + return lambda text, title='': pipe_pager(plain(text), use_pager, title) + else: + return lambda text, title='': pipe_pager(text, use_pager, title) + if os.environ.get('TERM') in ('dumb', 'emacs'): + return plain_pager + if sys.platform == 'win32': + return lambda text, title='': tempfile_pager(plain(text), 'more <') + if hasattr(os, 'system') and os.system('(pager) 2>/dev/null') == 0: + return lambda text, title='': pipe_pager(text, 'pager', title) + if hasattr(os, 'system') and os.system('(less) 2>/dev/null') == 0: + return lambda text, title='': pipe_pager(text, 'less', title) + + import tempfile + (fd, filename) = tempfile.mkstemp() + os.close(fd) + try: + if hasattr(os, 'system') and os.system('more "%s"' % filename) == 0: + return lambda text, title='': pipe_pager(text, 'more', title) + else: + return tty_pager + finally: + os.unlink(filename) + + +def escape_stdout(text: str) -> str: + # Escape non-encodable characters to avoid encoding errors later + encoding = getattr(sys.stdout, 'encoding', None) or 'utf-8' + return text.encode(encoding, 'backslashreplace').decode(encoding) + + +def escape_less(s: str) -> str: + return re.sub(r'([?:.%\\])', r'\\\1', s) + + +def plain(text: str) -> str: + """Remove boldface formatting from text.""" + return re.sub('.\b', '', text) + + +def tty_pager(text: str, title: str = '') -> None: + """Page through text on a text terminal.""" + lines = plain(escape_stdout(text)).split('\n') + has_tty = False + try: + import tty + import termios + fd = sys.stdin.fileno() + old = termios.tcgetattr(fd) + tty.setcbreak(fd) + has_tty = True + + def getchar() -> str: + return sys.stdin.read(1) + + except (ImportError, AttributeError, io.UnsupportedOperation): + def getchar() -> str: + return sys.stdin.readline()[:-1][:1] + + try: + try: + h = int(os.environ.get('LINES', 0)) + except ValueError: + h = 0 + if h <= 1: + h = 25 + r = inc = h - 1 + sys.stdout.write('\n'.join(lines[:inc]) + '\n') + while lines[r:]: + sys.stdout.write('-- more --') + sys.stdout.flush() + c = getchar() + + if c in ('q', 'Q'): + sys.stdout.write('\r \r') + break + elif c in ('\r', '\n'): + sys.stdout.write('\r \r' + lines[r] + '\n') + r = r + 1 + continue + if c in ('b', 'B', '\x1b'): + r = r - inc - inc + if r < 0: r = 0 + sys.stdout.write('\n' + '\n'.join(lines[r:r+inc]) + '\n') + r = r + inc + + finally: + if has_tty: + termios.tcsetattr(fd, termios.TCSAFLUSH, old) + + +def plain_pager(text: str, title: str = '') -> None: + """Simply print unformatted text. This is the ultimate fallback.""" + sys.stdout.write(plain(escape_stdout(text))) + + +def pipe_pager(text: str, cmd: str, title: str = '') -> None: + """Page through text by feeding it to another program.""" + import subprocess + env = os.environ.copy() + if title: + title += ' ' + esc_title = escape_less(title) + prompt_string = ( + f' {esc_title}' + + '?ltline %lt?L/%L.' + ':byte %bB?s/%s.' + '.' + '?e (END):?pB %pB\\%..' + ' (press h for help or q to quit)') + env['LESS'] = '-RmPm{0}$PM{0}$'.format(prompt_string) + proc = subprocess.Popen(cmd, shell=True, stdin=subprocess.PIPE, + errors='backslashreplace', env=env) + assert proc.stdin is not None + try: + with proc.stdin as pipe: + try: + pipe.write(text) + except KeyboardInterrupt: + # We've hereby abandoned whatever text hasn't been written, + # but the pager is still in control of the terminal. + pass + except OSError: + pass # Ignore broken pipes caused by quitting the pager program. + while True: + try: + proc.wait() + break + except KeyboardInterrupt: + # Ignore ctl-c like the pager itself does. Otherwise the pager is + # left running and the terminal is in raw mode and unusable. + pass + + +def tempfile_pager(text: str, cmd: str, title: str = '') -> None: + """Page through text by invoking a program on a temporary file.""" + import tempfile + with tempfile.TemporaryDirectory() as tempdir: + filename = os.path.join(tempdir, 'pydoc.out') + with open(filename, 'w', errors='backslashreplace', + encoding=os.device_encoding(0) if + sys.platform == 'win32' else None + ) as file: + file.write(text) + os.system(cmd + ' "' + filename + '"') diff --git a/Lib/_pyrepl/reader.py b/Lib/_pyrepl/reader.py new file mode 100644 index 0000000000..dc26bfd3a3 --- /dev/null +++ b/Lib/_pyrepl/reader.py @@ -0,0 +1,816 @@ +# Copyright 2000-2010 Michael Hudson-Doyle +# Antonio Cuni +# Armin Rigo +# +# All Rights Reserved +# +# +# Permission to use, copy, modify, and distribute this software and +# its documentation for any purpose is hereby granted without fee, +# provided that the above copyright notice appear in all copies and +# that both that copyright notice and this permission notice appear in +# supporting documentation. +# +# THE AUTHOR MICHAEL HUDSON DISCLAIMS ALL WARRANTIES WITH REGARD TO +# THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY +# AND FITNESS, IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, +# INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER +# RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF +# CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN +# CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + +from __future__ import annotations + +import sys + +from contextlib import contextmanager +from dataclasses import dataclass, field, fields +import unicodedata +from _colorize import can_colorize, ANSIColors # type: ignore[import-not-found] + + +from . import commands, console, input +from .utils import ANSI_ESCAPE_SEQUENCE, wlen, str_width +from .trace import trace + + +# types +Command = commands.Command +from .types import Callback, SimpleContextManager, KeySpec, CommandName + + +def disp_str(buffer: str) -> tuple[str, list[int]]: + """disp_str(buffer:string) -> (string, [int]) + + Return the string that should be the printed representation of + |buffer| and a list detailing where the characters of |buffer| + get used up. E.g.: + + >>> disp_str(chr(3)) + ('^C', [1, 0]) + + """ + b: list[int] = [] + s: list[str] = [] + for c in buffer: + if c == '\x1a': + s.append(c) + b.append(2) + elif ord(c) < 128: + s.append(c) + b.append(1) + elif unicodedata.category(c).startswith("C"): + c = r"\u%04x" % ord(c) + s.append(c) + b.extend([0] * (len(c) - 1)) + else: + s.append(c) + b.append(str_width(c)) + return "".join(s), b + + +# syntax classes: + +SYNTAX_WHITESPACE, SYNTAX_WORD, SYNTAX_SYMBOL = range(3) + + +def make_default_syntax_table() -> dict[str, int]: + # XXX perhaps should use some unicodedata here? + st: dict[str, int] = {} + for c in map(chr, range(256)): + st[c] = SYNTAX_SYMBOL + for c in [a for a in map(chr, range(256)) if a.isalnum()]: + st[c] = SYNTAX_WORD + st["\n"] = st[" "] = SYNTAX_WHITESPACE + return st + + +def make_default_commands() -> dict[CommandName, type[Command]]: + result: dict[CommandName, type[Command]] = {} + for v in vars(commands).values(): + if isinstance(v, type) and issubclass(v, Command) and v.__name__[0].islower(): + result[v.__name__] = v + result[v.__name__.replace("_", "-")] = v + return result + + +default_keymap: tuple[tuple[KeySpec, CommandName], ...] = tuple( + [ + (r"\C-a", "beginning-of-line"), + (r"\C-b", "left"), + (r"\C-c", "interrupt"), + (r"\C-d", "delete"), + (r"\C-e", "end-of-line"), + (r"\C-f", "right"), + (r"\C-g", "cancel"), + (r"\C-h", "backspace"), + (r"\C-j", "accept"), + (r"\", "accept"), + (r"\C-k", "kill-line"), + (r"\C-l", "clear-screen"), + (r"\C-m", "accept"), + (r"\C-t", "transpose-characters"), + (r"\C-u", "unix-line-discard"), + (r"\C-w", "unix-word-rubout"), + (r"\C-x\C-u", "upcase-region"), + (r"\C-y", "yank"), + *(() if sys.platform == "win32" else ((r"\C-z", "suspend"), )), + (r"\M-b", "backward-word"), + (r"\M-c", "capitalize-word"), + (r"\M-d", "kill-word"), + (r"\M-f", "forward-word"), + (r"\M-l", "downcase-word"), + (r"\M-t", "transpose-words"), + (r"\M-u", "upcase-word"), + (r"\M-y", "yank-pop"), + (r"\M--", "digit-arg"), + (r"\M-0", "digit-arg"), + (r"\M-1", "digit-arg"), + (r"\M-2", "digit-arg"), + (r"\M-3", "digit-arg"), + (r"\M-4", "digit-arg"), + (r"\M-5", "digit-arg"), + (r"\M-6", "digit-arg"), + (r"\M-7", "digit-arg"), + (r"\M-8", "digit-arg"), + (r"\M-9", "digit-arg"), + (r"\M-\n", "accept"), + ("\\\\", "self-insert"), + (r"\x1b[200~", "enable_bracketed_paste"), + (r"\x1b[201~", "disable_bracketed_paste"), + (r"\x03", "ctrl-c"), + ] + + [(c, "self-insert") for c in map(chr, range(32, 127)) if c != "\\"] + + [(c, "self-insert") for c in map(chr, range(128, 256)) if c.isalpha()] + + [ + (r"\", "up"), + (r"\", "down"), + (r"\", "left"), + (r"\C-\", "backward-word"), + (r"\", "right"), + (r"\C-\", "forward-word"), + (r"\", "delete"), + (r"\x1b[3~", "delete"), + (r"\", "backspace"), + (r"\M-\", "backward-kill-word"), + (r"\", "end-of-line"), # was 'end' + (r"\", "beginning-of-line"), # was 'home' + (r"\", "help"), + (r"\", "show-history"), + (r"\", "paste-mode"), + (r"\EOF", "end"), # the entries in the terminfo database for xterms + (r"\EOH", "home"), # seem to be wrong. this is a less than ideal + # workaround + ] +) + + +@dataclass(slots=True) +class Reader: + """The Reader class implements the bare bones of a command reader, + handling such details as editing and cursor motion. What it does + not support are such things as completion or history support - + these are implemented elsewhere. + + Instance variables of note include: + + * buffer: + A *list* (*not* a string at the moment :-) containing all the + characters that have been entered. + * console: + Hopefully encapsulates the OS dependent stuff. + * pos: + A 0-based index into `buffer' for where the insertion point + is. + * screeninfo: + Ahem. This list contains some info needed to move the + insertion point around reasonably efficiently. + * cxy, lxy: + the position of the insertion point in screen ... + * syntax_table: + Dictionary mapping characters to `syntax class'; read the + emacs docs to see what this means :-) + * commands: + Dictionary mapping command names to command classes. + * arg: + The emacs-style prefix argument. It will be None if no such + argument has been provided. + * dirty: + True if we need to refresh the display. + * kill_ring: + The emacs-style kill-ring; manipulated with yank & yank-pop + * ps1, ps2, ps3, ps4: + prompts. ps1 is the prompt for a one-line input; for a + multiline input it looks like: + ps2> first line of input goes here + ps3> second and further + ps3> lines get ps3 + ... + ps4> and the last one gets ps4 + As with the usual top-level, you can set these to instances if + you like; str() will be called on them (once) at the beginning + of each command. Don't put really long or newline containing + strings here, please! + This is just the default policy; you can change it freely by + overriding get_prompt() (and indeed some standard subclasses + do). + * finished: + handle1 will set this to a true value if a command signals + that we're done. + """ + + console: console.Console + + ## state + buffer: list[str] = field(default_factory=list) + pos: int = 0 + ps1: str = "->> " + ps2: str = "/>> " + ps3: str = "|.. " + ps4: str = R"\__ " + kill_ring: list[list[str]] = field(default_factory=list) + msg: str = "" + arg: int | None = None + dirty: bool = False + finished: bool = False + paste_mode: bool = False + in_bracketed_paste: bool = False + commands: dict[str, type[Command]] = field(default_factory=make_default_commands) + last_command: type[Command] | None = None + syntax_table: dict[str, int] = field(default_factory=make_default_syntax_table) + keymap: tuple[tuple[str, str], ...] = () + input_trans: input.KeymapTranslator = field(init=False) + input_trans_stack: list[input.KeymapTranslator] = field(default_factory=list) + screen: list[str] = field(default_factory=list) + screeninfo: list[tuple[int, list[int]]] = field(init=False) + cxy: tuple[int, int] = field(init=False) + lxy: tuple[int, int] = field(init=False) + scheduled_commands: list[str] = field(default_factory=list) + can_colorize: bool = False + threading_hook: Callback | None = None + + ## cached metadata to speed up screen refreshes + @dataclass + class RefreshCache: + in_bracketed_paste: bool = False + screen: list[str] = field(default_factory=list) + screeninfo: list[tuple[int, list[int]]] = field(init=False) + line_end_offsets: list[int] = field(default_factory=list) + pos: int = field(init=False) + cxy: tuple[int, int] = field(init=False) + dimensions: tuple[int, int] = field(init=False) + invalidated: bool = False + + def update_cache(self, + reader: Reader, + screen: list[str], + screeninfo: list[tuple[int, list[int]]], + ) -> None: + self.in_bracketed_paste = reader.in_bracketed_paste + self.screen = screen.copy() + self.screeninfo = screeninfo.copy() + self.pos = reader.pos + self.cxy = reader.cxy + self.dimensions = reader.console.width, reader.console.height + self.invalidated = False + + def valid(self, reader: Reader) -> bool: + if self.invalidated: + return False + dimensions = reader.console.width, reader.console.height + dimensions_changed = dimensions != self.dimensions + paste_changed = reader.in_bracketed_paste != self.in_bracketed_paste + return not (dimensions_changed or paste_changed) + + def get_cached_location(self, reader: Reader) -> tuple[int, int]: + if self.invalidated: + raise ValueError("Cache is invalidated") + offset = 0 + earliest_common_pos = min(reader.pos, self.pos) + num_common_lines = len(self.line_end_offsets) + while num_common_lines > 0: + offset = self.line_end_offsets[num_common_lines - 1] + if earliest_common_pos > offset: + break + num_common_lines -= 1 + else: + offset = 0 + return offset, num_common_lines + + last_refresh_cache: RefreshCache = field(default_factory=RefreshCache) + + def __post_init__(self) -> None: + # Enable the use of `insert` without a `prepare` call - necessary to + # facilitate the tab completion hack implemented for + # . + self.keymap = self.collect_keymap() + self.input_trans = input.KeymapTranslator( + self.keymap, invalid_cls="invalid-key", character_cls="self-insert" + ) + self.screeninfo = [(0, [])] + self.cxy = self.pos2xy() + self.lxy = (self.pos, 0) + self.can_colorize = can_colorize() + + self.last_refresh_cache.screeninfo = self.screeninfo + self.last_refresh_cache.pos = self.pos + self.last_refresh_cache.cxy = self.cxy + self.last_refresh_cache.dimensions = (0, 0) + + def collect_keymap(self) -> tuple[tuple[KeySpec, CommandName], ...]: + return default_keymap + + def calc_screen(self) -> list[str]: + """Translate changes in self.buffer into changes in self.console.screen.""" + # Since the last call to calc_screen: + # screen and screeninfo may differ due to a completion menu being shown + # pos and cxy may differ due to edits, cursor movements, or completion menus + + # Lines that are above both the old and new cursor position can't have changed, + # unless the terminal has been resized (which might cause reflowing) or we've + # entered or left paste mode (which changes prompts, causing reflowing). + num_common_lines = 0 + offset = 0 + if self.last_refresh_cache.valid(self): + offset, num_common_lines = self.last_refresh_cache.get_cached_location(self) + + screen = self.last_refresh_cache.screen + del screen[num_common_lines:] + + screeninfo = self.last_refresh_cache.screeninfo + del screeninfo[num_common_lines:] + + last_refresh_line_end_offsets = self.last_refresh_cache.line_end_offsets + del last_refresh_line_end_offsets[num_common_lines:] + + pos = self.pos + pos -= offset + + prompt_from_cache = (offset and self.buffer[offset - 1] != "\n") + + lines = "".join(self.buffer[offset:]).split("\n") + + cursor_found = False + lines_beyond_cursor = 0 + for ln, line in enumerate(lines, num_common_lines): + ll = len(line) + if 0 <= pos <= ll: + self.lxy = pos, ln + cursor_found = True + elif cursor_found: + lines_beyond_cursor += 1 + if lines_beyond_cursor > self.console.height: + # No need to keep formatting lines. + # The console can't show them. + break + if prompt_from_cache: + # Only the first line's prompt can come from the cache + prompt_from_cache = False + prompt = "" + else: + prompt = self.get_prompt(ln, ll >= pos >= 0) + while "\n" in prompt: + pre_prompt, _, prompt = prompt.partition("\n") + last_refresh_line_end_offsets.append(offset) + screen.append(pre_prompt) + screeninfo.append((0, [])) + pos -= ll + 1 + prompt, lp = self.process_prompt(prompt) + l, l2 = disp_str(line) + wrapcount = (wlen(l) + lp) // self.console.width + if wrapcount == 0: + offset += ll + 1 # Takes all of the line plus the newline + last_refresh_line_end_offsets.append(offset) + screen.append(prompt + l) + screeninfo.append((lp, l2)) + else: + i = 0 + while l: + prelen = lp if i == 0 else 0 + index_to_wrap_before = 0 + column = 0 + for character_width in l2: + if column + character_width >= self.console.width - prelen: + break + index_to_wrap_before += 1 + column += character_width + pre = prompt if i == 0 else "" + if len(l) > index_to_wrap_before: + offset += index_to_wrap_before + post = "\\" + after = [1] + else: + offset += index_to_wrap_before + 1 # Takes the newline + post = "" + after = [] + last_refresh_line_end_offsets.append(offset) + screen.append(pre + l[:index_to_wrap_before] + post) + screeninfo.append((prelen, l2[:index_to_wrap_before] + after)) + l = l[index_to_wrap_before:] + l2 = l2[index_to_wrap_before:] + i += 1 + self.screeninfo = screeninfo + self.cxy = self.pos2xy() + if self.msg: + for mline in self.msg.split("\n"): + screen.append(mline) + screeninfo.append((0, [])) + + self.last_refresh_cache.update_cache(self, screen, screeninfo) + return screen + + @staticmethod + def process_prompt(prompt: str) -> tuple[str, int]: + """Process the prompt. + + This means calculate the length of the prompt. The character \x01 + and \x02 are used to bracket ANSI control sequences and need to be + excluded from the length calculation. So also a copy of the prompt + is returned with these control characters removed.""" + + # The logic below also ignores the length of common escape + # sequences if they were not explicitly within \x01...\x02. + # They are CSI (or ANSI) sequences ( ESC [ ... LETTER ) + + # wlen from utils already excludes ANSI_ESCAPE_SEQUENCE chars, + # which breaks the logic below so we redefine it here. + def wlen(s: str) -> int: + return sum(str_width(i) for i in s) + + out_prompt = "" + l = wlen(prompt) + pos = 0 + while True: + s = prompt.find("\x01", pos) + if s == -1: + break + e = prompt.find("\x02", s) + if e == -1: + break + # Found start and end brackets, subtract from string length + l = l - (e - s + 1) + keep = prompt[pos:s] + l -= sum(map(wlen, ANSI_ESCAPE_SEQUENCE.findall(keep))) + out_prompt += keep + prompt[s + 1 : e] + pos = e + 1 + keep = prompt[pos:] + l -= sum(map(wlen, ANSI_ESCAPE_SEQUENCE.findall(keep))) + out_prompt += keep + return out_prompt, l + + def bow(self, p: int | None = None) -> int: + """Return the 0-based index of the word break preceding p most + immediately. + + p defaults to self.pos; word boundaries are determined using + self.syntax_table.""" + if p is None: + p = self.pos + st = self.syntax_table + b = self.buffer + p -= 1 + while p >= 0 and st.get(b[p], SYNTAX_WORD) != SYNTAX_WORD: + p -= 1 + while p >= 0 and st.get(b[p], SYNTAX_WORD) == SYNTAX_WORD: + p -= 1 + return p + 1 + + def eow(self, p: int | None = None) -> int: + """Return the 0-based index of the word break following p most + immediately. + + p defaults to self.pos; word boundaries are determined using + self.syntax_table.""" + if p is None: + p = self.pos + st = self.syntax_table + b = self.buffer + while p < len(b) and st.get(b[p], SYNTAX_WORD) != SYNTAX_WORD: + p += 1 + while p < len(b) and st.get(b[p], SYNTAX_WORD) == SYNTAX_WORD: + p += 1 + return p + + def bol(self, p: int | None = None) -> int: + """Return the 0-based index of the line break preceding p most + immediately. + + p defaults to self.pos.""" + if p is None: + p = self.pos + b = self.buffer + p -= 1 + while p >= 0 and b[p] != "\n": + p -= 1 + return p + 1 + + def eol(self, p: int | None = None) -> int: + """Return the 0-based index of the line break following p most + immediately. + + p defaults to self.pos.""" + if p is None: + p = self.pos + b = self.buffer + while p < len(b) and b[p] != "\n": + p += 1 + return p + + def max_column(self, y: int) -> int: + """Return the last x-offset for line y""" + return self.screeninfo[y][0] + sum(self.screeninfo[y][1]) + + def max_row(self) -> int: + return len(self.screeninfo) - 1 + + def get_arg(self, default: int = 1) -> int: + """Return any prefix argument that the user has supplied, + returning `default' if there is None. Defaults to 1. + """ + if self.arg is None: + return default + return self.arg + + def get_prompt(self, lineno: int, cursor_on_line: bool) -> str: + """Return what should be in the left-hand margin for line + `lineno'.""" + if self.arg is not None and cursor_on_line: + prompt = f"(arg: {self.arg}) " + elif self.paste_mode and not self.in_bracketed_paste: + prompt = "(paste) " + elif "\n" in self.buffer: + if lineno == 0: + prompt = self.ps2 + elif self.ps4 and lineno == self.buffer.count("\n"): + prompt = self.ps4 + else: + prompt = self.ps3 + else: + prompt = self.ps1 + + if self.can_colorize: + prompt = f"{ANSIColors.BOLD_MAGENTA}{prompt}{ANSIColors.RESET}" + return prompt + + def push_input_trans(self, itrans: input.KeymapTranslator) -> None: + self.input_trans_stack.append(self.input_trans) + self.input_trans = itrans + + def pop_input_trans(self) -> None: + self.input_trans = self.input_trans_stack.pop() + + def setpos_from_xy(self, x: int, y: int) -> None: + """Set pos according to coordinates x, y""" + pos = 0 + i = 0 + while i < y: + prompt_len, character_widths = self.screeninfo[i] + offset = len(character_widths) - character_widths.count(0) + in_wrapped_line = prompt_len + sum(character_widths) >= self.console.width + if in_wrapped_line: + pos += offset - 1 # -1 cause backslash is not in buffer + else: + pos += offset + 1 # +1 cause newline is in buffer + i += 1 + + j = 0 + cur_x = self.screeninfo[i][0] + while cur_x < x: + if self.screeninfo[i][1][j] == 0: + continue + cur_x += self.screeninfo[i][1][j] + j += 1 + pos += 1 + + self.pos = pos + + def pos2xy(self) -> tuple[int, int]: + """Return the x, y coordinates of position 'pos'.""" + # this *is* incomprehensible, yes. + p, y = 0, 0 + l2: list[int] = [] + pos = self.pos + assert 0 <= pos <= len(self.buffer) + if pos == len(self.buffer) and len(self.screeninfo) > 0: + y = len(self.screeninfo) - 1 + p, l2 = self.screeninfo[y] + return p + sum(l2) + l2.count(0), y + + for p, l2 in self.screeninfo: + l = len(l2) - l2.count(0) + in_wrapped_line = p + sum(l2) >= self.console.width + offset = l - 1 if in_wrapped_line else l # need to remove backslash + if offset >= pos: + break + + if p + sum(l2) >= self.console.width: + pos -= l - 1 # -1 cause backslash is not in buffer + else: + pos -= l + 1 # +1 cause newline is in buffer + y += 1 + return p + sum(l2[:pos]), y + + def insert(self, text: str | list[str]) -> None: + """Insert 'text' at the insertion point.""" + self.buffer[self.pos : self.pos] = list(text) + self.pos += len(text) + self.dirty = True + + def update_cursor(self) -> None: + """Move the cursor to reflect changes in self.pos""" + self.cxy = self.pos2xy() + self.console.move_cursor(*self.cxy) + + def after_command(self, cmd: Command) -> None: + """This function is called to allow post command cleanup.""" + if getattr(cmd, "kills_digit_arg", True): + if self.arg is not None: + self.dirty = True + self.arg = None + + def prepare(self) -> None: + """Get ready to run. Call restore when finished. You must not + write to the console in between the calls to prepare and + restore.""" + try: + self.console.prepare() + self.arg = None + self.finished = False + del self.buffer[:] + self.pos = 0 + self.dirty = True + self.last_command = None + self.calc_screen() + except BaseException: + self.restore() + raise + + while self.scheduled_commands: + cmd = self.scheduled_commands.pop() + self.do_cmd((cmd, [])) + + def last_command_is(self, cls: type) -> bool: + if not self.last_command: + return False + return issubclass(cls, self.last_command) + + def restore(self) -> None: + """Clean up after a run.""" + self.console.restore() + + @contextmanager + def suspend(self) -> SimpleContextManager: + """A context manager to delegate to another reader.""" + prev_state = {f.name: getattr(self, f.name) for f in fields(self)} + try: + self.restore() + yield + finally: + for arg in ("msg", "ps1", "ps2", "ps3", "ps4", "paste_mode"): + setattr(self, arg, prev_state[arg]) + self.prepare() + + def finish(self) -> None: + """Called when a command signals that we're finished.""" + pass + + def error(self, msg: str = "none") -> None: + self.msg = "! " + msg + " " + self.dirty = True + self.console.beep() + + def update_screen(self) -> None: + if self.dirty: + self.refresh() + + def refresh(self) -> None: + """Recalculate and refresh the screen.""" + if self.in_bracketed_paste and self.buffer and not self.buffer[-1] == "\n": + return + + # this call sets up self.cxy, so call it first. + self.screen = self.calc_screen() + self.console.refresh(self.screen, self.cxy) + self.dirty = False + + def do_cmd(self, cmd: tuple[str, list[str]]) -> None: + """`cmd` is a tuple of "event_name" and "event", which in the current + implementation is always just the "buffer" which happens to be a list + of single-character strings.""" + + trace("received command {cmd}", cmd=cmd) + if isinstance(cmd[0], str): + command_type = self.commands.get(cmd[0], commands.invalid_command) + elif isinstance(cmd[0], type): + command_type = cmd[0] + else: + return # nothing to do + + command = command_type(self, *cmd) # type: ignore[arg-type] + command.do() + + self.after_command(command) + + if self.dirty: + self.refresh() + else: + self.update_cursor() + + if not isinstance(cmd, commands.digit_arg): + self.last_command = command_type + + self.finished = bool(command.finish) + if self.finished: + self.console.finish() + self.finish() + + def run_hooks(self) -> None: + threading_hook = self.threading_hook + if threading_hook is None and 'threading' in sys.modules: + from ._threading_handler import install_threading_hook + install_threading_hook(self) + if threading_hook is not None: + try: + threading_hook() + except Exception: + pass + + input_hook = self.console.input_hook + if input_hook: + try: + input_hook() + except Exception: + pass + + def handle1(self, block: bool = True) -> bool: + """Handle a single event. Wait as long as it takes if block + is true (the default), otherwise return False if no event is + pending.""" + + if self.msg: + self.msg = "" + self.dirty = True + + while True: + # We use the same timeout as in readline.c: 100ms + self.run_hooks() + self.console.wait(100) + event = self.console.get_event(block=False) + if not event: + if block: + continue + return False + + translate = True + + if event.evt == "key": + self.input_trans.push(event) + elif event.evt == "scroll": + self.refresh() + elif event.evt == "resize": + self.refresh() + else: + translate = False + + if translate: + cmd = self.input_trans.get() + else: + cmd = [event.evt, event.data] + + if cmd is None: + if block: + continue + return False + + self.do_cmd(cmd) + return True + + def push_char(self, char: int | bytes) -> None: + self.console.push_char(char) + self.handle1(block=False) + + def readline(self, startup_hook: Callback | None = None) -> str: + """Read a line. The implementation of this method also shows + how to drive Reader if you want more control over the event + loop.""" + self.prepare() + try: + if startup_hook is not None: + startup_hook() + self.refresh() + while not self.finished: + self.handle1() + return self.get_unicode() + + finally: + self.restore() + + def bind(self, spec: KeySpec, command: CommandName) -> None: + self.keymap = self.keymap + ((spec, command),) + self.input_trans = input.KeymapTranslator( + self.keymap, invalid_cls="invalid-key", character_cls="self-insert" + ) + + def get_unicode(self) -> str: + """Return the current buffer as a unicode string.""" + return "".join(self.buffer) diff --git a/Lib/_pyrepl/readline.py b/Lib/_pyrepl/readline.py new file mode 100644 index 0000000000..888185eb03 --- /dev/null +++ b/Lib/_pyrepl/readline.py @@ -0,0 +1,598 @@ +# Copyright 2000-2010 Michael Hudson-Doyle +# Alex Gaynor +# Antonio Cuni +# Armin Rigo +# Holger Krekel +# +# All Rights Reserved +# +# +# Permission to use, copy, modify, and distribute this software and +# its documentation for any purpose is hereby granted without fee, +# provided that the above copyright notice appear in all copies and +# that both that copyright notice and this permission notice appear in +# supporting documentation. +# +# THE AUTHOR MICHAEL HUDSON DISCLAIMS ALL WARRANTIES WITH REGARD TO +# THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY +# AND FITNESS, IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, +# INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER +# RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF +# CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN +# CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + +"""A compatibility wrapper reimplementing the 'readline' standard module +on top of pyrepl. Not all functionalities are supported. Contains +extensions for multiline input. +""" + +from __future__ import annotations + +import warnings +from dataclasses import dataclass, field + +import os +from site import gethistoryfile # type: ignore[attr-defined] +import sys +from rlcompleter import Completer as RLCompleter + +from . import commands, historical_reader +from .completing_reader import CompletingReader +from .console import Console as ConsoleType + +Console: type[ConsoleType] +_error: tuple[type[Exception], ...] | type[Exception] +try: + from .unix_console import UnixConsole as Console, _error +except ImportError: + from .windows_console import WindowsConsole as Console, _error + +ENCODING = sys.getdefaultencoding() or "latin1" + + +# types +Command = commands.Command +from collections.abc import Callable, Collection +from .types import Callback, Completer, KeySpec, CommandName + +TYPE_CHECKING = False + +if TYPE_CHECKING: + from typing import Any, Mapping + + +MoreLinesCallable = Callable[[str], bool] + + +__all__ = [ + "add_history", + "clear_history", + "get_begidx", + "get_completer", + "get_completer_delims", + "get_current_history_length", + "get_endidx", + "get_history_item", + "get_history_length", + "get_line_buffer", + "insert_text", + "parse_and_bind", + "read_history_file", + # "read_init_file", + # "redisplay", + "remove_history_item", + "replace_history_item", + "set_auto_history", + "set_completer", + "set_completer_delims", + "set_history_length", + # "set_pre_input_hook", + "set_startup_hook", + "write_history_file", + # ---- multiline extensions ---- + "multiline_input", +] + +# ____________________________________________________________ + +@dataclass +class ReadlineConfig: + readline_completer: Completer | None = None + completer_delims: frozenset[str] = frozenset(" \t\n`~!@#$%^&*()-=+[{]}\\|;:'\",<>/?") + + +@dataclass(kw_only=True) +class ReadlineAlikeReader(historical_reader.HistoricalReader, CompletingReader): + # Class fields + assume_immutable_completions = False + use_brackets = False + sort_in_column = True + + # Instance fields + config: ReadlineConfig + more_lines: MoreLinesCallable | None = None + last_used_indentation: str | None = None + + def __post_init__(self) -> None: + super().__post_init__() + self.commands["maybe_accept"] = maybe_accept + self.commands["maybe-accept"] = maybe_accept + self.commands["backspace_dedent"] = backspace_dedent + self.commands["backspace-dedent"] = backspace_dedent + + def error(self, msg: str = "none") -> None: + pass # don't show error messages by default + + def get_stem(self) -> str: + b = self.buffer + p = self.pos - 1 + completer_delims = self.config.completer_delims + while p >= 0 and b[p] not in completer_delims: + p -= 1 + return "".join(b[p + 1 : self.pos]) + + def get_completions(self, stem: str) -> list[str]: + if len(stem) == 0 and self.more_lines is not None: + b = self.buffer + p = self.pos + while p > 0 and b[p - 1] != "\n": + p -= 1 + num_spaces = 4 - ((self.pos - p) % 4) + return [" " * num_spaces] + result = [] + function = self.config.readline_completer + if function is not None: + try: + stem = str(stem) # rlcompleter.py seems to not like unicode + except UnicodeEncodeError: + pass # but feed unicode anyway if we have no choice + state = 0 + while True: + try: + next = function(stem, state) + except Exception: + break + if not isinstance(next, str): + break + result.append(next) + state += 1 + # emulate the behavior of the standard readline that sorts + # the completions before displaying them. + result.sort() + return result + + def get_trimmed_history(self, maxlength: int) -> list[str]: + if maxlength >= 0: + cut = len(self.history) - maxlength + if cut < 0: + cut = 0 + else: + cut = 0 + return self.history[cut:] + + def update_last_used_indentation(self) -> None: + indentation = _get_first_indentation(self.buffer) + if indentation is not None: + self.last_used_indentation = indentation + + # --- simplified support for reading multiline Python statements --- + + def collect_keymap(self) -> tuple[tuple[KeySpec, CommandName], ...]: + return super().collect_keymap() + ( + (r"\n", "maybe-accept"), + (r"\", "backspace-dedent"), + ) + + def after_command(self, cmd: Command) -> None: + super().after_command(cmd) + if self.more_lines is None: + # Force single-line input if we are in raw_input() mode. + # Although there is no direct way to add a \n in this mode, + # multiline buffers can still show up using various + # commands, e.g. navigating the history. + try: + index = self.buffer.index("\n") + except ValueError: + pass + else: + self.buffer = self.buffer[:index] + if self.pos > len(self.buffer): + self.pos = len(self.buffer) + + +def set_auto_history(_should_auto_add_history: bool) -> None: + """Enable or disable automatic history""" + historical_reader.should_auto_add_history = bool(_should_auto_add_history) + + +def _get_this_line_indent(buffer: list[str], pos: int) -> int: + indent = 0 + while pos > 0 and buffer[pos - 1] in " \t": + indent += 1 + pos -= 1 + if pos > 0 and buffer[pos - 1] == "\n": + return indent + return 0 + + +def _get_previous_line_indent(buffer: list[str], pos: int) -> tuple[int, int | None]: + prevlinestart = pos + while prevlinestart > 0 and buffer[prevlinestart - 1] != "\n": + prevlinestart -= 1 + prevlinetext = prevlinestart + while prevlinetext < pos and buffer[prevlinetext] in " \t": + prevlinetext += 1 + if prevlinetext == pos: + indent = None + else: + indent = prevlinetext - prevlinestart + return prevlinestart, indent + + +def _get_first_indentation(buffer: list[str]) -> str | None: + indented_line_start = None + for i in range(len(buffer)): + if (i < len(buffer) - 1 + and buffer[i] == "\n" + and buffer[i + 1] in " \t" + ): + indented_line_start = i + 1 + elif indented_line_start is not None and buffer[i] not in " \t\n": + return ''.join(buffer[indented_line_start : i]) + return None + + +def _should_auto_indent(buffer: list[str], pos: int) -> bool: + # check if last character before "pos" is a colon, ignoring + # whitespaces and comments. + last_char = None + while pos > 0: + pos -= 1 + if last_char is None: + if buffer[pos] not in " \t\n#": # ignore whitespaces and comments + last_char = buffer[pos] + else: + # even if we found a non-whitespace character before + # original pos, we keep going back until newline is reached + # to make sure we ignore comments + if buffer[pos] == "\n": + break + if buffer[pos] == "#": + last_char = None + return last_char == ":" + + +class maybe_accept(commands.Command): + def do(self) -> None: + r: ReadlineAlikeReader + r = self.reader # type: ignore[assignment] + r.dirty = True # this is needed to hide the completion menu, if visible + + if self.reader.in_bracketed_paste: + r.insert("\n") + return + + # if there are already several lines and the cursor + # is not on the last one, always insert a new \n. + text = r.get_unicode() + + if "\n" in r.buffer[r.pos :] or ( + r.more_lines is not None and r.more_lines(text) + ): + def _newline_before_pos(): + before_idx = r.pos - 1 + while before_idx > 0 and text[before_idx].isspace(): + before_idx -= 1 + return text[before_idx : r.pos].count("\n") > 0 + + # if there's already a new line before the cursor then + # even if the cursor is followed by whitespace, we assume + # the user is trying to terminate the block + if _newline_before_pos() and text[r.pos:].isspace(): + self.finish = True + return + + # auto-indent the next line like the previous line + prevlinestart, indent = _get_previous_line_indent(r.buffer, r.pos) + r.insert("\n") + if not self.reader.paste_mode: + if indent: + for i in range(prevlinestart, prevlinestart + indent): + r.insert(r.buffer[i]) + r.update_last_used_indentation() + if _should_auto_indent(r.buffer, r.pos): + if r.last_used_indentation is not None: + indentation = r.last_used_indentation + else: + # default + indentation = " " * 4 + r.insert(indentation) + elif not self.reader.paste_mode: + self.finish = True + else: + r.insert("\n") + + +class backspace_dedent(commands.Command): + def do(self) -> None: + r = self.reader + b = r.buffer + if r.pos > 0: + repeat = 1 + if b[r.pos - 1] != "\n": + indent = _get_this_line_indent(b, r.pos) + if indent > 0: + ls = r.pos - indent + while ls > 0: + ls, pi = _get_previous_line_indent(b, ls - 1) + if pi is not None and pi < indent: + repeat = indent - pi + break + r.pos -= repeat + del b[r.pos : r.pos + repeat] + r.dirty = True + else: + self.reader.error("can't backspace at start") + + +# ____________________________________________________________ + + +@dataclass(slots=True) +class _ReadlineWrapper: + f_in: int = -1 + f_out: int = -1 + reader: ReadlineAlikeReader | None = field(default=None, repr=False) + saved_history_length: int = -1 + startup_hook: Callback | None = None + config: ReadlineConfig = field(default_factory=ReadlineConfig, repr=False) + + def __post_init__(self) -> None: + if self.f_in == -1: + self.f_in = os.dup(0) + if self.f_out == -1: + self.f_out = os.dup(1) + + def get_reader(self) -> ReadlineAlikeReader: + if self.reader is None: + console = Console(self.f_in, self.f_out, encoding=ENCODING) + self.reader = ReadlineAlikeReader(console=console, config=self.config) + return self.reader + + def input(self, prompt: object = "") -> str: + try: + reader = self.get_reader() + except _error: + assert raw_input is not None + return raw_input(prompt) + prompt_str = str(prompt) + reader.ps1 = prompt_str + sys.audit("builtins.input", prompt_str) + result = reader.readline(startup_hook=self.startup_hook) + sys.audit("builtins.input/result", result) + return result + + def multiline_input(self, more_lines: MoreLinesCallable, ps1: str, ps2: str) -> str: + """Read an input on possibly multiple lines, asking for more + lines as long as 'more_lines(unicodetext)' returns an object whose + boolean value is true. + """ + reader = self.get_reader() + saved = reader.more_lines + try: + reader.more_lines = more_lines + reader.ps1 = ps1 + reader.ps2 = ps1 + reader.ps3 = ps2 + reader.ps4 = "" + with warnings.catch_warnings(action="ignore"): + return reader.readline() + finally: + reader.more_lines = saved + reader.paste_mode = False + + def parse_and_bind(self, string: str) -> None: + pass # XXX we don't support parsing GNU-readline-style init files + + def set_completer(self, function: Completer | None = None) -> None: + self.config.readline_completer = function + + def get_completer(self) -> Completer | None: + return self.config.readline_completer + + def set_completer_delims(self, delimiters: Collection[str]) -> None: + self.config.completer_delims = frozenset(delimiters) + + def get_completer_delims(self) -> str: + return "".join(sorted(self.config.completer_delims)) + + def _histline(self, line: str) -> str: + line = line.rstrip("\n") + return line + + def get_history_length(self) -> int: + return self.saved_history_length + + def set_history_length(self, length: int) -> None: + self.saved_history_length = length + + def get_current_history_length(self) -> int: + return len(self.get_reader().history) + + def read_history_file(self, filename: str = gethistoryfile()) -> None: + # multiline extension (really a hack) for the end of lines that + # are actually continuations inside a single multiline_input() + # history item: we use \r\n instead of just \n. If the history + # file is passed to GNU readline, the extra \r are just ignored. + history = self.get_reader().history + + with open(os.path.expanduser(filename), 'rb') as f: + is_editline = f.readline().startswith(b"_HiStOrY_V2_") + if is_editline: + encoding = "unicode-escape" + else: + f.seek(0) + encoding = "utf-8" + + lines = [line.decode(encoding, errors='replace') for line in f.read().split(b'\n')] + buffer = [] + for line in lines: + if line.endswith("\r"): + buffer.append(line+'\n') + else: + line = self._histline(line) + if buffer: + line = self._histline("".join(buffer).replace("\r", "") + line) + del buffer[:] + if line: + history.append(line) + + def write_history_file(self, filename: str = gethistoryfile()) -> None: + maxlength = self.saved_history_length + history = self.get_reader().get_trimmed_history(maxlength) + f = open(os.path.expanduser(filename), "w", + encoding="utf-8", newline="\n") + with f: + for entry in history: + entry = entry.replace("\n", "\r\n") # multiline history support + f.write(entry + "\n") + + def clear_history(self) -> None: + del self.get_reader().history[:] + + def get_history_item(self, index: int) -> str | None: + history = self.get_reader().history + if 1 <= index <= len(history): + return history[index - 1] + else: + return None # like readline.c + + def remove_history_item(self, index: int) -> None: + history = self.get_reader().history + if 0 <= index < len(history): + del history[index] + else: + raise ValueError("No history item at position %d" % index) + # like readline.c + + def replace_history_item(self, index: int, line: str) -> None: + history = self.get_reader().history + if 0 <= index < len(history): + history[index] = self._histline(line) + else: + raise ValueError("No history item at position %d" % index) + # like readline.c + + def add_history(self, line: str) -> None: + self.get_reader().history.append(self._histline(line)) + + def set_startup_hook(self, function: Callback | None = None) -> None: + self.startup_hook = function + + def get_line_buffer(self) -> str: + return self.get_reader().get_unicode() + + def _get_idxs(self) -> tuple[int, int]: + start = cursor = self.get_reader().pos + buf = self.get_line_buffer() + for i in range(cursor - 1, -1, -1): + if buf[i] in self.get_completer_delims(): + break + start = i + return start, cursor + + def get_begidx(self) -> int: + return self._get_idxs()[0] + + def get_endidx(self) -> int: + return self._get_idxs()[1] + + def insert_text(self, text: str) -> None: + self.get_reader().insert(text) + + +_wrapper = _ReadlineWrapper() + +# ____________________________________________________________ +# Public API + +parse_and_bind = _wrapper.parse_and_bind +set_completer = _wrapper.set_completer +get_completer = _wrapper.get_completer +set_completer_delims = _wrapper.set_completer_delims +get_completer_delims = _wrapper.get_completer_delims +get_history_length = _wrapper.get_history_length +set_history_length = _wrapper.set_history_length +get_current_history_length = _wrapper.get_current_history_length +read_history_file = _wrapper.read_history_file +write_history_file = _wrapper.write_history_file +clear_history = _wrapper.clear_history +get_history_item = _wrapper.get_history_item +remove_history_item = _wrapper.remove_history_item +replace_history_item = _wrapper.replace_history_item +add_history = _wrapper.add_history +set_startup_hook = _wrapper.set_startup_hook +get_line_buffer = _wrapper.get_line_buffer +get_begidx = _wrapper.get_begidx +get_endidx = _wrapper.get_endidx +insert_text = _wrapper.insert_text + +# Extension +multiline_input = _wrapper.multiline_input + +# Internal hook +_get_reader = _wrapper.get_reader + +# ____________________________________________________________ +# Stubs + + +def _make_stub(_name: str, _ret: object) -> None: + def stub(*args: object, **kwds: object) -> None: + import warnings + + warnings.warn("readline.%s() not implemented" % _name, stacklevel=2) + + stub.__name__ = _name + globals()[_name] = stub + + +for _name, _ret in [ + ("read_init_file", None), + ("redisplay", None), + ("set_pre_input_hook", None), +]: + assert _name not in globals(), _name + _make_stub(_name, _ret) + +# ____________________________________________________________ + + +def _setup(namespace: Mapping[str, Any]) -> None: + global raw_input + if raw_input is not None: + return # don't run _setup twice + + try: + f_in = sys.stdin.fileno() + f_out = sys.stdout.fileno() + except (AttributeError, ValueError): + return + if not os.isatty(f_in) or not os.isatty(f_out): + return + + _wrapper.f_in = f_in + _wrapper.f_out = f_out + + # set up namespace in rlcompleter, which requires it to be a bona fide dict + if not isinstance(namespace, dict): + namespace = dict(namespace) + _wrapper.config.readline_completer = RLCompleter(namespace).complete + + # this is not really what readline.c does. Better than nothing I guess + import builtins + raw_input = builtins.input + builtins.input = _wrapper.input + + +raw_input: Callable[[object], str] | None = None diff --git a/Lib/_pyrepl/simple_interact.py b/Lib/_pyrepl/simple_interact.py new file mode 100644 index 0000000000..66e66eae7e --- /dev/null +++ b/Lib/_pyrepl/simple_interact.py @@ -0,0 +1,167 @@ +# Copyright 2000-2010 Michael Hudson-Doyle +# Armin Rigo +# +# All Rights Reserved +# +# +# Permission to use, copy, modify, and distribute this software and +# its documentation for any purpose is hereby granted without fee, +# provided that the above copyright notice appear in all copies and +# that both that copyright notice and this permission notice appear in +# supporting documentation. +# +# THE AUTHOR MICHAEL HUDSON DISCLAIMS ALL WARRANTIES WITH REGARD TO +# THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY +# AND FITNESS, IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, +# INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER +# RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF +# CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN +# CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + +"""This is an alternative to python_reader which tries to emulate +the CPython prompt as closely as possible, with the exception of +allowing multiline input and multiline history entries. +""" + +from __future__ import annotations + +import _sitebuiltins +import linecache +import functools +import os +import sys +import code + +from .readline import _get_reader, multiline_input + +TYPE_CHECKING = False + +if TYPE_CHECKING: + from typing import Any + + +_error: tuple[type[Exception], ...] | type[Exception] +try: + from .unix_console import _error +except ModuleNotFoundError: + from .windows_console import _error + +def check() -> str: + """Returns the error message if there is a problem initializing the state.""" + try: + _get_reader() + except _error as e: + if term := os.environ.get("TERM", ""): + term = f"; TERM={term}" + return str(str(e) or repr(e) or "unknown error") + term + return "" + + +def _strip_final_indent(text: str) -> str: + # kill spaces and tabs at the end, but only if they follow '\n'. + # meant to remove the auto-indentation only (although it would of + # course also remove explicitly-added indentation). + short = text.rstrip(" \t") + n = len(short) + if n > 0 and text[n - 1] == "\n": + return short + return text + + +def _clear_screen(): + reader = _get_reader() + reader.scheduled_commands.append("clear_screen") + + +REPL_COMMANDS = { + "exit": _sitebuiltins.Quitter('exit', ''), + "quit": _sitebuiltins.Quitter('quit' ,''), + "copyright": _sitebuiltins._Printer('copyright', sys.copyright), + "help": _sitebuiltins._Helper(), + "clear": _clear_screen, + "\x1a": _sitebuiltins.Quitter('\x1a', ''), +} + + +def _more_lines(console: code.InteractiveConsole, unicodetext: str) -> bool: + # ooh, look at the hack: + src = _strip_final_indent(unicodetext) + try: + code = console.compile(src, "", "single") + except (OverflowError, SyntaxError, ValueError): + lines = src.splitlines(keepends=True) + if len(lines) == 1: + return False + + last_line = lines[-1] + was_indented = last_line.startswith((" ", "\t")) + not_empty = last_line.strip() != "" + incomplete = not last_line.endswith("\n") + return (was_indented or not_empty) and incomplete + else: + return code is None + + +def run_multiline_interactive_console( + console: code.InteractiveConsole, + *, + future_flags: int = 0, +) -> None: + from .readline import _setup + _setup(console.locals) + if future_flags: + console.compile.compiler.flags |= future_flags + + more_lines = functools.partial(_more_lines, console) + input_n = 0 + + def maybe_run_command(statement: str) -> bool: + statement = statement.strip() + if statement in console.locals or statement not in REPL_COMMANDS: + return False + + reader = _get_reader() + reader.history.pop() # skip internal commands in history + command = REPL_COMMANDS[statement] + if callable(command): + # Make sure that history does not change because of commands + with reader.suspend_history(): + command() + return True + return False + + while 1: + try: + try: + sys.stdout.flush() + except Exception: + pass + + ps1 = getattr(sys, "ps1", ">>> ") + ps2 = getattr(sys, "ps2", "... ") + try: + statement = multiline_input(more_lines, ps1, ps2) + except EOFError: + break + + if maybe_run_command(statement): + continue + + input_name = f"" + linecache._register_code(input_name, statement, "") # type: ignore[attr-defined] + more = console.push(_strip_final_indent(statement), filename=input_name, _symbol="single") # type: ignore[call-arg] + assert not more + input_n += 1 + except KeyboardInterrupt: + r = _get_reader() + if r.input_trans is r.isearch_trans: + r.do_cmd(("isearch-end", [""])) + r.pos = len(r.get_unicode()) + r.dirty = True + r.refresh() + r.in_bracketed_paste = False + console.write("\nKeyboardInterrupt\n") + console.resetbuffer() + except MemoryError: + console.write("\nMemoryError\n") + console.resetbuffer() diff --git a/Lib/_pyrepl/trace.py b/Lib/_pyrepl/trace.py new file mode 100644 index 0000000000..a8eb2433cd --- /dev/null +++ b/Lib/_pyrepl/trace.py @@ -0,0 +1,21 @@ +from __future__ import annotations + +import os + +# types +if False: + from typing import IO + + +trace_file: IO[str] | None = None +if trace_filename := os.environ.get("PYREPL_TRACE"): + trace_file = open(trace_filename, "a") + + +def trace(line: str, *k: object, **kw: object) -> None: + if trace_file is None: + return + if k or kw: + line = line.format(*k, **kw) + trace_file.write(line + "\n") + trace_file.flush() diff --git a/Lib/_pyrepl/types.py b/Lib/_pyrepl/types.py new file mode 100644 index 0000000000..f9d48b828c --- /dev/null +++ b/Lib/_pyrepl/types.py @@ -0,0 +1,8 @@ +from collections.abc import Callable, Iterator + +Callback = Callable[[], object] +SimpleContextManager = Iterator[None] +KeySpec = str # like r"\C-c" +CommandName = str # like "interrupt" +EventTuple = tuple[CommandName, str] +Completer = Callable[[str, int], str | None] diff --git a/Lib/_pyrepl/unix_console.py b/Lib/_pyrepl/unix_console.py new file mode 100644 index 0000000000..e69c96b115 --- /dev/null +++ b/Lib/_pyrepl/unix_console.py @@ -0,0 +1,810 @@ +# Copyright 2000-2010 Michael Hudson-Doyle +# Antonio Cuni +# Armin Rigo +# +# All Rights Reserved +# +# +# Permission to use, copy, modify, and distribute this software and +# its documentation for any purpose is hereby granted without fee, +# provided that the above copyright notice appear in all copies and +# that both that copyright notice and this permission notice appear in +# supporting documentation. +# +# THE AUTHOR MICHAEL HUDSON DISCLAIMS ALL WARRANTIES WITH REGARD TO +# THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY +# AND FITNESS, IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, +# INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER +# RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF +# CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN +# CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + +from __future__ import annotations + +import errno +import os +import re +import select +import signal +import struct +import termios +import time +import platform +from fcntl import ioctl + +from . import curses +from .console import Console, Event +from .fancy_termios import tcgetattr, tcsetattr +from .trace import trace +from .unix_eventqueue import EventQueue +from .utils import wlen + + +TYPE_CHECKING = False + +# types +if TYPE_CHECKING: + from typing import IO, Literal, overload +else: + overload = lambda func: None + + +class InvalidTerminal(RuntimeError): + pass + + +_error = (termios.error, curses.error, InvalidTerminal) + +SIGWINCH_EVENT = "repaint" + +FIONREAD = getattr(termios, "FIONREAD", None) +TIOCGWINSZ = getattr(termios, "TIOCGWINSZ", None) + +# ------------ start of baudrate definitions ------------ + +# Add (possibly) missing baudrates (check termios man page) to termios + + +def add_baudrate_if_supported(dictionary: dict[int, int], rate: int) -> None: + baudrate_name = "B%d" % rate + if hasattr(termios, baudrate_name): + dictionary[getattr(termios, baudrate_name)] = rate + + +# Check the termios man page (Line speed) to know where these +# values come from. +potential_baudrates = [ + 0, + 110, + 115200, + 1200, + 134, + 150, + 1800, + 19200, + 200, + 230400, + 2400, + 300, + 38400, + 460800, + 4800, + 50, + 57600, + 600, + 75, + 9600, +] + +ratedict: dict[int, int] = {} +for rate in potential_baudrates: + add_baudrate_if_supported(ratedict, rate) + +# Clean up variables to avoid unintended usage +del rate, add_baudrate_if_supported + +# ------------ end of baudrate definitions ------------ + +delayprog = re.compile(b"\\$<([0-9]+)((?:/|\\*){0,2})>") + +try: + poll: type[select.poll] = select.poll +except AttributeError: + # this is exactly the minumum necessary to support what we + # do with poll objects + class MinimalPoll: + def __init__(self): + pass + + def register(self, fd, flag): + self.fd = fd + # note: The 'timeout' argument is received as *milliseconds* + def poll(self, timeout: float | None = None) -> list[int]: + if timeout is None: + r, w, e = select.select([self.fd], [], []) + else: + r, w, e = select.select([self.fd], [], [], timeout/1000) + return r + + poll = MinimalPoll # type: ignore[assignment] + + +class UnixConsole(Console): + def __init__( + self, + f_in: IO[bytes] | int = 0, + f_out: IO[bytes] | int = 1, + term: str = "", + encoding: str = "", + ): + """ + Initialize the UnixConsole. + + Parameters: + - f_in (int or file-like object): Input file descriptor or object. + - f_out (int or file-like object): Output file descriptor or object. + - term (str): Terminal name. + - encoding (str): Encoding to use for I/O operations. + """ + super().__init__(f_in, f_out, term, encoding) + + self.pollob = poll() + self.pollob.register(self.input_fd, select.POLLIN) + self.input_buffer = b"" + self.input_buffer_pos = 0 + curses.setupterm(term or None, self.output_fd) + self.term = term + + @overload + def _my_getstr(cap: str, optional: Literal[False] = False) -> bytes: ... + + @overload + def _my_getstr(cap: str, optional: bool) -> bytes | None: ... + + def _my_getstr(cap: str, optional: bool = False) -> bytes | None: + r = curses.tigetstr(cap) + if not optional and r is None: + raise InvalidTerminal( + f"terminal doesn't have the required {cap} capability" + ) + return r + + self._bel = _my_getstr("bel") + self._civis = _my_getstr("civis", optional=True) + self._clear = _my_getstr("clear") + self._cnorm = _my_getstr("cnorm", optional=True) + self._cub = _my_getstr("cub", optional=True) + self._cub1 = _my_getstr("cub1", optional=True) + self._cud = _my_getstr("cud", optional=True) + self._cud1 = _my_getstr("cud1", optional=True) + self._cuf = _my_getstr("cuf", optional=True) + self._cuf1 = _my_getstr("cuf1", optional=True) + self._cup = _my_getstr("cup") + self._cuu = _my_getstr("cuu", optional=True) + self._cuu1 = _my_getstr("cuu1", optional=True) + self._dch1 = _my_getstr("dch1", optional=True) + self._dch = _my_getstr("dch", optional=True) + self._el = _my_getstr("el") + self._hpa = _my_getstr("hpa", optional=True) + self._ich = _my_getstr("ich", optional=True) + self._ich1 = _my_getstr("ich1", optional=True) + self._ind = _my_getstr("ind", optional=True) + self._pad = _my_getstr("pad", optional=True) + self._ri = _my_getstr("ri", optional=True) + self._rmkx = _my_getstr("rmkx", optional=True) + self._smkx = _my_getstr("smkx", optional=True) + + self.__setup_movement() + + self.event_queue = EventQueue(self.input_fd, self.encoding) + self.cursor_visible = 1 + + def more_in_buffer(self) -> bool: + return bool( + self.input_buffer + and self.input_buffer_pos < len(self.input_buffer) + ) + + def __read(self, n: int) -> bytes: + if not self.more_in_buffer(): + self.input_buffer = os.read(self.input_fd, 10000) + + ret = self.input_buffer[self.input_buffer_pos : self.input_buffer_pos + n] + self.input_buffer_pos += len(ret) + if self.input_buffer_pos >= len(self.input_buffer): + self.input_buffer = b"" + self.input_buffer_pos = 0 + return ret + + + def change_encoding(self, encoding: str) -> None: + """ + Change the encoding used for I/O operations. + + Parameters: + - encoding (str): New encoding to use. + """ + self.encoding = encoding + + def refresh(self, screen, c_xy): + """ + Refresh the console screen. + + Parameters: + - screen (list): List of strings representing the screen contents. + - c_xy (tuple): Cursor position (x, y) on the screen. + """ + cx, cy = c_xy + if not self.__gone_tall: + while len(self.screen) < min(len(screen), self.height): + self.__hide_cursor() + self.__move(0, len(self.screen) - 1) + self.__write("\n") + self.posxy = 0, len(self.screen) + self.screen.append("") + else: + while len(self.screen) < len(screen): + self.screen.append("") + + if len(screen) > self.height: + self.__gone_tall = 1 + self.__move = self.__move_tall + + px, py = self.posxy + old_offset = offset = self.__offset + height = self.height + + # we make sure the cursor is on the screen, and that we're + # using all of the screen if we can + if cy < offset: + offset = cy + elif cy >= offset + height: + offset = cy - height + 1 + elif offset > 0 and len(screen) < offset + height: + offset = max(len(screen) - height, 0) + screen.append("") + + oldscr = self.screen[old_offset : old_offset + height] + newscr = screen[offset : offset + height] + + # use hardware scrolling if we have it. + if old_offset > offset and self._ri: + self.__hide_cursor() + self.__write_code(self._cup, 0, 0) + self.posxy = 0, old_offset + for i in range(old_offset - offset): + self.__write_code(self._ri) + oldscr.pop(-1) + oldscr.insert(0, "") + elif old_offset < offset and self._ind: + self.__hide_cursor() + self.__write_code(self._cup, self.height - 1, 0) + self.posxy = 0, old_offset + self.height - 1 + for i in range(offset - old_offset): + self.__write_code(self._ind) + oldscr.pop(0) + oldscr.append("") + + self.__offset = offset + + for ( + y, + oldline, + newline, + ) in zip(range(offset, offset + height), oldscr, newscr): + if oldline != newline: + self.__write_changed_line(y, oldline, newline, px) + + y = len(newscr) + while y < len(oldscr): + self.__hide_cursor() + self.__move(0, y) + self.posxy = 0, y + self.__write_code(self._el) + y += 1 + + self.__show_cursor() + + self.screen = screen.copy() + self.move_cursor(cx, cy) + self.flushoutput() + + def move_cursor(self, x, y): + """ + Move the cursor to the specified position on the screen. + + Parameters: + - x (int): X coordinate. + - y (int): Y coordinate. + """ + if y < self.__offset or y >= self.__offset + self.height: + self.event_queue.insert(Event("scroll", None)) + else: + self.__move(x, y) + self.posxy = x, y + self.flushoutput() + + def prepare(self): + """ + Prepare the console for input/output operations. + """ + self.__svtermstate = tcgetattr(self.input_fd) + raw = self.__svtermstate.copy() + raw.iflag &= ~(termios.INPCK | termios.ISTRIP | termios.IXON) + raw.oflag &= ~(termios.OPOST) + raw.cflag &= ~(termios.CSIZE | termios.PARENB) + raw.cflag |= termios.CS8 + raw.iflag |= termios.BRKINT + raw.lflag &= ~(termios.ICANON | termios.ECHO | termios.IEXTEN) + raw.lflag |= termios.ISIG + raw.cc[termios.VMIN] = 1 + raw.cc[termios.VTIME] = 0 + tcsetattr(self.input_fd, termios.TCSADRAIN, raw) + + # In macOS terminal we need to deactivate line wrap via ANSI escape code + if platform.system() == "Darwin" and os.getenv("TERM_PROGRAM") == "Apple_Terminal": + os.write(self.output_fd, b"\033[?7l") + + self.screen = [] + self.height, self.width = self.getheightwidth() + + self.__buffer = [] + + self.posxy = 0, 0 + self.__gone_tall = 0 + self.__move = self.__move_short + self.__offset = 0 + + self.__maybe_write_code(self._smkx) + + try: + self.old_sigwinch = signal.signal(signal.SIGWINCH, self.__sigwinch) + except ValueError: + pass + + self.__enable_bracketed_paste() + + def restore(self): + """ + Restore the console to the default state + """ + self.__disable_bracketed_paste() + self.__maybe_write_code(self._rmkx) + self.flushoutput() + tcsetattr(self.input_fd, termios.TCSADRAIN, self.__svtermstate) + + if platform.system() == "Darwin" and os.getenv("TERM_PROGRAM") == "Apple_Terminal": + os.write(self.output_fd, b"\033[?7h") + + if hasattr(self, "old_sigwinch"): + signal.signal(signal.SIGWINCH, self.old_sigwinch) + del self.old_sigwinch + + def push_char(self, char: int | bytes) -> None: + """ + Push a character to the console event queue. + """ + trace("push char {char!r}", char=char) + self.event_queue.push(char) + + def get_event(self, block: bool = True) -> Event | None: + """ + Get an event from the console event queue. + + Parameters: + - block (bool): Whether to block until an event is available. + + Returns: + - Event: Event object from the event queue. + """ + if not block and not self.wait(timeout=0): + return None + + while self.event_queue.empty(): + while True: + try: + self.push_char(self.__read(1)) + except OSError as err: + if err.errno == errno.EINTR: + if not self.event_queue.empty(): + return self.event_queue.get() + else: + continue + else: + raise + else: + break + return self.event_queue.get() + + def wait(self, timeout: float | None = None) -> bool: + """ + Wait for events on the console. + """ + return ( + not self.event_queue.empty() + or self.more_in_buffer() + or bool(self.pollob.poll(timeout)) + ) + + def set_cursor_vis(self, visible): + """ + Set the visibility of the cursor. + + Parameters: + - visible (bool): Visibility flag. + """ + if visible: + self.__show_cursor() + else: + self.__hide_cursor() + + if TIOCGWINSZ: + + def getheightwidth(self): + """ + Get the height and width of the console. + + Returns: + - tuple: Height and width of the console. + """ + try: + return int(os.environ["LINES"]), int(os.environ["COLUMNS"]) + except (KeyError, TypeError, ValueError): + try: + size = ioctl(self.input_fd, TIOCGWINSZ, b"\000" * 8) + except OSError: + return 25, 80 + height, width = struct.unpack("hhhh", size)[0:2] + if not height: + return 25, 80 + return height, width + + else: + + def getheightwidth(self): + """ + Get the height and width of the console. + + Returns: + - tuple: Height and width of the console. + """ + try: + return int(os.environ["LINES"]), int(os.environ["COLUMNS"]) + except (KeyError, TypeError, ValueError): + return 25, 80 + + def forgetinput(self): + """ + Discard any pending input on the console. + """ + termios.tcflush(self.input_fd, termios.TCIFLUSH) + + def flushoutput(self): + """ + Flush the output buffer. + """ + for text, iscode in self.__buffer: + if iscode: + self.__tputs(text) + else: + os.write(self.output_fd, text.encode(self.encoding, "replace")) + del self.__buffer[:] + + def finish(self): + """ + Finish console operations and flush the output buffer. + """ + y = len(self.screen) - 1 + while y >= 0 and not self.screen[y]: + y -= 1 + self.__move(0, min(y, self.height + self.__offset - 1)) + self.__write("\n\r") + self.flushoutput() + + def beep(self): + """ + Emit a beep sound. + """ + self.__maybe_write_code(self._bel) + self.flushoutput() + + if FIONREAD: + + def getpending(self): + """ + Get pending events from the console event queue. + + Returns: + - Event: Pending event from the event queue. + """ + e = Event("key", "", b"") + + while not self.event_queue.empty(): + e2 = self.event_queue.get() + e.data += e2.data + e.raw += e.raw + + amount = struct.unpack("i", ioctl(self.input_fd, FIONREAD, b"\0\0\0\0"))[0] + raw = self.__read(amount) + data = str(raw, self.encoding, "replace") + e.data += data + e.raw += raw + return e + + else: + + def getpending(self): + """ + Get pending events from the console event queue. + + Returns: + - Event: Pending event from the event queue. + """ + e = Event("key", "", b"") + + while not self.event_queue.empty(): + e2 = self.event_queue.get() + e.data += e2.data + e.raw += e.raw + + amount = 10000 + raw = self.__read(amount) + data = str(raw, self.encoding, "replace") + e.data += data + e.raw += raw + return e + + def clear(self): + """ + Clear the console screen. + """ + self.__write_code(self._clear) + self.__gone_tall = 1 + self.__move = self.__move_tall + self.posxy = 0, 0 + self.screen = [] + + @property + def input_hook(self): + try: + import posix + except ImportError: + return None + if posix._is_inputhook_installed(): + return posix._inputhook + + def __enable_bracketed_paste(self) -> None: + os.write(self.output_fd, b"\x1b[?2004h") + + def __disable_bracketed_paste(self) -> None: + os.write(self.output_fd, b"\x1b[?2004l") + + def __setup_movement(self): + """ + Set up the movement functions based on the terminal capabilities. + """ + if 0 and self._hpa: # hpa don't work in windows telnet :-( + self.__move_x = self.__move_x_hpa + elif self._cub and self._cuf: + self.__move_x = self.__move_x_cub_cuf + elif self._cub1 and self._cuf1: + self.__move_x = self.__move_x_cub1_cuf1 + else: + raise RuntimeError("insufficient terminal (horizontal)") + + if self._cuu and self._cud: + self.__move_y = self.__move_y_cuu_cud + elif self._cuu1 and self._cud1: + self.__move_y = self.__move_y_cuu1_cud1 + else: + raise RuntimeError("insufficient terminal (vertical)") + + if self._dch1: + self.dch1 = self._dch1 + elif self._dch: + self.dch1 = curses.tparm(self._dch, 1) + else: + self.dch1 = None + + if self._ich1: + self.ich1 = self._ich1 + elif self._ich: + self.ich1 = curses.tparm(self._ich, 1) + else: + self.ich1 = None + + self.__move = self.__move_short + + def __write_changed_line(self, y, oldline, newline, px_coord): + # this is frustrating; there's no reason to test (say) + # self.dch1 inside the loop -- but alternative ways of + # structuring this function are equally painful (I'm trying to + # avoid writing code generators these days...) + minlen = min(wlen(oldline), wlen(newline)) + x_pos = 0 + x_coord = 0 + + px_pos = 0 + j = 0 + for c in oldline: + if j >= px_coord: + break + j += wlen(c) + px_pos += 1 + + # reuse the oldline as much as possible, but stop as soon as we + # encounter an ESCAPE, because it might be the start of an escape + # sequene + while ( + x_coord < minlen + and oldline[x_pos] == newline[x_pos] + and newline[x_pos] != "\x1b" + ): + x_coord += wlen(newline[x_pos]) + x_pos += 1 + + # if we need to insert a single character right after the first detected change + if oldline[x_pos:] == newline[x_pos + 1 :] and self.ich1: + if ( + y == self.posxy[1] + and x_coord > self.posxy[0] + and oldline[px_pos:x_pos] == newline[px_pos + 1 : x_pos + 1] + ): + x_pos = px_pos + x_coord = px_coord + character_width = wlen(newline[x_pos]) + self.__move(x_coord, y) + self.__write_code(self.ich1) + self.__write(newline[x_pos]) + self.posxy = x_coord + character_width, y + + # if it's a single character change in the middle of the line + elif ( + x_coord < minlen + and oldline[x_pos + 1 :] == newline[x_pos + 1 :] + and wlen(oldline[x_pos]) == wlen(newline[x_pos]) + ): + character_width = wlen(newline[x_pos]) + self.__move(x_coord, y) + self.__write(newline[x_pos]) + self.posxy = x_coord + character_width, y + + # if this is the last character to fit in the line and we edit in the middle of the line + elif ( + self.dch1 + and self.ich1 + and wlen(newline) == self.width + and x_coord < wlen(newline) - 2 + and newline[x_pos + 1 : -1] == oldline[x_pos:-2] + ): + self.__hide_cursor() + self.__move(self.width - 2, y) + self.posxy = self.width - 2, y + self.__write_code(self.dch1) + + character_width = wlen(newline[x_pos]) + self.__move(x_coord, y) + self.__write_code(self.ich1) + self.__write(newline[x_pos]) + self.posxy = character_width + 1, y + + else: + self.__hide_cursor() + self.__move(x_coord, y) + if wlen(oldline) > wlen(newline): + self.__write_code(self._el) + self.__write(newline[x_pos:]) + self.posxy = wlen(newline), y + + if "\x1b" in newline: + # ANSI escape characters are present, so we can't assume + # anything about the position of the cursor. Moving the cursor + # to the left margin should work to get to a known position. + self.move_cursor(0, y) + + def __write(self, text): + self.__buffer.append((text, 0)) + + def __write_code(self, fmt, *args): + self.__buffer.append((curses.tparm(fmt, *args), 1)) + + def __maybe_write_code(self, fmt, *args): + if fmt: + self.__write_code(fmt, *args) + + def __move_y_cuu1_cud1(self, y): + assert self._cud1 is not None + assert self._cuu1 is not None + dy = y - self.posxy[1] + if dy > 0: + self.__write_code(dy * self._cud1) + elif dy < 0: + self.__write_code((-dy) * self._cuu1) + + def __move_y_cuu_cud(self, y): + dy = y - self.posxy[1] + if dy > 0: + self.__write_code(self._cud, dy) + elif dy < 0: + self.__write_code(self._cuu, -dy) + + def __move_x_hpa(self, x: int) -> None: + if x != self.posxy[0]: + self.__write_code(self._hpa, x) + + def __move_x_cub1_cuf1(self, x: int) -> None: + assert self._cuf1 is not None + assert self._cub1 is not None + dx = x - self.posxy[0] + if dx > 0: + self.__write_code(self._cuf1 * dx) + elif dx < 0: + self.__write_code(self._cub1 * (-dx)) + + def __move_x_cub_cuf(self, x: int) -> None: + dx = x - self.posxy[0] + if dx > 0: + self.__write_code(self._cuf, dx) + elif dx < 0: + self.__write_code(self._cub, -dx) + + def __move_short(self, x, y): + self.__move_x(x) + self.__move_y(y) + + def __move_tall(self, x, y): + assert 0 <= y - self.__offset < self.height, y - self.__offset + self.__write_code(self._cup, y - self.__offset, x) + + def __sigwinch(self, signum, frame): + self.height, self.width = self.getheightwidth() + self.event_queue.insert(Event("resize", None)) + + def __hide_cursor(self): + if self.cursor_visible: + self.__maybe_write_code(self._civis) + self.cursor_visible = 0 + + def __show_cursor(self): + if not self.cursor_visible: + self.__maybe_write_code(self._cnorm) + self.cursor_visible = 1 + + def repaint(self): + if not self.__gone_tall: + self.posxy = 0, self.posxy[1] + self.__write("\r") + ns = len(self.screen) * ["\000" * self.width] + self.screen = ns + else: + self.posxy = 0, self.__offset + self.__move(0, self.__offset) + ns = self.height * ["\000" * self.width] + self.screen = ns + + def __tputs(self, fmt, prog=delayprog): + """A Python implementation of the curses tputs function; the + curses one can't really be wrapped in a sane manner. + + I have the strong suspicion that this is complexity that + will never do anyone any good.""" + # using .get() means that things will blow up + # only if the bps is actually needed (which I'm + # betting is pretty unlkely) + bps = ratedict.get(self.__svtermstate.ospeed) + while 1: + m = prog.search(fmt) + if not m: + os.write(self.output_fd, fmt) + break + x, y = m.span() + os.write(self.output_fd, fmt[:x]) + fmt = fmt[y:] + delay = int(m.group(1)) + if b"*" in m.group(2): + delay *= self.height + if self._pad and bps is not None: + nchars = (bps * delay) / 1000 + os.write(self.output_fd, self._pad * nchars) + else: + time.sleep(float(delay) / 1000.0) diff --git a/Lib/_pyrepl/unix_eventqueue.py b/Lib/_pyrepl/unix_eventqueue.py new file mode 100644 index 0000000000..70cfade26e --- /dev/null +++ b/Lib/_pyrepl/unix_eventqueue.py @@ -0,0 +1,152 @@ +# Copyright 2000-2008 Michael Hudson-Doyle +# Armin Rigo +# +# All Rights Reserved +# +# +# Permission to use, copy, modify, and distribute this software and +# its documentation for any purpose is hereby granted without fee, +# provided that the above copyright notice appear in all copies and +# that both that copyright notice and this permission notice appear in +# supporting documentation. +# +# THE AUTHOR MICHAEL HUDSON DISCLAIMS ALL WARRANTIES WITH REGARD TO +# THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY +# AND FITNESS, IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, +# INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER +# RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF +# CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN +# CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + +from collections import deque + +from . import keymap +from .console import Event +from . import curses +from .trace import trace +from termios import tcgetattr, VERASE +import os + + +# Mapping of human-readable key names to their terminal-specific codes +TERMINAL_KEYNAMES = { + "delete": "kdch1", + "down": "kcud1", + "end": "kend", + "enter": "kent", + "home": "khome", + "insert": "kich1", + "left": "kcub1", + "page down": "knp", + "page up": "kpp", + "right": "kcuf1", + "up": "kcuu1", +} + + +# Function keys F1-F20 mapping +TERMINAL_KEYNAMES.update(("f%d" % i, "kf%d" % i) for i in range(1, 21)) + +# Known CTRL-arrow keycodes +CTRL_ARROW_KEYCODES= { + # for xterm, gnome-terminal, xfce terminal, etc. + b'\033[1;5D': 'ctrl left', + b'\033[1;5C': 'ctrl right', + # for rxvt + b'\033Od': 'ctrl left', + b'\033Oc': 'ctrl right', +} + +def get_terminal_keycodes() -> dict[bytes, str]: + """ + Generates a dictionary mapping terminal keycodes to human-readable names. + """ + keycodes = {} + for key, terminal_code in TERMINAL_KEYNAMES.items(): + keycode = curses.tigetstr(terminal_code) + trace('key {key} tiname {terminal_code} keycode {keycode!r}', **locals()) + if keycode: + keycodes[keycode] = key + keycodes.update(CTRL_ARROW_KEYCODES) + return keycodes + +class EventQueue: + def __init__(self, fd: int, encoding: str) -> None: + self.keycodes = get_terminal_keycodes() + if os.isatty(fd): + backspace = tcgetattr(fd)[6][VERASE] + self.keycodes[backspace] = "backspace" + self.compiled_keymap = keymap.compile_keymap(self.keycodes) + self.keymap = self.compiled_keymap + trace("keymap {k!r}", k=self.keymap) + self.encoding = encoding + self.events: deque[Event] = deque() + self.buf = bytearray() + + def get(self) -> Event | None: + """ + Retrieves the next event from the queue. + """ + if self.events: + return self.events.popleft() + else: + return None + + def empty(self) -> bool: + """ + Checks if the queue is empty. + """ + return not self.events + + def flush_buf(self) -> bytearray: + """ + Flushes the buffer and returns its contents. + """ + old = self.buf + self.buf = bytearray() + return old + + def insert(self, event: Event) -> None: + """ + Inserts an event into the queue. + """ + trace('added event {event}', event=event) + self.events.append(event) + + def push(self, char: int | bytes) -> None: + """ + Processes a character by updating the buffer and handling special key mappings. + """ + ord_char = char if isinstance(char, int) else ord(char) + char = bytes(bytearray((ord_char,))) + self.buf.append(ord_char) + if char in self.keymap: + if self.keymap is self.compiled_keymap: + #sanity check, buffer is empty when a special key comes + assert len(self.buf) == 1 + k = self.keymap[char] + trace('found map {k!r}', k=k) + if isinstance(k, dict): + self.keymap = k + else: + self.insert(Event('key', k, self.flush_buf())) + self.keymap = self.compiled_keymap + + elif self.buf and self.buf[0] == 27: # escape + # escape sequence not recognized by our keymap: propagate it + # outside so that i can be recognized as an M-... key (see also + # the docstring in keymap.py + trace('unrecognized escape sequence, propagating...') + self.keymap = self.compiled_keymap + self.insert(Event('key', '\033', bytearray(b'\033'))) + for _c in self.flush_buf()[1:]: + self.push(_c) + + else: + try: + decoded = bytes(self.buf).decode(self.encoding) + except UnicodeError: + return + else: + self.insert(Event('key', decoded, self.flush_buf())) + self.keymap = self.compiled_keymap diff --git a/Lib/_pyrepl/utils.py b/Lib/_pyrepl/utils.py new file mode 100644 index 0000000000..4651717bd7 --- /dev/null +++ b/Lib/_pyrepl/utils.py @@ -0,0 +1,25 @@ +import re +import unicodedata +import functools + +ANSI_ESCAPE_SEQUENCE = re.compile(r"\x1b\[[ -@]*[A-~]") + + +@functools.cache +def str_width(c: str) -> int: + if ord(c) < 128: + return 1 + w = unicodedata.east_asian_width(c) + if w in ('N', 'Na', 'H', 'A'): + return 1 + return 2 + + +def wlen(s: str) -> int: + if len(s) == 1 and s != '\x1a': + return str_width(s) + length = sum(str_width(i) for i in s) + # remove lengths of any escape sequences + sequence = ANSI_ESCAPE_SEQUENCE.findall(s) + ctrl_z_cnt = s.count('\x1a') + return length - sum(len(i) for i in sequence) + ctrl_z_cnt diff --git a/Lib/_pyrepl/windows_console.py b/Lib/_pyrepl/windows_console.py new file mode 100644 index 0000000000..fffadd5e2e --- /dev/null +++ b/Lib/_pyrepl/windows_console.py @@ -0,0 +1,618 @@ +# Copyright 2000-2004 Michael Hudson-Doyle +# +# All Rights Reserved +# +# +# Permission to use, copy, modify, and distribute this software and +# its documentation for any purpose is hereby granted without fee, +# provided that the above copyright notice appear in all copies and +# that both that copyright notice and this permission notice appear in +# supporting documentation. +# +# THE AUTHOR MICHAEL HUDSON DISCLAIMS ALL WARRANTIES WITH REGARD TO +# THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY +# AND FITNESS, IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, +# INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER +# RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF +# CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN +# CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + +from __future__ import annotations + +import io +import os +import sys +import time +import msvcrt + +from collections import deque +import ctypes +from ctypes.wintypes import ( + _COORD, + WORD, + SMALL_RECT, + BOOL, + HANDLE, + CHAR, + DWORD, + WCHAR, + SHORT, +) +from ctypes import Structure, POINTER, Union +from .console import Event, Console +from .trace import trace +from .utils import wlen + +try: + from ctypes import GetLastError, WinDLL, windll, WinError # type: ignore[attr-defined] +except: + # Keep MyPy happy off Windows + from ctypes import CDLL as WinDLL, cdll as windll + + def GetLastError() -> int: + return 42 + + class WinError(OSError): # type: ignore[no-redef] + def __init__(self, err: int | None, descr: str | None = None) -> None: + self.err = err + self.descr = descr + + +TYPE_CHECKING = False + +if TYPE_CHECKING: + from typing import IO + +# Virtual-Key Codes: https://learn.microsoft.com/en-us/windows/win32/inputdev/virtual-key-codes +VK_MAP: dict[int, str] = { + 0x23: "end", # VK_END + 0x24: "home", # VK_HOME + 0x25: "left", # VK_LEFT + 0x26: "up", # VK_UP + 0x27: "right", # VK_RIGHT + 0x28: "down", # VK_DOWN + 0x2E: "delete", # VK_DELETE + 0x70: "f1", # VK_F1 + 0x71: "f2", # VK_F2 + 0x72: "f3", # VK_F3 + 0x73: "f4", # VK_F4 + 0x74: "f5", # VK_F5 + 0x75: "f6", # VK_F6 + 0x76: "f7", # VK_F7 + 0x77: "f8", # VK_F8 + 0x78: "f9", # VK_F9 + 0x79: "f10", # VK_F10 + 0x7A: "f11", # VK_F11 + 0x7B: "f12", # VK_F12 + 0x7C: "f13", # VK_F13 + 0x7D: "f14", # VK_F14 + 0x7E: "f15", # VK_F15 + 0x7F: "f16", # VK_F16 + 0x80: "f17", # VK_F17 + 0x81: "f18", # VK_F18 + 0x82: "f19", # VK_F19 + 0x83: "f20", # VK_F20 +} + +# Console escape codes: https://learn.microsoft.com/en-us/windows/console/console-virtual-terminal-sequences +ERASE_IN_LINE = "\x1b[K" +MOVE_LEFT = "\x1b[{}D" +MOVE_RIGHT = "\x1b[{}C" +MOVE_UP = "\x1b[{}A" +MOVE_DOWN = "\x1b[{}B" +CLEAR = "\x1b[H\x1b[J" + + +class _error(Exception): + pass + + +class WindowsConsole(Console): + def __init__( + self, + f_in: IO[bytes] | int = 0, + f_out: IO[bytes] | int = 1, + term: str = "", + encoding: str = "", + ): + super().__init__(f_in, f_out, term, encoding) + + SetConsoleMode( + OutHandle, + ENABLE_WRAP_AT_EOL_OUTPUT + | ENABLE_PROCESSED_OUTPUT + | ENABLE_VIRTUAL_TERMINAL_PROCESSING, + ) + self.screen: list[str] = [] + self.width = 80 + self.height = 25 + self.__offset = 0 + self.event_queue: deque[Event] = deque() + try: + self.out = io._WindowsConsoleIO(self.output_fd, "w") # type: ignore[attr-defined] + except ValueError: + # Console I/O is redirected, fallback... + self.out = None + + def refresh(self, screen: list[str], c_xy: tuple[int, int]) -> None: + """ + Refresh the console screen. + + Parameters: + - screen (list): List of strings representing the screen contents. + - c_xy (tuple): Cursor position (x, y) on the screen. + """ + cx, cy = c_xy + + while len(self.screen) < min(len(screen), self.height): + self._hide_cursor() + self._move_relative(0, len(self.screen) - 1) + self.__write("\n") + self.posxy = 0, len(self.screen) + self.screen.append("") + + px, py = self.posxy + old_offset = offset = self.__offset + height = self.height + + # we make sure the cursor is on the screen, and that we're + # using all of the screen if we can + if cy < offset: + offset = cy + elif cy >= offset + height: + offset = cy - height + 1 + scroll_lines = offset - old_offset + + # Scrolling the buffer as the current input is greater than the visible + # portion of the window. We need to scroll the visible portion and the + # entire history + self._scroll(scroll_lines, self._getscrollbacksize()) + self.posxy = self.posxy[0], self.posxy[1] + scroll_lines + self.__offset += scroll_lines + + for i in range(scroll_lines): + self.screen.append("") + elif offset > 0 and len(screen) < offset + height: + offset = max(len(screen) - height, 0) + screen.append("") + + oldscr = self.screen[old_offset : old_offset + height] + newscr = screen[offset : offset + height] + + self.__offset = offset + + self._hide_cursor() + for ( + y, + oldline, + newline, + ) in zip(range(offset, offset + height), oldscr, newscr): + if oldline != newline: + self.__write_changed_line(y, oldline, newline, px) + + y = len(newscr) + while y < len(oldscr): + self._move_relative(0, y) + self.posxy = 0, y + self._erase_to_end() + y += 1 + + self._show_cursor() + + self.screen = screen + self.move_cursor(cx, cy) + + @property + def input_hook(self): + try: + import nt + except ImportError: + return None + if nt._is_inputhook_installed(): + return nt._inputhook + + def __write_changed_line( + self, y: int, oldline: str, newline: str, px_coord: int + ) -> None: + # this is frustrating; there's no reason to test (say) + # self.dch1 inside the loop -- but alternative ways of + # structuring this function are equally painful (I'm trying to + # avoid writing code generators these days...) + minlen = min(wlen(oldline), wlen(newline)) + x_pos = 0 + x_coord = 0 + + px_pos = 0 + j = 0 + for c in oldline: + if j >= px_coord: + break + j += wlen(c) + px_pos += 1 + + # reuse the oldline as much as possible, but stop as soon as we + # encounter an ESCAPE, because it might be the start of an escape + # sequene + while ( + x_coord < minlen + and oldline[x_pos] == newline[x_pos] + and newline[x_pos] != "\x1b" + ): + x_coord += wlen(newline[x_pos]) + x_pos += 1 + + self._hide_cursor() + self._move_relative(x_coord, y) + if wlen(oldline) > wlen(newline): + self._erase_to_end() + + self.__write(newline[x_pos:]) + if wlen(newline) == self.width: + # If we wrapped we want to start at the next line + self._move_relative(0, y + 1) + self.posxy = 0, y + 1 + else: + self.posxy = wlen(newline), y + + if "\x1b" in newline or y != self.posxy[1] or '\x1a' in newline: + # ANSI escape characters are present, so we can't assume + # anything about the position of the cursor. Moving the cursor + # to the left margin should work to get to a known position. + self.move_cursor(0, y) + + def _scroll( + self, top: int, bottom: int, left: int | None = None, right: int | None = None + ) -> None: + scroll_rect = SMALL_RECT() + scroll_rect.Top = SHORT(top) + scroll_rect.Bottom = SHORT(bottom) + scroll_rect.Left = SHORT(0 if left is None else left) + scroll_rect.Right = SHORT( + self.getheightwidth()[1] - 1 if right is None else right + ) + destination_origin = _COORD() + fill_info = CHAR_INFO() + fill_info.UnicodeChar = " " + + if not ScrollConsoleScreenBuffer( + OutHandle, scroll_rect, None, destination_origin, fill_info + ): + raise WinError(GetLastError()) + + def _hide_cursor(self): + self.__write("\x1b[?25l") + + def _show_cursor(self): + self.__write("\x1b[?25h") + + def _enable_blinking(self): + self.__write("\x1b[?12h") + + def _disable_blinking(self): + self.__write("\x1b[?12l") + + def __write(self, text: str) -> None: + if "\x1a" in text: + text = ''.join(["^Z" if x == '\x1a' else x for x in text]) + + if self.out is not None: + self.out.write(text.encode(self.encoding, "replace")) + self.out.flush() + else: + os.write(self.output_fd, text.encode(self.encoding, "replace")) + + @property + def screen_xy(self) -> tuple[int, int]: + info = CONSOLE_SCREEN_BUFFER_INFO() + if not GetConsoleScreenBufferInfo(OutHandle, info): + raise WinError(GetLastError()) + return info.dwCursorPosition.X, info.dwCursorPosition.Y + + def _erase_to_end(self) -> None: + self.__write(ERASE_IN_LINE) + + def prepare(self) -> None: + trace("prepare") + self.screen = [] + self.height, self.width = self.getheightwidth() + + self.posxy = 0, 0 + self.__gone_tall = 0 + self.__offset = 0 + + def restore(self) -> None: + pass + + def _move_relative(self, x: int, y: int) -> None: + """Moves relative to the current posxy""" + dx = x - self.posxy[0] + dy = y - self.posxy[1] + if dx < 0: + self.__write(MOVE_LEFT.format(-dx)) + elif dx > 0: + self.__write(MOVE_RIGHT.format(dx)) + + if dy < 0: + self.__write(MOVE_UP.format(-dy)) + elif dy > 0: + self.__write(MOVE_DOWN.format(dy)) + + def move_cursor(self, x: int, y: int) -> None: + if x < 0 or y < 0: + raise ValueError(f"Bad cursor position {x}, {y}") + + if y < self.__offset or y >= self.__offset + self.height: + self.event_queue.insert(0, Event("scroll", "")) + else: + self._move_relative(x, y) + self.posxy = x, y + + def set_cursor_vis(self, visible: bool) -> None: + if visible: + self._show_cursor() + else: + self._hide_cursor() + + def getheightwidth(self) -> tuple[int, int]: + """Return (height, width) where height and width are the height + and width of the terminal window in characters.""" + info = CONSOLE_SCREEN_BUFFER_INFO() + if not GetConsoleScreenBufferInfo(OutHandle, info): + raise WinError(GetLastError()) + return ( + info.srWindow.Bottom - info.srWindow.Top + 1, + info.srWindow.Right - info.srWindow.Left + 1, + ) + + def _getscrollbacksize(self) -> int: + info = CONSOLE_SCREEN_BUFFER_INFO() + if not GetConsoleScreenBufferInfo(OutHandle, info): + raise WinError(GetLastError()) + + return info.srWindow.Bottom # type: ignore[no-any-return] + + def _read_input(self, block: bool = True) -> INPUT_RECORD | None: + if not block: + events = DWORD() + if not GetNumberOfConsoleInputEvents(InHandle, events): + raise WinError(GetLastError()) + if not events.value: + return None + + rec = INPUT_RECORD() + read = DWORD() + if not ReadConsoleInput(InHandle, rec, 1, read): + raise WinError(GetLastError()) + + return rec + + def get_event(self, block: bool = True) -> Event | None: + """Return an Event instance. Returns None if |block| is false + and there is no event pending, otherwise waits for the + completion of an event.""" + if self.event_queue: + return self.event_queue.pop() + + while True: + rec = self._read_input(block) + if rec is None: + return None + + if rec.EventType == WINDOW_BUFFER_SIZE_EVENT: + return Event("resize", "") + + if rec.EventType != KEY_EVENT or not rec.Event.KeyEvent.bKeyDown: + # Only process keys and keydown events + if block: + continue + return None + + key = rec.Event.KeyEvent.uChar.UnicodeChar + + if rec.Event.KeyEvent.uChar.UnicodeChar == "\r": + # Make enter make unix-like + return Event(evt="key", data="\n", raw=b"\n") + elif rec.Event.KeyEvent.wVirtualKeyCode == 8: + # Turn backspace directly into the command + return Event( + evt="key", + data="backspace", + raw=rec.Event.KeyEvent.uChar.UnicodeChar, + ) + elif rec.Event.KeyEvent.uChar.UnicodeChar == "\x00": + # Handle special keys like arrow keys and translate them into the appropriate command + code = VK_MAP.get(rec.Event.KeyEvent.wVirtualKeyCode) + if code: + return Event( + evt="key", data=code, raw=rec.Event.KeyEvent.uChar.UnicodeChar + ) + if block: + continue + + return None + + return Event(evt="key", data=key, raw=rec.Event.KeyEvent.uChar.UnicodeChar) + + def push_char(self, char: int | bytes) -> None: + """ + Push a character to the console event queue. + """ + raise NotImplementedError("push_char not supported on Windows") + + def beep(self) -> None: + self.__write("\x07") + + def clear(self) -> None: + """Wipe the screen""" + self.__write(CLEAR) + self.posxy = 0, 0 + self.screen = [""] + + def finish(self) -> None: + """Move the cursor to the end of the display and otherwise get + ready for end. XXX could be merged with restore? Hmm.""" + y = len(self.screen) - 1 + while y >= 0 and not self.screen[y]: + y -= 1 + self._move_relative(0, min(y, self.height + self.__offset - 1)) + self.__write("\r\n") + + def flushoutput(self) -> None: + """Flush all output to the screen (assuming there's some + buffering going on somewhere). + + All output on Windows is unbuffered so this is a nop""" + pass + + def forgetinput(self) -> None: + """Forget all pending, but not yet processed input.""" + if not FlushConsoleInputBuffer(InHandle): + raise WinError(GetLastError()) + + def getpending(self) -> Event: + """Return the characters that have been typed but not yet + processed.""" + return Event("key", "", b"") + + def wait(self, timeout: float | None) -> bool: + """Wait for an event.""" + # Poor man's Windows select loop + start_time = time.time() + while True: + if msvcrt.kbhit(): # type: ignore[attr-defined] + return True + if timeout and time.time() - start_time > timeout / 1000: + return False + time.sleep(0.01) + + def repaint(self) -> None: + raise NotImplementedError("No repaint support") + + +# Windows interop +class CONSOLE_SCREEN_BUFFER_INFO(Structure): + _fields_ = [ + ("dwSize", _COORD), + ("dwCursorPosition", _COORD), + ("wAttributes", WORD), + ("srWindow", SMALL_RECT), + ("dwMaximumWindowSize", _COORD), + ] + + +class CONSOLE_CURSOR_INFO(Structure): + _fields_ = [ + ("dwSize", DWORD), + ("bVisible", BOOL), + ] + + +class CHAR_INFO(Structure): + _fields_ = [ + ("UnicodeChar", WCHAR), + ("Attributes", WORD), + ] + + +class Char(Union): + _fields_ = [ + ("UnicodeChar", WCHAR), + ("Char", CHAR), + ] + + +class KeyEvent(ctypes.Structure): + _fields_ = [ + ("bKeyDown", BOOL), + ("wRepeatCount", WORD), + ("wVirtualKeyCode", WORD), + ("wVirtualScanCode", WORD), + ("uChar", Char), + ("dwControlKeyState", DWORD), + ] + + +class WindowsBufferSizeEvent(ctypes.Structure): + _fields_ = [("dwSize", _COORD)] + + +class ConsoleEvent(ctypes.Union): + _fields_ = [ + ("KeyEvent", KeyEvent), + ("WindowsBufferSizeEvent", WindowsBufferSizeEvent), + ] + + +class INPUT_RECORD(Structure): + _fields_ = [("EventType", WORD), ("Event", ConsoleEvent)] + + +KEY_EVENT = 0x01 +FOCUS_EVENT = 0x10 +MENU_EVENT = 0x08 +MOUSE_EVENT = 0x02 +WINDOW_BUFFER_SIZE_EVENT = 0x04 + +ENABLE_PROCESSED_OUTPUT = 0x01 +ENABLE_WRAP_AT_EOL_OUTPUT = 0x02 +ENABLE_VIRTUAL_TERMINAL_PROCESSING = 0x04 + +STD_INPUT_HANDLE = -10 +STD_OUTPUT_HANDLE = -11 + +if sys.platform == "win32": + _KERNEL32 = WinDLL("kernel32", use_last_error=True) + + GetStdHandle = windll.kernel32.GetStdHandle + GetStdHandle.argtypes = [DWORD] + GetStdHandle.restype = HANDLE + + GetConsoleScreenBufferInfo = _KERNEL32.GetConsoleScreenBufferInfo + GetConsoleScreenBufferInfo.argtypes = [ + HANDLE, + ctypes.POINTER(CONSOLE_SCREEN_BUFFER_INFO), + ] + GetConsoleScreenBufferInfo.restype = BOOL + + ScrollConsoleScreenBuffer = _KERNEL32.ScrollConsoleScreenBufferW + ScrollConsoleScreenBuffer.argtypes = [ + HANDLE, + POINTER(SMALL_RECT), + POINTER(SMALL_RECT), + _COORD, + POINTER(CHAR_INFO), + ] + ScrollConsoleScreenBuffer.restype = BOOL + + SetConsoleMode = _KERNEL32.SetConsoleMode + SetConsoleMode.argtypes = [HANDLE, DWORD] + SetConsoleMode.restype = BOOL + + ReadConsoleInput = _KERNEL32.ReadConsoleInputW + ReadConsoleInput.argtypes = [HANDLE, POINTER(INPUT_RECORD), DWORD, POINTER(DWORD)] + ReadConsoleInput.restype = BOOL + + GetNumberOfConsoleInputEvents = _KERNEL32.GetNumberOfConsoleInputEvents + GetNumberOfConsoleInputEvents.argtypes = [HANDLE, POINTER(DWORD)] + GetNumberOfConsoleInputEvents.restype = BOOL + + FlushConsoleInputBuffer = _KERNEL32.FlushConsoleInputBuffer + FlushConsoleInputBuffer.argtypes = [HANDLE] + FlushConsoleInputBuffer.restype = BOOL + + OutHandle = GetStdHandle(STD_OUTPUT_HANDLE) + InHandle = GetStdHandle(STD_INPUT_HANDLE) +else: + + def _win_only(*args, **kwargs): + raise NotImplementedError("Windows only") + + GetStdHandle = _win_only + GetConsoleScreenBufferInfo = _win_only + ScrollConsoleScreenBuffer = _win_only + SetConsoleMode = _win_only + ReadConsoleInput = _win_only + GetNumberOfConsoleInputEvents = _win_only + FlushConsoleInputBuffer = _win_only + OutHandle = 0 + InHandle = 0 diff --git a/Lib/_threading_local.py b/Lib/_threading_local.py index e520433998..0b9e5d3bbf 100644 --- a/Lib/_threading_local.py +++ b/Lib/_threading_local.py @@ -4,133 +4,6 @@ class. Depending on the version of Python you're using, there may be a faster one available. You should always import the `local` class from `threading`.) - -Thread-local objects support the management of thread-local data. -If you have data that you want to be local to a thread, simply create -a thread-local object and use its attributes: - - >>> mydata = local() - >>> mydata.number = 42 - >>> mydata.number - 42 - -You can also access the local-object's dictionary: - - >>> mydata.__dict__ - {'number': 42} - >>> mydata.__dict__.setdefault('widgets', []) - [] - >>> mydata.widgets - [] - -What's important about thread-local objects is that their data are -local to a thread. If we access the data in a different thread: - - >>> log = [] - >>> def f(): - ... items = sorted(mydata.__dict__.items()) - ... log.append(items) - ... mydata.number = 11 - ... log.append(mydata.number) - - >>> import threading - >>> thread = threading.Thread(target=f) - >>> thread.start() - >>> thread.join() - >>> log - [[], 11] - -we get different data. Furthermore, changes made in the other thread -don't affect data seen in this thread: - - >>> mydata.number - 42 - -Of course, values you get from a local object, including a __dict__ -attribute, are for whatever thread was current at the time the -attribute was read. For that reason, you generally don't want to save -these values across threads, as they apply only to the thread they -came from. - -You can create custom local objects by subclassing the local class: - - >>> class MyLocal(local): - ... number = 2 - ... initialized = False - ... def __init__(self, **kw): - ... if self.initialized: - ... raise SystemError('__init__ called too many times') - ... self.initialized = True - ... self.__dict__.update(kw) - ... def squared(self): - ... return self.number ** 2 - -This can be useful to support default values, methods and -initialization. Note that if you define an __init__ method, it will be -called each time the local object is used in a separate thread. This -is necessary to initialize each thread's dictionary. - -Now if we create a local object: - - >>> mydata = MyLocal(color='red') - -Now we have a default number: - - >>> mydata.number - 2 - -an initial color: - - >>> mydata.color - 'red' - >>> del mydata.color - -And a method that operates on the data: - - >>> mydata.squared() - 4 - -As before, we can access the data in a separate thread: - - >>> log = [] - >>> thread = threading.Thread(target=f) - >>> thread.start() - >>> thread.join() - >>> log - [[('color', 'red'), ('initialized', True)], 11] - -without affecting this thread's data: - - >>> mydata.number - 2 - >>> mydata.color - Traceback (most recent call last): - ... - AttributeError: 'MyLocal' object has no attribute 'color' - -Note that subclasses can define slots, but they are not thread -local. They are shared across threads: - - >>> class MyLocal(local): - ... __slots__ = 'number' - - >>> mydata = MyLocal() - >>> mydata.number = 42 - >>> mydata.color = 'red' - -So, the separate thread: - - >>> thread = threading.Thread(target=f) - >>> thread.start() - >>> thread.join() - -affects what we see: - - >>> # TODO: RUSTPYTHON, __slots__ - >>> mydata.number #doctest: +SKIP - 11 - ->>> del mydata """ from weakref import ref @@ -194,7 +67,6 @@ def thread_deleted(_, idt=idt): @contextmanager def _patch(self): - old = object.__getattribute__(self, '__dict__') impl = object.__getattribute__(self, '_local__impl') try: dct = impl.get_dict() @@ -205,13 +77,12 @@ def _patch(self): with impl.locallock: object.__setattr__(self, '__dict__', dct) yield - object.__setattr__(self, '__dict__', old) class local: __slots__ = '_local__impl', '__dict__' - def __new__(cls, *args, **kw): + def __new__(cls, /, *args, **kw): if (args or kw) and (cls.__init__ is object.__init__): raise TypeError("Initialization arguments are not supported") self = object.__new__(cls) diff --git a/Lib/abc.py b/Lib/abc.py index 1ecff5e214..f8a4e11ce9 100644 --- a/Lib/abc.py +++ b/Lib/abc.py @@ -85,10 +85,6 @@ def my_abstract_property(self): from _abc import (get_cache_token, _abc_init, _abc_register, _abc_instancecheck, _abc_subclasscheck, _get_dump, _reset_registry, _reset_caches) -# TODO: RUSTPYTHON missing _abc module implementation. -except ModuleNotFoundError: - from _py_abc import ABCMeta, get_cache_token - ABCMeta.__module__ = 'abc' except ImportError: from _py_abc import ABCMeta, get_cache_token ABCMeta.__module__ = 'abc' diff --git a/Lib/aifc.py b/Lib/aifc.py deleted file mode 100644 index 5254987e22..0000000000 --- a/Lib/aifc.py +++ /dev/null @@ -1,984 +0,0 @@ -"""Stuff to parse AIFF-C and AIFF files. - -Unless explicitly stated otherwise, the description below is true -both for AIFF-C files and AIFF files. - -An AIFF-C file has the following structure. - - +-----------------+ - | FORM | - +-----------------+ - | | - +----+------------+ - | | AIFC | - | +------------+ - | | | - | | . | - | | . | - | | . | - +----+------------+ - -An AIFF file has the string "AIFF" instead of "AIFC". - -A chunk consists of an identifier (4 bytes) followed by a size (4 bytes, -big endian order), followed by the data. The size field does not include -the size of the 8 byte header. - -The following chunk types are recognized. - - FVER - (AIFF-C only). - MARK - <# of markers> (2 bytes) - list of markers: - (2 bytes, must be > 0) - (4 bytes) - ("pstring") - COMM - <# of channels> (2 bytes) - <# of sound frames> (4 bytes) - (2 bytes) - (10 bytes, IEEE 80-bit extended - floating point) - in AIFF-C files only: - (4 bytes) - ("pstring") - SSND - (4 bytes, not used by this program) - (4 bytes, not used by this program) - - -A pstring consists of 1 byte length, a string of characters, and 0 or 1 -byte pad to make the total length even. - -Usage. - -Reading AIFF files: - f = aifc.open(file, 'r') -where file is either the name of a file or an open file pointer. -The open file pointer must have methods read(), seek(), and close(). -In some types of audio files, if the setpos() method is not used, -the seek() method is not necessary. - -This returns an instance of a class with the following public methods: - getnchannels() -- returns number of audio channels (1 for - mono, 2 for stereo) - getsampwidth() -- returns sample width in bytes - getframerate() -- returns sampling frequency - getnframes() -- returns number of audio frames - getcomptype() -- returns compression type ('NONE' for AIFF files) - getcompname() -- returns human-readable version of - compression type ('not compressed' for AIFF files) - getparams() -- returns a namedtuple consisting of all of the - above in the above order - getmarkers() -- get the list of marks in the audio file or None - if there are no marks - getmark(id) -- get mark with the specified id (raises an error - if the mark does not exist) - readframes(n) -- returns at most n frames of audio - rewind() -- rewind to the beginning of the audio stream - setpos(pos) -- seek to the specified position - tell() -- return the current position - close() -- close the instance (make it unusable) -The position returned by tell(), the position given to setpos() and -the position of marks are all compatible and have nothing to do with -the actual position in the file. -The close() method is called automatically when the class instance -is destroyed. - -Writing AIFF files: - f = aifc.open(file, 'w') -where file is either the name of a file or an open file pointer. -The open file pointer must have methods write(), tell(), seek(), and -close(). - -This returns an instance of a class with the following public methods: - aiff() -- create an AIFF file (AIFF-C default) - aifc() -- create an AIFF-C file - setnchannels(n) -- set the number of channels - setsampwidth(n) -- set the sample width - setframerate(n) -- set the frame rate - setnframes(n) -- set the number of frames - setcomptype(type, name) - -- set the compression type and the - human-readable compression type - setparams(tuple) - -- set all parameters at once - setmark(id, pos, name) - -- add specified mark to the list of marks - tell() -- return current position in output file (useful - in combination with setmark()) - writeframesraw(data) - -- write audio frames without pathing up the - file header - writeframes(data) - -- write audio frames and patch up the file header - close() -- patch up the file header and close the - output file -You should set the parameters before the first writeframesraw or -writeframes. The total number of frames does not need to be set, -but when it is set to the correct value, the header does not have to -be patched up. -It is best to first set all parameters, perhaps possibly the -compression type, and then write audio frames using writeframesraw. -When all frames have been written, either call writeframes(b'') or -close() to patch up the sizes in the header. -Marks can be added anytime. If there are any marks, you must call -close() after all frames have been written. -The close() method is called automatically when the class instance -is destroyed. - -When a file is opened with the extension '.aiff', an AIFF file is -written, otherwise an AIFF-C file is written. This default can be -changed by calling aiff() or aifc() before the first writeframes or -writeframesraw. -""" - -import struct -import builtins -import warnings - -__all__ = ["Error", "open"] - - -warnings._deprecated(__name__, remove=(3, 13)) - - -class Error(Exception): - pass - -_AIFC_version = 0xA2805140 # Version 1 of AIFF-C - -def _read_long(file): - try: - return struct.unpack('>l', file.read(4))[0] - except struct.error: - raise EOFError from None - -def _read_ulong(file): - try: - return struct.unpack('>L', file.read(4))[0] - except struct.error: - raise EOFError from None - -def _read_short(file): - try: - return struct.unpack('>h', file.read(2))[0] - except struct.error: - raise EOFError from None - -def _read_ushort(file): - try: - return struct.unpack('>H', file.read(2))[0] - except struct.error: - raise EOFError from None - -def _read_string(file): - length = ord(file.read(1)) - if length == 0: - data = b'' - else: - data = file.read(length) - if length & 1 == 0: - dummy = file.read(1) - return data - -_HUGE_VAL = 1.79769313486231e+308 # See - -def _read_float(f): # 10 bytes - expon = _read_short(f) # 2 bytes - sign = 1 - if expon < 0: - sign = -1 - expon = expon + 0x8000 - himant = _read_ulong(f) # 4 bytes - lomant = _read_ulong(f) # 4 bytes - if expon == himant == lomant == 0: - f = 0.0 - elif expon == 0x7FFF: - f = _HUGE_VAL - else: - expon = expon - 16383 - f = (himant * 0x100000000 + lomant) * pow(2.0, expon - 63) - return sign * f - -def _write_short(f, x): - f.write(struct.pack('>h', x)) - -def _write_ushort(f, x): - f.write(struct.pack('>H', x)) - -def _write_long(f, x): - f.write(struct.pack('>l', x)) - -def _write_ulong(f, x): - f.write(struct.pack('>L', x)) - -def _write_string(f, s): - if len(s) > 255: - raise ValueError("string exceeds maximum pstring length") - f.write(struct.pack('B', len(s))) - f.write(s) - if len(s) & 1 == 0: - f.write(b'\x00') - -def _write_float(f, x): - import math - if x < 0: - sign = 0x8000 - x = x * -1 - else: - sign = 0 - if x == 0: - expon = 0 - himant = 0 - lomant = 0 - else: - fmant, expon = math.frexp(x) - if expon > 16384 or fmant >= 1 or fmant != fmant: # Infinity or NaN - expon = sign|0x7FFF - himant = 0 - lomant = 0 - else: # Finite - expon = expon + 16382 - if expon < 0: # denormalized - fmant = math.ldexp(fmant, expon) - expon = 0 - expon = expon | sign - fmant = math.ldexp(fmant, 32) - fsmant = math.floor(fmant) - himant = int(fsmant) - fmant = math.ldexp(fmant - fsmant, 32) - fsmant = math.floor(fmant) - lomant = int(fsmant) - _write_ushort(f, expon) - _write_ulong(f, himant) - _write_ulong(f, lomant) - -with warnings.catch_warnings(): - warnings.simplefilter("ignore", DeprecationWarning) - from chunk import Chunk -from collections import namedtuple - -_aifc_params = namedtuple('_aifc_params', - 'nchannels sampwidth framerate nframes comptype compname') - -_aifc_params.nchannels.__doc__ = 'Number of audio channels (1 for mono, 2 for stereo)' -_aifc_params.sampwidth.__doc__ = 'Sample width in bytes' -_aifc_params.framerate.__doc__ = 'Sampling frequency' -_aifc_params.nframes.__doc__ = 'Number of audio frames' -_aifc_params.comptype.__doc__ = 'Compression type ("NONE" for AIFF files)' -_aifc_params.compname.__doc__ = ("""\ -A human-readable version of the compression type -('not compressed' for AIFF files)""") - - -class Aifc_read: - # Variables used in this class: - # - # These variables are available to the user though appropriate - # methods of this class: - # _file -- the open file with methods read(), close(), and seek() - # set through the __init__() method - # _nchannels -- the number of audio channels - # available through the getnchannels() method - # _nframes -- the number of audio frames - # available through the getnframes() method - # _sampwidth -- the number of bytes per audio sample - # available through the getsampwidth() method - # _framerate -- the sampling frequency - # available through the getframerate() method - # _comptype -- the AIFF-C compression type ('NONE' if AIFF) - # available through the getcomptype() method - # _compname -- the human-readable AIFF-C compression type - # available through the getcomptype() method - # _markers -- the marks in the audio file - # available through the getmarkers() and getmark() - # methods - # _soundpos -- the position in the audio stream - # available through the tell() method, set through the - # setpos() method - # - # These variables are used internally only: - # _version -- the AIFF-C version number - # _decomp -- the decompressor from builtin module cl - # _comm_chunk_read -- 1 iff the COMM chunk has been read - # _aifc -- 1 iff reading an AIFF-C file - # _ssnd_seek_needed -- 1 iff positioned correctly in audio - # file for readframes() - # _ssnd_chunk -- instantiation of a chunk class for the SSND chunk - # _framesize -- size of one frame in the file - - _file = None # Set here since __del__ checks it - - def initfp(self, file): - self._version = 0 - self._convert = None - self._markers = [] - self._soundpos = 0 - self._file = file - chunk = Chunk(file) - if chunk.getname() != b'FORM': - raise Error('file does not start with FORM id') - formdata = chunk.read(4) - if formdata == b'AIFF': - self._aifc = 0 - elif formdata == b'AIFC': - self._aifc = 1 - else: - raise Error('not an AIFF or AIFF-C file') - self._comm_chunk_read = 0 - self._ssnd_chunk = None - while 1: - self._ssnd_seek_needed = 1 - try: - chunk = Chunk(self._file) - except EOFError: - break - chunkname = chunk.getname() - if chunkname == b'COMM': - self._read_comm_chunk(chunk) - self._comm_chunk_read = 1 - elif chunkname == b'SSND': - self._ssnd_chunk = chunk - dummy = chunk.read(8) - self._ssnd_seek_needed = 0 - elif chunkname == b'FVER': - self._version = _read_ulong(chunk) - elif chunkname == b'MARK': - self._readmark(chunk) - chunk.skip() - if not self._comm_chunk_read or not self._ssnd_chunk: - raise Error('COMM chunk and/or SSND chunk missing') - - def __init__(self, f): - if isinstance(f, str): - file_object = builtins.open(f, 'rb') - try: - self.initfp(file_object) - except: - file_object.close() - raise - else: - # assume it is an open file object already - self.initfp(f) - - def __enter__(self): - return self - - def __exit__(self, *args): - self.close() - - # - # User visible methods. - # - def getfp(self): - return self._file - - def rewind(self): - self._ssnd_seek_needed = 1 - self._soundpos = 0 - - def close(self): - file = self._file - if file is not None: - self._file = None - file.close() - - def tell(self): - return self._soundpos - - def getnchannels(self): - return self._nchannels - - def getnframes(self): - return self._nframes - - def getsampwidth(self): - return self._sampwidth - - def getframerate(self): - return self._framerate - - def getcomptype(self): - return self._comptype - - def getcompname(self): - return self._compname - -## def getversion(self): -## return self._version - - def getparams(self): - return _aifc_params(self.getnchannels(), self.getsampwidth(), - self.getframerate(), self.getnframes(), - self.getcomptype(), self.getcompname()) - - def getmarkers(self): - if len(self._markers) == 0: - return None - return self._markers - - def getmark(self, id): - for marker in self._markers: - if id == marker[0]: - return marker - raise Error('marker {0!r} does not exist'.format(id)) - - def setpos(self, pos): - if pos < 0 or pos > self._nframes: - raise Error('position not in range') - self._soundpos = pos - self._ssnd_seek_needed = 1 - - def readframes(self, nframes): - if self._ssnd_seek_needed: - self._ssnd_chunk.seek(0) - dummy = self._ssnd_chunk.read(8) - pos = self._soundpos * self._framesize - if pos: - self._ssnd_chunk.seek(pos + 8) - self._ssnd_seek_needed = 0 - if nframes == 0: - return b'' - data = self._ssnd_chunk.read(nframes * self._framesize) - if self._convert and data: - data = self._convert(data) - self._soundpos = self._soundpos + len(data) // (self._nchannels - * self._sampwidth) - return data - - # - # Internal methods. - # - - def _alaw2lin(self, data): - with warnings.catch_warnings(): - warnings.simplefilter('ignore', category=DeprecationWarning) - import audioop - return audioop.alaw2lin(data, 2) - - def _ulaw2lin(self, data): - with warnings.catch_warnings(): - warnings.simplefilter('ignore', category=DeprecationWarning) - import audioop - return audioop.ulaw2lin(data, 2) - - def _adpcm2lin(self, data): - with warnings.catch_warnings(): - warnings.simplefilter('ignore', category=DeprecationWarning) - import audioop - if not hasattr(self, '_adpcmstate'): - # first time - self._adpcmstate = None - data, self._adpcmstate = audioop.adpcm2lin(data, 2, self._adpcmstate) - return data - - def _sowt2lin(self, data): - with warnings.catch_warnings(): - warnings.simplefilter('ignore', category=DeprecationWarning) - import audioop - return audioop.byteswap(data, 2) - - def _read_comm_chunk(self, chunk): - self._nchannels = _read_short(chunk) - self._nframes = _read_long(chunk) - self._sampwidth = (_read_short(chunk) + 7) // 8 - self._framerate = int(_read_float(chunk)) - if self._sampwidth <= 0: - raise Error('bad sample width') - if self._nchannels <= 0: - raise Error('bad # of channels') - self._framesize = self._nchannels * self._sampwidth - if self._aifc: - #DEBUG: SGI's soundeditor produces a bad size :-( - kludge = 0 - if chunk.chunksize == 18: - kludge = 1 - warnings.warn('Warning: bad COMM chunk size') - chunk.chunksize = 23 - #DEBUG end - self._comptype = chunk.read(4) - #DEBUG start - if kludge: - length = ord(chunk.file.read(1)) - if length & 1 == 0: - length = length + 1 - chunk.chunksize = chunk.chunksize + length - chunk.file.seek(-1, 1) - #DEBUG end - self._compname = _read_string(chunk) - if self._comptype != b'NONE': - if self._comptype == b'G722': - self._convert = self._adpcm2lin - elif self._comptype in (b'ulaw', b'ULAW'): - self._convert = self._ulaw2lin - elif self._comptype in (b'alaw', b'ALAW'): - self._convert = self._alaw2lin - elif self._comptype in (b'sowt', b'SOWT'): - self._convert = self._sowt2lin - else: - raise Error('unsupported compression type') - self._sampwidth = 2 - else: - self._comptype = b'NONE' - self._compname = b'not compressed' - - def _readmark(self, chunk): - nmarkers = _read_short(chunk) - # Some files appear to contain invalid counts. - # Cope with this by testing for EOF. - try: - for i in range(nmarkers): - id = _read_short(chunk) - pos = _read_long(chunk) - name = _read_string(chunk) - if pos or name: - # some files appear to have - # dummy markers consisting of - # a position 0 and name '' - self._markers.append((id, pos, name)) - except EOFError: - w = ('Warning: MARK chunk contains only %s marker%s instead of %s' % - (len(self._markers), '' if len(self._markers) == 1 else 's', - nmarkers)) - warnings.warn(w) - -class Aifc_write: - # Variables used in this class: - # - # These variables are user settable through appropriate methods - # of this class: - # _file -- the open file with methods write(), close(), tell(), seek() - # set through the __init__() method - # _comptype -- the AIFF-C compression type ('NONE' in AIFF) - # set through the setcomptype() or setparams() method - # _compname -- the human-readable AIFF-C compression type - # set through the setcomptype() or setparams() method - # _nchannels -- the number of audio channels - # set through the setnchannels() or setparams() method - # _sampwidth -- the number of bytes per audio sample - # set through the setsampwidth() or setparams() method - # _framerate -- the sampling frequency - # set through the setframerate() or setparams() method - # _nframes -- the number of audio frames written to the header - # set through the setnframes() or setparams() method - # _aifc -- whether we're writing an AIFF-C file or an AIFF file - # set through the aifc() method, reset through the - # aiff() method - # - # These variables are used internally only: - # _version -- the AIFF-C version number - # _comp -- the compressor from builtin module cl - # _nframeswritten -- the number of audio frames actually written - # _datalength -- the size of the audio samples written to the header - # _datawritten -- the size of the audio samples actually written - - _file = None # Set here since __del__ checks it - - def __init__(self, f): - if isinstance(f, str): - file_object = builtins.open(f, 'wb') - try: - self.initfp(file_object) - except: - file_object.close() - raise - - # treat .aiff file extensions as non-compressed audio - if f.endswith('.aiff'): - self._aifc = 0 - else: - # assume it is an open file object already - self.initfp(f) - - def initfp(self, file): - self._file = file - self._version = _AIFC_version - self._comptype = b'NONE' - self._compname = b'not compressed' - self._convert = None - self._nchannels = 0 - self._sampwidth = 0 - self._framerate = 0 - self._nframes = 0 - self._nframeswritten = 0 - self._datawritten = 0 - self._datalength = 0 - self._markers = [] - self._marklength = 0 - self._aifc = 1 # AIFF-C is default - - def __del__(self): - self.close() - - def __enter__(self): - return self - - def __exit__(self, *args): - self.close() - - # - # User visible methods. - # - def aiff(self): - if self._nframeswritten: - raise Error('cannot change parameters after starting to write') - self._aifc = 0 - - def aifc(self): - if self._nframeswritten: - raise Error('cannot change parameters after starting to write') - self._aifc = 1 - - def setnchannels(self, nchannels): - if self._nframeswritten: - raise Error('cannot change parameters after starting to write') - if nchannels < 1: - raise Error('bad # of channels') - self._nchannels = nchannels - - def getnchannels(self): - if not self._nchannels: - raise Error('number of channels not set') - return self._nchannels - - def setsampwidth(self, sampwidth): - if self._nframeswritten: - raise Error('cannot change parameters after starting to write') - if sampwidth < 1 or sampwidth > 4: - raise Error('bad sample width') - self._sampwidth = sampwidth - - def getsampwidth(self): - if not self._sampwidth: - raise Error('sample width not set') - return self._sampwidth - - def setframerate(self, framerate): - if self._nframeswritten: - raise Error('cannot change parameters after starting to write') - if framerate <= 0: - raise Error('bad frame rate') - self._framerate = framerate - - def getframerate(self): - if not self._framerate: - raise Error('frame rate not set') - return self._framerate - - def setnframes(self, nframes): - if self._nframeswritten: - raise Error('cannot change parameters after starting to write') - self._nframes = nframes - - def getnframes(self): - return self._nframeswritten - - def setcomptype(self, comptype, compname): - if self._nframeswritten: - raise Error('cannot change parameters after starting to write') - if comptype not in (b'NONE', b'ulaw', b'ULAW', - b'alaw', b'ALAW', b'G722', b'sowt', b'SOWT'): - raise Error('unsupported compression type') - self._comptype = comptype - self._compname = compname - - def getcomptype(self): - return self._comptype - - def getcompname(self): - return self._compname - -## def setversion(self, version): -## if self._nframeswritten: -## raise Error, 'cannot change parameters after starting to write' -## self._version = version - - def setparams(self, params): - nchannels, sampwidth, framerate, nframes, comptype, compname = params - if self._nframeswritten: - raise Error('cannot change parameters after starting to write') - if comptype not in (b'NONE', b'ulaw', b'ULAW', - b'alaw', b'ALAW', b'G722', b'sowt', b'SOWT'): - raise Error('unsupported compression type') - self.setnchannels(nchannels) - self.setsampwidth(sampwidth) - self.setframerate(framerate) - self.setnframes(nframes) - self.setcomptype(comptype, compname) - - def getparams(self): - if not self._nchannels or not self._sampwidth or not self._framerate: - raise Error('not all parameters set') - return _aifc_params(self._nchannels, self._sampwidth, self._framerate, - self._nframes, self._comptype, self._compname) - - def setmark(self, id, pos, name): - if id <= 0: - raise Error('marker ID must be > 0') - if pos < 0: - raise Error('marker position must be >= 0') - if not isinstance(name, bytes): - raise Error('marker name must be bytes') - for i in range(len(self._markers)): - if id == self._markers[i][0]: - self._markers[i] = id, pos, name - return - self._markers.append((id, pos, name)) - - def getmark(self, id): - for marker in self._markers: - if id == marker[0]: - return marker - raise Error('marker {0!r} does not exist'.format(id)) - - def getmarkers(self): - if len(self._markers) == 0: - return None - return self._markers - - def tell(self): - return self._nframeswritten - - def writeframesraw(self, data): - if not isinstance(data, (bytes, bytearray)): - data = memoryview(data).cast('B') - self._ensure_header_written(len(data)) - nframes = len(data) // (self._sampwidth * self._nchannels) - if self._convert: - data = self._convert(data) - self._file.write(data) - self._nframeswritten = self._nframeswritten + nframes - self._datawritten = self._datawritten + len(data) - - def writeframes(self, data): - self.writeframesraw(data) - if self._nframeswritten != self._nframes or \ - self._datalength != self._datawritten: - self._patchheader() - - def close(self): - if self._file is None: - return - try: - self._ensure_header_written(0) - if self._datawritten & 1: - # quick pad to even size - self._file.write(b'\x00') - self._datawritten = self._datawritten + 1 - self._writemarkers() - if self._nframeswritten != self._nframes or \ - self._datalength != self._datawritten or \ - self._marklength: - self._patchheader() - finally: - # Prevent ref cycles - self._convert = None - f = self._file - self._file = None - f.close() - - # - # Internal methods. - # - - def _lin2alaw(self, data): - with warnings.catch_warnings(): - warnings.simplefilter('ignore', category=DeprecationWarning) - import audioop - return audioop.lin2alaw(data, 2) - - def _lin2ulaw(self, data): - with warnings.catch_warnings(): - warnings.simplefilter('ignore', category=DeprecationWarning) - import audioop - return audioop.lin2ulaw(data, 2) - - def _lin2adpcm(self, data): - with warnings.catch_warnings(): - warnings.simplefilter('ignore', category=DeprecationWarning) - import audioop - if not hasattr(self, '_adpcmstate'): - self._adpcmstate = None - data, self._adpcmstate = audioop.lin2adpcm(data, 2, self._adpcmstate) - return data - - def _lin2sowt(self, data): - with warnings.catch_warnings(): - warnings.simplefilter('ignore', category=DeprecationWarning) - import audioop - return audioop.byteswap(data, 2) - - def _ensure_header_written(self, datasize): - if not self._nframeswritten: - if self._comptype in (b'ULAW', b'ulaw', - b'ALAW', b'alaw', b'G722', - b'sowt', b'SOWT'): - if not self._sampwidth: - self._sampwidth = 2 - if self._sampwidth != 2: - raise Error('sample width must be 2 when compressing ' - 'with ulaw/ULAW, alaw/ALAW, sowt/SOWT ' - 'or G7.22 (ADPCM)') - if not self._nchannels: - raise Error('# channels not specified') - if not self._sampwidth: - raise Error('sample width not specified') - if not self._framerate: - raise Error('sampling rate not specified') - self._write_header(datasize) - - def _init_compression(self): - if self._comptype == b'G722': - self._convert = self._lin2adpcm - elif self._comptype in (b'ulaw', b'ULAW'): - self._convert = self._lin2ulaw - elif self._comptype in (b'alaw', b'ALAW'): - self._convert = self._lin2alaw - elif self._comptype in (b'sowt', b'SOWT'): - self._convert = self._lin2sowt - - def _write_header(self, initlength): - if self._aifc and self._comptype != b'NONE': - self._init_compression() - self._file.write(b'FORM') - if not self._nframes: - self._nframes = initlength // (self._nchannels * self._sampwidth) - self._datalength = self._nframes * self._nchannels * self._sampwidth - if self._datalength & 1: - self._datalength = self._datalength + 1 - if self._aifc: - if self._comptype in (b'ulaw', b'ULAW', b'alaw', b'ALAW'): - self._datalength = self._datalength // 2 - if self._datalength & 1: - self._datalength = self._datalength + 1 - elif self._comptype == b'G722': - self._datalength = (self._datalength + 3) // 4 - if self._datalength & 1: - self._datalength = self._datalength + 1 - try: - self._form_length_pos = self._file.tell() - except (AttributeError, OSError): - self._form_length_pos = None - commlength = self._write_form_length(self._datalength) - if self._aifc: - self._file.write(b'AIFC') - self._file.write(b'FVER') - _write_ulong(self._file, 4) - _write_ulong(self._file, self._version) - else: - self._file.write(b'AIFF') - self._file.write(b'COMM') - _write_ulong(self._file, commlength) - _write_short(self._file, self._nchannels) - if self._form_length_pos is not None: - self._nframes_pos = self._file.tell() - _write_ulong(self._file, self._nframes) - if self._comptype in (b'ULAW', b'ulaw', b'ALAW', b'alaw', b'G722'): - _write_short(self._file, 8) - else: - _write_short(self._file, self._sampwidth * 8) - _write_float(self._file, self._framerate) - if self._aifc: - self._file.write(self._comptype) - _write_string(self._file, self._compname) - self._file.write(b'SSND') - if self._form_length_pos is not None: - self._ssnd_length_pos = self._file.tell() - _write_ulong(self._file, self._datalength + 8) - _write_ulong(self._file, 0) - _write_ulong(self._file, 0) - - def _write_form_length(self, datalength): - if self._aifc: - commlength = 18 + 5 + len(self._compname) - if commlength & 1: - commlength = commlength + 1 - verslength = 12 - else: - commlength = 18 - verslength = 0 - _write_ulong(self._file, 4 + verslength + self._marklength + \ - 8 + commlength + 16 + datalength) - return commlength - - def _patchheader(self): - curpos = self._file.tell() - if self._datawritten & 1: - datalength = self._datawritten + 1 - self._file.write(b'\x00') - else: - datalength = self._datawritten - if datalength == self._datalength and \ - self._nframes == self._nframeswritten and \ - self._marklength == 0: - self._file.seek(curpos, 0) - return - self._file.seek(self._form_length_pos, 0) - dummy = self._write_form_length(datalength) - self._file.seek(self._nframes_pos, 0) - _write_ulong(self._file, self._nframeswritten) - self._file.seek(self._ssnd_length_pos, 0) - _write_ulong(self._file, datalength + 8) - self._file.seek(curpos, 0) - self._nframes = self._nframeswritten - self._datalength = datalength - - def _writemarkers(self): - if len(self._markers) == 0: - return - self._file.write(b'MARK') - length = 2 - for marker in self._markers: - id, pos, name = marker - length = length + len(name) + 1 + 6 - if len(name) & 1 == 0: - length = length + 1 - _write_ulong(self._file, length) - self._marklength = length + 8 - _write_short(self._file, len(self._markers)) - for marker in self._markers: - id, pos, name = marker - _write_short(self._file, id) - _write_ulong(self._file, pos) - _write_string(self._file, name) - -def open(f, mode=None): - if mode is None: - if hasattr(f, 'mode'): - mode = f.mode - else: - mode = 'rb' - if mode in ('r', 'rb'): - return Aifc_read(f) - elif mode in ('w', 'wb'): - return Aifc_write(f) - else: - raise Error("mode must be 'r', 'rb', 'w', or 'wb'") - - -if __name__ == '__main__': - import sys - if not sys.argv[1:]: - sys.argv.append('/usr/demos/data/audio/bach.aiff') - fn = sys.argv[1] - with open(fn, 'r') as f: - print("Reading", fn) - print("nchannels =", f.getnchannels()) - print("nframes =", f.getnframes()) - print("sampwidth =", f.getsampwidth()) - print("framerate =", f.getframerate()) - print("comptype =", f.getcomptype()) - print("compname =", f.getcompname()) - if sys.argv[2:]: - gn = sys.argv[2] - print("Writing", gn) - with open(gn, 'w') as g: - g.setparams(f.getparams()) - while 1: - data = f.readframes(1024) - if not data: - break - g.writeframes(data) - print("Done.") diff --git a/Lib/argparse.py b/Lib/argparse.py index 543d9944f9..bd088ea0e6 100644 --- a/Lib/argparse.py +++ b/Lib/argparse.py @@ -89,8 +89,6 @@ import re as _re import sys as _sys -import warnings - from gettext import gettext as _, ngettext SUPPRESS = '==SUPPRESS==' @@ -192,6 +190,7 @@ def __init__(self, # =============================== # Section and indentation methods # =============================== + def _indent(self): self._current_indent += self._indent_increment self._level += 1 @@ -225,7 +224,8 @@ def format_help(self): # add the heading if the section was non-empty if self.heading is not SUPPRESS and self.heading is not None: current_indent = self.formatter._current_indent - heading = '%*s%s:\n' % (current_indent, '', self.heading) + heading_text = _('%(heading)s:') % dict(heading=self.heading) + heading = '%*s%s\n' % (current_indent, '', heading_text) else: heading = '' @@ -238,6 +238,7 @@ def _add_item(self, func, args): # ======================== # Message building methods # ======================== + def start_section(self, heading): self._indent() section = self._Section(self, self._current_section, heading) @@ -262,13 +263,12 @@ def add_argument(self, action): # find all invocations get_invocation = self._format_action_invocation - invocations = [get_invocation(action)] + invocation_lengths = [len(get_invocation(action)) + self._current_indent] for subaction in self._iter_indented_subactions(action): - invocations.append(get_invocation(subaction)) + invocation_lengths.append(len(get_invocation(subaction)) + self._current_indent) # update the maximum item length - invocation_length = max(map(len, invocations)) - action_length = invocation_length + self._current_indent + action_length = max(invocation_lengths) self._action_max_length = max(self._action_max_length, action_length) @@ -282,6 +282,7 @@ def add_arguments(self, actions): # ======================= # Help-formatting methods # ======================= + def format_help(self): help = self._root_section.format_help() if help: @@ -329,17 +330,8 @@ def _format_usage(self, usage, actions, groups, prefix): if len(prefix) + len(usage) > text_width: # break usage into wrappable parts - part_regexp = ( - r'\(.*?\)+(?=\s|$)|' - r'\[.*?\]+(?=\s|$)|' - r'\S+' - ) - opt_usage = format(optionals, groups) - pos_usage = format(positionals, groups) - opt_parts = _re.findall(part_regexp, opt_usage) - pos_parts = _re.findall(part_regexp, pos_usage) - assert ' '.join(opt_parts) == opt_usage - assert ' '.join(pos_parts) == pos_usage + opt_parts = self._get_actions_usage_parts(optionals, groups) + pos_parts = self._get_actions_usage_parts(positionals, groups) # helper for wrapping lines def get_lines(parts, indent, prefix=None): @@ -392,6 +384,9 @@ def get_lines(parts, indent, prefix=None): return '%s%s\n\n' % (prefix, usage) def _format_actions_usage(self, actions, groups): + return ' '.join(self._get_actions_usage_parts(actions, groups)) + + def _get_actions_usage_parts(self, actions, groups): # find group indices and identify actions in groups group_actions = set() inserts = {} @@ -399,56 +394,26 @@ def _format_actions_usage(self, actions, groups): if not group._group_actions: raise ValueError(f'empty group {group}') + if all(action.help is SUPPRESS for action in group._group_actions): + continue + try: start = actions.index(group._group_actions[0]) except ValueError: continue else: - group_action_count = len(group._group_actions) - end = start + group_action_count + end = start + len(group._group_actions) if actions[start:end] == group._group_actions: - - suppressed_actions_count = 0 - for action in group._group_actions: - group_actions.add(action) - if action.help is SUPPRESS: - suppressed_actions_count += 1 - - exposed_actions_count = group_action_count - suppressed_actions_count - - if not group.required: - if start in inserts: - inserts[start] += ' [' - else: - inserts[start] = '[' - if end in inserts: - inserts[end] += ']' - else: - inserts[end] = ']' - elif exposed_actions_count > 1: - if start in inserts: - inserts[start] += ' (' - else: - inserts[start] = '(' - if end in inserts: - inserts[end] += ')' - else: - inserts[end] = ')' - for i in range(start + 1, end): - inserts[i] = '|' + group_actions.update(group._group_actions) + inserts[start, end] = group # collect all actions format strings parts = [] - for i, action in enumerate(actions): + for action in actions: # suppressed arguments are marked with None - # remove | separators for suppressed arguments if action.help is SUPPRESS: - parts.append(None) - if inserts.get(i) == '|': - inserts.pop(i) - elif inserts.get(i + 1) == '|': - inserts.pop(i + 1) + part = None # produce all arg strings elif not action.option_strings: @@ -460,9 +425,6 @@ def _format_actions_usage(self, actions, groups): if part[0] == '[' and part[-1] == ']': part = part[1:-1] - # add the action string to the list - parts.append(part) - # produce the first way to invoke the option in brackets else: option_string = action.option_strings[0] @@ -483,26 +445,32 @@ def _format_actions_usage(self, actions, groups): if not action.required and action not in group_actions: part = '[%s]' % part - # add the action string to the list - parts.append(part) - - # insert things at the necessary indices - for i in sorted(inserts, reverse=True): - parts[i:i] = [inserts[i]] - - # join all the action items with spaces - text = ' '.join([item for item in parts if item is not None]) + # add the action string to the list + parts.append(part) - # clean up separators for mutually exclusive groups - open = r'[\[(]' - close = r'[\])]' - text = _re.sub(r'(%s) ' % open, r'\1', text) - text = _re.sub(r' (%s)' % close, r'\1', text) - text = _re.sub(r'%s *%s' % (open, close), r'', text) - text = text.strip() - - # return the text - return text + # group mutually exclusive actions + inserted_separators_indices = set() + for start, end in sorted(inserts, reverse=True): + group = inserts[start, end] + group_parts = [item for item in parts[start:end] if item is not None] + group_size = len(group_parts) + if group.required: + open, close = "()" if group_size > 1 else ("", "") + else: + open, close = "[]" + group_parts[0] = open + group_parts[0] + group_parts[-1] = group_parts[-1] + close + for i, part in enumerate(group_parts[:-1], start=start): + # insert a separator if not already done in a nested group + if i not in inserted_separators_indices: + parts[i] = part + ' |' + inserted_separators_indices.add(i) + parts[start + group_size - 1] = group_parts[-1] + for i in range(start + group_size, end): + parts[i] = None + + # return the usage parts + return [item for item in parts if item is not None] def _format_text(self, text): if '%(prog)' in text: @@ -562,33 +530,27 @@ def _format_action(self, action): def _format_action_invocation(self, action): if not action.option_strings: default = self._get_default_metavar_for_positional(action) - metavar, = self._metavar_formatter(action, default)(1) - return metavar + return ' '.join(self._metavar_formatter(action, default)(1)) else: - parts = [] # if the Optional doesn't take a value, format is: # -s, --long if action.nargs == 0: - parts.extend(action.option_strings) + return ', '.join(action.option_strings) # if the Optional takes a value, format is: - # -s ARGS, --long ARGS + # -s, --long ARGS else: default = self._get_default_metavar_for_optional(action) args_string = self._format_args(action, default) - for option_string in action.option_strings: - parts.append('%s %s' % (option_string, args_string)) - - return ', '.join(parts) + return ', '.join(action.option_strings) + ' ' + args_string def _metavar_formatter(self, action, default_metavar): if action.metavar is not None: result = action.metavar elif action.choices is not None: - choice_strs = [str(choice) for choice in action.choices] - result = '{%s}' % ','.join(choice_strs) + result = '{%s}' % ','.join(map(str, action.choices)) else: result = default_metavar @@ -636,8 +598,7 @@ def _expand_help(self, action): if hasattr(params[name], '__name__'): params[name] = params[name].__name__ if params.get('choices') is not None: - choices_str = ', '.join([str(c) for c in params['choices']]) - params['choices'] = choices_str + params['choices'] = ', '.join(map(str, params['choices'])) return self._get_help_string(action) % params def _iter_indented_subactions(self, action): @@ -704,14 +665,6 @@ class ArgumentDefaultsHelpFormatter(HelpFormatter): """ def _get_help_string(self, action): - """ - Add the default value to the option help message. - - ArgumentDefaultsHelpFormatter and BooleanOptionalAction when it isn't - already present. This code will do that, detecting cornercases to - prevent duplicates or cases where it wouldn't make sense to the end - user. - """ help = action.help if help is None: help = '' @@ -720,7 +673,7 @@ def _get_help_string(self, action): if action.default is not SUPPRESS: defaulting_nargs = [OPTIONAL, ZERO_OR_MORE] if action.option_strings or action.nargs in defaulting_nargs: - help += ' (default: %(default)s)' + help += _(' (default: %(default)s)') return help @@ -750,11 +703,19 @@ def _get_action_name(argument): elif argument.option_strings: return '/'.join(argument.option_strings) elif argument.metavar not in (None, SUPPRESS): - return argument.metavar + metavar = argument.metavar + if not isinstance(metavar, tuple): + return metavar + if argument.nargs == ZERO_OR_MORE and len(metavar) == 2: + return '%s[, %s]' % metavar + elif argument.nargs == ONE_OR_MORE: + return '%s[, %s]' % metavar + else: + return ', '.join(metavar) elif argument.dest not in (None, SUPPRESS): return argument.dest elif argument.choices: - return '{' + ','.join(argument.choices) + '}' + return '{%s}' % ','.join(map(str, argument.choices)) else: return None @@ -849,7 +810,8 @@ def __init__(self, choices=None, required=False, help=None, - metavar=None): + metavar=None, + deprecated=False): self.option_strings = option_strings self.dest = dest self.nargs = nargs @@ -860,6 +822,7 @@ def __init__(self, self.required = required self.help = help self.metavar = metavar + self.deprecated = deprecated def _get_kwargs(self): names = [ @@ -873,6 +836,7 @@ def _get_kwargs(self): 'required', 'help', 'metavar', + 'deprecated', ] return [(name, getattr(self, name)) for name in names] @@ -895,7 +859,8 @@ def __init__(self, choices=_deprecated_default, required=False, help=None, - metavar=_deprecated_default): + metavar=_deprecated_default, + deprecated=False): _option_strings = [] for option_string in option_strings: @@ -910,6 +875,7 @@ def __init__(self, # parser.add_argument('-f', action=BooleanOptionalAction, type=int) for field_name in ('type', 'choices', 'metavar'): if locals()[field_name] is not _deprecated_default: + import warnings warnings._deprecated( field_name, "{name!r} is deprecated as of Python 3.12 and will be " @@ -932,7 +898,8 @@ def __init__(self, choices=choices, required=required, help=help, - metavar=metavar) + metavar=metavar, + deprecated=deprecated) def __call__(self, parser, namespace, values, option_string=None): @@ -955,7 +922,8 @@ def __init__(self, choices=None, required=False, help=None, - metavar=None): + metavar=None, + deprecated=False): if nargs == 0: raise ValueError('nargs for store actions must be != 0; if you ' 'have nothing to store, actions such as store ' @@ -972,7 +940,8 @@ def __init__(self, choices=choices, required=required, help=help, - metavar=metavar) + metavar=metavar, + deprecated=deprecated) def __call__(self, parser, namespace, values, option_string=None): setattr(namespace, self.dest, values) @@ -987,7 +956,8 @@ def __init__(self, default=None, required=False, help=None, - metavar=None): + metavar=None, + deprecated=False): super(_StoreConstAction, self).__init__( option_strings=option_strings, dest=dest, @@ -995,7 +965,8 @@ def __init__(self, const=const, default=default, required=required, - help=help) + help=help, + deprecated=deprecated) def __call__(self, parser, namespace, values, option_string=None): setattr(namespace, self.dest, self.const) @@ -1008,14 +979,16 @@ def __init__(self, dest, default=False, required=False, - help=None): + help=None, + deprecated=False): super(_StoreTrueAction, self).__init__( option_strings=option_strings, dest=dest, const=True, - default=default, + deprecated=deprecated, required=required, - help=help) + help=help, + default=default) class _StoreFalseAction(_StoreConstAction): @@ -1025,14 +998,16 @@ def __init__(self, dest, default=True, required=False, - help=None): + help=None, + deprecated=False): super(_StoreFalseAction, self).__init__( option_strings=option_strings, dest=dest, const=False, default=default, required=required, - help=help) + help=help, + deprecated=deprecated) class _AppendAction(Action): @@ -1047,7 +1022,8 @@ def __init__(self, choices=None, required=False, help=None, - metavar=None): + metavar=None, + deprecated=False): if nargs == 0: raise ValueError('nargs for append actions must be != 0; if arg ' 'strings are not supplying the value to append, ' @@ -1064,7 +1040,8 @@ def __init__(self, choices=choices, required=required, help=help, - metavar=metavar) + metavar=metavar, + deprecated=deprecated) def __call__(self, parser, namespace, values, option_string=None): items = getattr(namespace, self.dest, None) @@ -1082,7 +1059,8 @@ def __init__(self, default=None, required=False, help=None, - metavar=None): + metavar=None, + deprecated=False): super(_AppendConstAction, self).__init__( option_strings=option_strings, dest=dest, @@ -1091,7 +1069,8 @@ def __init__(self, default=default, required=required, help=help, - metavar=metavar) + metavar=metavar, + deprecated=deprecated) def __call__(self, parser, namespace, values, option_string=None): items = getattr(namespace, self.dest, None) @@ -1107,14 +1086,16 @@ def __init__(self, dest, default=None, required=False, - help=None): + help=None, + deprecated=False): super(_CountAction, self).__init__( option_strings=option_strings, dest=dest, nargs=0, default=default, required=required, - help=help) + help=help, + deprecated=deprecated) def __call__(self, parser, namespace, values, option_string=None): count = getattr(namespace, self.dest, None) @@ -1129,13 +1110,15 @@ def __init__(self, option_strings, dest=SUPPRESS, default=SUPPRESS, - help=None): + help=None, + deprecated=False): super(_HelpAction, self).__init__( option_strings=option_strings, dest=dest, default=default, nargs=0, - help=help) + help=help, + deprecated=deprecated) def __call__(self, parser, namespace, values, option_string=None): parser.print_help() @@ -1149,7 +1132,10 @@ def __init__(self, version=None, dest=SUPPRESS, default=SUPPRESS, - help="show program's version number and exit"): + help=None, + deprecated=False): + if help is None: + help = _("show program's version number and exit") super(_VersionAction, self).__init__( option_strings=option_strings, dest=dest, @@ -1193,6 +1179,7 @@ def __init__(self, self._parser_class = parser_class self._name_parser_map = {} self._choices_actions = [] + self._deprecated = set() super(_SubParsersAction, self).__init__( option_strings=option_strings, @@ -1203,7 +1190,7 @@ def __init__(self, help=help, metavar=metavar) - def add_parser(self, name, **kwargs): + def add_parser(self, name, *, deprecated=False, **kwargs): # set prog from the existing prefix if kwargs.get('prog') is None: kwargs['prog'] = '%s %s' % (self._prog_prefix, name) @@ -1231,6 +1218,10 @@ def add_parser(self, name, **kwargs): for alias in aliases: self._name_parser_map[alias] = parser + if deprecated: + self._deprecated.add(name) + self._deprecated.update(aliases) + return parser def _get_subactions(self): @@ -1246,13 +1237,17 @@ def __call__(self, parser, namespace, values, option_string=None): # select the parser try: - parser = self._name_parser_map[parser_name] + subparser = self._name_parser_map[parser_name] except KeyError: args = {'parser_name': parser_name, 'choices': ', '.join(self._name_parser_map)} msg = _('unknown parser %(parser_name)r (choices: %(choices)s)') % args raise ArgumentError(self, msg) + if parser_name in self._deprecated: + parser._warning(_("command '%(parser_name)s' is deprecated") % + {'parser_name': parser_name}) + # parse all the remaining options into the namespace # store any unrecognized options on the object, so that the top # level parser can decide what to do with them @@ -1260,12 +1255,13 @@ def __call__(self, parser, namespace, values, option_string=None): # In case this subparser defines new defaults, we parse them # in a new namespace object and then update the original # namespace for the relevant parts. - subnamespace, arg_strings = parser.parse_known_args(arg_strings, None) + subnamespace, arg_strings = subparser.parse_known_args(arg_strings, None) for key, value in vars(subnamespace).items(): setattr(namespace, key, value) if arg_strings: - vars(namespace).setdefault(_UNRECOGNIZED_ARGS_ATTR, []) + if not hasattr(namespace, _UNRECOGNIZED_ARGS_ATTR): + setattr(namespace, _UNRECOGNIZED_ARGS_ATTR, []) getattr(namespace, _UNRECOGNIZED_ARGS_ATTR).extend(arg_strings) class _ExtendAction(_AppendAction): @@ -1409,6 +1405,7 @@ def __init__(self, # ==================== # Registration methods # ==================== + def register(self, registry_name, value, object): registry = self._registries.setdefault(registry_name, {}) registry[value] = object @@ -1419,6 +1416,7 @@ def _registry_get(self, registry_name, value, default=None): # ================================== # Namespace default accessor methods # ================================== + def set_defaults(self, **kwargs): self._defaults.update(kwargs) @@ -1438,6 +1436,7 @@ def get_default(self, dest): # ======================= # Adding argument actions # ======================= + def add_argument(self, *args, **kwargs): """ add_argument(dest, ..., name=value, ...) @@ -1528,6 +1527,8 @@ def _add_container_actions(self, container): title_group_map = {} for group in self._action_groups: if group.title in title_group_map: + # This branch could happen if a derived class added + # groups with duplicated titles in __init__ msg = _('cannot merge actions - two groups are named %r') raise ValueError(msg % (group.title)) title_group_map[group.title] = group @@ -1552,7 +1553,11 @@ def _add_container_actions(self, container): # NOTE: if add_mutually_exclusive_group ever gains title= and # description= then this code will need to be expanded as above for group in container._mutually_exclusive_groups: - mutex_group = self.add_mutually_exclusive_group( + if group._container is container: + cont = self + else: + cont = title_group_map[group._container.title] + mutex_group = cont.add_mutually_exclusive_group( required=group.required) # map the actions to their new mutex group @@ -1571,9 +1576,8 @@ def _get_positional_kwargs(self, dest, **kwargs): # mark positional arguments as required if at least one is # always required - if kwargs.get('nargs') not in [OPTIONAL, ZERO_OR_MORE]: - kwargs['required'] = True - if kwargs.get('nargs') == ZERO_OR_MORE and 'default' not in kwargs: + nargs = kwargs.get('nargs') + if nargs not in [OPTIONAL, ZERO_OR_MORE, REMAINDER, SUPPRESS, 0]: kwargs['required'] = True # return the keyword arguments with no option strings @@ -1698,6 +1702,7 @@ def _remove_action(self, action): self._group_actions.remove(action) def add_argument_group(self, *args, **kwargs): + import warnings warnings.warn( "Nesting argument groups is deprecated.", category=DeprecationWarning, @@ -1726,6 +1731,7 @@ def _remove_action(self, action): self._group_actions.remove(action) def add_mutually_exclusive_group(self, *args, **kwargs): + import warnings warnings.warn( "Nesting mutually exclusive groups is deprecated.", category=DeprecationWarning, @@ -1811,17 +1817,16 @@ def identity(string): # add parent arguments and defaults for parent in parents: + if not isinstance(parent, ArgumentParser): + raise TypeError('parents must be a list of ArgumentParser') self._add_container_actions(parent) - try: - defaults = parent._defaults - except AttributeError: - pass - else: - self._defaults.update(defaults) + defaults = parent._defaults + self._defaults.update(defaults) # ======================= # Pretty __repr__ methods # ======================= + def _get_kwargs(self): names = [ 'prog', @@ -1836,16 +1841,17 @@ def _get_kwargs(self): # ================================== # Optional/Positional adding methods # ================================== + def add_subparsers(self, **kwargs): if self._subparsers is not None: - self.error(_('cannot have multiple subparser arguments')) + raise ArgumentError(None, _('cannot have multiple subparser arguments')) # add the parser class to the arguments if it's not present kwargs.setdefault('parser_class', type(self)) if 'title' in kwargs or 'description' in kwargs: - title = _(kwargs.pop('title', 'subcommands')) - description = _(kwargs.pop('description', None)) + title = kwargs.pop('title', _('subcommands')) + description = kwargs.pop('description', None) self._subparsers = self.add_argument_group(title, description) else: self._subparsers = self._positionals @@ -1887,14 +1893,21 @@ def _get_positional_actions(self): # ===================================== # Command line argument parsing methods # ===================================== + def parse_args(self, args=None, namespace=None): args, argv = self.parse_known_args(args, namespace) if argv: - msg = _('unrecognized arguments: %s') - self.error(msg % ' '.join(argv)) + msg = _('unrecognized arguments: %s') % ' '.join(argv) + if self.exit_on_error: + self.error(msg) + else: + raise ArgumentError(None, msg) return args def parse_known_args(self, args=None, namespace=None): + return self._parse_known_args2(args, namespace, intermixed=False) + + def _parse_known_args2(self, args, namespace, intermixed): if args is None: # args default to the system args args = _sys.argv[1:] @@ -1921,18 +1934,18 @@ def parse_known_args(self, args=None, namespace=None): # parse the arguments and exit if there are any errors if self.exit_on_error: try: - namespace, args = self._parse_known_args(args, namespace) + namespace, args = self._parse_known_args(args, namespace, intermixed) except ArgumentError as err: self.error(str(err)) else: - namespace, args = self._parse_known_args(args, namespace) + namespace, args = self._parse_known_args(args, namespace, intermixed) if hasattr(namespace, _UNRECOGNIZED_ARGS_ATTR): args.extend(getattr(namespace, _UNRECOGNIZED_ARGS_ATTR)) delattr(namespace, _UNRECOGNIZED_ARGS_ATTR) return namespace, args - def _parse_known_args(self, arg_strings, namespace): + def _parse_known_args(self, arg_strings, namespace, intermixed): # replace arg strings that are file references if self.fromfile_prefix_chars is not None: arg_strings = self._read_args_from_files(arg_strings) @@ -1964,11 +1977,11 @@ def _parse_known_args(self, arg_strings, namespace): # otherwise, add the arg to the arg strings # and note the index if it was an option else: - option_tuple = self._parse_optional(arg_string) - if option_tuple is None: + option_tuples = self._parse_optional(arg_string) + if option_tuples is None: pattern = 'A' else: - option_string_indices[i] = option_tuple + option_string_indices[i] = option_tuples pattern = 'O' arg_string_pattern_parts.append(pattern) @@ -1978,15 +1991,15 @@ def _parse_known_args(self, arg_strings, namespace): # converts arg strings to the appropriate and then takes the action seen_actions = set() seen_non_default_actions = set() + warned = set() def take_action(action, argument_strings, option_string=None): seen_actions.add(action) argument_values = self._get_values(action, argument_strings) # error if this argument is not allowed with other previously - # seen arguments, assuming that actions that use the default - # value don't really count as "present" - if argument_values is not action.default: + # seen arguments + if action.option_strings or argument_strings: seen_non_default_actions.add(action) for conflict_action in action_conflicts.get(action, []): if conflict_action in seen_non_default_actions: @@ -2003,8 +2016,16 @@ def take_action(action, argument_strings, option_string=None): def consume_optional(start_index): # get the optional identified at this index - option_tuple = option_string_indices[start_index] - action, option_string, explicit_arg = option_tuple + option_tuples = option_string_indices[start_index] + # if multiple actions match, the option string was ambiguous + if len(option_tuples) > 1: + options = ', '.join([option_string + for action, option_string, sep, explicit_arg in option_tuples]) + args = {'option': arg_strings[start_index], 'matches': options} + msg = _('ambiguous option: %(option)s could match %(matches)s') + raise ArgumentError(None, msg % args) + + action, option_string, sep, explicit_arg = option_tuples[0] # identify additional optionals in the same arg string # (e.g. -xyz is the same as -x -y -z if no args are required) @@ -2015,6 +2036,7 @@ def consume_optional(start_index): # if we found no optional action, skip it if action is None: extras.append(arg_strings[start_index]) + extras_pattern.append('O') return start_index + 1 # if there is an explicit argument, try to match the @@ -2031,18 +2053,28 @@ def consume_optional(start_index): and option_string[1] not in chars and explicit_arg != '' ): + if sep or explicit_arg[0] in chars: + msg = _('ignored explicit argument %r') + raise ArgumentError(action, msg % explicit_arg) action_tuples.append((action, [], option_string)) char = option_string[0] option_string = char + explicit_arg[0] - new_explicit_arg = explicit_arg[1:] or None optionals_map = self._option_string_actions if option_string in optionals_map: action = optionals_map[option_string] - explicit_arg = new_explicit_arg + explicit_arg = explicit_arg[1:] + if not explicit_arg: + sep = explicit_arg = None + elif explicit_arg[0] == '=': + sep = '=' + explicit_arg = explicit_arg[1:] + else: + sep = '' else: - msg = _('ignored explicit argument %r') - raise ArgumentError(action, msg % explicit_arg) - + extras.append(char + explicit_arg) + extras_pattern.append('O') + stop = start_index + 1 + break # if the action expect exactly one argument, we've # successfully matched the option; exit the loop elif arg_count == 1: @@ -2073,6 +2105,10 @@ def consume_optional(start_index): # the Optional's string args stopped assert action_tuples for action, args, option_string in action_tuples: + if action.deprecated and option_string not in warned: + self._warning(_("option '%(option)s' is deprecated") % + {'option': option_string}) + warned.add(option_string) take_action(action, args, option_string) return stop @@ -2091,7 +2127,20 @@ def consume_positionals(start_index): # and add the Positional and its args to the list for action, arg_count in zip(positionals, arg_counts): args = arg_strings[start_index: start_index + arg_count] + # Strip out the first '--' if it is not in REMAINDER arg. + if action.nargs == PARSER: + if arg_strings_pattern[start_index] == '-': + assert args[0] == '--' + args.remove('--') + elif action.nargs != REMAINDER: + if (arg_strings_pattern.find('-', start_index, + start_index + arg_count) >= 0): + args.remove('--') start_index += arg_count + if args and action.deprecated and action.dest not in warned: + self._warning(_("argument '%(argument_name)s' is deprecated") % + {'argument_name': action.dest}) + warned.add(action.dest) take_action(action, args) # slice off the Positionals that we just parsed and return the @@ -2102,6 +2151,7 @@ def consume_positionals(start_index): # consume Positionals and Optionals alternately, until we have # passed the last option string extras = [] + extras_pattern = [] start_index = 0 if option_string_indices: max_option_string_index = max(option_string_indices) @@ -2110,11 +2160,12 @@ def consume_positionals(start_index): while start_index <= max_option_string_index: # consume any Positionals preceding the next option - next_option_string_index = min([ - index - for index in option_string_indices - if index >= start_index]) - if start_index != next_option_string_index: + next_option_string_index = start_index + while next_option_string_index <= max_option_string_index: + if next_option_string_index in option_string_indices: + break + next_option_string_index += 1 + if not intermixed and start_index != next_option_string_index: positionals_end_index = consume_positionals(start_index) # only try to parse the next optional if we didn't consume @@ -2130,16 +2181,35 @@ def consume_positionals(start_index): if start_index not in option_string_indices: strings = arg_strings[start_index:next_option_string_index] extras.extend(strings) + extras_pattern.extend(arg_strings_pattern[start_index:next_option_string_index]) start_index = next_option_string_index # consume the next optional and any arguments for it start_index = consume_optional(start_index) - # consume any positionals following the last Optional - stop_index = consume_positionals(start_index) + if not intermixed: + # consume any positionals following the last Optional + stop_index = consume_positionals(start_index) - # if we didn't consume all the argument strings, there were extras - extras.extend(arg_strings[stop_index:]) + # if we didn't consume all the argument strings, there were extras + extras.extend(arg_strings[stop_index:]) + else: + extras.extend(arg_strings[start_index:]) + extras_pattern.extend(arg_strings_pattern[start_index:]) + extras_pattern = ''.join(extras_pattern) + assert len(extras_pattern) == len(extras) + # consume all positionals + arg_strings = [s for s, c in zip(extras, extras_pattern) if c != 'O'] + arg_strings_pattern = extras_pattern.replace('O', '') + stop_index = consume_positionals(0) + # leave unknown optionals and non-consumed positionals in extras + for i, c in enumerate(extras_pattern): + if not stop_index: + break + if c != 'O': + stop_index -= 1 + extras[i] = None + extras = [s for s in extras if s is not None] # make sure all required actions were present and also convert # action defaults which were not given as arguments @@ -2161,7 +2231,7 @@ def consume_positionals(start_index): self._get_value(action, action.default)) if required_actions: - self.error(_('the following arguments are required: %s') % + raise ArgumentError(None, _('the following arguments are required: %s') % ', '.join(required_actions)) # make sure all required groups had one option present @@ -2177,7 +2247,7 @@ def consume_positionals(start_index): for action in group._group_actions if action.help is not SUPPRESS] msg = _('one of the arguments %s is required') - self.error(msg % ' '.join(names)) + raise ArgumentError(None, msg % ' '.join(names)) # return the updated namespace and the extra arguments return namespace, extras @@ -2204,7 +2274,7 @@ def _read_args_from_files(self, arg_strings): arg_strings = self._read_args_from_files(arg_strings) new_arg_strings.extend(arg_strings) except OSError as err: - self.error(str(err)) + raise ArgumentError(None, str(err)) # return the modified argument list return new_arg_strings @@ -2237,18 +2307,19 @@ def _match_argument(self, action, arg_strings_pattern): def _match_arguments_partial(self, actions, arg_strings_pattern): # progressively shorten the actions list by slicing off the # final actions until we find a match - result = [] for i in range(len(actions), 0, -1): actions_slice = actions[:i] pattern = ''.join([self._get_nargs_pattern(action) for action in actions_slice]) match = _re.match(pattern, arg_strings_pattern) if match is not None: - result.extend([len(string) for string in match.groups()]) - break - - # return the list of arg string counts - return result + result = [len(string) for string in match.groups()] + if (match.end() < len(arg_strings_pattern) + and arg_strings_pattern[match.end()] == 'O'): + while result and not result[-1]: + del result[-1] + return result + return [] def _parse_optional(self, arg_string): # if it's an empty string, it was meant to be a positional @@ -2262,36 +2333,24 @@ def _parse_optional(self, arg_string): # if the option string is present in the parser, return the action if arg_string in self._option_string_actions: action = self._option_string_actions[arg_string] - return action, arg_string, None + return [(action, arg_string, None, None)] # if it's just a single character, it was meant to be positional if len(arg_string) == 1: return None # if the option string before the "=" is present, return the action - if '=' in arg_string: - option_string, explicit_arg = arg_string.split('=', 1) - if option_string in self._option_string_actions: - action = self._option_string_actions[option_string] - return action, option_string, explicit_arg + option_string, sep, explicit_arg = arg_string.partition('=') + if sep and option_string in self._option_string_actions: + action = self._option_string_actions[option_string] + return [(action, option_string, sep, explicit_arg)] # search through all possible prefixes of the option string # and all actions in the parser for possible interpretations option_tuples = self._get_option_tuples(arg_string) - # if multiple actions match, the option string was ambiguous - if len(option_tuples) > 1: - options = ', '.join([option_string - for action, option_string, explicit_arg in option_tuples]) - args = {'option': arg_string, 'matches': options} - msg = _('ambiguous option: %(option)s could match %(matches)s') - self.error(msg % args) - - # if exactly one action matched, this segmentation is good, - # so return the parsed action - elif len(option_tuples) == 1: - option_tuple, = option_tuples - return option_tuple + if option_tuples: + return option_tuples # if it was not found as an option, but it looks like a negative # number, it was meant to be positional @@ -2306,7 +2365,7 @@ def _parse_optional(self, arg_string): # it was meant to be an optional but there is no such option # in this parser (though it might be a valid option in a subparser) - return None, arg_string, None + return [(None, arg_string, None, None)] def _get_option_tuples(self, option_string): result = [] @@ -2316,39 +2375,38 @@ def _get_option_tuples(self, option_string): chars = self.prefix_chars if option_string[0] in chars and option_string[1] in chars: if self.allow_abbrev: - if '=' in option_string: - option_prefix, explicit_arg = option_string.split('=', 1) - else: - option_prefix = option_string - explicit_arg = None + option_prefix, sep, explicit_arg = option_string.partition('=') + if not sep: + sep = explicit_arg = None for option_string in self._option_string_actions: if option_string.startswith(option_prefix): action = self._option_string_actions[option_string] - tup = action, option_string, explicit_arg + tup = action, option_string, sep, explicit_arg result.append(tup) # single character options can be concatenated with their arguments # but multiple character options always have to have their argument # separate elif option_string[0] in chars and option_string[1] not in chars: - option_prefix = option_string - explicit_arg = None + option_prefix, sep, explicit_arg = option_string.partition('=') + if not sep: + sep = explicit_arg = None short_option_prefix = option_string[:2] short_explicit_arg = option_string[2:] for option_string in self._option_string_actions: if option_string == short_option_prefix: action = self._option_string_actions[option_string] - tup = action, option_string, short_explicit_arg + tup = action, option_string, '', short_explicit_arg result.append(tup) - elif option_string.startswith(option_prefix): + elif self.allow_abbrev and option_string.startswith(option_prefix): action = self._option_string_actions[option_string] - tup = action, option_string, explicit_arg + tup = action, option_string, sep, explicit_arg result.append(tup) # shouldn't ever get here else: - self.error(_('unexpected option string: %s') % option_string) + raise ArgumentError(None, _('unexpected option string: %s') % option_string) # return the collected option tuples return result @@ -2357,43 +2415,40 @@ def _get_nargs_pattern(self, action): # in all examples below, we have to allow for '--' args # which are represented as '-' in the pattern nargs = action.nargs + # if this is an optional action, -- is not allowed + option = action.option_strings # the default (None) is assumed to be a single argument if nargs is None: - nargs_pattern = '(-*A-*)' + nargs_pattern = '([A])' if option else '(-*A-*)' # allow zero or one arguments elif nargs == OPTIONAL: - nargs_pattern = '(-*A?-*)' + nargs_pattern = '(A?)' if option else '(-*A?-*)' # allow zero or more arguments elif nargs == ZERO_OR_MORE: - nargs_pattern = '(-*[A-]*)' + nargs_pattern = '(A*)' if option else '(-*[A-]*)' # allow one or more arguments elif nargs == ONE_OR_MORE: - nargs_pattern = '(-*A[A-]*)' + nargs_pattern = '(A+)' if option else '(-*A[A-]*)' # allow any number of options or arguments elif nargs == REMAINDER: - nargs_pattern = '([-AO]*)' + nargs_pattern = '([AO]*)' if option else '(.*)' # allow one argument followed by any number of options or arguments elif nargs == PARSER: - nargs_pattern = '(-*A[-AO]*)' + nargs_pattern = '(A[AO]*)' if option else '(-*A[-AO]*)' # suppress action, like nargs=0 elif nargs == SUPPRESS: - nargs_pattern = '(-*-*)' + nargs_pattern = '()' if option else '(-*)' # all others should be integers else: - nargs_pattern = '(-*%s-*)' % '-*'.join('A' * nargs) - - # if this is an optional action, -- is not allowed - if action.option_strings: - nargs_pattern = nargs_pattern.replace('-*', '') - nargs_pattern = nargs_pattern.replace('-', '') + nargs_pattern = '([AO]{%d})' % nargs if option else '((?:-*A){%d}-*)' % nargs # return the pattern return nargs_pattern @@ -2405,8 +2460,11 @@ def _get_nargs_pattern(self, action): def parse_intermixed_args(self, args=None, namespace=None): args, argv = self.parse_known_intermixed_args(args, namespace) if argv: - msg = _('unrecognized arguments: %s') - self.error(msg % ' '.join(argv)) + msg = _('unrecognized arguments: %s') % ' '.join(argv) + if self.exit_on_error: + self.error(msg) + else: + raise ArgumentError(None, msg) return args def parse_known_intermixed_args(self, args=None, namespace=None): @@ -2417,10 +2475,6 @@ def parse_known_intermixed_args(self, args=None, namespace=None): # are then parsed. If the parser definition is incompatible with the # intermixed assumptions (e.g. use of REMAINDER, subparsers) a # TypeError is raised. - # - # positionals are 'deactivated' by setting nargs and default to - # SUPPRESS. This blocks the addition of that positional to the - # namespace positionals = self._get_positional_actions() a = [action for action in positionals @@ -2429,78 +2483,20 @@ def parse_known_intermixed_args(self, args=None, namespace=None): raise TypeError('parse_intermixed_args: positional arg' ' with nargs=%s'%a[0].nargs) - if [action.dest for group in self._mutually_exclusive_groups - for action in group._group_actions if action in positionals]: - raise TypeError('parse_intermixed_args: positional in' - ' mutuallyExclusiveGroup') - - try: - save_usage = self.usage - try: - if self.usage is None: - # capture the full usage for use in error messages - self.usage = self.format_usage()[7:] - for action in positionals: - # deactivate positionals - action.save_nargs = action.nargs - # action.nargs = 0 - action.nargs = SUPPRESS - action.save_default = action.default - action.default = SUPPRESS - namespace, remaining_args = self.parse_known_args(args, - namespace) - for action in positionals: - # remove the empty positional values from namespace - if (hasattr(namespace, action.dest) - and getattr(namespace, action.dest)==[]): - from warnings import warn - warn('Do not expect %s in %s' % (action.dest, namespace)) - delattr(namespace, action.dest) - finally: - # restore nargs and usage before exiting - for action in positionals: - action.nargs = action.save_nargs - action.default = action.save_default - optionals = self._get_optional_actions() - try: - # parse positionals. optionals aren't normally required, but - # they could be, so make sure they aren't. - for action in optionals: - action.save_required = action.required - action.required = False - for group in self._mutually_exclusive_groups: - group.save_required = group.required - group.required = False - namespace, extras = self.parse_known_args(remaining_args, - namespace) - finally: - # restore parser values before exiting - for action in optionals: - action.required = action.save_required - for group in self._mutually_exclusive_groups: - group.required = group.save_required - finally: - self.usage = save_usage - return namespace, extras + return self._parse_known_args2(args, namespace, intermixed=True) # ======================== # Value conversion methods # ======================== - def _get_values(self, action, arg_strings): - # for everything but PARSER, REMAINDER args, strip out first '--' - if action.nargs not in [PARSER, REMAINDER]: - try: - arg_strings.remove('--') - except ValueError: - pass + def _get_values(self, action, arg_strings): # optional argument produces a default when not present if not arg_strings and action.nargs == OPTIONAL: if action.option_strings: value = action.const else: value = action.default - if isinstance(value, str): + if isinstance(value, str) and value is not SUPPRESS: value = self._get_value(action, value) self._check_value(action, value) @@ -2571,15 +2567,20 @@ def _get_value(self, action, arg_string): def _check_value(self, action, value): # converted value must be one of the choices (if specified) - if action.choices is not None and value not in action.choices: - args = {'value': value, - 'choices': ', '.join(map(repr, action.choices))} - msg = _('invalid choice: %(value)r (choose from %(choices)s)') - raise ArgumentError(action, msg % args) + choices = action.choices + if choices is not None: + if isinstance(choices, str): + choices = iter(choices) + if value not in choices: + args = {'value': str(value), + 'choices': ', '.join(map(str, action.choices))} + msg = _('invalid choice: %(value)r (choose from %(choices)s)') + raise ArgumentError(action, msg % args) # ======================= # Help-formatting methods # ======================= + def format_usage(self): formatter = self._get_formatter() formatter.add_usage(self.usage, self._actions, @@ -2615,6 +2616,7 @@ def _get_formatter(self): # ===================== # Help-printing methods # ===================== + def print_usage(self, file=None): if file is None: file = _sys.stdout @@ -2636,6 +2638,7 @@ def _print_message(self, message, file=None): # =============== # Exiting methods # =============== + def exit(self, status=0, message=None): if message: self._print_message(message, _sys.stderr) @@ -2653,3 +2656,7 @@ def error(self, message): self.print_usage(_sys.stderr) args = {'prog': self.prog, 'message': message} self.exit(2, _('%(prog)s: error: %(message)s\n') % args) + + def _warning(self, message): + args = {'prog': self.prog, 'message': message} + self._print_message(_('%(prog)s: warning: %(message)s\n') % args, _sys.stderr) diff --git a/Lib/asynchat.py b/Lib/asynchat.py deleted file mode 100644 index fc1146adbb..0000000000 --- a/Lib/asynchat.py +++ /dev/null @@ -1,307 +0,0 @@ -# -*- Mode: Python; tab-width: 4 -*- -# Id: asynchat.py,v 2.26 2000/09/07 22:29:26 rushing Exp -# Author: Sam Rushing - -# ====================================================================== -# Copyright 1996 by Sam Rushing -# -# All Rights Reserved -# -# Permission to use, copy, modify, and distribute this software and -# its documentation for any purpose and without fee is hereby -# granted, provided that the above copyright notice appear in all -# copies and that both that copyright notice and this permission -# notice appear in supporting documentation, and that the name of Sam -# Rushing not be used in advertising or publicity pertaining to -# distribution of the software without specific, written prior -# permission. -# -# SAM RUSHING DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, -# INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN -# NO EVENT SHALL SAM RUSHING BE LIABLE FOR ANY SPECIAL, INDIRECT OR -# CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS -# OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, -# NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN -# CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. -# ====================================================================== - -r"""A class supporting chat-style (command/response) protocols. - -This class adds support for 'chat' style protocols - where one side -sends a 'command', and the other sends a response (examples would be -the common internet protocols - smtp, nntp, ftp, etc..). - -The handle_read() method looks at the input stream for the current -'terminator' (usually '\r\n' for single-line responses, '\r\n.\r\n' -for multi-line output), calling self.found_terminator() on its -receipt. - -for example: -Say you build an async nntp client using this class. At the start -of the connection, you'll have self.terminator set to '\r\n', in -order to process the single-line greeting. Just before issuing a -'LIST' command you'll set it to '\r\n.\r\n'. The output of the LIST -command will be accumulated (using your own 'collect_incoming_data' -method) up to the terminator, and then control will be returned to -you - by calling your self.found_terminator() method. -""" -import asyncore -from collections import deque - - -class async_chat(asyncore.dispatcher): - """This is an abstract class. You must derive from this class, and add - the two methods collect_incoming_data() and found_terminator()""" - - # these are overridable defaults - - ac_in_buffer_size = 65536 - ac_out_buffer_size = 65536 - - # we don't want to enable the use of encoding by default, because that is a - # sign of an application bug that we don't want to pass silently - - use_encoding = 0 - encoding = 'latin-1' - - def __init__(self, sock=None, map=None): - # for string terminator matching - self.ac_in_buffer = b'' - - # we use a list here rather than io.BytesIO for a few reasons... - # del lst[:] is faster than bio.truncate(0) - # lst = [] is faster than bio.truncate(0) - self.incoming = [] - - # we toss the use of the "simple producer" and replace it with - # a pure deque, which the original fifo was a wrapping of - self.producer_fifo = deque() - asyncore.dispatcher.__init__(self, sock, map) - - def collect_incoming_data(self, data): - raise NotImplementedError("must be implemented in subclass") - - def _collect_incoming_data(self, data): - self.incoming.append(data) - - def _get_data(self): - d = b''.join(self.incoming) - del self.incoming[:] - return d - - def found_terminator(self): - raise NotImplementedError("must be implemented in subclass") - - def set_terminator(self, term): - """Set the input delimiter. - - Can be a fixed string of any length, an integer, or None. - """ - if isinstance(term, str) and self.use_encoding: - term = bytes(term, self.encoding) - elif isinstance(term, int) and term < 0: - raise ValueError('the number of received bytes must be positive') - self.terminator = term - - def get_terminator(self): - return self.terminator - - # grab some more data from the socket, - # throw it to the collector method, - # check for the terminator, - # if found, transition to the next state. - - def handle_read(self): - - try: - data = self.recv(self.ac_in_buffer_size) - except BlockingIOError: - return - except OSError as why: - self.handle_error() - return - - if isinstance(data, str) and self.use_encoding: - data = bytes(str, self.encoding) - self.ac_in_buffer = self.ac_in_buffer + data - - # Continue to search for self.terminator in self.ac_in_buffer, - # while calling self.collect_incoming_data. The while loop - # is necessary because we might read several data+terminator - # combos with a single recv(4096). - - while self.ac_in_buffer: - lb = len(self.ac_in_buffer) - terminator = self.get_terminator() - if not terminator: - # no terminator, collect it all - self.collect_incoming_data(self.ac_in_buffer) - self.ac_in_buffer = b'' - elif isinstance(terminator, int): - # numeric terminator - n = terminator - if lb < n: - self.collect_incoming_data(self.ac_in_buffer) - self.ac_in_buffer = b'' - self.terminator = self.terminator - lb - else: - self.collect_incoming_data(self.ac_in_buffer[:n]) - self.ac_in_buffer = self.ac_in_buffer[n:] - self.terminator = 0 - self.found_terminator() - else: - # 3 cases: - # 1) end of buffer matches terminator exactly: - # collect data, transition - # 2) end of buffer matches some prefix: - # collect data to the prefix - # 3) end of buffer does not match any prefix: - # collect data - terminator_len = len(terminator) - index = self.ac_in_buffer.find(terminator) - if index != -1: - # we found the terminator - if index > 0: - # don't bother reporting the empty string - # (source of subtle bugs) - self.collect_incoming_data(self.ac_in_buffer[:index]) - self.ac_in_buffer = self.ac_in_buffer[index+terminator_len:] - # This does the Right Thing if the terminator - # is changed here. - self.found_terminator() - else: - # check for a prefix of the terminator - index = find_prefix_at_end(self.ac_in_buffer, terminator) - if index: - if index != lb: - # we found a prefix, collect up to the prefix - self.collect_incoming_data(self.ac_in_buffer[:-index]) - self.ac_in_buffer = self.ac_in_buffer[-index:] - break - else: - # no prefix, collect it all - self.collect_incoming_data(self.ac_in_buffer) - self.ac_in_buffer = b'' - - def handle_write(self): - self.initiate_send() - - def handle_close(self): - self.close() - - def push(self, data): - if not isinstance(data, (bytes, bytearray, memoryview)): - raise TypeError('data argument must be byte-ish (%r)', - type(data)) - sabs = self.ac_out_buffer_size - if len(data) > sabs: - for i in range(0, len(data), sabs): - self.producer_fifo.append(data[i:i+sabs]) - else: - self.producer_fifo.append(data) - self.initiate_send() - - def push_with_producer(self, producer): - self.producer_fifo.append(producer) - self.initiate_send() - - def readable(self): - "predicate for inclusion in the readable for select()" - # cannot use the old predicate, it violates the claim of the - # set_terminator method. - - # return (len(self.ac_in_buffer) <= self.ac_in_buffer_size) - return 1 - - def writable(self): - "predicate for inclusion in the writable for select()" - return self.producer_fifo or (not self.connected) - - def close_when_done(self): - "automatically close this channel once the outgoing queue is empty" - self.producer_fifo.append(None) - - def initiate_send(self): - while self.producer_fifo and self.connected: - first = self.producer_fifo[0] - # handle empty string/buffer or None entry - if not first: - del self.producer_fifo[0] - if first is None: - self.handle_close() - return - - # handle classic producer behavior - obs = self.ac_out_buffer_size - try: - data = first[:obs] - except TypeError: - data = first.more() - if data: - self.producer_fifo.appendleft(data) - else: - del self.producer_fifo[0] - continue - - if isinstance(data, str) and self.use_encoding: - data = bytes(data, self.encoding) - - # send the data - try: - num_sent = self.send(data) - except OSError: - self.handle_error() - return - - if num_sent: - if num_sent < len(data) or obs < len(first): - self.producer_fifo[0] = first[num_sent:] - else: - del self.producer_fifo[0] - # we tried to send some actual data - return - - def discard_buffers(self): - # Emergencies only! - self.ac_in_buffer = b'' - del self.incoming[:] - self.producer_fifo.clear() - - -class simple_producer: - - def __init__(self, data, buffer_size=512): - self.data = data - self.buffer_size = buffer_size - - def more(self): - if len(self.data) > self.buffer_size: - result = self.data[:self.buffer_size] - self.data = self.data[self.buffer_size:] - return result - else: - result = self.data - self.data = b'' - return result - - -# Given 'haystack', see if any prefix of 'needle' is at its end. This -# assumes an exact match has already been checked. Return the number of -# characters matched. -# for example: -# f_p_a_e("qwerty\r", "\r\n") => 1 -# f_p_a_e("qwertydkjf", "\r\n") => 0 -# f_p_a_e("qwerty\r\n", "\r\n") => - -# this could maybe be made faster with a computed regex? -# [answer: no; circa Python-2.0, Jan 2001] -# new python: 28961/s -# old python: 18307/s -# re: 12820/s -# regex: 14035/s - -def find_prefix_at_end(haystack, needle): - l = len(needle) - 1 - while l and not haystack.endswith(needle[:l]): - l -= 1 - return l diff --git a/Lib/asyncore.py b/Lib/asyncore.py deleted file mode 100644 index 0e92be3ad1..0000000000 --- a/Lib/asyncore.py +++ /dev/null @@ -1,642 +0,0 @@ -# -*- Mode: Python -*- -# Id: asyncore.py,v 2.51 2000/09/07 22:29:26 rushing Exp -# Author: Sam Rushing - -# ====================================================================== -# Copyright 1996 by Sam Rushing -# -# All Rights Reserved -# -# Permission to use, copy, modify, and distribute this software and -# its documentation for any purpose and without fee is hereby -# granted, provided that the above copyright notice appear in all -# copies and that both that copyright notice and this permission -# notice appear in supporting documentation, and that the name of Sam -# Rushing not be used in advertising or publicity pertaining to -# distribution of the software without specific, written prior -# permission. -# -# SAM RUSHING DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, -# INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN -# NO EVENT SHALL SAM RUSHING BE LIABLE FOR ANY SPECIAL, INDIRECT OR -# CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS -# OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, -# NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN -# CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. -# ====================================================================== - -"""Basic infrastructure for asynchronous socket service clients and servers. - -There are only two ways to have a program on a single processor do "more -than one thing at a time". Multi-threaded programming is the simplest and -most popular way to do it, but there is another very different technique, -that lets you have nearly all the advantages of multi-threading, without -actually using multiple threads. it's really only practical if your program -is largely I/O bound. If your program is CPU bound, then pre-emptive -scheduled threads are probably what you really need. Network servers are -rarely CPU-bound, however. - -If your operating system supports the select() system call in its I/O -library (and nearly all do), then you can use it to juggle multiple -communication channels at once; doing other work while your I/O is taking -place in the "background." Although this strategy can seem strange and -complex, especially at first, it is in many ways easier to understand and -control than multi-threaded programming. The module documented here solves -many of the difficult problems for you, making the task of building -sophisticated high-performance network servers and clients a snap. -""" - -import select -import socket -import sys -import time -import warnings - -import os -from errno import EALREADY, EINPROGRESS, EWOULDBLOCK, ECONNRESET, EINVAL, \ - ENOTCONN, ESHUTDOWN, EISCONN, EBADF, ECONNABORTED, EPIPE, EAGAIN, \ - errorcode - -_DISCONNECTED = frozenset({ECONNRESET, ENOTCONN, ESHUTDOWN, ECONNABORTED, EPIPE, - EBADF}) - -try: - socket_map -except NameError: - socket_map = {} - -def _strerror(err): - try: - return os.strerror(err) - except (ValueError, OverflowError, NameError): - if err in errorcode: - return errorcode[err] - return "Unknown error %s" %err - -class ExitNow(Exception): - pass - -_reraised_exceptions = (ExitNow, KeyboardInterrupt, SystemExit) - -def read(obj): - try: - obj.handle_read_event() - except _reraised_exceptions: - raise - except: - obj.handle_error() - -def write(obj): - try: - obj.handle_write_event() - except _reraised_exceptions: - raise - except: - obj.handle_error() - -def _exception(obj): - try: - obj.handle_expt_event() - except _reraised_exceptions: - raise - except: - obj.handle_error() - -def readwrite(obj, flags): - try: - if flags & select.POLLIN: - obj.handle_read_event() - if flags & select.POLLOUT: - obj.handle_write_event() - if flags & select.POLLPRI: - obj.handle_expt_event() - if flags & (select.POLLHUP | select.POLLERR | select.POLLNVAL): - obj.handle_close() - except OSError as e: - if e.args[0] not in _DISCONNECTED: - obj.handle_error() - else: - obj.handle_close() - except _reraised_exceptions: - raise - except: - obj.handle_error() - -def poll(timeout=0.0, map=None): - if map is None: - map = socket_map - if map: - r = []; w = []; e = [] - for fd, obj in list(map.items()): - is_r = obj.readable() - is_w = obj.writable() - if is_r: - r.append(fd) - # accepting sockets should not be writable - if is_w and not obj.accepting: - w.append(fd) - if is_r or is_w: - e.append(fd) - if [] == r == w == e: - time.sleep(timeout) - return - - r, w, e = select.select(r, w, e, timeout) - - for fd in r: - obj = map.get(fd) - if obj is None: - continue - read(obj) - - for fd in w: - obj = map.get(fd) - if obj is None: - continue - write(obj) - - for fd in e: - obj = map.get(fd) - if obj is None: - continue - _exception(obj) - -def poll2(timeout=0.0, map=None): - # Use the poll() support added to the select module in Python 2.0 - if map is None: - map = socket_map - if timeout is not None: - # timeout is in milliseconds - timeout = int(timeout*1000) - pollster = select.poll() - if map: - for fd, obj in list(map.items()): - flags = 0 - if obj.readable(): - flags |= select.POLLIN | select.POLLPRI - # accepting sockets should not be writable - if obj.writable() and not obj.accepting: - flags |= select.POLLOUT - if flags: - pollster.register(fd, flags) - - r = pollster.poll(timeout) - for fd, flags in r: - obj = map.get(fd) - if obj is None: - continue - readwrite(obj, flags) - -poll3 = poll2 # Alias for backward compatibility - -def loop(timeout=30.0, use_poll=False, map=None, count=None): - if map is None: - map = socket_map - - if use_poll and hasattr(select, 'poll'): - poll_fun = poll2 - else: - poll_fun = poll - - if count is None: - while map: - poll_fun(timeout, map) - - else: - while map and count > 0: - poll_fun(timeout, map) - count = count - 1 - -class dispatcher: - - debug = False - connected = False - accepting = False - connecting = False - closing = False - addr = None - ignore_log_types = frozenset({'warning'}) - - def __init__(self, sock=None, map=None): - if map is None: - self._map = socket_map - else: - self._map = map - - self._fileno = None - - if sock: - # Set to nonblocking just to make sure for cases where we - # get a socket from a blocking source. - sock.setblocking(0) - self.set_socket(sock, map) - self.connected = True - # The constructor no longer requires that the socket - # passed be connected. - try: - self.addr = sock.getpeername() - except OSError as err: - if err.args[0] in (ENOTCONN, EINVAL): - # To handle the case where we got an unconnected - # socket. - self.connected = False - else: - # The socket is broken in some unknown way, alert - # the user and remove it from the map (to prevent - # polling of broken sockets). - self.del_channel(map) - raise - else: - self.socket = None - - def __repr__(self): - status = [self.__class__.__module__+"."+self.__class__.__qualname__] - if self.accepting and self.addr: - status.append('listening') - elif self.connected: - status.append('connected') - if self.addr is not None: - try: - status.append('%s:%d' % self.addr) - except TypeError: - status.append(repr(self.addr)) - return '<%s at %#x>' % (' '.join(status), id(self)) - - def add_channel(self, map=None): - #self.log_info('adding channel %s' % self) - if map is None: - map = self._map - map[self._fileno] = self - - def del_channel(self, map=None): - fd = self._fileno - if map is None: - map = self._map - if fd in map: - #self.log_info('closing channel %d:%s' % (fd, self)) - del map[fd] - self._fileno = None - - def create_socket(self, family=socket.AF_INET, type=socket.SOCK_STREAM): - self.family_and_type = family, type - sock = socket.socket(family, type) - sock.setblocking(0) - self.set_socket(sock) - - def set_socket(self, sock, map=None): - self.socket = sock - self._fileno = sock.fileno() - self.add_channel(map) - - def set_reuse_addr(self): - # try to re-use a server port if possible - try: - self.socket.setsockopt( - socket.SOL_SOCKET, socket.SO_REUSEADDR, - self.socket.getsockopt(socket.SOL_SOCKET, - socket.SO_REUSEADDR) | 1 - ) - except OSError: - pass - - # ================================================== - # predicates for select() - # these are used as filters for the lists of sockets - # to pass to select(). - # ================================================== - - def readable(self): - return True - - def writable(self): - return True - - # ================================================== - # socket object methods. - # ================================================== - - def listen(self, num): - self.accepting = True - if os.name == 'nt' and num > 5: - num = 5 - return self.socket.listen(num) - - def bind(self, addr): - self.addr = addr - return self.socket.bind(addr) - - def connect(self, address): - self.connected = False - self.connecting = True - err = self.socket.connect_ex(address) - if err in (EINPROGRESS, EALREADY, EWOULDBLOCK) \ - or err == EINVAL and os.name == 'nt': - self.addr = address - return - if err in (0, EISCONN): - self.addr = address - self.handle_connect_event() - else: - raise OSError(err, errorcode[err]) - - def accept(self): - # XXX can return either an address pair or None - try: - conn, addr = self.socket.accept() - except TypeError: - return None - except OSError as why: - if why.args[0] in (EWOULDBLOCK, ECONNABORTED, EAGAIN): - return None - else: - raise - else: - return conn, addr - - def send(self, data): - try: - result = self.socket.send(data) - return result - except OSError as why: - if why.args[0] == EWOULDBLOCK: - return 0 - elif why.args[0] in _DISCONNECTED: - self.handle_close() - return 0 - else: - raise - - def recv(self, buffer_size): - try: - data = self.socket.recv(buffer_size) - if not data: - # a closed connection is indicated by signaling - # a read condition, and having recv() return 0. - self.handle_close() - return b'' - else: - return data - except OSError as why: - # winsock sometimes raises ENOTCONN - if why.args[0] in _DISCONNECTED: - self.handle_close() - return b'' - else: - raise - - def close(self): - self.connected = False - self.accepting = False - self.connecting = False - self.del_channel() - if self.socket is not None: - try: - self.socket.close() - except OSError as why: - if why.args[0] not in (ENOTCONN, EBADF): - raise - - # log and log_info may be overridden to provide more sophisticated - # logging and warning methods. In general, log is for 'hit' logging - # and 'log_info' is for informational, warning and error logging. - - def log(self, message): - sys.stderr.write('log: %s\n' % str(message)) - - def log_info(self, message, type='info'): - if type not in self.ignore_log_types: - print('%s: %s' % (type, message)) - - def handle_read_event(self): - if self.accepting: - # accepting sockets are never connected, they "spawn" new - # sockets that are connected - self.handle_accept() - elif not self.connected: - if self.connecting: - self.handle_connect_event() - self.handle_read() - else: - self.handle_read() - - def handle_connect_event(self): - err = self.socket.getsockopt(socket.SOL_SOCKET, socket.SO_ERROR) - if err != 0: - raise OSError(err, _strerror(err)) - self.handle_connect() - self.connected = True - self.connecting = False - - def handle_write_event(self): - if self.accepting: - # Accepting sockets shouldn't get a write event. - # We will pretend it didn't happen. - return - - if not self.connected: - if self.connecting: - self.handle_connect_event() - self.handle_write() - - def handle_expt_event(self): - # handle_expt_event() is called if there might be an error on the - # socket, or if there is OOB data - # check for the error condition first - err = self.socket.getsockopt(socket.SOL_SOCKET, socket.SO_ERROR) - if err != 0: - # we can get here when select.select() says that there is an - # exceptional condition on the socket - # since there is an error, we'll go ahead and close the socket - # like we would in a subclassed handle_read() that received no - # data - self.handle_close() - else: - self.handle_expt() - - def handle_error(self): - nil, t, v, tbinfo = compact_traceback() - - # sometimes a user repr method will crash. - try: - self_repr = repr(self) - except: - self_repr = '<__repr__(self) failed for object at %0x>' % id(self) - - self.log_info( - 'uncaptured python exception, closing channel %s (%s:%s %s)' % ( - self_repr, - t, - v, - tbinfo - ), - 'error' - ) - self.handle_close() - - def handle_expt(self): - self.log_info('unhandled incoming priority event', 'warning') - - def handle_read(self): - self.log_info('unhandled read event', 'warning') - - def handle_write(self): - self.log_info('unhandled write event', 'warning') - - def handle_connect(self): - self.log_info('unhandled connect event', 'warning') - - def handle_accept(self): - pair = self.accept() - if pair is not None: - self.handle_accepted(*pair) - - def handle_accepted(self, sock, addr): - sock.close() - self.log_info('unhandled accepted event', 'warning') - - def handle_close(self): - self.log_info('unhandled close event', 'warning') - self.close() - -# --------------------------------------------------------------------------- -# adds simple buffered output capability, useful for simple clients. -# [for more sophisticated usage use asynchat.async_chat] -# --------------------------------------------------------------------------- - -class dispatcher_with_send(dispatcher): - - def __init__(self, sock=None, map=None): - dispatcher.__init__(self, sock, map) - self.out_buffer = b'' - - def initiate_send(self): - num_sent = 0 - num_sent = dispatcher.send(self, self.out_buffer[:65536]) - self.out_buffer = self.out_buffer[num_sent:] - - def handle_write(self): - self.initiate_send() - - def writable(self): - return (not self.connected) or len(self.out_buffer) - - def send(self, data): - if self.debug: - self.log_info('sending %s' % repr(data)) - self.out_buffer = self.out_buffer + data - self.initiate_send() - -# --------------------------------------------------------------------------- -# used for debugging. -# --------------------------------------------------------------------------- - -def compact_traceback(): - t, v, tb = sys.exc_info() - tbinfo = [] - if not tb: # Must have a traceback - raise AssertionError("traceback does not exist") - while tb: - tbinfo.append(( - tb.tb_frame.f_code.co_filename, - tb.tb_frame.f_code.co_name, - str(tb.tb_lineno) - )) - tb = tb.tb_next - - # just to be safe - del tb - - file, function, line = tbinfo[-1] - info = ' '.join(['[%s|%s|%s]' % x for x in tbinfo]) - return (file, function, line), t, v, info - -def close_all(map=None, ignore_all=False): - if map is None: - map = socket_map - for x in list(map.values()): - try: - x.close() - except OSError as x: - if x.args[0] == EBADF: - pass - elif not ignore_all: - raise - except _reraised_exceptions: - raise - except: - if not ignore_all: - raise - map.clear() - -# Asynchronous File I/O: -# -# After a little research (reading man pages on various unixen, and -# digging through the linux kernel), I've determined that select() -# isn't meant for doing asynchronous file i/o. -# Heartening, though - reading linux/mm/filemap.c shows that linux -# supports asynchronous read-ahead. So _MOST_ of the time, the data -# will be sitting in memory for us already when we go to read it. -# -# What other OS's (besides NT) support async file i/o? [VMS?] -# -# Regardless, this is useful for pipes, and stdin/stdout... - -if os.name == 'posix': - class file_wrapper: - # Here we override just enough to make a file - # look like a socket for the purposes of asyncore. - # The passed fd is automatically os.dup()'d - - def __init__(self, fd): - self.fd = os.dup(fd) - - def __del__(self): - if self.fd >= 0: - warnings.warn("unclosed file %r" % self, ResourceWarning, - source=self) - self.close() - - def recv(self, *args): - return os.read(self.fd, *args) - - def send(self, *args): - return os.write(self.fd, *args) - - def getsockopt(self, level, optname, buflen=None): - if (level == socket.SOL_SOCKET and - optname == socket.SO_ERROR and - not buflen): - return 0 - raise NotImplementedError("Only asyncore specific behaviour " - "implemented.") - - read = recv - write = send - - def close(self): - if self.fd < 0: - return - fd = self.fd - self.fd = -1 - os.close(fd) - - def fileno(self): - return self.fd - - class file_dispatcher(dispatcher): - - def __init__(self, fd, map=None): - dispatcher.__init__(self, None, map) - self.connected = True - try: - fd = fd.fileno() - except AttributeError: - pass - self.set_file(fd) - # set it to non-blocking mode - os.set_blocking(fd, False) - - def set_file(self, fd): - self.socket = file_wrapper(fd) - self._fileno = self.socket.fileno() - self.add_channel() diff --git a/Lib/base64.py b/Lib/base64.py old mode 100755 new mode 100644 index e233647ee7..5a7e790a19 --- a/Lib/base64.py +++ b/Lib/base64.py @@ -18,7 +18,7 @@ 'b64encode', 'b64decode', 'b32encode', 'b32decode', 'b32hexencode', 'b32hexdecode', 'b16encode', 'b16decode', # Base85 and Ascii85 encodings - 'b85encode', 'b85decode', 'a85encode', 'a85decode', + 'b85encode', 'b85decode', 'a85encode', 'a85decode', 'z85encode', 'z85decode', # Standard Base64 encoding 'standard_b64encode', 'standard_b64decode', # Some common Base64 alternatives. As referenced by RFC 3458, see thread @@ -164,7 +164,6 @@ def urlsafe_b64decode(s): _b32rev = {} def _b32encode(alphabet, s): - global _b32tab2 # Delay the initialization of the table to not waste memory # if the function is never called if alphabet not in _b32tab2: @@ -200,7 +199,6 @@ def _b32encode(alphabet, s): return bytes(encoded) def _b32decode(alphabet, s, casefold=False, map01=None): - global _b32rev # Delay the initialization of the table to not waste memory # if the function is never called if alphabet not in _b32rev: @@ -334,7 +332,7 @@ def a85encode(b, *, foldspaces=False, wrapcol=0, pad=False, adobe=False): wrapcol controls whether the output should have newline (b'\\n') characters added to it. If this is non-zero, each output line will be at most this - many characters long. + many characters long, excluding the trailing newline. pad controls whether the input is padded to a multiple of 4 before encoding. Note that the btoa implementation always pads. @@ -499,6 +497,33 @@ def b85decode(b): result = result[:-padding] return result +_z85alphabet = (b'0123456789abcdefghijklmnopqrstuvwxyz' + b'ABCDEFGHIJKLMNOPQRSTUVWXYZ.-:+=^!/*?&<>()[]{}@%$#') +# Translating b85 valid but z85 invalid chars to b'\x00' is required +# to prevent them from being decoded as b85 valid chars. +_z85_b85_decode_diff = b';_`|~' +_z85_decode_translation = bytes.maketrans( + _z85alphabet + _z85_b85_decode_diff, + _b85alphabet + b'\x00' * len(_z85_b85_decode_diff) +) +_z85_encode_translation = bytes.maketrans(_b85alphabet, _z85alphabet) + +def z85encode(s): + """Encode bytes-like object b in z85 format and return a bytes object.""" + return b85encode(s).translate(_z85_encode_translation) + +def z85decode(s): + """Decode the z85-encoded bytes-like object or ASCII string b + + The result is returned as a bytes object. + """ + s = _bytes_from_decode_data(s) + s = s.translate(_z85_decode_translation) + try: + return b85decode(s) + except ValueError as e: + raise ValueError(e.args[0].replace('base85', 'z85')) from None + # Legacy interface. This code could be cleaned up since I don't believe # binascii has any line length limitations. It just doesn't seem worth it # though. The files should be opened in binary mode. diff --git a/Lib/calendar.py b/Lib/calendar.py index baab52a157..8c1c646da4 100644 --- a/Lib/calendar.py +++ b/Lib/calendar.py @@ -10,7 +10,6 @@ from enum import IntEnum, global_enum import locale as _locale from itertools import repeat -import warnings __all__ = ["IllegalMonthError", "IllegalWeekdayError", "setfirstweekday", "firstweekday", "isleap", "leapdays", "weekday", "monthrange", @@ -28,7 +27,9 @@ error = ValueError # Exceptions raised for bad input -class IllegalMonthError(ValueError): +# This is trick for backward compatibility. Since 3.13, we will raise IllegalMonthError instead of +# IndexError for bad month number(out of 1-12). But we can't remove IndexError for backward compatibility. +class IllegalMonthError(ValueError, IndexError): def __init__(self, month): self.month = month def __str__(self): @@ -44,6 +45,7 @@ def __str__(self): def __getattr__(name): if name in ('January', 'February'): + import warnings warnings.warn(f"The '{name}' attribute is deprecated, use '{name.upper()}' instead", DeprecationWarning, stacklevel=2) if name == 'January': @@ -158,11 +160,14 @@ def weekday(year, month, day): return Day(datetime.date(year, month, day).weekday()) -def monthrange(year, month): - """Return weekday (0-6 ~ Mon-Sun) and number of days (28-31) for - year, month.""" +def _validate_month(month): if not 1 <= month <= 12: raise IllegalMonthError(month) + +def monthrange(year, month): + """Return weekday of first day of month (0-6 ~ Mon-Sun) + and number of days (28-31) for year, month.""" + _validate_month(month) day1 = weekday(year, month, 1) ndays = mdays[month] + (month == FEBRUARY and isleap(year)) return day1, ndays @@ -370,6 +375,8 @@ def formatmonthname(self, theyear, themonth, width, withyear=True): """ Return a formatted month name. """ + _validate_month(themonth) + s = month_name[themonth] if withyear: s = "%s %r" % (s, theyear) @@ -500,6 +507,7 @@ def formatmonthname(self, theyear, themonth, withyear=True): """ Return a month name as a table row. """ + _validate_month(themonth) if withyear: s = '%s %s' % (month_name[themonth], theyear) else: @@ -585,8 +593,6 @@ def __enter__(self): _locale.setlocale(_locale.LC_TIME, self.locale) def __exit__(self, *args): - if self.oldlocale is None: - return _locale.setlocale(_locale.LC_TIME, self.oldlocale) @@ -690,7 +696,7 @@ def timegm(tuple): return seconds -def main(args): +def main(args=None): import argparse parser = argparse.ArgumentParser() textgroup = parser.add_argument_group('text only arguments') @@ -736,10 +742,15 @@ def main(args): choices=("text", "html"), help="output type (text or html)" ) + parser.add_argument( + "-f", "--first-weekday", + type=int, default=0, + help="weekday (0 is Monday, 6 is Sunday) to start each week (default 0)" + ) parser.add_argument( "year", nargs='?', type=int, - help="year number (1-9999)" + help="year number" ) parser.add_argument( "month", @@ -747,7 +758,7 @@ def main(args): help="month number (1-12, text only)" ) - options = parser.parse_args(args[1:]) + options = parser.parse_args(args) if options.locale and not options.encoding: parser.error("if --locale is specified --encoding is required") @@ -756,10 +767,14 @@ def main(args): locale = options.locale, options.encoding if options.type == "html": + if options.month: + parser.error("incorrect number of arguments") + sys.exit(1) if options.locale: cal = LocaleHTMLCalendar(locale=locale) else: cal = HTMLCalendar() + cal.setfirstweekday(options.first_weekday) encoding = options.encoding if encoding is None: encoding = sys.getdefaultencoding() @@ -767,20 +782,20 @@ def main(args): write = sys.stdout.buffer.write if options.year is None: write(cal.formatyearpage(datetime.date.today().year, **optdict)) - elif options.month is None: - write(cal.formatyearpage(options.year, **optdict)) else: - parser.error("incorrect number of arguments") - sys.exit(1) + write(cal.formatyearpage(options.year, **optdict)) else: if options.locale: cal = LocaleTextCalendar(locale=locale) else: cal = TextCalendar() + cal.setfirstweekday(options.first_weekday) optdict = dict(w=options.width, l=options.lines) if options.month is None: optdict["c"] = options.spacing optdict["m"] = options.months + if options.month is not None: + _validate_month(options.month) if options.year is None: result = cal.formatyear(datetime.date.today().year, **optdict) elif options.month is None: @@ -795,4 +810,4 @@ def main(args): if __name__ == "__main__": - main(sys.argv) + main() diff --git a/Lib/chunk.py b/Lib/chunk.py deleted file mode 100644 index 618781efd1..0000000000 --- a/Lib/chunk.py +++ /dev/null @@ -1,173 +0,0 @@ -"""Simple class to read IFF chunks. - -An IFF chunk (used in formats such as AIFF, TIFF, RMFF (RealMedia File -Format)) has the following structure: - -+----------------+ -| ID (4 bytes) | -+----------------+ -| size (4 bytes) | -+----------------+ -| data | -| ... | -+----------------+ - -The ID is a 4-byte string which identifies the type of chunk. - -The size field (a 32-bit value, encoded using big-endian byte order) -gives the size of the whole chunk, including the 8-byte header. - -Usually an IFF-type file consists of one or more chunks. The proposed -usage of the Chunk class defined here is to instantiate an instance at -the start of each chunk and read from the instance until it reaches -the end, after which a new instance can be instantiated. At the end -of the file, creating a new instance will fail with an EOFError -exception. - -Usage: -while True: - try: - chunk = Chunk(file) - except EOFError: - break - chunktype = chunk.getname() - while True: - data = chunk.read(nbytes) - if not data: - pass - # do something with data - -The interface is file-like. The implemented methods are: -read, close, seek, tell, isatty. -Extra methods are: skip() (called by close, skips to the end of the chunk), -getname() (returns the name (ID) of the chunk) - -The __init__ method has one required argument, a file-like object -(including a chunk instance), and one optional argument, a flag which -specifies whether or not chunks are aligned on 2-byte boundaries. The -default is 1, i.e. aligned. -""" - -import warnings - -warnings._deprecated(__name__, remove=(3, 13)) - -class Chunk: - def __init__(self, file, align=True, bigendian=True, inclheader=False): - import struct - self.closed = False - self.align = align # whether to align to word (2-byte) boundaries - if bigendian: - strflag = '>' - else: - strflag = '<' - self.file = file - self.chunkname = file.read(4) - if len(self.chunkname) < 4: - raise EOFError - try: - self.chunksize = struct.unpack_from(strflag+'L', file.read(4))[0] - except struct.error: - raise EOFError from None - if inclheader: - self.chunksize = self.chunksize - 8 # subtract header - self.size_read = 0 - try: - self.offset = self.file.tell() - except (AttributeError, OSError): - self.seekable = False - else: - self.seekable = True - - def getname(self): - """Return the name (ID) of the current chunk.""" - return self.chunkname - - def getsize(self): - """Return the size of the current chunk.""" - return self.chunksize - - def close(self): - if not self.closed: - try: - self.skip() - finally: - self.closed = True - - def isatty(self): - if self.closed: - raise ValueError("I/O operation on closed file") - return False - - def seek(self, pos, whence=0): - """Seek to specified position into the chunk. - Default position is 0 (start of chunk). - If the file is not seekable, this will result in an error. - """ - - if self.closed: - raise ValueError("I/O operation on closed file") - if not self.seekable: - raise OSError("cannot seek") - if whence == 1: - pos = pos + self.size_read - elif whence == 2: - pos = pos + self.chunksize - if pos < 0 or pos > self.chunksize: - raise RuntimeError - self.file.seek(self.offset + pos, 0) - self.size_read = pos - - def tell(self): - if self.closed: - raise ValueError("I/O operation on closed file") - return self.size_read - - def read(self, size=-1): - """Read at most size bytes from the chunk. - If size is omitted or negative, read until the end - of the chunk. - """ - - if self.closed: - raise ValueError("I/O operation on closed file") - if self.size_read >= self.chunksize: - return b'' - if size < 0: - size = self.chunksize - self.size_read - if size > self.chunksize - self.size_read: - size = self.chunksize - self.size_read - data = self.file.read(size) - self.size_read = self.size_read + len(data) - if self.size_read == self.chunksize and \ - self.align and \ - (self.chunksize & 1): - dummy = self.file.read(1) - self.size_read = self.size_read + len(dummy) - return data - - def skip(self): - """Skip the rest of the chunk. - If you are not interested in the contents of the chunk, - this method should be called so that the file points to - the start of the next chunk. - """ - - if self.closed: - raise ValueError("I/O operation on closed file") - if self.seekable: - try: - n = self.chunksize - self.size_read - # maybe fix alignment - if self.align and (self.chunksize & 1): - n = n + 1 - self.file.seek(n, 1) - self.size_read = self.size_read + n - return - except OSError: - pass - while self.size_read < self.chunksize: - n = min(8192, self.chunksize - self.size_read) - dummy = self.read(n) - if not dummy: - raise EOFError diff --git a/Lib/codeop.py b/Lib/codeop.py index 4dd096574b..eea6cbc701 100644 --- a/Lib/codeop.py +++ b/Lib/codeop.py @@ -65,9 +65,10 @@ def _maybe_compile(compiler, source, filename, symbol): try: compiler(source + "\n", filename, symbol) return None + except _IncompleteInputError as e: + return None except SyntaxError as e: - if "incomplete input" in str(e): - return None + pass # fallthrough return compiler(source, filename, symbol, incomplete_input=False) diff --git a/Lib/colorsys.py b/Lib/colorsys.py index bc897bd0f9..e97f91718a 100644 --- a/Lib/colorsys.py +++ b/Lib/colorsys.py @@ -24,7 +24,7 @@ __all__ = ["rgb_to_yiq","yiq_to_rgb","rgb_to_hls","hls_to_rgb", "rgb_to_hsv","hsv_to_rgb"] -# Some floating point constants +# Some floating-point constants ONE_THIRD = 1.0/3.0 ONE_SIXTH = 1.0/6.0 diff --git a/Lib/email/__init__.py b/Lib/email/__init__.py index fae872439e..9fa4778300 100644 --- a/Lib/email/__init__.py +++ b/Lib/email/__init__.py @@ -25,7 +25,6 @@ ] - # Some convenience routines. Don't import Parser and Message as side-effects # of importing email since those cascadingly import most of the rest of the # email package. diff --git a/Lib/email/_encoded_words.py b/Lib/email/_encoded_words.py index 5eaab36ed0..6795a606de 100644 --- a/Lib/email/_encoded_words.py +++ b/Lib/email/_encoded_words.py @@ -62,7 +62,7 @@ # regex based decoder. _q_byte_subber = functools.partial(re.compile(br'=([a-fA-F0-9]{2})').sub, - lambda m: bytes([int(m.group(1), 16)])) + lambda m: bytes.fromhex(m.group(1).decode())) def decode_q(encoded): encoded = encoded.replace(b'_', b' ') @@ -98,30 +98,42 @@ def len_q(bstring): # def decode_b(encoded): - defects = [] + # First try encoding with validate=True, fixing the padding if needed. + # This will succeed only if encoded includes no invalid characters. pad_err = len(encoded) % 4 - if pad_err: - defects.append(errors.InvalidBase64PaddingDefect()) - padded_encoded = encoded + b'==='[:4-pad_err] - else: - padded_encoded = encoded + missing_padding = b'==='[:4-pad_err] if pad_err else b'' try: - return base64.b64decode(padded_encoded, validate=True), defects + return ( + base64.b64decode(encoded + missing_padding, validate=True), + [errors.InvalidBase64PaddingDefect()] if pad_err else [], + ) except binascii.Error: - # Since we had correct padding, this must an invalid char error. - defects = [errors.InvalidBase64CharactersDefect()] + # Since we had correct padding, this is likely an invalid char error. + # # The non-alphabet characters are ignored as far as padding - # goes, but we don't know how many there are. So we'll just - # try various padding lengths until something works. - for i in 0, 1, 2, 3: + # goes, but we don't know how many there are. So try without adding + # padding to see if it works. + try: + return ( + base64.b64decode(encoded, validate=False), + [errors.InvalidBase64CharactersDefect()], + ) + except binascii.Error: + # Add as much padding as could possibly be necessary (extra padding + # is ignored). try: - return base64.b64decode(encoded+b'='*i, validate=False), defects + return ( + base64.b64decode(encoded + b'==', validate=False), + [errors.InvalidBase64CharactersDefect(), + errors.InvalidBase64PaddingDefect()], + ) except binascii.Error: - if i==0: - defects.append(errors.InvalidBase64PaddingDefect()) - else: - # This should never happen. - raise AssertionError("unexpected binascii.Error") + # This only happens when the encoded string's length is 1 more + # than a multiple of 4, which is invalid. + # + # bpo-27397: Just return the encoded string since there's no + # way to decode. + return encoded, [errors.InvalidBase64LengthDefect()] def encode_b(bstring): return base64.b64encode(bstring).decode('ascii') @@ -167,15 +179,15 @@ def decode(ew): # Turn the CTE decoded bytes into unicode. try: string = bstring.decode(charset) - except UnicodeError: + except UnicodeDecodeError: defects.append(errors.UndecodableBytesDefect("Encoded word " - "contains bytes not decodable using {} charset".format(charset))) + f"contains bytes not decodable using {charset!r} charset")) string = bstring.decode(charset, 'surrogateescape') - except LookupError: + except (LookupError, UnicodeEncodeError): string = bstring.decode('ascii', 'surrogateescape') if charset.lower() != 'unknown-8bit': - defects.append(errors.CharsetError("Unknown charset {} " - "in encoded word; decoded as unknown bytes".format(charset))) + defects.append(errors.CharsetError(f"Unknown charset {charset!r} " + f"in encoded word; decoded as unknown bytes")) return string, charset, lang, defects diff --git a/Lib/email/_header_value_parser.py b/Lib/email/_header_value_parser.py index 57d01fbcb0..ec2215a5e5 100644 --- a/Lib/email/_header_value_parser.py +++ b/Lib/email/_header_value_parser.py @@ -68,9 +68,9 @@ """ import re +import sys import urllib # For urllib.parse.unquote from string import hexdigits -from collections import OrderedDict from operator import itemgetter from email import _encoded_words as _ew from email import errors @@ -92,93 +92,23 @@ ASPECIALS = TSPECIALS | set("*'%") ATTRIBUTE_ENDS = ASPECIALS | WSP EXTENDED_ATTRIBUTE_ENDS = ATTRIBUTE_ENDS - set('%') +NLSET = {'\n', '\r'} +SPECIALSNL = SPECIALS | NLSET def quote_string(value): return '"'+str(value).replace('\\', '\\\\').replace('"', r'\"')+'"' -# -# Accumulator for header folding -# - -class _Folded: - - def __init__(self, maxlen, policy): - self.maxlen = maxlen - self.policy = policy - self.lastlen = 0 - self.stickyspace = None - self.firstline = True - self.done = [] - self.current = [] +# Match a RFC 2047 word, looks like =?utf-8?q?someword?= +rfc2047_matcher = re.compile(r''' + =\? # literal =? + [^?]* # charset + \? # literal ? + [qQbB] # literal 'q' or 'b', case insensitive + \? # literal ? + .*? # encoded word + \?= # literal ?= +''', re.VERBOSE | re.MULTILINE) - def newline(self): - self.done.extend(self.current) - self.done.append(self.policy.linesep) - self.current.clear() - self.lastlen = 0 - - def finalize(self): - if self.current: - self.newline() - - def __str__(self): - return ''.join(self.done) - - def append(self, stoken): - self.current.append(stoken) - - def append_if_fits(self, token, stoken=None): - if stoken is None: - stoken = str(token) - l = len(stoken) - if self.stickyspace is not None: - stickyspace_len = len(self.stickyspace) - if self.lastlen + stickyspace_len + l <= self.maxlen: - self.current.append(self.stickyspace) - self.lastlen += stickyspace_len - self.current.append(stoken) - self.lastlen += l - self.stickyspace = None - self.firstline = False - return True - if token.has_fws: - ws = token.pop_leading_fws() - if ws is not None: - self.stickyspace += str(ws) - stickyspace_len += len(ws) - token._fold(self) - return True - if stickyspace_len and l + 1 <= self.maxlen: - margin = self.maxlen - l - if 0 < margin < stickyspace_len: - trim = stickyspace_len - margin - self.current.append(self.stickyspace[:trim]) - self.stickyspace = self.stickyspace[trim:] - stickyspace_len = trim - self.newline() - self.current.append(self.stickyspace) - self.current.append(stoken) - self.lastlen = l + stickyspace_len - self.stickyspace = None - self.firstline = False - return True - if not self.firstline: - self.newline() - self.current.append(self.stickyspace) - self.current.append(stoken) - self.stickyspace = None - self.firstline = False - return True - if self.lastlen + l <= self.maxlen: - self.current.append(stoken) - self.lastlen += l - return True - if l < self.maxlen: - self.newline() - self.current.append(stoken) - self.lastlen = l - return True - return False # # TokenList and its subclasses @@ -187,6 +117,8 @@ def append_if_fits(self, token, stoken=None): class TokenList(list): token_type = None + syntactic_break = True + ew_combine_allowed = True def __init__(self, *args, **kw): super().__init__(*args, **kw) @@ -207,84 +139,13 @@ def value(self): def all_defects(self): return sum((x.all_defects for x in self), self.defects) - # - # Folding API - # - # parts(): - # - # return a list of objects that constitute the "higher level syntactic - # objects" specified by the RFC as the best places to fold a header line. - # The returned objects must include leading folding white space, even if - # this means mutating the underlying parse tree of the object. Each object - # is only responsible for returning *its* parts, and should not drill down - # to any lower level except as required to meet the leading folding white - # space constraint. - # - # _fold(folded): - # - # folded: the result accumulator. This is an instance of _Folded. - # (XXX: I haven't finished factoring this out yet, the folding code - # pretty much uses this as a state object.) When the folded.current - # contains as much text as will fit, the _fold method should call - # folded.newline. - # folded.lastlen: the current length of the test stored in folded.current. - # folded.maxlen: The maximum number of characters that may appear on a - # folded line. Differs from the policy setting in that "no limit" is - # represented by +inf, which means it can be used in the trivially - # logical fashion in comparisons. - # - # Currently no subclasses implement parts, and I think this will remain - # true. A subclass only needs to implement _fold when the generic version - # isn't sufficient. _fold will need to be implemented primarily when it is - # possible for encoded words to appear in the specialized token-list, since - # there is no generic algorithm that can know where exactly the encoded - # words are allowed. A _fold implementation is responsible for filling - # lines in the same general way that the top level _fold does. It may, and - # should, call the _fold method of sub-objects in a similar fashion to that - # of the top level _fold. - # - # XXX: I'm hoping it will be possible to factor the existing code further - # to reduce redundancy and make the logic clearer. - - @property - def parts(self): - klass = self.__class__ - this = [] - for token in self: - if token.startswith_fws(): - if this: - yield this[0] if len(this)==1 else klass(this) - this.clear() - end_ws = token.pop_trailing_ws() - this.append(token) - if end_ws: - yield klass(this) - this = [end_ws] - if this: - yield this[0] if len(this)==1 else klass(this) - def startswith_fws(self): return self[0].startswith_fws() - def pop_leading_fws(self): - if self[0].token_type == 'fws': - return self.pop(0) - return self[0].pop_leading_fws() - - def pop_trailing_ws(self): - if self[-1].token_type == 'cfws': - return self.pop(-1) - return self[-1].pop_trailing_ws() - @property - def has_fws(self): - for part in self: - if part.has_fws: - return True - return False - - def has_leading_comment(self): - return self[0].has_leading_comment() + def as_ew_allowed(self): + """True if all top level tokens of this part may be RFC2047 encoded.""" + return all(part.as_ew_allowed for part in self) @property def comments(self): @@ -294,71 +155,13 @@ def comments(self): return comments def fold(self, *, policy): - # max_line_length 0/None means no limit, ie: infinitely long. - maxlen = policy.max_line_length or float("+inf") - folded = _Folded(maxlen, policy) - self._fold(folded) - folded.finalize() - return str(folded) - - def as_encoded_word(self, charset): - # This works only for things returned by 'parts', which include - # the leading fws, if any, that should be used. - res = [] - ws = self.pop_leading_fws() - if ws: - res.append(ws) - trailer = self.pop(-1) if self[-1].token_type=='fws' else '' - res.append(_ew.encode(str(self), charset)) - res.append(trailer) - return ''.join(res) - - def cte_encode(self, charset, policy): - res = [] - for part in self: - res.append(part.cte_encode(charset, policy)) - return ''.join(res) - - def _fold(self, folded): - encoding = 'utf-8' if folded.policy.utf8 else 'ascii' - for part in self.parts: - tstr = str(part) - tlen = len(tstr) - try: - str(part).encode(encoding) - except UnicodeEncodeError: - if any(isinstance(x, errors.UndecodableBytesDefect) - for x in part.all_defects): - charset = 'unknown-8bit' - else: - # XXX: this should be a policy setting when utf8 is False. - charset = 'utf-8' - tstr = part.cte_encode(charset, folded.policy) - tlen = len(tstr) - if folded.append_if_fits(part, tstr): - continue - # Peel off the leading whitespace if any and make it sticky, to - # avoid infinite recursion. - ws = part.pop_leading_fws() - if ws is not None: - # Peel off the leading whitespace and make it sticky, to - # avoid infinite recursion. - folded.stickyspace = str(part.pop(0)) - if folded.append_if_fits(part): - continue - if part.has_fws: - part._fold(folded) - continue - # There are no fold points in this one; it is too long for a single - # line and can't be split...we just have to put it on its own line. - folded.append(tstr) - folded.newline() + return _refold_parse_tree(self, policy=policy) def pprint(self, indent=''): - print('\n'.join(self._pp(indent=''))) + print(self.ppstr(indent=indent)) def ppstr(self, indent=''): - return '\n'.join(self._pp(indent='')) + return '\n'.join(self._pp(indent=indent)) def _pp(self, indent=''): yield '{}{}/{}('.format( @@ -390,213 +193,35 @@ def comments(self): class UnstructuredTokenList(TokenList): - token_type = 'unstructured' - def _fold(self, folded): - last_ew = None - encoding = 'utf-8' if folded.policy.utf8 else 'ascii' - for part in self.parts: - tstr = str(part) - is_ew = False - try: - str(part).encode(encoding) - except UnicodeEncodeError: - if any(isinstance(x, errors.UndecodableBytesDefect) - for x in part.all_defects): - charset = 'unknown-8bit' - else: - charset = 'utf-8' - if last_ew is not None: - # We've already done an EW, combine this one with it - # if there's room. - chunk = get_unstructured( - ''.join(folded.current[last_ew:]+[tstr])).as_encoded_word(charset) - oldlastlen = sum(len(x) for x in folded.current[:last_ew]) - schunk = str(chunk) - lchunk = len(schunk) - if oldlastlen + lchunk <= folded.maxlen: - del folded.current[last_ew:] - folded.append(schunk) - folded.lastlen = oldlastlen + lchunk - continue - tstr = part.as_encoded_word(charset) - is_ew = True - if folded.append_if_fits(part, tstr): - if is_ew: - last_ew = len(folded.current) - 1 - continue - if is_ew or last_ew: - # It's too big to fit on the line, but since we've - # got encoded words we can use encoded word folding. - part._fold_as_ew(folded) - continue - # Peel off the leading whitespace if any and make it sticky, to - # avoid infinite recursion. - ws = part.pop_leading_fws() - if ws is not None: - folded.stickyspace = str(ws) - if folded.append_if_fits(part): - continue - if part.has_fws: - part._fold(folded) - continue - # It can't be split...we just have to put it on its own line. - folded.append(tstr) - folded.newline() - last_ew = None - - def cte_encode(self, charset, policy): - res = [] - last_ew = None - for part in self: - spart = str(part) - try: - spart.encode('us-ascii') - res.append(spart) - except UnicodeEncodeError: - if last_ew is None: - res.append(part.cte_encode(charset, policy)) - last_ew = len(res) - else: - tl = get_unstructured(''.join(res[last_ew:] + [spart])) - res.append(tl.as_encoded_word(charset)) - return ''.join(res) - class Phrase(TokenList): - token_type = 'phrase' - def _fold(self, folded): - # As with Unstructured, we can have pure ASCII with or without - # surrogateescape encoded bytes, or we could have unicode. But this - # case is more complicated, since we have to deal with the various - # sub-token types and how they can be composed in the face of - # unicode-that-needs-CTE-encoding, and the fact that if a token a - # comment that becomes a barrier across which we can't compose encoded - # words. - last_ew = None - encoding = 'utf-8' if folded.policy.utf8 else 'ascii' - for part in self.parts: - tstr = str(part) - tlen = len(tstr) - has_ew = False - try: - str(part).encode(encoding) - except UnicodeEncodeError: - if any(isinstance(x, errors.UndecodableBytesDefect) - for x in part.all_defects): - charset = 'unknown-8bit' - else: - charset = 'utf-8' - if last_ew is not None and not part.has_leading_comment(): - # We've already done an EW, let's see if we can combine - # this one with it. The last_ew logic ensures that all we - # have at this point is atoms, no comments or quoted - # strings. So we can treat the text between the last - # encoded word and the content of this token as - # unstructured text, and things will work correctly. But - # we have to strip off any trailing comment on this token - # first, and if it is a quoted string we have to pull out - # the content (we're encoding it, so it no longer needs to - # be quoted). - if part[-1].token_type == 'cfws' and part.comments: - remainder = part.pop(-1) - else: - remainder = '' - for i, token in enumerate(part): - if token.token_type == 'bare-quoted-string': - part[i] = UnstructuredTokenList(token[:]) - chunk = get_unstructured( - ''.join(folded.current[last_ew:]+[tstr])).as_encoded_word(charset) - schunk = str(chunk) - lchunk = len(schunk) - if last_ew + lchunk <= folded.maxlen: - del folded.current[last_ew:] - folded.append(schunk) - folded.lastlen = sum(len(x) for x in folded.current) - continue - tstr = part.as_encoded_word(charset) - tlen = len(tstr) - has_ew = True - if folded.append_if_fits(part, tstr): - if has_ew and not part.comments: - last_ew = len(folded.current) - 1 - elif part.comments or part.token_type == 'quoted-string': - # If a comment is involved we can't combine EWs. And if a - # quoted string is involved, it's not worth the effort to - # try to combine them. - last_ew = None - continue - part._fold(folded) - - def cte_encode(self, charset, policy): - res = [] - last_ew = None - is_ew = False - for part in self: - spart = str(part) - try: - spart.encode('us-ascii') - res.append(spart) - except UnicodeEncodeError: - is_ew = True - if last_ew is None: - if not part.comments: - last_ew = len(res) - res.append(part.cte_encode(charset, policy)) - elif not part.has_leading_comment(): - if part[-1].token_type == 'cfws' and part.comments: - remainder = part.pop(-1) - else: - remainder = '' - for i, token in enumerate(part): - if token.token_type == 'bare-quoted-string': - part[i] = UnstructuredTokenList(token[:]) - tl = get_unstructured(''.join(res[last_ew:] + [spart])) - res[last_ew:] = [tl.as_encoded_word(charset)] - if part.comments or (not is_ew and part.token_type == 'quoted-string'): - last_ew = None - return ''.join(res) - class Word(TokenList): - token_type = 'word' class CFWSList(WhiteSpaceTokenList): - token_type = 'cfws' - def has_leading_comment(self): - return bool(self.comments) - class Atom(TokenList): - token_type = 'atom' class Token(TokenList): - token_type = 'token' + encode_as_ew = False class EncodedWord(TokenList): - token_type = 'encoded-word' cte = None charset = None lang = None - @property - def encoded(self): - if self.cte is not None: - return self.cte - _ew.encode(str(self), self.charset) - - class QuotedString(TokenList): @@ -812,7 +437,10 @@ def route(self): def addr_spec(self): for x in self: if x.token_type == 'addr-spec': - return x.addr_spec + if x.local_part: + return x.addr_spec + else: + return quote_string(x.local_part) + x.addr_spec else: return '<>' @@ -867,6 +495,7 @@ def display_name(self): class Domain(TokenList): token_type = 'domain' + as_ew_allowed = False @property def domain(self): @@ -874,18 +503,23 @@ def domain(self): class DotAtom(TokenList): - token_type = 'dot-atom' class DotAtomText(TokenList): - token_type = 'dot-atom-text' + as_ew_allowed = True + + +class NoFoldLiteral(TokenList): + token_type = 'no-fold-literal' + as_ew_allowed = False class AddrSpec(TokenList): token_type = 'addr-spec' + as_ew_allowed = False @property def local_part(self): @@ -918,24 +552,30 @@ def addr_spec(self): class ObsLocalPart(TokenList): token_type = 'obs-local-part' + as_ew_allowed = False class DisplayName(Phrase): token_type = 'display-name' + ew_combine_allowed = False @property def display_name(self): res = TokenList(self) + if len(res) == 0: + return res.value if res[0].token_type == 'cfws': res.pop(0) else: - if res[0][0].token_type == 'cfws': + if (isinstance(res[0], TokenList) and + res[0][0].token_type == 'cfws'): res[0] = TokenList(res[0][1:]) if res[-1].token_type == 'cfws': res.pop() else: - if res[-1][-1].token_type == 'cfws': + if (isinstance(res[-1], TokenList) and + res[-1][-1].token_type == 'cfws'): res[-1] = TokenList(res[-1][:-1]) return res.value @@ -948,11 +588,15 @@ def value(self): for x in self: if x.token_type == 'quoted-string': quote = True - if quote: + if len(self) != 0 and quote: pre = post = '' - if self[0].token_type=='cfws' or self[0][0].token_type=='cfws': + if (self[0].token_type == 'cfws' or + isinstance(self[0], TokenList) and + self[0][0].token_type == 'cfws'): pre = ' ' - if self[-1].token_type=='cfws' or self[-1][-1].token_type=='cfws': + if (self[-1].token_type == 'cfws' or + isinstance(self[-1], TokenList) and + self[-1][-1].token_type == 'cfws'): post = ' ' return pre+quote_string(self.display_name)+post else: @@ -962,6 +606,7 @@ def value(self): class LocalPart(TokenList): token_type = 'local-part' + as_ew_allowed = False @property def value(self): @@ -997,6 +642,7 @@ def local_part(self): class DomainLiteral(TokenList): token_type = 'domain-literal' + as_ew_allowed = False @property def domain(self): @@ -1083,6 +729,7 @@ def stripped_value(self): class MimeParameters(TokenList): token_type = 'mime-parameters' + syntactic_break = False @property def params(self): @@ -1091,7 +738,7 @@ def params(self): # to assume the RFC 2231 pieces can come in any order. However, we # output them in the order that we first see a given name, which gives # us a stable __str__. - params = OrderedDict() + params = {} # Using order preserving dict from Python 3.7+ for token in self: if not token.token_type.endswith('parameter'): continue @@ -1142,7 +789,7 @@ def params(self): else: try: value = value.decode(charset, 'surrogateescape') - except LookupError: + except (LookupError, UnicodeEncodeError): # XXX: there should really be a custom defect for # unknown character set to make it easy to find, # because otherwise unknown charset is a silent @@ -1167,6 +814,10 @@ def __str__(self): class ParameterizedHeaderValue(TokenList): + # Set this false so that the value doesn't wind up on a new line even + # if it and the parameters would fit there but not on the first line. + syntactic_break = False + @property def params(self): for token in reversed(self): @@ -1174,58 +825,50 @@ def params(self): return token.params return {} - @property - def parts(self): - if self and self[-1].token_type == 'mime-parameters': - # We don't want to start a new line if all of the params don't fit - # after the value, so unwrap the parameter list. - return TokenList(self[:-1] + self[-1]) - return TokenList(self).parts - class ContentType(ParameterizedHeaderValue): - token_type = 'content-type' + as_ew_allowed = False maintype = 'text' subtype = 'plain' class ContentDisposition(ParameterizedHeaderValue): - token_type = 'content-disposition' + as_ew_allowed = False content_disposition = None class ContentTransferEncoding(TokenList): - token_type = 'content-transfer-encoding' + as_ew_allowed = False cte = '7bit' class HeaderLabel(TokenList): - token_type = 'header-label' + as_ew_allowed = False -class Header(TokenList): +class MsgID(TokenList): + token_type = 'msg-id' + as_ew_allowed = False - token_type = 'header' + def fold(self, policy): + # message-id tokens may not be folded. + return str(self) + policy.linesep + + +class MessageID(MsgID): + token_type = 'message-id' - def _fold(self, folded): - folded.append(str(self.pop(0))) - folded.lastlen = len(folded.current[0]) - # The first line of the header is different from all others: we don't - # want to start a new object on a new line if it has any fold points in - # it that would allow part of it to be on the first header line. - # Further, if the first fold point would fit on the new line, we want - # to do that, but if it doesn't we want to put it on the first line. - # Folded supports this via the stickyspace attribute. If this - # attribute is not None, it does the special handling. - folded.stickyspace = str(self.pop(0)) if self[0].token_type == 'cfws' else '' - rest = self.pop(0) - if self: - raise ValueError("Malformed Header token list") - rest._fold(folded) + +class InvalidMessageID(MessageID): + token_type = 'invalid-message-id' + + +class Header(TokenList): + token_type = 'header' # @@ -1234,6 +877,10 @@ def _fold(self, folded): class Terminal(str): + as_ew_allowed = True + ew_combine_allowed = True + syntactic_break = True + def __new__(cls, value, token_type): self = super().__new__(cls, value) self.token_type = token_type @@ -1243,6 +890,9 @@ def __new__(cls, value, token_type): def __repr__(self): return "{}({})".format(self.__class__.__name__, super().__repr__()) + def pprint(self): + print(self.__class__.__name__ + '/' + self.token_type) + @property def all_defects(self): return list(self.defects) @@ -1256,29 +906,14 @@ def _pp(self, indent=''): '' if not self.defects else ' {}'.format(self.defects), )] - def cte_encode(self, charset, policy): - value = str(self) - try: - value.encode('us-ascii') - return value - except UnicodeEncodeError: - return _ew.encode(value, charset) - def pop_trailing_ws(self): # This terminates the recursion. return None - def pop_leading_fws(self): - # This terminates the recursion. - return None - @property def comments(self): return [] - def has_leading_comment(self): - return False - def __getnewargs__(self): return(str(self), self.token_type) @@ -1292,8 +927,6 @@ def value(self): def startswith_fws(self): return True - has_fws = True - class ValueTerminal(Terminal): @@ -1304,11 +937,6 @@ def value(self): def startswith_fws(self): return False - has_fws = False - - def as_encoded_word(self, charset): - return _ew.encode(str(self), charset) - class EWWhiteSpaceTerminal(WhiteSpaceTerminal): @@ -1316,14 +944,12 @@ class EWWhiteSpaceTerminal(WhiteSpaceTerminal): def value(self): return '' - @property - def encoded(self): - return self[:] - def __str__(self): return '' - has_fws = True + +class _InvalidEwError(errors.HeaderParseError): + """Invalid encoded word found while parsing headers.""" # XXX these need to become classes and used as instances so @@ -1331,6 +957,8 @@ def __str__(self): # up other parse trees. Maybe should have tests for that, too. DOT = ValueTerminal('.', 'dot') ListSeparator = ValueTerminal(',', 'list-separator') +ListSeparator.as_ew_allowed = False +ListSeparator.syntactic_break = False RouteComponentMarker = ValueTerminal('@', 'route-component-marker') # @@ -1356,15 +984,14 @@ def __str__(self): _wsp_splitter = re.compile(r'([{}]+)'.format(''.join(WSP))).split _non_atom_end_matcher = re.compile(r"[^{}]+".format( - ''.join(ATOM_ENDS).replace('\\','\\\\').replace(']',r'\]'))).match + re.escape(''.join(ATOM_ENDS)))).match _non_printable_finder = re.compile(r"[\x00-\x20\x7F]").findall _non_token_end_matcher = re.compile(r"[^{}]+".format( - ''.join(TOKEN_ENDS).replace('\\','\\\\').replace(']',r'\]'))).match + re.escape(''.join(TOKEN_ENDS)))).match _non_attribute_end_matcher = re.compile(r"[^{}]+".format( - ''.join(ATTRIBUTE_ENDS).replace('\\','\\\\').replace(']',r'\]'))).match + re.escape(''.join(ATTRIBUTE_ENDS)))).match _non_extended_attribute_end_matcher = re.compile(r"[^{}]+".format( - ''.join(EXTENDED_ATTRIBUTE_ENDS).replace( - '\\','\\\\').replace(']',r'\]'))).match + re.escape(''.join(EXTENDED_ATTRIBUTE_ENDS)))).match def _validate_xtext(xtext): """If input token contains ASCII non-printables, register a defect.""" @@ -1431,7 +1058,10 @@ def get_encoded_word(value): raise errors.HeaderParseError( "expected encoded word but found {}".format(value)) remstr = ''.join(remainder) - if len(remstr) > 1 and remstr[0] in hexdigits and remstr[1] in hexdigits: + if (len(remstr) > 1 and + remstr[0] in hexdigits and + remstr[1] in hexdigits and + tok.count('?') < 2): # The ? after the CTE was followed by an encoded word escape (=XX). rest, *remainder = remstr.split('?=', 1) tok = tok + '?=' + rest @@ -1442,8 +1072,8 @@ def get_encoded_word(value): value = ''.join(remainder) try: text, charset, lang, defects = _ew.decode('=?' + tok + '?=') - except ValueError: - raise errors.HeaderParseError( + except (ValueError, KeyError): + raise _InvalidEwError( "encoded word format invalid: '{}'".format(ew.cte)) ew.charset = charset ew.lang = lang @@ -1458,6 +1088,10 @@ def get_encoded_word(value): _validate_xtext(vtext) ew.append(vtext) text = ''.join(remainder) + # Encoded words should be followed by a WS + if value and value[0] not in WSP: + ew.defects.append(errors.InvalidHeaderDefect( + "missing trailing whitespace after encoded-word")) return ew, value def get_unstructured(value): @@ -1489,9 +1123,12 @@ def get_unstructured(value): token, value = get_fws(value) unstructured.append(token) continue + valid_ew = True if value.startswith('=?'): try: token, value = get_encoded_word(value) + except _InvalidEwError: + valid_ew = False except errors.HeaderParseError: # XXX: Need to figure out how to register defects when # appropriate here. @@ -1510,6 +1147,14 @@ def get_unstructured(value): unstructured.append(token) continue tok, *remainder = _wsp_splitter(value, 1) + # Split in the middle of an atom if there is a rfc2047 encoded word + # which does not have WSP on both sides. The defect will be registered + # the next time through the loop. + # This needs to only be performed when the encoded word is valid; + # otherwise, performing it on an invalid encoded word can cause + # the parser to go in an infinite loop. + if valid_ew and rfc2047_matcher.search(tok): + tok, *remainder = value.partition('=?') vtext = ValueTerminal(tok, 'vtext') _validate_xtext(vtext) unstructured.append(vtext) @@ -1571,21 +1216,33 @@ def get_bare_quoted_string(value): value is the text between the quote marks, with whitespace preserved and quoted pairs decoded. """ - if value[0] != '"': + if not value or value[0] != '"': raise errors.HeaderParseError( "expected '\"' but found '{}'".format(value)) bare_quoted_string = BareQuotedString() value = value[1:] + if value and value[0] == '"': + token, value = get_qcontent(value) + bare_quoted_string.append(token) while value and value[0] != '"': if value[0] in WSP: token, value = get_fws(value) elif value[:2] == '=?': + valid_ew = False try: token, value = get_encoded_word(value) bare_quoted_string.defects.append(errors.InvalidHeaderDefect( "encoded word inside quoted string")) + valid_ew = True except errors.HeaderParseError: token, value = get_qcontent(value) + # Collapse the whitespace between two encoded words that occur in a + # bare-quoted-string. + if valid_ew and len(bare_quoted_string) > 1: + if (bare_quoted_string[-1].token_type == 'fws' and + bare_quoted_string[-2].token_type == 'encoded-word'): + bare_quoted_string[-1] = EWWhiteSpaceTerminal( + bare_quoted_string[-1], 'fws') else: token, value = get_qcontent(value) bare_quoted_string.append(token) @@ -1742,6 +1399,9 @@ def get_word(value): leader, value = get_cfws(value) else: leader = None + if not value: + raise errors.HeaderParseError( + "Expected 'atom' or 'quoted-string' but found nothing.") if value[0]=='"': token, value = get_quoted_string(value) elif value[0] in SPECIALS: @@ -1797,7 +1457,7 @@ def get_local_part(value): """ local_part = LocalPart() leader = None - if value[0] in CFWS_LEADER: + if value and value[0] in CFWS_LEADER: leader, value = get_cfws(value) if not value: raise errors.HeaderParseError( @@ -1863,13 +1523,18 @@ def get_obs_local_part(value): raise token, value = get_cfws(value) obs_local_part.append(token) + if not obs_local_part: + raise errors.HeaderParseError( + "expected obs-local-part but found '{}'".format(value)) if (obs_local_part[0].token_type == 'dot' or obs_local_part[0].token_type=='cfws' and + len(obs_local_part) > 1 and obs_local_part[1].token_type=='dot'): obs_local_part.defects.append(errors.InvalidHeaderDefect( "Invalid leading '.' in local part")) if (obs_local_part[-1].token_type == 'dot' or obs_local_part[-1].token_type=='cfws' and + len(obs_local_part) > 1 and obs_local_part[-2].token_type=='dot'): obs_local_part.defects.append(errors.InvalidHeaderDefect( "Invalid trailing '.' in local part")) @@ -1951,7 +1616,7 @@ def get_domain(value): """ domain = Domain() leader = None - if value[0] in CFWS_LEADER: + if value and value[0] in CFWS_LEADER: leader, value = get_cfws(value) if not value: raise errors.HeaderParseError( @@ -1966,6 +1631,8 @@ def get_domain(value): token, value = get_dot_atom(value) except errors.HeaderParseError: token, value = get_atom(value) + if value and value[0] == '@': + raise errors.HeaderParseError('Invalid Domain') if leader is not None: token[:0] = [leader] domain.append(token) @@ -1989,7 +1656,7 @@ def get_addr_spec(value): addr_spec.append(token) if not value or value[0] != '@': addr_spec.defects.append(errors.InvalidHeaderDefect( - "add-spec local part with no domain")) + "addr-spec local part with no domain")) return addr_spec, value addr_spec.append(ValueTerminal('@', 'address-at-symbol')) token, value = get_domain(value[1:]) @@ -2025,6 +1692,8 @@ def get_obs_route(value): if value[0] in CFWS_LEADER: token, value = get_cfws(value) obs_route.append(token) + if not value: + break if value[0] == '@': obs_route.append(RouteComponentMarker) token, value = get_domain(value[1:]) @@ -2043,7 +1712,7 @@ def get_angle_addr(value): """ angle_addr = AngleAddr() - if value[0] in CFWS_LEADER: + if value and value[0] in CFWS_LEADER: token, value = get_cfws(value) angle_addr.append(token) if not value or value[0] != '<': @@ -2053,7 +1722,7 @@ def get_angle_addr(value): value = value[1:] # Although it is not legal per RFC5322, SMTP uses '<>' in certain # circumstances. - if value[0] == '>': + if value and value[0] == '>': angle_addr.append(ValueTerminal('>', 'angle-addr-end')) angle_addr.defects.append(errors.InvalidHeaderDefect( "null addr-spec in angle-addr")) @@ -2105,6 +1774,9 @@ def get_name_addr(value): name_addr = NameAddr() # Both the optional display name and the angle-addr can start with cfws. leader = None + if not value: + raise errors.HeaderParseError( + "expected name-addr but found '{}'".format(value)) if value[0] in CFWS_LEADER: leader, value = get_cfws(value) if not value: @@ -2119,7 +1791,10 @@ def get_name_addr(value): raise errors.HeaderParseError( "expected name-addr but found '{}'".format(token)) if leader is not None: - token[0][:0] = [leader] + if isinstance(token[0], TokenList): + token[0][:0] = [leader] + else: + token[:0] = [leader] leader = None name_addr.append(token) token, value = get_angle_addr(value) @@ -2281,7 +1956,7 @@ def get_group(value): if not value: group.defects.append(errors.InvalidHeaderDefect( "end of header in group")) - if value[0] != ';': + elif value[0] != ';': raise errors.HeaderParseError( "expected ';' at end of group but found {}".format(value)) group.append(ValueTerminal(';', 'group-terminator')) @@ -2335,7 +2010,7 @@ def get_address_list(value): try: token, value = get_address(value) address_list.append(token) - except errors.HeaderParseError as err: + except errors.HeaderParseError: leader = None if value[0] in CFWS_LEADER: leader, value = get_cfws(value) @@ -2370,10 +2045,122 @@ def get_address_list(value): address_list.defects.append(errors.InvalidHeaderDefect( "invalid address in address-list")) if value: # Must be a , at this point. - address_list.append(ValueTerminal(',', 'list-separator')) + address_list.append(ListSeparator) value = value[1:] return address_list, value + +def get_no_fold_literal(value): + """ no-fold-literal = "[" *dtext "]" + """ + no_fold_literal = NoFoldLiteral() + if not value: + raise errors.HeaderParseError( + "expected no-fold-literal but found '{}'".format(value)) + if value[0] != '[': + raise errors.HeaderParseError( + "expected '[' at the start of no-fold-literal " + "but found '{}'".format(value)) + no_fold_literal.append(ValueTerminal('[', 'no-fold-literal-start')) + value = value[1:] + token, value = get_dtext(value) + no_fold_literal.append(token) + if not value or value[0] != ']': + raise errors.HeaderParseError( + "expected ']' at the end of no-fold-literal " + "but found '{}'".format(value)) + no_fold_literal.append(ValueTerminal(']', 'no-fold-literal-end')) + return no_fold_literal, value[1:] + +def get_msg_id(value): + """msg-id = [CFWS] "<" id-left '@' id-right ">" [CFWS] + id-left = dot-atom-text / obs-id-left + id-right = dot-atom-text / no-fold-literal / obs-id-right + no-fold-literal = "[" *dtext "]" + """ + msg_id = MsgID() + if value and value[0] in CFWS_LEADER: + token, value = get_cfws(value) + msg_id.append(token) + if not value or value[0] != '<': + raise errors.HeaderParseError( + "expected msg-id but found '{}'".format(value)) + msg_id.append(ValueTerminal('<', 'msg-id-start')) + value = value[1:] + # Parse id-left. + try: + token, value = get_dot_atom_text(value) + except errors.HeaderParseError: + try: + # obs-id-left is same as local-part of add-spec. + token, value = get_obs_local_part(value) + msg_id.defects.append(errors.ObsoleteHeaderDefect( + "obsolete id-left in msg-id")) + except errors.HeaderParseError: + raise errors.HeaderParseError( + "expected dot-atom-text or obs-id-left" + " but found '{}'".format(value)) + msg_id.append(token) + if not value or value[0] != '@': + msg_id.defects.append(errors.InvalidHeaderDefect( + "msg-id with no id-right")) + # Even though there is no id-right, if the local part + # ends with `>` let's just parse it too and return + # along with the defect. + if value and value[0] == '>': + msg_id.append(ValueTerminal('>', 'msg-id-end')) + value = value[1:] + return msg_id, value + msg_id.append(ValueTerminal('@', 'address-at-symbol')) + value = value[1:] + # Parse id-right. + try: + token, value = get_dot_atom_text(value) + except errors.HeaderParseError: + try: + token, value = get_no_fold_literal(value) + except errors.HeaderParseError: + try: + token, value = get_domain(value) + msg_id.defects.append(errors.ObsoleteHeaderDefect( + "obsolete id-right in msg-id")) + except errors.HeaderParseError: + raise errors.HeaderParseError( + "expected dot-atom-text, no-fold-literal or obs-id-right" + " but found '{}'".format(value)) + msg_id.append(token) + if value and value[0] == '>': + value = value[1:] + else: + msg_id.defects.append(errors.InvalidHeaderDefect( + "missing trailing '>' on msg-id")) + msg_id.append(ValueTerminal('>', 'msg-id-end')) + if value and value[0] in CFWS_LEADER: + token, value = get_cfws(value) + msg_id.append(token) + return msg_id, value + + +def parse_message_id(value): + """message-id = "Message-ID:" msg-id CRLF + """ + message_id = MessageID() + try: + token, value = get_msg_id(value) + message_id.append(token) + except errors.HeaderParseError as ex: + token = get_unstructured(value) + message_id = InvalidMessageID(token) + message_id.defects.append( + errors.InvalidHeaderDefect("Invalid msg-id: {!r}".format(ex))) + else: + # Value after parsing a valid msg_id should be None. + if value: + message_id.defects.append(errors.InvalidHeaderDefect( + "Unexpected {!r}".format(value))) + + return message_id + # # XXX: As I begin to add additional header parsers, I'm realizing we probably # have two level of parser routines: the get_XXX methods that get a token in @@ -2615,8 +2402,8 @@ def get_section(value): digits += value[0] value = value[1:] if digits[0] == '0' and digits != '0': - section.defects.append(errors.InvalidHeaderError("section number" - "has an invalid leading 0")) + section.defects.append(errors.InvalidHeaderDefect( + "section number has an invalid leading 0")) section.number = int(digits) section.append(ValueTerminal(digits, 'digits')) return section, value @@ -2679,7 +2466,6 @@ def get_parameter(value): raise errors.HeaderParseError("Parameter not followed by '='") param.append(ValueTerminal('=', 'parameter-separator')) value = value[1:] - leader = None if value and value[0] in CFWS_LEADER: token, value = get_cfws(value) param.append(token) @@ -2754,7 +2540,7 @@ def get_parameter(value): if value[0] != "'": raise errors.HeaderParseError("Expected RFC2231 char/lang encoding " "delimiter, but found {!r}".format(value)) - appendto.append(ValueTerminal("'", 'RFC2231 delimiter')) + appendto.append(ValueTerminal("'", 'RFC2231-delimiter')) value = value[1:] if value and value[0] != "'": token, value = get_attrtext(value) @@ -2763,7 +2549,7 @@ def get_parameter(value): if not value or value[0] != "'": raise errors.HeaderParseError("Expected RFC2231 char/lang encoding " "delimiter, but found {}".format(value)) - appendto.append(ValueTerminal("'", 'RFC2231 delimiter')) + appendto.append(ValueTerminal("'", 'RFC2231-delimiter')) value = value[1:] if remainder is not None: # Treat the rest of value as bare quoted string content. @@ -2771,6 +2557,9 @@ def get_parameter(value): while value: if value[0] in WSP: token, value = get_fws(value) + elif value[0] == '"': + token = ValueTerminal('"', 'DQUOTE') + value = value[1:] else: token, value = get_qcontent(value) v.append(token) @@ -2791,7 +2580,7 @@ def parse_mime_parameters(value): the formal RFC grammar, but it is more convenient for us for the set of parameters to be treated as its own TokenList. - This is 'parse' routine because it consumes the reminaing value, but it + This is 'parse' routine because it consumes the remaining value, but it would never be called to parse a full header. Instead it is called to parse everything after the non-parameter value of a specific MIME header. @@ -2801,7 +2590,7 @@ def parse_mime_parameters(value): try: token, value = get_parameter(value) mime_parameters.append(token) - except errors.HeaderParseError as err: + except errors.HeaderParseError: leader = None if value[0] in CFWS_LEADER: leader, value = get_cfws(value) @@ -2859,7 +2648,6 @@ def parse_content_type_header(value): don't do that. """ ctype = ContentType() - recover = False if not value: ctype.defects.append(errors.HeaderMissingRequiredValue( "Missing content type specification")) @@ -2968,3 +2756,323 @@ def parse_content_transfer_encoding_header(value): token, value = get_phrase(value) cte_header.append(token) return cte_header + + +# +# Header folding +# +# Header folding is complex, with lots of rules and corner cases. The +# following code does its best to obey the rules and handle the corner +# cases, but you can be sure there are few bugs:) +# +# This folder generally canonicalizes as it goes, preferring the stringified +# version of each token. The tokens contain information that supports the +# folder, including which tokens can be encoded in which ways. +# +# Folded text is accumulated in a simple list of strings ('lines'), each +# one of which should be less than policy.max_line_length ('maxlen'). +# + +def _steal_trailing_WSP_if_exists(lines): + wsp = '' + if lines and lines[-1] and lines[-1][-1] in WSP: + wsp = lines[-1][-1] + lines[-1] = lines[-1][:-1] + return wsp + +def _refold_parse_tree(parse_tree, *, policy): + """Return string of contents of parse_tree folded according to RFC rules. + + """ + # max_line_length 0/None means no limit, ie: infinitely long. + maxlen = policy.max_line_length or sys.maxsize + encoding = 'utf-8' if policy.utf8 else 'us-ascii' + lines = [''] # Folded lines to be output + leading_whitespace = '' # When we have whitespace between two encoded + # words, we may need to encode the whitespace + # at the beginning of the second word. + last_ew = None # Points to the last encoded character if there's an ew on + # the line + last_charset = None + wrap_as_ew_blocked = 0 + want_encoding = False # This is set to True if we need to encode this part + end_ew_not_allowed = Terminal('', 'wrap_as_ew_blocked') + parts = list(parse_tree) + while parts: + part = parts.pop(0) + if part is end_ew_not_allowed: + wrap_as_ew_blocked -= 1 + continue + tstr = str(part) + if not want_encoding: + if part.token_type == 'ptext': + # Encode if tstr contains special characters. + want_encoding = not SPECIALSNL.isdisjoint(tstr) + else: + # Encode if tstr contains newlines. + want_encoding = not NLSET.isdisjoint(tstr) + try: + tstr.encode(encoding) + charset = encoding + except UnicodeEncodeError: + if any(isinstance(x, errors.UndecodableBytesDefect) + for x in part.all_defects): + charset = 'unknown-8bit' + else: + # If policy.utf8 is false this should really be taken from a + # 'charset' property on the policy. + charset = 'utf-8' + want_encoding = True + + if part.token_type == 'mime-parameters': + # Mime parameter folding (using RFC2231) is extra special. + _fold_mime_parameters(part, lines, maxlen, encoding) + continue + + if want_encoding and not wrap_as_ew_blocked: + if not part.as_ew_allowed: + want_encoding = False + last_ew = None + if part.syntactic_break: + encoded_part = part.fold(policy=policy)[:-len(policy.linesep)] + if policy.linesep not in encoded_part: + # It fits on a single line + if len(encoded_part) > maxlen - len(lines[-1]): + # But not on this one, so start a new one. + newline = _steal_trailing_WSP_if_exists(lines) + # XXX what if encoded_part has no leading FWS? + lines.append(newline) + lines[-1] += encoded_part + continue + # Either this is not a major syntactic break, so we don't + # want it on a line by itself even if it fits, or it + # doesn't fit on a line by itself. Either way, fall through + # to unpacking the subparts and wrapping them. + if not hasattr(part, 'encode'): + # It's not a Terminal, do each piece individually. + parts = list(part) + parts + want_encoding = False + continue + elif part.as_ew_allowed: + # It's a terminal, wrap it as an encoded word, possibly + # combining it with previously encoded words if allowed. + if (last_ew is not None and + charset != last_charset and + (last_charset == 'unknown-8bit' or + last_charset == 'utf-8' and charset != 'us-ascii')): + last_ew = None + last_ew = _fold_as_ew(tstr, lines, maxlen, last_ew, + part.ew_combine_allowed, charset, leading_whitespace) + # This whitespace has been added to the lines in _fold_as_ew() + # so clear it now. + leading_whitespace = '' + last_charset = charset + want_encoding = False + continue + else: + # It's a terminal which should be kept non-encoded + # (e.g. a ListSeparator). + last_ew = None + want_encoding = False + # fall through + + if len(tstr) <= maxlen - len(lines[-1]): + lines[-1] += tstr + continue + + # This part is too long to fit. The RFC wants us to break at + # "major syntactic breaks", so unless we don't consider this + # to be one, check if it will fit on the next line by itself. + leading_whitespace = '' + if (part.syntactic_break and + len(tstr) + 1 <= maxlen): + newline = _steal_trailing_WSP_if_exists(lines) + if newline or part.startswith_fws(): + # We're going to fold the data onto a new line here. Due to + # the way encoded strings handle continuation lines, we need to + # be prepared to encode any whitespace if the next line turns + # out to start with an encoded word. + lines.append(newline + tstr) + + whitespace_accumulator = [] + for char in lines[-1]: + if char not in WSP: + break + whitespace_accumulator.append(char) + leading_whitespace = ''.join(whitespace_accumulator) + last_ew = None + continue + if not hasattr(part, 'encode'): + # It's not a terminal, try folding the subparts. + newparts = list(part) + if not part.as_ew_allowed: + wrap_as_ew_blocked += 1 + newparts.append(end_ew_not_allowed) + parts = newparts + parts + continue + if part.as_ew_allowed and not wrap_as_ew_blocked: + # It doesn't need CTE encoding, but encode it anyway so we can + # wrap it. + parts.insert(0, part) + want_encoding = True + continue + # We can't figure out how to wrap, it, so give up. + newline = _steal_trailing_WSP_if_exists(lines) + if newline or part.startswith_fws(): + lines.append(newline + tstr) + else: + # We can't fold it onto the next line either... + lines[-1] += tstr + + return policy.linesep.join(lines) + policy.linesep + +def _fold_as_ew(to_encode, lines, maxlen, last_ew, ew_combine_allowed, charset, leading_whitespace): + """Fold string to_encode into lines as encoded word, combining if allowed. + Return the new value for last_ew, or None if ew_combine_allowed is False. + + If there is already an encoded word in the last line of lines (indicated by + a non-None value for last_ew) and ew_combine_allowed is true, decode the + existing ew, combine it with to_encode, and re-encode. Otherwise, encode + to_encode. In either case, split to_encode as necessary so that the + encoded segments fit within maxlen. + + """ + if last_ew is not None and ew_combine_allowed: + to_encode = str( + get_unstructured(lines[-1][last_ew:] + to_encode)) + lines[-1] = lines[-1][:last_ew] + elif to_encode[0] in WSP: + # We're joining this to non-encoded text, so don't encode + # the leading blank. + leading_wsp = to_encode[0] + to_encode = to_encode[1:] + if (len(lines[-1]) == maxlen): + lines.append(_steal_trailing_WSP_if_exists(lines)) + lines[-1] += leading_wsp + + trailing_wsp = '' + if to_encode[-1] in WSP: + # Likewise for the trailing space. + trailing_wsp = to_encode[-1] + to_encode = to_encode[:-1] + new_last_ew = len(lines[-1]) if last_ew is None else last_ew + + encode_as = 'utf-8' if charset == 'us-ascii' else charset + + # The RFC2047 chrome takes up 7 characters plus the length + # of the charset name. + chrome_len = len(encode_as) + 7 + + if (chrome_len + 1) >= maxlen: + raise errors.HeaderParseError( + "max_line_length is too small to fit an encoded word") + + while to_encode: + remaining_space = maxlen - len(lines[-1]) + text_space = remaining_space - chrome_len - len(leading_whitespace) + if text_space <= 0: + lines.append(' ') + continue + + # If we are at the start of a continuation line, prepend whitespace + # (we only want to do this when the line starts with an encoded word + # but if we're folding in this helper function, then we know that we + # are going to be writing out an encoded word.) + if len(lines) > 1 and len(lines[-1]) == 1 and leading_whitespace: + encoded_word = _ew.encode(leading_whitespace, charset=encode_as) + lines[-1] += encoded_word + leading_whitespace = '' + + to_encode_word = to_encode[:text_space] + encoded_word = _ew.encode(to_encode_word, charset=encode_as) + excess = len(encoded_word) - remaining_space + while excess > 0: + # Since the chunk to encode is guaranteed to fit into less than 100 characters, + # shrinking it by one at a time shouldn't take long. + to_encode_word = to_encode_word[:-1] + encoded_word = _ew.encode(to_encode_word, charset=encode_as) + excess = len(encoded_word) - remaining_space + lines[-1] += encoded_word + to_encode = to_encode[len(to_encode_word):] + leading_whitespace = '' + + if to_encode: + lines.append(' ') + new_last_ew = len(lines[-1]) + lines[-1] += trailing_wsp + return new_last_ew if ew_combine_allowed else None + +def _fold_mime_parameters(part, lines, maxlen, encoding): + """Fold TokenList 'part' into the 'lines' list as mime parameters. + + Using the decoded list of parameters and values, format them according to + the RFC rules, including using RFC2231 encoding if the value cannot be + expressed in 'encoding' and/or the parameter+value is too long to fit + within 'maxlen'. + + """ + # Special case for RFC2231 encoding: start from decoded values and use + # RFC2231 encoding iff needed. + # + # Note that the 1 and 2s being added to the length calculations are + # accounting for the possibly-needed spaces and semicolons we'll be adding. + # + for name, value in part.params: + # XXX What if this ';' puts us over maxlen the first time through the + # loop? We should split the header value onto a newline in that case, + # but to do that we need to recognize the need earlier or reparse the + # header, so I'm going to ignore that bug for now. It'll only put us + # one character over. + if not lines[-1].rstrip().endswith(';'): + lines[-1] += ';' + charset = encoding + error_handler = 'strict' + try: + value.encode(encoding) + encoding_required = False + except UnicodeEncodeError: + encoding_required = True + if utils._has_surrogates(value): + charset = 'unknown-8bit' + error_handler = 'surrogateescape' + else: + charset = 'utf-8' + if encoding_required: + encoded_value = urllib.parse.quote( + value, safe='', errors=error_handler) + tstr = "{}*={}''{}".format(name, charset, encoded_value) + else: + tstr = '{}={}'.format(name, quote_string(value)) + if len(lines[-1]) + len(tstr) + 1 < maxlen: + lines[-1] = lines[-1] + ' ' + tstr + continue + elif len(tstr) + 2 <= maxlen: + lines.append(' ' + tstr) + continue + # We need multiple sections. We are allowed to mix encoded and + # non-encoded sections, but we aren't going to. We'll encode them all. + section = 0 + extra_chrome = charset + "''" + while value: + chrome_len = len(name) + len(str(section)) + 3 + len(extra_chrome) + if maxlen <= chrome_len + 3: + # We need room for the leading blank, the trailing semicolon, + # and at least one character of the value. If we don't + # have that, we'd be stuck, so in that case fall back to + # the RFC standard width. + maxlen = 78 + splitpoint = maxchars = maxlen - chrome_len - 2 + while True: + partial = value[:splitpoint] + encoded_value = urllib.parse.quote( + partial, safe='', errors=error_handler) + if len(encoded_value) <= maxchars: + break + splitpoint -= 1 + lines.append(" {}*{}*={}{}".format( + name, section, extra_chrome, encoded_value)) + extra_chrome = '' + section += 1 + value = value[splitpoint:] + if value: + lines[-1] += ';' diff --git a/Lib/email/_parseaddr.py b/Lib/email/_parseaddr.py index cdfa3729ad..0f1bf8e425 100644 --- a/Lib/email/_parseaddr.py +++ b/Lib/email/_parseaddr.py @@ -13,7 +13,7 @@ 'quote', ] -import time, calendar +import time SPACE = ' ' EMPTYSTRING = '' @@ -65,8 +65,10 @@ def _parsedate_tz(data): """ if not data: - return + return None data = data.split() + if not data: # This happens for whitespace-only input. + return None # The FWS after the comma after the day-of-week is optional, so search and # adjust for this. if data[0].endswith(',') or data[0].lower() in _daynames: @@ -93,6 +95,8 @@ def _parsedate_tz(data): return None data = data[:5] [dd, mm, yy, tm, tz] = data + if not (dd and mm and yy): + return None mm = mm.lower() if mm not in _monthnames: dd, mm = mm, dd.lower() @@ -108,6 +112,8 @@ def _parsedate_tz(data): yy, tm = tm, yy if yy[-1] == ',': yy = yy[:-1] + if not yy: + return None if not yy[0].isdigit(): yy, tz = tz, yy if tm[-1] == ',': @@ -126,6 +132,8 @@ def _parsedate_tz(data): tss = 0 elif len(tm) == 3: [thh, tmm, tss] = tm + else: + return None else: return None try: @@ -186,6 +194,9 @@ def mktime_tz(data): # No zone info, so localtime is better assumption than GMT return time.mktime(data[:8] + (-1,)) else: + # Delay the import, since mktime_tz is rarely used + import calendar + t = calendar.timegm(data) return t - data[9] @@ -379,7 +390,12 @@ def getaddrspec(self): aslist.append('@') self.pos += 1 self.gotonext() - return EMPTYSTRING.join(aslist) + self.getdomain() + domain = self.getdomain() + if not domain: + # Invalid domain, return an empty address instead of returning a + # local part to denote failed parsing. + return EMPTYSTRING + return EMPTYSTRING.join(aslist) + domain def getdomain(self): """Get the complete domain name from an address.""" @@ -394,6 +410,10 @@ def getdomain(self): elif self.field[self.pos] == '.': self.pos += 1 sdlist.append('.') + elif self.field[self.pos] == '@': + # bpo-34155: Don't parse domains with two `@` like + # `a@malicious.org@important.com`. + return EMPTYSTRING elif self.field[self.pos] in self.atomends: break else: diff --git a/Lib/email/_policybase.py b/Lib/email/_policybase.py index df4649676a..c9f0d74309 100644 --- a/Lib/email/_policybase.py +++ b/Lib/email/_policybase.py @@ -152,11 +152,18 @@ class Policy(_PolicyBase, metaclass=abc.ABCMeta): mangle_from_ -- a flag that, when True escapes From_ lines in the body of the message by putting a `>' in front of them. This is used when the message is being - serialized by a generator. Default: True. + serialized by a generator. Default: False. message_factory -- the class to use to create new message objects. If the value is None, the default is Message. + verify_generated_headers + -- if true, the generator verifies that each header + they are properly folded, so that a parser won't + treat it as multiple headers, start-of-body, or + part of another header. + This is a check against custom Header & fold() + implementations. """ raise_on_defect = False @@ -165,6 +172,7 @@ class Policy(_PolicyBase, metaclass=abc.ABCMeta): max_line_length = 78 mangle_from_ = False message_factory = None + verify_generated_headers = True def handle_defect(self, obj, defect): """Based on policy, either raise defect or call register_defect. @@ -294,12 +302,12 @@ def header_source_parse(self, sourcelines): """+ The name is parsed as everything up to the ':' and returned unmodified. The value is determined by stripping leading whitespace off the - remainder of the first line, joining all subsequent lines together, and + remainder of the first line joined with all subsequent lines, and stripping any trailing carriage return or linefeed characters. """ name, value = sourcelines[0].split(':', 1) - value = value.lstrip(' \t') + ''.join(sourcelines[1:]) + value = ''.join((value, *sourcelines[1:])).lstrip(' \t\r\n') return (name, value.rstrip('\r\n')) def header_store_parse(self, name, value): @@ -361,8 +369,12 @@ def _fold(self, name, value, sanitize): # Assume it is a Header-like object. h = value if h is not None: - parts.append(h.encode(linesep=self.linesep, - maxlinelen=self.max_line_length)) + # The Header class interprets a value of None for maxlinelen as the + # default value of 78, as recommended by RFC 2822. + maxlinelen = 0 + if self.max_line_length is not None: + maxlinelen = self.max_line_length + parts.append(h.encode(linesep=self.linesep, maxlinelen=maxlinelen)) parts.append(self.linesep) return ''.join(parts) diff --git a/Lib/email/architecture.rst b/Lib/email/architecture.rst index 78572ae63b..fcd10bde13 100644 --- a/Lib/email/architecture.rst +++ b/Lib/email/architecture.rst @@ -66,7 +66,7 @@ data payloads. Message Lifecycle ----------------- -The general lifecyle of a message is: +The general lifecycle of a message is: Creation A `Message` object can be created by a Parser, or it can be diff --git a/Lib/email/base64mime.py b/Lib/email/base64mime.py index 17f0818f6c..4cdf22666e 100644 --- a/Lib/email/base64mime.py +++ b/Lib/email/base64mime.py @@ -45,7 +45,6 @@ MISC_LEN = 7 - # Helpers def header_length(bytearray): """Return the length of s when it is encoded with base64.""" @@ -57,7 +56,6 @@ def header_length(bytearray): return n - def header_encode(header_bytes, charset='iso-8859-1'): """Encode a single header line with Base64 encoding in a given charset. @@ -72,7 +70,6 @@ def header_encode(header_bytes, charset='iso-8859-1'): return '=?%s?b?%s?=' % (charset, encoded) - def body_encode(s, maxlinelen=76, eol=NL): r"""Encode a string with base64. @@ -84,7 +81,7 @@ def body_encode(s, maxlinelen=76, eol=NL): in an email. """ if not s: - return s + return "" encvec = [] max_unencoded = maxlinelen * 3 // 4 @@ -98,7 +95,6 @@ def body_encode(s, maxlinelen=76, eol=NL): return EMPTYSTRING.join(encvec) - def decode(string): """Decode a raw base64 string, returning a bytes object. diff --git a/Lib/email/charset.py b/Lib/email/charset.py index ee564040c6..043801107b 100644 --- a/Lib/email/charset.py +++ b/Lib/email/charset.py @@ -18,7 +18,6 @@ from email.encoders import encode_7or8bit - # Flags for types of header encodings QP = 1 # Quoted-Printable BASE64 = 2 # Base64 @@ -32,7 +31,6 @@ EMPTYSTRING = '' - # Defaults CHARSETS = { # input header enc body enc output conv @@ -104,7 +102,6 @@ } - # Convenience functions for extending the above mappings def add_charset(charset, header_enc=None, body_enc=None, output_charset=None): """Add character set properties to the global registry. @@ -112,8 +109,8 @@ def add_charset(charset, header_enc=None, body_enc=None, output_charset=None): charset is the input character set, and must be the canonical name of a character set. - Optional header_enc and body_enc is either Charset.QP for - quoted-printable, Charset.BASE64 for base64 encoding, Charset.SHORTEST for + Optional header_enc and body_enc is either charset.QP for + quoted-printable, charset.BASE64 for base64 encoding, charset.SHORTEST for the shortest of qp or base64 encoding, or None for no encoding. SHORTEST is only valid for header_enc. It describes how message headers and message bodies in the input charset are to be encoded. Default is no @@ -153,7 +150,6 @@ def add_codec(charset, codecname): CODEC_MAP[charset] = codecname - # Convenience function for encoding strings, taking into account # that they might be unknown-8bit (ie: have surrogate-escaped bytes) def _encode(string, codec): @@ -163,7 +159,6 @@ def _encode(string, codec): return string.encode(codec) - class Charset: """Map character sets to their email properties. @@ -185,13 +180,13 @@ class Charset: header_encoding: If the character set must be encoded before it can be used in an email header, this attribute will be set to - Charset.QP (for quoted-printable), Charset.BASE64 (for - base64 encoding), or Charset.SHORTEST for the shortest of + charset.QP (for quoted-printable), charset.BASE64 (for + base64 encoding), or charset.SHORTEST for the shortest of QP or BASE64 encoding. Otherwise, it will be None. body_encoding: Same as header_encoding, but describes the encoding for the mail message's body, which indeed may be different than the - header encoding. Charset.SHORTEST is not allowed for + header encoding. charset.SHORTEST is not allowed for body_encoding. output_charset: Some character sets must be converted before they can be @@ -241,11 +236,9 @@ def __init__(self, input_charset=DEFAULT_CHARSET): self.output_codec = CODEC_MAP.get(self.output_charset, self.output_charset) - def __str__(self): + def __repr__(self): return self.input_charset.lower() - __repr__ = __str__ - def __eq__(self, other): return str(self) == str(other).lower() @@ -348,7 +341,6 @@ def header_encode_lines(self, string, maxlengths): if not lines and not current_line: lines.append(None) else: - separator = (' ' if lines else '') joined_line = EMPTYSTRING.join(current_line) header_bytes = _encode(joined_line, codec) lines.append(encoder(header_bytes)) diff --git a/Lib/email/contentmanager.py b/Lib/email/contentmanager.py index b904ded94c..b4f5830bea 100644 --- a/Lib/email/contentmanager.py +++ b/Lib/email/contentmanager.py @@ -72,12 +72,14 @@ def get_non_text_content(msg): return msg.get_payload(decode=True) for maintype in 'audio image video application'.split(): raw_data_manager.add_get_handler(maintype, get_non_text_content) +del maintype def get_message_content(msg): return msg.get_payload(0) for subtype in 'rfc822 external-body'.split(): raw_data_manager.add_get_handler('message/'+subtype, get_message_content) +del subtype def get_and_fixup_unknown_message_content(msg): @@ -144,15 +146,15 @@ def _encode_text(string, charset, cte, policy): linesep = policy.linesep.encode('ascii') def embedded_body(lines): return linesep.join(lines) + linesep def normal_body(lines): return b'\n'.join(lines) + b'\n' - if cte==None: + if cte is None: # Use heuristics to decide on the "best" encoding. - try: - return '7bit', normal_body(lines).decode('ascii') - except UnicodeDecodeError: - pass - if (policy.cte_type == '8bit' and - max(len(x) for x in lines) <= policy.max_line_length): - return '8bit', normal_body(lines).decode('ascii', 'surrogateescape') + if max((len(x) for x in lines), default=0) <= policy.max_line_length: + try: + return '7bit', normal_body(lines).decode('ascii') + except UnicodeDecodeError: + pass + if policy.cte_type == '8bit': + return '8bit', normal_body(lines).decode('ascii', 'surrogateescape') sniff = embedded_body(lines[:10]) sniff_qp = quoprimime.body_encode(sniff.decode('latin-1'), policy.max_line_length) @@ -238,9 +240,7 @@ def set_bytes_content(msg, data, maintype, subtype, cte='base64', data = binascii.b2a_qp(data, istext=False, header=False, quotetabs=True) data = data.decode('ascii') elif cte == '7bit': - # Make sure it really is only ASCII. The early warning here seems - # worth the overhead...if you care write your own content manager :). - data.encode('ascii') + data = data.decode('ascii') elif cte in ('8bit', 'binary'): data = data.decode('ascii', 'surrogateescape') msg.set_payload(data) @@ -248,3 +248,4 @@ def set_bytes_content(msg, data, maintype, subtype, cte='base64', _finalize_set(msg, disposition, filename, cid, params) for typ in (bytes, bytearray, memoryview): raw_data_manager.add_set_handler(typ, set_bytes_content) +del typ diff --git a/Lib/email/encoders.py b/Lib/email/encoders.py index 0a66acb624..17bd1ab7b1 100644 --- a/Lib/email/encoders.py +++ b/Lib/email/encoders.py @@ -16,7 +16,6 @@ from quopri import encodestring as _encodestring - def _qencode(s): enc = _encodestring(s, quotetabs=True) # Must encode spaces, which quopri.encodestring() doesn't do @@ -34,7 +33,6 @@ def encode_base64(msg): msg['Content-Transfer-Encoding'] = 'base64' - def encode_quopri(msg): """Encode the message's payload in quoted-printable. @@ -46,7 +44,6 @@ def encode_quopri(msg): msg['Content-Transfer-Encoding'] = 'quoted-printable' - def encode_7or8bit(msg): """Set the Content-Transfer-Encoding header to 7bit or 8bit.""" orig = msg.get_payload(decode=True) @@ -64,6 +61,5 @@ def encode_7or8bit(msg): msg['Content-Transfer-Encoding'] = '7bit' - def encode_noop(msg): """Do nothing.""" diff --git a/Lib/email/errors.py b/Lib/email/errors.py index 791239fa6a..02aa5eced6 100644 --- a/Lib/email/errors.py +++ b/Lib/email/errors.py @@ -29,6 +29,10 @@ class CharsetError(MessageError): """An illegal charset was given.""" +class HeaderWriteError(MessageError): + """Error while writing headers.""" + + # These are parsing defects which the parser was able to work around. class MessageDefect(ValueError): """Base class for a message defect.""" @@ -73,6 +77,9 @@ class InvalidBase64PaddingDefect(MessageDefect): class InvalidBase64CharactersDefect(MessageDefect): """base64 encoded sequence had characters not in base64 alphabet""" +class InvalidBase64LengthDefect(MessageDefect): + """base64 encoded sequence had invalid length (1 mod 4)""" + # These errors are specific to header parsing. class HeaderDefect(MessageDefect): @@ -105,3 +112,6 @@ class NonASCIILocalPartDefect(HeaderDefect): """local_part contains non-ASCII characters""" # This defect only occurs during unicode parsing, not when # parsing messages decoded from binary. + +class InvalidDateDefect(HeaderDefect): + """Header has unparsable or invalid date""" diff --git a/Lib/email/feedparser.py b/Lib/email/feedparser.py index 7c07ca8645..06d6b4a3af 100644 --- a/Lib/email/feedparser.py +++ b/Lib/email/feedparser.py @@ -37,11 +37,12 @@ headerRE = re.compile(r'^(From |[\041-\071\073-\176]*:|[\t ])') EMPTYSTRING = '' NL = '\n' +boundaryendRE = re.compile( + r'(?P--)?(?P[ \t]*)(?P\r\n|\r|\n)?$') NeedMoreData = object() - class BufferedSubFile(object): """A file-ish object that can have new data loaded into it. @@ -132,7 +133,6 @@ def __next__(self): return line - class FeedParser: """A feed-style parser of email.""" @@ -189,7 +189,7 @@ def close(self): assert not self._msgstack # Look for final set of defects if root.get_content_maintype() == 'multipart' \ - and not root.is_multipart(): + and not root.is_multipart() and not self._headersonly: defect = errors.MultipartInvariantViolationDefect() self.policy.handle_defect(root, defect) return root @@ -266,7 +266,7 @@ def _parsegen(self): yield NeedMoreData continue break - msg = self._pop_message() + self._pop_message() # We need to pop the EOF matcher in order to tell if we're at # the end of the current file, not the end of the last block # of message headers. @@ -320,7 +320,7 @@ def _parsegen(self): self._cur.set_payload(EMPTYSTRING.join(lines)) return # Make sure a valid content type was specified per RFC 2045:6.4. - if (self._cur.get('content-transfer-encoding', '8bit').lower() + if (str(self._cur.get('content-transfer-encoding', '8bit')).lower() not in ('7bit', '8bit', 'binary')): defect = errors.InvalidMultipartContentTransferEncodingDefect() self.policy.handle_defect(self._cur, defect) @@ -329,9 +329,10 @@ def _parsegen(self): # this onto the input stream until we've scanned past the # preamble. separator = '--' + boundary - boundaryre = re.compile( - '(?P' + re.escape(separator) + - r')(?P--)?(?P[ \t]*)(?P\r\n|\r|\n)?$') + def boundarymatch(line): + if not line.startswith(separator): + return None + return boundaryendRE.match(line, len(separator)) capturing_preamble = True preamble = [] linesep = False @@ -343,7 +344,7 @@ def _parsegen(self): continue if line == '': break - mo = boundaryre.match(line) + mo = boundarymatch(line) if mo: # If we're looking at the end boundary, we're done with # this multipart. If there was a newline at the end of @@ -375,13 +376,13 @@ def _parsegen(self): if line is NeedMoreData: yield NeedMoreData continue - mo = boundaryre.match(line) + mo = boundarymatch(line) if not mo: self._input.unreadline(line) break # Recurse to parse this subpart; the input stream points # at the subpart's first line. - self._input.push_eof_matcher(boundaryre.match) + self._input.push_eof_matcher(boundarymatch) for retval in self._parsegen(): if retval is NeedMoreData: yield NeedMoreData diff --git a/Lib/email/generator.py b/Lib/email/generator.py index ae670c2353..47b9df8f4e 100644 --- a/Lib/email/generator.py +++ b/Lib/email/generator.py @@ -14,15 +14,16 @@ from copy import deepcopy from io import StringIO, BytesIO from email.utils import _has_surrogates +from email.errors import HeaderWriteError UNDERSCORE = '_' NL = '\n' # XXX: no longer used by the code below. NLCRE = re.compile(r'\r\n|\r|\n') fcre = re.compile(r'^From ', re.MULTILINE) +NEWLINE_WITHOUT_FWSP = re.compile(r'\r\n[^ \t]|\r[^ \n\t]|\n[^ \t]') - class Generator: """Generates output from a Message object tree. @@ -170,7 +171,7 @@ def _write(self, msg): # parameter. # # The way we do this, so as to make the _handle_*() methods simpler, - # is to cache any subpart writes into a buffer. The we write the + # is to cache any subpart writes into a buffer. Then we write the # headers and the buffer contents. That way, subpart handlers can # Do The Right Thing, and can still modify the Content-Type: header if # necessary. @@ -186,7 +187,11 @@ def _write(self, msg): # If we munged the cte, copy the message again and re-fix the CTE. if munge_cte: msg = deepcopy(msg) - msg.replace_header('content-transfer-encoding', munge_cte[0]) + # Preserve the header order if the CTE header already exists. + if msg.get('content-transfer-encoding') is None: + msg['Content-Transfer-Encoding'] = munge_cte[0] + else: + msg.replace_header('content-transfer-encoding', munge_cte[0]) msg.replace_header('content-type', munge_cte[1]) # Write the headers. First we see if the message object wants to # handle that itself. If not, we'll do it generically. @@ -219,7 +224,16 @@ def _dispatch(self, msg): def _write_headers(self, msg): for h, v in msg.raw_items(): - self.write(self.policy.fold(h, v)) + folded = self.policy.fold(h, v) + if self.policy.verify_generated_headers: + linesep = self.policy.linesep + if not folded.endswith(self.policy.linesep): + raise HeaderWriteError( + f'folded header does not end with {linesep!r}: {folded!r}') + if NEWLINE_WITHOUT_FWSP.search(folded.removesuffix(linesep)): + raise HeaderWriteError( + f'folded header contains newline: {folded!r}') + self.write(folded) # A blank line always separates headers from body self.write(self._NL) @@ -240,7 +254,7 @@ def _handle_text(self, msg): # existing message. msg = deepcopy(msg) del msg['content-transfer-encoding'] - msg.set_payload(payload, charset) + msg.set_payload(msg._payload, charset) payload = msg.get_payload() self._munge_cte = (msg['content-transfer-encoding'], msg['content-type']) @@ -388,7 +402,7 @@ def _make_boundary(cls, text=None): def _compile_re(cls, s, flags): return re.compile(s, flags) - + class BytesGenerator(Generator): """Generates a bytes version of a Message object tree. @@ -439,7 +453,6 @@ def _compile_re(cls, s, flags): return re.compile(s.encode('ascii'), flags) - _FMT = '[Non-text (%(type)s) part of message omitted, filename %(filename)s]' class DecodedGenerator(Generator): @@ -499,7 +512,6 @@ def _dispatch(self, msg): }, file=self) - # Helper used by Generator._make_boundary _width = len(repr(sys.maxsize-1)) _fmt = '%%0%dd' % _width diff --git a/Lib/email/header.py b/Lib/email/header.py index c7b2dd9f31..984851a7d9 100644 --- a/Lib/email/header.py +++ b/Lib/email/header.py @@ -36,11 +36,11 @@ =\? # literal =? (?P[^?]*?) # non-greedy up to the next ? is the charset \? # literal ? - (?P[qb]) # either a "q" or a "b", case insensitive + (?P[qQbB]) # either a "q" or a "b", case insensitive \? # literal ? (?P.*?) # non-greedy up to the next ?= is the encoded string \?= # literal ?= - ''', re.VERBOSE | re.IGNORECASE | re.MULTILINE) + ''', re.VERBOSE | re.MULTILINE) # Field name regexp, including trailing colon, but not separating whitespace, # according to RFC 2822. Character range is from tilde to exclamation mark. @@ -52,12 +52,10 @@ _embedded_header = re.compile(r'\n[^ \t]+:') - # Helpers _max_append = email.quoprimime._max_append - def decode_header(header): """Decode a message header value without converting charset. @@ -152,7 +150,6 @@ def decode_header(header): return collapsed - def make_header(decoded_seq, maxlinelen=None, header_name=None, continuation_ws=' '): """Create a Header from a sequence of pairs as returned by decode_header() @@ -175,7 +172,6 @@ def make_header(decoded_seq, maxlinelen=None, header_name=None, return h - class Header: def __init__(self, s=None, charset=None, maxlinelen=None, header_name=None, @@ -409,7 +405,6 @@ def _normalize(self): self._chunks = chunks - class _ValueFormatter: def __init__(self, headerlen, maxlen, continuation_ws, splitchars): self._maxlen = maxlen @@ -431,7 +426,7 @@ def newline(self): if end_of_line != (' ', ''): self._current_line.push(*end_of_line) if len(self._current_line) > 0: - if self._current_line.is_onlyws(): + if self._current_line.is_onlyws() and self._lines: self._lines[-1] += str(self._current_line) else: self._lines.append(str(self._current_line)) diff --git a/Lib/email/headerregistry.py b/Lib/email/headerregistry.py index 0fc2231e5c..543141dc42 100644 --- a/Lib/email/headerregistry.py +++ b/Lib/email/headerregistry.py @@ -2,10 +2,6 @@ This module provides an implementation of the HeaderRegistry API. The implementation is designed to flexibly follow RFC5322 rules. - -Eventually HeaderRegistry will be a public API, but it isn't yet, -and will probably change some before that happens. - """ from types import MappingProxyType @@ -31,6 +27,11 @@ def __init__(self, display_name='', username='', domain='', addr_spec=None): without any Content Transfer Encoding. """ + + inputs = ''.join(filter(None, (display_name, username, domain, addr_spec))) + if '\r' in inputs or '\n' in inputs: + raise ValueError("invalid arguments; address parts cannot contain CR or LF") + # This clause with its potential 'raise' may only happen when an # application program creates an Address object using an addr_spec # keyword. The email library code itself must always supply username @@ -69,11 +70,9 @@ def addr_spec(self): """The addr_spec (username@domain) portion of the address, quoted according to RFC 5322 rules, but with no Content Transfer Encoding. """ - nameset = set(self.username) - if len(nameset) > len(nameset-parser.DOT_ATOM_ENDS): - lp = parser.quote_string(self.username) - else: - lp = self.username + lp = self.username + if not parser.DOT_ATOM_ENDS.isdisjoint(lp): + lp = parser.quote_string(lp) if self.domain: return lp + '@' + self.domain if not lp: @@ -86,19 +85,17 @@ def __repr__(self): self.display_name, self.username, self.domain) def __str__(self): - nameset = set(self.display_name) - if len(nameset) > len(nameset-parser.SPECIALS): - disp = parser.quote_string(self.display_name) - else: - disp = self.display_name + disp = self.display_name + if not parser.SPECIALS.isdisjoint(disp): + disp = parser.quote_string(disp) if disp: addr_spec = '' if self.addr_spec=='<>' else self.addr_spec return "{} <{}>".format(disp, addr_spec) return self.addr_spec def __eq__(self, other): - if type(other) != type(self): - return False + if not isinstance(other, Address): + return NotImplemented return (self.display_name == other.display_name and self.username == other.username and self.domain == other.domain) @@ -141,17 +138,15 @@ def __str__(self): if self.display_name is None and len(self.addresses)==1: return str(self.addresses[0]) disp = self.display_name - if disp is not None: - nameset = set(disp) - if len(nameset) > len(nameset-parser.SPECIALS): - disp = parser.quote_string(disp) + if disp is not None and not parser.SPECIALS.isdisjoint(disp): + disp = parser.quote_string(disp) adrstr = ", ".join(str(x) for x in self.addresses) adrstr = ' ' + adrstr if adrstr else adrstr return "{}:{};".format(disp, adrstr) def __eq__(self, other): - if type(other) != type(self): - return False + if not isinstance(other, Group): + return NotImplemented return (self.display_name == other.display_name and self.addresses == other.addresses) @@ -223,7 +218,7 @@ def __reduce__(self): self.__class__.__bases__, str(self), ), - self.__dict__) + self.__getstate__()) @classmethod def _reconstruct(cls, value): @@ -245,13 +240,16 @@ def fold(self, *, policy): the header name and the ': ' separator. """ - # At some point we need to only put fws here if it was in the source. + # At some point we need to put fws here if it was in the source. header = parser.Header([ parser.HeaderLabel([ parser.ValueTerminal(self.name, 'header-name'), parser.ValueTerminal(':', 'header-sep')]), - parser.CFWSList([parser.WhiteSpaceTerminal(' ', 'fws')]), - self._parse_tree]) + ]) + if self._parse_tree: + header.append( + parser.CFWSList([parser.WhiteSpaceTerminal(' ', 'fws')])) + header.append(self._parse_tree) return header.fold(policy=policy) @@ -300,7 +298,14 @@ def parse(cls, value, kwds): kwds['parse_tree'] = parser.TokenList() return if isinstance(value, str): - value = utils.parsedate_to_datetime(value) + kwds['decoded'] = value + try: + value = utils.parsedate_to_datetime(value) + except ValueError: + kwds['defects'].append(errors.InvalidDateDefect('Invalid date value or format')) + kwds['datetime'] = None + kwds['parse_tree'] = parser.TokenList() + return kwds['datetime'] = value kwds['decoded'] = utils.format_datetime(kwds['datetime']) kwds['parse_tree'] = cls.value_parser(kwds['decoded']) @@ -369,8 +374,8 @@ def groups(self): @property def addresses(self): if self._addresses is None: - self._addresses = tuple([address for group in self._groups - for address in group.addresses]) + self._addresses = tuple(address for group in self._groups + for address in group.addresses) return self._addresses @@ -517,6 +522,18 @@ def cte(self): return self._cte +class MessageIDHeader: + + max_count = 1 + value_parser = staticmethod(parser.parse_message_id) + + @classmethod + def parse(cls, value, kwds): + kwds['parse_tree'] = parse_tree = cls.value_parser(value) + kwds['decoded'] = str(parse_tree) + kwds['defects'].extend(parse_tree.all_defects) + + # The header factory # _default_header_map = { @@ -539,6 +556,7 @@ def cte(self): 'content-type': ContentTypeHeader, 'content-disposition': ContentDispositionHeader, 'content-transfer-encoding': ContentTransferEncodingHeader, + 'message-id': MessageIDHeader, } class HeaderRegistry: diff --git a/Lib/email/iterators.py b/Lib/email/iterators.py index b5502ee975..3410935e38 100644 --- a/Lib/email/iterators.py +++ b/Lib/email/iterators.py @@ -15,7 +15,6 @@ from io import StringIO - # This function will become a method of the Message class def walk(self): """Walk over the message tree, yielding each subpart. @@ -29,7 +28,6 @@ def walk(self): yield from subpart.walk() - # These two functions are imported into the Iterators.py interface module. def body_line_iterator(msg, decode=False): """Iterate over the parts, returning string payloads line-by-line. @@ -55,7 +53,6 @@ def typed_subpart_iterator(msg, maintype='text', subtype=None): yield subpart - def _structure(msg, fp=None, level=0, include_default=False): """A handy debugging aid""" if fp is None: diff --git a/Lib/email/message.py b/Lib/email/message.py index f932186875..46bb8c2194 100644 --- a/Lib/email/message.py +++ b/Lib/email/message.py @@ -6,6 +6,7 @@ __all__ = ['Message', 'EmailMessage'] +import binascii import re import quopri from io import BytesIO, StringIO @@ -13,7 +14,7 @@ # Intrapackage imports from email import utils from email import errors -from email._policybase import Policy, compat32 +from email._policybase import compat32 from email import charset as _charset from email._encoded_words import decode_b Charset = _charset.Charset @@ -34,7 +35,7 @@ def _splitparam(param): if not sep: return a.strip(), None return a.strip(), b.strip() - + def _formatparam(param, value=None, quote=True): """Convenience function to format and return a key=value pair. @@ -129,7 +130,8 @@ def _decode_uu(encoded): decoded_lines.append(decoded_line) return b''.join(decoded_lines) - + + class Message: """Basic message object. @@ -169,7 +171,7 @@ def as_string(self, unixfrom=False, maxheaderlen=0, policy=None): header. For backward compatibility reasons, if maxheaderlen is not specified it defaults to 0, so you must override it explicitly if you want a different maxheaderlen. 'policy' is passed to the - Generator instance used to serialize the mesasge; if it is not + Generator instance used to serialize the message; if it is not specified the policy associated with the message instance is used. If the message object contains binary data that is not encoded @@ -287,25 +289,26 @@ def get_payload(self, i=None, decode=False): # cte might be a Header, so for now stringify it. cte = str(self.get('content-transfer-encoding', '')).lower() # payload may be bytes here. - if isinstance(payload, str): - if utils._has_surrogates(payload): - bpayload = payload.encode('ascii', 'surrogateescape') - if not decode: + if not decode: + if isinstance(payload, str) and utils._has_surrogates(payload): + try: + bpayload = payload.encode('ascii', 'surrogateescape') try: - payload = bpayload.decode(self.get_param('charset', 'ascii'), 'replace') + payload = bpayload.decode(self.get_content_charset('ascii'), 'replace') except LookupError: payload = bpayload.decode('ascii', 'replace') - elif decode: - try: - bpayload = payload.encode('ascii') - except UnicodeError: - # This won't happen for RFC compliant messages (messages - # containing only ASCII code points in the unicode input). - # If it does happen, turn the string into bytes in a way - # guaranteed not to fail. - bpayload = payload.encode('raw-unicode-escape') - if not decode: + except UnicodeEncodeError: + pass return payload + if isinstance(payload, str): + try: + bpayload = payload.encode('ascii', 'surrogateescape') + except UnicodeEncodeError: + # This won't happen for RFC compliant messages (messages + # containing only ASCII code points in the unicode input). + # If it does happen, turn the string into bytes in a way + # guaranteed not to fail. + bpayload = payload.encode('raw-unicode-escape') if cte == 'quoted-printable': return quopri.decodestring(bpayload) elif cte == 'base64': @@ -337,7 +340,7 @@ def set_payload(self, payload, charset=None): return if not isinstance(charset, Charset): charset = Charset(charset) - payload = payload.encode(charset.output_charset) + payload = payload.encode(charset.output_charset, 'surrogateescape') if hasattr(payload, 'decode'): self._payload = payload.decode('ascii', 'surrogateescape') else: @@ -446,7 +449,11 @@ def __delitem__(self, name): self._headers = newheaders def __contains__(self, name): - return name.lower() in [k.lower() for k, v in self._headers] + name_lower = name.lower() + for k, v in self._headers: + if name_lower == k.lower(): + return True + return False def __iter__(self): for field, value in self._headers: @@ -973,7 +980,7 @@ def __init__(self, policy=None): if policy is None: from email.policy import default policy = default - Message.__init__(self, policy) + super().__init__(policy) def as_string(self, unixfrom=False, maxheaderlen=None, policy=None): @@ -983,14 +990,14 @@ def as_string(self, unixfrom=False, maxheaderlen=None, policy=None): header. maxheaderlen is retained for backward compatibility with the base Message class, but defaults to None, meaning that the policy value for max_line_length controls the header maximum length. 'policy' is - passed to the Generator instance used to serialize the mesasge; if it + passed to the Generator instance used to serialize the message; if it is not specified the policy associated with the message instance is used. """ policy = self.policy if policy is None else policy if maxheaderlen is None: maxheaderlen = policy.max_line_length - return super().as_string(maxheaderlen=maxheaderlen, policy=policy) + return super().as_string(unixfrom, maxheaderlen, policy) def __str__(self): return self.as_string(policy=self.policy.clone(utf8=True)) @@ -1007,7 +1014,7 @@ def _find_body(self, part, preferencelist): if subtype in preferencelist: yield (preferencelist.index(subtype), part) return - if maintype != 'multipart': + if maintype != 'multipart' or not self.is_multipart(): return if subtype != 'related': for subpart in part.iter_parts(): @@ -1066,7 +1073,16 @@ def iter_attachments(self): maintype, subtype = self.get_content_type().split('/') if maintype != 'multipart' or subtype == 'alternative': return - parts = self.get_payload().copy() + payload = self.get_payload() + # Certain malformed messages can have content type set to `multipart/*` + # but still have single part body, in which case payload.copy() can + # fail with AttributeError. + try: + parts = payload.copy() + except AttributeError: + # payload is not a list, it is most probably a string. + return + if maintype == 'multipart' and subtype == 'related': # For related, we treat everything but the root as an attachment. # The root may be indicated by 'start'; if there's no start or we @@ -1103,7 +1119,7 @@ def iter_parts(self): Return an empty iterator for a non-multipart. """ - if self.get_content_maintype() == 'multipart': + if self.is_multipart(): yield from self.get_payload() def get_content(self, *args, content_manager=None, **kw): diff --git a/Lib/email/mime/application.py b/Lib/email/mime/application.py index 6877e554e1..f67cbad3f0 100644 --- a/Lib/email/mime/application.py +++ b/Lib/email/mime/application.py @@ -17,7 +17,7 @@ def __init__(self, _data, _subtype='octet-stream', _encoder=encoders.encode_base64, *, policy=None, **_params): """Create an application/* type MIME document. - _data is a string containing the raw application data. + _data contains the bytes for the raw application data. _subtype is the MIME content type subtype, defaulting to 'octet-stream'. diff --git a/Lib/email/mime/audio.py b/Lib/email/mime/audio.py index 4bcd7b224a..aa0c4905cb 100644 --- a/Lib/email/mime/audio.py +++ b/Lib/email/mime/audio.py @@ -6,39 +6,10 @@ __all__ = ['MIMEAudio'] -import sndhdr - -from io import BytesIO from email import encoders from email.mime.nonmultipart import MIMENonMultipart - -_sndhdr_MIMEmap = {'au' : 'basic', - 'wav' :'x-wav', - 'aiff':'x-aiff', - 'aifc':'x-aiff', - } - -# There are others in sndhdr that don't have MIME types. :( -# Additional ones to be added to sndhdr? midi, mp3, realaudio, wma?? -def _whatsnd(data): - """Try to identify a sound file type. - - sndhdr.what() has a pretty cruddy interface, unfortunately. This is why - we re-do it here. It would be easier to reverse engineer the Unix 'file' - command and use the standard 'magic' file, as shipped with a modern Unix. - """ - hdr = data[:512] - fakefile = BytesIO(hdr) - for testfn in sndhdr.tests: - res = testfn(hdr, fakefile) - if res is not None: - return _sndhdr_MIMEmap.get(res[0]) - return None - - - class MIMEAudio(MIMENonMultipart): """Class for generating audio/* MIME documents.""" @@ -46,8 +17,8 @@ def __init__(self, _audiodata, _subtype=None, _encoder=encoders.encode_base64, *, policy=None, **_params): """Create an audio/* type MIME document. - _audiodata is a string containing the raw audio data. If this data - can be decoded by the standard Python `sndhdr' module, then the + _audiodata contains the bytes for the raw audio data. If this data + can be decoded as au, wav, aiff, or aifc, then the subtype will be automatically included in the Content-Type header. Otherwise, you can specify the specific audio subtype via the _subtype parameter. If _subtype is not given, and no subtype can be @@ -65,10 +36,62 @@ def __init__(self, _audiodata, _subtype=None, header. """ if _subtype is None: - _subtype = _whatsnd(_audiodata) + _subtype = _what(_audiodata) if _subtype is None: raise TypeError('Could not find audio MIME subtype') MIMENonMultipart.__init__(self, 'audio', _subtype, policy=policy, **_params) self.set_payload(_audiodata) _encoder(self) + + +_rules = [] + + +# Originally from the sndhdr module. +# +# There are others in sndhdr that don't have MIME types. :( +# Additional ones to be added to sndhdr? midi, mp3, realaudio, wma?? +def _what(data): + # Try to identify a sound file type. + # + # sndhdr.what() had a pretty cruddy interface, unfortunately. This is why + # we re-do it here. It would be easier to reverse engineer the Unix 'file' + # command and use the standard 'magic' file, as shipped with a modern Unix. + for testfn in _rules: + if res := testfn(data): + return res + else: + return None + + +def rule(rulefunc): + _rules.append(rulefunc) + return rulefunc + + +@rule +def _aiff(h): + if not h.startswith(b'FORM'): + return None + if h[8:12] in {b'AIFC', b'AIFF'}: + return 'x-aiff' + else: + return None + + +@rule +def _au(h): + if h.startswith(b'.snd'): + return 'basic' + else: + return None + + +@rule +def _wav(h): + # 'RIFF' 'WAVE' 'fmt ' + if not h.startswith(b'RIFF') or h[8:12] != b'WAVE' or h[12:16] != b'fmt ': + return None + else: + return "x-wav" diff --git a/Lib/email/mime/base.py b/Lib/email/mime/base.py index 1a3f9b51f6..f601f621ce 100644 --- a/Lib/email/mime/base.py +++ b/Lib/email/mime/base.py @@ -11,7 +11,6 @@ from email import message - class MIMEBase(message.Message): """Base class for MIME specializations.""" diff --git a/Lib/email/mime/image.py b/Lib/email/mime/image.py index 92724643cd..4b7f2f9cba 100644 --- a/Lib/email/mime/image.py +++ b/Lib/email/mime/image.py @@ -6,13 +6,10 @@ __all__ = ['MIMEImage'] -import imghdr - from email import encoders from email.mime.nonmultipart import MIMENonMultipart - class MIMEImage(MIMENonMultipart): """Class for generating image/* type MIME documents.""" @@ -20,11 +17,11 @@ def __init__(self, _imagedata, _subtype=None, _encoder=encoders.encode_base64, *, policy=None, **_params): """Create an image/* type MIME document. - _imagedata is a string containing the raw image data. If this data - can be decoded by the standard Python `imghdr' module, then the - subtype will be automatically included in the Content-Type header. - Otherwise, you can specify the specific image subtype via the _subtype - parameter. + _imagedata contains the bytes for the raw image data. If the data + type can be detected (jpeg, png, gif, tiff, rgb, pbm, pgm, ppm, + rast, xbm, bmp, webp, and exr attempted), then the subtype will be + automatically included in the Content-Type header. Otherwise, you can + specify the specific image subtype via the _subtype parameter. _encoder is a function which will perform the actual encoding for transport of the image data. It takes one argument, which is this @@ -37,11 +34,119 @@ def __init__(self, _imagedata, _subtype=None, constructor, which turns them into parameters on the Content-Type header. """ - if _subtype is None: - _subtype = imghdr.what(None, _imagedata) + _subtype = _what(_imagedata) if _subtype is None else _subtype if _subtype is None: raise TypeError('Could not guess image MIME subtype') MIMENonMultipart.__init__(self, 'image', _subtype, policy=policy, **_params) self.set_payload(_imagedata) _encoder(self) + + +_rules = [] + + +# Originally from the imghdr module. +def _what(data): + for rule in _rules: + if res := rule(data): + return res + else: + return None + + +def rule(rulefunc): + _rules.append(rulefunc) + return rulefunc + + +@rule +def _jpeg(h): + """JPEG data with JFIF or Exif markers; and raw JPEG""" + if h[6:10] in (b'JFIF', b'Exif'): + return 'jpeg' + elif h[:4] == b'\xff\xd8\xff\xdb': + return 'jpeg' + + +@rule +def _png(h): + if h.startswith(b'\211PNG\r\n\032\n'): + return 'png' + + +@rule +def _gif(h): + """GIF ('87 and '89 variants)""" + if h[:6] in (b'GIF87a', b'GIF89a'): + return 'gif' + + +@rule +def _tiff(h): + """TIFF (can be in Motorola or Intel byte order)""" + if h[:2] in (b'MM', b'II'): + return 'tiff' + + +@rule +def _rgb(h): + """SGI image library""" + if h.startswith(b'\001\332'): + return 'rgb' + + +@rule +def _pbm(h): + """PBM (portable bitmap)""" + if len(h) >= 3 and \ + h[0] == ord(b'P') and h[1] in b'14' and h[2] in b' \t\n\r': + return 'pbm' + + +@rule +def _pgm(h): + """PGM (portable graymap)""" + if len(h) >= 3 and \ + h[0] == ord(b'P') and h[1] in b'25' and h[2] in b' \t\n\r': + return 'pgm' + + +@rule +def _ppm(h): + """PPM (portable pixmap)""" + if len(h) >= 3 and \ + h[0] == ord(b'P') and h[1] in b'36' and h[2] in b' \t\n\r': + return 'ppm' + + +@rule +def _rast(h): + """Sun raster file""" + if h.startswith(b'\x59\xA6\x6A\x95'): + return 'rast' + + +@rule +def _xbm(h): + """X bitmap (X10 or X11)""" + if h.startswith(b'#define '): + return 'xbm' + + +@rule +def _bmp(h): + if h.startswith(b'BM'): + return 'bmp' + + +@rule +def _webp(h): + if h.startswith(b'RIFF') and h[8:12] == b'WEBP': + return 'webp' + + +@rule +def _exr(h): + if h.startswith(b'\x76\x2f\x31\x01'): + return 'exr' diff --git a/Lib/email/mime/message.py b/Lib/email/mime/message.py index 07e4f2d119..61836b5a78 100644 --- a/Lib/email/mime/message.py +++ b/Lib/email/mime/message.py @@ -10,7 +10,6 @@ from email.mime.nonmultipart import MIMENonMultipart - class MIMEMessage(MIMENonMultipart): """Class representing message/* MIME documents.""" diff --git a/Lib/email/mime/multipart.py b/Lib/email/mime/multipart.py index 2d3f288810..94d81c771a 100644 --- a/Lib/email/mime/multipart.py +++ b/Lib/email/mime/multipart.py @@ -9,7 +9,6 @@ from email.mime.base import MIMEBase - class MIMEMultipart(MIMEBase): """Base class for MIME multipart/* type messages.""" diff --git a/Lib/email/mime/nonmultipart.py b/Lib/email/mime/nonmultipart.py index e1f51968b5..a41386eb14 100644 --- a/Lib/email/mime/nonmultipart.py +++ b/Lib/email/mime/nonmultipart.py @@ -10,7 +10,6 @@ from email.mime.base import MIMEBase - class MIMENonMultipart(MIMEBase): """Base class for MIME non-multipart type messages.""" diff --git a/Lib/email/mime/text.py b/Lib/email/mime/text.py index 35b4423830..7672b78913 100644 --- a/Lib/email/mime/text.py +++ b/Lib/email/mime/text.py @@ -6,11 +6,9 @@ __all__ = ['MIMEText'] -from email.charset import Charset from email.mime.nonmultipart import MIMENonMultipart - class MIMEText(MIMENonMultipart): """Class for generating text/* type MIME documents.""" @@ -37,6 +35,6 @@ def __init__(self, _text, _subtype='plain', _charset=None, *, policy=None): _charset = 'utf-8' MIMENonMultipart.__init__(self, 'text', _subtype, policy=policy, - **{'charset': str(_charset)}) + charset=str(_charset)) self.set_payload(_text, _charset) diff --git a/Lib/email/parser.py b/Lib/email/parser.py index 555b172560..06d99b17f2 100644 --- a/Lib/email/parser.py +++ b/Lib/email/parser.py @@ -13,7 +13,6 @@ from email._policybase import compat32 - class Parser: def __init__(self, _class=None, *, policy=compat32): """Parser of RFC 2822 and MIME email messages. @@ -50,10 +49,7 @@ def parse(self, fp, headersonly=False): feedparser = FeedParser(self._class, policy=self.policy) if headersonly: feedparser._set_headersonly() - while True: - data = fp.read(8192) - if not data: - break + while data := fp.read(8192): feedparser.feed(data) return feedparser.close() @@ -68,7 +64,6 @@ def parsestr(self, text, headersonly=False): return self.parse(StringIO(text), headersonly=headersonly) - class HeaderParser(Parser): def parse(self, fp, headersonly=True): return Parser.parse(self, fp, True) @@ -76,7 +71,7 @@ def parse(self, fp, headersonly=True): def parsestr(self, text, headersonly=True): return Parser.parsestr(self, text, True) - + class BytesParser: def __init__(self, *args, **kw): diff --git a/Lib/email/policy.py b/Lib/email/policy.py index 5131311ac5..6e109b6501 100644 --- a/Lib/email/policy.py +++ b/Lib/email/policy.py @@ -3,6 +3,7 @@ """ import re +import sys from email._policybase import Policy, Compat32, compat32, _extend_docstrings from email.utils import _has_surrogates from email.headerregistry import HeaderRegistry as HeaderRegistry @@ -20,7 +21,7 @@ 'HTTP', ] -linesep_splitter = re.compile(r'\n|\r') +linesep_splitter = re.compile(r'\n|\r\n?') @_extend_docstrings class EmailPolicy(Policy): @@ -118,13 +119,13 @@ def header_source_parse(self, sourcelines): """+ The name is parsed as everything up to the ':' and returned unmodified. The value is determined by stripping leading whitespace off the - remainder of the first line, joining all subsequent lines together, and + remainder of the first line joined with all subsequent lines, and stripping any trailing carriage return or linefeed characters. (This is the same as Compat32). """ name, value = sourcelines[0].split(':', 1) - value = value.lstrip(' \t') + ''.join(sourcelines[1:]) + value = ''.join((value, *sourcelines[1:])).lstrip(' \t\r\n') return (name, value.rstrip('\r\n')) def header_store_parse(self, name, value): @@ -203,14 +204,22 @@ def fold_binary(self, name, value): def _fold(self, name, value, refold_binary=False): if hasattr(value, 'name'): return value.fold(policy=self) - maxlen = self.max_line_length if self.max_line_length else float('inf') - lines = value.splitlines() + maxlen = self.max_line_length if self.max_line_length else sys.maxsize + # We can't use splitlines here because it splits on more than \r and \n. + lines = linesep_splitter.split(value) refold = (self.refold_source == 'all' or self.refold_source == 'long' and (lines and len(lines[0])+len(name)+2 > maxlen or any(len(x) > maxlen for x in lines[1:]))) - if refold or refold_binary and _has_surrogates(value): + + if not refold: + if not self.utf8: + refold = not value.isascii() + elif refold_binary: + refold = _has_surrogates(value) + if refold: return self.header_factory(name, ''.join(lines)).fold(policy=self) + return name + ': ' + self.linesep.join(lines) + self.linesep diff --git a/Lib/email/quoprimime.py b/Lib/email/quoprimime.py index c543eb59ae..27fcbb5a26 100644 --- a/Lib/email/quoprimime.py +++ b/Lib/email/quoprimime.py @@ -148,6 +148,7 @@ def header_encode(header_bytes, charset='iso-8859-1'): _QUOPRI_BODY_ENCODE_MAP = _QUOPRI_BODY_MAP[:] for c in b'\r\n': _QUOPRI_BODY_ENCODE_MAP[c] = chr(c) +del c def body_encode(body, maxlinelen=76, eol=NL): """Encode with quoted-printable, wrapping at maxlinelen characters. @@ -173,7 +174,7 @@ def body_encode(body, maxlinelen=76, eol=NL): if not body: return body - # quote speacial characters + # quote special characters body = body.translate(_QUOPRI_BODY_ENCODE_MAP) soft_break = '=' + eol diff --git a/Lib/email/utils.py b/Lib/email/utils.py index a759d23308..e42674fa4f 100644 --- a/Lib/email/utils.py +++ b/Lib/email/utils.py @@ -25,8 +25,6 @@ import os import re import time -import random -import socket import datetime import urllib.parse @@ -36,9 +34,6 @@ from email._parseaddr import parsedate, parsedate_tz, _parsedate_tz -# Intrapackage imports -from email.charset import Charset - COMMASPACE = ', ' EMPTYSTRING = '' UEMPTYSTRING = '' @@ -48,11 +43,12 @@ specialsre = re.compile(r'[][\\()<>@,:;".]') escapesre = re.compile(r'[\\"]') + def _has_surrogates(s): - """Return True if s contains surrogate-escaped binary data.""" + """Return True if s may contain surrogate-escaped binary data.""" # This check is based on the fact that unless there are surrogates, utf8 # (Python's default encoding) can encode any string. This is the fastest - # way to check for surrogates, see issue 11454 for timings. + # way to check for surrogates, see bpo-11454 (moved to gh-55663) for timings. try: s.encode() return False @@ -81,7 +77,7 @@ def formataddr(pair, charset='utf-8'): If the first element of pair is false, then the second element is returned unmodified. - Optional charset if given is the character set that is used to encode + The optional charset is the character set that is used to encode realname in case realname is not ASCII safe. Can be an instance of str or a Charset-like object which has a header_encode method. Default is 'utf-8'. @@ -94,6 +90,8 @@ def formataddr(pair, charset='utf-8'): name.encode('ascii') except UnicodeEncodeError: if isinstance(charset, str): + # lazy import to improve module import time + from email.charset import Charset charset = Charset(charset) encoded_name = charset.header_encode(name) return "%s <%s>" % (encoded_name, address) @@ -106,24 +104,127 @@ def formataddr(pair, charset='utf-8'): return address +def _iter_escaped_chars(addr): + pos = 0 + escape = False + for pos, ch in enumerate(addr): + if escape: + yield (pos, '\\' + ch) + escape = False + elif ch == '\\': + escape = True + else: + yield (pos, ch) + if escape: + yield (pos, '\\') + + +def _strip_quoted_realnames(addr): + """Strip real names between quotes.""" + if '"' not in addr: + # Fast path + return addr + + start = 0 + open_pos = None + result = [] + for pos, ch in _iter_escaped_chars(addr): + if ch == '"': + if open_pos is None: + open_pos = pos + else: + if start != open_pos: + result.append(addr[start:open_pos]) + start = pos + 1 + open_pos = None + + if start < len(addr): + result.append(addr[start:]) + + return ''.join(result) -def getaddresses(fieldvalues): - """Return a list of (REALNAME, EMAIL) for each fieldvalue.""" - all = COMMASPACE.join(fieldvalues) - a = _AddressList(all) - return a.addresslist +supports_strict_parsing = True +def getaddresses(fieldvalues, *, strict=True): + """Return a list of (REALNAME, EMAIL) or ('','') for each fieldvalue. -ecre = re.compile(r''' - =\? # literal =? - (?P[^?]*?) # non-greedy up to the next ? is the charset - \? # literal ? - (?P[qb]) # either a "q" or a "b", case insensitive - \? # literal ? - (?P.*?) # non-greedy up to the next ?= is the atom - \?= # literal ?= - ''', re.VERBOSE | re.IGNORECASE) + When parsing fails for a fieldvalue, a 2-tuple of ('', '') is returned in + its place. + + If strict is true, use a strict parser which rejects malformed inputs. + """ + + # If strict is true, if the resulting list of parsed addresses is greater + # than the number of fieldvalues in the input list, a parsing error has + # occurred and consequently a list containing a single empty 2-tuple [('', + # '')] is returned in its place. This is done to avoid invalid output. + # + # Malformed input: getaddresses(['alice@example.com ']) + # Invalid output: [('', 'alice@example.com'), ('', 'bob@example.com')] + # Safe output: [('', '')] + + if not strict: + all = COMMASPACE.join(str(v) for v in fieldvalues) + a = _AddressList(all) + return a.addresslist + + fieldvalues = [str(v) for v in fieldvalues] + fieldvalues = _pre_parse_validation(fieldvalues) + addr = COMMASPACE.join(fieldvalues) + a = _AddressList(addr) + result = _post_parse_validation(a.addresslist) + + # Treat output as invalid if the number of addresses is not equal to the + # expected number of addresses. + n = 0 + for v in fieldvalues: + # When a comma is used in the Real Name part it is not a deliminator. + # So strip those out before counting the commas. + v = _strip_quoted_realnames(v) + # Expected number of addresses: 1 + number of commas + n += 1 + v.count(',') + if len(result) != n: + return [('', '')] + + return result + + +def _check_parenthesis(addr): + # Ignore parenthesis in quoted real names. + addr = _strip_quoted_realnames(addr) + + opens = 0 + for pos, ch in _iter_escaped_chars(addr): + if ch == '(': + opens += 1 + elif ch == ')': + opens -= 1 + if opens < 0: + return False + return (opens == 0) + + +def _pre_parse_validation(email_header_fields): + accepted_values = [] + for v in email_header_fields: + if not _check_parenthesis(v): + v = "('', '')" + accepted_values.append(v) + + return accepted_values + + +def _post_parse_validation(parsed_email_header_tuples): + accepted_values = [] + # The parser would have parsed a correctly formatted domain-literal + # The existence of an [ after parsing indicates a parsing failure + for v in parsed_email_header_tuples: + if '[' in v[1]: + v = ('', '') + accepted_values.append(v) + + return accepted_values def _format_timetuple_and_zone(timetuple, zone): @@ -140,7 +241,7 @@ def formatdate(timeval=None, localtime=False, usegmt=False): Fri, 09 Nov 2001 01:08:47 -0000 - Optional timeval if given is a floating point time value as accepted by + Optional timeval if given is a floating-point time value as accepted by gmtime() and localtime(), otherwise the current time is used. Optional localtime is a flag that when True, interprets timeval, and @@ -155,13 +256,13 @@ def formatdate(timeval=None, localtime=False, usegmt=False): # 2822 requires that day and month names be the English abbreviations. if timeval is None: timeval = time.time() - if localtime or usegmt: - dt = datetime.datetime.fromtimestamp(timeval, datetime.timezone.utc) - else: - dt = datetime.datetime.utcfromtimestamp(timeval) + dt = datetime.datetime.fromtimestamp(timeval, datetime.timezone.utc) + if localtime: dt = dt.astimezone() usegmt = False + elif not usegmt: + dt = dt.replace(tzinfo=None) return format_datetime(dt, usegmt) def format_datetime(dt, usegmt=False): @@ -193,6 +294,11 @@ def make_msgid(idstring=None, domain=None): portion of the message id after the '@'. It defaults to the locally defined hostname. """ + # Lazy imports to speedup module import time + # (no other functions in email.utils need these modules) + import random + import socket + timeval = int(time.time()*100) pid = os.getpid() randint = random.getrandbits(64) @@ -207,17 +313,43 @@ def make_msgid(idstring=None, domain=None): def parsedate_to_datetime(data): - *dtuple, tz = _parsedate_tz(data) + parsed_date_tz = _parsedate_tz(data) + if parsed_date_tz is None: + raise ValueError('Invalid date value or format "%s"' % str(data)) + *dtuple, tz = parsed_date_tz if tz is None: return datetime.datetime(*dtuple[:6]) return datetime.datetime(*dtuple[:6], tzinfo=datetime.timezone(datetime.timedelta(seconds=tz))) -def parseaddr(addr): - addrs = _AddressList(addr).addresslist - if not addrs: - return '', '' +def parseaddr(addr, *, strict=True): + """ + Parse addr into its constituent realname and email address parts. + + Return a tuple of realname and email address, unless the parse fails, in + which case return a 2-tuple of ('', ''). + + If strict is True, use a strict parser which rejects malformed inputs. + """ + if not strict: + addrs = _AddressList(addr).addresslist + if not addrs: + return ('', '') + return addrs[0] + + if isinstance(addr, list): + addr = addr[0] + + if not isinstance(addr, str): + return ('', '') + + addr = _pre_parse_validation([addr])[0] + addrs = _post_parse_validation(_AddressList(addr).addresslist) + + if not addrs or len(addrs) > 1: + return ('', '') + return addrs[0] @@ -265,21 +397,13 @@ def decode_params(params): params is a sequence of 2-tuples containing (param name, string value). """ - # Copy params so we don't mess with the original - params = params[:] - new_params = [] + new_params = [params[0]] # Map parameter's name to a list of continuations. The values are a # 3-tuple of the continuation number, the string value, and a flag # specifying whether a particular segment is %-encoded. rfc2231_params = {} - name, value = params.pop(0) - new_params.append((name, value)) - while params: - name, value = params.pop(0) - if name.endswith('*'): - encoded = True - else: - encoded = False + for name, value in params[1:]: + encoded = name.endswith('*') value = unquote(value) mo = rfc2231_continuation.match(name) if mo: @@ -342,41 +466,23 @@ def collapse_rfc2231_value(value, errors='replace', # better than not having it. # -def localtime(dt=None, isdst=-1): +def localtime(dt=None, isdst=None): """Return local time as an aware datetime object. If called without arguments, return current time. Otherwise *dt* argument should be a datetime instance, and it is converted to the local time zone according to the system time zone database. If *dt* is naive (that is, dt.tzinfo is None), it is assumed to be in local time. - In this case, a positive or zero value for *isdst* causes localtime to - presume initially that summer time (for example, Daylight Saving Time) - is or is not (respectively) in effect for the specified time. A - negative value for *isdst* causes the localtime() function to attempt - to divine whether summer time is in effect for the specified time. + The isdst parameter is ignored. """ + if isdst is not None: + import warnings + warnings._deprecated( + "The 'isdst' parameter to 'localtime'", + message='{name} is deprecated and slated for removal in Python {remove}', + remove=(3, 14), + ) if dt is None: - return datetime.datetime.now(datetime.timezone.utc).astimezone() - if dt.tzinfo is not None: - return dt.astimezone() - # We have a naive datetime. Convert to a (localtime) timetuple and pass to - # system mktime together with the isdst hint. System mktime will return - # seconds since epoch. - tm = dt.timetuple()[:-1] + (isdst,) - seconds = time.mktime(tm) - localtm = time.localtime(seconds) - try: - delta = datetime.timedelta(seconds=localtm.tm_gmtoff) - tz = datetime.timezone(delta, localtm.tm_zone) - except AttributeError: - # Compute UTC offset and compare with the value implied by tm_isdst. - # If the values match, use the zone name implied by tm_isdst. - delta = dt - datetime.datetime(*time.gmtime(seconds)[:6]) - dst = time.daylight and localtm.tm_isdst > 0 - gmtoff = -(time.altzone if dst else time.timezone) - if delta == datetime.timedelta(seconds=gmtoff): - tz = datetime.timezone(delta, time.tzname[dst]) - else: - tz = datetime.timezone(delta) - return dt.replace(tzinfo=tz) + dt = datetime.datetime.now() + return dt.astimezone() diff --git a/Lib/fileinput.py b/Lib/fileinput.py index e234dc9ea6..3dba3d2fbf 100644 --- a/Lib/fileinput.py +++ b/Lib/fileinput.py @@ -53,7 +53,7 @@ sequence must be accessed in strictly sequential order; sequence access and readline() cannot be mixed. -Optional in-place filtering: if the keyword argument inplace=1 is +Optional in-place filtering: if the keyword argument inplace=True is passed to input() or to the FileInput constructor, the file is moved to a backup file and standard output is directed to the input file. This makes it possible to write a filter that rewrites its input file @@ -399,7 +399,7 @@ def isstdin(self): def hook_compressed(filename, mode, *, encoding=None, errors=None): - if encoding is None: # EncodingWarning is emitted in FileInput() already. + if encoding is None and "b" not in mode: # EncodingWarning is emitted in FileInput() already. encoding = "locale" ext = os.path.splitext(filename)[1] if ext == '.gz': diff --git a/Lib/getpass.py b/Lib/getpass.py index 6970d8adfb..bd0097ced9 100644 --- a/Lib/getpass.py +++ b/Lib/getpass.py @@ -18,7 +18,6 @@ import io import os import sys -import warnings __all__ = ["getpass","getuser","GetPassWarning"] @@ -118,6 +117,7 @@ def win_getpass(prompt='Password: ', stream=None): def fallback_getpass(prompt='Password: ', stream=None): + import warnings warnings.warn("Can not control echo on the terminal.", GetPassWarning, stacklevel=2) if not stream: @@ -156,7 +156,11 @@ def getuser(): First try various environment variables, then the password database. This works on Windows as long as USERNAME is set. + Any failure to find a username raises OSError. + .. versionchanged:: 3.13 + Previously, various exceptions beyond just :exc:`OSError` + were raised. """ for name in ('LOGNAME', 'USER', 'LNAME', 'USERNAME'): @@ -164,9 +168,12 @@ def getuser(): if user: return user - # If this fails, the exception will "explain" why - import pwd - return pwd.getpwuid(os.getuid())[0] + try: + import pwd + return pwd.getpwuid(os.getuid())[0] + except (ImportError, KeyError) as e: + raise OSError('No username set in the environment') from e + # Bind the name getpass to the appropriate function try: diff --git a/Lib/graphlib.py b/Lib/graphlib.py index 636545648e..9512865a8e 100644 --- a/Lib/graphlib.py +++ b/Lib/graphlib.py @@ -154,7 +154,7 @@ def done(self, *nodes): This method unblocks any successor of each node in *nodes* for being returned in the future by a call to "get_ready". - Raises :exec:`ValueError` if any node in *nodes* has already been marked as + Raises ValueError if any node in *nodes* has already been marked as processed by a previous call to this method, if a node was not added to the graph by using "add" or if called without calling "prepare" previously or if node has not yet been returned by "get_ready". diff --git a/Lib/imghdr.py b/Lib/imghdr.py deleted file mode 100644 index 6a372e66c7..0000000000 --- a/Lib/imghdr.py +++ /dev/null @@ -1,175 +0,0 @@ -"""Recognize image file formats based on their first few bytes.""" - -from os import PathLike -import warnings - -__all__ = ["what"] - - -warnings._deprecated(__name__, remove=(3, 13)) - - -#-------------------------# -# Recognize image headers # -#-------------------------# - -def what(file, h=None): - f = None - try: - if h is None: - if isinstance(file, (str, PathLike)): - f = open(file, 'rb') - h = f.read(32) - else: - location = file.tell() - h = file.read(32) - file.seek(location) - for tf in tests: - res = tf(h, f) - if res: - return res - finally: - if f: f.close() - return None - - -#---------------------------------# -# Subroutines per image file type # -#---------------------------------# - -tests = [] - -def test_jpeg(h, f): - """JPEG data with JFIF or Exif markers; and raw JPEG""" - if h[6:10] in (b'JFIF', b'Exif'): - return 'jpeg' - elif h[:4] == b'\xff\xd8\xff\xdb': - return 'jpeg' - -tests.append(test_jpeg) - -def test_png(h, f): - if h.startswith(b'\211PNG\r\n\032\n'): - return 'png' - -tests.append(test_png) - -def test_gif(h, f): - """GIF ('87 and '89 variants)""" - if h[:6] in (b'GIF87a', b'GIF89a'): - return 'gif' - -tests.append(test_gif) - -def test_tiff(h, f): - """TIFF (can be in Motorola or Intel byte order)""" - if h[:2] in (b'MM', b'II'): - return 'tiff' - -tests.append(test_tiff) - -def test_rgb(h, f): - """SGI image library""" - if h.startswith(b'\001\332'): - return 'rgb' - -tests.append(test_rgb) - -def test_pbm(h, f): - """PBM (portable bitmap)""" - if len(h) >= 3 and \ - h[0] == ord(b'P') and h[1] in b'14' and h[2] in b' \t\n\r': - return 'pbm' - -tests.append(test_pbm) - -def test_pgm(h, f): - """PGM (portable graymap)""" - if len(h) >= 3 and \ - h[0] == ord(b'P') and h[1] in b'25' and h[2] in b' \t\n\r': - return 'pgm' - -tests.append(test_pgm) - -def test_ppm(h, f): - """PPM (portable pixmap)""" - if len(h) >= 3 and \ - h[0] == ord(b'P') and h[1] in b'36' and h[2] in b' \t\n\r': - return 'ppm' - -tests.append(test_ppm) - -def test_rast(h, f): - """Sun raster file""" - if h.startswith(b'\x59\xA6\x6A\x95'): - return 'rast' - -tests.append(test_rast) - -def test_xbm(h, f): - """X bitmap (X10 or X11)""" - if h.startswith(b'#define '): - return 'xbm' - -tests.append(test_xbm) - -def test_bmp(h, f): - if h.startswith(b'BM'): - return 'bmp' - -tests.append(test_bmp) - -def test_webp(h, f): - if h.startswith(b'RIFF') and h[8:12] == b'WEBP': - return 'webp' - -tests.append(test_webp) - -def test_exr(h, f): - if h.startswith(b'\x76\x2f\x31\x01'): - return 'exr' - -tests.append(test_exr) - -#--------------------# -# Small test program # -#--------------------# - -def test(): - import sys - recursive = 0 - if sys.argv[1:] and sys.argv[1] == '-r': - del sys.argv[1:2] - recursive = 1 - try: - if sys.argv[1:]: - testall(sys.argv[1:], recursive, 1) - else: - testall(['.'], recursive, 1) - except KeyboardInterrupt: - sys.stderr.write('\n[Interrupted]\n') - sys.exit(1) - -def testall(list, recursive, toplevel): - import sys - import os - for filename in list: - if os.path.isdir(filename): - print(filename + '/:', end=' ') - if recursive or toplevel: - print('recursing down:') - import glob - names = glob.glob(os.path.join(glob.escape(filename), '*')) - testall(names, recursive, 0) - else: - print('*** directory (use -r) ***') - else: - print(filename + ':', end=' ') - sys.stdout.flush() - try: - print(what(filename)) - except OSError: - print('*** not found ***') - -if __name__ == '__main__': - test() diff --git a/Lib/imp.py b/Lib/imp.py deleted file mode 100644 index fc42c15765..0000000000 --- a/Lib/imp.py +++ /dev/null @@ -1,346 +0,0 @@ -"""This module provides the components needed to build your own __import__ -function. Undocumented functions are obsolete. - -In most cases it is preferred you consider using the importlib module's -functionality over this module. - -""" -# (Probably) need to stay in _imp -from _imp import (lock_held, acquire_lock, release_lock, - get_frozen_object, is_frozen_package, - init_frozen, is_builtin, is_frozen, - _fix_co_filename, _frozen_module_names) -try: - from _imp import create_dynamic -except ImportError: - # Platform doesn't support dynamic loading. - create_dynamic = None - -from importlib._bootstrap import _ERR_MSG, _exec, _load, _builtin_from_name -from importlib._bootstrap_external import SourcelessFileLoader - -from importlib import machinery -from importlib import util -import importlib -import os -import sys -import tokenize -import types -import warnings - -warnings.warn("the imp module is deprecated in favour of importlib and slated " - "for removal in Python 3.12; " - "see the module's documentation for alternative uses", - DeprecationWarning, stacklevel=2) - -# DEPRECATED -SEARCH_ERROR = 0 -PY_SOURCE = 1 -PY_COMPILED = 2 -C_EXTENSION = 3 -PY_RESOURCE = 4 -PKG_DIRECTORY = 5 -C_BUILTIN = 6 -PY_FROZEN = 7 -PY_CODERESOURCE = 8 -IMP_HOOK = 9 - - -def new_module(name): - """**DEPRECATED** - - Create a new module. - - The module is not entered into sys.modules. - - """ - return types.ModuleType(name) - - -def get_magic(): - """**DEPRECATED** - - Return the magic number for .pyc files. - """ - return util.MAGIC_NUMBER - - -def get_tag(): - """Return the magic tag for .pyc files.""" - return sys.implementation.cache_tag - - -def cache_from_source(path, debug_override=None): - """**DEPRECATED** - - Given the path to a .py file, return the path to its .pyc file. - - The .py file does not need to exist; this simply returns the path to the - .pyc file calculated as if the .py file were imported. - - If debug_override is not None, then it must be a boolean and is used in - place of sys.flags.optimize. - - If sys.implementation.cache_tag is None then NotImplementedError is raised. - - """ - with warnings.catch_warnings(): - warnings.simplefilter('ignore') - return util.cache_from_source(path, debug_override) - - -def source_from_cache(path): - """**DEPRECATED** - - Given the path to a .pyc. file, return the path to its .py file. - - The .pyc file does not need to exist; this simply returns the path to - the .py file calculated to correspond to the .pyc file. If path does - not conform to PEP 3147 format, ValueError will be raised. If - sys.implementation.cache_tag is None then NotImplementedError is raised. - - """ - return util.source_from_cache(path) - - -def get_suffixes(): - """**DEPRECATED**""" - extensions = [(s, 'rb', C_EXTENSION) for s in machinery.EXTENSION_SUFFIXES] - source = [(s, 'r', PY_SOURCE) for s in machinery.SOURCE_SUFFIXES] - bytecode = [(s, 'rb', PY_COMPILED) for s in machinery.BYTECODE_SUFFIXES] - - return extensions + source + bytecode - - -class NullImporter: - - """**DEPRECATED** - - Null import object. - - """ - - def __init__(self, path): - if path == '': - raise ImportError('empty pathname', path='') - elif os.path.isdir(path): - raise ImportError('existing directory', path=path) - - def find_module(self, fullname): - """Always returns None.""" - return None - - -class _HackedGetData: - - """Compatibility support for 'file' arguments of various load_*() - functions.""" - - def __init__(self, fullname, path, file=None): - super().__init__(fullname, path) - self.file = file - - def get_data(self, path): - """Gross hack to contort loader to deal w/ load_*()'s bad API.""" - if self.file and path == self.path: - # The contract of get_data() requires us to return bytes. Reopen the - # file in binary mode if needed. - if not self.file.closed: - file = self.file - if 'b' not in file.mode: - file.close() - if self.file.closed: - self.file = file = open(self.path, 'rb') - - with file: - return file.read() - else: - return super().get_data(path) - - -class _LoadSourceCompatibility(_HackedGetData, machinery.SourceFileLoader): - - """Compatibility support for implementing load_source().""" - - -def load_source(name, pathname, file=None): - loader = _LoadSourceCompatibility(name, pathname, file) - spec = util.spec_from_file_location(name, pathname, loader=loader) - if name in sys.modules: - module = _exec(spec, sys.modules[name]) - else: - module = _load(spec) - # To allow reloading to potentially work, use a non-hacked loader which - # won't rely on a now-closed file object. - module.__loader__ = machinery.SourceFileLoader(name, pathname) - module.__spec__.loader = module.__loader__ - return module - - -class _LoadCompiledCompatibility(_HackedGetData, SourcelessFileLoader): - - """Compatibility support for implementing load_compiled().""" - - -def load_compiled(name, pathname, file=None): - """**DEPRECATED**""" - loader = _LoadCompiledCompatibility(name, pathname, file) - spec = util.spec_from_file_location(name, pathname, loader=loader) - if name in sys.modules: - module = _exec(spec, sys.modules[name]) - else: - module = _load(spec) - # To allow reloading to potentially work, use a non-hacked loader which - # won't rely on a now-closed file object. - module.__loader__ = SourcelessFileLoader(name, pathname) - module.__spec__.loader = module.__loader__ - return module - - -def load_package(name, path): - """**DEPRECATED**""" - if os.path.isdir(path): - extensions = (machinery.SOURCE_SUFFIXES[:] + - machinery.BYTECODE_SUFFIXES[:]) - for extension in extensions: - init_path = os.path.join(path, '__init__' + extension) - if os.path.exists(init_path): - path = init_path - break - else: - raise ValueError('{!r} is not a package'.format(path)) - spec = util.spec_from_file_location(name, path, - submodule_search_locations=[]) - if name in sys.modules: - return _exec(spec, sys.modules[name]) - else: - return _load(spec) - - -def load_module(name, file, filename, details): - """**DEPRECATED** - - Load a module, given information returned by find_module(). - - The module name must include the full package name, if any. - - """ - suffix, mode, type_ = details - if mode and (not mode.startswith('r') or '+' in mode): - raise ValueError('invalid file open mode {!r}'.format(mode)) - elif file is None and type_ in {PY_SOURCE, PY_COMPILED}: - msg = 'file object required for import (type code {})'.format(type_) - raise ValueError(msg) - elif type_ == PY_SOURCE: - return load_source(name, filename, file) - elif type_ == PY_COMPILED: - return load_compiled(name, filename, file) - elif type_ == C_EXTENSION and load_dynamic is not None: - if file is None: - with open(filename, 'rb') as opened_file: - return load_dynamic(name, filename, opened_file) - else: - return load_dynamic(name, filename, file) - elif type_ == PKG_DIRECTORY: - return load_package(name, filename) - elif type_ == C_BUILTIN: - return init_builtin(name) - elif type_ == PY_FROZEN: - return init_frozen(name) - else: - msg = "Don't know how to import {} (type code {})".format(name, type_) - raise ImportError(msg, name=name) - - -def find_module(name, path=None): - """**DEPRECATED** - - Search for a module. - - If path is omitted or None, search for a built-in, frozen or special - module and continue search in sys.path. The module name cannot - contain '.'; to search for a submodule of a package, pass the - submodule name and the package's __path__. - - """ - if not isinstance(name, str): - raise TypeError("'name' must be a str, not {}".format(type(name))) - elif not isinstance(path, (type(None), list)): - # Backwards-compatibility - raise RuntimeError("'path' must be None or a list, " - "not {}".format(type(path))) - - if path is None: - if is_builtin(name): - return None, None, ('', '', C_BUILTIN) - elif is_frozen(name): - return None, None, ('', '', PY_FROZEN) - else: - path = sys.path - - for entry in path: - package_directory = os.path.join(entry, name) - for suffix in ['.py', machinery.BYTECODE_SUFFIXES[0]]: - package_file_name = '__init__' + suffix - file_path = os.path.join(package_directory, package_file_name) - if os.path.isfile(file_path): - return None, package_directory, ('', '', PKG_DIRECTORY) - for suffix, mode, type_ in get_suffixes(): - file_name = name + suffix - file_path = os.path.join(entry, file_name) - if os.path.isfile(file_path): - break - else: - continue - break # Break out of outer loop when breaking out of inner loop. - else: - raise ImportError(_ERR_MSG.format(name), name=name) - - encoding = None - if 'b' not in mode: - with open(file_path, 'rb') as file: - encoding = tokenize.detect_encoding(file.readline)[0] - file = open(file_path, mode, encoding=encoding) - return file, file_path, (suffix, mode, type_) - - -def reload(module): - """**DEPRECATED** - - Reload the module and return it. - - The module must have been successfully imported before. - - """ - return importlib.reload(module) - - -def init_builtin(name): - """**DEPRECATED** - - Load and return a built-in module by name, or None is such module doesn't - exist - """ - try: - return _builtin_from_name(name) - except ImportError: - return None - - -if create_dynamic: - def load_dynamic(name, path, file=None): - """**DEPRECATED** - - Load an extension module. - """ - import importlib.machinery - loader = importlib.machinery.ExtensionFileLoader(name, path) - - # Issue #24748: Skip the sys.modules check in _load_module_shim; - # always load new extension - spec = importlib.machinery.ModuleSpec( - name=name, loader=loader, origin=path) - return _load(spec) - -else: - load_dynamic = None diff --git a/Lib/ipaddress.py b/Lib/ipaddress.py index 9ca90fd0f7..67e45450fc 100644 --- a/Lib/ipaddress.py +++ b/Lib/ipaddress.py @@ -310,7 +310,7 @@ def collapse_addresses(addresses): [IPv4Network('192.0.2.0/24')] Args: - addresses: An iterator of IPv4Network or IPv6Network objects. + addresses: An iterable of IPv4Network or IPv6Network objects. Returns: An iterator of the collapsed IPv(4|6)Network objects. @@ -734,7 +734,7 @@ def __eq__(self, other): return NotImplemented def __hash__(self): - return hash(int(self.network_address) ^ int(self.netmask)) + return hash((int(self.network_address), int(self.netmask))) def __contains__(self, other): # always false if one is v4 and the other is v6. @@ -1086,7 +1086,11 @@ def is_private(self): """ return any(self.network_address in priv_network and self.broadcast_address in priv_network - for priv_network in self._constants._private_networks) + for priv_network in self._constants._private_networks) and all( + self.network_address not in network and + self.broadcast_address not in network + for network in self._constants._private_networks_exceptions + ) @property def is_global(self): @@ -1333,18 +1337,41 @@ def is_reserved(self): @property @functools.lru_cache() def is_private(self): - """Test if this address is allocated for private networks. + """``True`` if the address is defined as not globally reachable by + iana-ipv4-special-registry_ (for IPv4) or iana-ipv6-special-registry_ + (for IPv6) with the following exceptions: - Returns: - A boolean, True if the address is reserved per - iana-ipv4-special-registry. + * ``is_private`` is ``False`` for ``100.64.0.0/10`` + * For IPv4-mapped IPv6-addresses the ``is_private`` value is determined by the + semantics of the underlying IPv4 addresses and the following condition holds + (see :attr:`IPv6Address.ipv4_mapped`):: + address.is_private == address.ipv4_mapped.is_private + + ``is_private`` has value opposite to :attr:`is_global`, except for the ``100.64.0.0/10`` + IPv4 range where they are both ``False``. """ - return any(self in net for net in self._constants._private_networks) + return ( + any(self in net for net in self._constants._private_networks) + and all(self not in net for net in self._constants._private_networks_exceptions) + ) @property @functools.lru_cache() def is_global(self): + """``True`` if the address is defined as globally reachable by + iana-ipv4-special-registry_ (for IPv4) or iana-ipv6-special-registry_ + (for IPv6) with the following exception: + + For IPv4-mapped IPv6-addresses the ``is_private`` value is determined by the + semantics of the underlying IPv4 addresses and the following condition holds + (see :attr:`IPv6Address.ipv4_mapped`):: + + address.is_global == address.ipv4_mapped.is_global + + ``is_global`` has value opposite to :attr:`is_private`, except for the ``100.64.0.0/10`` + IPv4 range where they are both ``False``. + """ return self not in self._constants._public_network and not self.is_private @property @@ -1389,6 +1416,16 @@ def is_link_local(self): """ return self in self._constants._linklocal_network + @property + def ipv6_mapped(self): + """Return the IPv4-mapped IPv6 address. + + Returns: + The IPv4-mapped IPv6 address per RFC 4291. + + """ + return IPv6Address(f'::ffff:{self}') + class IPv4Interface(IPv4Address): @@ -1548,13 +1585,15 @@ class _IPv4Constants: _public_network = IPv4Network('100.64.0.0/10') + # Not globally reachable address blocks listed on + # https://www.iana.org/assignments/iana-ipv4-special-registry/iana-ipv4-special-registry.xhtml _private_networks = [ IPv4Network('0.0.0.0/8'), IPv4Network('10.0.0.0/8'), IPv4Network('127.0.0.0/8'), IPv4Network('169.254.0.0/16'), IPv4Network('172.16.0.0/12'), - IPv4Network('192.0.0.0/29'), + IPv4Network('192.0.0.0/24'), IPv4Network('192.0.0.170/31'), IPv4Network('192.0.2.0/24'), IPv4Network('192.168.0.0/16'), @@ -1565,6 +1604,11 @@ class _IPv4Constants: IPv4Network('255.255.255.255/32'), ] + _private_networks_exceptions = [ + IPv4Network('192.0.0.9/32'), + IPv4Network('192.0.0.10/32'), + ] + _reserved_network = IPv4Network('240.0.0.0/4') _unspecified_address = IPv4Address('0.0.0.0') @@ -1630,8 +1674,18 @@ def _ip_int_from_string(cls, ip_str): """ if not ip_str: raise AddressValueError('Address cannot be empty') - - parts = ip_str.split(':') + if len(ip_str) > 45: + shorten = ip_str + if len(shorten) > 100: + shorten = f'{ip_str[:45]}({len(ip_str)-90} chars elided){ip_str[-45:]}' + raise AddressValueError(f"At most 45 characters expected in " + f"{shorten!r}") + + # We want to allow more parts than the max to be 'split' + # to preserve the correct error message when there are + # too many parts combined with '::' + _max_parts = cls._HEXTET_COUNT + 1 + parts = ip_str.split(':', maxsplit=_max_parts) # An IPv6 address needs at least 2 colons (3 parts). _min_parts = 3 @@ -1651,7 +1705,6 @@ def _ip_int_from_string(cls, ip_str): # An IPv6 address can't have more than 8 colons (9 parts). # The extra colon comes from using the "::" notation for a single # leading or trailing zero part. - _max_parts = cls._HEXTET_COUNT + 1 if len(parts) > _max_parts: msg = "At most %d colons permitted in %r" % (_max_parts-1, ip_str) raise AddressValueError(msg) @@ -1923,8 +1976,49 @@ def __init__(self, address): self._ip = self._ip_int_from_string(addr_str) + def _explode_shorthand_ip_string(self): + ipv4_mapped = self.ipv4_mapped + if ipv4_mapped is None: + return super()._explode_shorthand_ip_string() + prefix_len = 30 + raw_exploded_str = super()._explode_shorthand_ip_string() + return f"{raw_exploded_str[:prefix_len]}{ipv4_mapped!s}" + + def _reverse_pointer(self): + ipv4_mapped = self.ipv4_mapped + if ipv4_mapped is None: + return super()._reverse_pointer() + prefix_len = 30 + raw_exploded_str = super()._explode_shorthand_ip_string()[:prefix_len] + # ipv4 encoded using hexadecimal nibbles instead of decimals + ipv4_int = ipv4_mapped._ip + reverse_chars = f"{raw_exploded_str}{ipv4_int:008x}"[::-1].replace(':', '') + return '.'.join(reverse_chars) + '.ip6.arpa' + + def _ipv4_mapped_ipv6_to_str(self): + """Return convenient text representation of IPv4-mapped IPv6 address + + See RFC 4291 2.5.5.2, 2.2 p.3 for details. + + Returns: + A string, 'x:x:x:x:x:x:d.d.d.d', where the 'x's are the hexadecimal values of + the six high-order 16-bit pieces of the address, and the 'd's are + the decimal values of the four low-order 8-bit pieces of the + address (standard IPv4 representation) as defined in RFC 4291 2.2 p.3. + + """ + ipv4_mapped = self.ipv4_mapped + if ipv4_mapped is None: + raise AddressValueError("Can not apply to non-IPv4-mapped IPv6 address %s" % str(self)) + high_order_bits = self._ip >> 32 + return "%s:%s" % (self._string_from_ip_int(high_order_bits), str(ipv4_mapped)) + def __str__(self): - ip_str = super().__str__() + ipv4_mapped = self.ipv4_mapped + if ipv4_mapped is None: + ip_str = super().__str__() + else: + ip_str = self._ipv4_mapped_ipv6_to_str() return ip_str + '%' + self._scope_id if self._scope_id else ip_str def __hash__(self): @@ -1967,6 +2061,9 @@ def is_multicast(self): See RFC 2373 2.7 for details. """ + ipv4_mapped = self.ipv4_mapped + if ipv4_mapped is not None: + return ipv4_mapped.is_multicast return self in self._constants._multicast_network @property @@ -1978,6 +2075,9 @@ def is_reserved(self): reserved IPv6 Network ranges. """ + ipv4_mapped = self.ipv4_mapped + if ipv4_mapped is not None: + return ipv4_mapped.is_reserved return any(self in x for x in self._constants._reserved_networks) @property @@ -1988,6 +2088,9 @@ def is_link_local(self): A boolean, True if the address is reserved per RFC 4291. """ + ipv4_mapped = self.ipv4_mapped + if ipv4_mapped is not None: + return ipv4_mapped.is_link_local return self in self._constants._linklocal_network @property @@ -2007,28 +2110,46 @@ def is_site_local(self): @property @functools.lru_cache() def is_private(self): - """Test if this address is allocated for private networks. + """``True`` if the address is defined as not globally reachable by + iana-ipv4-special-registry_ (for IPv4) or iana-ipv6-special-registry_ + (for IPv6) with the following exceptions: - Returns: - A boolean, True if the address is reserved per - iana-ipv6-special-registry, or is ipv4_mapped and is - reserved in the iana-ipv4-special-registry. + * ``is_private`` is ``False`` for ``100.64.0.0/10`` + * For IPv4-mapped IPv6-addresses the ``is_private`` value is determined by the + semantics of the underlying IPv4 addresses and the following condition holds + (see :attr:`IPv6Address.ipv4_mapped`):: + + address.is_private == address.ipv4_mapped.is_private + ``is_private`` has value opposite to :attr:`is_global`, except for the ``100.64.0.0/10`` + IPv4 range where they are both ``False``. """ ipv4_mapped = self.ipv4_mapped if ipv4_mapped is not None: return ipv4_mapped.is_private - return any(self in net for net in self._constants._private_networks) + return ( + any(self in net for net in self._constants._private_networks) + and all(self not in net for net in self._constants._private_networks_exceptions) + ) @property def is_global(self): - """Test if this address is allocated for public networks. + """``True`` if the address is defined as globally reachable by + iana-ipv4-special-registry_ (for IPv4) or iana-ipv6-special-registry_ + (for IPv6) with the following exception: - Returns: - A boolean, true if the address is not reserved per - iana-ipv6-special-registry. + For IPv4-mapped IPv6-addresses the ``is_private`` value is determined by the + semantics of the underlying IPv4 addresses and the following condition holds + (see :attr:`IPv6Address.ipv4_mapped`):: + address.is_global == address.ipv4_mapped.is_global + + ``is_global`` has value opposite to :attr:`is_private`, except for the ``100.64.0.0/10`` + IPv4 range where they are both ``False``. """ + ipv4_mapped = self.ipv4_mapped + if ipv4_mapped is not None: + return ipv4_mapped.is_global return not self.is_private @property @@ -2040,6 +2161,9 @@ def is_unspecified(self): RFC 2373 2.5.2. """ + ipv4_mapped = self.ipv4_mapped + if ipv4_mapped is not None: + return ipv4_mapped.is_unspecified return self._ip == 0 @property @@ -2051,6 +2175,9 @@ def is_loopback(self): RFC 2373 2.5.3. """ + ipv4_mapped = self.ipv4_mapped + if ipv4_mapped is not None: + return ipv4_mapped.is_loopback return self._ip == 1 @property @@ -2167,7 +2294,7 @@ def is_unspecified(self): @property def is_loopback(self): - return self._ip == 1 and self.network.is_loopback + return super().is_loopback and self.network.is_loopback class IPv6Network(_BaseV6, _BaseNetwork): @@ -2268,19 +2395,33 @@ class _IPv6Constants: _multicast_network = IPv6Network('ff00::/8') + # Not globally reachable address blocks listed on + # https://www.iana.org/assignments/iana-ipv6-special-registry/iana-ipv6-special-registry.xhtml _private_networks = [ IPv6Network('::1/128'), IPv6Network('::/128'), IPv6Network('::ffff:0:0/96'), + IPv6Network('64:ff9b:1::/48'), IPv6Network('100::/64'), IPv6Network('2001::/23'), - IPv6Network('2001:2::/48'), IPv6Network('2001:db8::/32'), - IPv6Network('2001:10::/28'), + # IANA says N/A, let's consider it not globally reachable to be safe + IPv6Network('2002::/16'), + # RFC 9637: https://www.rfc-editor.org/rfc/rfc9637.html#section-6-2.2 + IPv6Network('3fff::/20'), IPv6Network('fc00::/7'), IPv6Network('fe80::/10'), ] + _private_networks_exceptions = [ + IPv6Network('2001:1::1/128'), + IPv6Network('2001:1::2/128'), + IPv6Network('2001:3::/32'), + IPv6Network('2001:4:112::/48'), + IPv6Network('2001:20::/28'), + IPv6Network('2001:30::/28'), + ] + _reserved_networks = [ IPv6Network('::/8'), IPv6Network('100::/8'), IPv6Network('200::/7'), IPv6Network('400::/6'), diff --git a/Lib/linecache.py b/Lib/linecache.py index 97644a8e37..dc02de19eb 100644 --- a/Lib/linecache.py +++ b/Lib/linecache.py @@ -5,17 +5,13 @@ that name. """ -import functools -import sys -import os -import tokenize - __all__ = ["getline", "clearcache", "checkcache", "lazycache"] # The cache. Maps filenames to either a thunk which will provide source code, # or a tuple (size, mtime, lines, fullname) once loaded. cache = {} +_interactive_cache = {} def clearcache(): @@ -49,28 +45,54 @@ def getlines(filename, module_globals=None): return [] +def _getline_from_code(filename, lineno): + lines = _getlines_from_code(filename) + if 1 <= lineno <= len(lines): + return lines[lineno - 1] + return '' + +def _make_key(code): + return (code.co_filename, code.co_qualname, code.co_firstlineno) + +def _getlines_from_code(code): + code_id = _make_key(code) + if code_id in _interactive_cache: + entry = _interactive_cache[code_id] + if len(entry) != 1: + return _interactive_cache[code_id][2] + return [] + + def checkcache(filename=None): """Discard cache entries that are out of date. (This is not checked upon each call!)""" if filename is None: - filenames = list(cache.keys()) - elif filename in cache: - filenames = [filename] + # get keys atomically + filenames = cache.copy().keys() else: - return + filenames = [filename] for filename in filenames: - entry = cache[filename] + try: + entry = cache[filename] + except KeyError: + continue + if len(entry) == 1: # lazy cache entry, leave it lazy. continue size, mtime, lines, fullname = entry if mtime is None: continue # no-op for files loaded via a __loader__ + try: + # This import can fail if the interpreter is shutting down + import os + except ImportError: + return try: stat = os.stat(fullname) - except OSError: + except (OSError, ValueError): cache.pop(filename, None) continue if size != stat.st_size or mtime != stat.st_mtime: @@ -82,6 +104,17 @@ def updatecache(filename, module_globals=None): If something's wrong, print a message, discard the cache entry, and return an empty list.""" + # These imports are not at top level because linecache is in the critical + # path of the interpreter startup and importing os and sys take a lot of time + # and slows down the startup sequence. + try: + import os + import sys + import tokenize + except ImportError: + # These import can fail if the interpreter is shutting down + return [] + if filename in cache: if len(cache[filename]) != 1: cache.pop(filename, None) @@ -128,16 +161,20 @@ def updatecache(filename, module_globals=None): try: stat = os.stat(fullname) break - except OSError: + except (OSError, ValueError): pass else: return [] + except ValueError: # may be raised by os.stat() + return [] try: with tokenize.open(fullname) as fp: lines = fp.readlines() except (OSError, UnicodeDecodeError, SyntaxError): return [] - if lines and not lines[-1].endswith('\n'): + if not lines: + lines = ['\n'] + elif not lines[-1].endswith('\n'): lines[-1] += '\n' size, mtime = stat.st_size, stat.st_mtime cache[filename] = size, mtime, lines, fullname @@ -166,17 +203,29 @@ def lazycache(filename, module_globals): return False # Try for a __loader__, if available if module_globals and '__name__' in module_globals: - name = module_globals['__name__'] - if (loader := module_globals.get('__loader__')) is None: - if spec := module_globals.get('__spec__'): - try: - loader = spec.loader - except AttributeError: - pass + spec = module_globals.get('__spec__') + name = getattr(spec, 'name', None) or module_globals['__name__'] + loader = getattr(spec, 'loader', None) + if loader is None: + loader = module_globals.get('__loader__') get_source = getattr(loader, 'get_source', None) if name and get_source: - get_lines = functools.partial(get_source, name) + def get_lines(name=name, *args, **kwargs): + return get_source(name, *args, **kwargs) cache[filename] = (get_lines,) return True return False + +def _register_code(code, string, name): + entry = (len(string), + None, + [line + '\n' for line in string.splitlines()], + name) + stack = [code] + while stack: + code = stack.pop() + for const in code.co_consts: + if isinstance(const, type(code)): + stack.append(const) + _interactive_cache[_make_key(code)] = entry diff --git a/Lib/lzma.py b/Lib/lzma.py new file mode 100644 index 0000000000..6668921f00 --- /dev/null +++ b/Lib/lzma.py @@ -0,0 +1,364 @@ +"""Interface to the liblzma compression library. + +This module provides a class for reading and writing compressed files, +classes for incremental (de)compression, and convenience functions for +one-shot (de)compression. + +These classes and functions support both the XZ and legacy LZMA +container formats, as well as raw compressed data streams. +""" + +__all__ = [ + "CHECK_NONE", "CHECK_CRC32", "CHECK_CRC64", "CHECK_SHA256", + "CHECK_ID_MAX", "CHECK_UNKNOWN", + "FILTER_LZMA1", "FILTER_LZMA2", "FILTER_DELTA", "FILTER_X86", "FILTER_IA64", + "FILTER_ARM", "FILTER_ARMTHUMB", "FILTER_POWERPC", "FILTER_SPARC", + "FORMAT_AUTO", "FORMAT_XZ", "FORMAT_ALONE", "FORMAT_RAW", + "MF_HC3", "MF_HC4", "MF_BT2", "MF_BT3", "MF_BT4", + "MODE_FAST", "MODE_NORMAL", "PRESET_DEFAULT", "PRESET_EXTREME", + + "LZMACompressor", "LZMADecompressor", "LZMAFile", "LZMAError", + "open", "compress", "decompress", "is_check_supported", +] + +import builtins +import io +import os +from _lzma import * +from _lzma import _encode_filter_properties, _decode_filter_properties +import _compression + + +# Value 0 no longer used +_MODE_READ = 1 +# Value 2 no longer used +_MODE_WRITE = 3 + + +class LZMAFile(_compression.BaseStream): + + """A file object providing transparent LZMA (de)compression. + + An LZMAFile can act as a wrapper for an existing file object, or + refer directly to a named file on disk. + + Note that LZMAFile provides a *binary* file interface - data read + is returned as bytes, and data to be written must be given as bytes. + """ + + def __init__(self, filename=None, mode="r", *, + format=None, check=-1, preset=None, filters=None): + """Open an LZMA-compressed file in binary mode. + + filename can be either an actual file name (given as a str, + bytes, or PathLike object), in which case the named file is + opened, or it can be an existing file object to read from or + write to. + + mode can be "r" for reading (default), "w" for (over)writing, + "x" for creating exclusively, or "a" for appending. These can + equivalently be given as "rb", "wb", "xb" and "ab" respectively. + + format specifies the container format to use for the file. + If mode is "r", this defaults to FORMAT_AUTO. Otherwise, the + default is FORMAT_XZ. + + check specifies the integrity check to use. This argument can + only be used when opening a file for writing. For FORMAT_XZ, + the default is CHECK_CRC64. FORMAT_ALONE and FORMAT_RAW do not + support integrity checks - for these formats, check must be + omitted, or be CHECK_NONE. + + When opening a file for reading, the *preset* argument is not + meaningful, and should be omitted. The *filters* argument should + also be omitted, except when format is FORMAT_RAW (in which case + it is required). + + When opening a file for writing, the settings used by the + compressor can be specified either as a preset compression + level (with the *preset* argument), or in detail as a custom + filter chain (with the *filters* argument). For FORMAT_XZ and + FORMAT_ALONE, the default is to use the PRESET_DEFAULT preset + level. For FORMAT_RAW, the caller must always specify a filter + chain; the raw compressor does not support preset compression + levels. + + preset (if provided) should be an integer in the range 0-9, + optionally OR-ed with the constant PRESET_EXTREME. + + filters (if provided) should be a sequence of dicts. Each dict + should have an entry for "id" indicating ID of the filter, plus + additional entries for options to the filter. + """ + self._fp = None + self._closefp = False + self._mode = None + + if mode in ("r", "rb"): + if check != -1: + raise ValueError("Cannot specify an integrity check " + "when opening a file for reading") + if preset is not None: + raise ValueError("Cannot specify a preset compression " + "level when opening a file for reading") + if format is None: + format = FORMAT_AUTO + mode_code = _MODE_READ + elif mode in ("w", "wb", "a", "ab", "x", "xb"): + if format is None: + format = FORMAT_XZ + mode_code = _MODE_WRITE + self._compressor = LZMACompressor(format=format, check=check, + preset=preset, filters=filters) + self._pos = 0 + else: + raise ValueError("Invalid mode: {!r}".format(mode)) + + if isinstance(filename, (str, bytes, os.PathLike)): + if "b" not in mode: + mode += "b" + self._fp = builtins.open(filename, mode) + self._closefp = True + self._mode = mode_code + elif hasattr(filename, "read") or hasattr(filename, "write"): + self._fp = filename + self._mode = mode_code + else: + raise TypeError("filename must be a str, bytes, file or PathLike object") + + if self._mode == _MODE_READ: + raw = _compression.DecompressReader(self._fp, LZMADecompressor, + trailing_error=LZMAError, format=format, filters=filters) + self._buffer = io.BufferedReader(raw) + + def close(self): + """Flush and close the file. + + May be called more than once without error. Once the file is + closed, any other operation on it will raise a ValueError. + """ + if self.closed: + return + try: + if self._mode == _MODE_READ: + self._buffer.close() + self._buffer = None + elif self._mode == _MODE_WRITE: + self._fp.write(self._compressor.flush()) + self._compressor = None + finally: + try: + if self._closefp: + self._fp.close() + finally: + self._fp = None + self._closefp = False + + @property + def closed(self): + """True if this file is closed.""" + return self._fp is None + + @property + def name(self): + self._check_not_closed() + return self._fp.name + + @property + def mode(self): + return 'wb' if self._mode == _MODE_WRITE else 'rb' + + def fileno(self): + """Return the file descriptor for the underlying file.""" + self._check_not_closed() + return self._fp.fileno() + + def seekable(self): + """Return whether the file supports seeking.""" + return self.readable() and self._buffer.seekable() + + def readable(self): + """Return whether the file was opened for reading.""" + self._check_not_closed() + return self._mode == _MODE_READ + + def writable(self): + """Return whether the file was opened for writing.""" + self._check_not_closed() + return self._mode == _MODE_WRITE + + def peek(self, size=-1): + """Return buffered data without advancing the file position. + + Always returns at least one byte of data, unless at EOF. + The exact number of bytes returned is unspecified. + """ + self._check_can_read() + # Relies on the undocumented fact that BufferedReader.peek() always + # returns at least one byte (except at EOF) + return self._buffer.peek(size) + + def read(self, size=-1): + """Read up to size uncompressed bytes from the file. + + If size is negative or omitted, read until EOF is reached. + Returns b"" if the file is already at EOF. + """ + self._check_can_read() + return self._buffer.read(size) + + def read1(self, size=-1): + """Read up to size uncompressed bytes, while trying to avoid + making multiple reads from the underlying stream. Reads up to a + buffer's worth of data if size is negative. + + Returns b"" if the file is at EOF. + """ + self._check_can_read() + if size < 0: + size = io.DEFAULT_BUFFER_SIZE + return self._buffer.read1(size) + + def readline(self, size=-1): + """Read a line of uncompressed bytes from the file. + + The terminating newline (if present) is retained. If size is + non-negative, no more than size bytes will be read (in which + case the line may be incomplete). Returns b'' if already at EOF. + """ + self._check_can_read() + return self._buffer.readline(size) + + def write(self, data): + """Write a bytes object to the file. + + Returns the number of uncompressed bytes written, which is + always the length of data in bytes. Note that due to buffering, + the file on disk may not reflect the data written until close() + is called. + """ + self._check_can_write() + if isinstance(data, (bytes, bytearray)): + length = len(data) + else: + # accept any data that supports the buffer protocol + data = memoryview(data) + length = data.nbytes + + compressed = self._compressor.compress(data) + self._fp.write(compressed) + self._pos += length + return length + + def seek(self, offset, whence=io.SEEK_SET): + """Change the file position. + + The new position is specified by offset, relative to the + position indicated by whence. Possible values for whence are: + + 0: start of stream (default): offset must not be negative + 1: current stream position + 2: end of stream; offset must not be positive + + Returns the new file position. + + Note that seeking is emulated, so depending on the parameters, + this operation may be extremely slow. + """ + self._check_can_seek() + return self._buffer.seek(offset, whence) + + def tell(self): + """Return the current file position.""" + self._check_not_closed() + if self._mode == _MODE_READ: + return self._buffer.tell() + return self._pos + + +def open(filename, mode="rb", *, + format=None, check=-1, preset=None, filters=None, + encoding=None, errors=None, newline=None): + """Open an LZMA-compressed file in binary or text mode. + + filename can be either an actual file name (given as a str, bytes, + or PathLike object), in which case the named file is opened, or it + can be an existing file object to read from or write to. + + The mode argument can be "r", "rb" (default), "w", "wb", "x", "xb", + "a", or "ab" for binary mode, or "rt", "wt", "xt", or "at" for text + mode. + + The format, check, preset and filters arguments specify the + compression settings, as for LZMACompressor, LZMADecompressor and + LZMAFile. + + For binary mode, this function is equivalent to the LZMAFile + constructor: LZMAFile(filename, mode, ...). In this case, the + encoding, errors and newline arguments must not be provided. + + For text mode, an LZMAFile object is created, and wrapped in an + io.TextIOWrapper instance with the specified encoding, error + handling behavior, and line ending(s). + + """ + if "t" in mode: + if "b" in mode: + raise ValueError("Invalid mode: %r" % (mode,)) + else: + if encoding is not None: + raise ValueError("Argument 'encoding' not supported in binary mode") + if errors is not None: + raise ValueError("Argument 'errors' not supported in binary mode") + if newline is not None: + raise ValueError("Argument 'newline' not supported in binary mode") + + lz_mode = mode.replace("t", "") + binary_file = LZMAFile(filename, lz_mode, format=format, check=check, + preset=preset, filters=filters) + + if "t" in mode: + encoding = io.text_encoding(encoding) + return io.TextIOWrapper(binary_file, encoding, errors, newline) + else: + return binary_file + + +def compress(data, format=FORMAT_XZ, check=-1, preset=None, filters=None): + """Compress a block of data. + + Refer to LZMACompressor's docstring for a description of the + optional arguments *format*, *check*, *preset* and *filters*. + + For incremental compression, use an LZMACompressor instead. + """ + comp = LZMACompressor(format, check, preset, filters) + return comp.compress(data) + comp.flush() + + +def decompress(data, format=FORMAT_AUTO, memlimit=None, filters=None): + """Decompress a block of data. + + Refer to LZMADecompressor's docstring for a description of the + optional arguments *format*, *check* and *filters*. + + For incremental decompression, use an LZMADecompressor instead. + """ + results = [] + while True: + decomp = LZMADecompressor(format, memlimit, filters) + try: + res = decomp.decompress(data) + except LZMAError: + if results: + break # Leftover data is not a valid LZMA/XZ stream; ignore it. + else: + raise # Error on the first iteration; bail out. + results.append(res) + if not decomp.eof: + raise LZMAError("Compressed data ended before the " + "end-of-stream marker was reached") + data = decomp.unused_data + if not data: + break + return b"".join(results) diff --git a/Lib/nntplib.py b/Lib/nntplib.py deleted file mode 100644 index dddea05998..0000000000 --- a/Lib/nntplib.py +++ /dev/null @@ -1,1093 +0,0 @@ -"""An NNTP client class based on: -- RFC 977: Network News Transfer Protocol -- RFC 2980: Common NNTP Extensions -- RFC 3977: Network News Transfer Protocol (version 2) - -Example: - ->>> from nntplib import NNTP ->>> s = NNTP('news') ->>> resp, count, first, last, name = s.group('comp.lang.python') ->>> print('Group', name, 'has', count, 'articles, range', first, 'to', last) -Group comp.lang.python has 51 articles, range 5770 to 5821 ->>> resp, subs = s.xhdr('subject', '{0}-{1}'.format(first, last)) ->>> resp = s.quit() ->>> - -Here 'resp' is the server response line. -Error responses are turned into exceptions. - -To post an article from a file: ->>> f = open(filename, 'rb') # file containing article, including header ->>> resp = s.post(f) ->>> - -For descriptions of all methods, read the comments in the code below. -Note that all arguments and return values representing article numbers -are strings, not numbers, since they are rarely used for calculations. -""" - -# RFC 977 by Brian Kantor and Phil Lapsley. -# xover, xgtitle, xpath, date methods by Kevan Heydon - -# Incompatible changes from the 2.x nntplib: -# - all commands are encoded as UTF-8 data (using the "surrogateescape" -# error handler), except for raw message data (POST, IHAVE) -# - all responses are decoded as UTF-8 data (using the "surrogateescape" -# error handler), except for raw message data (ARTICLE, HEAD, BODY) -# - the `file` argument to various methods is keyword-only -# -# - NNTP.date() returns a datetime object -# - NNTP.newgroups() and NNTP.newnews() take a datetime (or date) object, -# rather than a pair of (date, time) strings. -# - NNTP.newgroups() and NNTP.list() return a list of GroupInfo named tuples -# - NNTP.descriptions() returns a dict mapping group names to descriptions -# - NNTP.xover() returns a list of dicts mapping field names (header or metadata) -# to field values; each dict representing a message overview. -# - NNTP.article(), NNTP.head() and NNTP.body() return a (response, ArticleInfo) -# tuple. -# - the "internal" methods have been marked private (they now start with -# an underscore) - -# Other changes from the 2.x/3.1 nntplib: -# - automatic querying of capabilities at connect -# - New method NNTP.getcapabilities() -# - New method NNTP.over() -# - New helper function decode_header() -# - NNTP.post() and NNTP.ihave() accept file objects, bytes-like objects and -# arbitrary iterables yielding lines. -# - An extensive test suite :-) - -# TODO: -# - return structured data (GroupInfo etc.) everywhere -# - support HDR - -# Imports -import re -import socket -import collections -import datetime -import sys -import warnings - -try: - import ssl -except ImportError: - _have_ssl = False -else: - _have_ssl = True - -from email.header import decode_header as _email_decode_header -from socket import _GLOBAL_DEFAULT_TIMEOUT - -__all__ = ["NNTP", - "NNTPError", "NNTPReplyError", "NNTPTemporaryError", - "NNTPPermanentError", "NNTPProtocolError", "NNTPDataError", - "decode_header", - ] - -warnings._deprecated(__name__, remove=(3, 13)) - -# maximal line length when calling readline(). This is to prevent -# reading arbitrary length lines. RFC 3977 limits NNTP line length to -# 512 characters, including CRLF. We have selected 2048 just to be on -# the safe side. -_MAXLINE = 2048 - - -# Exceptions raised when an error or invalid response is received -class NNTPError(Exception): - """Base class for all nntplib exceptions""" - def __init__(self, *args): - Exception.__init__(self, *args) - try: - self.response = args[0] - except IndexError: - self.response = 'No response given' - -class NNTPReplyError(NNTPError): - """Unexpected [123]xx reply""" - pass - -class NNTPTemporaryError(NNTPError): - """4xx errors""" - pass - -class NNTPPermanentError(NNTPError): - """5xx errors""" - pass - -class NNTPProtocolError(NNTPError): - """Response does not begin with [1-5]""" - pass - -class NNTPDataError(NNTPError): - """Error in response data""" - pass - - -# Standard port used by NNTP servers -NNTP_PORT = 119 -NNTP_SSL_PORT = 563 - -# Response numbers that are followed by additional text (e.g. article) -_LONGRESP = { - '100', # HELP - '101', # CAPABILITIES - '211', # LISTGROUP (also not multi-line with GROUP) - '215', # LIST - '220', # ARTICLE - '221', # HEAD, XHDR - '222', # BODY - '224', # OVER, XOVER - '225', # HDR - '230', # NEWNEWS - '231', # NEWGROUPS - '282', # XGTITLE -} - -# Default decoded value for LIST OVERVIEW.FMT if not supported -_DEFAULT_OVERVIEW_FMT = [ - "subject", "from", "date", "message-id", "references", ":bytes", ":lines"] - -# Alternative names allowed in LIST OVERVIEW.FMT response -_OVERVIEW_FMT_ALTERNATIVES = { - 'bytes': ':bytes', - 'lines': ':lines', -} - -# Line terminators (we always output CRLF, but accept any of CRLF, CR, LF) -_CRLF = b'\r\n' - -GroupInfo = collections.namedtuple('GroupInfo', - ['group', 'last', 'first', 'flag']) - -ArticleInfo = collections.namedtuple('ArticleInfo', - ['number', 'message_id', 'lines']) - - -# Helper function(s) -def decode_header(header_str): - """Takes a unicode string representing a munged header value - and decodes it as a (possibly non-ASCII) readable value.""" - parts = [] - for v, enc in _email_decode_header(header_str): - if isinstance(v, bytes): - parts.append(v.decode(enc or 'ascii')) - else: - parts.append(v) - return ''.join(parts) - -def _parse_overview_fmt(lines): - """Parse a list of string representing the response to LIST OVERVIEW.FMT - and return a list of header/metadata names. - Raises NNTPDataError if the response is not compliant - (cf. RFC 3977, section 8.4).""" - fmt = [] - for line in lines: - if line[0] == ':': - # Metadata name (e.g. ":bytes") - name, _, suffix = line[1:].partition(':') - name = ':' + name - else: - # Header name (e.g. "Subject:" or "Xref:full") - name, _, suffix = line.partition(':') - name = name.lower() - name = _OVERVIEW_FMT_ALTERNATIVES.get(name, name) - # Should we do something with the suffix? - fmt.append(name) - defaults = _DEFAULT_OVERVIEW_FMT - if len(fmt) < len(defaults): - raise NNTPDataError("LIST OVERVIEW.FMT response too short") - if fmt[:len(defaults)] != defaults: - raise NNTPDataError("LIST OVERVIEW.FMT redefines default fields") - return fmt - -def _parse_overview(lines, fmt, data_process_func=None): - """Parse the response to an OVER or XOVER command according to the - overview format `fmt`.""" - n_defaults = len(_DEFAULT_OVERVIEW_FMT) - overview = [] - for line in lines: - fields = {} - article_number, *tokens = line.split('\t') - article_number = int(article_number) - for i, token in enumerate(tokens): - if i >= len(fmt): - # XXX should we raise an error? Some servers might not - # support LIST OVERVIEW.FMT and still return additional - # headers. - continue - field_name = fmt[i] - is_metadata = field_name.startswith(':') - if i >= n_defaults and not is_metadata: - # Non-default header names are included in full in the response - # (unless the field is totally empty) - h = field_name + ": " - if token and token[:len(h)].lower() != h: - raise NNTPDataError("OVER/XOVER response doesn't include " - "names of additional headers") - token = token[len(h):] if token else None - fields[fmt[i]] = token - overview.append((article_number, fields)) - return overview - -def _parse_datetime(date_str, time_str=None): - """Parse a pair of (date, time) strings, and return a datetime object. - If only the date is given, it is assumed to be date and time - concatenated together (e.g. response to the DATE command). - """ - if time_str is None: - time_str = date_str[-6:] - date_str = date_str[:-6] - hours = int(time_str[:2]) - minutes = int(time_str[2:4]) - seconds = int(time_str[4:]) - year = int(date_str[:-4]) - month = int(date_str[-4:-2]) - day = int(date_str[-2:]) - # RFC 3977 doesn't say how to interpret 2-char years. Assume that - # there are no dates before 1970 on Usenet. - if year < 70: - year += 2000 - elif year < 100: - year += 1900 - return datetime.datetime(year, month, day, hours, minutes, seconds) - -def _unparse_datetime(dt, legacy=False): - """Format a date or datetime object as a pair of (date, time) strings - in the format required by the NEWNEWS and NEWGROUPS commands. If a - date object is passed, the time is assumed to be midnight (00h00). - - The returned representation depends on the legacy flag: - * if legacy is False (the default): - date has the YYYYMMDD format and time the HHMMSS format - * if legacy is True: - date has the YYMMDD format and time the HHMMSS format. - RFC 3977 compliant servers should understand both formats; therefore, - legacy is only needed when talking to old servers. - """ - if not isinstance(dt, datetime.datetime): - time_str = "000000" - else: - time_str = "{0.hour:02d}{0.minute:02d}{0.second:02d}".format(dt) - y = dt.year - if legacy: - y = y % 100 - date_str = "{0:02d}{1.month:02d}{1.day:02d}".format(y, dt) - else: - date_str = "{0:04d}{1.month:02d}{1.day:02d}".format(y, dt) - return date_str, time_str - - -if _have_ssl: - - def _encrypt_on(sock, context, hostname): - """Wrap a socket in SSL/TLS. Arguments: - - sock: Socket to wrap - - context: SSL context to use for the encrypted connection - Returns: - - sock: New, encrypted socket. - """ - # Generate a default SSL context if none was passed. - if context is None: - context = ssl._create_stdlib_context() - return context.wrap_socket(sock, server_hostname=hostname) - - -# The classes themselves -class NNTP: - # UTF-8 is the character set for all NNTP commands and responses: they - # are automatically encoded (when sending) and decoded (and receiving) - # by this class. - # However, some multi-line data blocks can contain arbitrary bytes (for - # example, latin-1 or utf-16 data in the body of a message). Commands - # taking (POST, IHAVE) or returning (HEAD, BODY, ARTICLE) raw message - # data will therefore only accept and produce bytes objects. - # Furthermore, since there could be non-compliant servers out there, - # we use 'surrogateescape' as the error handler for fault tolerance - # and easy round-tripping. This could be useful for some applications - # (e.g. NNTP gateways). - - encoding = 'utf-8' - errors = 'surrogateescape' - - def __init__(self, host, port=NNTP_PORT, user=None, password=None, - readermode=None, usenetrc=False, - timeout=_GLOBAL_DEFAULT_TIMEOUT): - """Initialize an instance. Arguments: - - host: hostname to connect to - - port: port to connect to (default the standard NNTP port) - - user: username to authenticate with - - password: password to use with username - - readermode: if true, send 'mode reader' command after - connecting. - - usenetrc: allow loading username and password from ~/.netrc file - if not specified explicitly - - timeout: timeout (in seconds) used for socket connections - - readermode is sometimes necessary if you are connecting to an - NNTP server on the local machine and intend to call - reader-specific commands, such as `group'. If you get - unexpected NNTPPermanentErrors, you might need to set - readermode. - """ - self.host = host - self.port = port - self.sock = self._create_socket(timeout) - self.file = None - try: - self.file = self.sock.makefile("rwb") - self._base_init(readermode) - if user or usenetrc: - self.login(user, password, usenetrc) - except: - if self.file: - self.file.close() - self.sock.close() - raise - - def _base_init(self, readermode): - """Partial initialization for the NNTP protocol. - This instance method is extracted for supporting the test code. - """ - self.debugging = 0 - self.welcome = self._getresp() - - # Inquire about capabilities (RFC 3977). - self._caps = None - self.getcapabilities() - - # 'MODE READER' is sometimes necessary to enable 'reader' mode. - # However, the order in which 'MODE READER' and 'AUTHINFO' need to - # arrive differs between some NNTP servers. If _setreadermode() fails - # with an authorization failed error, it will set this to True; - # the login() routine will interpret that as a request to try again - # after performing its normal function. - # Enable only if we're not already in READER mode anyway. - self.readermode_afterauth = False - if readermode and 'READER' not in self._caps: - self._setreadermode() - if not self.readermode_afterauth: - # Capabilities might have changed after MODE READER - self._caps = None - self.getcapabilities() - - # RFC 4642 2.2.2: Both the client and the server MUST know if there is - # a TLS session active. A client MUST NOT attempt to start a TLS - # session if a TLS session is already active. - self.tls_on = False - - # Log in and encryption setup order is left to subclasses. - self.authenticated = False - - def __enter__(self): - return self - - def __exit__(self, *args): - is_connected = lambda: hasattr(self, "file") - if is_connected(): - try: - self.quit() - except (OSError, EOFError): - pass - finally: - if is_connected(): - self._close() - - def _create_socket(self, timeout): - if timeout is not None and not timeout: - raise ValueError('Non-blocking socket (timeout=0) is not supported') - sys.audit("nntplib.connect", self, self.host, self.port) - return socket.create_connection((self.host, self.port), timeout) - - def getwelcome(self): - """Get the welcome message from the server - (this is read and squirreled away by __init__()). - If the response code is 200, posting is allowed; - if it 201, posting is not allowed.""" - - if self.debugging: print('*welcome*', repr(self.welcome)) - return self.welcome - - def getcapabilities(self): - """Get the server capabilities, as read by __init__(). - If the CAPABILITIES command is not supported, an empty dict is - returned.""" - if self._caps is None: - self.nntp_version = 1 - self.nntp_implementation = None - try: - resp, caps = self.capabilities() - except (NNTPPermanentError, NNTPTemporaryError): - # Server doesn't support capabilities - self._caps = {} - else: - self._caps = caps - if 'VERSION' in caps: - # The server can advertise several supported versions, - # choose the highest. - self.nntp_version = max(map(int, caps['VERSION'])) - if 'IMPLEMENTATION' in caps: - self.nntp_implementation = ' '.join(caps['IMPLEMENTATION']) - return self._caps - - def set_debuglevel(self, level): - """Set the debugging level. Argument 'level' means: - 0: no debugging output (default) - 1: print commands and responses but not body text etc. - 2: also print raw lines read and sent before stripping CR/LF""" - - self.debugging = level - debug = set_debuglevel - - def _putline(self, line): - """Internal: send one line to the server, appending CRLF. - The `line` must be a bytes-like object.""" - sys.audit("nntplib.putline", self, line) - line = line + _CRLF - if self.debugging > 1: print('*put*', repr(line)) - self.file.write(line) - self.file.flush() - - def _putcmd(self, line): - """Internal: send one command to the server (through _putline()). - The `line` must be a unicode string.""" - if self.debugging: print('*cmd*', repr(line)) - line = line.encode(self.encoding, self.errors) - self._putline(line) - - def _getline(self, strip_crlf=True): - """Internal: return one line from the server, stripping _CRLF. - Raise EOFError if the connection is closed. - Returns a bytes object.""" - line = self.file.readline(_MAXLINE +1) - if len(line) > _MAXLINE: - raise NNTPDataError('line too long') - if self.debugging > 1: - print('*get*', repr(line)) - if not line: raise EOFError - if strip_crlf: - if line[-2:] == _CRLF: - line = line[:-2] - elif line[-1:] in _CRLF: - line = line[:-1] - return line - - def _getresp(self): - """Internal: get a response from the server. - Raise various errors if the response indicates an error. - Returns a unicode string.""" - resp = self._getline() - if self.debugging: print('*resp*', repr(resp)) - resp = resp.decode(self.encoding, self.errors) - c = resp[:1] - if c == '4': - raise NNTPTemporaryError(resp) - if c == '5': - raise NNTPPermanentError(resp) - if c not in '123': - raise NNTPProtocolError(resp) - return resp - - def _getlongresp(self, file=None): - """Internal: get a response plus following text from the server. - Raise various errors if the response indicates an error. - - Returns a (response, lines) tuple where `response` is a unicode - string and `lines` is a list of bytes objects. - If `file` is a file-like object, it must be open in binary mode. - """ - - openedFile = None - try: - # If a string was passed then open a file with that name - if isinstance(file, (str, bytes)): - openedFile = file = open(file, "wb") - - resp = self._getresp() - if resp[:3] not in _LONGRESP: - raise NNTPReplyError(resp) - - lines = [] - if file is not None: - # XXX lines = None instead? - terminators = (b'.' + _CRLF, b'.\n') - while 1: - line = self._getline(False) - if line in terminators: - break - if line.startswith(b'..'): - line = line[1:] - file.write(line) - else: - terminator = b'.' - while 1: - line = self._getline() - if line == terminator: - break - if line.startswith(b'..'): - line = line[1:] - lines.append(line) - finally: - # If this method created the file, then it must close it - if openedFile: - openedFile.close() - - return resp, lines - - def _shortcmd(self, line): - """Internal: send a command and get the response. - Same return value as _getresp().""" - self._putcmd(line) - return self._getresp() - - def _longcmd(self, line, file=None): - """Internal: send a command and get the response plus following text. - Same return value as _getlongresp().""" - self._putcmd(line) - return self._getlongresp(file) - - def _longcmdstring(self, line, file=None): - """Internal: send a command and get the response plus following text. - Same as _longcmd() and _getlongresp(), except that the returned `lines` - are unicode strings rather than bytes objects. - """ - self._putcmd(line) - resp, list = self._getlongresp(file) - return resp, [line.decode(self.encoding, self.errors) - for line in list] - - def _getoverviewfmt(self): - """Internal: get the overview format. Queries the server if not - already done, else returns the cached value.""" - try: - return self._cachedoverviewfmt - except AttributeError: - pass - try: - resp, lines = self._longcmdstring("LIST OVERVIEW.FMT") - except NNTPPermanentError: - # Not supported by server? - fmt = _DEFAULT_OVERVIEW_FMT[:] - else: - fmt = _parse_overview_fmt(lines) - self._cachedoverviewfmt = fmt - return fmt - - def _grouplist(self, lines): - # Parse lines into "group last first flag" - return [GroupInfo(*line.split()) for line in lines] - - def capabilities(self): - """Process a CAPABILITIES command. Not supported by all servers. - Return: - - resp: server response if successful - - caps: a dictionary mapping capability names to lists of tokens - (for example {'VERSION': ['2'], 'OVER': [], LIST: ['ACTIVE', 'HEADERS'] }) - """ - caps = {} - resp, lines = self._longcmdstring("CAPABILITIES") - for line in lines: - name, *tokens = line.split() - caps[name] = tokens - return resp, caps - - def newgroups(self, date, *, file=None): - """Process a NEWGROUPS command. Arguments: - - date: a date or datetime object - Return: - - resp: server response if successful - - list: list of newsgroup names - """ - if not isinstance(date, (datetime.date, datetime.date)): - raise TypeError( - "the date parameter must be a date or datetime object, " - "not '{:40}'".format(date.__class__.__name__)) - date_str, time_str = _unparse_datetime(date, self.nntp_version < 2) - cmd = 'NEWGROUPS {0} {1}'.format(date_str, time_str) - resp, lines = self._longcmdstring(cmd, file) - return resp, self._grouplist(lines) - - def newnews(self, group, date, *, file=None): - """Process a NEWNEWS command. Arguments: - - group: group name or '*' - - date: a date or datetime object - Return: - - resp: server response if successful - - list: list of message ids - """ - if not isinstance(date, (datetime.date, datetime.date)): - raise TypeError( - "the date parameter must be a date or datetime object, " - "not '{:40}'".format(date.__class__.__name__)) - date_str, time_str = _unparse_datetime(date, self.nntp_version < 2) - cmd = 'NEWNEWS {0} {1} {2}'.format(group, date_str, time_str) - return self._longcmdstring(cmd, file) - - def list(self, group_pattern=None, *, file=None): - """Process a LIST or LIST ACTIVE command. Arguments: - - group_pattern: a pattern indicating which groups to query - - file: Filename string or file object to store the result in - Returns: - - resp: server response if successful - - list: list of (group, last, first, flag) (strings) - """ - if group_pattern is not None: - command = 'LIST ACTIVE ' + group_pattern - else: - command = 'LIST' - resp, lines = self._longcmdstring(command, file) - return resp, self._grouplist(lines) - - def _getdescriptions(self, group_pattern, return_all): - line_pat = re.compile('^(?P[^ \t]+)[ \t]+(.*)$') - # Try the more std (acc. to RFC2980) LIST NEWSGROUPS first - resp, lines = self._longcmdstring('LIST NEWSGROUPS ' + group_pattern) - if not resp.startswith('215'): - # Now the deprecated XGTITLE. This either raises an error - # or succeeds with the same output structure as LIST - # NEWSGROUPS. - resp, lines = self._longcmdstring('XGTITLE ' + group_pattern) - groups = {} - for raw_line in lines: - match = line_pat.search(raw_line.strip()) - if match: - name, desc = match.group(1, 2) - if not return_all: - return desc - groups[name] = desc - if return_all: - return resp, groups - else: - # Nothing found - return '' - - def description(self, group): - """Get a description for a single group. If more than one - group matches ('group' is a pattern), return the first. If no - group matches, return an empty string. - - This elides the response code from the server, since it can - only be '215' or '285' (for xgtitle) anyway. If the response - code is needed, use the 'descriptions' method. - - NOTE: This neither checks for a wildcard in 'group' nor does - it check whether the group actually exists.""" - return self._getdescriptions(group, False) - - def descriptions(self, group_pattern): - """Get descriptions for a range of groups.""" - return self._getdescriptions(group_pattern, True) - - def group(self, name): - """Process a GROUP command. Argument: - - group: the group name - Returns: - - resp: server response if successful - - count: number of articles - - first: first article number - - last: last article number - - name: the group name - """ - resp = self._shortcmd('GROUP ' + name) - if not resp.startswith('211'): - raise NNTPReplyError(resp) - words = resp.split() - count = first = last = 0 - n = len(words) - if n > 1: - count = words[1] - if n > 2: - first = words[2] - if n > 3: - last = words[3] - if n > 4: - name = words[4].lower() - return resp, int(count), int(first), int(last), name - - def help(self, *, file=None): - """Process a HELP command. Argument: - - file: Filename string or file object to store the result in - Returns: - - resp: server response if successful - - list: list of strings returned by the server in response to the - HELP command - """ - return self._longcmdstring('HELP', file) - - def _statparse(self, resp): - """Internal: parse the response line of a STAT, NEXT, LAST, - ARTICLE, HEAD or BODY command.""" - if not resp.startswith('22'): - raise NNTPReplyError(resp) - words = resp.split() - art_num = int(words[1]) - message_id = words[2] - return resp, art_num, message_id - - def _statcmd(self, line): - """Internal: process a STAT, NEXT or LAST command.""" - resp = self._shortcmd(line) - return self._statparse(resp) - - def stat(self, message_spec=None): - """Process a STAT command. Argument: - - message_spec: article number or message id (if not specified, - the current article is selected) - Returns: - - resp: server response if successful - - art_num: the article number - - message_id: the message id - """ - if message_spec: - return self._statcmd('STAT {0}'.format(message_spec)) - else: - return self._statcmd('STAT') - - def next(self): - """Process a NEXT command. No arguments. Return as for STAT.""" - return self._statcmd('NEXT') - - def last(self): - """Process a LAST command. No arguments. Return as for STAT.""" - return self._statcmd('LAST') - - def _artcmd(self, line, file=None): - """Internal: process a HEAD, BODY or ARTICLE command.""" - resp, lines = self._longcmd(line, file) - resp, art_num, message_id = self._statparse(resp) - return resp, ArticleInfo(art_num, message_id, lines) - - def head(self, message_spec=None, *, file=None): - """Process a HEAD command. Argument: - - message_spec: article number or message id - - file: filename string or file object to store the headers in - Returns: - - resp: server response if successful - - ArticleInfo: (article number, message id, list of header lines) - """ - if message_spec is not None: - cmd = 'HEAD {0}'.format(message_spec) - else: - cmd = 'HEAD' - return self._artcmd(cmd, file) - - def body(self, message_spec=None, *, file=None): - """Process a BODY command. Argument: - - message_spec: article number or message id - - file: filename string or file object to store the body in - Returns: - - resp: server response if successful - - ArticleInfo: (article number, message id, list of body lines) - """ - if message_spec is not None: - cmd = 'BODY {0}'.format(message_spec) - else: - cmd = 'BODY' - return self._artcmd(cmd, file) - - def article(self, message_spec=None, *, file=None): - """Process an ARTICLE command. Argument: - - message_spec: article number or message id - - file: filename string or file object to store the article in - Returns: - - resp: server response if successful - - ArticleInfo: (article number, message id, list of article lines) - """ - if message_spec is not None: - cmd = 'ARTICLE {0}'.format(message_spec) - else: - cmd = 'ARTICLE' - return self._artcmd(cmd, file) - - def slave(self): - """Process a SLAVE command. Returns: - - resp: server response if successful - """ - return self._shortcmd('SLAVE') - - def xhdr(self, hdr, str, *, file=None): - """Process an XHDR command (optional server extension). Arguments: - - hdr: the header type (e.g. 'subject') - - str: an article nr, a message id, or a range nr1-nr2 - - file: Filename string or file object to store the result in - Returns: - - resp: server response if successful - - list: list of (nr, value) strings - """ - pat = re.compile('^([0-9]+) ?(.*)\n?') - resp, lines = self._longcmdstring('XHDR {0} {1}'.format(hdr, str), file) - def remove_number(line): - m = pat.match(line) - return m.group(1, 2) if m else line - return resp, [remove_number(line) for line in lines] - - def xover(self, start, end, *, file=None): - """Process an XOVER command (optional server extension) Arguments: - - start: start of range - - end: end of range - - file: Filename string or file object to store the result in - Returns: - - resp: server response if successful - - list: list of dicts containing the response fields - """ - resp, lines = self._longcmdstring('XOVER {0}-{1}'.format(start, end), - file) - fmt = self._getoverviewfmt() - return resp, _parse_overview(lines, fmt) - - def over(self, message_spec, *, file=None): - """Process an OVER command. If the command isn't supported, fall - back to XOVER. Arguments: - - message_spec: - - either a message id, indicating the article to fetch - information about - - or a (start, end) tuple, indicating a range of article numbers; - if end is None, information up to the newest message will be - retrieved - - or None, indicating the current article number must be used - - file: Filename string or file object to store the result in - Returns: - - resp: server response if successful - - list: list of dicts containing the response fields - - NOTE: the "message id" form isn't supported by XOVER - """ - cmd = 'OVER' if 'OVER' in self._caps else 'XOVER' - if isinstance(message_spec, (tuple, list)): - start, end = message_spec - cmd += ' {0}-{1}'.format(start, end or '') - elif message_spec is not None: - cmd = cmd + ' ' + message_spec - resp, lines = self._longcmdstring(cmd, file) - fmt = self._getoverviewfmt() - return resp, _parse_overview(lines, fmt) - - def date(self): - """Process the DATE command. - Returns: - - resp: server response if successful - - date: datetime object - """ - resp = self._shortcmd("DATE") - if not resp.startswith('111'): - raise NNTPReplyError(resp) - elem = resp.split() - if len(elem) != 2: - raise NNTPDataError(resp) - date = elem[1] - if len(date) != 14: - raise NNTPDataError(resp) - return resp, _parse_datetime(date, None) - - def _post(self, command, f): - resp = self._shortcmd(command) - # Raises a specific exception if posting is not allowed - if not resp.startswith('3'): - raise NNTPReplyError(resp) - if isinstance(f, (bytes, bytearray)): - f = f.splitlines() - # We don't use _putline() because: - # - we don't want additional CRLF if the file or iterable is already - # in the right format - # - we don't want a spurious flush() after each line is written - for line in f: - if not line.endswith(_CRLF): - line = line.rstrip(b"\r\n") + _CRLF - if line.startswith(b'.'): - line = b'.' + line - self.file.write(line) - self.file.write(b".\r\n") - self.file.flush() - return self._getresp() - - def post(self, data): - """Process a POST command. Arguments: - - data: bytes object, iterable or file containing the article - Returns: - - resp: server response if successful""" - return self._post('POST', data) - - def ihave(self, message_id, data): - """Process an IHAVE command. Arguments: - - message_id: message-id of the article - - data: file containing the article - Returns: - - resp: server response if successful - Note that if the server refuses the article an exception is raised.""" - return self._post('IHAVE {0}'.format(message_id), data) - - def _close(self): - try: - if self.file: - self.file.close() - del self.file - finally: - self.sock.close() - - def quit(self): - """Process a QUIT command and close the socket. Returns: - - resp: server response if successful""" - try: - resp = self._shortcmd('QUIT') - finally: - self._close() - return resp - - def login(self, user=None, password=None, usenetrc=True): - if self.authenticated: - raise ValueError("Already logged in.") - if not user and not usenetrc: - raise ValueError( - "At least one of `user` and `usenetrc` must be specified") - # If no login/password was specified but netrc was requested, - # try to get them from ~/.netrc - # Presume that if .netrc has an entry, NNRP authentication is required. - try: - if usenetrc and not user: - import netrc - credentials = netrc.netrc() - auth = credentials.authenticators(self.host) - if auth: - user = auth[0] - password = auth[2] - except OSError: - pass - # Perform NNTP authentication if needed. - if not user: - return - resp = self._shortcmd('authinfo user ' + user) - if resp.startswith('381'): - if not password: - raise NNTPReplyError(resp) - else: - resp = self._shortcmd('authinfo pass ' + password) - if not resp.startswith('281'): - raise NNTPPermanentError(resp) - # Capabilities might have changed after login - self._caps = None - self.getcapabilities() - # Attempt to send mode reader if it was requested after login. - # Only do so if we're not in reader mode already. - if self.readermode_afterauth and 'READER' not in self._caps: - self._setreadermode() - # Capabilities might have changed after MODE READER - self._caps = None - self.getcapabilities() - - def _setreadermode(self): - try: - self.welcome = self._shortcmd('mode reader') - except NNTPPermanentError: - # Error 5xx, probably 'not implemented' - pass - except NNTPTemporaryError as e: - if e.response.startswith('480'): - # Need authorization before 'mode reader' - self.readermode_afterauth = True - else: - raise - - if _have_ssl: - def starttls(self, context=None): - """Process a STARTTLS command. Arguments: - - context: SSL context to use for the encrypted connection - """ - # Per RFC 4642, STARTTLS MUST NOT be sent after authentication or if - # a TLS session already exists. - if self.tls_on: - raise ValueError("TLS is already enabled.") - if self.authenticated: - raise ValueError("TLS cannot be started after authentication.") - resp = self._shortcmd('STARTTLS') - if resp.startswith('382'): - self.file.close() - self.sock = _encrypt_on(self.sock, context, self.host) - self.file = self.sock.makefile("rwb") - self.tls_on = True - # Capabilities may change after TLS starts up, so ask for them - # again. - self._caps = None - self.getcapabilities() - else: - raise NNTPError("TLS failed to start.") - - -if _have_ssl: - class NNTP_SSL(NNTP): - - def __init__(self, host, port=NNTP_SSL_PORT, - user=None, password=None, ssl_context=None, - readermode=None, usenetrc=False, - timeout=_GLOBAL_DEFAULT_TIMEOUT): - """This works identically to NNTP.__init__, except for the change - in default port and the `ssl_context` argument for SSL connections. - """ - self.ssl_context = ssl_context - super().__init__(host, port, user, password, readermode, - usenetrc, timeout) - - def _create_socket(self, timeout): - sock = super()._create_socket(timeout) - try: - sock = _encrypt_on(sock, self.ssl_context, self.host) - except: - sock.close() - raise - else: - return sock - - __all__.append("NNTP_SSL") - - -# Test retrieval when run as a script. -if __name__ == '__main__': - import argparse - - parser = argparse.ArgumentParser(description="""\ - nntplib built-in demo - display the latest articles in a newsgroup""") - parser.add_argument('-g', '--group', default='gmane.comp.python.general', - help='group to fetch messages from (default: %(default)s)') - parser.add_argument('-s', '--server', default='news.gmane.io', - help='NNTP server hostname (default: %(default)s)') - parser.add_argument('-p', '--port', default=-1, type=int, - help='NNTP port number (default: %s / %s)' % (NNTP_PORT, NNTP_SSL_PORT)) - parser.add_argument('-n', '--nb-articles', default=10, type=int, - help='number of articles to fetch (default: %(default)s)') - parser.add_argument('-S', '--ssl', action='store_true', default=False, - help='use NNTP over SSL') - args = parser.parse_args() - - port = args.port - if not args.ssl: - if port == -1: - port = NNTP_PORT - s = NNTP(host=args.server, port=port) - else: - if port == -1: - port = NNTP_SSL_PORT - s = NNTP_SSL(host=args.server, port=port) - - caps = s.getcapabilities() - if 'STARTTLS' in caps: - s.starttls() - resp, count, first, last, name = s.group(args.group) - print('Group', name, 'has', count, 'articles, range', first, 'to', last) - - def cut(s, lim): - if len(s) > lim: - s = s[:lim - 4] + "..." - return s - - first = str(int(last) - args.nb_articles + 1) - resp, overviews = s.xover(first, last) - for artnum, over in overviews: - author = decode_header(over['from']).split('<', 1)[0] - subject = decode_header(over['subject']) - lines = int(over[':lines']) - print("{:7} {:20} {:42} ({})".format( - artnum, cut(author, 20), cut(subject, 42), lines) - ) - - s.quit() diff --git a/Lib/pkgutil.py b/Lib/pkgutil.py index 8e010c79c1..a4c474006b 100644 --- a/Lib/pkgutil.py +++ b/Lib/pkgutil.py @@ -184,188 +184,6 @@ def _iter_file_finder_modules(importer, prefix=''): iter_importer_modules.register( importlib.machinery.FileFinder, _iter_file_finder_modules) - -def _import_imp(): - global imp - with warnings.catch_warnings(): - warnings.simplefilter('ignore', DeprecationWarning) - imp = importlib.import_module('imp') - -class ImpImporter: - """PEP 302 Finder that wraps Python's "classic" import algorithm - - ImpImporter(dirname) produces a PEP 302 finder that searches that - directory. ImpImporter(None) produces a PEP 302 finder that searches - the current sys.path, plus any modules that are frozen or built-in. - - Note that ImpImporter does not currently support being used by placement - on sys.meta_path. - """ - - def __init__(self, path=None): - global imp - warnings.warn("This emulation is deprecated and slated for removal " - "in Python 3.12; use 'importlib' instead", - DeprecationWarning) - _import_imp() - self.path = path - - def find_module(self, fullname, path=None): - # Note: we ignore 'path' argument since it is only used via meta_path - subname = fullname.split(".")[-1] - if subname != fullname and self.path is None: - return None - if self.path is None: - path = None - else: - path = [os.path.realpath(self.path)] - try: - file, filename, etc = imp.find_module(subname, path) - except ImportError: - return None - return ImpLoader(fullname, file, filename, etc) - - def iter_modules(self, prefix=''): - if self.path is None or not os.path.isdir(self.path): - return - - yielded = {} - import inspect - try: - filenames = os.listdir(self.path) - except OSError: - # ignore unreadable directories like import does - filenames = [] - filenames.sort() # handle packages before same-named modules - - for fn in filenames: - modname = inspect.getmodulename(fn) - if modname=='__init__' or modname in yielded: - continue - - path = os.path.join(self.path, fn) - ispkg = False - - if not modname and os.path.isdir(path) and '.' not in fn: - modname = fn - try: - dircontents = os.listdir(path) - except OSError: - # ignore unreadable directories like import does - dircontents = [] - for fn in dircontents: - subname = inspect.getmodulename(fn) - if subname=='__init__': - ispkg = True - break - else: - continue # not a package - - if modname and '.' not in modname: - yielded[modname] = 1 - yield prefix + modname, ispkg - - -class ImpLoader: - """PEP 302 Loader that wraps Python's "classic" import algorithm - """ - code = source = None - - def __init__(self, fullname, file, filename, etc): - warnings.warn("This emulation is deprecated and slated for removal in " - "Python 3.12; use 'importlib' instead", - DeprecationWarning) - _import_imp() - self.file = file - self.filename = filename - self.fullname = fullname - self.etc = etc - - def load_module(self, fullname): - self._reopen() - try: - mod = imp.load_module(fullname, self.file, self.filename, self.etc) - finally: - if self.file: - self.file.close() - # Note: we don't set __loader__ because we want the module to look - # normal; i.e. this is just a wrapper for standard import machinery - return mod - - def get_data(self, pathname): - with open(pathname, "rb") as file: - return file.read() - - def _reopen(self): - if self.file and self.file.closed: - mod_type = self.etc[2] - if mod_type==imp.PY_SOURCE: - self.file = open(self.filename, 'r') - elif mod_type in (imp.PY_COMPILED, imp.C_EXTENSION): - self.file = open(self.filename, 'rb') - - def _fix_name(self, fullname): - if fullname is None: - fullname = self.fullname - elif fullname != self.fullname: - raise ImportError("Loader for module %s cannot handle " - "module %s" % (self.fullname, fullname)) - return fullname - - def is_package(self, fullname): - fullname = self._fix_name(fullname) - return self.etc[2]==imp.PKG_DIRECTORY - - def get_code(self, fullname=None): - fullname = self._fix_name(fullname) - if self.code is None: - mod_type = self.etc[2] - if mod_type==imp.PY_SOURCE: - source = self.get_source(fullname) - self.code = compile(source, self.filename, 'exec') - elif mod_type==imp.PY_COMPILED: - self._reopen() - try: - self.code = read_code(self.file) - finally: - self.file.close() - elif mod_type==imp.PKG_DIRECTORY: - self.code = self._get_delegate().get_code() - return self.code - - def get_source(self, fullname=None): - fullname = self._fix_name(fullname) - if self.source is None: - mod_type = self.etc[2] - if mod_type==imp.PY_SOURCE: - self._reopen() - try: - self.source = self.file.read() - finally: - self.file.close() - elif mod_type==imp.PY_COMPILED: - if os.path.exists(self.filename[:-1]): - with open(self.filename[:-1], 'r') as f: - self.source = f.read() - elif mod_type==imp.PKG_DIRECTORY: - self.source = self._get_delegate().get_source() - return self.source - - def _get_delegate(self): - finder = ImpImporter(self.filename) - spec = _get_spec(finder, '__init__') - return spec.loader - - def get_filename(self, fullname=None): - fullname = self._fix_name(fullname) - mod_type = self.etc[2] - if mod_type==imp.PKG_DIRECTORY: - return self._get_delegate().get_filename() - elif mod_type in (imp.PY_SOURCE, imp.PY_COMPILED, imp.C_EXTENSION): - return self.filename - return None - - try: import zipimport from zipimport import zipimporter diff --git a/Lib/pprint.py b/Lib/pprint.py index 34ed12637e..9314701db3 100644 --- a/Lib/pprint.py +++ b/Lib/pprint.py @@ -128,6 +128,9 @@ def __init__(self, indent=1, width=80, depth=None, stream=None, *, sort_dicts If true, dict keys are sorted. + underscore_numbers + If true, digit groups are separated with underscores. + """ indent = int(indent) width = int(width) diff --git a/Lib/queue.py b/Lib/queue.py index 55f5008846..25beb46e30 100644 --- a/Lib/queue.py +++ b/Lib/queue.py @@ -10,7 +10,15 @@ except ImportError: SimpleQueue = None -__all__ = ['Empty', 'Full', 'Queue', 'PriorityQueue', 'LifoQueue', 'SimpleQueue'] +__all__ = [ + 'Empty', + 'Full', + 'ShutDown', + 'Queue', + 'PriorityQueue', + 'LifoQueue', + 'SimpleQueue', +] try: @@ -25,6 +33,10 @@ class Full(Exception): pass +class ShutDown(Exception): + '''Raised when put/get with shut-down queue.''' + + class Queue: '''Create a queue object with a given maximum size. @@ -54,6 +66,9 @@ def __init__(self, maxsize=0): self.all_tasks_done = threading.Condition(self.mutex) self.unfinished_tasks = 0 + # Queue shutdown state + self.is_shutdown = False + def task_done(self): '''Indicate that a formerly enqueued task is complete. @@ -65,6 +80,9 @@ def task_done(self): have been processed (meaning that a task_done() call was received for every item that had been put() into the queue). + shutdown(immediate=True) calls task_done() for each remaining item in + the queue. + Raises a ValueError if called more times than there were items placed in the queue. ''' @@ -129,8 +147,12 @@ def put(self, item, block=True, timeout=None): Otherwise ('block' is false), put an item on the queue if a free slot is immediately available, else raise the Full exception ('timeout' is ignored in that case). + + Raises ShutDown if the queue has been shut down. ''' with self.not_full: + if self.is_shutdown: + raise ShutDown if self.maxsize > 0: if not block: if self._qsize() >= self.maxsize: @@ -138,6 +160,8 @@ def put(self, item, block=True, timeout=None): elif timeout is None: while self._qsize() >= self.maxsize: self.not_full.wait() + if self.is_shutdown: + raise ShutDown elif timeout < 0: raise ValueError("'timeout' must be a non-negative number") else: @@ -147,6 +171,8 @@ def put(self, item, block=True, timeout=None): if remaining <= 0.0: raise Full self.not_full.wait(remaining) + if self.is_shutdown: + raise ShutDown self._put(item) self.unfinished_tasks += 1 self.not_empty.notify() @@ -161,14 +187,21 @@ def get(self, block=True, timeout=None): Otherwise ('block' is false), return an item if one is immediately available, else raise the Empty exception ('timeout' is ignored in that case). + + Raises ShutDown if the queue has been shut down and is empty, + or if the queue has been shut down immediately. ''' with self.not_empty: + if self.is_shutdown and not self._qsize(): + raise ShutDown if not block: if not self._qsize(): raise Empty elif timeout is None: while not self._qsize(): self.not_empty.wait() + if self.is_shutdown and not self._qsize(): + raise ShutDown elif timeout < 0: raise ValueError("'timeout' must be a non-negative number") else: @@ -178,6 +211,8 @@ def get(self, block=True, timeout=None): if remaining <= 0.0: raise Empty self.not_empty.wait(remaining) + if self.is_shutdown and not self._qsize(): + raise ShutDown item = self._get() self.not_full.notify() return item @@ -198,6 +233,29 @@ def get_nowait(self): ''' return self.get(block=False) + def shutdown(self, immediate=False): + '''Shut-down the queue, making queue gets and puts raise ShutDown. + + By default, gets will only raise once the queue is empty. Set + 'immediate' to True to make gets raise immediately instead. + + All blocked callers of put() and get() will be unblocked. If + 'immediate', a task is marked as done for each item remaining in + the queue, which may unblock callers of join(). + ''' + with self.mutex: + self.is_shutdown = True + if immediate: + while self._qsize(): + self._get() + if self.unfinished_tasks > 0: + self.unfinished_tasks -= 1 + # release all blocked threads in `join()` + self.all_tasks_done.notify_all() + # All getters need to re-check queue-empty to raise ShutDown + self.not_empty.notify_all() + self.not_full.notify_all() + # Override these methods to implement other queue organizations # (e.g. stack or priority queue). # These will only be called with appropriate locks held diff --git a/Lib/reprlib.py b/Lib/reprlib.py index 616b3439b5..19dbe3a07e 100644 --- a/Lib/reprlib.py +++ b/Lib/reprlib.py @@ -29,49 +29,100 @@ def wrapper(self): wrapper.__name__ = getattr(user_function, '__name__') wrapper.__qualname__ = getattr(user_function, '__qualname__') wrapper.__annotations__ = getattr(user_function, '__annotations__', {}) + wrapper.__type_params__ = getattr(user_function, '__type_params__', ()) + wrapper.__wrapped__ = user_function return wrapper return decorating_function class Repr: - - def __init__(self): - self.maxlevel = 6 - self.maxtuple = 6 - self.maxlist = 6 - self.maxarray = 5 - self.maxdict = 4 - self.maxset = 6 - self.maxfrozenset = 6 - self.maxdeque = 6 - self.maxstring = 30 - self.maxlong = 40 - self.maxother = 30 + _lookup = { + 'tuple': 'builtins', + 'list': 'builtins', + 'array': 'array', + 'set': 'builtins', + 'frozenset': 'builtins', + 'deque': 'collections', + 'dict': 'builtins', + 'str': 'builtins', + 'int': 'builtins' + } + + def __init__( + self, *, maxlevel=6, maxtuple=6, maxlist=6, maxarray=5, maxdict=4, + maxset=6, maxfrozenset=6, maxdeque=6, maxstring=30, maxlong=40, + maxother=30, fillvalue='...', indent=None, + ): + self.maxlevel = maxlevel + self.maxtuple = maxtuple + self.maxlist = maxlist + self.maxarray = maxarray + self.maxdict = maxdict + self.maxset = maxset + self.maxfrozenset = maxfrozenset + self.maxdeque = maxdeque + self.maxstring = maxstring + self.maxlong = maxlong + self.maxother = maxother + self.fillvalue = fillvalue + self.indent = indent def repr(self, x): return self.repr1(x, self.maxlevel) def repr1(self, x, level): - typename = type(x).__name__ + cls = type(x) + typename = cls.__name__ + if ' ' in typename: parts = typename.split() typename = '_'.join(parts) - if hasattr(self, 'repr_' + typename): - return getattr(self, 'repr_' + typename)(x, level) - else: - return self.repr_instance(x, level) + + method = getattr(self, 'repr_' + typename, None) + if method: + # not defined in this class + if typename not in self._lookup: + return method(x, level) + module = getattr(cls, '__module__', None) + # defined in this class and is the module intended + if module == self._lookup[typename]: + return method(x, level) + + return self.repr_instance(x, level) + + def _join(self, pieces, level): + if self.indent is None: + return ', '.join(pieces) + if not pieces: + return '' + indent = self.indent + if isinstance(indent, int): + if indent < 0: + raise ValueError( + f'Repr.indent cannot be negative int (was {indent!r})' + ) + indent *= ' ' + try: + sep = ',\n' + (self.maxlevel - level + 1) * indent + except TypeError as error: + raise TypeError( + f'Repr.indent must be a str, int or None, not {type(indent)}' + ) from error + return sep.join(('', *pieces, ''))[1:-len(indent) or None] def _repr_iterable(self, x, level, left, right, maxiter, trail=''): n = len(x) if level <= 0 and n: - s = '...' + s = self.fillvalue else: newlevel = level - 1 repr1 = self.repr1 pieces = [repr1(elem, newlevel) for elem in islice(x, maxiter)] - if n > maxiter: pieces.append('...') - s = ', '.join(pieces) - if n == 1 and trail: right = trail + right + if n > maxiter: + pieces.append(self.fillvalue) + s = self._join(pieces, level) + if n == 1 and trail and self.indent is None: + right = trail + right return '%s%s%s' % (left, s, right) def repr_tuple(self, x, level): @@ -104,8 +155,10 @@ def repr_deque(self, x, level): def repr_dict(self, x, level): n = len(x) - if n == 0: return '{}' - if level <= 0: return '{...}' + if n == 0: + return '{}' + if level <= 0: + return '{' + self.fillvalue + '}' newlevel = level - 1 repr1 = self.repr1 pieces = [] @@ -113,8 +166,9 @@ def repr_dict(self, x, level): keyrepr = repr1(key, newlevel) valrepr = repr1(x[key], newlevel) pieces.append('%s: %s' % (keyrepr, valrepr)) - if n > self.maxdict: pieces.append('...') - s = ', '.join(pieces) + if n > self.maxdict: + pieces.append(self.fillvalue) + s = self._join(pieces, level) return '{%s}' % (s,) def repr_str(self, x, level): @@ -123,7 +177,7 @@ def repr_str(self, x, level): i = max(0, (self.maxstring-3)//2) j = max(0, self.maxstring-3-i) s = builtins.repr(x[:i] + x[len(x)-j:]) - s = s[:i] + '...' + s[len(s)-j:] + s = s[:i] + self.fillvalue + s[len(s)-j:] return s def repr_int(self, x, level): @@ -131,7 +185,7 @@ def repr_int(self, x, level): if len(s) > self.maxlong: i = max(0, (self.maxlong-3)//2) j = max(0, self.maxlong-3-i) - s = s[:i] + '...' + s[len(s)-j:] + s = s[:i] + self.fillvalue + s[len(s)-j:] return s def repr_instance(self, x, level): @@ -144,7 +198,7 @@ def repr_instance(self, x, level): if len(s) > self.maxother: i = max(0, (self.maxother-3)//2) j = max(0, self.maxother-3-i) - s = s[:i] + '...' + s[len(s)-j:] + s = s[:i] + self.fillvalue + s[len(s)-j:] return s diff --git a/Lib/sched.py b/Lib/sched.py index 14613cf298..fb20639d45 100644 --- a/Lib/sched.py +++ b/Lib/sched.py @@ -11,7 +11,7 @@ implement simulated time by writing your own functions. This can also be used to integrate scheduling with STDWIN events; the delay function is allowed to modify the queue. Time can be expressed as -integers or floating point numbers, as long as it is consistent. +integers or floating-point numbers, as long as it is consistent. Events are specified by tuples (time, priority, action, argument, kwargs). As in UNIX, lower priority numbers mean higher priority; in this diff --git a/Lib/site.py b/Lib/site.py index 271524c0cf..acc8481b13 100644 --- a/Lib/site.py +++ b/Lib/site.py @@ -679,5 +679,17 @@ def exists(path): print(textwrap.dedent(help % (sys.argv[0], os.pathsep))) sys.exit(10) +def gethistoryfile(): + """Check if the PYTHON_HISTORY environment variable is set and define + it as the .python_history file. If PYTHON_HISTORY is not set, use the + default .python_history file. + """ + if not sys.flags.ignore_environment: + history = os.environ.get("PYTHON_HISTORY") + if history: + return history + return os.path.join(os.path.expanduser('~'), + '.python_history') + if __name__ == '__main__': _script() diff --git a/Lib/smtpd.py b/Lib/smtpd.py deleted file mode 100755 index 963e0a7689..0000000000 --- a/Lib/smtpd.py +++ /dev/null @@ -1,979 +0,0 @@ -#! /usr/bin/env python3 -"""An RFC 5321 smtp proxy with optional RFC 1870 and RFC 6531 extensions. - -Usage: %(program)s [options] [localhost:localport [remotehost:remoteport]] - -Options: - - --nosetuid - -n - This program generally tries to setuid `nobody', unless this flag is - set. The setuid call will fail if this program is not run as root (in - which case, use this flag). - - --version - -V - Print the version number and exit. - - --class classname - -c classname - Use `classname' as the concrete SMTP proxy class. Uses `PureProxy' by - default. - - --size limit - -s limit - Restrict the total size of the incoming message to "limit" number of - bytes via the RFC 1870 SIZE extension. Defaults to 33554432 bytes. - - --smtputf8 - -u - Enable the SMTPUTF8 extension and behave as an RFC 6531 smtp proxy. - - --debug - -d - Turn on debugging prints. - - --help - -h - Print this message and exit. - -Version: %(__version__)s - -If localhost is not given then `localhost' is used, and if localport is not -given then 8025 is used. If remotehost is not given then `localhost' is used, -and if remoteport is not given, then 25 is used. -""" - -# Overview: -# -# This file implements the minimal SMTP protocol as defined in RFC 5321. It -# has a hierarchy of classes which implement the backend functionality for the -# smtpd. A number of classes are provided: -# -# SMTPServer - the base class for the backend. Raises NotImplementedError -# if you try to use it. -# -# DebuggingServer - simply prints each message it receives on stdout. -# -# PureProxy - Proxies all messages to a real smtpd which does final -# delivery. One known problem with this class is that it doesn't handle -# SMTP errors from the backend server at all. This should be fixed -# (contributions are welcome!). -# -# MailmanProxy - An experimental hack to work with GNU Mailman -# . Using this server as your real incoming smtpd, your -# mailhost will automatically recognize and accept mail destined to Mailman -# lists when those lists are created. Every message not destined for a list -# gets forwarded to a real backend smtpd, as with PureProxy. Again, errors -# are not handled correctly yet. -# -# -# Author: Barry Warsaw -# -# TODO: -# -# - support mailbox delivery -# - alias files -# - Handle more ESMTP extensions -# - handle error codes from the backend smtpd - -import sys -import os -import errno -import getopt -import time -import socket -import collections -from warnings import warn -from email._header_value_parser import get_addr_spec, get_angle_addr - -__all__ = [ - "SMTPChannel", "SMTPServer", "DebuggingServer", "PureProxy", - "MailmanProxy", -] - -warn( - 'The smtpd module is deprecated and unmaintained and will be removed ' - 'in Python 3.12. Please see aiosmtpd ' - '(https://aiosmtpd.readthedocs.io/) for the recommended replacement.', - DeprecationWarning, - stacklevel=2) - - -# These are imported after the above warning so that users get the correct -# deprecation warning. -import asyncore -import asynchat - - -program = sys.argv[0] -__version__ = 'Python SMTP proxy version 0.3' - - -class Devnull: - def write(self, msg): pass - def flush(self): pass - - -DEBUGSTREAM = Devnull() -NEWLINE = '\n' -COMMASPACE = ', ' -DATA_SIZE_DEFAULT = 33554432 - - -def usage(code, msg=''): - print(__doc__ % globals(), file=sys.stderr) - if msg: - print(msg, file=sys.stderr) - sys.exit(code) - - -class SMTPChannel(asynchat.async_chat): - COMMAND = 0 - DATA = 1 - - command_size_limit = 512 - command_size_limits = collections.defaultdict(lambda x=command_size_limit: x) - - @property - def max_command_size_limit(self): - try: - return max(self.command_size_limits.values()) - except ValueError: - return self.command_size_limit - - def __init__(self, server, conn, addr, data_size_limit=DATA_SIZE_DEFAULT, - map=None, enable_SMTPUTF8=False, decode_data=False): - asynchat.async_chat.__init__(self, conn, map=map) - self.smtp_server = server - self.conn = conn - self.addr = addr - self.data_size_limit = data_size_limit - self.enable_SMTPUTF8 = enable_SMTPUTF8 - self._decode_data = decode_data - if enable_SMTPUTF8 and decode_data: - raise ValueError("decode_data and enable_SMTPUTF8 cannot" - " be set to True at the same time") - if decode_data: - self._emptystring = '' - self._linesep = '\r\n' - self._dotsep = '.' - self._newline = NEWLINE - else: - self._emptystring = b'' - self._linesep = b'\r\n' - self._dotsep = ord(b'.') - self._newline = b'\n' - self._set_rset_state() - self.seen_greeting = '' - self.extended_smtp = False - self.command_size_limits.clear() - self.fqdn = socket.getfqdn() - try: - self.peer = conn.getpeername() - except OSError as err: - # a race condition may occur if the other end is closing - # before we can get the peername - self.close() - if err.errno != errno.ENOTCONN: - raise - return - print('Peer:', repr(self.peer), file=DEBUGSTREAM) - self.push('220 %s %s' % (self.fqdn, __version__)) - - def _set_post_data_state(self): - """Reset state variables to their post-DATA state.""" - self.smtp_state = self.COMMAND - self.mailfrom = None - self.rcpttos = [] - self.require_SMTPUTF8 = False - self.num_bytes = 0 - self.set_terminator(b'\r\n') - - def _set_rset_state(self): - """Reset all state variables except the greeting.""" - self._set_post_data_state() - self.received_data = '' - self.received_lines = [] - - - # properties for backwards-compatibility - @property - def __server(self): - warn("Access to __server attribute on SMTPChannel is deprecated, " - "use 'smtp_server' instead", DeprecationWarning, 2) - return self.smtp_server - @__server.setter - def __server(self, value): - warn("Setting __server attribute on SMTPChannel is deprecated, " - "set 'smtp_server' instead", DeprecationWarning, 2) - self.smtp_server = value - - @property - def __line(self): - warn("Access to __line attribute on SMTPChannel is deprecated, " - "use 'received_lines' instead", DeprecationWarning, 2) - return self.received_lines - @__line.setter - def __line(self, value): - warn("Setting __line attribute on SMTPChannel is deprecated, " - "set 'received_lines' instead", DeprecationWarning, 2) - self.received_lines = value - - @property - def __state(self): - warn("Access to __state attribute on SMTPChannel is deprecated, " - "use 'smtp_state' instead", DeprecationWarning, 2) - return self.smtp_state - @__state.setter - def __state(self, value): - warn("Setting __state attribute on SMTPChannel is deprecated, " - "set 'smtp_state' instead", DeprecationWarning, 2) - self.smtp_state = value - - @property - def __greeting(self): - warn("Access to __greeting attribute on SMTPChannel is deprecated, " - "use 'seen_greeting' instead", DeprecationWarning, 2) - return self.seen_greeting - @__greeting.setter - def __greeting(self, value): - warn("Setting __greeting attribute on SMTPChannel is deprecated, " - "set 'seen_greeting' instead", DeprecationWarning, 2) - self.seen_greeting = value - - @property - def __mailfrom(self): - warn("Access to __mailfrom attribute on SMTPChannel is deprecated, " - "use 'mailfrom' instead", DeprecationWarning, 2) - return self.mailfrom - @__mailfrom.setter - def __mailfrom(self, value): - warn("Setting __mailfrom attribute on SMTPChannel is deprecated, " - "set 'mailfrom' instead", DeprecationWarning, 2) - self.mailfrom = value - - @property - def __rcpttos(self): - warn("Access to __rcpttos attribute on SMTPChannel is deprecated, " - "use 'rcpttos' instead", DeprecationWarning, 2) - return self.rcpttos - @__rcpttos.setter - def __rcpttos(self, value): - warn("Setting __rcpttos attribute on SMTPChannel is deprecated, " - "set 'rcpttos' instead", DeprecationWarning, 2) - self.rcpttos = value - - @property - def __data(self): - warn("Access to __data attribute on SMTPChannel is deprecated, " - "use 'received_data' instead", DeprecationWarning, 2) - return self.received_data - @__data.setter - def __data(self, value): - warn("Setting __data attribute on SMTPChannel is deprecated, " - "set 'received_data' instead", DeprecationWarning, 2) - self.received_data = value - - @property - def __fqdn(self): - warn("Access to __fqdn attribute on SMTPChannel is deprecated, " - "use 'fqdn' instead", DeprecationWarning, 2) - return self.fqdn - @__fqdn.setter - def __fqdn(self, value): - warn("Setting __fqdn attribute on SMTPChannel is deprecated, " - "set 'fqdn' instead", DeprecationWarning, 2) - self.fqdn = value - - @property - def __peer(self): - warn("Access to __peer attribute on SMTPChannel is deprecated, " - "use 'peer' instead", DeprecationWarning, 2) - return self.peer - @__peer.setter - def __peer(self, value): - warn("Setting __peer attribute on SMTPChannel is deprecated, " - "set 'peer' instead", DeprecationWarning, 2) - self.peer = value - - @property - def __conn(self): - warn("Access to __conn attribute on SMTPChannel is deprecated, " - "use 'conn' instead", DeprecationWarning, 2) - return self.conn - @__conn.setter - def __conn(self, value): - warn("Setting __conn attribute on SMTPChannel is deprecated, " - "set 'conn' instead", DeprecationWarning, 2) - self.conn = value - - @property - def __addr(self): - warn("Access to __addr attribute on SMTPChannel is deprecated, " - "use 'addr' instead", DeprecationWarning, 2) - return self.addr - @__addr.setter - def __addr(self, value): - warn("Setting __addr attribute on SMTPChannel is deprecated, " - "set 'addr' instead", DeprecationWarning, 2) - self.addr = value - - # Overrides base class for convenience. - def push(self, msg): - asynchat.async_chat.push(self, bytes( - msg + '\r\n', 'utf-8' if self.require_SMTPUTF8 else 'ascii')) - - # Implementation of base class abstract method - def collect_incoming_data(self, data): - limit = None - if self.smtp_state == self.COMMAND: - limit = self.max_command_size_limit - elif self.smtp_state == self.DATA: - limit = self.data_size_limit - if limit and self.num_bytes > limit: - return - elif limit: - self.num_bytes += len(data) - if self._decode_data: - self.received_lines.append(str(data, 'utf-8')) - else: - self.received_lines.append(data) - - # Implementation of base class abstract method - def found_terminator(self): - line = self._emptystring.join(self.received_lines) - print('Data:', repr(line), file=DEBUGSTREAM) - self.received_lines = [] - if self.smtp_state == self.COMMAND: - sz, self.num_bytes = self.num_bytes, 0 - if not line: - self.push('500 Error: bad syntax') - return - if not self._decode_data: - line = str(line, 'utf-8') - i = line.find(' ') - if i < 0: - command = line.upper() - arg = None - else: - command = line[:i].upper() - arg = line[i+1:].strip() - max_sz = (self.command_size_limits[command] - if self.extended_smtp else self.command_size_limit) - if sz > max_sz: - self.push('500 Error: line too long') - return - method = getattr(self, 'smtp_' + command, None) - if not method: - self.push('500 Error: command "%s" not recognized' % command) - return - method(arg) - return - else: - if self.smtp_state != self.DATA: - self.push('451 Internal confusion') - self.num_bytes = 0 - return - if self.data_size_limit and self.num_bytes > self.data_size_limit: - self.push('552 Error: Too much mail data') - self.num_bytes = 0 - return - # Remove extraneous carriage returns and de-transparency according - # to RFC 5321, Section 4.5.2. - data = [] - for text in line.split(self._linesep): - if text and text[0] == self._dotsep: - data.append(text[1:]) - else: - data.append(text) - self.received_data = self._newline.join(data) - args = (self.peer, self.mailfrom, self.rcpttos, self.received_data) - kwargs = {} - if not self._decode_data: - kwargs = { - 'mail_options': self.mail_options, - 'rcpt_options': self.rcpt_options, - } - status = self.smtp_server.process_message(*args, **kwargs) - self._set_post_data_state() - if not status: - self.push('250 OK') - else: - self.push(status) - - # SMTP and ESMTP commands - def smtp_HELO(self, arg): - if not arg: - self.push('501 Syntax: HELO hostname') - return - # See issue #21783 for a discussion of this behavior. - if self.seen_greeting: - self.push('503 Duplicate HELO/EHLO') - return - self._set_rset_state() - self.seen_greeting = arg - self.push('250 %s' % self.fqdn) - - def smtp_EHLO(self, arg): - if not arg: - self.push('501 Syntax: EHLO hostname') - return - # See issue #21783 for a discussion of this behavior. - if self.seen_greeting: - self.push('503 Duplicate HELO/EHLO') - return - self._set_rset_state() - self.seen_greeting = arg - self.extended_smtp = True - self.push('250-%s' % self.fqdn) - if self.data_size_limit: - self.push('250-SIZE %s' % self.data_size_limit) - self.command_size_limits['MAIL'] += 26 - if not self._decode_data: - self.push('250-8BITMIME') - if self.enable_SMTPUTF8: - self.push('250-SMTPUTF8') - self.command_size_limits['MAIL'] += 10 - self.push('250 HELP') - - def smtp_NOOP(self, arg): - if arg: - self.push('501 Syntax: NOOP') - else: - self.push('250 OK') - - def smtp_QUIT(self, arg): - # args is ignored - self.push('221 Bye') - self.close_when_done() - - def _strip_command_keyword(self, keyword, arg): - keylen = len(keyword) - if arg[:keylen].upper() == keyword: - return arg[keylen:].strip() - return '' - - def _getaddr(self, arg): - if not arg: - return '', '' - if arg.lstrip().startswith('<'): - address, rest = get_angle_addr(arg) - else: - address, rest = get_addr_spec(arg) - if not address: - return address, rest - return address.addr_spec, rest - - def _getparams(self, params): - # Return params as dictionary. Return None if not all parameters - # appear to be syntactically valid according to RFC 1869. - result = {} - for param in params: - param, eq, value = param.partition('=') - if not param.isalnum() or eq and not value: - return None - result[param] = value if eq else True - return result - - def smtp_HELP(self, arg): - if arg: - extended = ' [SP ]' - lc_arg = arg.upper() - if lc_arg == 'EHLO': - self.push('250 Syntax: EHLO hostname') - elif lc_arg == 'HELO': - self.push('250 Syntax: HELO hostname') - elif lc_arg == 'MAIL': - msg = '250 Syntax: MAIL FROM:
' - if self.extended_smtp: - msg += extended - self.push(msg) - elif lc_arg == 'RCPT': - msg = '250 Syntax: RCPT TO:
' - if self.extended_smtp: - msg += extended - self.push(msg) - elif lc_arg == 'DATA': - self.push('250 Syntax: DATA') - elif lc_arg == 'RSET': - self.push('250 Syntax: RSET') - elif lc_arg == 'NOOP': - self.push('250 Syntax: NOOP') - elif lc_arg == 'QUIT': - self.push('250 Syntax: QUIT') - elif lc_arg == 'VRFY': - self.push('250 Syntax: VRFY
') - else: - self.push('501 Supported commands: EHLO HELO MAIL RCPT ' - 'DATA RSET NOOP QUIT VRFY') - else: - self.push('250 Supported commands: EHLO HELO MAIL RCPT DATA ' - 'RSET NOOP QUIT VRFY') - - def smtp_VRFY(self, arg): - if arg: - address, params = self._getaddr(arg) - if address: - self.push('252 Cannot VRFY user, but will accept message ' - 'and attempt delivery') - else: - self.push('502 Could not VRFY %s' % arg) - else: - self.push('501 Syntax: VRFY
') - - def smtp_MAIL(self, arg): - if not self.seen_greeting: - self.push('503 Error: send HELO first') - return - print('===> MAIL', arg, file=DEBUGSTREAM) - syntaxerr = '501 Syntax: MAIL FROM:
' - if self.extended_smtp: - syntaxerr += ' [SP ]' - if arg is None: - self.push(syntaxerr) - return - arg = self._strip_command_keyword('FROM:', arg) - address, params = self._getaddr(arg) - if not address: - self.push(syntaxerr) - return - if not self.extended_smtp and params: - self.push(syntaxerr) - return - if self.mailfrom: - self.push('503 Error: nested MAIL command') - return - self.mail_options = params.upper().split() - params = self._getparams(self.mail_options) - if params is None: - self.push(syntaxerr) - return - if not self._decode_data: - body = params.pop('BODY', '7BIT') - if body not in ['7BIT', '8BITMIME']: - self.push('501 Error: BODY can only be one of 7BIT, 8BITMIME') - return - if self.enable_SMTPUTF8: - smtputf8 = params.pop('SMTPUTF8', False) - if smtputf8 is True: - self.require_SMTPUTF8 = True - elif smtputf8 is not False: - self.push('501 Error: SMTPUTF8 takes no arguments') - return - size = params.pop('SIZE', None) - if size: - if not size.isdigit(): - self.push(syntaxerr) - return - elif self.data_size_limit and int(size) > self.data_size_limit: - self.push('552 Error: message size exceeds fixed maximum message size') - return - if len(params.keys()) > 0: - self.push('555 MAIL FROM parameters not recognized or not implemented') - return - self.mailfrom = address - print('sender:', self.mailfrom, file=DEBUGSTREAM) - self.push('250 OK') - - def smtp_RCPT(self, arg): - if not self.seen_greeting: - self.push('503 Error: send HELO first'); - return - print('===> RCPT', arg, file=DEBUGSTREAM) - if not self.mailfrom: - self.push('503 Error: need MAIL command') - return - syntaxerr = '501 Syntax: RCPT TO:
' - if self.extended_smtp: - syntaxerr += ' [SP ]' - if arg is None: - self.push(syntaxerr) - return - arg = self._strip_command_keyword('TO:', arg) - address, params = self._getaddr(arg) - if not address: - self.push(syntaxerr) - return - if not self.extended_smtp and params: - self.push(syntaxerr) - return - self.rcpt_options = params.upper().split() - params = self._getparams(self.rcpt_options) - if params is None: - self.push(syntaxerr) - return - # XXX currently there are no options we recognize. - if len(params.keys()) > 0: - self.push('555 RCPT TO parameters not recognized or not implemented') - return - self.rcpttos.append(address) - print('recips:', self.rcpttos, file=DEBUGSTREAM) - self.push('250 OK') - - def smtp_RSET(self, arg): - if arg: - self.push('501 Syntax: RSET') - return - self._set_rset_state() - self.push('250 OK') - - def smtp_DATA(self, arg): - if not self.seen_greeting: - self.push('503 Error: send HELO first'); - return - if not self.rcpttos: - self.push('503 Error: need RCPT command') - return - if arg: - self.push('501 Syntax: DATA') - return - self.smtp_state = self.DATA - self.set_terminator(b'\r\n.\r\n') - self.push('354 End data with .') - - # Commands that have not been implemented - def smtp_EXPN(self, arg): - self.push('502 EXPN not implemented') - - -class SMTPServer(asyncore.dispatcher): - # SMTPChannel class to use for managing client connections - channel_class = SMTPChannel - - def __init__(self, localaddr, remoteaddr, - data_size_limit=DATA_SIZE_DEFAULT, map=None, - enable_SMTPUTF8=False, decode_data=False): - self._localaddr = localaddr - self._remoteaddr = remoteaddr - self.data_size_limit = data_size_limit - self.enable_SMTPUTF8 = enable_SMTPUTF8 - self._decode_data = decode_data - if enable_SMTPUTF8 and decode_data: - raise ValueError("decode_data and enable_SMTPUTF8 cannot" - " be set to True at the same time") - asyncore.dispatcher.__init__(self, map=map) - try: - gai_results = socket.getaddrinfo(*localaddr, - type=socket.SOCK_STREAM) - self.create_socket(gai_results[0][0], gai_results[0][1]) - # try to re-use a server port if possible - self.set_reuse_addr() - self.bind(localaddr) - self.listen(5) - except: - self.close() - raise - else: - print('%s started at %s\n\tLocal addr: %s\n\tRemote addr:%s' % ( - self.__class__.__name__, time.ctime(time.time()), - localaddr, remoteaddr), file=DEBUGSTREAM) - - def handle_accepted(self, conn, addr): - print('Incoming connection from %s' % repr(addr), file=DEBUGSTREAM) - channel = self.channel_class(self, - conn, - addr, - self.data_size_limit, - self._map, - self.enable_SMTPUTF8, - self._decode_data) - - # API for "doing something useful with the message" - def process_message(self, peer, mailfrom, rcpttos, data, **kwargs): - """Override this abstract method to handle messages from the client. - - peer is a tuple containing (ipaddr, port) of the client that made the - socket connection to our smtp port. - - mailfrom is the raw address the client claims the message is coming - from. - - rcpttos is a list of raw addresses the client wishes to deliver the - message to. - - data is a string containing the entire full text of the message, - headers (if supplied) and all. It has been `de-transparencied' - according to RFC 821, Section 4.5.2. In other words, a line - containing a `.' followed by other text has had the leading dot - removed. - - kwargs is a dictionary containing additional information. It is - empty if decode_data=True was given as init parameter, otherwise - it will contain the following keys: - 'mail_options': list of parameters to the mail command. All - elements are uppercase strings. Example: - ['BODY=8BITMIME', 'SMTPUTF8']. - 'rcpt_options': same, for the rcpt command. - - This function should return None for a normal `250 Ok' response; - otherwise, it should return the desired response string in RFC 821 - format. - - """ - raise NotImplementedError - - -class DebuggingServer(SMTPServer): - - def _print_message_content(self, peer, data): - inheaders = 1 - lines = data.splitlines() - for line in lines: - # headers first - if inheaders and not line: - peerheader = 'X-Peer: ' + peer[0] - if not isinstance(data, str): - # decoded_data=false; make header match other binary output - peerheader = repr(peerheader.encode('utf-8')) - print(peerheader) - inheaders = 0 - if not isinstance(data, str): - # Avoid spurious 'str on bytes instance' warning. - line = repr(line) - print(line) - - def process_message(self, peer, mailfrom, rcpttos, data, **kwargs): - print('---------- MESSAGE FOLLOWS ----------') - if kwargs: - if kwargs.get('mail_options'): - print('mail options: %s' % kwargs['mail_options']) - if kwargs.get('rcpt_options'): - print('rcpt options: %s\n' % kwargs['rcpt_options']) - self._print_message_content(peer, data) - print('------------ END MESSAGE ------------') - - -class PureProxy(SMTPServer): - def __init__(self, *args, **kwargs): - if 'enable_SMTPUTF8' in kwargs and kwargs['enable_SMTPUTF8']: - raise ValueError("PureProxy does not support SMTPUTF8.") - super(PureProxy, self).__init__(*args, **kwargs) - - def process_message(self, peer, mailfrom, rcpttos, data): - lines = data.split('\n') - # Look for the last header - i = 0 - for line in lines: - if not line: - break - i += 1 - lines.insert(i, 'X-Peer: %s' % peer[0]) - data = NEWLINE.join(lines) - refused = self._deliver(mailfrom, rcpttos, data) - # TBD: what to do with refused addresses? - print('we got some refusals:', refused, file=DEBUGSTREAM) - - def _deliver(self, mailfrom, rcpttos, data): - import smtplib - refused = {} - try: - s = smtplib.SMTP() - s.connect(self._remoteaddr[0], self._remoteaddr[1]) - try: - refused = s.sendmail(mailfrom, rcpttos, data) - finally: - s.quit() - except smtplib.SMTPRecipientsRefused as e: - print('got SMTPRecipientsRefused', file=DEBUGSTREAM) - refused = e.recipients - except (OSError, smtplib.SMTPException) as e: - print('got', e.__class__, file=DEBUGSTREAM) - # All recipients were refused. If the exception had an associated - # error code, use it. Otherwise,fake it with a non-triggering - # exception code. - errcode = getattr(e, 'smtp_code', -1) - errmsg = getattr(e, 'smtp_error', 'ignore') - for r in rcpttos: - refused[r] = (errcode, errmsg) - return refused - - -class MailmanProxy(PureProxy): - def __init__(self, *args, **kwargs): - warn('MailmanProxy is deprecated and will be removed ' - 'in future', DeprecationWarning, 2) - if 'enable_SMTPUTF8' in kwargs and kwargs['enable_SMTPUTF8']: - raise ValueError("MailmanProxy does not support SMTPUTF8.") - super(PureProxy, self).__init__(*args, **kwargs) - - def process_message(self, peer, mailfrom, rcpttos, data): - from io import StringIO - from Mailman import Utils - from Mailman import Message - from Mailman import MailList - # If the message is to a Mailman mailing list, then we'll invoke the - # Mailman script directly, without going through the real smtpd. - # Otherwise we'll forward it to the local proxy for disposition. - listnames = [] - for rcpt in rcpttos: - local = rcpt.lower().split('@')[0] - # We allow the following variations on the theme - # listname - # listname-admin - # listname-owner - # listname-request - # listname-join - # listname-leave - parts = local.split('-') - if len(parts) > 2: - continue - listname = parts[0] - if len(parts) == 2: - command = parts[1] - else: - command = '' - if not Utils.list_exists(listname) or command not in ( - '', 'admin', 'owner', 'request', 'join', 'leave'): - continue - listnames.append((rcpt, listname, command)) - # Remove all list recipients from rcpttos and forward what we're not - # going to take care of ourselves. Linear removal should be fine - # since we don't expect a large number of recipients. - for rcpt, listname, command in listnames: - rcpttos.remove(rcpt) - # If there's any non-list destined recipients left, - print('forwarding recips:', ' '.join(rcpttos), file=DEBUGSTREAM) - if rcpttos: - refused = self._deliver(mailfrom, rcpttos, data) - # TBD: what to do with refused addresses? - print('we got refusals:', refused, file=DEBUGSTREAM) - # Now deliver directly to the list commands - mlists = {} - s = StringIO(data) - msg = Message.Message(s) - # These headers are required for the proper execution of Mailman. All - # MTAs in existence seem to add these if the original message doesn't - # have them. - if not msg.get('from'): - msg['From'] = mailfrom - if not msg.get('date'): - msg['Date'] = time.ctime(time.time()) - for rcpt, listname, command in listnames: - print('sending message to', rcpt, file=DEBUGSTREAM) - mlist = mlists.get(listname) - if not mlist: - mlist = MailList.MailList(listname, lock=0) - mlists[listname] = mlist - # dispatch on the type of command - if command == '': - # post - msg.Enqueue(mlist, tolist=1) - elif command == 'admin': - msg.Enqueue(mlist, toadmin=1) - elif command == 'owner': - msg.Enqueue(mlist, toowner=1) - elif command == 'request': - msg.Enqueue(mlist, torequest=1) - elif command in ('join', 'leave'): - # TBD: this is a hack! - if command == 'join': - msg['Subject'] = 'subscribe' - else: - msg['Subject'] = 'unsubscribe' - msg.Enqueue(mlist, torequest=1) - - -class Options: - setuid = True - classname = 'PureProxy' - size_limit = None - enable_SMTPUTF8 = False - - -def parseargs(): - global DEBUGSTREAM - try: - opts, args = getopt.getopt( - sys.argv[1:], 'nVhc:s:du', - ['class=', 'nosetuid', 'version', 'help', 'size=', 'debug', - 'smtputf8']) - except getopt.error as e: - usage(1, e) - - options = Options() - for opt, arg in opts: - if opt in ('-h', '--help'): - usage(0) - elif opt in ('-V', '--version'): - print(__version__) - sys.exit(0) - elif opt in ('-n', '--nosetuid'): - options.setuid = False - elif opt in ('-c', '--class'): - options.classname = arg - elif opt in ('-d', '--debug'): - DEBUGSTREAM = sys.stderr - elif opt in ('-u', '--smtputf8'): - options.enable_SMTPUTF8 = True - elif opt in ('-s', '--size'): - try: - int_size = int(arg) - options.size_limit = int_size - except: - print('Invalid size: ' + arg, file=sys.stderr) - sys.exit(1) - - # parse the rest of the arguments - if len(args) < 1: - localspec = 'localhost:8025' - remotespec = 'localhost:25' - elif len(args) < 2: - localspec = args[0] - remotespec = 'localhost:25' - elif len(args) < 3: - localspec = args[0] - remotespec = args[1] - else: - usage(1, 'Invalid arguments: %s' % COMMASPACE.join(args)) - - # split into host/port pairs - i = localspec.find(':') - if i < 0: - usage(1, 'Bad local spec: %s' % localspec) - options.localhost = localspec[:i] - try: - options.localport = int(localspec[i+1:]) - except ValueError: - usage(1, 'Bad local port: %s' % localspec) - i = remotespec.find(':') - if i < 0: - usage(1, 'Bad remote spec: %s' % remotespec) - options.remotehost = remotespec[:i] - try: - options.remoteport = int(remotespec[i+1:]) - except ValueError: - usage(1, 'Bad remote port: %s' % remotespec) - return options - - -if __name__ == '__main__': - options = parseargs() - # Become nobody - classname = options.classname - if "." in classname: - lastdot = classname.rfind(".") - mod = __import__(classname[:lastdot], globals(), locals(), [""]) - classname = classname[lastdot+1:] - else: - import __main__ as mod - class_ = getattr(mod, classname) - proxy = class_((options.localhost, options.localport), - (options.remotehost, options.remoteport), - options.size_limit, enable_SMTPUTF8=options.enable_SMTPUTF8) - if options.setuid: - try: - import pwd - except ImportError: - print('Cannot import module "pwd"; try running with -n option.', file=sys.stderr) - sys.exit(1) - nobody = pwd.getpwnam('nobody')[2] - try: - os.setuid(nobody) - except PermissionError: - print('Cannot setuid "nobody"; try running with -n option.', file=sys.stderr) - sys.exit(1) - try: - asyncore.loop() - except KeyboardInterrupt: - pass diff --git a/Lib/sndhdr.py b/Lib/sndhdr.py deleted file mode 100644 index 594353136f..0000000000 --- a/Lib/sndhdr.py +++ /dev/null @@ -1,257 +0,0 @@ -"""Routines to help recognizing sound files. - -Function whathdr() recognizes various types of sound file headers. -It understands almost all headers that SOX can decode. - -The return tuple contains the following items, in this order: -- file type (as SOX understands it) -- sampling rate (0 if unknown or hard to decode) -- number of channels (0 if unknown or hard to decode) -- number of frames in the file (-1 if unknown or hard to decode) -- number of bits/sample, or 'U' for U-LAW, or 'A' for A-LAW - -If the file doesn't have a recognizable type, it returns None. -If the file can't be opened, OSError is raised. - -To compute the total time, divide the number of frames by the -sampling rate (a frame contains a sample for each channel). - -Function what() calls whathdr(). (It used to also use some -heuristics for raw data, but this doesn't work very well.) - -Finally, the function test() is a simple main program that calls -what() for all files mentioned on the argument list. For directory -arguments it calls what() for all files in that directory. Default -argument is "." (testing all files in the current directory). The -option -r tells it to recurse down directories found inside -explicitly given directories. -""" - -# The file structure is top-down except that the test program and its -# subroutine come last. - -__all__ = ['what', 'whathdr'] - -from collections import namedtuple - -SndHeaders = namedtuple('SndHeaders', - 'filetype framerate nchannels nframes sampwidth') - -SndHeaders.filetype.__doc__ = ("""The value for type indicates the data type -and will be one of the strings 'aifc', 'aiff', 'au','hcom', -'sndr', 'sndt', 'voc', 'wav', '8svx', 'sb', 'ub', or 'ul'.""") -SndHeaders.framerate.__doc__ = ("""The sampling_rate will be either the actual -value or 0 if unknown or difficult to decode.""") -SndHeaders.nchannels.__doc__ = ("""The number of channels or 0 if it cannot be -determined or if the value is difficult to decode.""") -SndHeaders.nframes.__doc__ = ("""The value for frames will be either the number -of frames or -1.""") -SndHeaders.sampwidth.__doc__ = ("""Either the sample size in bits or -'A' for A-LAW or 'U' for u-LAW.""") - -def what(filename): - """Guess the type of a sound file.""" - res = whathdr(filename) - return res - - -def whathdr(filename): - """Recognize sound headers.""" - with open(filename, 'rb') as f: - h = f.read(512) - for tf in tests: - res = tf(h, f) - if res: - return SndHeaders(*res) - return None - - -#-----------------------------------# -# Subroutines per sound header type # -#-----------------------------------# - -tests = [] - -def test_aifc(h, f): - import aifc - if not h.startswith(b'FORM'): - return None - if h[8:12] == b'AIFC': - fmt = 'aifc' - elif h[8:12] == b'AIFF': - fmt = 'aiff' - else: - return None - f.seek(0) - try: - a = aifc.open(f, 'r') - except (EOFError, aifc.Error): - return None - return (fmt, a.getframerate(), a.getnchannels(), - a.getnframes(), 8 * a.getsampwidth()) - -tests.append(test_aifc) - - -def test_au(h, f): - if h.startswith(b'.snd'): - func = get_long_be - elif h[:4] in (b'\0ds.', b'dns.'): - func = get_long_le - else: - return None - filetype = 'au' - hdr_size = func(h[4:8]) - data_size = func(h[8:12]) - encoding = func(h[12:16]) - rate = func(h[16:20]) - nchannels = func(h[20:24]) - sample_size = 1 # default - if encoding == 1: - sample_bits = 'U' - elif encoding == 2: - sample_bits = 8 - elif encoding == 3: - sample_bits = 16 - sample_size = 2 - else: - sample_bits = '?' - frame_size = sample_size * nchannels - if frame_size: - nframe = data_size / frame_size - else: - nframe = -1 - return filetype, rate, nchannels, nframe, sample_bits - -tests.append(test_au) - - -def test_hcom(h, f): - if h[65:69] != b'FSSD' or h[128:132] != b'HCOM': - return None - divisor = get_long_be(h[144:148]) - if divisor: - rate = 22050 / divisor - else: - rate = 0 - return 'hcom', rate, 1, -1, 8 - -tests.append(test_hcom) - - -def test_voc(h, f): - if not h.startswith(b'Creative Voice File\032'): - return None - sbseek = get_short_le(h[20:22]) - rate = 0 - if 0 <= sbseek < 500 and h[sbseek] == 1: - ratecode = 256 - h[sbseek+4] - if ratecode: - rate = int(1000000.0 / ratecode) - return 'voc', rate, 1, -1, 8 - -tests.append(test_voc) - - -def test_wav(h, f): - import wave - # 'RIFF' 'WAVE' 'fmt ' - if not h.startswith(b'RIFF') or h[8:12] != b'WAVE' or h[12:16] != b'fmt ': - return None - f.seek(0) - try: - w = wave.open(f, 'r') - except (EOFError, wave.Error): - return None - return ('wav', w.getframerate(), w.getnchannels(), - w.getnframes(), 8*w.getsampwidth()) - -tests.append(test_wav) - - -def test_8svx(h, f): - if not h.startswith(b'FORM') or h[8:12] != b'8SVX': - return None - # Should decode it to get #channels -- assume always 1 - return '8svx', 0, 1, 0, 8 - -tests.append(test_8svx) - - -def test_sndt(h, f): - if h.startswith(b'SOUND'): - nsamples = get_long_le(h[8:12]) - rate = get_short_le(h[20:22]) - return 'sndt', rate, 1, nsamples, 8 - -tests.append(test_sndt) - - -def test_sndr(h, f): - if h.startswith(b'\0\0'): - rate = get_short_le(h[2:4]) - if 4000 <= rate <= 25000: - return 'sndr', rate, 1, -1, 8 - -tests.append(test_sndr) - - -#-------------------------------------------# -# Subroutines to extract numbers from bytes # -#-------------------------------------------# - -def get_long_be(b): - return (b[0] << 24) | (b[1] << 16) | (b[2] << 8) | b[3] - -def get_long_le(b): - return (b[3] << 24) | (b[2] << 16) | (b[1] << 8) | b[0] - -def get_short_be(b): - return (b[0] << 8) | b[1] - -def get_short_le(b): - return (b[1] << 8) | b[0] - - -#--------------------# -# Small test program # -#--------------------# - -def test(): - import sys - recursive = 0 - if sys.argv[1:] and sys.argv[1] == '-r': - del sys.argv[1:2] - recursive = 1 - try: - if sys.argv[1:]: - testall(sys.argv[1:], recursive, 1) - else: - testall(['.'], recursive, 1) - except KeyboardInterrupt: - sys.stderr.write('\n[Interrupted]\n') - sys.exit(1) - -def testall(list, recursive, toplevel): - import sys - import os - for filename in list: - if os.path.isdir(filename): - print(filename + '/:', end=' ') - if recursive or toplevel: - print('recursing down:') - import glob - names = glob.glob(os.path.join(filename, '*')) - testall(names, recursive, 0) - else: - print('*** directory (use -r) ***') - else: - print(filename + ':', end=' ') - sys.stdout.flush() - try: - print(what(filename)) - except OSError: - print('*** not found ***') - -if __name__ == '__main__': - test() diff --git a/Lib/sunau.py b/Lib/sunau.py deleted file mode 100644 index 129502b0b4..0000000000 --- a/Lib/sunau.py +++ /dev/null @@ -1,531 +0,0 @@ -"""Stuff to parse Sun and NeXT audio files. - -An audio file consists of a header followed by the data. The structure -of the header is as follows. - - +---------------+ - | magic word | - +---------------+ - | header size | - +---------------+ - | data size | - +---------------+ - | encoding | - +---------------+ - | sample rate | - +---------------+ - | # of channels | - +---------------+ - | info | - | | - +---------------+ - -The magic word consists of the 4 characters '.snd'. Apart from the -info field, all header fields are 4 bytes in size. They are all -32-bit unsigned integers encoded in big-endian byte order. - -The header size really gives the start of the data. -The data size is the physical size of the data. From the other -parameters the number of frames can be calculated. -The encoding gives the way in which audio samples are encoded. -Possible values are listed below. -The info field currently consists of an ASCII string giving a -human-readable description of the audio file. The info field is -padded with NUL bytes to the header size. - -Usage. - -Reading audio files: - f = sunau.open(file, 'r') -where file is either the name of a file or an open file pointer. -The open file pointer must have methods read(), seek(), and close(). -When the setpos() and rewind() methods are not used, the seek() -method is not necessary. - -This returns an instance of a class with the following public methods: - getnchannels() -- returns number of audio channels (1 for - mono, 2 for stereo) - getsampwidth() -- returns sample width in bytes - getframerate() -- returns sampling frequency - getnframes() -- returns number of audio frames - getcomptype() -- returns compression type ('NONE' or 'ULAW') - getcompname() -- returns human-readable version of - compression type ('not compressed' matches 'NONE') - getparams() -- returns a namedtuple consisting of all of the - above in the above order - getmarkers() -- returns None (for compatibility with the - aifc module) - getmark(id) -- raises an error since the mark does not - exist (for compatibility with the aifc module) - readframes(n) -- returns at most n frames of audio - rewind() -- rewind to the beginning of the audio stream - setpos(pos) -- seek to the specified position - tell() -- return the current position - close() -- close the instance (make it unusable) -The position returned by tell() and the position given to setpos() -are compatible and have nothing to do with the actual position in the -file. -The close() method is called automatically when the class instance -is destroyed. - -Writing audio files: - f = sunau.open(file, 'w') -where file is either the name of a file or an open file pointer. -The open file pointer must have methods write(), tell(), seek(), and -close(). - -This returns an instance of a class with the following public methods: - setnchannels(n) -- set the number of channels - setsampwidth(n) -- set the sample width - setframerate(n) -- set the frame rate - setnframes(n) -- set the number of frames - setcomptype(type, name) - -- set the compression type and the - human-readable compression type - setparams(tuple)-- set all parameters at once - tell() -- return current position in output file - writeframesraw(data) - -- write audio frames without pathing up the - file header - writeframes(data) - -- write audio frames and patch up the file header - close() -- patch up the file header and close the - output file -You should set the parameters before the first writeframesraw or -writeframes. The total number of frames does not need to be set, -but when it is set to the correct value, the header does not have to -be patched up. -It is best to first set all parameters, perhaps possibly the -compression type, and then write audio frames using writeframesraw. -When all frames have been written, either call writeframes(b'') or -close() to patch up the sizes in the header. -The close() method is called automatically when the class instance -is destroyed. -""" - -from collections import namedtuple -import warnings - -_sunau_params = namedtuple('_sunau_params', - 'nchannels sampwidth framerate nframes comptype compname') - -# from -AUDIO_FILE_MAGIC = 0x2e736e64 -AUDIO_FILE_ENCODING_MULAW_8 = 1 -AUDIO_FILE_ENCODING_LINEAR_8 = 2 -AUDIO_FILE_ENCODING_LINEAR_16 = 3 -AUDIO_FILE_ENCODING_LINEAR_24 = 4 -AUDIO_FILE_ENCODING_LINEAR_32 = 5 -AUDIO_FILE_ENCODING_FLOAT = 6 -AUDIO_FILE_ENCODING_DOUBLE = 7 -AUDIO_FILE_ENCODING_ADPCM_G721 = 23 -AUDIO_FILE_ENCODING_ADPCM_G722 = 24 -AUDIO_FILE_ENCODING_ADPCM_G723_3 = 25 -AUDIO_FILE_ENCODING_ADPCM_G723_5 = 26 -AUDIO_FILE_ENCODING_ALAW_8 = 27 - -# from -AUDIO_UNKNOWN_SIZE = 0xFFFFFFFF # ((unsigned)(~0)) - -_simple_encodings = [AUDIO_FILE_ENCODING_MULAW_8, - AUDIO_FILE_ENCODING_LINEAR_8, - AUDIO_FILE_ENCODING_LINEAR_16, - AUDIO_FILE_ENCODING_LINEAR_24, - AUDIO_FILE_ENCODING_LINEAR_32, - AUDIO_FILE_ENCODING_ALAW_8] - -class Error(Exception): - pass - -def _read_u32(file): - x = 0 - for i in range(4): - byte = file.read(1) - if not byte: - raise EOFError - x = x*256 + ord(byte) - return x - -def _write_u32(file, x): - data = [] - for i in range(4): - d, m = divmod(x, 256) - data.insert(0, int(m)) - x = d - file.write(bytes(data)) - -class Au_read: - - def __init__(self, f): - if type(f) == type(''): - import builtins - f = builtins.open(f, 'rb') - self._opened = True - else: - self._opened = False - self.initfp(f) - - def __del__(self): - if self._file: - self.close() - - def __enter__(self): - return self - - def __exit__(self, *args): - self.close() - - def initfp(self, file): - self._file = file - self._soundpos = 0 - magic = int(_read_u32(file)) - if magic != AUDIO_FILE_MAGIC: - raise Error('bad magic number') - self._hdr_size = int(_read_u32(file)) - if self._hdr_size < 24: - raise Error('header size too small') - if self._hdr_size > 100: - raise Error('header size ridiculously large') - self._data_size = _read_u32(file) - if self._data_size != AUDIO_UNKNOWN_SIZE: - self._data_size = int(self._data_size) - self._encoding = int(_read_u32(file)) - if self._encoding not in _simple_encodings: - raise Error('encoding not (yet) supported') - if self._encoding in (AUDIO_FILE_ENCODING_MULAW_8, - AUDIO_FILE_ENCODING_ALAW_8): - self._sampwidth = 2 - self._framesize = 1 - elif self._encoding == AUDIO_FILE_ENCODING_LINEAR_8: - self._framesize = self._sampwidth = 1 - elif self._encoding == AUDIO_FILE_ENCODING_LINEAR_16: - self._framesize = self._sampwidth = 2 - elif self._encoding == AUDIO_FILE_ENCODING_LINEAR_24: - self._framesize = self._sampwidth = 3 - elif self._encoding == AUDIO_FILE_ENCODING_LINEAR_32: - self._framesize = self._sampwidth = 4 - else: - raise Error('unknown encoding') - self._framerate = int(_read_u32(file)) - self._nchannels = int(_read_u32(file)) - if not self._nchannels: - raise Error('bad # of channels') - self._framesize = self._framesize * self._nchannels - if self._hdr_size > 24: - self._info = file.read(self._hdr_size - 24) - self._info, _, _ = self._info.partition(b'\0') - else: - self._info = b'' - try: - self._data_pos = file.tell() - except (AttributeError, OSError): - self._data_pos = None - - def getfp(self): - return self._file - - def getnchannels(self): - return self._nchannels - - def getsampwidth(self): - return self._sampwidth - - def getframerate(self): - return self._framerate - - def getnframes(self): - if self._data_size == AUDIO_UNKNOWN_SIZE: - return AUDIO_UNKNOWN_SIZE - if self._encoding in _simple_encodings: - return self._data_size // self._framesize - return 0 # XXX--must do some arithmetic here - - def getcomptype(self): - if self._encoding == AUDIO_FILE_ENCODING_MULAW_8: - return 'ULAW' - elif self._encoding == AUDIO_FILE_ENCODING_ALAW_8: - return 'ALAW' - else: - return 'NONE' - - def getcompname(self): - if self._encoding == AUDIO_FILE_ENCODING_MULAW_8: - return 'CCITT G.711 u-law' - elif self._encoding == AUDIO_FILE_ENCODING_ALAW_8: - return 'CCITT G.711 A-law' - else: - return 'not compressed' - - def getparams(self): - return _sunau_params(self.getnchannels(), self.getsampwidth(), - self.getframerate(), self.getnframes(), - self.getcomptype(), self.getcompname()) - - def getmarkers(self): - return None - - def getmark(self, id): - raise Error('no marks') - - def readframes(self, nframes): - if self._encoding in _simple_encodings: - if nframes == AUDIO_UNKNOWN_SIZE: - data = self._file.read() - else: - data = self._file.read(nframes * self._framesize) - self._soundpos += len(data) // self._framesize - if self._encoding == AUDIO_FILE_ENCODING_MULAW_8: - import audioop - data = audioop.ulaw2lin(data, self._sampwidth) - return data - return None # XXX--not implemented yet - - def rewind(self): - if self._data_pos is None: - raise OSError('cannot seek') - self._file.seek(self._data_pos) - self._soundpos = 0 - - def tell(self): - return self._soundpos - - def setpos(self, pos): - if pos < 0 or pos > self.getnframes(): - raise Error('position not in range') - if self._data_pos is None: - raise OSError('cannot seek') - self._file.seek(self._data_pos + pos * self._framesize) - self._soundpos = pos - - def close(self): - file = self._file - if file: - self._file = None - if self._opened: - file.close() - -class Au_write: - - def __init__(self, f): - if type(f) == type(''): - import builtins - f = builtins.open(f, 'wb') - self._opened = True - else: - self._opened = False - self.initfp(f) - - def __del__(self): - if self._file: - self.close() - self._file = None - - def __enter__(self): - return self - - def __exit__(self, *args): - self.close() - - def initfp(self, file): - self._file = file - self._framerate = 0 - self._nchannels = 0 - self._sampwidth = 0 - self._framesize = 0 - self._nframes = AUDIO_UNKNOWN_SIZE - self._nframeswritten = 0 - self._datawritten = 0 - self._datalength = 0 - self._info = b'' - self._comptype = 'ULAW' # default is U-law - - def setnchannels(self, nchannels): - if self._nframeswritten: - raise Error('cannot change parameters after starting to write') - if nchannels not in (1, 2, 4): - raise Error('only 1, 2, or 4 channels supported') - self._nchannels = nchannels - - def getnchannels(self): - if not self._nchannels: - raise Error('number of channels not set') - return self._nchannels - - def setsampwidth(self, sampwidth): - if self._nframeswritten: - raise Error('cannot change parameters after starting to write') - if sampwidth not in (1, 2, 3, 4): - raise Error('bad sample width') - self._sampwidth = sampwidth - - def getsampwidth(self): - if not self._framerate: - raise Error('sample width not specified') - return self._sampwidth - - def setframerate(self, framerate): - if self._nframeswritten: - raise Error('cannot change parameters after starting to write') - self._framerate = framerate - - def getframerate(self): - if not self._framerate: - raise Error('frame rate not set') - return self._framerate - - def setnframes(self, nframes): - if self._nframeswritten: - raise Error('cannot change parameters after starting to write') - if nframes < 0: - raise Error('# of frames cannot be negative') - self._nframes = nframes - - def getnframes(self): - return self._nframeswritten - - def setcomptype(self, type, name): - if type in ('NONE', 'ULAW'): - self._comptype = type - else: - raise Error('unknown compression type') - - def getcomptype(self): - return self._comptype - - def getcompname(self): - if self._comptype == 'ULAW': - return 'CCITT G.711 u-law' - elif self._comptype == 'ALAW': - return 'CCITT G.711 A-law' - else: - return 'not compressed' - - def setparams(self, params): - nchannels, sampwidth, framerate, nframes, comptype, compname = params - self.setnchannels(nchannels) - self.setsampwidth(sampwidth) - self.setframerate(framerate) - self.setnframes(nframes) - self.setcomptype(comptype, compname) - - def getparams(self): - return _sunau_params(self.getnchannels(), self.getsampwidth(), - self.getframerate(), self.getnframes(), - self.getcomptype(), self.getcompname()) - - def tell(self): - return self._nframeswritten - - def writeframesraw(self, data): - if not isinstance(data, (bytes, bytearray)): - data = memoryview(data).cast('B') - self._ensure_header_written() - if self._comptype == 'ULAW': - import audioop - data = audioop.lin2ulaw(data, self._sampwidth) - nframes = len(data) // self._framesize - self._file.write(data) - self._nframeswritten = self._nframeswritten + nframes - self._datawritten = self._datawritten + len(data) - - def writeframes(self, data): - self.writeframesraw(data) - if self._nframeswritten != self._nframes or \ - self._datalength != self._datawritten: - self._patchheader() - - def close(self): - if self._file: - try: - self._ensure_header_written() - if self._nframeswritten != self._nframes or \ - self._datalength != self._datawritten: - self._patchheader() - self._file.flush() - finally: - file = self._file - self._file = None - if self._opened: - file.close() - - # - # private methods - # - - def _ensure_header_written(self): - if not self._nframeswritten: - if not self._nchannels: - raise Error('# of channels not specified') - if not self._sampwidth: - raise Error('sample width not specified') - if not self._framerate: - raise Error('frame rate not specified') - self._write_header() - - def _write_header(self): - if self._comptype == 'NONE': - if self._sampwidth == 1: - encoding = AUDIO_FILE_ENCODING_LINEAR_8 - self._framesize = 1 - elif self._sampwidth == 2: - encoding = AUDIO_FILE_ENCODING_LINEAR_16 - self._framesize = 2 - elif self._sampwidth == 3: - encoding = AUDIO_FILE_ENCODING_LINEAR_24 - self._framesize = 3 - elif self._sampwidth == 4: - encoding = AUDIO_FILE_ENCODING_LINEAR_32 - self._framesize = 4 - else: - raise Error('internal error') - elif self._comptype == 'ULAW': - encoding = AUDIO_FILE_ENCODING_MULAW_8 - self._framesize = 1 - else: - raise Error('internal error') - self._framesize = self._framesize * self._nchannels - _write_u32(self._file, AUDIO_FILE_MAGIC) - header_size = 25 + len(self._info) - header_size = (header_size + 7) & ~7 - _write_u32(self._file, header_size) - if self._nframes == AUDIO_UNKNOWN_SIZE: - length = AUDIO_UNKNOWN_SIZE - else: - length = self._nframes * self._framesize - try: - self._form_length_pos = self._file.tell() - except (AttributeError, OSError): - self._form_length_pos = None - _write_u32(self._file, length) - self._datalength = length - _write_u32(self._file, encoding) - _write_u32(self._file, self._framerate) - _write_u32(self._file, self._nchannels) - self._file.write(self._info) - self._file.write(b'\0'*(header_size - len(self._info) - 24)) - - def _patchheader(self): - if self._form_length_pos is None: - raise OSError('cannot seek') - self._file.seek(self._form_length_pos) - _write_u32(self._file, self._datawritten) - self._datalength = self._datawritten - self._file.seek(0, 2) - -def open(f, mode=None): - if mode is None: - if hasattr(f, 'mode'): - mode = f.mode - else: - mode = 'rb' - if mode in ('r', 'rb'): - return Au_read(f) - elif mode in ('w', 'wb'): - return Au_write(f) - else: - raise Error("mode must be 'r', 'rb', 'w', or 'wb'") - -def openfp(f, mode=None): - warnings.warn("sunau.openfp is deprecated since Python 3.7. " - "Use sunau.open instead.", DeprecationWarning, stacklevel=2) - return open(f, mode=mode) diff --git a/Lib/tarfile.py b/Lib/tarfile.py index dea150e8db..04fda11597 100755 --- a/Lib/tarfile.py +++ b/Lib/tarfile.py @@ -46,6 +46,7 @@ import struct import copy import re +import warnings try: import pwd @@ -57,19 +58,19 @@ grp = None # os.symlink on Windows prior to 6.0 raises NotImplementedError -symlink_exception = (AttributeError, NotImplementedError) -try: - # OSError (winerror=1314) will be raised if the caller does not hold the - # SeCreateSymbolicLinkPrivilege privilege - symlink_exception += (OSError,) -except NameError: - pass +# OSError (winerror=1314) will be raised if the caller does not hold the +# SeCreateSymbolicLinkPrivilege privilege +symlink_exception = (AttributeError, NotImplementedError, OSError) # from tarfile import * __all__ = ["TarFile", "TarInfo", "is_tarfile", "TarError", "ReadError", "CompressionError", "StreamError", "ExtractError", "HeaderError", "ENCODING", "USTAR_FORMAT", "GNU_FORMAT", "PAX_FORMAT", - "DEFAULT_FORMAT", "open"] + "DEFAULT_FORMAT", "open","fully_trusted_filter", "data_filter", + "tar_filter", "FilterError", "AbsoluteLinkError", + "OutsideDestinationError", "SpecialFileError", "AbsolutePathError", + "LinkOutsideDestinationError"] + #--------------------------------------------------------- # tar constants @@ -158,6 +159,8 @@ def stn(s, length, encoding, errors): """Convert a string to a null-terminated bytes object. """ + if s is None: + raise ValueError("metadata cannot contain None") s = s.encode(encoding, errors) return s[:length] + (length - len(s)) * NUL @@ -328,15 +331,17 @@ def write(self, s): class _Stream: """Class that serves as an adapter between TarFile and a stream-like object. The stream-like object only - needs to have a read() or write() method and is accessed - blockwise. Use of gzip or bzip2 compression is possible. - A stream-like object could be for example: sys.stdin, - sys.stdout, a socket, a tape device etc. + needs to have a read() or write() method that works with bytes, + and the method is accessed blockwise. + Use of gzip or bzip2 compression is possible. + A stream-like object could be for example: sys.stdin.buffer, + sys.stdout.buffer, a socket, a tape device etc. _Stream is intended to be used only internally. """ - def __init__(self, name, mode, comptype, fileobj, bufsize): + def __init__(self, name, mode, comptype, fileobj, bufsize, + compresslevel): """Construct a _Stream object. """ self._extfileobj = True @@ -368,10 +373,10 @@ def __init__(self, name, mode, comptype, fileobj, bufsize): self.zlib = zlib self.crc = zlib.crc32(b"") if mode == "r": - self._init_read_gz() self.exception = zlib.error + self._init_read_gz() else: - self._init_write_gz() + self._init_write_gz(compresslevel) elif comptype == "bz2": try: @@ -383,13 +388,17 @@ def __init__(self, name, mode, comptype, fileobj, bufsize): self.cmp = bz2.BZ2Decompressor() self.exception = OSError else: - self.cmp = bz2.BZ2Compressor() + self.cmp = bz2.BZ2Compressor(compresslevel) elif comptype == "xz": try: import lzma except ImportError: raise CompressionError("lzma module is not available") from None + + # XXX: RUSTPYTHON; xz is not supported yet + raise CompressionError("lzma module is not available") from None + if mode == "r": self.dbuf = b"" self.cmp = lzma.LZMADecompressor() @@ -410,13 +419,14 @@ def __del__(self): if hasattr(self, "closed") and not self.closed: self.close() - def _init_write_gz(self): + def _init_write_gz(self, compresslevel): """Initialize for writing with gzip compression. """ - self.cmp = self.zlib.compressobj(9, self.zlib.DEFLATED, - -self.zlib.MAX_WBITS, - self.zlib.DEF_MEM_LEVEL, - 0) + self.cmp = self.zlib.compressobj(compresslevel, + self.zlib.DEFLATED, + -self.zlib.MAX_WBITS, + self.zlib.DEF_MEM_LEVEL, + 0) timestamp = struct.pack("" % (self.__class__.__name__,self.name,id(self)) + def replace(self, *, + name=_KEEP, mtime=_KEEP, mode=_KEEP, linkname=_KEEP, + uid=_KEEP, gid=_KEEP, uname=_KEEP, gname=_KEEP, + deep=True, _KEEP=_KEEP): + """Return a deep copy of self with the given attributes replaced. + """ + if deep: + result = copy.deepcopy(self) + else: + result = copy.copy(self) + if name is not _KEEP: + result.name = name + if mtime is not _KEEP: + result.mtime = mtime + if mode is not _KEEP: + result.mode = mode + if linkname is not _KEEP: + result.linkname = linkname + if uid is not _KEEP: + result.uid = uid + if gid is not _KEEP: + result.gid = gid + if uname is not _KEEP: + result.uname = uname + if gname is not _KEEP: + result.gname = gname + return result + def get_info(self): """Return the TarInfo's attributes as a dictionary. """ + if self.mode is None: + mode = None + else: + mode = self.mode & 0o7777 info = { "name": self.name, - "mode": self.mode & 0o7777, + "mode": mode, "uid": self.uid, "gid": self.gid, "size": self.size, @@ -820,6 +987,9 @@ def tobuf(self, format=DEFAULT_FORMAT, encoding=ENCODING, errors="surrogateescap """Return a tar header as a string of 512 byte blocks. """ info = self.get_info() + for name, value in info.items(): + if value is None: + raise ValueError("%s may not be None" % name) if format == USTAR_FORMAT: return self.create_ustar_header(info, encoding, errors) @@ -950,6 +1120,12 @@ def _create_header(info, format, encoding, errors): devmajor = stn("", 8, encoding, errors) devminor = stn("", 8, encoding, errors) + # None values in metadata should cause ValueError. + # itn()/stn() do this for all fields except type. + filetype = info.get("type", REGTYPE) + if filetype is None: + raise ValueError("TarInfo.type must not be None") + parts = [ stn(info.get("name", ""), 100, encoding, errors), itn(info.get("mode", 0) & 0o7777, 8, format), @@ -958,7 +1134,7 @@ def _create_header(info, format, encoding, errors): itn(info.get("size", 0), 12, format), itn(info.get("mtime", 0), 12, format), b" ", # checksum field - info.get("type", REGTYPE), + filetype, stn(info.get("linkname", ""), 100, encoding, errors), info.get("magic", POSIX_MAGIC), stn(info.get("uname", ""), 32, encoding, errors), @@ -1264,11 +1440,7 @@ def _proc_pax(self, tarfile): # the newline. keyword and value are both UTF-8 encoded strings. regex = re.compile(br"(\d+) ([^=]+)=") pos = 0 - while True: - match = regex.match(buf, pos) - if not match: - break - + while match := regex.match(buf, pos): length, keyword = match.groups() length = int(length) if length == 0: @@ -1468,6 +1640,8 @@ class TarFile(object): fileobject = ExFileObject # The file-object for extractfile(). + extraction_filter = None # The default filter for extraction. + def __init__(self, name=None, mode="r", fileobj=None, format=None, tarinfo=None, dereference=None, ignore_zeros=None, encoding=None, errors="surrogateescape", pax_headers=None, debug=None, @@ -1659,7 +1833,9 @@ def not_compressed(comptype): if filemode not in ("r", "w"): raise ValueError("mode must be 'r' or 'w'") - stream = _Stream(name, filemode, comptype, fileobj, bufsize) + compresslevel = kwargs.pop("compresslevel", 9) + stream = _Stream(name, filemode, comptype, fileobj, bufsize, + compresslevel) try: t = cls(name, filemode, stream, **kwargs) except: @@ -1755,6 +1931,9 @@ def xzopen(cls, name, mode="r", fileobj=None, preset=None, **kwargs): except ImportError: raise CompressionError("lzma module is not available") from None + # XXX: RUSTPYTHON; xz is not supported yet + raise CompressionError("lzma module is not available") from None + fileobj = LZMAFile(fileobj or name, mode, preset=preset) try: @@ -1940,7 +2119,10 @@ def list(self, verbose=True, *, members=None): members = self for tarinfo in members: if verbose: - _safe_print(stat.filemode(tarinfo.mode)) + if tarinfo.mode is None: + _safe_print("??????????") + else: + _safe_print(stat.filemode(tarinfo.mode)) _safe_print("%s/%s" % (tarinfo.uname or tarinfo.uid, tarinfo.gname or tarinfo.gid)) if tarinfo.ischr() or tarinfo.isblk(): @@ -1948,8 +2130,11 @@ def list(self, verbose=True, *, members=None): ("%d,%d" % (tarinfo.devmajor, tarinfo.devminor))) else: _safe_print("%10d" % tarinfo.size) - _safe_print("%d-%02d-%02d %02d:%02d:%02d" \ - % time.localtime(tarinfo.mtime)[:6]) + if tarinfo.mtime is None: + _safe_print("????-??-?? ??:??:??") + else: + _safe_print("%d-%02d-%02d %02d:%02d:%02d" \ + % time.localtime(tarinfo.mtime)[:6]) _safe_print(tarinfo.name + ("/" if tarinfo.isdir() else "")) @@ -2036,32 +2221,63 @@ def addfile(self, tarinfo, fileobj=None): self.members.append(tarinfo) - def extractall(self, path=".", members=None, *, numeric_owner=False): + def _get_filter_function(self, filter): + if filter is None: + filter = self.extraction_filter + if filter is None: + warnings.warn( + 'Python 3.14 will, by default, filter extracted tar ' + + 'archives and reject files or modify their metadata. ' + + 'Use the filter argument to control this behavior.', + DeprecationWarning) + return fully_trusted_filter + if isinstance(filter, str): + raise TypeError( + 'String names are not supported for ' + + 'TarFile.extraction_filter. Use a function such as ' + + 'tarfile.data_filter directly.') + return filter + if callable(filter): + return filter + try: + return _NAMED_FILTERS[filter] + except KeyError: + raise ValueError(f"filter {filter!r} not found") from None + + def extractall(self, path=".", members=None, *, numeric_owner=False, + filter=None): """Extract all members from the archive to the current working directory and set owner, modification time and permissions on directories afterwards. `path' specifies a different directory to extract to. `members' is optional and must be a subset of the list returned by getmembers(). If `numeric_owner` is True, only the numbers for user/group names are used and not the names. + + The `filter` function will be called on each member just + before extraction. + It can return a changed TarInfo or None to skip the member. + String names of common filters are accepted. """ directories = [] + filter_function = self._get_filter_function(filter) if members is None: members = self - for tarinfo in members: + for member in members: + tarinfo = self._get_extract_tarinfo(member, filter_function, path) + if tarinfo is None: + continue if tarinfo.isdir(): - # Extract directories with a safe mode. + # For directories, delay setting attributes until later, + # since permissions can interfere with extraction and + # extracting contents can reset mtime. directories.append(tarinfo) - tarinfo = copy.copy(tarinfo) - tarinfo.mode = 0o700 - # Do not set_attrs directories, as we will do that further down - self.extract(tarinfo, path, set_attrs=not tarinfo.isdir(), - numeric_owner=numeric_owner) + self._extract_one(tarinfo, path, set_attrs=not tarinfo.isdir(), + numeric_owner=numeric_owner) # Reverse sort directories. - directories.sort(key=lambda a: a.name) - directories.reverse() + directories.sort(key=lambda a: a.name, reverse=True) # Set correct owner, mtime and filemode on directories. for tarinfo in directories: @@ -2071,12 +2287,10 @@ def extractall(self, path=".", members=None, *, numeric_owner=False): self.utime(tarinfo, dirpath) self.chmod(tarinfo, dirpath) except ExtractError as e: - if self.errorlevel > 1: - raise - else: - self._dbg(1, "tarfile: %s" % e) + self._handle_nonfatal_error(e) - def extract(self, member, path="", set_attrs=True, *, numeric_owner=False): + def extract(self, member, path="", set_attrs=True, *, numeric_owner=False, + filter=None): """Extract a member from the archive to the current working directory, using its full name. Its file information is extracted as accurately as possible. `member' may be a filename or a TarInfo object. You can @@ -2084,35 +2298,70 @@ def extract(self, member, path="", set_attrs=True, *, numeric_owner=False): mtime, mode) are set unless `set_attrs' is False. If `numeric_owner` is True, only the numbers for user/group names are used and not the names. + + The `filter` function will be called before extraction. + It can return a changed TarInfo or None to skip the member. + String names of common filters are accepted. """ - self._check("r") + filter_function = self._get_filter_function(filter) + tarinfo = self._get_extract_tarinfo(member, filter_function, path) + if tarinfo is not None: + self._extract_one(tarinfo, path, set_attrs, numeric_owner) + def _get_extract_tarinfo(self, member, filter_function, path): + """Get filtered TarInfo (or None) from member, which might be a str""" if isinstance(member, str): tarinfo = self.getmember(member) else: tarinfo = member + unfiltered = tarinfo + try: + tarinfo = filter_function(tarinfo, path) + except (OSError, FilterError) as e: + self._handle_fatal_error(e) + except ExtractError as e: + self._handle_nonfatal_error(e) + if tarinfo is None: + self._dbg(2, "tarfile: Excluded %r" % unfiltered.name) + return None # Prepare the link target for makelink(). if tarinfo.islnk(): + tarinfo = copy.copy(tarinfo) tarinfo._link_target = os.path.join(path, tarinfo.linkname) + return tarinfo + + def _extract_one(self, tarinfo, path, set_attrs, numeric_owner): + """Extract from filtered tarinfo to disk""" + self._check("r") try: self._extract_member(tarinfo, os.path.join(path, tarinfo.name), set_attrs=set_attrs, numeric_owner=numeric_owner) except OSError as e: - if self.errorlevel > 0: - raise - else: - if e.filename is None: - self._dbg(1, "tarfile: %s" % e.strerror) - else: - self._dbg(1, "tarfile: %s %r" % (e.strerror, e.filename)) + self._handle_fatal_error(e) except ExtractError as e: - if self.errorlevel > 1: - raise + self._handle_nonfatal_error(e) + + def _handle_nonfatal_error(self, e): + """Handle non-fatal error (ExtractError) according to errorlevel""" + if self.errorlevel > 1: + raise + else: + self._dbg(1, "tarfile: %s" % e) + + def _handle_fatal_error(self, e): + """Handle "fatal" error according to self.errorlevel""" + if self.errorlevel > 0: + raise + elif isinstance(e, OSError): + if e.filename is None: + self._dbg(1, "tarfile: %s" % e.strerror) else: - self._dbg(1, "tarfile: %s" % e) + self._dbg(1, "tarfile: %s %r" % (e.strerror, e.filename)) + else: + self._dbg(1, "tarfile: %s %s" % (type(e).__name__, e)) def extractfile(self, member): """Extract a member from the archive as a file object. `member' may be @@ -2199,11 +2448,16 @@ def makedir(self, tarinfo, targetpath): """Make a directory called targetpath. """ try: - # Use a safe mode for the directory, the real mode is set - # later in _extract_member(). - os.mkdir(targetpath, 0o700) + if tarinfo.mode is None: + # Use the system's default mode + os.mkdir(targetpath) + else: + # Use a safe mode for the directory, the real mode is set + # later in _extract_member(). + os.mkdir(targetpath, 0o700) except FileExistsError: - pass + if not os.path.isdir(targetpath): + raise def makefile(self, tarinfo, targetpath): """Make a file called targetpath. @@ -2244,6 +2498,9 @@ def makedev(self, tarinfo, targetpath): raise ExtractError("special devices not supported by system") mode = tarinfo.mode + if mode is None: + # Use mknod's default + mode = 0o600 if tarinfo.isblk(): mode |= stat.S_IFBLK else: @@ -2265,7 +2522,6 @@ def makelink(self, tarinfo, targetpath): os.unlink(targetpath) os.symlink(tarinfo.linkname, targetpath) else: - # See extract(). if os.path.exists(tarinfo._link_target): os.link(tarinfo._link_target, targetpath) else: @@ -2290,15 +2546,19 @@ def chown(self, tarinfo, targetpath, numeric_owner): u = tarinfo.uid if not numeric_owner: try: - if grp: + if grp and tarinfo.gname: g = grp.getgrnam(tarinfo.gname)[2] except KeyError: pass try: - if pwd: + if pwd and tarinfo.uname: u = pwd.getpwnam(tarinfo.uname)[2] except KeyError: pass + if g is None: + g = -1 + if u is None: + u = -1 try: if tarinfo.issym() and hasattr(os, "lchown"): os.lchown(targetpath, u, g) @@ -2310,6 +2570,8 @@ def chown(self, tarinfo, targetpath, numeric_owner): def chmod(self, tarinfo, targetpath): """Set file permissions of targetpath according to tarinfo. """ + if tarinfo.mode is None: + return try: os.chmod(targetpath, tarinfo.mode) except OSError as e: @@ -2318,10 +2580,13 @@ def chmod(self, tarinfo, targetpath): def utime(self, tarinfo, targetpath): """Set modification time of targetpath according to tarinfo. """ + mtime = tarinfo.mtime + if mtime is None: + return if not hasattr(os, 'utime'): return try: - os.utime(targetpath, (tarinfo.mtime, tarinfo.mtime)) + os.utime(targetpath, (mtime, mtime)) except OSError as e: raise ExtractError("could not change modification time") from e @@ -2339,6 +2604,8 @@ def next(self): # Advance the file pointer. if self.offset != self.fileobj.tell(): + if self.offset == 0: + return None self.fileobj.seek(self.offset - 1) if not self.fileobj.read(1): raise ReadError("unexpected end of data") @@ -2397,13 +2664,26 @@ def _getmember(self, name, tarinfo=None, normalize=False): members = self.getmembers() # Limit the member search list up to tarinfo. + skipping = False if tarinfo is not None: - members = members[:members.index(tarinfo)] + try: + index = members.index(tarinfo) + except ValueError: + # The given starting point might be a (modified) copy. + # We'll later skip members until we find an equivalent. + skipping = True + else: + # Happy fast path + members = members[:index] if normalize: name = os.path.normpath(name) for member in reversed(members): + if skipping: + if tarinfo.offset == member.offset: + skipping = False + continue if normalize: member_name = os.path.normpath(member.name) else: @@ -2412,14 +2692,16 @@ def _getmember(self, name, tarinfo=None, normalize=False): if name == member_name: return member + if skipping: + # Starting point was not found + raise ValueError(tarinfo) + def _load(self): """Read through the entire archive file and look for readable members. """ - while True: - tarinfo = self.next() - if tarinfo is None: - break + while self.next() is not None: + pass self._loaded = True def _check(self, mode=None): @@ -2504,6 +2786,7 @@ def __exit__(self, type, value, traceback): #-------------------- # exported functions #-------------------- + def is_tarfile(name): """Return True if name points to a tar archive that we are able to handle, else return False. @@ -2512,7 +2795,9 @@ def is_tarfile(name): """ try: if hasattr(name, "read"): + pos = name.tell() t = open(fileobj=name) + name.seek(pos) else: t = open(name) t.close() @@ -2530,6 +2815,10 @@ def main(): parser = argparse.ArgumentParser(description=description) parser.add_argument('-v', '--verbose', action='store_true', default=False, help='Verbose output') + parser.add_argument('--filter', metavar='', + choices=_NAMED_FILTERS, + help='Filter for extraction') + group = parser.add_mutually_exclusive_group(required=True) group.add_argument('-l', '--list', metavar='', help='Show listing of a tarfile') @@ -2541,8 +2830,12 @@ def main(): help='Create tarfile from sources') group.add_argument('-t', '--test', metavar='', help='Test if a tarfile is valid') + args = parser.parse_args() + if args.filter and args.extract is None: + parser.exit(1, '--filter is only valid for extraction\n') + if args.test is not None: src = args.test if is_tarfile(src): @@ -2573,7 +2866,7 @@ def main(): if is_tarfile(src): with TarFile.open(src, 'r:*') as tf: - tf.extractall(path=curdir) + tf.extractall(path=curdir, filter=args.filter) if args.verbose: if curdir == '.': msg = '{!r} file is extracted.'.format(src) diff --git a/Lib/test/cmath_testcases.txt b/Lib/test/mathdata/cmath_testcases.txt similarity index 99% rename from Lib/test/cmath_testcases.txt rename to Lib/test/mathdata/cmath_testcases.txt index dd7e458ddc..0165e17634 100644 --- a/Lib/test/cmath_testcases.txt +++ b/Lib/test/mathdata/cmath_testcases.txt @@ -1536,6 +1536,7 @@ sqrt0141 sqrt -1.797e+308 -9.9999999999999999e+306 -> 3.7284476432057307e+152 -1 sqrt0150 sqrt 1.7976931348623157e+308 0.0 -> 1.3407807929942596355e+154 0.0 sqrt0151 sqrt 2.2250738585072014e-308 0.0 -> 1.4916681462400413487e-154 0.0 sqrt0152 sqrt 5e-324 0.0 -> 2.2227587494850774834e-162 0.0 +sqrt0153 sqrt 5e-324 1.0 -> 0.7071067811865476 0.7071067811865476 -- special values sqrt1000 sqrt 0.0 0.0 -> 0.0 0.0 @@ -1744,6 +1745,7 @@ cosh0023 cosh 2.218885944363501 2.0015727395883687 -> -1.94294321081968 4.129026 -- large real part cosh0030 cosh 710.5 2.3519999999999999 -> -1.2967465239355998e+308 1.3076707908857333e+308 cosh0031 cosh -710.5 0.69999999999999996 -> 1.4085466381392499e+308 -1.1864024666450239e+308 +cosh0032 cosh 720.0 0.0 -> inf 0.0 overflow -- Additional real values (mpmath) cosh0050 cosh 1e-150 0.0 -> 1.0 0.0 @@ -1853,6 +1855,7 @@ sinh0023 sinh 0.043713693678420068 0.22512549887532657 -> 0.042624198673416713 0 -- large real part sinh0030 sinh 710.5 -2.3999999999999999 -> -1.3579970564885919e+308 -1.24394470907798e+308 sinh0031 sinh -710.5 0.80000000000000004 -> -1.2830671601735164e+308 1.3210954193997678e+308 +sinh0032 sinh 720.0 0.0 -> inf 0.0 overflow -- Additional real values (mpmath) sinh0050 sinh 1e-100 0.0 -> 1.00000000000000002e-100 0.0 diff --git a/Lib/test/ieee754.txt b/Lib/test/mathdata/ieee754.txt similarity index 100% rename from Lib/test/ieee754.txt rename to Lib/test/mathdata/ieee754.txt diff --git a/Lib/test/math_testcases.txt b/Lib/test/mathdata/math_testcases.txt similarity index 100% rename from Lib/test/math_testcases.txt rename to Lib/test/mathdata/math_testcases.txt diff --git a/Lib/test/support/__init__.py b/Lib/test/support/__init__.py index 1efe5bddb1..aab31a366e 100644 --- a/Lib/test/support/__init__.py +++ b/Lib/test/support/__init__.py @@ -74,13 +74,7 @@ # # The timeout should be long enough for connect(), recv() and send() methods # of socket.socket. -LOOPBACK_TIMEOUT = 5.0 -if sys.platform == 'win32' and ' 32 bit (ARM)' in sys.version: - # bpo-37553: test_socket.SendfileUsingSendTest is taking longer than 2 - # seconds on Windows ARM32 buildbot - LOOPBACK_TIMEOUT = 10 -elif sys.platform == 'vxworks': - LOOPBACK_TIMEOUT = 10 +LOOPBACK_TIMEOUT = 10.0 # Timeout in seconds for network requests going to the internet. The timeout is # short enough to prevent a test to wait for too long if the internet request @@ -113,7 +107,6 @@ STDLIB_DIR = os.path.dirname(TEST_HOME_DIR) REPO_ROOT = os.path.dirname(STDLIB_DIR) - class Error(Exception): """Base class for regression test exceptions.""" @@ -259,22 +252,16 @@ class USEROBJECTFLAGS(ctypes.Structure): # process not running under the same user id as the current console # user. To avoid that, raise an exception if the window manager # connection is not available. - from ctypes import cdll, c_int, pointer, Structure - from ctypes.util import find_library - - app_services = cdll.LoadLibrary(find_library("ApplicationServices")) - - if app_services.CGMainDisplayID() == 0: - reason = "gui tests cannot run without OS X window manager" + import subprocess + try: + rc = subprocess.run(["launchctl", "managername"], + capture_output=True, check=True) + managername = rc.stdout.decode("utf-8").strip() + except subprocess.CalledProcessError: + reason = "unable to detect macOS launchd job manager" else: - class ProcessSerialNumber(Structure): - _fields_ = [("highLongOfPSN", c_int), - ("lowLongOfPSN", c_int)] - psn = ProcessSerialNumber() - psn_p = pointer(psn) - if ( (app_services.GetCurrentProcess(psn_p) < 0) or - (app_services.SetFrontProcess(psn_p) < 0) ): - reason = "cannot run without OS X gui process" + if managername != "Aqua": + reason = f"{managername=} -- can only run in a macOS GUI session" # check on every platform whether tkinter can actually do anything if not reason: @@ -391,11 +378,12 @@ def wrapper(*args, **kw): def skip_if_buildbot(reason=None): """Decorator raising SkipTest if running on a buildbot.""" + import getpass if not reason: reason = 'not suitable for buildbots' try: isbuildbot = getpass.getuser().lower() == 'buildbot' - except (KeyError, EnvironmentError) as err: + except (KeyError, OSError) as err: warnings.warn(f'getpass.getuser() failed {err}.', RuntimeWarning) isbuildbot = False return unittest.skipIf(isbuildbot, reason) @@ -409,35 +397,48 @@ def check_sanitizer(*, address=False, memory=False, ub=False, thread=False): cflags = sysconfig.get_config_var('CFLAGS') or '' config_args = sysconfig.get_config_var('CONFIG_ARGS') or '' memory_sanitizer = ( - '-fsanitize=memory' in cflags or - '--with-memory-sanitizer' in config_args + '-fsanitize=memory' in cflags or + '--with-memory-sanitizer' in config_args ) address_sanitizer = ( - '-fsanitize=address' in cflags or - '--with-address-sanitizer' in config_args + '-fsanitize=address' in cflags or + '--with-address-sanitizer' in config_args ) ub_sanitizer = ( - '-fsanitize=undefined' in cflags or - '--with-undefined-behavior-sanitizer' in config_args + '-fsanitize=undefined' in cflags or + '--with-undefined-behavior-sanitizer' in config_args ) thread_sanitizer = ( - '-fsanitize=thread' in cflags or - '--with-thread-sanitizer' in config_args + '-fsanitize=thread' in cflags or + '--with-thread-sanitizer' in config_args ) return ( - (memory and memory_sanitizer) or - (address and address_sanitizer) or - (ub and ub_sanitizer) or - (thread and thread_sanitizer) + (memory and memory_sanitizer) or + (address and address_sanitizer) or + (ub and ub_sanitizer) or + (thread and thread_sanitizer) ) + def skip_if_sanitizer(reason=None, *, address=False, memory=False, ub=False, thread=False): """Decorator raising SkipTest if running with a sanitizer active.""" if not reason: reason = 'not working with sanitizers active' - skip = check_sanitizer(address=address, memory=memory, ub=ub) + skip = check_sanitizer(address=address, memory=memory, ub=ub, thread=thread) return unittest.skipIf(skip, reason) +# gh-89363: True if fork() can hang if Python is built with Address Sanitizer +# (ASAN): libasan race condition, dead lock in pthread_create(). +HAVE_ASAN_FORK_BUG = check_sanitizer(address=True) + + +def set_sanitizer_env_var(env, option): + for name in ('ASAN_OPTIONS', 'MSAN_OPTIONS', 'UBSAN_OPTIONS', 'TSAN_OPTIONS'): + if name in env: + env[name] += f':{option}' + else: + env[name] = option + def system_must_validate_cert(f): """Skip the test on TLS certificate validation failures.""" @@ -497,6 +498,8 @@ def requires_lzma(reason='requires lzma'): import lzma except ImportError: lzma = None + # XXX: RUSTPYTHON; xz is not supported yet + lzma = None return unittest.skipUnless(lzma, reason) def has_no_debug_ranges(): @@ -510,21 +513,42 @@ def has_no_debug_ranges(): def requires_debug_ranges(reason='requires co_positions / debug_ranges'): return unittest.skipIf(has_no_debug_ranges(), reason) -def requires_legacy_unicode_capi(): +@contextlib.contextmanager +def suppress_immortalization(suppress=True): + """Suppress immortalization of deferred objects.""" try: - from _testcapi import unicode_legacy_string + import _testinternalcapi except ImportError: - unicode_legacy_string = None + yield + return - return unittest.skipUnless(unicode_legacy_string, - 'requires legacy Unicode C API') + if not suppress: + yield + return + + _testinternalcapi.suppress_immortalization(True) + try: + yield + finally: + _testinternalcapi.suppress_immortalization(False) + +def skip_if_suppress_immortalization(): + try: + import _testinternalcapi + except ImportError: + return + return unittest.skipUnless(_testinternalcapi.get_immortalize_deferred(), + "requires immortalization of deferred objects") + + +MS_WINDOWS = (sys.platform == 'win32') # Is not actually used in tests, but is kept for compatibility. is_jython = sys.platform.startswith('java') -is_android = hasattr(sys, 'getandroidapilevel') +is_android = sys.platform == "android" -if sys.platform not in ('win32', 'vxworks'): +if sys.platform not in {"win32", "vxworks", "ios", "tvos", "watchos"}: unix_shell = '/system/bin/sh' if is_android else '/bin/sh' else: unix_shell = None @@ -534,23 +558,44 @@ def requires_legacy_unicode_capi(): is_emscripten = sys.platform == "emscripten" is_wasi = sys.platform == "wasi" -has_fork_support = hasattr(os, "fork") and not is_emscripten and not is_wasi +is_apple_mobile = sys.platform in {"ios", "tvos", "watchos"} +is_apple = is_apple_mobile or sys.platform == "darwin" -# From python 3.12.6 -is_s390x = hasattr(os, 'uname') and os.uname().machine == 's390x' -skip_on_s390x = unittest.skipIf(is_s390x, 'skipped on s390x') +has_fork_support = hasattr(os, "fork") and not ( + # WASM and Apple mobile platforms do not support subprocesses. + is_emscripten + or is_wasi + or is_apple_mobile + + # Although Android supports fork, it's unsafe to call it from Python because + # all Android apps are multi-threaded. + or is_android +) def requires_fork(): return unittest.skipUnless(has_fork_support, "requires working os.fork()") -has_subprocess_support = not is_emscripten and not is_wasi +has_subprocess_support = not ( + # WASM and Apple mobile platforms do not support subprocesses. + is_emscripten + or is_wasi + or is_apple_mobile + + # Although Android supports subproceses, they're almost never useful in + # practice (see PEP 738). And most of the tests that use them are calling + # sys.executable, which won't work when Python is embedded in an Android app. + or is_android +) def requires_subprocess(): """Used for subprocess, os.spawn calls, fd inheritance""" return unittest.skipUnless(has_subprocess_support, "requires subprocess support") # Emscripten's socket emulation and WASI sockets have limitations. -has_socket_support = not is_emscripten and not is_wasi +has_socket_support = not ( + is_emscripten + or is_wasi +) def requires_working_socket(*, module=False): """Skip tests or modules that require working sockets @@ -1888,8 +1933,7 @@ def _check_tracemalloc(): "if tracemalloc module is tracing " "memory allocations") - -# TODO: RUSTPYTHON (comment out before) +# TODO: RUSTPYTHON; GC is not supported yet # def check_free_after_iterating(test, iter, cls, args=()): # class A(cls): # def __del__(self): @@ -2551,7 +2595,8 @@ def adjust_int_max_str_digits(max_digits): # The default C recursion limit (from Include/cpython/pystate.h). C_RECURSION_LIMIT = 1500 -#Windows doesn't have os.uname() but it doesn't support s390x. +# Windows doesn't have os.uname() but it doesn't support s390x. +is_s390x = hasattr(os, 'uname') and os.uname().machine == 's390x' skip_on_s390x = unittest.skipIf(hasattr(os, 'uname') and os.uname().machine == 's390x', 'skipped on s390x') HAVE_ASAN_FORK_BUG = check_sanitizer(address=True) diff --git a/Lib/test/support/i18n_helper.py b/Lib/test/support/i18n_helper.py new file mode 100644 index 0000000000..2e304f29e8 --- /dev/null +++ b/Lib/test/support/i18n_helper.py @@ -0,0 +1,63 @@ +import re +import subprocess +import sys +import unittest +from pathlib import Path +from test.support import REPO_ROOT, TEST_HOME_DIR, requires_subprocess +from test.test_tools import skip_if_missing + + +pygettext = Path(REPO_ROOT) / 'Tools' / 'i18n' / 'pygettext.py' + +msgid_pattern = re.compile(r'msgid(.*?)(?:msgid_plural|msgctxt|msgstr)', + re.DOTALL) +msgid_string_pattern = re.compile(r'"((?:\\"|[^"])*)"') + + +def _generate_po_file(path, *, stdout_only=True): + res = subprocess.run([sys.executable, pygettext, + '--no-location', '-o', '-', path], + stdout=subprocess.PIPE, stderr=subprocess.PIPE, + text=True) + if stdout_only: + return res.stdout + return res + + +def _extract_msgids(po): + msgids = [] + for msgid in msgid_pattern.findall(po): + msgid_string = ''.join(msgid_string_pattern.findall(msgid)) + msgid_string = msgid_string.replace(r'\"', '"') + if msgid_string: + msgids.append(msgid_string) + return sorted(msgids) + + +def _get_snapshot_path(module_name): + return Path(TEST_HOME_DIR) / 'translationdata' / module_name / 'msgids.txt' + + +@requires_subprocess() +class TestTranslationsBase(unittest.TestCase): + + def assertMsgidsEqual(self, module): + '''Assert that msgids extracted from a given module match a + snapshot. + + ''' + skip_if_missing('i18n') + res = _generate_po_file(module.__file__, stdout_only=False) + self.assertEqual(res.returncode, 0) + self.assertEqual(res.stderr, '') + msgids = _extract_msgids(res.stdout) + snapshot_path = _get_snapshot_path(module.__name__) + snapshot = snapshot_path.read_text().splitlines() + self.assertListEqual(msgids, snapshot) + + +def update_translation_snapshots(module): + contents = _generate_po_file(module.__file__) + msgids = _extract_msgids(contents) + snapshot_path = _get_snapshot_path(module.__name__) + snapshot_path.write_text('\n'.join(msgids)) diff --git a/Lib/test/support/socket_helper.py b/Lib/test/support/socket_helper.py index d9c087c251..87941ee179 100644 --- a/Lib/test/support/socket_helper.py +++ b/Lib/test/support/socket_helper.py @@ -8,7 +8,6 @@ import unittest from .. import support -from . import warnings_helper HOST = "localhost" HOSTv4 = "127.0.0.1" @@ -196,7 +195,6 @@ def get_socket_conn_refused_errs(): def transient_internet(resource_name, *, timeout=_NOT_SET, errnos=()): """Return a context manager that raises ResourceDenied when various issues with the internet connection manifest themselves as exceptions.""" - nntplib = warnings_helper.import_deprecated("nntplib") import urllib.error if timeout is _NOT_SET: timeout = support.INTERNET_TIMEOUT @@ -249,10 +247,6 @@ def filter_error(err): if timeout is not None: socket.setdefaulttimeout(timeout) yield - except nntplib.NNTPTemporaryError as err: - if support.verbose: - sys.stderr.write(denied.args[0] + "\n") - raise denied from err except OSError as err: # urllib can wrap original socket errors multiple times (!), we must # unwrap to get at the original error. @@ -303,7 +297,7 @@ def _get_sysctl(name): stderr=subprocess.STDOUT, text=True) if proc.returncode: - support.print_warning(f'{" ".join(cmd)!r} command failed with ' + support.print_warning(f'{' '.join(cmd)!r} command failed with ' f'exit code {proc.returncode}') # cache the error to only log the warning once _sysctl_cache[name] = None @@ -314,7 +308,7 @@ def _get_sysctl(name): try: value = int(output.strip()) except Exception as exc: - support.print_warning(f'Failed to parse {" ".join(cmd)!r} ' + support.print_warning(f'Failed to parse {' '.join(cmd)!r} ' f'command output {output!r}: {exc!r}') # cache the error to only log the warning once _sysctl_cache[name] = None diff --git a/Lib/test/support/testcase.py b/Lib/test/support/testcase.py index fad1e4cb34..fd32457d14 100644 --- a/Lib/test/support/testcase.py +++ b/Lib/test/support/testcase.py @@ -1,6 +1,63 @@ from math import copysign, isnan +class ExtraAssertions: + + def assertIsSubclass(self, cls, superclass, msg=None): + if issubclass(cls, superclass): + return + standardMsg = f'{cls!r} is not a subclass of {superclass!r}' + self.fail(self._formatMessage(msg, standardMsg)) + + def assertNotIsSubclass(self, cls, superclass, msg=None): + if not issubclass(cls, superclass): + return + standardMsg = f'{cls!r} is a subclass of {superclass!r}' + self.fail(self._formatMessage(msg, standardMsg)) + + def assertHasAttr(self, obj, name, msg=None): + if not hasattr(obj, name): + if isinstance(obj, types.ModuleType): + standardMsg = f'module {obj.__name__!r} has no attribute {name!r}' + elif isinstance(obj, type): + standardMsg = f'type object {obj.__name__!r} has no attribute {name!r}' + else: + standardMsg = f'{type(obj).__name__!r} object has no attribute {name!r}' + self.fail(self._formatMessage(msg, standardMsg)) + + def assertNotHasAttr(self, obj, name, msg=None): + if hasattr(obj, name): + if isinstance(obj, types.ModuleType): + standardMsg = f'module {obj.__name__!r} has unexpected attribute {name!r}' + elif isinstance(obj, type): + standardMsg = f'type object {obj.__name__!r} has unexpected attribute {name!r}' + else: + standardMsg = f'{type(obj).__name__!r} object has unexpected attribute {name!r}' + self.fail(self._formatMessage(msg, standardMsg)) + + def assertStartsWith(self, s, prefix, msg=None): + if s.startswith(prefix): + return + standardMsg = f"{s!r} doesn't start with {prefix!r}" + self.fail(self._formatMessage(msg, standardMsg)) + + def assertNotStartsWith(self, s, prefix, msg=None): + if not s.startswith(prefix): + return + self.fail(self._formatMessage(msg, f"{s!r} starts with {prefix!r}")) + + def assertEndsWith(self, s, suffix, msg=None): + if s.endswith(suffix): + return + standardMsg = f"{s!r} doesn't end with {suffix!r}" + self.fail(self._formatMessage(msg, standardMsg)) + + def assertNotEndsWith(self, s, suffix, msg=None): + if not s.endswith(suffix): + return + self.fail(self._formatMessage(msg, f"{s!r} ends with {suffix!r}")) + + class ExceptionIsLikeMixin: def assertExceptionIsLike(self, exc, template): """ diff --git a/Lib/test/test___all__.py b/Lib/test/test___all__.py index a620dd5b4c..7b5356ea02 100644 --- a/Lib/test/test___all__.py +++ b/Lib/test/test___all__.py @@ -5,17 +5,21 @@ import sys import types -try: - import _multiprocessing -except ModuleNotFoundError: - _multiprocessing = None - if support.check_sanitizer(address=True, memory=True): - # bpo-46633: test___all__ is skipped because importing some modules - # directly can trigger known problems with ASAN (like tk or crypt). - raise unittest.SkipTest("workaround ASAN build issues on loading tests " - "like tk or crypt") + SKIP_MODULES = frozenset(( + # gh-90791: Tests involving libX11 can SEGFAULT on ASAN/MSAN builds. + # Skip modules, packages and tests using '_tkinter'. + '_tkinter', + 'tkinter', + 'test_tkinter', + 'test_ttk', + 'test_ttk_textonly', + 'idlelib', + 'test_idle', + )) +else: + SKIP_MODULES = () class NoAll(RuntimeError): @@ -27,17 +31,6 @@ class FailedImport(RuntimeError): class AllTest(unittest.TestCase): - def setUp(self): - # concurrent.futures uses a __getattr__ hook. Its __all__ triggers - # import of a submodule, which fails when _multiprocessing is not - # available. - if _multiprocessing is None: - sys.modules["_multiprocessing"] = types.ModuleType("_multiprocessing") - - def tearDown(self): - if _multiprocessing is None: - sys.modules.pop("_multiprocessing") - def check_all(self, modname): names = {} with warnings_helper.check_warnings( @@ -83,16 +76,24 @@ def walk_modules(self, basedir, modpath): for fn in sorted(os.listdir(basedir)): path = os.path.join(basedir, fn) if os.path.isdir(path): + if fn in SKIP_MODULES: + continue pkg_init = os.path.join(path, '__init__.py') if os.path.exists(pkg_init): yield pkg_init, modpath + fn for p, m in self.walk_modules(path, modpath + fn + "."): yield p, m continue - if not fn.endswith('.py') or fn == '__init__.py': + + if fn == '__init__.py': continue - yield path, modpath + fn[:-3] - + if not fn.endswith('.py'): + continue + modname = fn.removesuffix('.py') + if modname in SKIP_MODULES: + continue + yield path, modpath + modname + # TODO: RUSTPYTHON @unittest.expectedFailure def test_all(self): @@ -103,7 +104,8 @@ def test_all(self): ]) # In case _socket fails to build, make this test fail more gracefully - # than an AttributeError somewhere deep in CGIHTTPServer. + # than an AttributeError somewhere deep in concurrent.futures, email + # or unittest. import _socket ignored = [] @@ -120,14 +122,14 @@ def test_all(self): if denied: continue if support.verbose: - print(modname) + print(f"Check {modname}", flush=True) try: # This heuristic speeds up the process by removing, de facto, # most test modules (and avoiding the auto-executing ones). with open(path, "rb") as f: if b"__all__" not in f.read(): raise NoAll(modname) - self.check_all(modname) + self.check_all(modname) except NoAll: ignored.append(modname) except FailedImport: diff --git a/Lib/test/test_android.py b/Lib/test/test_android.py new file mode 100644 index 0000000000..076190f757 --- /dev/null +++ b/Lib/test/test_android.py @@ -0,0 +1,448 @@ +import io +import platform +import queue +import re +import subprocess +import sys +import unittest +from _android_support import TextLogStream +from array import array +from contextlib import ExitStack, contextmanager +from threading import Thread +from test.support import LOOPBACK_TIMEOUT +from time import time +from unittest.mock import patch + + +if sys.platform != "android": + raise unittest.SkipTest("Android-specific") + +api_level = platform.android_ver().api_level + +# (name, level, fileno) +STREAM_INFO = [("stdout", "I", 1), ("stderr", "W", 2)] + + +# Test redirection of stdout and stderr to the Android log. +@unittest.skipIf( + api_level < 23 and platform.machine() == "aarch64", + "SELinux blocks reading logs on older ARM64 emulators" +) +class TestAndroidOutput(unittest.TestCase): + maxDiff = None + + def setUp(self): + self.logcat_process = subprocess.Popen( + ["logcat", "-v", "tag"], stdout=subprocess.PIPE, + errors="backslashreplace" + ) + self.logcat_queue = queue.Queue() + + def logcat_thread(): + for line in self.logcat_process.stdout: + self.logcat_queue.put(line.rstrip("\n")) + self.logcat_process.stdout.close() + self.logcat_thread = Thread(target=logcat_thread) + self.logcat_thread.start() + + from ctypes import CDLL, c_char_p, c_int + android_log_write = getattr(CDLL("liblog.so"), "__android_log_write") + android_log_write.argtypes = (c_int, c_char_p, c_char_p) + ANDROID_LOG_INFO = 4 + + # Separate tests using a marker line with a different tag. + tag, message = "python.test", f"{self.id()} {time()}" + android_log_write( + ANDROID_LOG_INFO, tag.encode("UTF-8"), message.encode("UTF-8")) + self.assert_log("I", tag, message, skip=True, timeout=5) + + def assert_logs(self, level, tag, expected, **kwargs): + for line in expected: + self.assert_log(level, tag, line, **kwargs) + + def assert_log(self, level, tag, expected, *, skip=False, timeout=0.5): + deadline = time() + timeout + while True: + try: + line = self.logcat_queue.get(timeout=(deadline - time())) + except queue.Empty: + self.fail(f"line not found: {expected!r}") + if match := re.fullmatch(fr"(.)/{tag}: (.*)", line): + try: + self.assertEqual(level, match[1]) + self.assertEqual(expected, match[2]) + break + except AssertionError: + if not skip: + raise + + def tearDown(self): + self.logcat_process.terminate() + self.logcat_process.wait(LOOPBACK_TIMEOUT) + self.logcat_thread.join(LOOPBACK_TIMEOUT) + + @contextmanager + def unbuffered(self, stream): + stream.reconfigure(write_through=True) + try: + yield + finally: + stream.reconfigure(write_through=False) + + # In --verbose3 mode, sys.stdout and sys.stderr are captured, so we can't + # test them directly. Detect this mode and use some temporary streams with + # the same properties. + def stream_context(self, stream_name, level): + # https://developer.android.com/ndk/reference/group/logging + prio = {"I": 4, "W": 5}[level] + + stack = ExitStack() + stack.enter_context(self.subTest(stream_name)) + stream = getattr(sys, stream_name) + native_stream = getattr(sys, f"__{stream_name}__") + if isinstance(stream, io.StringIO): + stack.enter_context( + patch( + f"sys.{stream_name}", + TextLogStream( + prio, f"python.{stream_name}", native_stream.fileno(), + errors="backslashreplace" + ), + ) + ) + return stack + + def test_str(self): + for stream_name, level, fileno in STREAM_INFO: + with self.stream_context(stream_name, level): + stream = getattr(sys, stream_name) + tag = f"python.{stream_name}" + self.assertEqual(f"", repr(stream)) + + self.assertIs(stream.writable(), True) + self.assertIs(stream.readable(), False) + self.assertEqual(stream.fileno(), fileno) + self.assertEqual("UTF-8", stream.encoding) + self.assertEqual("backslashreplace", stream.errors) + self.assertIs(stream.line_buffering, True) + self.assertIs(stream.write_through, False) + + def write(s, lines=None, *, write_len=None): + if write_len is None: + write_len = len(s) + self.assertEqual(write_len, stream.write(s)) + if lines is None: + lines = [s] + self.assert_logs(level, tag, lines) + + # Single-line messages, + with self.unbuffered(stream): + write("", []) + + write("a") + write("Hello") + write("Hello world") + write(" ") + write(" ") + + # Non-ASCII text + write("ol\u00e9") # Spanish + write("\u4e2d\u6587") # Chinese + + # Non-BMP emoji + write("\U0001f600") + + # Non-encodable surrogates + write("\ud800\udc00", [r"\ud800\udc00"]) + + # Code used by surrogateescape (which isn't enabled here) + write("\udc80", [r"\udc80"]) + + # Null characters are logged using "modified UTF-8". + write("\u0000", [r"\xc0\x80"]) + write("a\u0000", [r"a\xc0\x80"]) + write("\u0000b", [r"\xc0\x80b"]) + write("a\u0000b", [r"a\xc0\x80b"]) + + # Multi-line messages. Avoid identical consecutive lines, as + # they may activate "chatty" filtering and break the tests. + write("\nx", [""]) + write("\na\n", ["x", "a"]) + write("\n", [""]) + write("b\n", ["b"]) + write("c\n\n", ["c", ""]) + write("d\ne", ["d"]) + write("xx", []) + write("f\n\ng", ["exxf", ""]) + write("\n", ["g"]) + + # Since this is a line-based logging system, line buffering + # cannot be turned off, i.e. a newline always causes a flush. + stream.reconfigure(line_buffering=False) + self.assertIs(stream.line_buffering, True) + + # However, buffering can be turned off completely if you want a + # flush after every write. + with self.unbuffered(stream): + write("\nx", ["", "x"]) + write("\na\n", ["", "a"]) + write("\n", [""]) + write("b\n", ["b"]) + write("c\n\n", ["c", ""]) + write("d\ne", ["d", "e"]) + write("xx", ["xx"]) + write("f\n\ng", ["f", "", "g"]) + write("\n", [""]) + + # "\r\n" should be translated into "\n". + write("hello\r\n", ["hello"]) + write("hello\r\nworld\r\n", ["hello", "world"]) + write("\r\n", [""]) + + # Non-standard line separators should be preserved. + write("before form feed\x0cafter form feed\n", + ["before form feed\x0cafter form feed"]) + write("before line separator\u2028after line separator\n", + ["before line separator\u2028after line separator"]) + + # String subclasses are accepted, but they should be converted + # to a standard str without calling any of their methods. + class CustomStr(str): + def splitlines(self, *args, **kwargs): + raise AssertionError() + + def __len__(self): + raise AssertionError() + + def __str__(self): + raise AssertionError() + + write(CustomStr("custom\n"), ["custom"], write_len=7) + + # Non-string classes are not accepted. + for obj in [b"", b"hello", None, 42]: + with self.subTest(obj=obj): + with self.assertRaisesRegex( + TypeError, + fr"write\(\) argument must be str, not " + fr"{type(obj).__name__}" + ): + stream.write(obj) + + # Manual flushing is supported. + write("hello", []) + stream.flush() + self.assert_log(level, tag, "hello") + write("hello", []) + write("world", []) + stream.flush() + self.assert_log(level, tag, "helloworld") + + # Long lines are split into blocks of 1000 characters + # (MAX_CHARS_PER_WRITE in _android_support.py), but + # TextIOWrapper should then join them back together as much as + # possible without exceeding 4000 UTF-8 bytes + # (MAX_BYTES_PER_WRITE). + # + # ASCII (1 byte per character) + write(("foobar" * 700) + "\n", # 4200 bytes in + [("foobar" * 666) + "foob", # 4000 bytes out + "ar" + ("foobar" * 33)]) # 200 bytes out + + # "Full-width" digits 0-9 (3 bytes per character) + s = "\uff10\uff11\uff12\uff13\uff14\uff15\uff16\uff17\uff18\uff19" + write((s * 150) + "\n", # 4500 bytes in + [s * 100, # 3000 bytes out + s * 50]) # 1500 bytes out + + s = "0123456789" + write(s * 200, []) # 2000 bytes in + write(s * 150, []) # 1500 bytes in + write(s * 51, [s * 350]) # 510 bytes in, 3500 bytes out + write("\n", [s * 51]) # 0 bytes in, 510 bytes out + + def test_bytes(self): + for stream_name, level, fileno in STREAM_INFO: + with self.stream_context(stream_name, level): + stream = getattr(sys, stream_name).buffer + tag = f"python.{stream_name}" + self.assertEqual(f"", repr(stream)) + self.assertIs(stream.writable(), True) + self.assertIs(stream.readable(), False) + self.assertEqual(stream.fileno(), fileno) + + def write(b, lines=None, *, write_len=None): + if write_len is None: + write_len = len(b) + self.assertEqual(write_len, stream.write(b)) + if lines is None: + lines = [b.decode()] + self.assert_logs(level, tag, lines) + + # Single-line messages, + write(b"", []) + + write(b"a") + write(b"Hello") + write(b"Hello world") + write(b" ") + write(b" ") + + # Non-ASCII text + write(b"ol\xc3\xa9") # Spanish + write(b"\xe4\xb8\xad\xe6\x96\x87") # Chinese + + # Non-BMP emoji + write(b"\xf0\x9f\x98\x80") + + # Null bytes are logged using "modified UTF-8". + write(b"\x00", [r"\xc0\x80"]) + write(b"a\x00", [r"a\xc0\x80"]) + write(b"\x00b", [r"\xc0\x80b"]) + write(b"a\x00b", [r"a\xc0\x80b"]) + + # Invalid UTF-8 + write(b"\xff", [r"\xff"]) + write(b"a\xff", [r"a\xff"]) + write(b"\xffb", [r"\xffb"]) + write(b"a\xffb", [r"a\xffb"]) + + # Log entries containing newlines are shown differently by + # `logcat -v tag`, `logcat -v long`, and Android Studio. We + # currently use `logcat -v tag`, which shows each line as if it + # was a separate log entry, but strips a single trailing + # newline. + # + # On newer versions of Android, all three of the above tools (or + # maybe Logcat itself) will also strip any number of leading + # newlines. + write(b"\nx", ["", "x"] if api_level < 30 else ["x"]) + write(b"\na\n", ["", "a"] if api_level < 30 else ["a"]) + write(b"\n", [""]) + write(b"b\n", ["b"]) + write(b"c\n\n", ["c", ""]) + write(b"d\ne", ["d", "e"]) + write(b"xx", ["xx"]) + write(b"f\n\ng", ["f", "", "g"]) + write(b"\n", [""]) + + # "\r\n" should be translated into "\n". + write(b"hello\r\n", ["hello"]) + write(b"hello\r\nworld\r\n", ["hello", "world"]) + write(b"\r\n", [""]) + + # Other bytes-like objects are accepted. + write(bytearray(b"bytearray")) + + mv = memoryview(b"memoryview") + write(mv, ["memoryview"]) # Continuous + write(mv[::2], ["mmrve"]) # Discontinuous + + write( + # Android only supports little-endian architectures, so the + # bytes representation is as follows: + array("H", [ + 0, # 00 00 + 1, # 01 00 + 65534, # FE FF + 65535, # FF FF + ]), + + # After encoding null bytes with modified UTF-8, the only + # valid UTF-8 sequence is \x01. All other bytes are handled + # by backslashreplace. + ["\\xc0\\x80\\xc0\\x80" + "\x01\\xc0\\x80" + "\\xfe\\xff" + "\\xff\\xff"], + write_len=8, + ) + + # Non-bytes-like classes are not accepted. + for obj in ["", "hello", None, 42]: + with self.subTest(obj=obj): + with self.assertRaisesRegex( + TypeError, + fr"write\(\) argument must be bytes-like, not " + fr"{type(obj).__name__}" + ): + stream.write(obj) + + +class TestAndroidRateLimit(unittest.TestCase): + def test_rate_limit(self): + # https://cs.android.com/android/platform/superproject/+/android-14.0.0_r1:system/logging/liblog/include/log/log_read.h;l=39 + PER_MESSAGE_OVERHEAD = 28 + + # https://developer.android.com/ndk/reference/group/logging + ANDROID_LOG_DEBUG = 3 + + # To avoid flooding the test script output, use a different tag rather + # than stdout or stderr. + tag = "python.rate_limit" + stream = TextLogStream(ANDROID_LOG_DEBUG, tag) + + # Make a test message which consumes 1 KB of the logcat buffer. + message = "Line {:03d} " + message += "." * ( + 1024 - PER_MESSAGE_OVERHEAD - len(tag) - len(message.format(0)) + ) + "\n" + + # To avoid depending on the performance of the test device, we mock the + # passage of time. + mock_now = time() + + def mock_time(): + # Avoid division by zero by simulating a small delay. + mock_sleep(0.0001) + return mock_now + + def mock_sleep(duration): + nonlocal mock_now + mock_now += duration + + # See _android_support.py. The default values of these parameters work + # well across a wide range of devices, but we'll use smaller values to + # ensure a quick and reliable test that doesn't flood the log too much. + MAX_KB_PER_SECOND = 100 + BUCKET_KB = 10 + with ( + patch("_android_support.MAX_BYTES_PER_SECOND", MAX_KB_PER_SECOND * 1024), + patch("_android_support.BUCKET_SIZE", BUCKET_KB * 1024), + patch("_android_support.sleep", mock_sleep), + patch("_android_support.time", mock_time), + ): + # Make sure the token bucket is full. + stream.write("Initial message to reset _prev_write_time") + mock_sleep(BUCKET_KB / MAX_KB_PER_SECOND) + line_num = 0 + + # Write BUCKET_KB messages, and return the rate at which they were + # accepted in KB per second. + def write_bucketful(): + nonlocal line_num + start = mock_time() + max_line_num = line_num + BUCKET_KB + while line_num < max_line_num: + stream.write(message.format(line_num)) + line_num += 1 + return BUCKET_KB / (mock_time() - start) + + # The first bucketful should be written with minimal delay. The + # factor of 2 here is not arbitrary: it verifies that the system can + # write fast enough to empty the bucket within two bucketfuls, which + # the next part of the test depends on. + self.assertGreater(write_bucketful(), MAX_KB_PER_SECOND * 2) + + # Write another bucketful to empty the token bucket completely. + write_bucketful() + + # The next bucketful should be written at the rate limit. + self.assertAlmostEqual( + write_bucketful(), MAX_KB_PER_SECOND, + delta=MAX_KB_PER_SECOND * 0.1 + ) + + # Once the token bucket refills, we should go back to full speed. + mock_sleep(BUCKET_KB / MAX_KB_PER_SECOND) + self.assertGreater(write_bucketful(), MAX_KB_PER_SECOND * 2) diff --git a/Lib/test/test_argparse.py b/Lib/test/test_argparse.py index 3a62a16cee..dc2df795a7 100644 --- a/Lib/test/test_argparse.py +++ b/Lib/test/test_argparse.py @@ -15,7 +15,10 @@ import argparse import warnings +from enum import StrEnum +from test.support import captured_stderr from test.support import os_helper +from test.support.i18n_helper import TestTranslationsBase, update_translation_snapshots from unittest import mock @@ -280,16 +283,18 @@ def test_failures(self, tester): parser = self._get_parser(tester) for args_str in tester.failures: args = args_str.split() - with tester.assertRaises(ArgumentParserError, msg=args): - parser.parse_args(args) + with tester.subTest(args=args): + with tester.assertRaises(ArgumentParserError, msg=args): + parser.parse_args(args) def test_successes(self, tester): parser = self._get_parser(tester) for args, expected_ns in tester.successes: if isinstance(args, str): args = args.split() - result_ns = self._parse_args(parser, args) - tester.assertEqual(expected_ns, result_ns) + with tester.subTest(args=args): + result_ns = self._parse_args(parser, args) + tester.assertEqual(expected_ns, result_ns) # add tests for each combination of an optionals adding method # and an arg parsing method @@ -378,15 +383,22 @@ class TestOptionalsSingleDashAmbiguous(ParserTestCase): """Test Optionals that partially match but are not subsets""" argument_signatures = [Sig('-foobar'), Sig('-foorab')] - failures = ['-f', '-f a', '-fa', '-foa', '-foo', '-fo', '-foo b'] + failures = ['-f', '-f a', '-fa', '-foa', '-foo', '-fo', '-foo b', + '-f=a', '-foo=b'] successes = [ ('', NS(foobar=None, foorab=None)), ('-foob a', NS(foobar='a', foorab=None)), + ('-foob=a', NS(foobar='a', foorab=None)), ('-foor a', NS(foobar=None, foorab='a')), + ('-foor=a', NS(foobar=None, foorab='a')), ('-fooba a', NS(foobar='a', foorab=None)), + ('-fooba=a', NS(foobar='a', foorab=None)), ('-foora a', NS(foobar=None, foorab='a')), + ('-foora=a', NS(foobar=None, foorab='a')), ('-foobar a', NS(foobar='a', foorab=None)), + ('-foobar=a', NS(foobar='a', foorab=None)), ('-foorab a', NS(foobar=None, foorab='a')), + ('-foorab=a', NS(foobar=None, foorab='a')), ] @@ -677,7 +689,7 @@ class TestOptionalsChoices(ParserTestCase): argument_signatures = [ Sig('-f', choices='abc'), Sig('-g', type=int, choices=range(5))] - failures = ['a', '-f d', '-fad', '-ga', '-g 6'] + failures = ['a', '-f d', '-f ab', '-fad', '-ga', '-g 6'] successes = [ ('', NS(f=None, g=None)), ('-f a', NS(f='a', g=None)), @@ -916,7 +928,9 @@ class TestOptionalsAllowLongAbbreviation(ParserTestCase): successes = [ ('', NS(foo=None, foobaz=None, fooble=False)), ('--foo 7', NS(foo='7', foobaz=None, fooble=False)), + ('--foo=7', NS(foo='7', foobaz=None, fooble=False)), ('--fooba a', NS(foo=None, foobaz='a', fooble=False)), + ('--fooba=a', NS(foo=None, foobaz='a', fooble=False)), ('--foobl --foo g', NS(foo='g', foobaz=None, fooble=True)), ] @@ -955,6 +969,23 @@ class TestOptionalsDisallowLongAbbreviationPrefixChars(ParserTestCase): ] +class TestOptionalsDisallowSingleDashLongAbbreviation(ParserTestCase): + """Do not allow abbreviations of long options at all""" + + parser_signature = Sig(allow_abbrev=False) + argument_signatures = [ + Sig('-foo'), + Sig('-foodle', action='store_true'), + Sig('-foonly'), + ] + failures = ['-foon 3', '-food', '-food -foo 2'] + successes = [ + ('', NS(foo=None, foodle=False, foonly=None)), + ('-foo 3', NS(foo='3', foodle=False, foonly=None)), + ('-foonly 7 -foodle -foo 2', NS(foo='2', foodle=True, foonly='7')), + ] + + class TestDisallowLongAbbreviationAllowsShortGrouping(ParserTestCase): """Do not allow abbreviations of long options at all""" @@ -993,6 +1024,34 @@ class TestDisallowLongAbbreviationAllowsShortGroupingPrefix(ParserTestCase): ] +class TestStrEnumChoices(TestCase): + class Color(StrEnum): + RED = "red" + GREEN = "green" + BLUE = "blue" + + def test_parse_enum_value(self): + parser = argparse.ArgumentParser() + parser.add_argument('--color', choices=self.Color) + args = parser.parse_args(['--color', 'red']) + self.assertEqual(args.color, self.Color.RED) + + def test_help_message_contains_enum_choices(self): + parser = argparse.ArgumentParser() + parser.add_argument('--color', choices=self.Color, help='Choose a color') + self.assertIn('[--color {red,green,blue}]', parser.format_usage()) + self.assertIn(' --color {red,green,blue}', parser.format_help()) + + def test_invalid_enum_value_raises_error(self): + parser = argparse.ArgumentParser(exit_on_error=False) + parser.add_argument('--color', choices=self.Color) + self.assertRaisesRegex( + argparse.ArgumentError, + r"invalid choice: 'yellow' \(choose from red, green, blue\)", + parser.parse_args, + ['--color', 'yellow'], + ) + # ================ # Positional tests # ================ @@ -1132,57 +1191,87 @@ class TestPositionalsNargs2None(ParserTestCase): class TestPositionalsNargsNoneZeroOrMore(ParserTestCase): """Test a Positional with no nargs followed by one with unlimited""" - argument_signatures = [Sig('foo'), Sig('bar', nargs='*')] - failures = ['', '--foo'] + argument_signatures = [Sig('-x'), Sig('foo'), Sig('bar', nargs='*')] + failures = ['', '--foo', 'a b -x X c'] successes = [ - ('a', NS(foo='a', bar=[])), - ('a b', NS(foo='a', bar=['b'])), - ('a b c', NS(foo='a', bar=['b', 'c'])), + ('a', NS(x=None, foo='a', bar=[])), + ('a b', NS(x=None, foo='a', bar=['b'])), + ('a b c', NS(x=None, foo='a', bar=['b', 'c'])), + ('-x X a', NS(x='X', foo='a', bar=[])), + ('a -x X', NS(x='X', foo='a', bar=[])), + ('-x X a b', NS(x='X', foo='a', bar=['b'])), + ('a -x X b', NS(x='X', foo='a', bar=['b'])), + ('a b -x X', NS(x='X', foo='a', bar=['b'])), + ('-x X a b c', NS(x='X', foo='a', bar=['b', 'c'])), + ('a -x X b c', NS(x='X', foo='a', bar=['b', 'c'])), + ('a b c -x X', NS(x='X', foo='a', bar=['b', 'c'])), ] class TestPositionalsNargsNoneOneOrMore(ParserTestCase): """Test a Positional with no nargs followed by one with one or more""" - argument_signatures = [Sig('foo'), Sig('bar', nargs='+')] - failures = ['', '--foo', 'a'] + argument_signatures = [Sig('-x'), Sig('foo'), Sig('bar', nargs='+')] + failures = ['', '--foo', 'a', 'a b -x X c'] successes = [ - ('a b', NS(foo='a', bar=['b'])), - ('a b c', NS(foo='a', bar=['b', 'c'])), + ('a b', NS(x=None, foo='a', bar=['b'])), + ('a b c', NS(x=None, foo='a', bar=['b', 'c'])), + ('-x X a b', NS(x='X', foo='a', bar=['b'])), + ('a -x X b', NS(x='X', foo='a', bar=['b'])), + ('a b -x X', NS(x='X', foo='a', bar=['b'])), + ('-x X a b c', NS(x='X', foo='a', bar=['b', 'c'])), + ('a -x X b c', NS(x='X', foo='a', bar=['b', 'c'])), + ('a b c -x X', NS(x='X', foo='a', bar=['b', 'c'])), ] class TestPositionalsNargsNoneOptional(ParserTestCase): """Test a Positional with no nargs followed by one with an Optional""" - argument_signatures = [Sig('foo'), Sig('bar', nargs='?')] + argument_signatures = [Sig('-x'), Sig('foo'), Sig('bar', nargs='?')] failures = ['', '--foo', 'a b c'] successes = [ - ('a', NS(foo='a', bar=None)), - ('a b', NS(foo='a', bar='b')), + ('a', NS(x=None, foo='a', bar=None)), + ('a b', NS(x=None, foo='a', bar='b')), + ('-x X a', NS(x='X', foo='a', bar=None)), + ('a -x X', NS(x='X', foo='a', bar=None)), + ('-x X a b', NS(x='X', foo='a', bar='b')), + ('a -x X b', NS(x='X', foo='a', bar='b')), + ('a b -x X', NS(x='X', foo='a', bar='b')), ] class TestPositionalsNargsZeroOrMoreNone(ParserTestCase): """Test a Positional with unlimited nargs followed by one with none""" - argument_signatures = [Sig('foo', nargs='*'), Sig('bar')] - failures = ['', '--foo'] + argument_signatures = [Sig('-x'), Sig('foo', nargs='*'), Sig('bar')] + failures = ['', '--foo', 'a -x X b', 'a -x X b c', 'a b -x X c'] successes = [ - ('a', NS(foo=[], bar='a')), - ('a b', NS(foo=['a'], bar='b')), - ('a b c', NS(foo=['a', 'b'], bar='c')), + ('a', NS(x=None, foo=[], bar='a')), + ('a b', NS(x=None, foo=['a'], bar='b')), + ('a b c', NS(x=None, foo=['a', 'b'], bar='c')), + ('-x X a', NS(x='X', foo=[], bar='a')), + ('a -x X', NS(x='X', foo=[], bar='a')), + ('-x X a b', NS(x='X', foo=['a'], bar='b')), + ('a b -x X', NS(x='X', foo=['a'], bar='b')), + ('-x X a b c', NS(x='X', foo=['a', 'b'], bar='c')), + ('a b c -x X', NS(x='X', foo=['a', 'b'], bar='c')), ] class TestPositionalsNargsOneOrMoreNone(ParserTestCase): """Test a Positional with one or more nargs followed by one with none""" - argument_signatures = [Sig('foo', nargs='+'), Sig('bar')] - failures = ['', '--foo', 'a'] + argument_signatures = [Sig('-x'), Sig('foo', nargs='+'), Sig('bar')] + failures = ['', '--foo', 'a', 'a -x X b c', 'a b -x X c'] successes = [ - ('a b', NS(foo=['a'], bar='b')), - ('a b c', NS(foo=['a', 'b'], bar='c')), + ('a b', NS(x=None, foo=['a'], bar='b')), + ('a b c', NS(x=None, foo=['a', 'b'], bar='c')), + ('-x X a b', NS(x='X', foo=['a'], bar='b')), + ('a -x X b', NS(x='X', foo=['a'], bar='b')), + ('a b -x X', NS(x='X', foo=['a'], bar='b')), + ('-x X a b c', NS(x='X', foo=['a', 'b'], bar='c')), + ('a b c -x X', NS(x='X', foo=['a', 'b'], bar='c')), ] @@ -1267,14 +1356,21 @@ class TestPositionalsNargsNoneZeroOrMore1(ParserTestCase): """Test three Positionals: no nargs, unlimited nargs and 1 nargs""" argument_signatures = [ + Sig('-x'), Sig('foo'), Sig('bar', nargs='*'), Sig('baz', nargs=1), ] - failures = ['', '--foo', 'a'] + failures = ['', '--foo', 'a', 'a b -x X c'] successes = [ - ('a b', NS(foo='a', bar=[], baz=['b'])), - ('a b c', NS(foo='a', bar=['b'], baz=['c'])), + ('a b', NS(x=None, foo='a', bar=[], baz=['b'])), + ('a b c', NS(x=None, foo='a', bar=['b'], baz=['c'])), + ('-x X a b', NS(x='X', foo='a', bar=[], baz=['b'])), + ('a -x X b', NS(x='X', foo='a', bar=[], baz=['b'])), + ('a b -x X', NS(x='X', foo='a', bar=[], baz=['b'])), + ('-x X a b c', NS(x='X', foo='a', bar=['b'], baz=['c'])), + ('a -x X b c', NS(x='X', foo='a', bar=['b'], baz=['c'])), + ('a b c -x X', NS(x='X', foo='a', bar=['b'], baz=['c'])), ] @@ -1282,14 +1378,22 @@ class TestPositionalsNargsNoneOneOrMore1(ParserTestCase): """Test three Positionals: no nargs, one or more nargs and 1 nargs""" argument_signatures = [ + Sig('-x'), Sig('foo'), Sig('bar', nargs='+'), Sig('baz', nargs=1), ] - failures = ['', '--foo', 'a', 'b'] + failures = ['', '--foo', 'a', 'b', 'a b -x X c d', 'a b c -x X d'] successes = [ - ('a b c', NS(foo='a', bar=['b'], baz=['c'])), - ('a b c d', NS(foo='a', bar=['b', 'c'], baz=['d'])), + ('a b c', NS(x=None, foo='a', bar=['b'], baz=['c'])), + ('a b c d', NS(x=None, foo='a', bar=['b', 'c'], baz=['d'])), + ('-x X a b c', NS(x='X', foo='a', bar=['b'], baz=['c'])), + ('a -x X b c', NS(x='X', foo='a', bar=['b'], baz=['c'])), + ('a b -x X c', NS(x='X', foo='a', bar=['b'], baz=['c'])), + ('a b c -x X', NS(x='X', foo='a', bar=['b'], baz=['c'])), + ('-x X a b c d', NS(x='X', foo='a', bar=['b', 'c'], baz=['d'])), + ('a -x X b c d', NS(x='X', foo='a', bar=['b', 'c'], baz=['d'])), + ('a b c d -x X', NS(x='X', foo='a', bar=['b', 'c'], baz=['d'])), ] @@ -1297,14 +1401,21 @@ class TestPositionalsNargsNoneOptional1(ParserTestCase): """Test three Positionals: no nargs, optional narg and 1 nargs""" argument_signatures = [ + Sig('-x'), Sig('foo'), Sig('bar', nargs='?', default=0.625), Sig('baz', nargs=1), ] - failures = ['', '--foo', 'a'] + failures = ['', '--foo', 'a', 'a b -x X c'] successes = [ - ('a b', NS(foo='a', bar=0.625, baz=['b'])), - ('a b c', NS(foo='a', bar='b', baz=['c'])), + ('a b', NS(x=None, foo='a', bar=0.625, baz=['b'])), + ('a b c', NS(x=None, foo='a', bar='b', baz=['c'])), + ('-x X a b', NS(x='X', foo='a', bar=0.625, baz=['b'])), + ('a -x X b', NS(x='X', foo='a', bar=0.625, baz=['b'])), + ('a b -x X', NS(x='X', foo='a', bar=0.625, baz=['b'])), + ('-x X a b c', NS(x='X', foo='a', bar='b', baz=['c'])), + ('a -x X b c', NS(x='X', foo='a', bar='b', baz=['c'])), + ('a b c -x X', NS(x='X', foo='a', bar='b', baz=['c'])), ] @@ -1382,6 +1493,19 @@ class TestPositionalsActionAppend(ParserTestCase): ('a b c', NS(spam=['a', ['b', 'c']])), ] + +class TestPositionalsActionExtend(ParserTestCase): + """Test the 'extend' action""" + + argument_signatures = [ + Sig('spam', action='extend'), + Sig('spam', action='extend', nargs=2), + ] + failures = ['', '--foo', 'a', 'a b', 'a b c d'] + successes = [ + ('a b c', NS(spam=['a', 'b', 'c'])), + ] + # ======================================== # Combined optionals and positionals tests # ======================================== @@ -1419,6 +1543,32 @@ class TestOptionalsAlmostNumericAndPositionals(ParserTestCase): ] +class TestOptionalsAndPositionalsAppend(ParserTestCase): + argument_signatures = [ + Sig('foo', nargs='*', action='append'), + Sig('--bar'), + ] + failures = ['-foo'] + successes = [ + ('a b', NS(foo=[['a', 'b']], bar=None)), + ('--bar a b', NS(foo=[['b']], bar='a')), + ('a b --bar c', NS(foo=[['a', 'b']], bar='c')), + ] + + +class TestOptionalsAndPositionalsExtend(ParserTestCase): + argument_signatures = [ + Sig('foo', nargs='*', action='extend'), + Sig('--bar'), + ] + failures = ['-foo'] + successes = [ + ('a b', NS(foo=['a', 'b'], bar=None)), + ('--bar a b', NS(foo=['b'], bar='a')), + ('a b --bar c', NS(foo=['a', 'b'], bar='c')), + ] + + class TestEmptyAndSpaceContainingArguments(ParserTestCase): argument_signatures = [ @@ -1481,6 +1631,9 @@ class TestNargsRemainder(ParserTestCase): successes = [ ('X', NS(x='X', y=[], z=None)), ('-z Z X', NS(x='X', y=[], z='Z')), + ('-z Z X A B', NS(x='X', y=['A', 'B'], z='Z')), + ('X -z Z A B', NS(x='X', y=['-z', 'Z', 'A', 'B'], z=None)), + ('X A -z Z B', NS(x='X', y=['A', '-z', 'Z', 'B'], z=None)), ('X A B -z Z', NS(x='X', y=['A', 'B', '-z', 'Z'], z=None)), ('X Y --foo', NS(x='X', y=['Y', '--foo'], z=None)), ] @@ -1517,18 +1670,24 @@ class TestDefaultSuppress(ParserTestCase): """Test actions with suppressed defaults""" argument_signatures = [ - Sig('foo', nargs='?', default=argparse.SUPPRESS), - Sig('bar', nargs='*', default=argparse.SUPPRESS), + Sig('foo', nargs='?', type=int, default=argparse.SUPPRESS), + Sig('bar', nargs='*', type=int, default=argparse.SUPPRESS), Sig('--baz', action='store_true', default=argparse.SUPPRESS), + Sig('--qux', nargs='?', type=int, default=argparse.SUPPRESS), + Sig('--quux', nargs='*', type=int, default=argparse.SUPPRESS), ] - failures = ['-x'] + failures = ['-x', 'a', '1 a'] successes = [ ('', NS()), - ('a', NS(foo='a')), - ('a b', NS(foo='a', bar=['b'])), + ('1', NS(foo=1)), + ('1 2', NS(foo=1, bar=[2])), ('--baz', NS(baz=True)), - ('a --baz', NS(foo='a', baz=True)), - ('--baz a b', NS(foo='a', bar=['b'], baz=True)), + ('1 --baz', NS(foo=1, baz=True)), + ('--baz 1 2', NS(foo=1, bar=[2], baz=True)), + ('--qux', NS(qux=None)), + ('--qux 1', NS(qux=1)), + ('--quux', NS(quux=[])), + ('--quux 1 2', NS(quux=[1, 2])), ] @@ -1899,6 +2058,10 @@ def test_open_args(self): type('foo') m.assert_called_with('foo', *args) + def test_invalid_file_type(self): + with self.assertRaises(ValueError): + argparse.FileType('b')('-test') + class TestFileTypeMissingInitialization(TestCase): """ @@ -2092,6 +2255,27 @@ class TestActionExtend(ParserTestCase): ('--foo f1 --foo f2 f3 f4', NS(foo=['f1', 'f2', 'f3', 'f4'])), ] + +class TestInvalidAction(TestCase): + """Test invalid user defined Action""" + + class ActionWithoutCall(argparse.Action): + pass + + def test_invalid_type(self): + parser = argparse.ArgumentParser() + + parser.add_argument('--foo', action=self.ActionWithoutCall) + self.assertRaises(NotImplementedError, parser.parse_args, ['--foo', 'bar']) + + def test_modified_invalid_action(self): + parser = ErrorRaisingArgumentParser() + action = parser.add_argument('--foo') + # Someone got crazy and did this + action.type = 1 + self.assertRaises(ArgumentParserError, parser.parse_args, ['--foo', 'bar']) + + # ================ # Subparsers tests # ================ @@ -2126,7 +2310,9 @@ def _get_parser(self, subparser_help=False, prefix_chars=None, else: subparsers_kwargs['help'] = 'command help' subparsers = parser.add_subparsers(**subparsers_kwargs) - self.assertArgumentParserError(parser.add_subparsers) + self.assertRaisesRegex(argparse.ArgumentError, + 'cannot have multiple subparser arguments', + parser.add_subparsers) # add first sub-parser parser1_kwargs = dict(description='1 description') @@ -2136,14 +2322,14 @@ def _get_parser(self, subparser_help=False, prefix_chars=None, parser1_kwargs['aliases'] = ['1alias1', '1alias2'] parser1 = subparsers.add_parser('1', **parser1_kwargs) parser1.add_argument('-w', type=int, help='w help') - parser1.add_argument('x', choices='abc', help='x help') + parser1.add_argument('x', choices=['a', 'b', 'c'], help='x help') # add second sub-parser parser2_kwargs = dict(description='2 description') if subparser_help: parser2_kwargs['help'] = '2 help' parser2 = subparsers.add_parser('2', **parser2_kwargs) - parser2.add_argument('-y', choices='123', help='y help') + parser2.add_argument('-y', choices=['1', '2', '3'], help='y help') parser2.add_argument('z', type=complex, nargs='*', help='z help') # add third sub-parser @@ -2210,6 +2396,68 @@ def test_parse_known_args(self): (NS(foo=False, bar=0.5, w=7, x='b'), ['-W', '-X', 'Y', 'Z']), ) + def test_parse_known_args_to_class_namespace(self): + class C: + pass + self.assertEqual( + self.parser.parse_known_args('0.5 1 b -w 7 -p'.split(), namespace=C), + (C, ['-p']), + ) + self.assertIs(C.foo, False) + self.assertEqual(C.bar, 0.5) + self.assertEqual(C.w, 7) + self.assertEqual(C.x, 'b') + + def test_abbreviation(self): + parser = ErrorRaisingArgumentParser() + parser.add_argument('--foodle') + parser.add_argument('--foonly') + subparsers = parser.add_subparsers() + parser1 = subparsers.add_parser('bar') + parser1.add_argument('--fo') + parser1.add_argument('--foonew') + + self.assertEqual(parser.parse_args(['--food', 'baz', 'bar']), + NS(foodle='baz', foonly=None, fo=None, foonew=None)) + self.assertEqual(parser.parse_args(['--foon', 'baz', 'bar']), + NS(foodle=None, foonly='baz', fo=None, foonew=None)) + self.assertArgumentParserError(parser.parse_args, ['--fo', 'baz', 'bar']) + self.assertEqual(parser.parse_args(['bar', '--fo', 'baz']), + NS(foodle=None, foonly=None, fo='baz', foonew=None)) + self.assertEqual(parser.parse_args(['bar', '--foo', 'baz']), + NS(foodle=None, foonly=None, fo=None, foonew='baz')) + self.assertEqual(parser.parse_args(['bar', '--foon', 'baz']), + NS(foodle=None, foonly=None, fo=None, foonew='baz')) + self.assertArgumentParserError(parser.parse_args, ['bar', '--food', 'baz']) + + def test_parse_known_args_with_single_dash_option(self): + parser = ErrorRaisingArgumentParser() + parser.add_argument('-k', '--known', action='count', default=0) + parser.add_argument('-n', '--new', action='count', default=0) + self.assertEqual(parser.parse_known_args(['-k', '-u']), + (NS(known=1, new=0), ['-u'])) + self.assertEqual(parser.parse_known_args(['-u', '-k']), + (NS(known=1, new=0), ['-u'])) + self.assertEqual(parser.parse_known_args(['-ku']), + (NS(known=1, new=0), ['-u'])) + self.assertArgumentParserError(parser.parse_known_args, ['-k=u']) + self.assertEqual(parser.parse_known_args(['-uk']), + (NS(known=0, new=0), ['-uk'])) + self.assertEqual(parser.parse_known_args(['-u=k']), + (NS(known=0, new=0), ['-u=k'])) + self.assertEqual(parser.parse_known_args(['-kunknown']), + (NS(known=1, new=0), ['-unknown'])) + self.assertArgumentParserError(parser.parse_known_args, ['-k=unknown']) + self.assertEqual(parser.parse_known_args(['-ku=nknown']), + (NS(known=1, new=0), ['-u=nknown'])) + self.assertEqual(parser.parse_known_args(['-knew']), + (NS(known=1, new=1), ['-ew'])) + self.assertArgumentParserError(parser.parse_known_args, ['-kn=ew']) + self.assertArgumentParserError(parser.parse_known_args, ['-k-new']) + self.assertArgumentParserError(parser.parse_known_args, ['-kn-ew']) + self.assertEqual(parser.parse_known_args(['-kne-w']), + (NS(known=1, new=1), ['-e-w'])) + def test_dest(self): parser = ErrorRaisingArgumentParser() parser.add_argument('--foo', action='store_true') @@ -2269,7 +2517,7 @@ def test_wrong_argument_subparsers_no_destination_error(self): parser.parse_args(('baz',)) self.assertRegex( excinfo.exception.stderr, - r"error: argument {foo,bar}: invalid choice: 'baz' \(choose from 'foo', 'bar'\)\n$" + r"error: argument {foo,bar}: invalid choice: 'baz' \(choose from foo, bar\)\n$" ) def test_optional_subparsers(self): @@ -2727,6 +2975,38 @@ def test_groups_parents(self): -x X '''.format(progname, ' ' if progname else '' ))) + def test_wrong_type_parents(self): + self.assertRaises(TypeError, ErrorRaisingArgumentParser, parents=[1]) + + def test_mutex_groups_parents(self): + parent = ErrorRaisingArgumentParser(add_help=False) + g = parent.add_argument_group(title='g', description='gd') + g.add_argument('-w') + g.add_argument('-x') + m = g.add_mutually_exclusive_group() + m.add_argument('-y') + m.add_argument('-z') + parser = ErrorRaisingArgumentParser(prog='PROG', parents=[parent]) + + self.assertRaises(ArgumentParserError, parser.parse_args, + ['-y', 'Y', '-z', 'Z']) + + parser_help = parser.format_help() + self.assertEqual(parser_help, textwrap.dedent('''\ + usage: PROG [-h] [-w W] [-x X] [-y Y | -z Z] + + options: + -h, --help show this help message and exit + + g: + gd + + -w W + -x X + -y Y + -z Z + ''')) + # ============================== # Mutually exclusive group tests # ============================== @@ -2769,6 +3049,27 @@ def test_help(self): ''' self.assertEqual(parser.format_help(), textwrap.dedent(expected)) + def test_help_subparser_all_mutually_exclusive_group_members_suppressed(self): + self.maxDiff = None + parser = ErrorRaisingArgumentParser(prog='PROG') + commands = parser.add_subparsers(title="commands", dest="command") + cmd_foo = commands.add_parser("foo") + group = cmd_foo.add_mutually_exclusive_group() + group.add_argument('--verbose', action='store_true', help=argparse.SUPPRESS) + group.add_argument('--quiet', action='store_true', help=argparse.SUPPRESS) + longopt = '--' + 'long'*32 + longmeta = 'LONG'*32 + cmd_foo.add_argument(longopt) + expected = f'''\ + usage: PROG foo [-h] + [{longopt} {longmeta}] + + options: + -h, --help show this help message and exit + {longopt} {longmeta} + ''' + self.assertEqual(cmd_foo.format_help(), textwrap.dedent(expected)) + def test_empty_group(self): # See issue 26952 parser = argparse.ArgumentParser() @@ -2782,26 +3083,30 @@ def test_failures_when_not_required(self): parse_args = self.get_parser(required=False).parse_args error = ArgumentParserError for args_string in self.failures: - self.assertRaises(error, parse_args, args_string.split()) + with self.subTest(args=args_string): + self.assertRaises(error, parse_args, args_string.split()) def test_failures_when_required(self): parse_args = self.get_parser(required=True).parse_args error = ArgumentParserError for args_string in self.failures + ['']: - self.assertRaises(error, parse_args, args_string.split()) + with self.subTest(args=args_string): + self.assertRaises(error, parse_args, args_string.split()) def test_successes_when_not_required(self): parse_args = self.get_parser(required=False).parse_args successes = self.successes + self.successes_when_not_required for args_string, expected_ns in successes: - actual_ns = parse_args(args_string.split()) - self.assertEqual(actual_ns, expected_ns) + with self.subTest(args=args_string): + actual_ns = parse_args(args_string.split()) + self.assertEqual(actual_ns, expected_ns) def test_successes_when_required(self): parse_args = self.get_parser(required=True).parse_args for args_string, expected_ns in self.successes: - actual_ns = parse_args(args_string.split()) - self.assertEqual(actual_ns, expected_ns) + with self.subTest(args=args_string): + actual_ns = parse_args(args_string.split()) + self.assertEqual(actual_ns, expected_ns) def test_usage_when_not_required(self): format_usage = self.get_parser(required=False).format_usage @@ -2884,12 +3189,12 @@ def get_parser(self, required=None): ] usage_when_not_required = '''\ - usage: PROG [-h] [--abcde ABCDE] [--fghij FGHIJ] - [--klmno KLMNO | --pqrst PQRST] + usage: PROG [-h] [--abcde ABCDE] [--fghij FGHIJ] [--klmno KLMNO | + --pqrst PQRST] ''' usage_when_required = '''\ - usage: PROG [-h] [--abcde ABCDE] [--fghij FGHIJ] - (--klmno KLMNO | --pqrst PQRST) + usage: PROG [-h] [--abcde ABCDE] [--fghij FGHIJ] (--klmno KLMNO | + --pqrst PQRST) ''' help = '''\ @@ -2978,7 +3283,7 @@ def get_parser(self, required): group = parser.add_mutually_exclusive_group(required=required) group.add_argument('--foo', action='store_true', help='FOO') group.add_argument('--spam', help='SPAM') - group.add_argument('badger', nargs='*', default='X', help='BADGER') + group.add_argument('badger', nargs='*', help='BADGER') return parser failures = [ @@ -2989,13 +3294,13 @@ def get_parser(self, required): '--foo X Y', ] successes = [ - ('--foo', NS(foo=True, spam=None, badger='X')), - ('--spam S', NS(foo=False, spam='S', badger='X')), + ('--foo', NS(foo=True, spam=None, badger=[])), + ('--spam S', NS(foo=False, spam='S', badger=[])), ('X', NS(foo=False, spam=None, badger=['X'])), ('X Y Z', NS(foo=False, spam=None, badger=['X', 'Y', 'Z'])), ] successes_when_not_required = [ - ('', NS(foo=False, spam=None, badger='X')), + ('', NS(foo=False, spam=None, badger=[])), ] usage_when_not_required = '''\ @@ -3188,6 +3493,111 @@ def get_parser(self, required): test_successes_when_not_required = None test_successes_when_required = None + +class TestMutuallyExclusiveOptionalOptional(MEMixin, TestCase): + def get_parser(self, required=None): + parser = ErrorRaisingArgumentParser(prog='PROG') + group = parser.add_mutually_exclusive_group(required=required) + group.add_argument('--foo') + group.add_argument('--bar', nargs='?') + return parser + + failures = [ + '--foo X --bar Y', + '--foo X --bar', + ] + successes = [ + ('--foo X', NS(foo='X', bar=None)), + ('--bar X', NS(foo=None, bar='X')), + ('--bar', NS(foo=None, bar=None)), + ] + successes_when_not_required = [ + ('', NS(foo=None, bar=None)), + ] + usage_when_required = '''\ + usage: PROG [-h] (--foo FOO | --bar [BAR]) + ''' + usage_when_not_required = '''\ + usage: PROG [-h] [--foo FOO | --bar [BAR]] + ''' + help = '''\ + + options: + -h, --help show this help message and exit + --foo FOO + --bar [BAR] + ''' + + +class TestMutuallyExclusiveOptionalWithDefault(MEMixin, TestCase): + def get_parser(self, required=None): + parser = ErrorRaisingArgumentParser(prog='PROG') + group = parser.add_mutually_exclusive_group(required=required) + group.add_argument('--foo') + group.add_argument('--bar', type=bool, default=True) + return parser + + failures = [ + '--foo X --bar Y', + '--foo X --bar=', + ] + successes = [ + ('--foo X', NS(foo='X', bar=True)), + ('--bar X', NS(foo=None, bar=True)), + ('--bar=', NS(foo=None, bar=False)), + ] + successes_when_not_required = [ + ('', NS(foo=None, bar=True)), + ] + usage_when_required = '''\ + usage: PROG [-h] (--foo FOO | --bar BAR) + ''' + usage_when_not_required = '''\ + usage: PROG [-h] [--foo FOO | --bar BAR] + ''' + help = '''\ + + options: + -h, --help show this help message and exit + --foo FOO + --bar BAR + ''' + + +class TestMutuallyExclusivePositionalWithDefault(MEMixin, TestCase): + def get_parser(self, required=None): + parser = ErrorRaisingArgumentParser(prog='PROG') + group = parser.add_mutually_exclusive_group(required=required) + group.add_argument('--foo') + group.add_argument('bar', nargs='?', type=bool, default=True) + return parser + + failures = [ + '--foo X Y', + ] + successes = [ + ('--foo X', NS(foo='X', bar=True)), + ('X', NS(foo=None, bar=True)), + ] + successes_when_not_required = [ + ('', NS(foo=None, bar=True)), + ] + usage_when_required = '''\ + usage: PROG [-h] (--foo FOO | bar) + ''' + usage_when_not_required = '''\ + usage: PROG [-h] [--foo FOO | bar] + ''' + help = '''\ + + positional arguments: + bar + + options: + -h, --help show this help message and exit + --foo FOO + ''' + # ================================================= # Mutually exclusive group in parent parser tests # ================================================= @@ -3855,7 +4265,7 @@ class TestHelpUsageWithParentheses(HelpTestCase): options: -h, --help show this help message and exit - -p {1 (option A), 2 (option B)}, --optional {1 (option A), 2 (option B)} + -p, --optional {1 (option A), 2 (option B)} ''' version = '' @@ -4139,6 +4549,158 @@ class TestHelpUsagePositionalsOnlyWrap(HelpTestCase): version = '' +class TestHelpUsageMetavarsSpacesParentheses(HelpTestCase): + # https://github.com/python/cpython/issues/62549 + # https://github.com/python/cpython/issues/89743 + parser_signature = Sig(prog='PROG') + argument_signatures = [ + Sig('-n1', metavar='()', help='n1'), + Sig('-o1', metavar='(1, 2)', help='o1'), + Sig('-u1', metavar=' (uu) ', help='u1'), + Sig('-v1', metavar='( vv )', help='v1'), + Sig('-w1', metavar='(w)w', help='w1'), + Sig('-x1', metavar='x(x)', help='x1'), + Sig('-y1', metavar='yy)', help='y1'), + Sig('-z1', metavar='(zz', help='z1'), + Sig('-n2', metavar='[]', help='n2'), + Sig('-o2', metavar='[1, 2]', help='o2'), + Sig('-u2', metavar=' [uu] ', help='u2'), + Sig('-v2', metavar='[ vv ]', help='v2'), + Sig('-w2', metavar='[w]w', help='w2'), + Sig('-x2', metavar='x[x]', help='x2'), + Sig('-y2', metavar='yy]', help='y2'), + Sig('-z2', metavar='[zz', help='z2'), + ] + + usage = '''\ + usage: PROG [-h] [-n1 ()] [-o1 (1, 2)] [-u1 (uu) ] [-v1 ( vv )] [-w1 (w)w] + [-x1 x(x)] [-y1 yy)] [-z1 (zz] [-n2 []] [-o2 [1, 2]] [-u2 [uu] ] + [-v2 [ vv ]] [-w2 [w]w] [-x2 x[x]] [-y2 yy]] [-z2 [zz] + ''' + help = usage + '''\ + + options: + -h, --help show this help message and exit + -n1 () n1 + -o1 (1, 2) o1 + -u1 (uu) u1 + -v1 ( vv ) v1 + -w1 (w)w w1 + -x1 x(x) x1 + -y1 yy) y1 + -z1 (zz z1 + -n2 [] n2 + -o2 [1, 2] o2 + -u2 [uu] u2 + -v2 [ vv ] v2 + -w2 [w]w w2 + -x2 x[x] x2 + -y2 yy] y2 + -z2 [zz z2 + ''' + version = '' + + +class TestHelpUsageNoWhitespaceCrash(TestCase): + + def test_all_suppressed_mutex_followed_by_long_arg(self): + # https://github.com/python/cpython/issues/62090 + # https://github.com/python/cpython/issues/96310 + parser = argparse.ArgumentParser(prog='PROG') + mutex = parser.add_mutually_exclusive_group() + mutex.add_argument('--spam', help=argparse.SUPPRESS) + parser.add_argument('--eggs-eggs-eggs-eggs-eggs-eggs') + usage = textwrap.dedent('''\ + usage: PROG [-h] + [--eggs-eggs-eggs-eggs-eggs-eggs EGGS_EGGS_EGGS_EGGS_EGGS_EGGS] + ''') + self.assertEqual(parser.format_usage(), usage) + + def test_newline_in_metavar(self): + # https://github.com/python/cpython/issues/77048 + mapping = ['123456', '12345', '12345', '123'] + parser = argparse.ArgumentParser('11111111111111') + parser.add_argument('-v', '--verbose', + help='verbose mode', action='store_true') + parser.add_argument('targets', + help='installation targets', + nargs='+', + metavar='\n'.join(mapping)) + usage = textwrap.dedent('''\ + usage: 11111111111111 [-h] [-v] + 123456 + 12345 + 12345 + 123 [123456 + 12345 + 12345 + 123 ...] + ''') + self.assertEqual(parser.format_usage(), usage) + + def test_empty_metavar_required_arg(self): + # https://github.com/python/cpython/issues/82091 + parser = argparse.ArgumentParser(prog='PROG') + parser.add_argument('--nil', metavar='', required=True) + parser.add_argument('--a', metavar='A' * 70) + usage = ( + 'usage: PROG [-h] --nil \n' + ' [--a AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA' + 'AAAAAAAAAAAAAAAAAAAAAAA]\n' + ) + self.assertEqual(parser.format_usage(), usage) + + def test_all_suppressed_mutex_with_optional_nargs(self): + # https://github.com/python/cpython/issues/98666 + parser = argparse.ArgumentParser(prog='PROG') + mutex = parser.add_mutually_exclusive_group() + mutex.add_argument( + '--param1', + nargs='?', const='default', metavar='NAME', help=argparse.SUPPRESS) + mutex.add_argument( + '--param2', + nargs='?', const='default', metavar='NAME', help=argparse.SUPPRESS) + usage = 'usage: PROG [-h]\n' + self.assertEqual(parser.format_usage(), usage) + + def test_nested_mutex_groups(self): + parser = argparse.ArgumentParser(prog='PROG') + g = parser.add_mutually_exclusive_group() + g.add_argument("--spam") + with warnings.catch_warnings(): + warnings.simplefilter('ignore', DeprecationWarning) + gg = g.add_mutually_exclusive_group() + gg.add_argument("--hax") + gg.add_argument("--hox", help=argparse.SUPPRESS) + gg.add_argument("--hex") + g.add_argument("--eggs") + parser.add_argument("--num") + + usage = textwrap.dedent('''\ + usage: PROG [-h] [--spam SPAM | [--hax HAX | --hex HEX] | --eggs EGGS] + [--num NUM] + ''') + self.assertEqual(parser.format_usage(), usage) + + def test_long_mutex_groups_wrap(self): + parser = argparse.ArgumentParser(prog='PROG') + g = parser.add_mutually_exclusive_group() + g.add_argument('--op1', metavar='MET', nargs='?') + g.add_argument('--op2', metavar=('MET1', 'MET2'), nargs='*') + g.add_argument('--op3', nargs='*') + g.add_argument('--op4', metavar=('MET1', 'MET2'), nargs='+') + g.add_argument('--op5', nargs='+') + g.add_argument('--op6', nargs=3) + g.add_argument('--op7', metavar=('MET1', 'MET2', 'MET3'), nargs=3) + + usage = textwrap.dedent('''\ + usage: PROG [-h] [--op1 [MET] | --op2 [MET1 [MET2 ...]] | --op3 [OP3 ...] | + --op4 MET1 [MET2 ...] | --op5 OP5 [OP5 ...] | --op6 OP6 OP6 OP6 | + --op7 MET1 MET2 MET3] + ''') + self.assertEqual(parser.format_usage(), usage) + + class TestHelpVariableExpansion(HelpTestCase): """Test that variables are expanded properly in help messages""" @@ -4148,7 +4710,7 @@ class TestHelpVariableExpansion(HelpTestCase): help='x %(prog)s %(default)s %(type)s %%'), Sig('-y', action='store_const', default=42, const='XXX', help='y %(prog)s %(default)s %(const)s'), - Sig('--foo', choices='abc', + Sig('--foo', choices=['a', 'b', 'c'], help='foo %(prog)s %(default)s %(choices)s'), Sig('--bar', default='baz', choices=[1, 2], metavar='BBB', help='bar %(prog)s %(default)s %(dest)s'), @@ -4338,8 +4900,8 @@ class TestHelpAlternatePrefixChars(HelpTestCase): help = usage + '''\ options: - ^^foo foo help - ;b BAR, ;;bar BAR bar help + ^^foo foo help + ;b, ;;bar BAR bar help ''' version = '' @@ -4391,7 +4953,7 @@ class TestHelpNone(HelpTestCase): version = '' -class TestHelpTupleMetavar(HelpTestCase): +class TestHelpTupleMetavarOptional(HelpTestCase): """Test specifying metavar as a tuple""" parser_signature = Sig(prog='PROG') @@ -4418,6 +4980,34 @@ class TestHelpTupleMetavar(HelpTestCase): version = '' +class TestHelpTupleMetavarPositional(HelpTestCase): + """Test specifying metavar on a Positional as a tuple""" + + parser_signature = Sig(prog='PROG') + argument_signatures = [ + Sig('w', help='w help', nargs='+', metavar=('W1', 'W2')), + Sig('x', help='x help', nargs='*', metavar=('X1', 'X2')), + Sig('y', help='y help', nargs=3, metavar=('Y1', 'Y2', 'Y3')), + Sig('z', help='z help', nargs='?', metavar=('Z1',)), + ] + argument_group_signatures = [] + usage = '''\ + usage: PROG [-h] W1 [W2 ...] [X1 [X2 ...]] Y1 Y2 Y3 [Z1] + ''' + help = usage + '''\ + + positional arguments: + W1 W2 w help + X1 X2 x help + Y1 Y2 Y3 y help + Z1 z help + + options: + -h, --help show this help message and exit + ''' + version = '' + + class TestHelpRawText(HelpTestCase): """Test the RawTextHelpFormatter""" @@ -4711,6 +5301,46 @@ def custom_type(string): version = '' +class TestHelpUsageLongSubparserCommand(TestCase): + """Test that subparser commands are formatted correctly in help""" + maxDiff = None + + def test_parent_help(self): + def custom_formatter(prog): + return argparse.RawTextHelpFormatter(prog, max_help_position=50) + + parent_parser = argparse.ArgumentParser( + prog='PROG', + formatter_class=custom_formatter + ) + + cmd_subparsers = parent_parser.add_subparsers(title="commands", + metavar='CMD', + help='command to use') + cmd_subparsers.add_parser("add", + help="add something") + + cmd_subparsers.add_parser("remove", + help="remove something") + + cmd_subparsers.add_parser("a-very-long-command", + help="command that does something") + + parser_help = parent_parser.format_help() + self.assertEqual(parser_help, textwrap.dedent('''\ + usage: PROG [-h] CMD ... + + options: + -h, --help show this help message and exit + + commands: + CMD command to use + add add something + remove remove something + a-very-long-command command that does something + ''')) + + # ===================================== # Optional/Positional constructor tests # ===================================== @@ -4718,15 +5348,15 @@ def custom_type(string): class TestInvalidArgumentConstructors(TestCase): """Test a bunch of invalid Argument constructors""" - def assertTypeError(self, *args, **kwargs): + def assertTypeError(self, *args, errmsg=None, **kwargs): parser = argparse.ArgumentParser() - self.assertRaises(TypeError, parser.add_argument, - *args, **kwargs) + self.assertRaisesRegex(TypeError, errmsg, parser.add_argument, + *args, **kwargs) - def assertValueError(self, *args, **kwargs): + def assertValueError(self, *args, errmsg=None, **kwargs): parser = argparse.ArgumentParser() - self.assertRaises(ValueError, parser.add_argument, - *args, **kwargs) + self.assertRaisesRegex(ValueError, errmsg, parser.add_argument, + *args, **kwargs) def test_invalid_keyword_arguments(self): self.assertTypeError('-x', bar=None) @@ -4736,13 +5366,17 @@ def test_invalid_keyword_arguments(self): def test_missing_destination(self): self.assertTypeError() - for action in ['append', 'store']: - self.assertTypeError(action=action) + for action in ['store', 'append', 'extend']: + with self.subTest(action=action): + self.assertTypeError(action=action) def test_invalid_option_strings(self): self.assertValueError('--') self.assertValueError('---') + def test_invalid_prefix(self): + self.assertValueError('--foo', '+foo') + def test_invalid_type(self): self.assertValueError('--foo', type='int') self.assertValueError('--foo', type=(int, float)) @@ -4751,10 +5385,8 @@ def test_invalid_action(self): self.assertValueError('-x', action='foo') self.assertValueError('foo', action='baz') self.assertValueError('--foo', action=('store', 'append')) - parser = argparse.ArgumentParser() - with self.assertRaises(ValueError) as cm: - parser.add_argument("--foo", action="store-true") - self.assertIn('unknown action', str(cm.exception)) + self.assertValueError('--foo', action="store-true", + errmsg='unknown action') def test_multiple_dest(self): parser = argparse.ArgumentParser() @@ -4767,39 +5399,47 @@ def test_multiple_dest(self): def test_no_argument_actions(self): for action in ['store_const', 'store_true', 'store_false', 'append_const', 'count']: - for attrs in [dict(type=int), dict(nargs='+'), - dict(choices='ab')]: - self.assertTypeError('-x', action=action, **attrs) + with self.subTest(action=action): + for attrs in [dict(type=int), dict(nargs='+'), + dict(choices=['a', 'b'])]: + with self.subTest(attrs=attrs): + self.assertTypeError('-x', action=action, **attrs) + self.assertTypeError('x', action=action, **attrs) + self.assertTypeError('-x', action=action, nargs=0) + self.assertTypeError('x', action=action, nargs=0) def test_no_argument_no_const_actions(self): # options with zero arguments for action in ['store_true', 'store_false', 'count']: + with self.subTest(action=action): + # const is always disallowed + self.assertTypeError('-x', const='foo', action=action) - # const is always disallowed - self.assertTypeError('-x', const='foo', action=action) - - # nargs is always disallowed - self.assertTypeError('-x', nargs='*', action=action) + # nargs is always disallowed + self.assertTypeError('-x', nargs='*', action=action) def test_more_than_one_argument_actions(self): - for action in ['store', 'append']: - - # nargs=0 is disallowed - self.assertValueError('-x', nargs=0, action=action) - self.assertValueError('spam', nargs=0, action=action) - - # const is disallowed with non-optional arguments - for nargs in [1, '*', '+']: - self.assertValueError('-x', const='foo', - nargs=nargs, action=action) - self.assertValueError('spam', const='foo', - nargs=nargs, action=action) + for action in ['store', 'append', 'extend']: + with self.subTest(action=action): + # nargs=0 is disallowed + action_name = 'append' if action == 'extend' else action + self.assertValueError('-x', nargs=0, action=action, + errmsg=f'nargs for {action_name} actions must be != 0') + self.assertValueError('spam', nargs=0, action=action, + errmsg=f'nargs for {action_name} actions must be != 0') + + # const is disallowed with non-optional arguments + for nargs in [1, '*', '+']: + self.assertValueError('-x', const='foo', + nargs=nargs, action=action) + self.assertValueError('spam', const='foo', + nargs=nargs, action=action) def test_required_const_actions(self): for action in ['store_const', 'append_const']: - - # nargs is always disallowed - self.assertTypeError('-x', nargs='+', action=action) + with self.subTest(action=action): + # nargs is always disallowed + self.assertTypeError('-x', nargs='+', action=action) def test_parsers_action_missing_params(self): self.assertTypeError('command', action='parsers') @@ -4807,6 +5447,9 @@ def test_parsers_action_missing_params(self): self.assertTypeError('command', action='parsers', parser_class=argparse.ArgumentParser) + def test_version_missing_params(self): + self.assertTypeError('command', action='version') + def test_required_positional(self): self.assertTypeError('foo', required=True) @@ -5026,7 +5669,8 @@ def test_optional(self): string = ( "Action(option_strings=['--foo', '-a', '-b'], dest='b', " "nargs='+', const=None, default=42, type='int', " - "choices=[1, 2, 3], required=False, help='HELP', metavar='METAVAR')") + "choices=[1, 2, 3], required=False, help='HELP', " + "metavar='METAVAR', deprecated=False)") self.assertStringEqual(option, string) def test_argument(self): @@ -5043,7 +5687,8 @@ def test_argument(self): string = ( "Action(option_strings=[], dest='x', nargs='?', " "const=None, default=2.5, type=%r, choices=[0.5, 1.5, 2.5], " - "required=True, help='H HH H', metavar='MV MV MV')" % float) + "required=True, help='H HH H', metavar='MV MV MV', " + "deprecated=False)" % float) self.assertStringEqual(argument, string) def test_namespace(self): @@ -5235,6 +5880,139 @@ def spam(string_to_convert): args = parser.parse_args('--foo spam!'.split()) self.assertEqual(NS(foo='foo_converted'), args) + +# ============================================== +# Check that deprecated arguments output warning +# ============================================== + +class TestDeprecatedArguments(TestCase): + + def test_deprecated_option(self): + parser = argparse.ArgumentParser() + parser.add_argument('-f', '--foo', deprecated=True) + + with captured_stderr() as stderr: + parser.parse_args(['--foo', 'spam']) + stderr = stderr.getvalue() + self.assertRegex(stderr, "warning: option '--foo' is deprecated") + self.assertEqual(stderr.count('is deprecated'), 1) + + with captured_stderr() as stderr: + parser.parse_args(['-f', 'spam']) + stderr = stderr.getvalue() + self.assertRegex(stderr, "warning: option '-f' is deprecated") + self.assertEqual(stderr.count('is deprecated'), 1) + + with captured_stderr() as stderr: + parser.parse_args(['--foo', 'spam', '-f', 'ham']) + stderr = stderr.getvalue() + self.assertRegex(stderr, "warning: option '--foo' is deprecated") + self.assertRegex(stderr, "warning: option '-f' is deprecated") + self.assertEqual(stderr.count('is deprecated'), 2) + + with captured_stderr() as stderr: + parser.parse_args(['--foo', 'spam', '--foo', 'ham']) + stderr = stderr.getvalue() + self.assertRegex(stderr, "warning: option '--foo' is deprecated") + self.assertEqual(stderr.count('is deprecated'), 1) + + def test_deprecated_boolean_option(self): + parser = argparse.ArgumentParser() + parser.add_argument('-f', '--foo', action=argparse.BooleanOptionalAction, deprecated=True) + + with captured_stderr() as stderr: + parser.parse_args(['--foo']) + stderr = stderr.getvalue() + self.assertRegex(stderr, "warning: option '--foo' is deprecated") + self.assertEqual(stderr.count('is deprecated'), 1) + + with captured_stderr() as stderr: + parser.parse_args(['-f']) + stderr = stderr.getvalue() + self.assertRegex(stderr, "warning: option '-f' is deprecated") + self.assertEqual(stderr.count('is deprecated'), 1) + + with captured_stderr() as stderr: + parser.parse_args(['--no-foo']) + stderr = stderr.getvalue() + self.assertRegex(stderr, "warning: option '--no-foo' is deprecated") + self.assertEqual(stderr.count('is deprecated'), 1) + + with captured_stderr() as stderr: + parser.parse_args(['--foo', '--no-foo']) + stderr = stderr.getvalue() + self.assertRegex(stderr, "warning: option '--foo' is deprecated") + self.assertRegex(stderr, "warning: option '--no-foo' is deprecated") + self.assertEqual(stderr.count('is deprecated'), 2) + + def test_deprecated_arguments(self): + parser = argparse.ArgumentParser() + parser.add_argument('foo', nargs='?', deprecated=True) + parser.add_argument('bar', nargs='?', deprecated=True) + + with captured_stderr() as stderr: + parser.parse_args([]) + stderr = stderr.getvalue() + self.assertEqual(stderr.count('is deprecated'), 0) + + with captured_stderr() as stderr: + parser.parse_args(['spam']) + stderr = stderr.getvalue() + self.assertRegex(stderr, "warning: argument 'foo' is deprecated") + self.assertEqual(stderr.count('is deprecated'), 1) + + with captured_stderr() as stderr: + parser.parse_args(['spam', 'ham']) + stderr = stderr.getvalue() + self.assertRegex(stderr, "warning: argument 'foo' is deprecated") + self.assertRegex(stderr, "warning: argument 'bar' is deprecated") + self.assertEqual(stderr.count('is deprecated'), 2) + + def test_deprecated_varargument(self): + parser = argparse.ArgumentParser() + parser.add_argument('foo', nargs='*', deprecated=True) + + with captured_stderr() as stderr: + parser.parse_args([]) + stderr = stderr.getvalue() + self.assertEqual(stderr.count('is deprecated'), 0) + + with captured_stderr() as stderr: + parser.parse_args(['spam']) + stderr = stderr.getvalue() + self.assertRegex(stderr, "warning: argument 'foo' is deprecated") + self.assertEqual(stderr.count('is deprecated'), 1) + + with captured_stderr() as stderr: + parser.parse_args(['spam', 'ham']) + stderr = stderr.getvalue() + self.assertRegex(stderr, "warning: argument 'foo' is deprecated") + self.assertEqual(stderr.count('is deprecated'), 1) + + def test_deprecated_subparser(self): + parser = argparse.ArgumentParser() + subparsers = parser.add_subparsers() + subparsers.add_parser('foo', aliases=['baz'], deprecated=True) + subparsers.add_parser('bar') + + with captured_stderr() as stderr: + parser.parse_args(['bar']) + stderr = stderr.getvalue() + self.assertEqual(stderr.count('is deprecated'), 0) + + with captured_stderr() as stderr: + parser.parse_args(['foo']) + stderr = stderr.getvalue() + self.assertRegex(stderr, "warning: command 'foo' is deprecated") + self.assertEqual(stderr.count('is deprecated'), 1) + + with captured_stderr() as stderr: + parser.parse_args(['baz']) + stderr = stderr.getvalue() + self.assertRegex(stderr, "warning: command 'baz' is deprecated") + self.assertEqual(stderr.count('is deprecated'), 1) + + # ================================================================== # Check semantics regarding the default argument and type conversion # ================================================================== @@ -5333,6 +6111,133 @@ def test_zero_or_more_optional(self): self.assertEqual(NS(x=[]), args) +class TestDoubleDash(TestCase): + def test_single_argument_option(self): + parser = argparse.ArgumentParser(exit_on_error=False) + parser.add_argument('-f', '--foo') + parser.add_argument('bar', nargs='*') + + args = parser.parse_args(['--foo=--']) + self.assertEqual(NS(foo='--', bar=[]), args) + self.assertRaisesRegex(argparse.ArgumentError, + 'argument -f/--foo: expected one argument', + parser.parse_args, ['--foo', '--']) + args = parser.parse_args(['-f--']) + self.assertEqual(NS(foo='--', bar=[]), args) + self.assertRaisesRegex(argparse.ArgumentError, + 'argument -f/--foo: expected one argument', + parser.parse_args, ['-f', '--']) + args = parser.parse_args(['--foo', 'a', '--', 'b', 'c']) + self.assertEqual(NS(foo='a', bar=['b', 'c']), args) + args = parser.parse_args(['a', 'b', '--foo', 'c']) + self.assertEqual(NS(foo='c', bar=['a', 'b']), args) + args = parser.parse_args(['a', '--', 'b', '--foo', 'c']) + self.assertEqual(NS(foo=None, bar=['a', 'b', '--foo', 'c']), args) + args = parser.parse_args(['a', '--', 'b', '--', 'c', '--foo', 'd']) + self.assertEqual(NS(foo=None, bar=['a', 'b', '--', 'c', '--foo', 'd']), args) + + def test_multiple_argument_option(self): + parser = argparse.ArgumentParser(exit_on_error=False) + parser.add_argument('-f', '--foo', nargs='*') + parser.add_argument('bar', nargs='*') + + args = parser.parse_args(['--foo=--']) + self.assertEqual(NS(foo=['--'], bar=[]), args) + args = parser.parse_args(['--foo', '--']) + self.assertEqual(NS(foo=[], bar=[]), args) + args = parser.parse_args(['-f--']) + self.assertEqual(NS(foo=['--'], bar=[]), args) + args = parser.parse_args(['-f', '--']) + self.assertEqual(NS(foo=[], bar=[]), args) + args = parser.parse_args(['--foo', 'a', 'b', '--', 'c', 'd']) + self.assertEqual(NS(foo=['a', 'b'], bar=['c', 'd']), args) + args = parser.parse_args(['a', 'b', '--foo', 'c', 'd']) + self.assertEqual(NS(foo=['c', 'd'], bar=['a', 'b']), args) + args = parser.parse_args(['a', '--', 'b', '--foo', 'c', 'd']) + self.assertEqual(NS(foo=None, bar=['a', 'b', '--foo', 'c', 'd']), args) + args, argv = parser.parse_known_args(['a', 'b', '--foo', 'c', '--', 'd']) + self.assertEqual(NS(foo=['c'], bar=['a', 'b']), args) + self.assertEqual(argv, ['--', 'd']) + + def test_multiple_double_dashes(self): + parser = argparse.ArgumentParser(exit_on_error=False) + parser.add_argument('foo') + parser.add_argument('bar', nargs='*') + + args = parser.parse_args(['--', 'a', 'b', 'c']) + self.assertEqual(NS(foo='a', bar=['b', 'c']), args) + args = parser.parse_args(['a', '--', 'b', 'c']) + self.assertEqual(NS(foo='a', bar=['b', 'c']), args) + args = parser.parse_args(['a', 'b', '--', 'c']) + self.assertEqual(NS(foo='a', bar=['b', 'c']), args) + args = parser.parse_args(['a', '--', 'b', '--', 'c']) + self.assertEqual(NS(foo='a', bar=['b', '--', 'c']), args) + args = parser.parse_args(['--', '--', 'a', '--', 'b', 'c']) + self.assertEqual(NS(foo='--', bar=['a', '--', 'b', 'c']), args) + + def test_remainder(self): + parser = argparse.ArgumentParser(exit_on_error=False) + parser.add_argument('foo') + parser.add_argument('bar', nargs='...') + + args = parser.parse_args(['--', 'a', 'b', 'c']) + self.assertEqual(NS(foo='a', bar=['b', 'c']), args) + args = parser.parse_args(['a', '--', 'b', 'c']) + self.assertEqual(NS(foo='a', bar=['b', 'c']), args) + args = parser.parse_args(['a', 'b', '--', 'c']) + self.assertEqual(NS(foo='a', bar=['b', '--', 'c']), args) + args = parser.parse_args(['a', '--', 'b', '--', 'c']) + self.assertEqual(NS(foo='a', bar=['b', '--', 'c']), args) + + parser = argparse.ArgumentParser(exit_on_error=False) + parser.add_argument('--foo') + parser.add_argument('bar', nargs='...') + args = parser.parse_args(['--foo', 'a', '--', 'b', '--', 'c']) + self.assertEqual(NS(foo='a', bar=['--', 'b', '--', 'c']), args) + + def test_subparser(self): + parser = argparse.ArgumentParser(exit_on_error=False) + parser.add_argument('foo') + subparsers = parser.add_subparsers() + parser1 = subparsers.add_parser('run') + parser1.add_argument('-f') + parser1.add_argument('bar', nargs='*') + + args = parser.parse_args(['x', 'run', 'a', 'b', '-f', 'c']) + self.assertEqual(NS(foo='x', f='c', bar=['a', 'b']), args) + args = parser.parse_args(['x', 'run', 'a', 'b', '--', '-f', 'c']) + self.assertEqual(NS(foo='x', f=None, bar=['a', 'b', '-f', 'c']), args) + args = parser.parse_args(['x', 'run', 'a', '--', 'b', '-f', 'c']) + self.assertEqual(NS(foo='x', f=None, bar=['a', 'b', '-f', 'c']), args) + args = parser.parse_args(['x', 'run', '--', 'a', 'b', '-f', 'c']) + self.assertEqual(NS(foo='x', f=None, bar=['a', 'b', '-f', 'c']), args) + args = parser.parse_args(['x', '--', 'run', 'a', 'b', '-f', 'c']) + self.assertEqual(NS(foo='x', f='c', bar=['a', 'b']), args) + args = parser.parse_args(['--', 'x', 'run', 'a', 'b', '-f', 'c']) + self.assertEqual(NS(foo='x', f='c', bar=['a', 'b']), args) + args = parser.parse_args(['x', 'run', '--', 'a', '--', 'b']) + self.assertEqual(NS(foo='x', f=None, bar=['a', '--', 'b']), args) + args = parser.parse_args(['x', '--', 'run', '--', 'a', '--', 'b']) + self.assertEqual(NS(foo='x', f=None, bar=['a', '--', 'b']), args) + self.assertRaisesRegex(argparse.ArgumentError, + "invalid choice: '--'", + parser.parse_args, ['--', 'x', '--', 'run', 'a', 'b']) + + def test_subparser_after_multiple_argument_option(self): + parser = argparse.ArgumentParser(exit_on_error=False) + parser.add_argument('--foo', nargs='*') + subparsers = parser.add_subparsers() + parser1 = subparsers.add_parser('run') + parser1.add_argument('-f') + parser1.add_argument('bar', nargs='*') + + args = parser.parse_args(['--foo', 'x', 'y', '--', 'run', 'a', 'b', '-f', 'c']) + self.assertEqual(NS(foo=['x', 'y'], f='c', bar=['a', 'b']), args) + self.assertRaisesRegex(argparse.ArgumentError, + "invalid choice: '--'", + parser.parse_args, ['--foo', 'x', '--', '--', 'run', 'a', 'b']) + + # =========================== # parse_intermixed_args tests # =========================== @@ -5352,14 +6257,25 @@ def test_basic(self): args, extras = parser.parse_known_args(argv) # cannot parse the '1,2,3' - self.assertEqual(NS(bar='y', cmd='cmd', foo='x', rest=[]), args) - self.assertEqual(["1", "2", "3"], extras) + self.assertEqual(NS(bar='y', cmd='cmd', foo='x', rest=[1]), args) + self.assertEqual(["2", "3"], extras) + args, extras = parser.parse_known_intermixed_args(argv) + self.assertEqual(NS(bar='y', cmd='cmd', foo='x', rest=[1, 2, 3]), args) + self.assertEqual([], extras) + # unknown optionals go into extras + argv = 'cmd --foo x --error 1 2 --bar y 3'.split() + args, extras = parser.parse_known_intermixed_args(argv) + self.assertEqual(NS(bar='y', cmd='cmd', foo='x', rest=[1, 2, 3]), args) + self.assertEqual(['--error'], extras) argv = 'cmd --foo x 1 --error 2 --bar y 3'.split() args, extras = parser.parse_known_intermixed_args(argv) - # unknown optionals go into extras - self.assertEqual(NS(bar='y', cmd='cmd', foo='x', rest=[1]), args) - self.assertEqual(['--error', '2', '3'], extras) + self.assertEqual(NS(bar='y', cmd='cmd', foo='x', rest=[1, 2, 3]), args) + self.assertEqual(['--error'], extras) + argv = 'cmd --foo x 1 2 --error --bar y 3'.split() + args, extras = parser.parse_known_intermixed_args(argv) + self.assertEqual(NS(bar='y', cmd='cmd', foo='x', rest=[1, 2, 3]), args) + self.assertEqual(['--error'], extras) # restores attributes that were temporarily changed self.assertIsNone(parser.usage) @@ -5378,28 +6294,49 @@ def test_remainder(self): parser.parse_intermixed_args(argv) self.assertRegex(str(cm.exception), r'\.\.\.') - def test_exclusive(self): - # mutually exclusive group; intermixed works fine - parser = ErrorRaisingArgumentParser(prog='PROG') + def test_required_exclusive(self): + # required mutually exclusive group; intermixed works fine + parser = argparse.ArgumentParser(prog='PROG', exit_on_error=False) group = parser.add_mutually_exclusive_group(required=True) group.add_argument('--foo', action='store_true', help='FOO') group.add_argument('--spam', help='SPAM') parser.add_argument('badger', nargs='*', default='X', help='BADGER') + args = parser.parse_intermixed_args('--foo 1 2'.split()) + self.assertEqual(NS(badger=['1', '2'], foo=True, spam=None), args) args = parser.parse_intermixed_args('1 --foo 2'.split()) self.assertEqual(NS(badger=['1', '2'], foo=True, spam=None), args) - self.assertRaises(ArgumentParserError, parser.parse_intermixed_args, '1 2'.split()) + self.assertRaisesRegex(argparse.ArgumentError, + 'one of the arguments --foo --spam is required', + parser.parse_intermixed_args, '1 2'.split()) self.assertEqual(group.required, True) - def test_exclusive_incompatible(self): - # mutually exclusive group including positional - fail - parser = ErrorRaisingArgumentParser(prog='PROG') + def test_required_exclusive_with_positional(self): + # required mutually exclusive group with positional argument + parser = argparse.ArgumentParser(prog='PROG', exit_on_error=False) group = parser.add_mutually_exclusive_group(required=True) group.add_argument('--foo', action='store_true', help='FOO') group.add_argument('--spam', help='SPAM') group.add_argument('badger', nargs='*', default='X', help='BADGER') - self.assertRaises(TypeError, parser.parse_intermixed_args, []) + args = parser.parse_intermixed_args(['--foo']) + self.assertEqual(NS(foo=True, spam=None, badger='X'), args) + args = parser.parse_intermixed_args(['a', 'b']) + self.assertEqual(NS(foo=False, spam=None, badger=['a', 'b']), args) + self.assertRaisesRegex(argparse.ArgumentError, + 'one of the arguments --foo --spam badger is required', + parser.parse_intermixed_args, []) + self.assertRaisesRegex(argparse.ArgumentError, + 'argument badger: not allowed with argument --foo', + parser.parse_intermixed_args, ['--foo', 'a', 'b']) + self.assertRaisesRegex(argparse.ArgumentError, + 'argument badger: not allowed with argument --foo', + parser.parse_intermixed_args, ['a', '--foo', 'b']) self.assertEqual(group.required, True) + def test_invalid_args(self): + parser = ErrorRaisingArgumentParser(prog='PROG') + self.assertRaises(ArgumentParserError, parser.parse_intermixed_args, ['a']) + + class TestIntermixedMessageContentError(TestCase): # case where Intermixed gives different error message # error is raised by 1st parsing step @@ -5417,7 +6354,7 @@ def test_missing_argument_name_in_message(self): with self.assertRaises(ArgumentParserError) as cm: parser.parse_intermixed_args([]) msg = str(cm.exception) - self.assertNotRegex(msg, 'req_pos') + self.assertRegex(msg, 'req_pos') self.assertRegex(msg, 'req_opt') # ========================== @@ -5667,7 +6604,8 @@ def test_help_with_metavar(self): class TestExitOnError(TestCase): def setUp(self): - self.parser = argparse.ArgumentParser(exit_on_error=False) + self.parser = argparse.ArgumentParser(exit_on_error=False, + fromfile_prefix_chars='@') self.parser.add_argument('--integers', metavar='N', type=int) def test_exit_on_error_with_good_args(self): @@ -5678,6 +6616,155 @@ def test_exit_on_error_with_bad_args(self): with self.assertRaises(argparse.ArgumentError): self.parser.parse_args('--integers a'.split()) + def test_unrecognized_args(self): + self.assertRaisesRegex(argparse.ArgumentError, + 'unrecognized arguments: --foo bar', + self.parser.parse_args, '--foo bar'.split()) + + def test_unrecognized_intermixed_args(self): + self.assertRaisesRegex(argparse.ArgumentError, + 'unrecognized arguments: --foo bar', + self.parser.parse_intermixed_args, '--foo bar'.split()) + + def test_required_args(self): + self.parser.add_argument('bar') + self.parser.add_argument('baz') + self.assertRaisesRegex(argparse.ArgumentError, + 'the following arguments are required: bar, baz$', + self.parser.parse_args, []) + + def test_required_args_with_metavar(self): + self.parser.add_argument('bar') + self.parser.add_argument('baz', metavar='BaZ') + self.assertRaisesRegex(argparse.ArgumentError, + 'the following arguments are required: bar, BaZ$', + self.parser.parse_args, []) + + def test_required_args_n(self): + self.parser.add_argument('bar') + self.parser.add_argument('baz', nargs=3) + self.assertRaisesRegex(argparse.ArgumentError, + 'the following arguments are required: bar, baz$', + self.parser.parse_args, []) + + def test_required_args_n_with_metavar(self): + self.parser.add_argument('bar') + self.parser.add_argument('baz', nargs=3, metavar=('B', 'A', 'Z')) + self.assertRaisesRegex(argparse.ArgumentError, + 'the following arguments are required: bar, B, A, Z$', + self.parser.parse_args, []) + + def test_required_args_optional(self): + self.parser.add_argument('bar') + self.parser.add_argument('baz', nargs='?') + self.assertRaisesRegex(argparse.ArgumentError, + 'the following arguments are required: bar$', + self.parser.parse_args, []) + + def test_required_args_zero_or_more(self): + self.parser.add_argument('bar') + self.parser.add_argument('baz', nargs='*') + self.assertRaisesRegex(argparse.ArgumentError, + 'the following arguments are required: bar$', + self.parser.parse_args, []) + + def test_required_args_one_or_more(self): + self.parser.add_argument('bar') + self.parser.add_argument('baz', nargs='+') + self.assertRaisesRegex(argparse.ArgumentError, + 'the following arguments are required: bar, baz$', + self.parser.parse_args, []) + + def test_required_args_one_or_more_with_metavar(self): + self.parser.add_argument('bar') + self.parser.add_argument('baz', nargs='+', metavar=('BaZ1', 'BaZ2')) + self.assertRaisesRegex(argparse.ArgumentError, + r'the following arguments are required: bar, BaZ1\[, BaZ2]$', + self.parser.parse_args, []) + + def test_required_args_remainder(self): + self.parser.add_argument('bar') + self.parser.add_argument('baz', nargs='...') + self.assertRaisesRegex(argparse.ArgumentError, + 'the following arguments are required: bar$', + self.parser.parse_args, []) + + def test_required_mutually_exclusive_args(self): + group = self.parser.add_mutually_exclusive_group(required=True) + group.add_argument('--bar') + group.add_argument('--baz') + self.assertRaisesRegex(argparse.ArgumentError, + 'one of the arguments --bar --baz is required', + self.parser.parse_args, []) + + def test_conflicting_mutually_exclusive_args_optional_with_metavar(self): + group = self.parser.add_mutually_exclusive_group() + group.add_argument('--bar') + group.add_argument('baz', nargs='?', metavar='BaZ') + self.assertRaisesRegex(argparse.ArgumentError, + 'argument BaZ: not allowed with argument --bar$', + self.parser.parse_args, ['--bar', 'a', 'b']) + self.assertRaisesRegex(argparse.ArgumentError, + 'argument --bar: not allowed with argument BaZ$', + self.parser.parse_args, ['a', '--bar', 'b']) + + def test_conflicting_mutually_exclusive_args_zero_or_more_with_metavar1(self): + group = self.parser.add_mutually_exclusive_group() + group.add_argument('--bar') + group.add_argument('baz', nargs='*', metavar=('BAZ1',)) + self.assertRaisesRegex(argparse.ArgumentError, + 'argument BAZ1: not allowed with argument --bar$', + self.parser.parse_args, ['--bar', 'a', 'b']) + self.assertRaisesRegex(argparse.ArgumentError, + 'argument --bar: not allowed with argument BAZ1$', + self.parser.parse_args, ['a', '--bar', 'b']) + + def test_conflicting_mutually_exclusive_args_zero_or_more_with_metavar2(self): + group = self.parser.add_mutually_exclusive_group() + group.add_argument('--bar') + group.add_argument('baz', nargs='*', metavar=('BAZ1', 'BAZ2')) + self.assertRaisesRegex(argparse.ArgumentError, + r'argument BAZ1\[, BAZ2]: not allowed with argument --bar$', + self.parser.parse_args, ['--bar', 'a', 'b']) + self.assertRaisesRegex(argparse.ArgumentError, + r'argument --bar: not allowed with argument BAZ1\[, BAZ2]$', + self.parser.parse_args, ['a', '--bar', 'b']) + + def test_ambiguous_option(self): + self.parser.add_argument('--foobaz') + self.parser.add_argument('--fooble', action='store_true') + self.parser.add_argument('--foogle') + self.assertRaisesRegex(argparse.ArgumentError, + "ambiguous option: --foob could match --foobaz, --fooble", + self.parser.parse_args, ['--foob']) + self.assertRaisesRegex(argparse.ArgumentError, + "ambiguous option: --foob=1 could match --foobaz, --fooble$", + self.parser.parse_args, ['--foob=1']) + self.assertRaisesRegex(argparse.ArgumentError, + "ambiguous option: --foob could match --foobaz, --fooble$", + self.parser.parse_args, ['--foob', '1', '--foogle', '2']) + self.assertRaisesRegex(argparse.ArgumentError, + "ambiguous option: --foob=1 could match --foobaz, --fooble$", + self.parser.parse_args, ['--foob=1', '--foogle', '2']) + + # TODO: RUSTPYTHON + @unittest.expectedFailure + def test_os_error(self): + self.parser.add_argument('file') + self.assertRaisesRegex(argparse.ArgumentError, + "No such file or directory: 'no-such-file'", + self.parser.parse_args, ['@no-such-file']) + + +# ================= +# Translation tests +# ================= + +class TestTranslations(TestTranslationsBase): + + def test_translations(self): + self.assertMsgidsEqual(argparse) + def tearDownModule(): # Remove global references to avoid looking like we have refleaks. @@ -5686,4 +6773,8 @@ def tearDownModule(): if __name__ == '__main__': + # To regenerate translation snapshots + if len(sys.argv) > 1 and sys.argv[1] == '--snapshot-update': + update_translation_snapshots(argparse) + sys.exit(0) unittest.main() diff --git a/Lib/test/test_array.py b/Lib/test/test_array.py index c3250ef72e..be89bec522 100644 --- a/Lib/test/test_array.py +++ b/Lib/test/test_array.py @@ -176,7 +176,7 @@ def test_numbers(self): self.assertEqual(a, b, msg="{0!r} != {1!r}; testcase={2!r}".format(a, b, testcase)) - # TODO: RUSTPYTHON + # TODO: RUSTPYTHON - requires UTF-32 encoding support in codecs and proper array reconstructor implementation @unittest.expectedFailure def test_unicode(self): teststr = "Bonne Journ\xe9e \U0002030a\U00020347" diff --git a/Lib/test/test_ast.py b/Lib/test/test_ast.py index 8b28686fd6..1cac438250 100644 --- a/Lib/test/test_ast.py +++ b/Lib/test/test_ast.py @@ -340,7 +340,8 @@ class X: support.gc_collect() self.assertIsNone(ref()) - @unittest.skip("TODO: RUSTPYTHON, thread 'main' panicked at 'not implemented: async for comprehensions'") + # TODO: RUSTPYTHON + @unittest.expectedFailure def test_snippets(self): for input, output, kind in ((exec_tests, exec_results, "exec"), (single_tests, single_results, "single"), @@ -353,7 +354,8 @@ def test_snippets(self): with self.subTest(action="compiling", input=i, kind=kind): compile(ast_tree, "?", kind) - @unittest.skip("TODO: RUSTPYTHON, thread 'main' panicked at 'not implemented: async for comprehensions'") + # TODO: RUSTPYTHON + @unittest.expectedFailure def test_ast_validation(self): # compile() is the only function that calls PyAST_Validate snippets_to_validate = exec_tests + single_tests + eval_tests @@ -361,7 +363,8 @@ def test_ast_validation(self): tree = ast.parse(snippet) compile(tree, '', 'exec') - @unittest.skip("TODO: RUSTPYTHON, OverflowError: Python int too large to convert to Rust u32") + # TODO: RUSTPYTHON + @unittest.expectedFailure def test_invalid_position_information(self): invalid_linenos = [ (10, 1), (-10, -11), (10, -11), (-5, -2), (-5, 1) diff --git a/Lib/test/test_asynchat.py b/Lib/test/test_asynchat.py deleted file mode 100644 index 1fcc882ce6..0000000000 --- a/Lib/test/test_asynchat.py +++ /dev/null @@ -1,290 +0,0 @@ -# test asynchat - -from test import support -from test.support import socket_helper -from test.support import threading_helper - - -import asynchat -import asyncore -import errno -import socket -import sys -import threading -import time -import unittest -import unittest.mock - -HOST = socket_helper.HOST -SERVER_QUIT = b'QUIT\n' -TIMEOUT = 3.0 - - -class echo_server(threading.Thread): - # parameter to determine the number of bytes passed back to the - # client each send - chunk_size = 1 - - def __init__(self, event): - threading.Thread.__init__(self) - self.event = event - self.sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) - self.port = socket_helper.bind_port(self.sock) - # This will be set if the client wants us to wait before echoing - # data back. - self.start_resend_event = None - - def run(self): - self.sock.listen() - self.event.set() - conn, client = self.sock.accept() - self.buffer = b"" - # collect data until quit message is seen - while SERVER_QUIT not in self.buffer: - data = conn.recv(1) - if not data: - break - self.buffer = self.buffer + data - - # remove the SERVER_QUIT message - self.buffer = self.buffer.replace(SERVER_QUIT, b'') - - if self.start_resend_event: - self.start_resend_event.wait() - - # re-send entire set of collected data - try: - # this may fail on some tests, such as test_close_when_done, - # since the client closes the channel when it's done sending - while self.buffer: - n = conn.send(self.buffer[:self.chunk_size]) - time.sleep(0.001) - self.buffer = self.buffer[n:] - except: - pass - - conn.close() - self.sock.close() - -class echo_client(asynchat.async_chat): - - def __init__(self, terminator, server_port): - asynchat.async_chat.__init__(self) - self.contents = [] - self.create_socket(socket.AF_INET, socket.SOCK_STREAM) - self.connect((HOST, server_port)) - self.set_terminator(terminator) - self.buffer = b"" - - def handle_connect(self): - pass - - if sys.platform == 'darwin': - # select.poll returns a select.POLLHUP at the end of the tests - # on darwin, so just ignore it - def handle_expt(self): - pass - - def collect_incoming_data(self, data): - self.buffer += data - - def found_terminator(self): - self.contents.append(self.buffer) - self.buffer = b"" - -def start_echo_server(): - event = threading.Event() - s = echo_server(event) - s.start() - event.wait() - event.clear() - time.sleep(0.01) # Give server time to start accepting. - return s, event - - -class TestAsynchat(unittest.TestCase): - usepoll = False - - def setUp(self): - self._threads = threading_helper.threading_setup() - - def tearDown(self): - threading_helper.threading_cleanup(*self._threads) - - def line_terminator_check(self, term, server_chunk): - event = threading.Event() - s = echo_server(event) - s.chunk_size = server_chunk - s.start() - event.wait() - event.clear() - time.sleep(0.01) # Give server time to start accepting. - c = echo_client(term, s.port) - c.push(b"hello ") - c.push(b"world" + term) - c.push(b"I'm not dead yet!" + term) - c.push(SERVER_QUIT) - asyncore.loop(use_poll=self.usepoll, count=300, timeout=.01) - threading_helper.join_thread(s) - - self.assertEqual(c.contents, [b"hello world", b"I'm not dead yet!"]) - - # the line terminator tests below check receiving variously-sized - # chunks back from the server in order to exercise all branches of - # async_chat.handle_read - - def test_line_terminator1(self): - # test one-character terminator - for l in (1, 2, 3): - self.line_terminator_check(b'\n', l) - - def test_line_terminator2(self): - # test two-character terminator - for l in (1, 2, 3): - self.line_terminator_check(b'\r\n', l) - - def test_line_terminator3(self): - # test three-character terminator - for l in (1, 2, 3): - self.line_terminator_check(b'qqq', l) - - def numeric_terminator_check(self, termlen): - # Try reading a fixed number of bytes - s, event = start_echo_server() - c = echo_client(termlen, s.port) - data = b"hello world, I'm not dead yet!\n" - c.push(data) - c.push(SERVER_QUIT) - asyncore.loop(use_poll=self.usepoll, count=300, timeout=.01) - threading_helper.join_thread(s) - - self.assertEqual(c.contents, [data[:termlen]]) - - def test_numeric_terminator1(self): - # check that ints & longs both work (since type is - # explicitly checked in async_chat.handle_read) - self.numeric_terminator_check(1) - - def test_numeric_terminator2(self): - self.numeric_terminator_check(6) - - def test_none_terminator(self): - # Try reading a fixed number of bytes - s, event = start_echo_server() - c = echo_client(None, s.port) - data = b"hello world, I'm not dead yet!\n" - c.push(data) - c.push(SERVER_QUIT) - asyncore.loop(use_poll=self.usepoll, count=300, timeout=.01) - threading_helper.join_thread(s) - - self.assertEqual(c.contents, []) - self.assertEqual(c.buffer, data) - - def test_simple_producer(self): - s, event = start_echo_server() - c = echo_client(b'\n', s.port) - data = b"hello world\nI'm not dead yet!\n" - p = asynchat.simple_producer(data+SERVER_QUIT, buffer_size=8) - c.push_with_producer(p) - asyncore.loop(use_poll=self.usepoll, count=300, timeout=.01) - threading_helper.join_thread(s) - - self.assertEqual(c.contents, [b"hello world", b"I'm not dead yet!"]) - - def test_string_producer(self): - s, event = start_echo_server() - c = echo_client(b'\n', s.port) - data = b"hello world\nI'm not dead yet!\n" - c.push_with_producer(data+SERVER_QUIT) - asyncore.loop(use_poll=self.usepoll, count=300, timeout=.01) - threading_helper.join_thread(s) - - self.assertEqual(c.contents, [b"hello world", b"I'm not dead yet!"]) - - def test_empty_line(self): - # checks that empty lines are handled correctly - s, event = start_echo_server() - c = echo_client(b'\n', s.port) - c.push(b"hello world\n\nI'm not dead yet!\n") - c.push(SERVER_QUIT) - asyncore.loop(use_poll=self.usepoll, count=300, timeout=.01) - threading_helper.join_thread(s) - - self.assertEqual(c.contents, - [b"hello world", b"", b"I'm not dead yet!"]) - - def test_close_when_done(self): - s, event = start_echo_server() - s.start_resend_event = threading.Event() - c = echo_client(b'\n', s.port) - c.push(b"hello world\nI'm not dead yet!\n") - c.push(SERVER_QUIT) - c.close_when_done() - asyncore.loop(use_poll=self.usepoll, count=300, timeout=.01) - - # Only allow the server to start echoing data back to the client after - # the client has closed its connection. This prevents a race condition - # where the server echoes all of its data before we can check that it - # got any down below. - s.start_resend_event.set() - threading_helper.join_thread(s) - - self.assertEqual(c.contents, []) - # the server might have been able to send a byte or two back, but this - # at least checks that it received something and didn't just fail - # (which could still result in the client not having received anything) - self.assertGreater(len(s.buffer), 0) - - def test_push(self): - # Issue #12523: push() should raise a TypeError if it doesn't get - # a bytes string - s, event = start_echo_server() - c = echo_client(b'\n', s.port) - data = b'bytes\n' - c.push(data) - c.push(bytearray(data)) - c.push(memoryview(data)) - self.assertRaises(TypeError, c.push, 10) - self.assertRaises(TypeError, c.push, 'unicode') - c.push(SERVER_QUIT) - asyncore.loop(use_poll=self.usepoll, count=300, timeout=.01) - threading_helper.join_thread(s) - self.assertEqual(c.contents, [b'bytes', b'bytes', b'bytes']) - - -class TestAsynchat_WithPoll(TestAsynchat): - usepoll = True - - -class TestAsynchatMocked(unittest.TestCase): - def test_blockingioerror(self): - # Issue #16133: handle_read() must ignore BlockingIOError - sock = unittest.mock.Mock() - sock.recv.side_effect = BlockingIOError(errno.EAGAIN) - - dispatcher = asynchat.async_chat() - dispatcher.set_socket(sock) - self.addCleanup(dispatcher.del_channel) - - with unittest.mock.patch.object(dispatcher, 'handle_error') as error: - dispatcher.handle_read() - self.assertFalse(error.called) - - -class TestHelperFunctions(unittest.TestCase): - def test_find_prefix_at_end(self): - self.assertEqual(asynchat.find_prefix_at_end("qwerty\r", "\r\n"), 1) - self.assertEqual(asynchat.find_prefix_at_end("qwertydkjf", "\r\n"), 0) - - -class TestNotConnected(unittest.TestCase): - def test_disallow_negative_terminator(self): - # Issue #11259 - client = asynchat.async_chat() - self.assertRaises(ValueError, client.set_terminator, -1) - - - -if __name__ == "__main__": - unittest.main() diff --git a/Lib/test/test_asyncore.py b/Lib/test/test_asyncore.py deleted file mode 100644 index bd43463da3..0000000000 --- a/Lib/test/test_asyncore.py +++ /dev/null @@ -1,838 +0,0 @@ -import asyncore -import unittest -import select -import os -import socket -import sys -import time -import errno -import struct -import threading - -from test import support -from test.support import os_helper -from test.support import socket_helper -from test.support import threading_helper -from test.support import warnings_helper -from io import BytesIO - -if support.PGO: - raise unittest.SkipTest("test is not helpful for PGO") - - -TIMEOUT = 3 -HAS_UNIX_SOCKETS = hasattr(socket, 'AF_UNIX') - -class dummysocket: - def __init__(self): - self.closed = False - - def close(self): - self.closed = True - - def fileno(self): - return 42 - -class dummychannel: - def __init__(self): - self.socket = dummysocket() - - def close(self): - self.socket.close() - -class exitingdummy: - def __init__(self): - pass - - def handle_read_event(self): - raise asyncore.ExitNow() - - handle_write_event = handle_read_event - handle_close = handle_read_event - handle_expt_event = handle_read_event - -class crashingdummy: - def __init__(self): - self.error_handled = False - - def handle_read_event(self): - raise Exception() - - handle_write_event = handle_read_event - handle_close = handle_read_event - handle_expt_event = handle_read_event - - def handle_error(self): - self.error_handled = True - -# used when testing senders; just collects what it gets until newline is sent -def capture_server(evt, buf, serv): - try: - serv.listen() - conn, addr = serv.accept() - except socket.timeout: - pass - else: - n = 200 - start = time.monotonic() - while n > 0 and time.monotonic() - start < 3.0: - r, w, e = select.select([conn], [], [], 0.1) - if r: - n -= 1 - data = conn.recv(10) - # keep everything except for the newline terminator - buf.write(data.replace(b'\n', b'')) - if b'\n' in data: - break - time.sleep(0.01) - - conn.close() - finally: - serv.close() - evt.set() - -def bind_af_aware(sock, addr): - """Helper function to bind a socket according to its family.""" - if HAS_UNIX_SOCKETS and sock.family == socket.AF_UNIX: - # Make sure the path doesn't exist. - os_helper.unlink(addr) - socket_helper.bind_unix_socket(sock, addr) - else: - sock.bind(addr) - - -class HelperFunctionTests(unittest.TestCase): - def test_readwriteexc(self): - # Check exception handling behavior of read, write and _exception - - # check that ExitNow exceptions in the object handler method - # bubbles all the way up through asyncore read/write/_exception calls - tr1 = exitingdummy() - self.assertRaises(asyncore.ExitNow, asyncore.read, tr1) - self.assertRaises(asyncore.ExitNow, asyncore.write, tr1) - self.assertRaises(asyncore.ExitNow, asyncore._exception, tr1) - - # check that an exception other than ExitNow in the object handler - # method causes the handle_error method to get called - tr2 = crashingdummy() - asyncore.read(tr2) - self.assertEqual(tr2.error_handled, True) - - tr2 = crashingdummy() - asyncore.write(tr2) - self.assertEqual(tr2.error_handled, True) - - tr2 = crashingdummy() - asyncore._exception(tr2) - self.assertEqual(tr2.error_handled, True) - - # asyncore.readwrite uses constants in the select module that - # are not present in Windows systems (see this thread: - # http://mail.python.org/pipermail/python-list/2001-October/109973.html) - # These constants should be present as long as poll is available - - @unittest.skipUnless(hasattr(select, 'poll'), 'select.poll required') - def test_readwrite(self): - # Check that correct methods are called by readwrite() - - attributes = ('read', 'expt', 'write', 'closed', 'error_handled') - - expected = ( - (select.POLLIN, 'read'), - (select.POLLPRI, 'expt'), - (select.POLLOUT, 'write'), - (select.POLLERR, 'closed'), - (select.POLLHUP, 'closed'), - (select.POLLNVAL, 'closed'), - ) - - class testobj: - def __init__(self): - self.read = False - self.write = False - self.closed = False - self.expt = False - self.error_handled = False - - def handle_read_event(self): - self.read = True - - def handle_write_event(self): - self.write = True - - def handle_close(self): - self.closed = True - - def handle_expt_event(self): - self.expt = True - - def handle_error(self): - self.error_handled = True - - for flag, expectedattr in expected: - tobj = testobj() - self.assertEqual(getattr(tobj, expectedattr), False) - asyncore.readwrite(tobj, flag) - - # Only the attribute modified by the routine we expect to be - # called should be True. - for attr in attributes: - self.assertEqual(getattr(tobj, attr), attr==expectedattr) - - # check that ExitNow exceptions in the object handler method - # bubbles all the way up through asyncore readwrite call - tr1 = exitingdummy() - self.assertRaises(asyncore.ExitNow, asyncore.readwrite, tr1, flag) - - # check that an exception other than ExitNow in the object handler - # method causes the handle_error method to get called - tr2 = crashingdummy() - self.assertEqual(tr2.error_handled, False) - asyncore.readwrite(tr2, flag) - self.assertEqual(tr2.error_handled, True) - - def test_closeall(self): - self.closeall_check(False) - - def test_closeall_default(self): - self.closeall_check(True) - - def closeall_check(self, usedefault): - # Check that close_all() closes everything in a given map - - l = [] - testmap = {} - for i in range(10): - c = dummychannel() - l.append(c) - self.assertEqual(c.socket.closed, False) - testmap[i] = c - - if usedefault: - socketmap = asyncore.socket_map - try: - asyncore.socket_map = testmap - asyncore.close_all() - finally: - testmap, asyncore.socket_map = asyncore.socket_map, socketmap - else: - asyncore.close_all(testmap) - - self.assertEqual(len(testmap), 0) - - for c in l: - self.assertEqual(c.socket.closed, True) - - def test_compact_traceback(self): - try: - raise Exception("I don't like spam!") - except: - real_t, real_v, real_tb = sys.exc_info() - r = asyncore.compact_traceback() - else: - self.fail("Expected exception") - - (f, function, line), t, v, info = r - self.assertEqual(os.path.split(f)[-1], 'test_asyncore.py') - self.assertEqual(function, 'test_compact_traceback') - self.assertEqual(t, real_t) - self.assertEqual(v, real_v) - self.assertEqual(info, '[%s|%s|%s]' % (f, function, line)) - - -class DispatcherTests(unittest.TestCase): - def setUp(self): - pass - - def tearDown(self): - asyncore.close_all() - - def test_basic(self): - d = asyncore.dispatcher() - self.assertEqual(d.readable(), True) - self.assertEqual(d.writable(), True) - - def test_repr(self): - d = asyncore.dispatcher() - self.assertEqual(repr(d), '' % id(d)) - - def test_log(self): - d = asyncore.dispatcher() - - # capture output of dispatcher.log() (to stderr) - l1 = "Lovely spam! Wonderful spam!" - l2 = "I don't like spam!" - with support.captured_stderr() as stderr: - d.log(l1) - d.log(l2) - - lines = stderr.getvalue().splitlines() - self.assertEqual(lines, ['log: %s' % l1, 'log: %s' % l2]) - - def test_log_info(self): - d = asyncore.dispatcher() - - # capture output of dispatcher.log_info() (to stdout via print) - l1 = "Have you got anything without spam?" - l2 = "Why can't she have egg bacon spam and sausage?" - l3 = "THAT'S got spam in it!" - with support.captured_stdout() as stdout: - d.log_info(l1, 'EGGS') - d.log_info(l2) - d.log_info(l3, 'SPAM') - - lines = stdout.getvalue().splitlines() - expected = ['EGGS: %s' % l1, 'info: %s' % l2, 'SPAM: %s' % l3] - self.assertEqual(lines, expected) - - def test_unhandled(self): - d = asyncore.dispatcher() - d.ignore_log_types = () - - # capture output of dispatcher.log_info() (to stdout via print) - with support.captured_stdout() as stdout: - d.handle_expt() - d.handle_read() - d.handle_write() - d.handle_connect() - - lines = stdout.getvalue().splitlines() - expected = ['warning: unhandled incoming priority event', - 'warning: unhandled read event', - 'warning: unhandled write event', - 'warning: unhandled connect event'] - self.assertEqual(lines, expected) - - def test_strerror(self): - # refers to bug #8573 - err = asyncore._strerror(errno.EPERM) - if hasattr(os, 'strerror'): - self.assertEqual(err, os.strerror(errno.EPERM)) - err = asyncore._strerror(-1) - self.assertTrue(err != "") - - -class dispatcherwithsend_noread(asyncore.dispatcher_with_send): - def readable(self): - return False - - def handle_connect(self): - pass - - -class DispatcherWithSendTests(unittest.TestCase): - def setUp(self): - pass - - def tearDown(self): - asyncore.close_all() - - @threading_helper.reap_threads - def test_send(self): - evt = threading.Event() - sock = socket.socket() - sock.settimeout(3) - port = socket_helper.bind_port(sock) - - cap = BytesIO() - args = (evt, cap, sock) - t = threading.Thread(target=capture_server, args=args) - t.start() - try: - # wait a little longer for the server to initialize (it sometimes - # refuses connections on slow machines without this wait) - time.sleep(0.2) - - data = b"Suppose there isn't a 16-ton weight?" - d = dispatcherwithsend_noread() - d.create_socket() - d.connect((socket_helper.HOST, port)) - - # give time for socket to connect - time.sleep(0.1) - - d.send(data) - d.send(data) - d.send(b'\n') - - n = 1000 - while d.out_buffer and n > 0: - asyncore.poll() - n -= 1 - - evt.wait() - - self.assertEqual(cap.getvalue(), data*2) - finally: - threading_helper.join_thread(t) - - -@unittest.skipUnless(hasattr(asyncore, 'file_wrapper'), - 'asyncore.file_wrapper required') -class FileWrapperTest(unittest.TestCase): - def setUp(self): - self.d = b"It's not dead, it's sleeping!" - with open(os_helper.TESTFN, 'wb') as file: - file.write(self.d) - - def tearDown(self): - os_helper.unlink(os_helper.TESTFN) - - def test_recv(self): - fd = os.open(os_helper.TESTFN, os.O_RDONLY) - w = asyncore.file_wrapper(fd) - os.close(fd) - - self.assertNotEqual(w.fd, fd) - self.assertNotEqual(w.fileno(), fd) - self.assertEqual(w.recv(13), b"It's not dead") - self.assertEqual(w.read(6), b", it's") - w.close() - self.assertRaises(OSError, w.read, 1) - - def test_send(self): - d1 = b"Come again?" - d2 = b"I want to buy some cheese." - fd = os.open(os_helper.TESTFN, os.O_WRONLY | os.O_APPEND) - w = asyncore.file_wrapper(fd) - os.close(fd) - - w.write(d1) - w.send(d2) - w.close() - with open(os_helper.TESTFN, 'rb') as file: - self.assertEqual(file.read(), self.d + d1 + d2) - - @unittest.skipUnless(hasattr(asyncore, 'file_dispatcher'), - 'asyncore.file_dispatcher required') - def test_dispatcher(self): - fd = os.open(os_helper.TESTFN, os.O_RDONLY) - data = [] - class FileDispatcher(asyncore.file_dispatcher): - def handle_read(self): - data.append(self.recv(29)) - s = FileDispatcher(fd) - os.close(fd) - asyncore.loop(timeout=0.01, use_poll=True, count=2) - self.assertEqual(b"".join(data), self.d) - - def test_resource_warning(self): - # Issue #11453 - fd = os.open(os_helper.TESTFN, os.O_RDONLY) - f = asyncore.file_wrapper(fd) - - os.close(fd) - with warnings_helper.check_warnings(('', ResourceWarning)): - f = None - support.gc_collect() - - def test_close_twice(self): - fd = os.open(os_helper.TESTFN, os.O_RDONLY) - f = asyncore.file_wrapper(fd) - os.close(fd) - - os.close(f.fd) # file_wrapper dupped fd - with self.assertRaises(OSError): - f.close() - - self.assertEqual(f.fd, -1) - # calling close twice should not fail - f.close() - - -class BaseTestHandler(asyncore.dispatcher): - - def __init__(self, sock=None): - asyncore.dispatcher.__init__(self, sock) - self.flag = False - - def handle_accept(self): - raise Exception("handle_accept not supposed to be called") - - def handle_accepted(self): - raise Exception("handle_accepted not supposed to be called") - - def handle_connect(self): - raise Exception("handle_connect not supposed to be called") - - def handle_expt(self): - raise Exception("handle_expt not supposed to be called") - - def handle_close(self): - raise Exception("handle_close not supposed to be called") - - def handle_error(self): - raise - - -class BaseServer(asyncore.dispatcher): - """A server which listens on an address and dispatches the - connection to a handler. - """ - - def __init__(self, family, addr, handler=BaseTestHandler): - asyncore.dispatcher.__init__(self) - self.create_socket(family) - self.set_reuse_addr() - bind_af_aware(self.socket, addr) - self.listen(5) - self.handler = handler - - @property - def address(self): - return self.socket.getsockname() - - def handle_accepted(self, sock, addr): - self.handler(sock) - - def handle_error(self): - raise - - -class BaseClient(BaseTestHandler): - - def __init__(self, family, address): - BaseTestHandler.__init__(self) - self.create_socket(family) - self.connect(address) - - def handle_connect(self): - pass - - -class BaseTestAPI: - - def tearDown(self): - asyncore.close_all(ignore_all=True) - - def loop_waiting_for_flag(self, instance, timeout=5): - timeout = float(timeout) / 100 - count = 100 - while asyncore.socket_map and count > 0: - asyncore.loop(timeout=0.01, count=1, use_poll=self.use_poll) - if instance.flag: - return - count -= 1 - time.sleep(timeout) - self.fail("flag not set") - - def test_handle_connect(self): - # make sure handle_connect is called on connect() - - class TestClient(BaseClient): - def handle_connect(self): - self.flag = True - - server = BaseServer(self.family, self.addr) - client = TestClient(self.family, server.address) - self.loop_waiting_for_flag(client) - - def test_handle_accept(self): - # make sure handle_accept() is called when a client connects - - class TestListener(BaseTestHandler): - - def __init__(self, family, addr): - BaseTestHandler.__init__(self) - self.create_socket(family) - bind_af_aware(self.socket, addr) - self.listen(5) - self.address = self.socket.getsockname() - - def handle_accept(self): - self.flag = True - - server = TestListener(self.family, self.addr) - client = BaseClient(self.family, server.address) - self.loop_waiting_for_flag(server) - - def test_handle_accepted(self): - # make sure handle_accepted() is called when a client connects - - class TestListener(BaseTestHandler): - - def __init__(self, family, addr): - BaseTestHandler.__init__(self) - self.create_socket(family) - bind_af_aware(self.socket, addr) - self.listen(5) - self.address = self.socket.getsockname() - - def handle_accept(self): - asyncore.dispatcher.handle_accept(self) - - def handle_accepted(self, sock, addr): - sock.close() - self.flag = True - - server = TestListener(self.family, self.addr) - client = BaseClient(self.family, server.address) - self.loop_waiting_for_flag(server) - - - def test_handle_read(self): - # make sure handle_read is called on data received - - class TestClient(BaseClient): - def handle_read(self): - self.flag = True - - class TestHandler(BaseTestHandler): - def __init__(self, conn): - BaseTestHandler.__init__(self, conn) - self.send(b'x' * 1024) - - server = BaseServer(self.family, self.addr, TestHandler) - client = TestClient(self.family, server.address) - self.loop_waiting_for_flag(client) - - def test_handle_write(self): - # make sure handle_write is called - - class TestClient(BaseClient): - def handle_write(self): - self.flag = True - - server = BaseServer(self.family, self.addr) - client = TestClient(self.family, server.address) - self.loop_waiting_for_flag(client) - - def test_handle_close(self): - # make sure handle_close is called when the other end closes - # the connection - - class TestClient(BaseClient): - - def handle_read(self): - # in order to make handle_close be called we are supposed - # to make at least one recv() call - self.recv(1024) - - def handle_close(self): - self.flag = True - self.close() - - class TestHandler(BaseTestHandler): - def __init__(self, conn): - BaseTestHandler.__init__(self, conn) - self.close() - - server = BaseServer(self.family, self.addr, TestHandler) - client = TestClient(self.family, server.address) - self.loop_waiting_for_flag(client) - - def test_handle_close_after_conn_broken(self): - # Check that ECONNRESET/EPIPE is correctly handled (issues #5661 and - # #11265). - - data = b'\0' * 128 - - class TestClient(BaseClient): - - def handle_write(self): - self.send(data) - - def handle_close(self): - self.flag = True - self.close() - - def handle_expt(self): - self.flag = True - self.close() - - class TestHandler(BaseTestHandler): - - def handle_read(self): - self.recv(len(data)) - self.close() - - def writable(self): - return False - - server = BaseServer(self.family, self.addr, TestHandler) - client = TestClient(self.family, server.address) - self.loop_waiting_for_flag(client) - - @unittest.skipIf(sys.platform.startswith("sunos"), - "OOB support is broken on Solaris") - def test_handle_expt(self): - # Make sure handle_expt is called on OOB data received. - # Note: this might fail on some platforms as OOB data is - # tenuously supported and rarely used. - if HAS_UNIX_SOCKETS and self.family == socket.AF_UNIX: - self.skipTest("Not applicable to AF_UNIX sockets.") - - if sys.platform == "darwin" and self.use_poll: - self.skipTest("poll may fail on macOS; see issue #28087") - - class TestClient(BaseClient): - def handle_expt(self): - self.socket.recv(1024, socket.MSG_OOB) - self.flag = True - - class TestHandler(BaseTestHandler): - def __init__(self, conn): - BaseTestHandler.__init__(self, conn) - self.socket.send(bytes(chr(244), 'latin-1'), socket.MSG_OOB) - - server = BaseServer(self.family, self.addr, TestHandler) - client = TestClient(self.family, server.address) - self.loop_waiting_for_flag(client) - - def test_handle_error(self): - - class TestClient(BaseClient): - def handle_write(self): - 1.0 / 0 - def handle_error(self): - self.flag = True - try: - raise - except ZeroDivisionError: - pass - else: - raise Exception("exception not raised") - - server = BaseServer(self.family, self.addr) - client = TestClient(self.family, server.address) - self.loop_waiting_for_flag(client) - - def test_connection_attributes(self): - server = BaseServer(self.family, self.addr) - client = BaseClient(self.family, server.address) - - # we start disconnected - self.assertFalse(server.connected) - self.assertTrue(server.accepting) - # this can't be taken for granted across all platforms - #self.assertFalse(client.connected) - self.assertFalse(client.accepting) - - # execute some loops so that client connects to server - asyncore.loop(timeout=0.01, use_poll=self.use_poll, count=100) - self.assertFalse(server.connected) - self.assertTrue(server.accepting) - self.assertTrue(client.connected) - self.assertFalse(client.accepting) - - # disconnect the client - client.close() - self.assertFalse(server.connected) - self.assertTrue(server.accepting) - self.assertFalse(client.connected) - self.assertFalse(client.accepting) - - # stop serving - server.close() - self.assertFalse(server.connected) - self.assertFalse(server.accepting) - - def test_create_socket(self): - s = asyncore.dispatcher() - s.create_socket(self.family) - self.assertEqual(s.socket.type, socket.SOCK_STREAM) - self.assertEqual(s.socket.family, self.family) - self.assertEqual(s.socket.gettimeout(), 0) - self.assertFalse(s.socket.get_inheritable()) - - def test_bind(self): - if HAS_UNIX_SOCKETS and self.family == socket.AF_UNIX: - self.skipTest("Not applicable to AF_UNIX sockets.") - s1 = asyncore.dispatcher() - s1.create_socket(self.family) - s1.bind(self.addr) - s1.listen(5) - port = s1.socket.getsockname()[1] - - s2 = asyncore.dispatcher() - s2.create_socket(self.family) - # EADDRINUSE indicates the socket was correctly bound - self.assertRaises(OSError, s2.bind, (self.addr[0], port)) - - def test_set_reuse_addr(self): - if HAS_UNIX_SOCKETS and self.family == socket.AF_UNIX: - self.skipTest("Not applicable to AF_UNIX sockets.") - - with socket.socket(self.family) as sock: - try: - sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) - except OSError: - unittest.skip("SO_REUSEADDR not supported on this platform") - else: - # if SO_REUSEADDR succeeded for sock we expect asyncore - # to do the same - s = asyncore.dispatcher(socket.socket(self.family)) - self.assertFalse(s.socket.getsockopt(socket.SOL_SOCKET, - socket.SO_REUSEADDR)) - s.socket.close() - s.create_socket(self.family) - s.set_reuse_addr() - self.assertTrue(s.socket.getsockopt(socket.SOL_SOCKET, - socket.SO_REUSEADDR)) - - @threading_helper.reap_threads - def test_quick_connect(self): - # see: http://bugs.python.org/issue10340 - if self.family not in (socket.AF_INET, getattr(socket, "AF_INET6", object())): - self.skipTest("test specific to AF_INET and AF_INET6") - - server = BaseServer(self.family, self.addr) - # run the thread 500 ms: the socket should be connected in 200 ms - t = threading.Thread(target=lambda: asyncore.loop(timeout=0.1, - count=5)) - t.start() - try: - with socket.socket(self.family, socket.SOCK_STREAM) as s: - s.settimeout(.2) - s.setsockopt(socket.SOL_SOCKET, socket.SO_LINGER, - struct.pack('ii', 1, 0)) - - try: - s.connect(server.address) - except OSError: - pass - finally: - threading_helper.join_thread(t) - -class TestAPI_UseIPv4Sockets(BaseTestAPI): - family = socket.AF_INET - addr = (socket_helper.HOST, 0) - -@unittest.skipUnless(socket_helper.IPV6_ENABLED, 'IPv6 support required') -class TestAPI_UseIPv6Sockets(BaseTestAPI): - family = socket.AF_INET6 - addr = (socket_helper.HOSTv6, 0) - -@unittest.skipUnless(HAS_UNIX_SOCKETS, 'Unix sockets required') -class TestAPI_UseUnixSockets(BaseTestAPI): - if HAS_UNIX_SOCKETS: - family = socket.AF_UNIX - addr = os_helper.TESTFN - - def tearDown(self): - os_helper.unlink(self.addr) - BaseTestAPI.tearDown(self) - -class TestAPI_UseIPv4Select(TestAPI_UseIPv4Sockets, unittest.TestCase): - use_poll = False - -@unittest.skipUnless(hasattr(select, 'poll'), 'select.poll required') -class TestAPI_UseIPv4Poll(TestAPI_UseIPv4Sockets, unittest.TestCase): - use_poll = True - -class TestAPI_UseIPv6Select(TestAPI_UseIPv6Sockets, unittest.TestCase): - use_poll = False - -@unittest.skipUnless(hasattr(select, 'poll'), 'select.poll required') -class TestAPI_UseIPv6Poll(TestAPI_UseIPv6Sockets, unittest.TestCase): - use_poll = True - -class TestAPI_UseUnixSocketsSelect(TestAPI_UseUnixSockets, unittest.TestCase): - use_poll = False - -@unittest.skipUnless(hasattr(select, 'poll'), 'select.poll required') -class TestAPI_UseUnixSocketsPoll(TestAPI_UseUnixSockets, unittest.TestCase): - use_poll = True - -if __name__ == "__main__": - unittest.main() diff --git a/Lib/test/test_base64.py b/Lib/test/test_base64.py index fa03fa1d61..409c8c109e 100644 --- a/Lib/test/test_base64.py +++ b/Lib/test/test_base64.py @@ -545,6 +545,40 @@ def test_b85encode(self): self.check_other_types(base64.b85encode, b"www.python.org", b'cXxL#aCvlSZ*DGca%T') + def test_z85encode(self): + eq = self.assertEqual + + tests = { + b'': b'', + b'www.python.org': b'CxXl-AcVLsz/dgCA+t', + bytes(range(255)): b"""009c61o!#m2NH?C3>iWS5d]J*6CRx17-skh9337x""" + b"""ar.{NbQB=+c[cR@eg&FcfFLssg=mfIi5%2YjuU>)kTv.7l}6Nnnj=AD""" + b"""oIFnTp/ga?r8($2sxO*itWpVyu$0IOwmYv=xLzi%y&a6dAb/]tBAI+J""" + b"""CZjQZE0{D[FpSr8GOteoH(41EJe-&}x#)cTlf[Bu8v].4}L}1:^-""" + b"""@qDP""", + b"""abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ""" + b"""0123456789!@#0^&*();:<>,. []{}""": + b"""vpA.SwObN*x>?B1zeKohADlbxB-}$ND3R+ylQTvjm[uizoh55PpF:[^""" + b"""q=D:$s6eQefFLssg=mfIi5@cEbqrBJdKV-ciY]OSe*aw7DWL""", + b'no padding..': b'zF{UpvpS[.zF7NO', + b'zero compression\x00\x00\x00\x00': b'Ds.bnay/tbAb]JhB7]Mg00000', + b'zero compression\x00\x00\x00': b'Ds.bnay/tbAb]JhB7]Mg0000', + b"""Boundary:\x00\x00\x00\x00""": b"""lt}0:wmoI7iSGcW00""", + b'Space compr: ': b'q/DePwGUG3ze:IRarR^H', + b'\xff': b'@@', + b'\xff'*2: b'%nJ', + b'\xff'*3: b'%nS9', + b'\xff'*4: b'%nSc0', + } + + for data, res in tests.items(): + eq(base64.z85encode(data), res) + + self.check_other_types(base64.z85encode, b"www.python.org", + b'CxXl-AcVLsz/dgCA+t') + def test_a85decode(self): eq = self.assertEqual @@ -586,6 +620,7 @@ def test_a85decode(self): eq(base64.a85decode(b'y+', b"www.python.org") @@ -625,6 +660,41 @@ def test_b85decode(self): self.check_other_types(base64.b85decode, b'cXxL#aCvlSZ*DGca%T', b"www.python.org") + def test_z85decode(self): + eq = self.assertEqual + + tests = { + b'': b'', + b'CxXl-AcVLsz/dgCA+t': b'www.python.org', + b"""009c61o!#m2NH?C3>iWS5d]J*6CRx17-skh9337x""" + b"""ar.{NbQB=+c[cR@eg&FcfFLssg=mfIi5%2YjuU>)kTv.7l}6Nnnj=AD""" + b"""oIFnTp/ga?r8($2sxO*itWpVyu$0IOwmYv=xLzi%y&a6dAb/]tBAI+J""" + b"""CZjQZE0{D[FpSr8GOteoH(41EJe-&}x#)cTlf[Bu8v].4}L}1:^-""" + b"""@qDP""": bytes(range(255)), + b"""vpA.SwObN*x>?B1zeKohADlbxB-}$ND3R+ylQTvjm[uizoh55PpF:[^""" + b"""q=D:$s6eQefFLssg=mfIi5@cEbqrBJdKV-ciY]OSe*aw7DWL""": + b"""abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ""" + b"""0123456789!@#0^&*();:<>,. []{}""", + b'zF{UpvpS[.zF7NO': b'no padding..', + b'Ds.bnay/tbAb]JhB7]Mg00000': b'zero compression\x00\x00\x00\x00', + b'Ds.bnay/tbAb]JhB7]Mg0000': b'zero compression\x00\x00\x00', + b"""lt}0:wmoI7iSGcW00""": b"""Boundary:\x00\x00\x00\x00""", + b'q/DePwGUG3ze:IRarR^H': b'Space compr: ', + b'@@': b'\xff', + b'%nJ': b'\xff'*2, + b'%nS9': b'\xff'*3, + b'%nSc0': b'\xff'*4, + } + + for data, res in tests.items(): + eq(base64.z85decode(data), res) + eq(base64.z85decode(data.decode("ascii")), res) + + self.check_other_types(base64.z85decode, b'CxXl-AcVLsz/dgCA+t', + b'www.python.org') + def test_a85_padding(self): eq = self.assertEqual @@ -689,6 +759,8 @@ def test_a85decode_errors(self): self.assertRaises(ValueError, base64.a85decode, b's8W', adobe=False) self.assertRaises(ValueError, base64.a85decode, b's8W-', adobe=False) self.assertRaises(ValueError, base64.a85decode, b's8W-"', adobe=False) + self.assertRaises(ValueError, base64.a85decode, b'aaaay', + foldspaces=True) def test_b85decode_errors(self): illegal = list(range(33)) + \ @@ -704,6 +776,21 @@ def test_b85decode_errors(self): self.assertRaises(ValueError, base64.b85decode, b'|NsC') self.assertRaises(ValueError, base64.b85decode, b'|NsC1') + def test_z85decode_errors(self): + illegal = list(range(33)) + \ + list(b'"\',;_`|\\~') + \ + list(range(128, 256)) + for c in illegal: + with self.assertRaises(ValueError, msg=bytes([c])): + base64.z85decode(b'0000' + bytes([c])) + + # b'\xff\xff\xff\xff' encodes to b'%nSc0', the following will overflow: + self.assertRaises(ValueError, base64.z85decode, b'%') + self.assertRaises(ValueError, base64.z85decode, b'%n') + self.assertRaises(ValueError, base64.z85decode, b'%nS') + self.assertRaises(ValueError, base64.z85decode, b'%nSc') + self.assertRaises(ValueError, base64.z85decode, b'%nSc1') + def test_decode_nonascii_str(self): decode_funcs = (base64.b64decode, base64.standard_b64decode, @@ -711,7 +798,8 @@ def test_decode_nonascii_str(self): base64.b32decode, base64.b16decode, base64.b85decode, - base64.a85decode) + base64.a85decode, + base64.z85decode) for f in decode_funcs: self.assertRaises(ValueError, f, 'with non-ascii \xcb') diff --git a/Lib/test/test_baseexception.py b/Lib/test/test_baseexception.py index a73711c416..63bf538aa5 100644 --- a/Lib/test/test_baseexception.py +++ b/Lib/test/test_baseexception.py @@ -79,9 +79,12 @@ def test_inheritance(self): finally: inheritance_tree.close() + # Underscore-prefixed (private) exceptions don't need to be documented + exc_set = set(e for e in exc_set if not e.startswith('_')) # RUSTPYTHON specific exc_set.discard("JitError") - + # XXX: RUSTPYTHON; IncompleteInputError will be officially introduced in Python 3.15 + exc_set.discard("IncompleteInputError") self.assertEqual(len(exc_set), 0, "%s not accounted for" % exc_set) interface_tests = ("length", "args", "str", "repr") @@ -118,8 +121,6 @@ def test_interface_no_arg(self): [repr(exc), exc.__class__.__name__ + '()']) self.interface_test_driver(results) - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_setstate_refcount_no_crash(self): # gh-97591: Acquire strong reference before calling tp_hash slot # in PyObject_SetAttr. @@ -135,7 +136,7 @@ class Value(str): d[HashThisKeyWillClearTheDict()] = Value() # refcount of Value() is 1 now - # Exception.__setstate__ should aquire a strong reference of key and + # Exception.__setstate__ should acquire a strong reference of key and # value in the dict. Otherwise, Value()'s refcount would go below # zero in the tp_hash call in PyObject_SetAttr(), and it would cause # crash in GC. diff --git a/Lib/test/test_binascii.py b/Lib/test/test_binascii.py index 4ae89837cc..40a2ca9f76 100644 --- a/Lib/test/test_binascii.py +++ b/Lib/test/test_binascii.py @@ -258,8 +258,6 @@ def test_hex(self): self.assertEqual(binascii.hexlify(self.type2test(s)), t) self.assertEqual(binascii.unhexlify(self.type2test(t)), u) - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_hex_separator(self): """Test that hexlify and b2a_hex are binary versions of bytes.hex.""" # Logic of separators is tested in test_bytes.py. This checks that @@ -388,8 +386,6 @@ def test_empty_string(self): except Exception as err: self.fail("{}({!r}) raises {!r}".format(func, empty, err)) - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_unicode_b2a(self): # Unicode strings are not accepted by b2a_* functions. for func in set(all_functions) - set(a2b_functions): diff --git a/Lib/test/test_bytes.py b/Lib/test/test_bytes.py index baf84642ee..3c634b6cac 100644 --- a/Lib/test/test_bytes.py +++ b/Lib/test/test_bytes.py @@ -209,8 +209,6 @@ def test_constructor_overflow(self): except (OverflowError, MemoryError): pass - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_constructor_exceptions(self): # Issue #34974: bytes and bytearray constructors replace unexpected # exceptions. @@ -1992,7 +1990,7 @@ def test_join(self): s3 = s1.join([b"abcd"]) self.assertIs(type(s3), self.basetype) - @unittest.skip("TODO: RUSTPYHON, Fails on ByteArraySubclassWithSlotsTest") + @unittest.skip("TODO: RUSTPYTHON, Fails on ByteArraySubclassWithSlotsTest") def test_pickle(self): a = self.type2test(b"abcd") a.x = 10 @@ -2007,7 +2005,7 @@ def test_pickle(self): self.assertEqual(type(a.z), type(b.z)) self.assertFalse(hasattr(b, 'y')) - @unittest.skip("TODO: RUSTPYHON, Fails on ByteArraySubclassWithSlotsTest") + @unittest.skip("TODO: RUSTPYTHON, Fails on ByteArraySubclassWithSlotsTest") def test_copy(self): a = self.type2test(b"abcd") a.x = 10 diff --git a/Lib/test/test_bz2.py b/Lib/test/test_bz2.py index 1f0b9adc36..dfc444cbbd 100644 --- a/Lib/test/test_bz2.py +++ b/Lib/test/test_bz2.py @@ -1001,6 +1001,8 @@ def test_encoding_error_handler(self): as f: self.assertEqual(f.read(), "foobar") + # TODO: RUSTPYTHON + @unittest.expectedFailure def test_newline(self): # Test with explicit newline (universal newline mode disabled). text = self.TEXT.decode("ascii") diff --git a/Lib/test/test_calendar.py b/Lib/test/test_calendar.py index 24e472b5fe..df102fe198 100644 --- a/Lib/test/test_calendar.py +++ b/Lib/test/test_calendar.py @@ -3,12 +3,13 @@ from test import support from test.support.script_helper import assert_python_ok, assert_python_failure -import time -import locale -import sys +import contextlib import datetime +import io +import locale import os -import warnings +import sys +import time # From https://en.wikipedia.org/wiki/Leap_year_starting_on_Saturday result_0_02_text = """\ @@ -456,6 +457,11 @@ def test_formatmonth(self): calendar.TextCalendar().formatmonth(0, 2), result_0_02_text ) + def test_formatmonth_with_invalid_month(self): + with self.assertRaises(calendar.IllegalMonthError): + calendar.TextCalendar().formatmonth(2017, 13) + with self.assertRaises(calendar.IllegalMonthError): + calendar.TextCalendar().formatmonth(2017, -1) def test_formatmonthname_with_year(self): self.assertEqual( @@ -550,26 +556,92 @@ def test_months(self): # verify it "acts like a sequence" in two forms of iteration self.assertEqual(value[::-1], list(reversed(value))) - def test_locale_calendars(self): + def test_locale_text_calendar(self): + try: + cal = calendar.LocaleTextCalendar(locale='') + local_weekday = cal.formatweekday(1, 10) + local_weekday_abbr = cal.formatweekday(1, 3) + local_month = cal.formatmonthname(2010, 10, 10) + except locale.Error: + # cannot set the system default locale -- skip rest of test + raise unittest.SkipTest('cannot set the system default locale') + self.assertIsInstance(local_weekday, str) + self.assertIsInstance(local_weekday_abbr, str) + self.assertIsInstance(local_month, str) + self.assertEqual(len(local_weekday), 10) + self.assertEqual(len(local_weekday_abbr), 3) + self.assertGreaterEqual(len(local_month), 10) + + cal = calendar.LocaleTextCalendar(locale=None) + local_weekday = cal.formatweekday(1, 10) + local_weekday_abbr = cal.formatweekday(1, 3) + local_month = cal.formatmonthname(2010, 10, 10) + self.assertIsInstance(local_weekday, str) + self.assertIsInstance(local_weekday_abbr, str) + self.assertIsInstance(local_month, str) + self.assertEqual(len(local_weekday), 10) + self.assertEqual(len(local_weekday_abbr), 3) + self.assertGreaterEqual(len(local_month), 10) + + cal = calendar.LocaleTextCalendar(locale='C') + local_weekday = cal.formatweekday(1, 10) + local_weekday_abbr = cal.formatweekday(1, 3) + local_month = cal.formatmonthname(2010, 10, 10) + self.assertIsInstance(local_weekday, str) + self.assertIsInstance(local_weekday_abbr, str) + self.assertIsInstance(local_month, str) + self.assertEqual(len(local_weekday), 10) + self.assertEqual(len(local_weekday_abbr), 3) + self.assertGreaterEqual(len(local_month), 10) + + def test_locale_html_calendar(self): + try: + cal = calendar.LocaleHTMLCalendar(locale='') + local_weekday = cal.formatweekday(1) + local_month = cal.formatmonthname(2010, 10) + except locale.Error: + # cannot set the system default locale -- skip rest of test + raise unittest.SkipTest('cannot set the system default locale') + self.assertIsInstance(local_weekday, str) + self.assertIsInstance(local_month, str) + + cal = calendar.LocaleHTMLCalendar(locale=None) + local_weekday = cal.formatweekday(1) + local_month = cal.formatmonthname(2010, 10) + self.assertIsInstance(local_weekday, str) + self.assertIsInstance(local_month, str) + + cal = calendar.LocaleHTMLCalendar(locale='C') + local_weekday = cal.formatweekday(1) + local_month = cal.formatmonthname(2010, 10) + self.assertIsInstance(local_weekday, str) + self.assertIsInstance(local_month, str) + + def test_locale_calendars_reset_locale_properly(self): # ensure that Locale{Text,HTML}Calendar resets the locale properly # (it is still not thread-safe though) old_october = calendar.TextCalendar().formatmonthname(2010, 10, 10) try: cal = calendar.LocaleTextCalendar(locale='') local_weekday = cal.formatweekday(1, 10) + local_weekday_abbr = cal.formatweekday(1, 3) local_month = cal.formatmonthname(2010, 10, 10) except locale.Error: # cannot set the system default locale -- skip rest of test raise unittest.SkipTest('cannot set the system default locale') self.assertIsInstance(local_weekday, str) + self.assertIsInstance(local_weekday_abbr, str) self.assertIsInstance(local_month, str) self.assertEqual(len(local_weekday), 10) + self.assertEqual(len(local_weekday_abbr), 3) self.assertGreaterEqual(len(local_month), 10) + cal = calendar.LocaleHTMLCalendar(locale='') local_weekday = cal.formatweekday(1) local_month = cal.formatmonthname(2010, 10) self.assertIsInstance(local_weekday, str) self.assertIsInstance(local_month, str) + new_october = calendar.TextCalendar().formatmonthname(2010, 10, 10) self.assertEqual(old_october, new_october) @@ -590,6 +662,21 @@ def test_locale_calendar_formatweekday(self): except locale.Error: raise unittest.SkipTest('cannot set the en_US locale') + def test_locale_calendar_formatmonthname(self): + try: + # formatmonthname uses the same month names regardless of the width argument. + cal = calendar.LocaleTextCalendar(locale='en_US') + # For too short widths, a full name (with year) is used. + self.assertEqual(cal.formatmonthname(2022, 6, 2, withyear=False), "June") + self.assertEqual(cal.formatmonthname(2022, 6, 2, withyear=True), "June 2022") + self.assertEqual(cal.formatmonthname(2022, 6, 3, withyear=False), "June") + self.assertEqual(cal.formatmonthname(2022, 6, 3, withyear=True), "June 2022") + # For long widths, a centered name is used. + self.assertEqual(cal.formatmonthname(2022, 6, 10, withyear=False), " June ") + self.assertEqual(cal.formatmonthname(2022, 6, 15, withyear=True), " June 2022 ") + except locale.Error: + raise unittest.SkipTest('cannot set the en_US locale') + def test_locale_html_calendar_custom_css_class_month_name(self): try: cal = calendar.LocaleHTMLCalendar(locale='') @@ -848,46 +935,104 @@ def conv(s): return s.replace('\n', os.linesep).encode() class CommandLineTestCase(unittest.TestCase): - def run_ok(self, *args): + def setUp(self): + self.runners = [self.run_cli_ok, self.run_cmd_ok] + + @contextlib.contextmanager + def captured_stdout_with_buffer(self): + orig_stdout = sys.stdout + buffer = io.BytesIO() + sys.stdout = io.TextIOWrapper(buffer) + try: + yield sys.stdout + finally: + sys.stdout.flush() + sys.stdout.buffer.seek(0) + sys.stdout = orig_stdout + + @contextlib.contextmanager + def captured_stderr_with_buffer(self): + orig_stderr = sys.stderr + buffer = io.BytesIO() + sys.stderr = io.TextIOWrapper(buffer) + try: + yield sys.stderr + finally: + sys.stderr.flush() + sys.stderr.buffer.seek(0) + sys.stderr = orig_stderr + + def run_cli_ok(self, *args): + with self.captured_stdout_with_buffer() as stdout: + calendar.main(args) + return stdout.buffer.read() + + def run_cmd_ok(self, *args): return assert_python_ok('-m', 'calendar', *args)[1] - def assertFailure(self, *args): + def assertCLIFails(self, *args): + with self.captured_stderr_with_buffer() as stderr: + self.assertRaises(SystemExit, calendar.main, args) + stderr = stderr.buffer.read() + self.assertIn(b'usage:', stderr) + return stderr + + def assertCmdFails(self, *args): rc, stdout, stderr = assert_python_failure('-m', 'calendar', *args) self.assertIn(b'usage:', stderr) self.assertEqual(rc, 2) + return rc, stdout, stderr + + def assertFailure(self, *args): + self.assertCLIFails(*args) + self.assertCmdFails(*args) def test_help(self): - stdout = self.run_ok('-h') + stdout = self.run_cmd_ok('-h') self.assertIn(b'usage:', stdout) self.assertIn(b'calendar.py', stdout) self.assertIn(b'--help', stdout) + # special case: stdout but sys.exit() + with self.captured_stdout_with_buffer() as output: + self.assertRaises(SystemExit, calendar.main, ['-h']) + output = output.buffer.read() + self.assertIn(b'usage:', output) + self.assertIn(b'--help', output) + def test_illegal_arguments(self): self.assertFailure('-z') self.assertFailure('spam') self.assertFailure('2004', 'spam') + self.assertFailure('2004', '1', 'spam') + self.assertFailure('2004', '1', '1') + self.assertFailure('2004', '1', '1', 'spam') self.assertFailure('-t', 'html', '2004', '1') def test_output_current_year(self): - stdout = self.run_ok() - year = datetime.datetime.now().year - self.assertIn((' %s' % year).encode(), stdout) - self.assertIn(b'January', stdout) - self.assertIn(b'Mo Tu We Th Fr Sa Su', stdout) + for run in self.runners: + output = run() + year = datetime.datetime.now().year + self.assertIn(conv(' %s' % year), output) + self.assertIn(b'January', output) + self.assertIn(b'Mo Tu We Th Fr Sa Su', output) def test_output_year(self): - stdout = self.run_ok('2004') - self.assertEqual(stdout, conv(result_2004_text)) + for run in self.runners: + output = run('2004') + self.assertEqual(output, conv(result_2004_text)) def test_output_month(self): - stdout = self.run_ok('2004', '1') - self.assertEqual(stdout, conv(result_2004_01_text)) + for run in self.runners: + output = run('2004', '1') + self.assertEqual(output, conv(result_2004_01_text)) def test_option_encoding(self): self.assertFailure('-e') self.assertFailure('--encoding') - stdout = self.run_ok('--encoding', 'utf-16-le', '2004') - self.assertEqual(stdout, result_2004_text.encode('utf-16-le')) + for run in self.runners: + output = run('--encoding', 'utf-16-le', '2004') + self.assertEqual(output, result_2004_text.encode('utf-16-le')) def test_option_locale(self): self.assertFailure('-L') @@ -905,66 +1050,75 @@ def test_option_locale(self): locale.setlocale(locale.LC_TIME, oldlocale) except (locale.Error, ValueError): self.skipTest('cannot set the system default locale') - stdout = self.run_ok('--locale', lang, '--encoding', enc, '2004') - self.assertIn('2004'.encode(enc), stdout) + for run in self.runners: + for type in ('text', 'html'): + output = run( + '--type', type, '--locale', lang, '--encoding', enc, '2004' + ) + self.assertIn('2004'.encode(enc), output) def test_option_width(self): self.assertFailure('-w') self.assertFailure('--width') self.assertFailure('-w', 'spam') - stdout = self.run_ok('--width', '3', '2004') - self.assertIn(b'Mon Tue Wed Thu Fri Sat Sun', stdout) + for run in self.runners: + output = run('--width', '3', '2004') + self.assertIn(b'Mon Tue Wed Thu Fri Sat Sun', output) def test_option_lines(self): self.assertFailure('-l') self.assertFailure('--lines') self.assertFailure('-l', 'spam') - stdout = self.run_ok('--lines', '2', '2004') - self.assertIn(conv('December\n\nMo Tu We'), stdout) + for run in self.runners: + output = run('--lines', '2', '2004') + self.assertIn(conv('December\n\nMo Tu We'), output) def test_option_spacing(self): self.assertFailure('-s') self.assertFailure('--spacing') self.assertFailure('-s', 'spam') - stdout = self.run_ok('--spacing', '8', '2004') - self.assertIn(b'Su Mo', stdout) + for run in self.runners: + output = run('--spacing', '8', '2004') + self.assertIn(b'Su Mo', output) def test_option_months(self): self.assertFailure('-m') self.assertFailure('--month') self.assertFailure('-m', 'spam') - stdout = self.run_ok('--months', '1', '2004') - self.assertIn(conv('\nMo Tu We Th Fr Sa Su\n'), stdout) + for run in self.runners: + output = run('--months', '1', '2004') + self.assertIn(conv('\nMo Tu We Th Fr Sa Su\n'), output) def test_option_type(self): self.assertFailure('-t') self.assertFailure('--type') self.assertFailure('-t', 'spam') - stdout = self.run_ok('--type', 'text', '2004') - self.assertEqual(stdout, conv(result_2004_text)) - stdout = self.run_ok('--type', 'html', '2004') - self.assertEqual(stdout[:6], b'Calendar for 2004', stdout) + for run in self.runners: + output = run('--type', 'text', '2004') + self.assertEqual(output, conv(result_2004_text)) + output = run('--type', 'html', '2004') + self.assertEqual(output[:6], b'Calendar for 2004', output) def test_html_output_current_year(self): - stdout = self.run_ok('--type', 'html') - year = datetime.datetime.now().year - self.assertIn(('Calendar for %s' % year).encode(), - stdout) - self.assertIn(b'January', - stdout) + for run in self.runners: + output = run('--type', 'html') + year = datetime.datetime.now().year + self.assertIn(('Calendar for %s' % year).encode(), output) + self.assertIn(b'January', output) def test_html_output_year_encoding(self): - stdout = self.run_ok('-t', 'html', '--encoding', 'ascii', '2004') - self.assertEqual(stdout, - result_2004_html.format(**default_format).encode('ascii')) + for run in self.runners: + output = run('-t', 'html', '--encoding', 'ascii', '2004') + self.assertEqual(output, result_2004_html.format(**default_format).encode('ascii')) def test_html_output_year_css(self): self.assertFailure('-t', 'html', '-c') self.assertFailure('-t', 'html', '--css') - stdout = self.run_ok('-t', 'html', '--css', 'custom.css', '2004') - self.assertIn(b'', stdout) + for run in self.runners: + output = run('-t', 'html', '--css', 'custom.css', '2004') + self.assertIn(b'', output) class MiscTestCase(unittest.TestCase): @@ -972,7 +1126,7 @@ def test__all__(self): not_exported = { 'mdays', 'January', 'February', 'EPOCH', 'different_locale', 'c', 'prweek', 'week', 'format', - 'formatstring', 'main', 'monthlen', 'prevmonth', 'nextmonth'} + 'formatstring', 'main', 'monthlen', 'prevmonth', 'nextmonth', ""} support.check__all__(self, calendar, not_exported=not_exported) @@ -1000,6 +1154,13 @@ def test_formatmonth(self): self.assertIn('class="text-center month"', self.cal.formatmonth(2017, 5)) + def test_formatmonth_with_invalid_month(self): + with self.assertRaises(calendar.IllegalMonthError): + self.cal.formatmonth(2017, 13) + with self.assertRaises(calendar.IllegalMonthError): + self.cal.formatmonth(2017, -1) + + def test_formatweek(self): weeks = self.cal.monthdays2calendar(2017, 5) self.assertIn('class="wed text-nowrap"', self.cal.formatweek(weeks[0])) diff --git a/Lib/test/test_call.py b/Lib/test/test_call.py index 8e64ffffd0..3cb9659acb 100644 --- a/Lib/test/test_call.py +++ b/Lib/test/test_call.py @@ -13,8 +13,6 @@ class FunctionCalls(unittest.TestCase): - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_kwargs_order(self): # bpo-34320: **kwargs should preserve order of passed OrderedDict od = collections.OrderedDict([('a', 1), ('b', 2)]) diff --git a/Lib/test/test_cmd_line.py b/Lib/test/test_cmd_line.py index da53f085a5..a7e4f6dd27 100644 --- a/Lib/test/test_cmd_line.py +++ b/Lib/test/test_cmd_line.py @@ -259,7 +259,8 @@ def test_undecodable_code(self): if not stdout.startswith(pattern): raise AssertionError("%a doesn't start with %a" % (stdout, pattern)) - @unittest.skip("TODO: RUSTPYTHON, thread 'main' panicked at 'unexpected invalid UTF-8 code point'") + # TODO: RUSTPYTHON + @unittest.expectedFailure @unittest.skipIf(sys.platform == 'win32', 'Windows has a native unicode API') def test_invalid_utf8_arg(self): diff --git a/Lib/test/test_collections.py b/Lib/test/test_collections.py index ecd574ab83..901f596cc3 100644 --- a/Lib/test/test_collections.py +++ b/Lib/test/test_collections.py @@ -698,8 +698,6 @@ class NewPoint(tuple): self.assertEqual(np.x, 1) self.assertEqual(np.y, 2) - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_new_builtins_issue_43102(self): obj = namedtuple('C', ()) new_func = obj.__new__ diff --git a/Lib/test/test_csv.py b/Lib/test/test_csv.py index 2646be086c..9a1743da6d 100644 --- a/Lib/test/test_csv.py +++ b/Lib/test/test_csv.py @@ -291,18 +291,6 @@ def test_writerows_errors(self): self.assertRaises(TypeError, writer.writerows, None) self.assertRaises(OSError, writer.writerows, BadIterable()) - @support.cpython_only - @support.requires_legacy_unicode_capi() - @warnings_helper.ignore_warnings(category=DeprecationWarning) - def test_writerows_legacy_strings(self): - import _testcapi - c = _testcapi.unicode_legacy_string('a') - with TemporaryFile("w+", encoding="utf-8", newline='') as fileobj: - writer = csv.writer(fileobj) - writer.writerows([[c]]) - fileobj.seek(0) - self.assertEqual(fileobj.read(), "a\r\n") - def _read_test(self, input, expect, **kwargs): reader = csv.reader(input, **kwargs) result = list(reader) diff --git a/Lib/test/test_dataclasses.py b/Lib/test/test_dataclasses.py index 62ae5622a8..46430d3231 100644 --- a/Lib/test/test_dataclasses.py +++ b/Lib/test/test_dataclasses.py @@ -2086,8 +2086,6 @@ class C: self.assertDocStrEqual(C.__doc__, "C(x:List[int]=)") - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_docstring_deque_field(self): @dataclass class C: @@ -2095,8 +2093,6 @@ class C: self.assertDocStrEqual(C.__doc__, "C(x:collections.deque)") - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_docstring_deque_field_with_default_factory(self): @dataclass class C: diff --git a/Lib/test/test_decimal.py b/Lib/test/test_decimal.py index 04f4fc4a01..0493d6a41d 100644 --- a/Lib/test/test_decimal.py +++ b/Lib/test/test_decimal.py @@ -34,10 +34,10 @@ import locale from test.support import (is_resource_enabled, requires_IEEE_754, requires_docstrings, - requires_legacy_unicode_capi, check_sanitizer) + check_disallow_instantiation) from test.support import (TestFailed, run_with_locale, cpython_only, - darwin_malloc_err_warning, is_emscripten) + darwin_malloc_err_warning) from test.support.import_helper import import_fresh_module from test.support import threading_helper from test.support import warnings_helper @@ -586,18 +586,6 @@ def test_explicit_from_string(self): # underscores don't prevent errors self.assertRaises(InvalidOperation, Decimal, "1_2_\u00003") - @cpython_only - @requires_legacy_unicode_capi() - @warnings_helper.ignore_warnings(category=DeprecationWarning) - def test_from_legacy_strings(self): - import _testcapi - Decimal = self.decimal.Decimal - context = self.decimal.Context() - - s = _testcapi.unicode_legacy_string('9.999999') - self.assertEqual(str(Decimal(s)), '9.999999') - self.assertEqual(str(context.create_decimal(s)), '9.999999') - def test_explicit_from_tuples(self): Decimal = self.decimal.Decimal @@ -2928,23 +2916,6 @@ def test_none_args(self): assert_signals(self, c, 'traps', [InvalidOperation, DivisionByZero, Overflow]) - @cpython_only - @requires_legacy_unicode_capi() - @warnings_helper.ignore_warnings(category=DeprecationWarning) - def test_from_legacy_strings(self): - import _testcapi - c = self.decimal.Context() - - for rnd in RoundingModes: - c.rounding = _testcapi.unicode_legacy_string(rnd) - self.assertEqual(c.rounding, rnd) - - s = _testcapi.unicode_legacy_string('') - self.assertRaises(TypeError, setattr, c, 'rounding', s) - - s = _testcapi.unicode_legacy_string('ROUND_\x00UP') - self.assertRaises(TypeError, setattr, c, 'rounding', s) - def test_pickle(self): for proto in range(pickle.HIGHEST_PROTOCOL + 1): @@ -5654,48 +5625,6 @@ def __abs__(self): self.assertEqual(Decimal.from_float(cls(101.1)), Decimal.from_float(101.1)) - # Issue 41540: - @unittest.skipIf(sys.platform.startswith("aix"), - "AIX: default ulimit: test is flaky because of extreme over-allocation") - @unittest.skipIf(is_emscripten, "Test is unstable on Emscripten") - @unittest.skipIf(check_sanitizer(address=True, memory=True), - "ASAN/MSAN sanitizer defaults to crashing " - "instead of returning NULL for malloc failure.") - def test_maxcontext_exact_arith(self): - - # Make sure that exact operations do not raise MemoryError due - # to huge intermediate values when the context precision is very - # large. - - # The following functions fill the available precision and are - # therefore not suitable for large precisions (by design of the - # specification). - MaxContextSkip = ['logical_invert', 'next_minus', 'next_plus', - 'logical_and', 'logical_or', 'logical_xor', - 'next_toward', 'rotate', 'shift'] - - Decimal = C.Decimal - Context = C.Context - localcontext = C.localcontext - - # Here only some functions that are likely candidates for triggering a - # MemoryError are tested. deccheck.py has an exhaustive test. - maxcontext = Context(prec=C.MAX_PREC, Emin=C.MIN_EMIN, Emax=C.MAX_EMAX) - with localcontext(maxcontext): - self.assertEqual(Decimal(0).exp(), 1) - self.assertEqual(Decimal(1).ln(), 0) - self.assertEqual(Decimal(1).log10(), 0) - self.assertEqual(Decimal(10**2).log10(), 2) - self.assertEqual(Decimal(10**223).log10(), 223) - self.assertEqual(Decimal(10**19).logb(), 19) - self.assertEqual(Decimal(4).sqrt(), 2) - self.assertEqual(Decimal("40E9").sqrt(), Decimal('2.0E+5')) - self.assertEqual(divmod(Decimal(10), 3), (3, 1)) - self.assertEqual(Decimal(10) // 3, 3) - self.assertEqual(Decimal(4) / 2, 2) - self.assertEqual(Decimal(400) ** -1, Decimal('0.0025')) - - def test_c_signaldict_segfault(self): # See gh-106263 for details. SignalDict = type(C.Context().flags) diff --git a/Lib/test/test_descr.py b/Lib/test/test_descr.py index eae8b42fce..d1c83cc337 100644 --- a/Lib/test/test_descr.py +++ b/Lib/test/test_descr.py @@ -1558,8 +1558,6 @@ class B(A1, A2): else: self.fail("finding the most derived metaclass should have failed") - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_classmethods(self): # Testing class methods... class C(object): @@ -1851,8 +1849,6 @@ def __init__(self, foo): object.__init__(A(3)) self.assertRaises(TypeError, object.__init__, A(3), 5) - @unittest.expectedFailure - @unittest.skip("TODO: RUSTPYTHON") def test_restored_object_new(self): class A(object): def __new__(cls, *args, **kwargs): @@ -2358,8 +2354,6 @@ class D(object): else: self.fail("expected ZeroDivisionError from bad property") - # TODO: RUSTPYTHON - @unittest.expectedFailure @unittest.skipIf(sys.flags.optimize >= 2, "Docstrings are omitted with -O2 and above") def test_properties_doc_attrib(self): @@ -2386,8 +2380,6 @@ def test_testcapi_no_segfault(self): class X(object): p = property(_testcapi.test_with_docstring) - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_properties_plus(self): class C(object): foo = property(doc="hello") @@ -2534,8 +2526,6 @@ def __iter__(self): else: self.fail("no ValueError from dict(%r)" % bad) - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_dir(self): # Testing dir() ... junk = 12 @@ -4997,8 +4987,6 @@ class Sub(Base): self.assertIn("__dict__", Base.__dict__) self.assertNotIn("__dict__", Sub.__dict__) - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_bound_method_repr(self): class Foo: def method(self): @@ -5136,6 +5124,8 @@ def test_iter_keys(self): self.assertEqual(keys, ['__dict__', '__doc__', '__module__', '__weakref__', 'meth']) + # TODO: RUSTPYTHON + @unittest.expectedFailure @unittest.skipIf(hasattr(sys, 'gettrace') and sys.gettrace(), 'trace function introduces __local__') def test_iter_values(self): diff --git a/Lib/test/test_exceptions.py b/Lib/test/test_exceptions.py index 8be8122507..1864e611ab 100644 --- a/Lib/test/test_exceptions.py +++ b/Lib/test/test_exceptions.py @@ -425,8 +425,6 @@ def test_windows_message(self): with self.assertRaisesRegex(OSError, 'Windows Error 0x%x' % code): ctypes.pythonapi.PyErr_SetFromWindowsErr(code) - # TODO: RUSTPYTHON - @unittest.expectedFailure def testAttributes(self): # test that exception attributes are happy @@ -609,8 +607,6 @@ class MyException(Exception): self.assertIsInstance(e, MyException) self.assertEqual(e.__traceback__, tb) - # TODO: RUSTPYTHON - @unittest.expectedFailure def testInvalidTraceback(self): try: Exception().__traceback__ = 5 @@ -703,8 +699,6 @@ def test_str(self): self.assertTrue(str(Exception('a'))) self.assertTrue(str(Exception('a', 'b'))) - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_exception_cleanup_names(self): # Make sure the local variable bound to the exception instance by # an "except" statement is only visible inside the except block. @@ -727,8 +721,6 @@ def test_exception_cleanup_names2(self): with self.assertRaises(UnboundLocalError): e - # TODO: RUSTPYTHON - @unittest.expectedFailure def testExceptionCleanupState(self): # Make sure exception state is cleaned up as soon as the except # block is left. See #2507 @@ -1575,8 +1567,6 @@ def inner(): gc_collect() # For PyPy or other GCs. self.assertEqual(wr(), None) - # TODO: RUSTPYTHON - @unittest.expectedFailure @no_tracing @unittest.skipIf(sys.platform == 'win32', 'TODO: RUSTPYTHON Windows') def test_recursion_error_cleanup(self): @@ -2074,8 +2064,6 @@ def f(): class AttributeErrorTests(unittest.TestCase): - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_attributes(self): # Setting 'attr' should not be a problem. exc = AttributeError('Ouch!') @@ -2087,8 +2075,6 @@ def test_attributes(self): self.assertEqual(exc.name, 'carry') self.assertIs(exc.obj, sentinel) - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_getattr_has_name_and_obj(self): class A: blech = None @@ -2355,8 +2341,6 @@ def __getattribute__(self, attr): class ImportErrorTests(unittest.TestCase): - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_attributes(self): # Setting 'name' and 'path' should not be a problem. exc = ImportError('test') @@ -2391,8 +2375,6 @@ def test_attributes(self): with self.assertRaisesRegex(TypeError, msg): ImportError('test', invalid='keyword', another=True) - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_reset_attributes(self): exc = ImportError('test', name='name', path='path') self.assertEqual(exc.args, ('test',)) @@ -2571,8 +2553,6 @@ def test_non_utf8(self): finally: unlink(TESTFN) - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_attributes_new_constructor(self): args = ("bad.py", 1, 2, "abcdefg", 1, 100) the_exception = SyntaxError("bad bad", args) @@ -2585,8 +2565,6 @@ def test_attributes_new_constructor(self): self.assertEqual(error, the_exception.text) self.assertEqual("bad bad", the_exception.msg) - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_attributes_old_constructor(self): args = ("bad.py", 1, 2, "abcdefg") the_exception = SyntaxError("bad bad", args) diff --git a/Lib/test/test_fileinput.py b/Lib/test/test_fileinput.py index bda6dee6bf..1a6ef3cd27 100644 --- a/Lib/test/test_fileinput.py +++ b/Lib/test/test_fileinput.py @@ -23,10 +23,9 @@ from io import BytesIO, StringIO from fileinput import FileInput, hook_encoded -from pathlib import Path from test.support import verbose -from test.support.os_helper import TESTFN +from test.support.os_helper import TESTFN, FakePath from test.support.os_helper import unlink as safe_unlink from test.support import os_helper from test import support @@ -151,7 +150,7 @@ def test_buffer_sizes(self): print('6. Inplace') savestdout = sys.stdout try: - fi = FileInput(files=(t1, t2, t3, t4), inplace=1, encoding="utf-8") + fi = FileInput(files=(t1, t2, t3, t4), inplace=True, encoding="utf-8") for line in fi: line = line[:-1].upper() print(line) @@ -256,7 +255,7 @@ def test_detached_stdin_binary_mode(self): def test_file_opening_hook(self): try: # cannot use openhook and inplace mode - fi = FileInput(inplace=1, openhook=lambda f, m: None) + fi = FileInput(inplace=True, openhook=lambda f, m: None) self.fail("FileInput should raise if both inplace " "and openhook arguments are given") except ValueError: @@ -478,23 +477,23 @@ def test_iteration_buffering(self): self.assertRaises(StopIteration, next, fi) self.assertEqual(src.linesread, []) - def test_pathlib_file(self): - t1 = Path(self.writeTmp("Pathlib file.")) + def test_pathlike_file(self): + t1 = FakePath(self.writeTmp("Path-like file.")) with FileInput(t1, encoding="utf-8") as fi: line = fi.readline() - self.assertEqual(line, 'Pathlib file.') + self.assertEqual(line, 'Path-like file.') self.assertEqual(fi.lineno(), 1) self.assertEqual(fi.filelineno(), 1) self.assertEqual(fi.filename(), os.fspath(t1)) - def test_pathlib_file_inplace(self): - t1 = Path(self.writeTmp('Pathlib file.')) + def test_pathlike_file_inplace(self): + t1 = FakePath(self.writeTmp('Path-like file.')) with FileInput(t1, inplace=True, encoding="utf-8") as fi: line = fi.readline() - self.assertEqual(line, 'Pathlib file.') + self.assertEqual(line, 'Path-like file.') print('Modified %s' % line) with open(t1, encoding="utf-8") as f: - self.assertEqual(f.read(), 'Modified Pathlib file.\n') + self.assertEqual(f.read(), 'Modified Path-like file.\n') class MockFileInput: @@ -855,29 +854,29 @@ def setUp(self): self.fake_open = InvocationRecorder() def test_empty_string(self): - self.do_test_use_builtin_open("", 1) + self.do_test_use_builtin_open_text("", "r") def test_no_ext(self): - self.do_test_use_builtin_open("abcd", 2) + self.do_test_use_builtin_open_text("abcd", "r") @unittest.skipUnless(gzip, "Requires gzip and zlib") def test_gz_ext_fake(self): original_open = gzip.open gzip.open = self.fake_open try: - result = fileinput.hook_compressed("test.gz", "3") + result = fileinput.hook_compressed("test.gz", "r") finally: gzip.open = original_open self.assertEqual(self.fake_open.invocation_count, 1) - self.assertEqual(self.fake_open.last_invocation, (("test.gz", "3"), {})) + self.assertEqual(self.fake_open.last_invocation, (("test.gz", "r"), {})) @unittest.skipUnless(gzip, "Requires gzip and zlib") def test_gz_with_encoding_fake(self): original_open = gzip.open gzip.open = lambda filename, mode: io.BytesIO(b'Ex-binary string') try: - result = fileinput.hook_compressed("test.gz", "3", encoding="utf-8") + result = fileinput.hook_compressed("test.gz", "r", encoding="utf-8") finally: gzip.open = original_open self.assertEqual(list(result), ['Ex-binary string']) @@ -887,23 +886,40 @@ def test_bz2_ext_fake(self): original_open = bz2.BZ2File bz2.BZ2File = self.fake_open try: - result = fileinput.hook_compressed("test.bz2", "4") + result = fileinput.hook_compressed("test.bz2", "r") finally: bz2.BZ2File = original_open self.assertEqual(self.fake_open.invocation_count, 1) - self.assertEqual(self.fake_open.last_invocation, (("test.bz2", "4"), {})) + self.assertEqual(self.fake_open.last_invocation, (("test.bz2", "r"), {})) def test_blah_ext(self): - self.do_test_use_builtin_open("abcd.blah", "5") + self.do_test_use_builtin_open_binary("abcd.blah", "rb") def test_gz_ext_builtin(self): - self.do_test_use_builtin_open("abcd.Gz", "6") + self.do_test_use_builtin_open_binary("abcd.Gz", "rb") def test_bz2_ext_builtin(self): - self.do_test_use_builtin_open("abcd.Bz2", "7") + self.do_test_use_builtin_open_binary("abcd.Bz2", "rb") - def do_test_use_builtin_open(self, filename, mode): + def test_binary_mode_encoding(self): + self.do_test_use_builtin_open_binary("abcd", "rb") + + def test_text_mode_encoding(self): + self.do_test_use_builtin_open_text("abcd", "r") + + def do_test_use_builtin_open_binary(self, filename, mode): + original_open = self.replace_builtin_open(self.fake_open) + try: + result = fileinput.hook_compressed(filename, mode) + finally: + self.replace_builtin_open(original_open) + + self.assertEqual(self.fake_open.invocation_count, 1) + self.assertEqual(self.fake_open.last_invocation, + ((filename, mode), {'encoding': None, 'errors': None})) + + def do_test_use_builtin_open_text(self, filename, mode): original_open = self.replace_builtin_open(self.fake_open) try: result = fileinput.hook_compressed(filename, mode) diff --git a/Lib/test/test_float.py b/Lib/test/test_float.py index 30d27072fb..d94a2bdadd 100644 --- a/Lib/test/test_float.py +++ b/Lib/test/test_float.py @@ -25,7 +25,7 @@ #locate file with float format test values test_dir = os.path.dirname(__file__) or os.curdir -format_testfile = os.path.join(test_dir, 'formatfloat_testcases.txt') +format_testfile = os.path.join(test_dir, 'mathdata', 'formatfloat_testcases.txt') class FloatSubclass(float): pass @@ -131,7 +131,7 @@ def check(s): with self.assertRaises(ValueError, msg='float(%r)' % (s,)) as cm: float(s) self.assertEqual(str(cm.exception), - 'could not convert string to float: %r' % (s,)) + 'could not convert string to float: %r' % (s,)) check('\xbd') check('123\xbd') @@ -153,8 +153,6 @@ def check(s): # non-UTF-8 byte string check(b'123\xa0') - # TODO: RUSTPYTHON - @unittest.skip("RustPython panics on this") @support.run_with_locale('LC_NUMERIC', 'fr_FR', 'de_DE', '') def test_float_with_comma(self): # set locale to something that doesn't use '.' for the decimal point @@ -290,11 +288,11 @@ def test_is_integer(self): def test_floatasratio(self): for f, ratio in [ - (0.875, (7, 8)), - (-0.875, (-7, 8)), - (0.0, (0, 1)), - (11.5, (23, 2)), - ]: + (0.875, (7, 8)), + (-0.875, (-7, 8)), + (0.0, (0, 1)), + (11.5, (23, 2)), + ]: self.assertEqual(f.as_integer_ratio(), ratio) for i in range(10000): @@ -337,7 +335,7 @@ def test_float_containment(self): self.assertTrue((f,) == (f,), "(%r,) != (%r,)" % (f, f)) self.assertTrue({f} == {f}, "{%r} != {%r}" % (f, f)) self.assertTrue({f : None} == {f: None}, "{%r : None} != " - "{%r : None}" % (f, f)) + "{%r : None}" % (f, f)) # identical containers l, t, s, d = [f], (f,), {f}, {f: None} @@ -400,9 +398,9 @@ def test_float_mod(self): self.assertEqualAndEqualSign(mod(1e-100, -1.0), -1.0) self.assertEqualAndEqualSign(mod(1.0, -1.0), -0.0) + @support.requires_IEEE_754 # TODO: RUSTPYTHON @unittest.expectedFailure - @support.requires_IEEE_754 def test_float_pow(self): # test builtin pow and ** operator for IEEE 754 special cases. # Special cases taken from section F.9.4.4 of the C99 specification @@ -728,6 +726,8 @@ def test_format(self): self.assertEqual(format(INF, 'F'), 'INF') @support.requires_IEEE_754 + # TODO: RUSTPYTHON + @unittest.expectedFailure def test_format_testfile(self): with open(format_testfile, encoding="utf-8") as testfile: for line in testfile: @@ -772,9 +772,12 @@ def test_issue35560(self): self.assertEqual(format(-123.34, '00.10g'), '-123.34') class ReprTestCase(unittest.TestCase): + # TODO: RUSTPYTHON + @unittest.expectedFailure def test_repr(self): with open(os.path.join(os.path.split(__file__)[0], - 'floating_points.txt'), encoding="utf-8") as floats_file: + 'mathdata', + 'floating_points.txt'), encoding="utf-8") as floats_file: for line in floats_file: line = line.strip() if not line or line.startswith('#'): @@ -824,7 +827,7 @@ def test_short_repr(self): '2.86438000439698e+28', '8.89142905246179e+28', '3.08578087079232e+35', - ] + ] for s in test_strings: negs = '-'+s @@ -874,14 +877,14 @@ def test_overflow(self): self.assertRaises(OverflowError, round, 1.6e308, -308) self.assertRaises(OverflowError, round, -1.7e308, -308) - # TODO: RUSTPYTHON - @unittest.expectedFailure @unittest.skipUnless(getattr(sys, 'float_repr_style', '') == 'short', "applies only when using short float repr style") + # TODO: RUSTPYTHON + @unittest.expectedFailure def test_previous_round_bugs(self): # particular cases that have occurred in bug reports self.assertEqual(round(562949953421312.5, 1), - 562949953421312.5) + 562949953421312.5) self.assertEqual(round(56294995342131.5, 3), 56294995342131.5) # round-half-even @@ -894,10 +897,10 @@ def test_previous_round_bugs(self): self.assertEqual(round(85.0, -1), 80.0) self.assertEqual(round(95.0, -1), 100.0) - # TODO: RUSTPYTHON - @unittest.expectedFailure @unittest.skipUnless(getattr(sys, 'float_repr_style', '') == 'short', "applies only when using short float repr style") + # TODO: RUSTPYTHON + @unittest.expectedFailure def test_matches_float_format(self): # round should give the same results as float formatting for i in range(500): @@ -1131,7 +1134,7 @@ def test_invalid_inputs(self): '0x1.\uff10p0', '0x1p0 \n 0x2p0', '0x1p0\0 0x1p0', # embedded null byte is not end of string - ] + ] for x in invalid_inputs: try: result = fromHex(x) @@ -1150,7 +1153,7 @@ def test_whitespace(self): ('1.0', 1.0), ('-0x.2', -0.125), ('-0.0', -0.0) - ] + ] whitespace = [ '', ' ', @@ -1160,7 +1163,7 @@ def test_whitespace(self): '\f', '\v', '\r' - ] + ] for inp, expected in value_pairs: for lead in whitespace: for trail in whitespace: @@ -1510,4 +1513,4 @@ def __init__(self, value): if __name__ == '__main__': - unittest.main() + unittest.main() \ No newline at end of file diff --git a/Lib/test/test_fstring.py b/Lib/test/test_fstring.py index c0c987ca2f..c727b5b22c 100644 --- a/Lib/test/test_fstring.py +++ b/Lib/test/test_fstring.py @@ -1631,8 +1631,6 @@ def test_empty_format_specifier(self): self.assertEqual(f"{x!s:}", "test") self.assertEqual(f"{x!r:}", "'test'") - # TODO: RUSTPYTHON d[0] error - @unittest.expectedFailure def test_str_format_differences(self): d = { "a": "string", diff --git a/Lib/test/test_ftplib.py b/Lib/test/test_ftplib.py index 7e632efa4c..b3e4e776a4 100644 --- a/Lib/test/test_ftplib.py +++ b/Lib/test/test_ftplib.py @@ -25,15 +25,6 @@ from test.support import asyncore from test.support.socket_helper import HOST, HOSTv6 -import sys -if sys.platform == 'win32': - raise unittest.SkipTest("test_ftplib not working on windows") -if getattr(sys, '_rustpython_debugbuild', False): - raise unittest.SkipTest("something's weird on debug builds") - -asynchat = warnings_helper.import_deprecated('asynchat') -asyncore = warnings_helper.import_deprecated('asyncore') - support.requires_working_socket(module=True) diff --git a/Lib/test/test_funcattrs.py b/Lib/test/test_funcattrs.py index 9080922e5e..5fd268fd90 100644 --- a/Lib/test/test_funcattrs.py +++ b/Lib/test/test_funcattrs.py @@ -140,8 +140,6 @@ def f(): print(a) self.fail("shouldn't be able to read an empty cell") a = 12 - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_set_cell(self): a = 12 def f(): return a @@ -278,8 +276,6 @@ def test___self__(self): self.assertEqual(self.fi.a.__self__, self.fi) self.cannot_set_attr(self.fi.a, "__self__", self.fi, AttributeError) - # TODO: RUSTPYTHON - @unittest.expectedFailure def test___func___non_method(self): # Behavior should be the same when a method is added via an attr # assignment @@ -333,8 +329,6 @@ def test_setting_dict_to_invalid(self): d = UserDict({'known_attr': 7}) self.cannot_set_attr(self.fi.a.__func__, '__dict__', d, TypeError) - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_setting_dict_to_valid(self): d = {'known_attr': 7} self.b.__dict__ = d @@ -359,8 +353,6 @@ def test_delete___dict__(self): else: self.fail("deleting function dictionary should raise TypeError") - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_unassigned_dict(self): self.assertEqual(self.b.__dict__, {}) @@ -381,8 +373,6 @@ def test_set_docstring_attr(self): self.assertEqual(self.fi.a.__doc__, docstr) self.cannot_set_attr(self.fi.a, "__doc__", docstr, AttributeError) - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_delete_docstring(self): self.b.__doc__ = "The docstring" del self.b.__doc__ diff --git a/Lib/test/test_functools.py b/Lib/test/test_functools.py index fb2dcf7a51..32b442b0c0 100644 --- a/Lib/test/test_functools.py +++ b/Lib/test/test_functools.py @@ -396,8 +396,6 @@ class TestPartialC(TestPartial, unittest.TestCase): module = c_functools partial = c_functools.partial - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_attributes_unwritable(self): # attributes should not be writable p = self.partial(capture, 1, 2, a=10, b=20) @@ -1691,8 +1689,6 @@ def f(zomg: 'zomg_annotation'): for attr in self.module.WRAPPER_ASSIGNMENTS: self.assertEqual(getattr(g, attr), getattr(f, attr)) - # TODO: RUSTPYTHON - @unittest.expectedFailure @threading_helper.requires_working_threading() def test_lru_cache_threaded(self): n, m = 5, 11 @@ -2901,8 +2897,6 @@ def _(arg: int | None): self.assertEqual(types_union(1), "types.UnionType") self.assertEqual(types_union(None), "types.UnionType") - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_register_genericalias(self): @functools.singledispatch def f(arg): @@ -2922,8 +2916,6 @@ def f(arg): self.assertEqual(f(""), "default") self.assertEqual(f(b""), "default") - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_register_genericalias_decorator(self): @functools.singledispatch def f(arg): @@ -2938,8 +2930,6 @@ def f(arg): with self.assertRaisesRegex(TypeError, "Invalid first argument to "): f.register(typing.List[int] | str) - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_register_genericalias_annotation(self): @functools.singledispatch def f(arg): diff --git a/Lib/test/test_future_stmt/test_future.py b/Lib/test/test_future_stmt/test_future.py index 0e08051038..9c30054963 100644 --- a/Lib/test/test_future_stmt/test_future.py +++ b/Lib/test/test_future_stmt/test_future.py @@ -442,8 +442,6 @@ def foo(): def bar(arg: (yield)): pass """)) - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_get_type_hints_on_func_with_variadic_arg(self): # `typing.get_type_hints` might break on a function with a variadic # annotation (e.g. `f(*args: *Ts)`) if `from __future__ import diff --git a/Lib/test/test_genericalias.py b/Lib/test/test_genericalias.py index 560b96f7e3..0daaff099a 100644 --- a/Lib/test/test_genericalias.py +++ b/Lib/test/test_genericalias.py @@ -85,8 +85,6 @@ class BaseTest(unittest.TestCase): if ctypes is not None: generic_types.extend((ctypes.Array, ctypes.LibraryLoader)) - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_subscriptable(self): for t in self.generic_types: if t is None: @@ -314,8 +312,6 @@ def test_dir(self): for generic_alias_property in ("__origin__", "__args__", "__parameters__"): self.assertIn(generic_alias_property, dir_of_gen_alias) - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_weakref(self): for t in self.generic_types: if t is None: diff --git a/Lib/test/test_getopt.py b/Lib/test/test_getopt.py index c8b3442de4..295a2c8136 100644 --- a/Lib/test/test_getopt.py +++ b/Lib/test/test_getopt.py @@ -1,19 +1,19 @@ # test_getopt.py # David Goodger 2000-08-19 -from test.support.os_helper import EnvironmentVarGuard import doctest -import unittest - import getopt +import sys +import unittest +from test.support.i18n_helper import TestTranslationsBase, update_translation_snapshots +from test.support.os_helper import EnvironmentVarGuard sentinel = object() class GetoptTests(unittest.TestCase): def setUp(self): self.env = self.enterContext(EnvironmentVarGuard()) - if "POSIXLY_CORRECT" in self.env: - del self.env["POSIXLY_CORRECT"] + del self.env["POSIXLY_CORRECT"] def assertError(self, *args, **kwargs): self.assertRaises(getopt.GetoptError, *args, **kwargs) @@ -173,10 +173,20 @@ def test_libref_examples(): ['a1', 'a2'] """ + +class TestTranslations(TestTranslationsBase): + def test_translations(self): + self.assertMsgidsEqual(getopt) + + def load_tests(loader, tests, pattern): tests.addTest(doctest.DocTestSuite()) return tests -if __name__ == "__main__": +if __name__ == '__main__': + # To regenerate translation snapshots + if len(sys.argv) > 1 and sys.argv[1] == '--snapshot-update': + update_translation_snapshots(getopt) + sys.exit(0) unittest.main() diff --git a/Lib/test/test_getpass.py b/Lib/test/test_getpass.py index 3452e46213..80dda2caaa 100644 --- a/Lib/test/test_getpass.py +++ b/Lib/test/test_getpass.py @@ -26,7 +26,10 @@ def test_username_priorities_of_env_values(self, environ): environ.get.return_value = None try: getpass.getuser() - except ImportError: # in case there's no pwd module + except OSError: # in case there's no pwd module + pass + except KeyError: + # current user has no pwd entry pass self.assertEqual( environ.get.call_args_list, @@ -44,7 +47,7 @@ def test_username_falls_back_to_pwd(self, environ): getpass.getuser()) getpw.assert_called_once_with(42) else: - self.assertRaises(ImportError, getpass.getuser) + self.assertRaises(OSError, getpass.getuser) class GetpassRawinputTest(unittest.TestCase): diff --git a/Lib/test/test_hashlib.py b/Lib/test/test_hashlib.py index da62486e0b..5055c4c7f7 100644 --- a/Lib/test/test_hashlib.py +++ b/Lib/test/test_hashlib.py @@ -456,8 +456,6 @@ def check_blocksize_name(self, name, block_size=0, digest_size=0, # split for sha3_512 / _sha3.sha3 object self.assertIn(name.split("_")[0], repr(m)) - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_blocksize_name(self): self.check_blocksize_name('md5', 64, 16) self.check_blocksize_name('sha1', 64, 20) @@ -500,8 +498,6 @@ def test_extra_sha3(self): self.check_sha3('shake_128', 256, 1344, b'\x1f') self.check_sha3('shake_256', 512, 1088, b'\x1f') - # TODO: RUSTPYTHON implement all blake2 params - @unittest.expectedFailure @requires_blake2 def test_blocksize_name_blake2(self): self.check_blocksize_name('blake2b', 128, 64) diff --git a/Lib/test/test_heapq.py b/Lib/test/test_heapq.py index cb1e4505b0..1aa8e4e289 100644 --- a/Lib/test/test_heapq.py +++ b/Lib/test/test_heapq.py @@ -4,7 +4,6 @@ import unittest import doctest -from test import support from test.support import import_helper from unittest import TestCase, skipUnless from operator import itemgetter diff --git a/Lib/test/test_httplib.py b/Lib/test/test_httplib.py index 8f095d52ac..5a59f372ad 100644 --- a/Lib/test/test_httplib.py +++ b/Lib/test/test_httplib.py @@ -1,3 +1,4 @@ +import sys import errno from http import client, HTTPStatus import io @@ -1695,8 +1696,6 @@ def test_attributes(self): h = client.HTTPSConnection(HOST, TimeoutTest.PORT, timeout=30) self.assertEqual(h.timeout, 30) - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_networked(self): # Default settings: requires a valid cert from a trusted CA import ssl @@ -1769,8 +1768,6 @@ def test_networked_good_cert(self): h.close() self.assertIn('nginx', server_string) - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_networked_bad_cert(self): # We feed a "CA" cert that is unrelated to the server's cert import ssl @@ -1785,6 +1782,7 @@ def test_networked_bad_cert(self): # TODO: RUSTPYTHON @unittest.expectedFailure + @unittest.skipIf(sys.platform == 'darwin', 'Occasionally success on macOS') def test_local_unknown_cert(self): # The custom cert isn't known to the default trust bundle import ssl diff --git a/Lib/test/test_imp.py b/Lib/test/test_imp.py deleted file mode 100644 index c4aba69197..0000000000 --- a/Lib/test/test_imp.py +++ /dev/null @@ -1,498 +0,0 @@ -import gc -import importlib -import importlib.util -import os -import os.path -import py_compile -import sys -from test import support -from test.support import import_helper -from test.support import os_helper -from test.support import script_helper -from test.support import warnings_helper -import unittest -import warnings -imp = warnings_helper.import_deprecated('imp') -import _imp - - -OS_PATH_NAME = os.path.__name__ - - -def requires_load_dynamic(meth): - """Decorator to skip a test if not running under CPython or lacking - imp.load_dynamic().""" - meth = support.cpython_only(meth) - return unittest.skipIf(getattr(imp, 'load_dynamic', None) is None, - 'imp.load_dynamic() required')(meth) - - -class LockTests(unittest.TestCase): - - """Very basic test of import lock functions.""" - - def verify_lock_state(self, expected): - self.assertEqual(imp.lock_held(), expected, - "expected imp.lock_held() to be %r" % expected) - def testLock(self): - LOOPS = 50 - - # The import lock may already be held, e.g. if the test suite is run - # via "import test.autotest". - lock_held_at_start = imp.lock_held() - self.verify_lock_state(lock_held_at_start) - - for i in range(LOOPS): - imp.acquire_lock() - self.verify_lock_state(True) - - for i in range(LOOPS): - imp.release_lock() - - # The original state should be restored now. - self.verify_lock_state(lock_held_at_start) - - if not lock_held_at_start: - try: - imp.release_lock() - except RuntimeError: - pass - else: - self.fail("release_lock() without lock should raise " - "RuntimeError") - -class ImportTests(unittest.TestCase): - def setUp(self): - mod = importlib.import_module('test.encoded_modules') - self.test_strings = mod.test_strings - self.test_path = mod.__path__ - - # TODO: RUSTPYTHON - @unittest.expectedFailure - def test_import_encoded_module(self): - for modname, encoding, teststr in self.test_strings: - mod = importlib.import_module('test.encoded_modules.' - 'module_' + modname) - self.assertEqual(teststr, mod.test) - - def test_find_module_encoding(self): - for mod, encoding, _ in self.test_strings: - with imp.find_module('module_' + mod, self.test_path)[0] as fd: - self.assertEqual(fd.encoding, encoding) - - path = [os.path.dirname(__file__)] - with self.assertRaises(SyntaxError): - imp.find_module('badsyntax_pep3120', path) - - def test_issue1267(self): - for mod, encoding, _ in self.test_strings: - fp, filename, info = imp.find_module('module_' + mod, - self.test_path) - with fp: - self.assertNotEqual(fp, None) - self.assertEqual(fp.encoding, encoding) - self.assertEqual(fp.tell(), 0) - self.assertEqual(fp.readline(), '# test %s encoding\n' - % encoding) - - fp, filename, info = imp.find_module("tokenize") - with fp: - self.assertNotEqual(fp, None) - self.assertEqual(fp.encoding, "utf-8") - self.assertEqual(fp.tell(), 0) - self.assertEqual(fp.readline(), - '"""Tokenization help for Python programs.\n') - - def test_issue3594(self): - temp_mod_name = 'test_imp_helper' - sys.path.insert(0, '.') - try: - with open(temp_mod_name + '.py', 'w', encoding="latin-1") as file: - file.write("# coding: cp1252\nu = 'test.test_imp'\n") - file, filename, info = imp.find_module(temp_mod_name) - file.close() - self.assertEqual(file.encoding, 'cp1252') - finally: - del sys.path[0] - os_helper.unlink(temp_mod_name + '.py') - os_helper.unlink(temp_mod_name + '.pyc') - - def test_issue5604(self): - # Test cannot cover imp.load_compiled function. - # Martin von Loewis note what shared library cannot have non-ascii - # character because init_xxx function cannot be compiled - # and issue never happens for dynamic modules. - # But sources modified to follow generic way for processing paths. - - # the return encoding could be uppercase or None - fs_encoding = sys.getfilesystemencoding() - - # covers utf-8 and Windows ANSI code pages - # one non-space symbol from every page - # (http://en.wikipedia.org/wiki/Code_page) - known_locales = { - 'utf-8' : b'\xc3\xa4', - 'cp1250' : b'\x8C', - 'cp1251' : b'\xc0', - 'cp1252' : b'\xc0', - 'cp1253' : b'\xc1', - 'cp1254' : b'\xc0', - 'cp1255' : b'\xe0', - 'cp1256' : b'\xe0', - 'cp1257' : b'\xc0', - 'cp1258' : b'\xc0', - } - - if sys.platform == 'darwin': - self.assertEqual(fs_encoding, 'utf-8') - # Mac OS X uses the Normal Form D decomposition - # http://developer.apple.com/mac/library/qa/qa2001/qa1173.html - special_char = b'a\xcc\x88' - else: - special_char = known_locales.get(fs_encoding) - - if not special_char: - self.skipTest("can't run this test with %s as filesystem encoding" - % fs_encoding) - decoded_char = special_char.decode(fs_encoding) - temp_mod_name = 'test_imp_helper_' + decoded_char - test_package_name = 'test_imp_helper_package_' + decoded_char - init_file_name = os.path.join(test_package_name, '__init__.py') - try: - # if the curdir is not in sys.path the test fails when run with - # ./python ./Lib/test/regrtest.py test_imp - sys.path.insert(0, os.curdir) - with open(temp_mod_name + '.py', 'w', encoding="utf-8") as file: - file.write('a = 1\n') - file, filename, info = imp.find_module(temp_mod_name) - with file: - self.assertIsNotNone(file) - self.assertTrue(filename[:-3].endswith(temp_mod_name)) - self.assertEqual(info[0], '.py') - self.assertEqual(info[1], 'r') - self.assertEqual(info[2], imp.PY_SOURCE) - - mod = imp.load_module(temp_mod_name, file, filename, info) - self.assertEqual(mod.a, 1) - - with warnings.catch_warnings(): - warnings.simplefilter('ignore') - mod = imp.load_source(temp_mod_name, temp_mod_name + '.py') - self.assertEqual(mod.a, 1) - - with warnings.catch_warnings(): - warnings.simplefilter('ignore') - if not sys.dont_write_bytecode: - mod = imp.load_compiled( - temp_mod_name, - imp.cache_from_source(temp_mod_name + '.py')) - self.assertEqual(mod.a, 1) - - if not os.path.exists(test_package_name): - os.mkdir(test_package_name) - with open(init_file_name, 'w', encoding="utf-8") as file: - file.write('b = 2\n') - with warnings.catch_warnings(): - warnings.simplefilter('ignore') - package = imp.load_package(test_package_name, test_package_name) - self.assertEqual(package.b, 2) - finally: - del sys.path[0] - for ext in ('.py', '.pyc'): - os_helper.unlink(temp_mod_name + ext) - os_helper.unlink(init_file_name + ext) - os_helper.rmtree(test_package_name) - os_helper.rmtree('__pycache__') - - def test_issue9319(self): - path = os.path.dirname(__file__) - self.assertRaises(SyntaxError, - imp.find_module, "badsyntax_pep3120", [path]) - - def test_load_from_source(self): - # Verify that the imp module can correctly load and find .py files - # XXX (ncoghlan): It would be nice to use import_helper.CleanImport - # here, but that breaks because the os module registers some - # handlers in copy_reg on import. Since CleanImport doesn't - # revert that registration, the module is left in a broken - # state after reversion. Reinitialising the module contents - # and just reverting os.environ to its previous state is an OK - # workaround - with import_helper.CleanImport('os', 'os.path', OS_PATH_NAME): - import os - orig_path = os.path - orig_getenv = os.getenv - with os_helper.EnvironmentVarGuard(): - x = imp.find_module("os") - self.addCleanup(x[0].close) - new_os = imp.load_module("os", *x) - self.assertIs(os, new_os) - self.assertIs(orig_path, new_os.path) - self.assertIsNot(orig_getenv, new_os.getenv) - - @requires_load_dynamic - def test_issue15828_load_extensions(self): - # Issue 15828 picked up that the adapter between the old imp API - # and importlib couldn't handle C extensions - example = "_heapq" - x = imp.find_module(example) - file_ = x[0] - if file_ is not None: - self.addCleanup(file_.close) - mod = imp.load_module(example, *x) - self.assertEqual(mod.__name__, example) - - @requires_load_dynamic - def test_issue16421_multiple_modules_in_one_dll(self): - # Issue 16421: loading several modules from the same compiled file fails - m = '_testimportmultiple' - fileobj, pathname, description = imp.find_module(m) - fileobj.close() - mod0 = imp.load_dynamic(m, pathname) - mod1 = imp.load_dynamic('_testimportmultiple_foo', pathname) - mod2 = imp.load_dynamic('_testimportmultiple_bar', pathname) - self.assertEqual(mod0.__name__, m) - self.assertEqual(mod1.__name__, '_testimportmultiple_foo') - self.assertEqual(mod2.__name__, '_testimportmultiple_bar') - with self.assertRaises(ImportError): - imp.load_dynamic('nonexistent', pathname) - - @requires_load_dynamic - def test_load_dynamic_ImportError_path(self): - # Issue #1559549 added `name` and `path` attributes to ImportError - # in order to provide better detail. Issue #10854 implemented those - # attributes on import failures of extensions on Windows. - path = 'bogus file path' - name = 'extension' - with self.assertRaises(ImportError) as err: - imp.load_dynamic(name, path) - self.assertIn(path, err.exception.path) - self.assertEqual(name, err.exception.name) - - @requires_load_dynamic - def test_load_module_extension_file_is_None(self): - # When loading an extension module and the file is None, open one - # on the behalf of imp.load_dynamic(). - # Issue #15902 - name = '_testimportmultiple' - found = imp.find_module(name) - if found[0] is not None: - found[0].close() - if found[2][2] != imp.C_EXTENSION: - self.skipTest("found module doesn't appear to be a C extension") - imp.load_module(name, None, *found[1:]) - - @requires_load_dynamic - def test_issue24748_load_module_skips_sys_modules_check(self): - name = 'test.imp_dummy' - try: - del sys.modules[name] - except KeyError: - pass - try: - module = importlib.import_module(name) - spec = importlib.util.find_spec('_testmultiphase') - module = imp.load_dynamic(name, spec.origin) - self.assertEqual(module.__name__, name) - self.assertEqual(module.__spec__.name, name) - self.assertEqual(module.__spec__.origin, spec.origin) - self.assertRaises(AttributeError, getattr, module, 'dummy_name') - self.assertEqual(module.int_const, 1969) - self.assertIs(sys.modules[name], module) - finally: - try: - del sys.modules[name] - except KeyError: - pass - - @unittest.skipIf(sys.dont_write_bytecode, - "test meaningful only when writing bytecode") - def test_bug7732(self): - with os_helper.temp_cwd(): - source = os_helper.TESTFN + '.py' - os.mkdir(source) - self.assertRaisesRegex(ImportError, '^No module', - imp.find_module, os_helper.TESTFN, ["."]) - - def test_multiple_calls_to_get_data(self): - # Issue #18755: make sure multiple calls to get_data() can succeed. - loader = imp._LoadSourceCompatibility('imp', imp.__file__, - open(imp.__file__, encoding="utf-8")) - loader.get_data(imp.__file__) # File should be closed - loader.get_data(imp.__file__) # Will need to create a newly opened file - - def test_load_source(self): - # Create a temporary module since load_source(name) modifies - # sys.modules[name] attributes like __loader___ - modname = f"tmp{__name__}" - mod = type(sys.modules[__name__])(modname) - with support.swap_item(sys.modules, modname, mod): - with self.assertRaisesRegex(ValueError, 'embedded null'): - imp.load_source(modname, __file__ + "\0") - - @support.cpython_only - def test_issue31315(self): - # There shouldn't be an assertion failure in imp.create_dynamic(), - # when spec.name is not a string. - create_dynamic = support.get_attribute(imp, 'create_dynamic') - class BadSpec: - name = None - origin = 'foo' - with self.assertRaises(TypeError): - create_dynamic(BadSpec()) - - def test_issue_35321(self): - # Both _frozen_importlib and _frozen_importlib_external - # should have a spec origin of "frozen" and - # no need to clean up imports in this case. - - import _frozen_importlib_external - self.assertEqual(_frozen_importlib_external.__spec__.origin, "frozen") - - import _frozen_importlib - self.assertEqual(_frozen_importlib.__spec__.origin, "frozen") - - # TODO: RUSTPYTHON - @unittest.expectedFailure - def test_source_hash(self): - self.assertEqual(_imp.source_hash(42, b'hi'), b'\xfb\xd9G\x05\xaf$\x9b~') - self.assertEqual(_imp.source_hash(43, b'hi'), b'\xd0/\x87C\xccC\xff\xe2') - - def test_pyc_invalidation_mode_from_cmdline(self): - cases = [ - ([], "default"), - (["--check-hash-based-pycs", "default"], "default"), - (["--check-hash-based-pycs", "always"], "always"), - (["--check-hash-based-pycs", "never"], "never"), - ] - for interp_args, expected in cases: - args = interp_args + [ - "-c", - "import _imp; print(_imp.check_hash_based_pycs)", - ] - res = script_helper.assert_python_ok(*args) - self.assertEqual(res.out.strip().decode('utf-8'), expected) - - def test_find_and_load_checked_pyc(self): - # issue 34056 - with os_helper.temp_cwd(): - with open('mymod.py', 'wb') as fp: - fp.write(b'x = 42\n') - py_compile.compile( - 'mymod.py', - doraise=True, - invalidation_mode=py_compile.PycInvalidationMode.CHECKED_HASH, - ) - file, path, description = imp.find_module('mymod', path=['.']) - mod = imp.load_module('mymod', file, path, description) - self.assertEqual(mod.x, 42) - - - @support.cpython_only - def test_create_builtin_subinterp(self): - # gh-99578: create_builtin() behavior changes after the creation of the - # first sub-interpreter. Test both code paths, before and after the - # creation of a sub-interpreter. Previously, create_builtin() had - # a reference leak after the creation of the first sub-interpreter. - - import builtins - create_builtin = support.get_attribute(_imp, "create_builtin") - class Spec: - name = "builtins" - spec = Spec() - - def check_get_builtins(): - refcnt = sys.getrefcount(builtins) - mod = _imp.create_builtin(spec) - self.assertIs(mod, builtins) - self.assertEqual(sys.getrefcount(builtins), refcnt + 1) - # Check that a GC collection doesn't crash - gc.collect() - - check_get_builtins() - - ret = support.run_in_subinterp("import builtins") - self.assertEqual(ret, 0) - - check_get_builtins() - - -class ReloadTests(unittest.TestCase): - - """Very basic tests to make sure that imp.reload() operates just like - reload().""" - - def test_source(self): - # XXX (ncoghlan): It would be nice to use test.import_helper.CleanImport - # here, but that breaks because the os module registers some - # handlers in copy_reg on import. Since CleanImport doesn't - # revert that registration, the module is left in a broken - # state after reversion. Reinitialising the module contents - # and just reverting os.environ to its previous state is an OK - # workaround - with os_helper.EnvironmentVarGuard(): - import os - imp.reload(os) - - def test_extension(self): - with import_helper.CleanImport('time'): - import time - imp.reload(time) - - def test_builtin(self): - with import_helper.CleanImport('marshal'): - import marshal - imp.reload(marshal) - - def test_with_deleted_parent(self): - # see #18681 - from html import parser - html = sys.modules.pop('html') - def cleanup(): - sys.modules['html'] = html - self.addCleanup(cleanup) - with self.assertRaisesRegex(ImportError, 'html'): - imp.reload(parser) - - -class PEP3147Tests(unittest.TestCase): - """Tests of PEP 3147.""" - - tag = imp.get_tag() - - @unittest.skipUnless(sys.implementation.cache_tag is not None, - 'requires sys.implementation.cache_tag not be None') - def test_cache_from_source(self): - # Given the path to a .py file, return the path to its PEP 3147 - # defined .pyc file (i.e. under __pycache__). - path = os.path.join('foo', 'bar', 'baz', 'qux.py') - expect = os.path.join('foo', 'bar', 'baz', '__pycache__', - 'qux.{}.pyc'.format(self.tag)) - self.assertEqual(imp.cache_from_source(path, True), expect) - - @unittest.skipUnless(sys.implementation.cache_tag is not None, - 'requires sys.implementation.cache_tag to not be ' - 'None') - def test_source_from_cache(self): - # Given the path to a PEP 3147 defined .pyc file, return the path to - # its source. This tests the good path. - path = os.path.join('foo', 'bar', 'baz', '__pycache__', - 'qux.{}.pyc'.format(self.tag)) - expect = os.path.join('foo', 'bar', 'baz', 'qux.py') - self.assertEqual(imp.source_from_cache(path), expect) - - -class NullImporterTests(unittest.TestCase): - @unittest.skipIf(os_helper.TESTFN_UNENCODABLE is None, - "Need an undecodeable filename") - def test_unencodeable(self): - name = os_helper.TESTFN_UNENCODABLE - os.mkdir(name) - try: - self.assertRaises(ImportError, imp.NullImporter, name) - finally: - os.rmdir(name) - - -if __name__ == "__main__": - unittest.main() diff --git a/Lib/test/test_io.py b/Lib/test/test_io.py index 9cc22b959d..e91d454b72 100644 --- a/Lib/test/test_io.py +++ b/Lib/test/test_io.py @@ -5098,13 +5098,15 @@ def alarm2(sig, frame): if e.errno != errno.EBADF: raise - @unittest.skip("TODO: RUSTPYTHON, thread 'main' panicked at 'already borrowed: BorrowMutError'") + # TODO: RUSTPYTHON + @unittest.expectedFailure @requires_alarm @support.requires_resource('walltime') def test_interrupted_write_retry_buffered(self): self.check_interrupted_write_retry(b"x", mode="wb") - @unittest.skip("TODO: RUSTPYTHON, thread 'main' panicked at 'already borrowed: BorrowMutError'") + # TODO: RUSTPYTHON + @unittest.expectedFailure @requires_alarm @support.requires_resource('walltime') def test_interrupted_write_retry_text(self): diff --git a/Lib/test/test_ipaddress.py b/Lib/test/test_ipaddress.py index fc27628af1..e69e12495a 100644 --- a/Lib/test/test_ipaddress.py +++ b/Lib/test/test_ipaddress.py @@ -303,6 +303,14 @@ def test_pickle(self): def test_weakref(self): weakref.ref(self.factory('192.0.2.1')) + def test_ipv6_mapped(self): + self.assertEqual(ipaddress.IPv4Address('192.168.1.1').ipv6_mapped, + ipaddress.IPv6Address('::ffff:192.168.1.1')) + self.assertEqual(ipaddress.IPv4Address('192.168.1.1').ipv6_mapped, + ipaddress.IPv6Address('::ffff:c0a8:101')) + self.assertEqual(ipaddress.IPv4Address('192.168.1.1').ipv6_mapped.ipv4_mapped, + ipaddress.IPv4Address('192.168.1.1')) + class AddressTestCase_v6(BaseTestCase, CommonTestMixin_v6): factory = ipaddress.IPv6Address @@ -389,6 +397,19 @@ def assertBadSplit(addr): # A trailing IPv4 address is two parts assertBadSplit("10:9:8:7:6:5:4:3:42.42.42.42%scope") + def test_bad_address_split_v6_too_long(self): + def assertBadSplit(addr): + msg = r"At most 45 characters expected in '%s" + with self.assertAddressError(msg, re.escape(addr[:45])): + ipaddress.IPv6Address(addr) + + # Long IPv6 address + long_addr = ("0:" * 10000) + "0" + assertBadSplit(long_addr) + assertBadSplit(long_addr + "%zoneid") + assertBadSplit(long_addr + ":255.255.255.255") + assertBadSplit(long_addr + ":ffff:255.255.255.255") + def test_bad_address_split_v6_too_many_parts(self): def assertBadSplit(addr): msg = "Exactly 8 parts expected without '::' in %r" @@ -886,8 +907,8 @@ class ComparisonTests(unittest.TestCase): v6net = ipaddress.IPv6Network(1) v6intf = ipaddress.IPv6Interface(1) v6addr_scoped = ipaddress.IPv6Address('::1%scope') - v6net_scoped= ipaddress.IPv6Network('::1%scope') - v6intf_scoped= ipaddress.IPv6Interface('::1%scope') + v6net_scoped = ipaddress.IPv6Network('::1%scope') + v6intf_scoped = ipaddress.IPv6Interface('::1%scope') v4_addresses = [v4addr, v4intf] v4_objects = v4_addresses + [v4net] @@ -1075,6 +1096,7 @@ def setUp(self): self.ipv6_scoped_interface = ipaddress.IPv6Interface( '2001:658:22a:cafe:200:0:0:1%scope/64') self.ipv6_scoped_network = ipaddress.IPv6Network('2001:658:22a:cafe::%scope/64') + self.ipv6_with_ipv4_part = ipaddress.IPv6Interface('::1.2.3.4') def testRepr(self): self.assertEqual("IPv4Interface('1.2.3.4/32')", @@ -1328,6 +1350,17 @@ def testGetIp(self): self.assertEqual(str(self.ipv6_scoped_interface.ip), '2001:658:22a:cafe:200::1') + def testIPv6IPv4MappedStringRepresentation(self): + long_prefix = '0000:0000:0000:0000:0000:ffff:' + short_prefix = '::ffff:' + ipv4 = '1.2.3.4' + ipv6_ipv4_str = short_prefix + ipv4 + ipv6_ipv4_addr = ipaddress.IPv6Address(ipv6_ipv4_str) + ipv6_ipv4_iface = ipaddress.IPv6Interface(ipv6_ipv4_str) + self.assertEqual(str(ipv6_ipv4_addr), ipv6_ipv4_str) + self.assertEqual(ipv6_ipv4_addr.exploded, long_prefix + ipv4) + self.assertEqual(str(ipv6_ipv4_iface.ip), ipv6_ipv4_str) + def testGetScopeId(self): self.assertEqual(self.ipv6_address.scope_id, None) @@ -1694,6 +1727,8 @@ def testEqual(self): self.assertTrue(self.ipv6_scoped_interface == ipaddress.IPv6Interface('2001:658:22a:cafe:200::1%scope/64')) + self.assertTrue(self.ipv6_with_ipv4_part == + ipaddress.IPv6Interface('0000:0000:0000:0000:0000:0000:0102:0304')) self.assertFalse(self.ipv6_scoped_interface == ipaddress.IPv6Interface('2001:658:22a:cafe:200::1%scope/63')) self.assertFalse(self.ipv6_scoped_interface == @@ -2156,6 +2191,11 @@ def testIPv6AddressTooLarge(self): self.assertEqual(ipaddress.ip_address('FFFF::192.0.2.1'), ipaddress.ip_address('FFFF::c000:201')) + self.assertEqual(ipaddress.ip_address('0000:0000:0000:0000:0000:FFFF:192.168.255.255'), + ipaddress.ip_address('::ffff:c0a8:ffff')) + self.assertEqual(ipaddress.ip_address('FFFF:0000:0000:0000:0000:0000:192.168.255.255'), + ipaddress.ip_address('ffff::c0a8:ffff')) + self.assertEqual(ipaddress.ip_address('::FFFF:192.0.2.1%scope'), ipaddress.ip_address('::FFFF:c000:201%scope')) self.assertEqual(ipaddress.ip_address('FFFF::192.0.2.1%scope'), @@ -2168,11 +2208,16 @@ def testIPv6AddressTooLarge(self): ipaddress.ip_address('::FFFF:c000:201%scope')) self.assertNotEqual(ipaddress.ip_address('FFFF::192.0.2.1'), ipaddress.ip_address('FFFF::c000:201%scope')) + self.assertEqual(ipaddress.ip_address('0000:0000:0000:0000:0000:FFFF:192.168.255.255%scope'), + ipaddress.ip_address('::ffff:c0a8:ffff%scope')) + self.assertEqual(ipaddress.ip_address('FFFF:0000:0000:0000:0000:0000:192.168.255.255%scope'), + ipaddress.ip_address('ffff::c0a8:ffff%scope')) def testIPVersion(self): self.assertEqual(self.ipv4_address.version, 4) self.assertEqual(self.ipv6_address.version, 6) self.assertEqual(self.ipv6_scoped_address.version, 6) + self.assertEqual(self.ipv6_with_ipv4_part.version, 6) def testMaxPrefixLength(self): self.assertEqual(self.ipv4_interface.max_prefixlen, 32) @@ -2269,6 +2314,10 @@ def testReservedIpv4(self): self.assertEqual(True, ipaddress.ip_address( '172.31.255.255').is_private) self.assertEqual(False, ipaddress.ip_address('172.32.0.0').is_private) + self.assertFalse(ipaddress.ip_address('192.0.0.0').is_global) + self.assertTrue(ipaddress.ip_address('192.0.0.9').is_global) + self.assertTrue(ipaddress.ip_address('192.0.0.10').is_global) + self.assertFalse(ipaddress.ip_address('192.0.0.255').is_global) self.assertEqual(True, ipaddress.ip_address('169.254.100.200').is_link_local) @@ -2294,6 +2343,7 @@ def testPrivateNetworks(self): self.assertEqual(True, ipaddress.ip_network("169.254.0.0/16").is_private) self.assertEqual(True, ipaddress.ip_network("172.16.0.0/12").is_private) self.assertEqual(True, ipaddress.ip_network("192.0.0.0/29").is_private) + self.assertEqual(False, ipaddress.ip_network("192.0.0.9/32").is_private) self.assertEqual(True, ipaddress.ip_network("192.0.0.170/31").is_private) self.assertEqual(True, ipaddress.ip_network("192.0.2.0/24").is_private) self.assertEqual(True, ipaddress.ip_network("192.168.0.0/16").is_private) @@ -2310,8 +2360,8 @@ def testPrivateNetworks(self): self.assertEqual(True, ipaddress.ip_network("::/128").is_private) self.assertEqual(True, ipaddress.ip_network("::ffff:0:0/96").is_private) self.assertEqual(True, ipaddress.ip_network("100::/64").is_private) - self.assertEqual(True, ipaddress.ip_network("2001::/23").is_private) self.assertEqual(True, ipaddress.ip_network("2001:2::/48").is_private) + self.assertEqual(False, ipaddress.ip_network("2001:3::/48").is_private) self.assertEqual(True, ipaddress.ip_network("2001:db8::/32").is_private) self.assertEqual(True, ipaddress.ip_network("2001:10::/28").is_private) self.assertEqual(True, ipaddress.ip_network("fc00::/7").is_private) @@ -2390,6 +2440,22 @@ def testReservedIpv6(self): self.assertEqual(True, ipaddress.ip_address('0::0').is_unspecified) self.assertEqual(False, ipaddress.ip_address('::1').is_unspecified) + self.assertFalse(ipaddress.ip_address('64:ff9b:1::').is_global) + self.assertFalse(ipaddress.ip_address('2001::').is_global) + self.assertTrue(ipaddress.ip_address('2001:1::1').is_global) + self.assertTrue(ipaddress.ip_address('2001:1::2').is_global) + self.assertFalse(ipaddress.ip_address('2001:2::').is_global) + self.assertTrue(ipaddress.ip_address('2001:3::').is_global) + self.assertFalse(ipaddress.ip_address('2001:4::').is_global) + self.assertTrue(ipaddress.ip_address('2001:4:112::').is_global) + self.assertFalse(ipaddress.ip_address('2001:10::').is_global) + self.assertTrue(ipaddress.ip_address('2001:20::').is_global) + self.assertTrue(ipaddress.ip_address('2001:30::').is_global) + self.assertFalse(ipaddress.ip_address('2001:40::').is_global) + self.assertFalse(ipaddress.ip_address('2002::').is_global) + # gh-124217: conform with RFC 9637 + self.assertFalse(ipaddress.ip_address('3fff::').is_global) + # some generic IETF reserved addresses self.assertEqual(True, ipaddress.ip_address('100::').is_reserved) self.assertEqual(True, ipaddress.ip_network('4000::1/128').is_reserved) @@ -2402,12 +2468,52 @@ def testIpv4Mapped(self): self.assertEqual(ipaddress.ip_address('::ffff:c0a8:101').ipv4_mapped, ipaddress.ip_address('192.168.1.1')) + def testIpv4MappedProperties(self): + # Test that an IPv4 mapped IPv6 address has + # the same properties as an IPv4 address. + for addr4 in ( + "178.62.3.251", # global + "169.254.169.254", # link local + "127.0.0.1", # loopback + "224.0.0.1", # multicast + "192.168.0.1", # private + "0.0.0.0", # unspecified + "100.64.0.1", # public and not global + ): + with self.subTest(addr4): + ipv4 = ipaddress.IPv4Address(addr4) + ipv6 = ipaddress.IPv6Address(f"::ffff:{addr4}") + + self.assertEqual(ipv4.is_global, ipv6.is_global) + self.assertEqual(ipv4.is_private, ipv6.is_private) + self.assertEqual(ipv4.is_reserved, ipv6.is_reserved) + self.assertEqual(ipv4.is_multicast, ipv6.is_multicast) + self.assertEqual(ipv4.is_unspecified, ipv6.is_unspecified) + self.assertEqual(ipv4.is_link_local, ipv6.is_link_local) + self.assertEqual(ipv4.is_loopback, ipv6.is_loopback) + def testIpv4MappedPrivateCheck(self): self.assertEqual( True, ipaddress.ip_address('::ffff:192.168.1.1').is_private) self.assertEqual( False, ipaddress.ip_address('::ffff:172.32.0.0').is_private) + def testIpv4MappedLoopbackCheck(self): + # test networks + self.assertEqual(True, ipaddress.ip_network( + '::ffff:127.100.200.254/128').is_loopback) + self.assertEqual(True, ipaddress.ip_network( + '::ffff:127.42.0.0/112').is_loopback) + self.assertEqual(False, ipaddress.ip_network( + '::ffff:128.0.0.0').is_loopback) + # test addresses + self.assertEqual(True, ipaddress.ip_address( + '::ffff:127.100.200.254').is_loopback) + self.assertEqual(True, ipaddress.ip_address( + '::ffff:127.42.0.0').is_loopback) + self.assertEqual(False, ipaddress.ip_address( + '::ffff:128.0.0.0').is_loopback) + def testAddrExclude(self): addr1 = ipaddress.ip_network('10.1.1.0/24') addr2 = ipaddress.ip_network('10.1.1.0/26') @@ -2509,6 +2615,10 @@ def testCompressIPv6Address(self): '::7:6:5:4:3:2:0': '0:7:6:5:4:3:2:0/128', '7:6:5:4:3:2:1::': '7:6:5:4:3:2:1:0/128', '0:6:5:4:3:2:1::': '0:6:5:4:3:2:1:0/128', + '0000:0000:0000:0000:0000:0000:255.255.255.255': '::ffff:ffff/128', + '0000:0000:0000:0000:0000:ffff:255.255.255.255': '::ffff:255.255.255.255/128', + 'ffff:ffff:ffff:ffff:ffff:ffff:255.255.255.255': + 'ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff/128', } for uncompressed, compressed in list(test_addresses.items()): self.assertEqual(compressed, str(ipaddress.IPv6Interface( @@ -2531,12 +2641,42 @@ def testExplodeShortHandIpStr(self): self.assertEqual('192.168.178.1', addr4.exploded) def testReversePointer(self): - addr1 = ipaddress.IPv4Address('127.0.0.1') - addr2 = ipaddress.IPv6Address('2001:db8::1') - self.assertEqual('1.0.0.127.in-addr.arpa', addr1.reverse_pointer) - self.assertEqual('1.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.8.' + - 'b.d.0.1.0.0.2.ip6.arpa', - addr2.reverse_pointer) + for addr_v4, expected in [ + ('127.0.0.1', '1.0.0.127.in-addr.arpa'), + # test vector: https://www.rfc-editor.org/rfc/rfc1035, §3.5 + ('10.2.0.52', '52.0.2.10.in-addr.arpa'), + ]: + with self.subTest('ipv4_reverse_pointer', addr=addr_v4): + addr = ipaddress.IPv4Address(addr_v4) + self.assertEqual(addr.reverse_pointer, expected) + + for addr_v6, expected in [ + ( + '2001:db8::1', ( + '1.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.' + '0.0.0.0.0.0.0.0.8.b.d.0.1.0.0.2.' + 'ip6.arpa' + ) + ), + ( + '::FFFF:192.168.1.35', ( + '3.2.1.0.8.a.0.c.f.f.f.f.0.0.0.0.' + '0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.' + 'ip6.arpa' + ) + ), + # test vector: https://www.rfc-editor.org/rfc/rfc3596, §2.5 + ( + '4321:0:1:2:3:4:567:89ab', ( + 'b.a.9.8.7.6.5.0.4.0.0.0.3.0.0.0.' + '2.0.0.0.1.0.0.0.0.0.0.0.1.2.3.4.' + 'ip6.arpa' + ) + ) + ]: + with self.subTest('ipv6_reverse_pointer', addr=addr_v6): + addr = ipaddress.IPv6Address(addr_v6) + self.assertEqual(addr.reverse_pointer, expected) def testIntRepresentation(self): self.assertEqual(16909060, int(self.ipv4_address)) @@ -2642,6 +2782,34 @@ def testV6HashIsNotConstant(self): ipv6_address2 = ipaddress.IPv6Interface("2001:658:22a:cafe:200:0:0:2") self.assertNotEqual(ipv6_address1.__hash__(), ipv6_address2.__hash__()) + # issue 134062 Hash collisions in IPv4Network and IPv6Network + def testNetworkV4HashCollisions(self): + self.assertNotEqual( + ipaddress.IPv4Network("192.168.1.255/32").__hash__(), + ipaddress.IPv4Network("192.168.1.0/24").__hash__() + ) + self.assertNotEqual( + ipaddress.IPv4Network("172.24.255.0/24").__hash__(), + ipaddress.IPv4Network("172.24.0.0/16").__hash__() + ) + self.assertNotEqual( + ipaddress.IPv4Network("192.168.1.87/32").__hash__(), + ipaddress.IPv4Network("192.168.1.86/31").__hash__() + ) + + # issue 134062 Hash collisions in IPv4Network and IPv6Network + def testNetworkV6HashCollisions(self): + self.assertNotEqual( + ipaddress.IPv6Network("fe80::/64").__hash__(), + ipaddress.IPv6Network("fe80::ffff:ffff:ffff:0/112").__hash__() + ) + self.assertNotEqual( + ipaddress.IPv4Network("10.0.0.0/8").__hash__(), + ipaddress.IPv6Network( + "ffff:ffff:ffff:ffff:ffff:ffff:aff:0/112" + ).__hash__() + ) + if __name__ == '__main__': unittest.main() diff --git a/Lib/test/test_isinstance.py b/Lib/test/test_isinstance.py index 9d37cff990..de80e47209 100644 --- a/Lib/test/test_isinstance.py +++ b/Lib/test/test_isinstance.py @@ -3,12 +3,11 @@ # testing of error conditions uncovered when using extension types. import unittest -import sys import typing from test import support - + class TestIsInstanceExceptions(unittest.TestCase): # Test to make sure that an AttributeError when accessing the instance's # class's bases is masked. This was actually a bug in Python 2.2 and @@ -97,7 +96,7 @@ def getclass(self): class D: pass self.assertRaises(RuntimeError, isinstance, c, D) - + # These tests are similar to above, but tickle certain code paths in # issubclass() instead of isinstance() -- really PyObject_IsSubclass() # vs. PyObject_IsInstance(). @@ -147,7 +146,7 @@ def getbases(self): self.assertRaises(TypeError, issubclass, B, C()) - + # meta classes for creating abstract classes and instances class AbstractClass(object): def __init__(self, bases): @@ -179,7 +178,7 @@ class Super: class Child(Super): pass - + class TestIsInstanceIsSubclass(unittest.TestCase): # Tests to ensure that isinstance and issubclass work on abstract # classes and instances. Before the 2.2 release, TypeErrors were @@ -225,7 +224,7 @@ def test_isinstance_with_or_union(self): with self.assertRaises(TypeError): isinstance(2, list[int] | int) with self.assertRaises(TypeError): - isinstance(2, int | str | list[int] | float) + isinstance(2, float | str | list[int] | int) @@ -311,7 +310,7 @@ class X: @property def __bases__(self): return self.__bases__ - with support.infinite_recursion(): + with support.infinite_recursion(25): self.assertRaises(RecursionError, issubclass, X(), int) self.assertRaises(RecursionError, issubclass, int, X()) self.assertRaises(RecursionError, isinstance, 1, X()) @@ -345,7 +344,7 @@ class B: pass A.__getattr__ = B.__getattr__ = X.__getattr__ return (A(), B()) - with support.infinite_recursion(): + with support.infinite_recursion(25): self.assertRaises(RecursionError, issubclass, X(), int) @@ -353,10 +352,12 @@ def blowstack(fxn, arg, compare_to): # Make sure that calling isinstance with a deeply nested tuple for its # argument will raise RecursionError eventually. tuple_arg = (compare_to,) + # XXX: RUSTPYTHON; support.exceeds_recursion_limit() is not available yet. + import sys for cnt in range(sys.getrecursionlimit()+5): tuple_arg = (tuple_arg,) fxn(arg, tuple_arg) - + if __name__ == '__main__': unittest.main() diff --git a/Lib/test/test_itertools.py b/Lib/test/test_itertools.py index e7b00da71c..8709948b92 100644 --- a/Lib/test/test_itertools.py +++ b/Lib/test/test_itertools.py @@ -1182,8 +1182,6 @@ def test_pairwise(self): with self.assertRaises(TypeError): pairwise(None) # non-iterable argument - # TODO: RUSTPYTHON - @unittest.skip("TODO: RUSTPYTHON, hangs") def test_pairwise_reenter(self): def check(reenter_at, expected): class I: @@ -1234,8 +1232,6 @@ def __next__(self): ([5], [6]), ]) - # TODO: RUSTPYTHON - @unittest.skip("TODO: RUSTPYTHON, hangs") def test_pairwise_reenter2(self): def check(maxcount, expected): class I: diff --git a/Lib/test/test_linecache.py b/Lib/test/test_linecache.py index 72dd40136c..e23e1cc942 100644 --- a/Lib/test/test_linecache.py +++ b/Lib/test/test_linecache.py @@ -5,8 +5,10 @@ import os.path import tempfile import tokenize +from importlib.machinery import ModuleSpec from test import support from test.support import os_helper +from test.support.script_helper import assert_python_ok FILENAME = linecache.__file__ @@ -82,6 +84,10 @@ def test_getlines(self): class EmptyFile(GetLineTestsGoodData, unittest.TestCase): file_list = [] + def test_getlines(self): + lines = linecache.getlines(self.file_name) + self.assertEqual(lines, ['\n']) + class SingleEmptyLine(GetLineTestsGoodData, unittest.TestCase): file_list = ['\n'] @@ -97,6 +103,16 @@ class BadUnicode_WithDeclaration(GetLineTestsBadData, unittest.TestCase): file_byte_string = b'# coding=utf-8\n\x80abc' +class FakeLoader: + def get_source(self, fullname): + return f'source for {fullname}' + + +class NoSourceLoader: + def get_source(self, fullname): + return None + + class LineCacheTests(unittest.TestCase): def test_getline(self): @@ -238,6 +254,70 @@ def raise_memoryerror(*args, **kwargs): self.assertEqual(lines3, []) self.assertEqual(linecache.getlines(FILENAME), lines) + def test_loader(self): + filename = 'scheme://path' + + for loader in (None, object(), NoSourceLoader()): + linecache.clearcache() + module_globals = {'__name__': 'a.b.c', '__loader__': loader} + self.assertEqual(linecache.getlines(filename, module_globals), []) + + linecache.clearcache() + module_globals = {'__name__': 'a.b.c', '__loader__': FakeLoader()} + self.assertEqual(linecache.getlines(filename, module_globals), + ['source for a.b.c\n']) + + for spec in (None, object(), ModuleSpec('', FakeLoader())): + linecache.clearcache() + module_globals = {'__name__': 'a.b.c', '__loader__': FakeLoader(), + '__spec__': spec} + self.assertEqual(linecache.getlines(filename, module_globals), + ['source for a.b.c\n']) + + linecache.clearcache() + spec = ModuleSpec('x.y.z', FakeLoader()) + module_globals = {'__name__': 'a.b.c', '__loader__': spec.loader, + '__spec__': spec} + self.assertEqual(linecache.getlines(filename, module_globals), + ['source for x.y.z\n']) + + def test_invalid_names(self): + for name, desc in [ + ('\x00', 'NUL bytes filename'), + (__file__ + '\x00', 'filename with embedded NUL bytes'), + # A filename with surrogate codes. A UnicodeEncodeError is raised + # by os.stat() upon querying, which is a subclass of ValueError. + ("\uD834\uDD1E.py", 'surrogate codes (MUSICAL SYMBOL G CLEF)'), + # For POSIX platforms, an OSError will be raised but for Windows + # platforms, a ValueError is raised due to the path_t converter. + # See: https://github.com/python/cpython/issues/122170 + ('a' * 1_000_000, 'very long filename'), + ]: + with self.subTest(f'updatecache: {desc}'): + linecache.clearcache() + lines = linecache.updatecache(name) + self.assertListEqual(lines, []) + self.assertNotIn(name, linecache.cache) + + # hack into the cache (it shouldn't be allowed + # but we never know what people do...) + for key, fullname in [(name, 'ok'), ('key', name), (name, name)]: + with self.subTest(f'checkcache: {desc}', + key=key, fullname=fullname): + linecache.clearcache() + linecache.cache[key] = (0, 1234, [], fullname) + linecache.checkcache(key) + self.assertNotIn(key, linecache.cache) + + # just to be sure that we did not mess with cache + linecache.clearcache() + + def test_linecache_python_string(self): + cmdline = "import linecache;assert len(linecache.cache) == 0" + retcode, stdout, stderr = assert_python_ok('-c', cmdline) + self.assertEqual(retcode, 0) + self.assertEqual(stdout, b'') + self.assertEqual(stderr, b'') class LineCacheInvalidationTests(unittest.TestCase): def setUp(self): diff --git a/Lib/test/test_lzma.py b/Lib/test/test_lzma.py new file mode 100644 index 0000000000..1bac61f59e --- /dev/null +++ b/Lib/test/test_lzma.py @@ -0,0 +1,2197 @@ +import _compression +import array +from io import BytesIO, UnsupportedOperation, DEFAULT_BUFFER_SIZE +import os +import pickle +import random +import sys +from test import support +import unittest + +from test.support import _4G, bigmemtest +from test.support.import_helper import import_module +from test.support.os_helper import ( + TESTFN, unlink, FakePath +) + +lzma = import_module("lzma") +from lzma import LZMACompressor, LZMADecompressor, LZMAError, LZMAFile + + +class CompressorDecompressorTestCase(unittest.TestCase): + + # Test error cases. + + # TODO: RUSTPYTHON + @unittest.expectedFailure + def test_simple_bad_args(self): + self.assertRaises(TypeError, LZMACompressor, []) + self.assertRaises(TypeError, LZMACompressor, format=3.45) + self.assertRaises(TypeError, LZMACompressor, check="") + self.assertRaises(TypeError, LZMACompressor, preset="asdf") + self.assertRaises(TypeError, LZMACompressor, filters=3) + # Can't specify FORMAT_AUTO when compressing. + self.assertRaises(ValueError, LZMACompressor, format=lzma.FORMAT_AUTO) + # Can't specify a preset and a custom filter chain at the same time. + with self.assertRaises(ValueError): + LZMACompressor(preset=7, filters=[{"id": lzma.FILTER_LZMA2}]) + + self.assertRaises(TypeError, LZMADecompressor, ()) + self.assertRaises(TypeError, LZMADecompressor, memlimit=b"qw") + with self.assertRaises(TypeError): + LZMADecompressor(lzma.FORMAT_RAW, filters="zzz") + # Cannot specify a memory limit with FILTER_RAW. + with self.assertRaises(ValueError): + LZMADecompressor(lzma.FORMAT_RAW, memlimit=0x1000000) + # Can only specify a custom filter chain with FILTER_RAW. + self.assertRaises(ValueError, LZMADecompressor, filters=FILTERS_RAW_1) + with self.assertRaises(ValueError): + LZMADecompressor(format=lzma.FORMAT_XZ, filters=FILTERS_RAW_1) + with self.assertRaises(ValueError): + LZMADecompressor(format=lzma.FORMAT_ALONE, filters=FILTERS_RAW_1) + + lzc = LZMACompressor() + self.assertRaises(TypeError, lzc.compress) + self.assertRaises(TypeError, lzc.compress, b"foo", b"bar") + self.assertRaises(TypeError, lzc.flush, b"blah") + empty = lzc.flush() + self.assertRaises(ValueError, lzc.compress, b"quux") + self.assertRaises(ValueError, lzc.flush) + + lzd = LZMADecompressor() + self.assertRaises(TypeError, lzd.decompress) + self.assertRaises(TypeError, lzd.decompress, b"foo", b"bar") + lzd.decompress(empty) + self.assertRaises(EOFError, lzd.decompress, b"quux") + + # TODO: RUSTPYTHON + @unittest.expectedFailure + def test_bad_filter_spec(self): + self.assertRaises(TypeError, LZMACompressor, filters=[b"wobsite"]) + self.assertRaises(ValueError, LZMACompressor, filters=[{"xyzzy": 3}]) + self.assertRaises(ValueError, LZMACompressor, filters=[{"id": 98765}]) + with self.assertRaises(ValueError): + LZMACompressor(filters=[{"id": lzma.FILTER_LZMA2, "foo": 0}]) + with self.assertRaises(ValueError): + LZMACompressor(filters=[{"id": lzma.FILTER_DELTA, "foo": 0}]) + with self.assertRaises(ValueError): + LZMACompressor(filters=[{"id": lzma.FILTER_X86, "foo": 0}]) + + def test_decompressor_after_eof(self): + lzd = LZMADecompressor() + lzd.decompress(COMPRESSED_XZ) + self.assertRaises(EOFError, lzd.decompress, b"nyan") + + # TODO: RUSTPYTHON + @unittest.expectedFailure + def test_decompressor_memlimit(self): + lzd = LZMADecompressor(memlimit=1024) + self.assertRaises(LZMAError, lzd.decompress, COMPRESSED_XZ) + + lzd = LZMADecompressor(lzma.FORMAT_XZ, memlimit=1024) + self.assertRaises(LZMAError, lzd.decompress, COMPRESSED_XZ) + + lzd = LZMADecompressor(lzma.FORMAT_ALONE, memlimit=1024) + self.assertRaises(LZMAError, lzd.decompress, COMPRESSED_ALONE) + + # Test LZMADecompressor on known-good input data. + + def _test_decompressor(self, lzd, data, check, unused_data=b""): + self.assertFalse(lzd.eof) + out = lzd.decompress(data) + self.assertEqual(out, INPUT) + self.assertEqual(lzd.check, check) + self.assertTrue(lzd.eof) + self.assertEqual(lzd.unused_data, unused_data) + + # TODO: RUSTPYTHON + @unittest.expectedFailure + def test_decompressor_auto(self): + lzd = LZMADecompressor() + self._test_decompressor(lzd, COMPRESSED_XZ, lzma.CHECK_CRC64) + + lzd = LZMADecompressor() + self._test_decompressor(lzd, COMPRESSED_ALONE, lzma.CHECK_NONE) + + # TODO: RUSTPYTHON + @unittest.expectedFailure + def test_decompressor_xz(self): + lzd = LZMADecompressor(lzma.FORMAT_XZ) + self._test_decompressor(lzd, COMPRESSED_XZ, lzma.CHECK_CRC64) + + # TODO: RUSTPYTHON + @unittest.expectedFailure + def test_decompressor_alone(self): + lzd = LZMADecompressor(lzma.FORMAT_ALONE) + self._test_decompressor(lzd, COMPRESSED_ALONE, lzma.CHECK_NONE) + + # TODO: RUSTPYTHON + @unittest.expectedFailure + def test_decompressor_raw_1(self): + lzd = LZMADecompressor(lzma.FORMAT_RAW, filters=FILTERS_RAW_1) + self._test_decompressor(lzd, COMPRESSED_RAW_1, lzma.CHECK_NONE) + + # TODO: RUSTPYTHON + @unittest.expectedFailure + def test_decompressor_raw_2(self): + lzd = LZMADecompressor(lzma.FORMAT_RAW, filters=FILTERS_RAW_2) + self._test_decompressor(lzd, COMPRESSED_RAW_2, lzma.CHECK_NONE) + + # TODO: RUSTPYTHON + @unittest.expectedFailure + def test_decompressor_raw_3(self): + lzd = LZMADecompressor(lzma.FORMAT_RAW, filters=FILTERS_RAW_3) + self._test_decompressor(lzd, COMPRESSED_RAW_3, lzma.CHECK_NONE) + + # TODO: RUSTPYTHON + @unittest.expectedFailure + def test_decompressor_raw_4(self): + lzd = LZMADecompressor(lzma.FORMAT_RAW, filters=FILTERS_RAW_4) + self._test_decompressor(lzd, COMPRESSED_RAW_4, lzma.CHECK_NONE) + + # TODO: RUSTPYTHON + @unittest.expectedFailure + def test_decompressor_chunks(self): + lzd = LZMADecompressor() + out = [] + for i in range(0, len(COMPRESSED_XZ), 10): + self.assertFalse(lzd.eof) + out.append(lzd.decompress(COMPRESSED_XZ[i:i+10])) + out = b"".join(out) + self.assertEqual(out, INPUT) + self.assertEqual(lzd.check, lzma.CHECK_CRC64) + self.assertTrue(lzd.eof) + self.assertEqual(lzd.unused_data, b"") + + # TODO: RUSTPYTHON + @unittest.expectedFailure + def test_decompressor_chunks_empty(self): + lzd = LZMADecompressor() + out = [] + for i in range(0, len(COMPRESSED_XZ), 10): + self.assertFalse(lzd.eof) + out.append(lzd.decompress(b'')) + out.append(lzd.decompress(b'')) + out.append(lzd.decompress(b'')) + out.append(lzd.decompress(COMPRESSED_XZ[i:i+10])) + out = b"".join(out) + self.assertEqual(out, INPUT) + self.assertEqual(lzd.check, lzma.CHECK_CRC64) + self.assertTrue(lzd.eof) + self.assertEqual(lzd.unused_data, b"") + + # TODO: RUSTPYTHON + @unittest.expectedFailure + def test_decompressor_chunks_maxsize(self): + lzd = LZMADecompressor() + max_length = 100 + out = [] + + # Feed first half the input + len_ = len(COMPRESSED_XZ) // 2 + out.append(lzd.decompress(COMPRESSED_XZ[:len_], + max_length=max_length)) + self.assertFalse(lzd.needs_input) + self.assertEqual(len(out[-1]), max_length) + + # Retrieve more data without providing more input + out.append(lzd.decompress(b'', max_length=max_length)) + self.assertFalse(lzd.needs_input) + self.assertEqual(len(out[-1]), max_length) + + # Retrieve more data while providing more input + out.append(lzd.decompress(COMPRESSED_XZ[len_:], + max_length=max_length)) + self.assertLessEqual(len(out[-1]), max_length) + + # Retrieve remaining uncompressed data + while not lzd.eof: + out.append(lzd.decompress(b'', max_length=max_length)) + self.assertLessEqual(len(out[-1]), max_length) + + out = b"".join(out) + self.assertEqual(out, INPUT) + self.assertEqual(lzd.check, lzma.CHECK_CRC64) + self.assertEqual(lzd.unused_data, b"") + + def test_decompressor_inputbuf_1(self): + # Test reusing input buffer after moving existing + # contents to beginning + lzd = LZMADecompressor() + out = [] + + # Create input buffer and fill it + self.assertEqual(lzd.decompress(COMPRESSED_XZ[:100], + max_length=0), b'') + + # Retrieve some results, freeing capacity at beginning + # of input buffer + out.append(lzd.decompress(b'', 2)) + + # Add more data that fits into input buffer after + # moving existing data to beginning + out.append(lzd.decompress(COMPRESSED_XZ[100:105], 15)) + + # Decompress rest of data + out.append(lzd.decompress(COMPRESSED_XZ[105:])) + self.assertEqual(b''.join(out), INPUT) + + def test_decompressor_inputbuf_2(self): + # Test reusing input buffer by appending data at the + # end right away + lzd = LZMADecompressor() + out = [] + + # Create input buffer and empty it + self.assertEqual(lzd.decompress(COMPRESSED_XZ[:200], + max_length=0), b'') + out.append(lzd.decompress(b'')) + + # Fill buffer with new data + out.append(lzd.decompress(COMPRESSED_XZ[200:280], 2)) + + # Append some more data, not enough to require resize + out.append(lzd.decompress(COMPRESSED_XZ[280:300], 2)) + + # Decompress rest of data + out.append(lzd.decompress(COMPRESSED_XZ[300:])) + self.assertEqual(b''.join(out), INPUT) + + def test_decompressor_inputbuf_3(self): + # Test reusing input buffer after extending it + + lzd = LZMADecompressor() + out = [] + + # Create almost full input buffer + out.append(lzd.decompress(COMPRESSED_XZ[:200], 5)) + + # Add even more data to it, requiring resize + out.append(lzd.decompress(COMPRESSED_XZ[200:300], 5)) + + # Decompress rest of data + out.append(lzd.decompress(COMPRESSED_XZ[300:])) + self.assertEqual(b''.join(out), INPUT) + + # TODO: RUSTPYTHON + @unittest.expectedFailure + def test_decompressor_unused_data(self): + lzd = LZMADecompressor() + extra = b"fooblibar" + self._test_decompressor(lzd, COMPRESSED_XZ + extra, lzma.CHECK_CRC64, + unused_data=extra) + + # TODO: RUSTPYTHON + @unittest.expectedFailure + def test_decompressor_bad_input(self): + lzd = LZMADecompressor() + self.assertRaises(LZMAError, lzd.decompress, COMPRESSED_RAW_1) + + lzd = LZMADecompressor(lzma.FORMAT_XZ) + self.assertRaises(LZMAError, lzd.decompress, COMPRESSED_ALONE) + + lzd = LZMADecompressor(lzma.FORMAT_ALONE) + self.assertRaises(LZMAError, lzd.decompress, COMPRESSED_XZ) + + lzd = LZMADecompressor(lzma.FORMAT_RAW, filters=FILTERS_RAW_1) + self.assertRaises(LZMAError, lzd.decompress, COMPRESSED_XZ) + + # TODO: RUSTPYTHON + @unittest.expectedFailure + def test_decompressor_bug_28275(self): + # Test coverage for Issue 28275 + lzd = LZMADecompressor() + self.assertRaises(LZMAError, lzd.decompress, COMPRESSED_RAW_1) + # Previously, a second call could crash due to internal inconsistency + self.assertRaises(LZMAError, lzd.decompress, COMPRESSED_RAW_1) + + # Test that LZMACompressor->LZMADecompressor preserves the input data. + + # TODO: RUSTPYTHON + @unittest.expectedFailure + def test_roundtrip_xz(self): + lzc = LZMACompressor() + cdata = lzc.compress(INPUT) + lzc.flush() + lzd = LZMADecompressor() + self._test_decompressor(lzd, cdata, lzma.CHECK_CRC64) + + # TODO: RUSTPYTHON + @unittest.expectedFailure + def test_roundtrip_alone(self): + lzc = LZMACompressor(lzma.FORMAT_ALONE) + cdata = lzc.compress(INPUT) + lzc.flush() + lzd = LZMADecompressor() + self._test_decompressor(lzd, cdata, lzma.CHECK_NONE) + + # TODO: RUSTPYTHON + @unittest.expectedFailure + def test_roundtrip_raw(self): + lzc = LZMACompressor(lzma.FORMAT_RAW, filters=FILTERS_RAW_4) + cdata = lzc.compress(INPUT) + lzc.flush() + lzd = LZMADecompressor(lzma.FORMAT_RAW, filters=FILTERS_RAW_4) + self._test_decompressor(lzd, cdata, lzma.CHECK_NONE) + + # TODO: RUSTPYTHON + @unittest.expectedFailure + def test_roundtrip_raw_empty(self): + lzc = LZMACompressor(lzma.FORMAT_RAW, filters=FILTERS_RAW_4) + cdata = lzc.compress(INPUT) + cdata += lzc.compress(b'') + cdata += lzc.compress(b'') + cdata += lzc.compress(b'') + cdata += lzc.flush() + lzd = LZMADecompressor(lzma.FORMAT_RAW, filters=FILTERS_RAW_4) + self._test_decompressor(lzd, cdata, lzma.CHECK_NONE) + + # TODO: RUSTPYTHON + @unittest.expectedFailure + def test_roundtrip_chunks(self): + lzc = LZMACompressor() + cdata = [] + for i in range(0, len(INPUT), 10): + cdata.append(lzc.compress(INPUT[i:i+10])) + cdata.append(lzc.flush()) + cdata = b"".join(cdata) + lzd = LZMADecompressor() + self._test_decompressor(lzd, cdata, lzma.CHECK_CRC64) + + # TODO: RUSTPYTHON + @unittest.expectedFailure + def test_roundtrip_empty_chunks(self): + lzc = LZMACompressor() + cdata = [] + for i in range(0, len(INPUT), 10): + cdata.append(lzc.compress(INPUT[i:i+10])) + cdata.append(lzc.compress(b'')) + cdata.append(lzc.compress(b'')) + cdata.append(lzc.compress(b'')) + cdata.append(lzc.flush()) + cdata = b"".join(cdata) + lzd = LZMADecompressor() + self._test_decompressor(lzd, cdata, lzma.CHECK_CRC64) + + # LZMADecompressor intentionally does not handle concatenated streams. + + # TODO: RUSTPYTHON + @unittest.expectedFailure + def test_decompressor_multistream(self): + lzd = LZMADecompressor() + self._test_decompressor(lzd, COMPRESSED_XZ + COMPRESSED_ALONE, + lzma.CHECK_CRC64, unused_data=COMPRESSED_ALONE) + + # Test with inputs larger than 4GiB. + + @support.skip_if_pgo_task + @bigmemtest(size=_4G + 100, memuse=2) + def test_compressor_bigmem(self, size): + lzc = LZMACompressor() + cdata = lzc.compress(b"x" * size) + lzc.flush() + ddata = lzma.decompress(cdata) + try: + self.assertEqual(len(ddata), size) + self.assertEqual(len(ddata.strip(b"x")), 0) + finally: + ddata = None + + @support.skip_if_pgo_task + @bigmemtest(size=_4G + 100, memuse=3) + def test_decompressor_bigmem(self, size): + lzd = LZMADecompressor() + blocksize = min(10 * 1024 * 1024, size) + block = random.randbytes(blocksize) + try: + input = block * ((size-1) // blocksize + 1) + cdata = lzma.compress(input) + ddata = lzd.decompress(cdata) + self.assertEqual(ddata, input) + finally: + input = cdata = ddata = None + + # Pickling raises an exception; there's no way to serialize an lzma_stream. + + # TODO: RUSTPYTHON + @unittest.expectedFailure + def test_pickle(self): + for proto in range(pickle.HIGHEST_PROTOCOL + 1): + with self.assertRaises(TypeError): + pickle.dumps(LZMACompressor(), proto) + with self.assertRaises(TypeError): + pickle.dumps(LZMADecompressor(), proto) + + @support.refcount_test + def test_refleaks_in_decompressor___init__(self): + gettotalrefcount = support.get_attribute(sys, 'gettotalrefcount') + lzd = LZMADecompressor() + refs_before = gettotalrefcount() + for i in range(100): + lzd.__init__() + self.assertAlmostEqual(gettotalrefcount() - refs_before, 0, delta=10) + + def test_uninitialized_LZMADecompressor_crash(self): + self.assertEqual(LZMADecompressor.__new__(LZMADecompressor). + decompress(bytes()), b'') + + +class CompressDecompressFunctionTestCase(unittest.TestCase): + + # Test error cases: + + # TODO: RUSTPYTHON + @unittest.expectedFailure + def test_bad_args(self): + self.assertRaises(TypeError, lzma.compress) + self.assertRaises(TypeError, lzma.compress, []) + self.assertRaises(TypeError, lzma.compress, b"", format="xz") + self.assertRaises(TypeError, lzma.compress, b"", check="none") + self.assertRaises(TypeError, lzma.compress, b"", preset="blah") + self.assertRaises(TypeError, lzma.compress, b"", filters=1024) + # Can't specify a preset and a custom filter chain at the same time. + with self.assertRaises(ValueError): + lzma.compress(b"", preset=3, filters=[{"id": lzma.FILTER_LZMA2}]) + + self.assertRaises(TypeError, lzma.decompress) + self.assertRaises(TypeError, lzma.decompress, []) + self.assertRaises(TypeError, lzma.decompress, b"", format="lzma") + self.assertRaises(TypeError, lzma.decompress, b"", memlimit=7.3e9) + with self.assertRaises(TypeError): + lzma.decompress(b"", format=lzma.FORMAT_RAW, filters={}) + # Cannot specify a memory limit with FILTER_RAW. + with self.assertRaises(ValueError): + lzma.decompress(b"", format=lzma.FORMAT_RAW, memlimit=0x1000000) + # Can only specify a custom filter chain with FILTER_RAW. + with self.assertRaises(ValueError): + lzma.decompress(b"", filters=FILTERS_RAW_1) + with self.assertRaises(ValueError): + lzma.decompress(b"", format=lzma.FORMAT_XZ, filters=FILTERS_RAW_1) + with self.assertRaises(ValueError): + lzma.decompress( + b"", format=lzma.FORMAT_ALONE, filters=FILTERS_RAW_1) + + # TODO: RUSTPYTHON + @unittest.expectedFailure + def test_decompress_memlimit(self): + with self.assertRaises(LZMAError): + lzma.decompress(COMPRESSED_XZ, memlimit=1024) + with self.assertRaises(LZMAError): + lzma.decompress( + COMPRESSED_XZ, format=lzma.FORMAT_XZ, memlimit=1024) + with self.assertRaises(LZMAError): + lzma.decompress( + COMPRESSED_ALONE, format=lzma.FORMAT_ALONE, memlimit=1024) + + # Test LZMADecompressor on known-good input data. + + # TODO: RUSTPYTHON + @unittest.expectedFailure + def test_decompress_good_input(self): + ddata = lzma.decompress(COMPRESSED_XZ) + self.assertEqual(ddata, INPUT) + + ddata = lzma.decompress(COMPRESSED_ALONE) + self.assertEqual(ddata, INPUT) + + ddata = lzma.decompress(COMPRESSED_XZ, lzma.FORMAT_XZ) + self.assertEqual(ddata, INPUT) + + ddata = lzma.decompress(COMPRESSED_ALONE, lzma.FORMAT_ALONE) + self.assertEqual(ddata, INPUT) + + ddata = lzma.decompress( + COMPRESSED_RAW_1, lzma.FORMAT_RAW, filters=FILTERS_RAW_1) + self.assertEqual(ddata, INPUT) + + ddata = lzma.decompress( + COMPRESSED_RAW_2, lzma.FORMAT_RAW, filters=FILTERS_RAW_2) + self.assertEqual(ddata, INPUT) + + ddata = lzma.decompress( + COMPRESSED_RAW_3, lzma.FORMAT_RAW, filters=FILTERS_RAW_3) + self.assertEqual(ddata, INPUT) + + ddata = lzma.decompress( + COMPRESSED_RAW_4, lzma.FORMAT_RAW, filters=FILTERS_RAW_4) + self.assertEqual(ddata, INPUT) + + # TODO: RUSTPYTHON + @unittest.expectedFailure + def test_decompress_incomplete_input(self): + self.assertRaises(LZMAError, lzma.decompress, COMPRESSED_XZ[:128]) + self.assertRaises(LZMAError, lzma.decompress, COMPRESSED_ALONE[:128]) + self.assertRaises(LZMAError, lzma.decompress, COMPRESSED_RAW_1[:128], + format=lzma.FORMAT_RAW, filters=FILTERS_RAW_1) + self.assertRaises(LZMAError, lzma.decompress, COMPRESSED_RAW_2[:128], + format=lzma.FORMAT_RAW, filters=FILTERS_RAW_2) + self.assertRaises(LZMAError, lzma.decompress, COMPRESSED_RAW_3[:128], + format=lzma.FORMAT_RAW, filters=FILTERS_RAW_3) + self.assertRaises(LZMAError, lzma.decompress, COMPRESSED_RAW_4[:128], + format=lzma.FORMAT_RAW, filters=FILTERS_RAW_4) + + # TODO: RUSTPYTHON + @unittest.expectedFailure + def test_decompress_bad_input(self): + with self.assertRaises(LZMAError): + lzma.decompress(COMPRESSED_BOGUS) + with self.assertRaises(LZMAError): + lzma.decompress(COMPRESSED_RAW_1) + with self.assertRaises(LZMAError): + lzma.decompress(COMPRESSED_ALONE, format=lzma.FORMAT_XZ) + with self.assertRaises(LZMAError): + lzma.decompress(COMPRESSED_XZ, format=lzma.FORMAT_ALONE) + with self.assertRaises(LZMAError): + lzma.decompress(COMPRESSED_XZ, format=lzma.FORMAT_RAW, + filters=FILTERS_RAW_1) + + # Test that compress()->decompress() preserves the input data. + + # TODO: RUSTPYTHON + @unittest.expectedFailure + def test_roundtrip(self): + cdata = lzma.compress(INPUT) + ddata = lzma.decompress(cdata) + self.assertEqual(ddata, INPUT) + + cdata = lzma.compress(INPUT, lzma.FORMAT_XZ) + ddata = lzma.decompress(cdata) + self.assertEqual(ddata, INPUT) + + cdata = lzma.compress(INPUT, lzma.FORMAT_ALONE) + ddata = lzma.decompress(cdata) + self.assertEqual(ddata, INPUT) + + cdata = lzma.compress(INPUT, lzma.FORMAT_RAW, filters=FILTERS_RAW_4) + ddata = lzma.decompress(cdata, lzma.FORMAT_RAW, filters=FILTERS_RAW_4) + self.assertEqual(ddata, INPUT) + + # Unlike LZMADecompressor, decompress() *does* handle concatenated streams. + + def test_decompress_multistream(self): + ddata = lzma.decompress(COMPRESSED_XZ + COMPRESSED_ALONE) + self.assertEqual(ddata, INPUT * 2) + + # Test robust handling of non-LZMA data following the compressed stream(s). + + # TODO: RUSTPYTHON + @unittest.expectedFailure + def test_decompress_trailing_junk(self): + ddata = lzma.decompress(COMPRESSED_XZ + COMPRESSED_BOGUS) + self.assertEqual(ddata, INPUT) + + # TODO: RUSTPYTHON + @unittest.expectedFailure + def test_decompress_multistream_trailing_junk(self): + ddata = lzma.decompress(COMPRESSED_XZ * 3 + COMPRESSED_BOGUS) + self.assertEqual(ddata, INPUT * 3) + + +class TempFile: + """Context manager - creates a file, and deletes it on __exit__.""" + + def __init__(self, filename, data=b""): + self.filename = filename + self.data = data + + def __enter__(self): + with open(self.filename, "wb") as f: + f.write(self.data) + + def __exit__(self, *args): + unlink(self.filename) + + +class FileTestCase(unittest.TestCase): + + def test_init(self): + with LZMAFile(BytesIO(COMPRESSED_XZ)) as f: + self.assertIsInstance(f, LZMAFile) + self.assertEqual(f.mode, "rb") + with LZMAFile(BytesIO(), "w") as f: + self.assertIsInstance(f, LZMAFile) + self.assertEqual(f.mode, "wb") + with LZMAFile(BytesIO(), "x") as f: + self.assertIsInstance(f, LZMAFile) + self.assertEqual(f.mode, "wb") + with LZMAFile(BytesIO(), "a") as f: + self.assertIsInstance(f, LZMAFile) + self.assertEqual(f.mode, "wb") + + # TODO: RUSTPYTHON + @unittest.expectedFailure + def test_init_with_PathLike_filename(self): + filename = FakePath(TESTFN) + with TempFile(filename, COMPRESSED_XZ): + with LZMAFile(filename) as f: + self.assertEqual(f.read(), INPUT) + self.assertEqual(f.name, TESTFN) + with LZMAFile(filename, "a") as f: + f.write(INPUT) + self.assertEqual(f.name, TESTFN) + with LZMAFile(filename) as f: + self.assertEqual(f.read(), INPUT * 2) + self.assertEqual(f.name, TESTFN) + + def test_init_with_filename(self): + with TempFile(TESTFN, COMPRESSED_XZ): + with LZMAFile(TESTFN) as f: + self.assertEqual(f.name, TESTFN) + self.assertEqual(f.mode, 'rb') + with LZMAFile(TESTFN, "w") as f: + self.assertEqual(f.name, TESTFN) + self.assertEqual(f.mode, 'wb') + with LZMAFile(TESTFN, "a") as f: + self.assertEqual(f.name, TESTFN) + self.assertEqual(f.mode, 'wb') + + def test_init_mode(self): + with TempFile(TESTFN): + with LZMAFile(TESTFN, "r") as f: + self.assertIsInstance(f, LZMAFile) + self.assertEqual(f.mode, "rb") + with LZMAFile(TESTFN, "rb") as f: + self.assertIsInstance(f, LZMAFile) + self.assertEqual(f.mode, "rb") + with LZMAFile(TESTFN, "w") as f: + self.assertIsInstance(f, LZMAFile) + self.assertEqual(f.mode, "wb") + with LZMAFile(TESTFN, "wb") as f: + self.assertIsInstance(f, LZMAFile) + self.assertEqual(f.mode, "wb") + with LZMAFile(TESTFN, "a") as f: + self.assertIsInstance(f, LZMAFile) + self.assertEqual(f.mode, "wb") + with LZMAFile(TESTFN, "ab") as f: + self.assertIsInstance(f, LZMAFile) + self.assertEqual(f.mode, "wb") + + def test_init_with_x_mode(self): + self.addCleanup(unlink, TESTFN) + for mode in ("x", "xb"): + unlink(TESTFN) + with LZMAFile(TESTFN, mode) as f: + self.assertIsInstance(f, LZMAFile) + self.assertEqual(f.mode, 'wb') + with self.assertRaises(FileExistsError): + LZMAFile(TESTFN, mode) + + def test_init_bad_mode(self): + with self.assertRaises(ValueError): + LZMAFile(BytesIO(COMPRESSED_XZ), (3, "x")) + with self.assertRaises(ValueError): + LZMAFile(BytesIO(COMPRESSED_XZ), "") + with self.assertRaises(ValueError): + LZMAFile(BytesIO(COMPRESSED_XZ), "xt") + with self.assertRaises(ValueError): + LZMAFile(BytesIO(COMPRESSED_XZ), "x+") + with self.assertRaises(ValueError): + LZMAFile(BytesIO(COMPRESSED_XZ), "rx") + with self.assertRaises(ValueError): + LZMAFile(BytesIO(COMPRESSED_XZ), "wx") + with self.assertRaises(ValueError): + LZMAFile(BytesIO(COMPRESSED_XZ), "rt") + with self.assertRaises(ValueError): + LZMAFile(BytesIO(COMPRESSED_XZ), "r+") + with self.assertRaises(ValueError): + LZMAFile(BytesIO(COMPRESSED_XZ), "wt") + with self.assertRaises(ValueError): + LZMAFile(BytesIO(COMPRESSED_XZ), "w+") + with self.assertRaises(ValueError): + LZMAFile(BytesIO(COMPRESSED_XZ), "rw") + + # TODO: RUSTPYTHON + @unittest.expectedFailure + def test_init_bad_check(self): + with self.assertRaises(TypeError): + LZMAFile(BytesIO(), "w", check=b"asd") + # CHECK_UNKNOWN and anything above CHECK_ID_MAX should be invalid. + with self.assertRaises(LZMAError): + LZMAFile(BytesIO(), "w", check=lzma.CHECK_UNKNOWN) + with self.assertRaises(LZMAError): + LZMAFile(BytesIO(), "w", check=lzma.CHECK_ID_MAX + 3) + # Cannot specify a check with mode="r". + with self.assertRaises(ValueError): + LZMAFile(BytesIO(COMPRESSED_XZ), check=lzma.CHECK_NONE) + with self.assertRaises(ValueError): + LZMAFile(BytesIO(COMPRESSED_XZ), check=lzma.CHECK_CRC32) + with self.assertRaises(ValueError): + LZMAFile(BytesIO(COMPRESSED_XZ), check=lzma.CHECK_CRC64) + with self.assertRaises(ValueError): + LZMAFile(BytesIO(COMPRESSED_XZ), check=lzma.CHECK_SHA256) + with self.assertRaises(ValueError): + LZMAFile(BytesIO(COMPRESSED_XZ), check=lzma.CHECK_UNKNOWN) + + def test_init_bad_preset(self): + with self.assertRaises(TypeError): + LZMAFile(BytesIO(), "w", preset=4.39) + with self.assertRaises(LZMAError): + LZMAFile(BytesIO(), "w", preset=10) + with self.assertRaises(LZMAError): + LZMAFile(BytesIO(), "w", preset=23) + with self.assertRaises(OverflowError): + LZMAFile(BytesIO(), "w", preset=-1) + with self.assertRaises(OverflowError): + LZMAFile(BytesIO(), "w", preset=-7) + with self.assertRaises(TypeError): + LZMAFile(BytesIO(), "w", preset="foo") + # Cannot specify a preset with mode="r". + with self.assertRaises(ValueError): + LZMAFile(BytesIO(COMPRESSED_XZ), preset=3) + + # TODO: RUSTPYTHON + @unittest.expectedFailure + def test_init_bad_filter_spec(self): + with self.assertRaises(TypeError): + LZMAFile(BytesIO(), "w", filters=[b"wobsite"]) + with self.assertRaises(ValueError): + LZMAFile(BytesIO(), "w", filters=[{"xyzzy": 3}]) + with self.assertRaises(ValueError): + LZMAFile(BytesIO(), "w", filters=[{"id": 98765}]) + with self.assertRaises(ValueError): + LZMAFile(BytesIO(), "w", + filters=[{"id": lzma.FILTER_LZMA2, "foo": 0}]) + with self.assertRaises(ValueError): + LZMAFile(BytesIO(), "w", + filters=[{"id": lzma.FILTER_DELTA, "foo": 0}]) + with self.assertRaises(ValueError): + LZMAFile(BytesIO(), "w", + filters=[{"id": lzma.FILTER_X86, "foo": 0}]) + + # TODO: RUSTPYTHON + @unittest.expectedFailure + def test_init_with_preset_and_filters(self): + with self.assertRaises(ValueError): + LZMAFile(BytesIO(), "w", format=lzma.FORMAT_RAW, + preset=6, filters=FILTERS_RAW_1) + + def test_close(self): + with BytesIO(COMPRESSED_XZ) as src: + f = LZMAFile(src) + f.close() + # LZMAFile.close() should not close the underlying file object. + self.assertFalse(src.closed) + # Try closing an already-closed LZMAFile. + f.close() + self.assertFalse(src.closed) + + # Test with a real file on disk, opened directly by LZMAFile. + with TempFile(TESTFN, COMPRESSED_XZ): + f = LZMAFile(TESTFN) + fp = f._fp + f.close() + # Here, LZMAFile.close() *should* close the underlying file object. + self.assertTrue(fp.closed) + # Try closing an already-closed LZMAFile. + f.close() + + def test_closed(self): + f = LZMAFile(BytesIO(COMPRESSED_XZ)) + try: + self.assertFalse(f.closed) + f.read() + self.assertFalse(f.closed) + finally: + f.close() + self.assertTrue(f.closed) + + f = LZMAFile(BytesIO(), "w") + try: + self.assertFalse(f.closed) + finally: + f.close() + self.assertTrue(f.closed) + + def test_fileno(self): + f = LZMAFile(BytesIO(COMPRESSED_XZ)) + try: + self.assertRaises(UnsupportedOperation, f.fileno) + finally: + f.close() + self.assertRaises(ValueError, f.fileno) + with TempFile(TESTFN, COMPRESSED_XZ): + f = LZMAFile(TESTFN) + try: + self.assertEqual(f.fileno(), f._fp.fileno()) + self.assertIsInstance(f.fileno(), int) + finally: + f.close() + self.assertRaises(ValueError, f.fileno) + + def test_seekable(self): + f = LZMAFile(BytesIO(COMPRESSED_XZ)) + try: + self.assertTrue(f.seekable()) + f.read() + self.assertTrue(f.seekable()) + finally: + f.close() + self.assertRaises(ValueError, f.seekable) + + f = LZMAFile(BytesIO(), "w") + try: + self.assertFalse(f.seekable()) + finally: + f.close() + self.assertRaises(ValueError, f.seekable) + + src = BytesIO(COMPRESSED_XZ) + src.seekable = lambda: False + f = LZMAFile(src) + try: + self.assertFalse(f.seekable()) + finally: + f.close() + self.assertRaises(ValueError, f.seekable) + + def test_readable(self): + f = LZMAFile(BytesIO(COMPRESSED_XZ)) + try: + self.assertTrue(f.readable()) + f.read() + self.assertTrue(f.readable()) + finally: + f.close() + self.assertRaises(ValueError, f.readable) + + f = LZMAFile(BytesIO(), "w") + try: + self.assertFalse(f.readable()) + finally: + f.close() + self.assertRaises(ValueError, f.readable) + + def test_writable(self): + f = LZMAFile(BytesIO(COMPRESSED_XZ)) + try: + self.assertFalse(f.writable()) + f.read() + self.assertFalse(f.writable()) + finally: + f.close() + self.assertRaises(ValueError, f.writable) + + f = LZMAFile(BytesIO(), "w") + try: + self.assertTrue(f.writable()) + finally: + f.close() + self.assertRaises(ValueError, f.writable) + + # TODO: RUSTPYTHON + @unittest.expectedFailure + def test_read(self): + with LZMAFile(BytesIO(COMPRESSED_XZ)) as f: + self.assertEqual(f.read(), INPUT) + self.assertEqual(f.read(), b"") + with LZMAFile(BytesIO(COMPRESSED_ALONE)) as f: + self.assertEqual(f.read(), INPUT) + with LZMAFile(BytesIO(COMPRESSED_XZ), format=lzma.FORMAT_XZ) as f: + self.assertEqual(f.read(), INPUT) + self.assertEqual(f.read(), b"") + with LZMAFile(BytesIO(COMPRESSED_ALONE), format=lzma.FORMAT_ALONE) as f: + self.assertEqual(f.read(), INPUT) + self.assertEqual(f.read(), b"") + with LZMAFile(BytesIO(COMPRESSED_RAW_1), + format=lzma.FORMAT_RAW, filters=FILTERS_RAW_1) as f: + self.assertEqual(f.read(), INPUT) + self.assertEqual(f.read(), b"") + with LZMAFile(BytesIO(COMPRESSED_RAW_2), + format=lzma.FORMAT_RAW, filters=FILTERS_RAW_2) as f: + self.assertEqual(f.read(), INPUT) + self.assertEqual(f.read(), b"") + with LZMAFile(BytesIO(COMPRESSED_RAW_3), + format=lzma.FORMAT_RAW, filters=FILTERS_RAW_3) as f: + self.assertEqual(f.read(), INPUT) + self.assertEqual(f.read(), b"") + with LZMAFile(BytesIO(COMPRESSED_RAW_4), + format=lzma.FORMAT_RAW, filters=FILTERS_RAW_4) as f: + self.assertEqual(f.read(), INPUT) + self.assertEqual(f.read(), b"") + + def test_read_0(self): + with LZMAFile(BytesIO(COMPRESSED_XZ)) as f: + self.assertEqual(f.read(0), b"") + with LZMAFile(BytesIO(COMPRESSED_ALONE)) as f: + self.assertEqual(f.read(0), b"") + with LZMAFile(BytesIO(COMPRESSED_XZ), format=lzma.FORMAT_XZ) as f: + self.assertEqual(f.read(0), b"") + with LZMAFile(BytesIO(COMPRESSED_ALONE), format=lzma.FORMAT_ALONE) as f: + self.assertEqual(f.read(0), b"") + + def test_read_10(self): + with LZMAFile(BytesIO(COMPRESSED_XZ)) as f: + chunks = [] + while result := f.read(10): + self.assertLessEqual(len(result), 10) + chunks.append(result) + self.assertEqual(b"".join(chunks), INPUT) + + # TODO: RUSTPYTHON + @unittest.expectedFailure + def test_read_multistream(self): + with LZMAFile(BytesIO(COMPRESSED_XZ * 5)) as f: + self.assertEqual(f.read(), INPUT * 5) + with LZMAFile(BytesIO(COMPRESSED_XZ + COMPRESSED_ALONE)) as f: + self.assertEqual(f.read(), INPUT * 2) + with LZMAFile(BytesIO(COMPRESSED_RAW_3 * 4), + format=lzma.FORMAT_RAW, filters=FILTERS_RAW_3) as f: + self.assertEqual(f.read(), INPUT * 4) + + def test_read_multistream_buffer_size_aligned(self): + # Test the case where a stream boundary coincides with the end + # of the raw read buffer. + saved_buffer_size = _compression.BUFFER_SIZE + _compression.BUFFER_SIZE = len(COMPRESSED_XZ) + try: + with LZMAFile(BytesIO(COMPRESSED_XZ * 5)) as f: + self.assertEqual(f.read(), INPUT * 5) + finally: + _compression.BUFFER_SIZE = saved_buffer_size + + # TODO: RUSTPYTHON + @unittest.expectedFailure + def test_read_trailing_junk(self): + with LZMAFile(BytesIO(COMPRESSED_XZ + COMPRESSED_BOGUS)) as f: + self.assertEqual(f.read(), INPUT) + + # TODO: RUSTPYTHON + @unittest.expectedFailure + def test_read_multistream_trailing_junk(self): + with LZMAFile(BytesIO(COMPRESSED_XZ * 5 + COMPRESSED_BOGUS)) as f: + self.assertEqual(f.read(), INPUT * 5) + + def test_read_from_file(self): + with TempFile(TESTFN, COMPRESSED_XZ): + with LZMAFile(TESTFN) as f: + self.assertEqual(f.read(), INPUT) + self.assertEqual(f.read(), b"") + self.assertEqual(f.name, TESTFN) + self.assertIsInstance(f.fileno(), int) + self.assertEqual(f.mode, 'rb') + self.assertIs(f.readable(), True) + self.assertIs(f.writable(), False) + self.assertIs(f.seekable(), True) + self.assertIs(f.closed, False) + self.assertIs(f.closed, True) + with self.assertRaises(ValueError): + f.name + self.assertRaises(ValueError, f.fileno) + self.assertEqual(f.mode, 'rb') + self.assertRaises(ValueError, f.readable) + self.assertRaises(ValueError, f.writable) + self.assertRaises(ValueError, f.seekable) + + def test_read_from_file_with_bytes_filename(self): + bytes_filename = os.fsencode(TESTFN) + with TempFile(TESTFN, COMPRESSED_XZ): + with LZMAFile(bytes_filename) as f: + self.assertEqual(f.read(), INPUT) + self.assertEqual(f.read(), b"") + self.assertEqual(f.name, bytes_filename) + + def test_read_from_fileobj(self): + with TempFile(TESTFN, COMPRESSED_XZ): + with open(TESTFN, 'rb') as raw: + with LZMAFile(raw) as f: + self.assertEqual(f.read(), INPUT) + self.assertEqual(f.read(), b"") + self.assertEqual(f.name, raw.name) + self.assertEqual(f.fileno(), raw.fileno()) + self.assertEqual(f.mode, 'rb') + self.assertIs(f.readable(), True) + self.assertIs(f.writable(), False) + self.assertIs(f.seekable(), True) + self.assertIs(f.closed, False) + self.assertIs(f.closed, True) + with self.assertRaises(ValueError): + f.name + self.assertRaises(ValueError, f.fileno) + self.assertEqual(f.mode, 'rb') + self.assertRaises(ValueError, f.readable) + self.assertRaises(ValueError, f.writable) + self.assertRaises(ValueError, f.seekable) + + def test_read_from_fileobj_with_int_name(self): + with TempFile(TESTFN, COMPRESSED_XZ): + fd = os.open(TESTFN, os.O_RDONLY) + with open(fd, 'rb') as raw: + with LZMAFile(raw) as f: + self.assertEqual(f.read(), INPUT) + self.assertEqual(f.read(), b"") + self.assertEqual(f.name, raw.name) + self.assertEqual(f.fileno(), raw.fileno()) + self.assertEqual(f.mode, 'rb') + self.assertIs(f.readable(), True) + self.assertIs(f.writable(), False) + self.assertIs(f.seekable(), True) + self.assertIs(f.closed, False) + self.assertIs(f.closed, True) + with self.assertRaises(ValueError): + f.name + self.assertRaises(ValueError, f.fileno) + self.assertEqual(f.mode, 'rb') + self.assertRaises(ValueError, f.readable) + self.assertRaises(ValueError, f.writable) + self.assertRaises(ValueError, f.seekable) + + def test_read_incomplete(self): + with LZMAFile(BytesIO(COMPRESSED_XZ[:128])) as f: + self.assertRaises(EOFError, f.read) + + def test_read_truncated(self): + # Drop stream footer: CRC (4 bytes), index size (4 bytes), + # flags (2 bytes) and magic number (2 bytes). + truncated = COMPRESSED_XZ[:-12] + with LZMAFile(BytesIO(truncated)) as f: + self.assertRaises(EOFError, f.read) + with LZMAFile(BytesIO(truncated)) as f: + self.assertEqual(f.read(len(INPUT)), INPUT) + self.assertRaises(EOFError, f.read, 1) + # Incomplete 12-byte header. + for i in range(12): + with LZMAFile(BytesIO(truncated[:i])) as f: + self.assertRaises(EOFError, f.read, 1) + + def test_read_bad_args(self): + f = LZMAFile(BytesIO(COMPRESSED_XZ)) + f.close() + self.assertRaises(ValueError, f.read) + with LZMAFile(BytesIO(), "w") as f: + self.assertRaises(ValueError, f.read) + with LZMAFile(BytesIO(COMPRESSED_XZ)) as f: + self.assertRaises(TypeError, f.read, float()) + + # TODO: RUSTPYTHON + @unittest.expectedFailure + def test_read_bad_data(self): + with LZMAFile(BytesIO(COMPRESSED_BOGUS)) as f: + self.assertRaises(LZMAError, f.read) + + def test_read1(self): + with LZMAFile(BytesIO(COMPRESSED_XZ)) as f: + blocks = [] + while result := f.read1(): + blocks.append(result) + self.assertEqual(b"".join(blocks), INPUT) + self.assertEqual(f.read1(), b"") + + def test_read1_0(self): + with LZMAFile(BytesIO(COMPRESSED_XZ)) as f: + self.assertEqual(f.read1(0), b"") + + def test_read1_10(self): + with LZMAFile(BytesIO(COMPRESSED_XZ)) as f: + blocks = [] + while result := f.read1(10): + blocks.append(result) + self.assertEqual(b"".join(blocks), INPUT) + self.assertEqual(f.read1(), b"") + + def test_read1_multistream(self): + with LZMAFile(BytesIO(COMPRESSED_XZ * 5)) as f: + blocks = [] + while result := f.read1(): + blocks.append(result) + self.assertEqual(b"".join(blocks), INPUT * 5) + self.assertEqual(f.read1(), b"") + + def test_read1_bad_args(self): + f = LZMAFile(BytesIO(COMPRESSED_XZ)) + f.close() + self.assertRaises(ValueError, f.read1) + with LZMAFile(BytesIO(), "w") as f: + self.assertRaises(ValueError, f.read1) + with LZMAFile(BytesIO(COMPRESSED_XZ)) as f: + self.assertRaises(TypeError, f.read1, None) + + def test_peek(self): + with LZMAFile(BytesIO(COMPRESSED_XZ)) as f: + result = f.peek() + self.assertGreater(len(result), 0) + self.assertTrue(INPUT.startswith(result)) + self.assertEqual(f.read(), INPUT) + with LZMAFile(BytesIO(COMPRESSED_XZ)) as f: + result = f.peek(10) + self.assertGreater(len(result), 0) + self.assertTrue(INPUT.startswith(result)) + self.assertEqual(f.read(), INPUT) + + def test_peek_bad_args(self): + with LZMAFile(BytesIO(), "w") as f: + self.assertRaises(ValueError, f.peek) + + # TODO: RUSTPYTHON + @unittest.expectedFailure + def test_iterator(self): + with BytesIO(INPUT) as f: + lines = f.readlines() + with LZMAFile(BytesIO(COMPRESSED_XZ)) as f: + self.assertListEqual(list(iter(f)), lines) + with LZMAFile(BytesIO(COMPRESSED_ALONE)) as f: + self.assertListEqual(list(iter(f)), lines) + with LZMAFile(BytesIO(COMPRESSED_XZ), format=lzma.FORMAT_XZ) as f: + self.assertListEqual(list(iter(f)), lines) + with LZMAFile(BytesIO(COMPRESSED_ALONE), format=lzma.FORMAT_ALONE) as f: + self.assertListEqual(list(iter(f)), lines) + with LZMAFile(BytesIO(COMPRESSED_RAW_2), + format=lzma.FORMAT_RAW, filters=FILTERS_RAW_2) as f: + self.assertListEqual(list(iter(f)), lines) + + def test_readline(self): + with BytesIO(INPUT) as f: + lines = f.readlines() + with LZMAFile(BytesIO(COMPRESSED_XZ)) as f: + for line in lines: + self.assertEqual(f.readline(), line) + + def test_readlines(self): + with BytesIO(INPUT) as f: + lines = f.readlines() + with LZMAFile(BytesIO(COMPRESSED_XZ)) as f: + self.assertListEqual(f.readlines(), lines) + + def test_decompress_limited(self): + """Decompressed data buffering should be limited""" + bomb = lzma.compress(b'\0' * int(2e6), preset=6) + self.assertLess(len(bomb), _compression.BUFFER_SIZE) + + decomp = LZMAFile(BytesIO(bomb)) + self.assertEqual(decomp.read(1), b'\0') + max_decomp = 1 + DEFAULT_BUFFER_SIZE + self.assertLessEqual(decomp._buffer.raw.tell(), max_decomp, + "Excessive amount of data was decompressed") + + # TODO: RUSTPYTHON + @unittest.expectedFailure + def test_write(self): + with BytesIO() as dst: + with LZMAFile(dst, "w") as f: + f.write(INPUT) + with self.assertRaises(AttributeError): + f.name + expected = lzma.compress(INPUT) + self.assertEqual(dst.getvalue(), expected) + with BytesIO() as dst: + with LZMAFile(dst, "w", format=lzma.FORMAT_XZ) as f: + f.write(INPUT) + expected = lzma.compress(INPUT, format=lzma.FORMAT_XZ) + self.assertEqual(dst.getvalue(), expected) + with BytesIO() as dst: + with LZMAFile(dst, "w", format=lzma.FORMAT_ALONE) as f: + f.write(INPUT) + expected = lzma.compress(INPUT, format=lzma.FORMAT_ALONE) + self.assertEqual(dst.getvalue(), expected) + with BytesIO() as dst: + with LZMAFile(dst, "w", format=lzma.FORMAT_RAW, + filters=FILTERS_RAW_2) as f: + f.write(INPUT) + expected = lzma.compress(INPUT, format=lzma.FORMAT_RAW, + filters=FILTERS_RAW_2) + self.assertEqual(dst.getvalue(), expected) + + def test_write_10(self): + with BytesIO() as dst: + with LZMAFile(dst, "w") as f: + for start in range(0, len(INPUT), 10): + f.write(INPUT[start:start+10]) + expected = lzma.compress(INPUT) + self.assertEqual(dst.getvalue(), expected) + + def test_write_append(self): + part1 = INPUT[:1024] + part2 = INPUT[1024:1536] + part3 = INPUT[1536:] + expected = b"".join(lzma.compress(x) for x in (part1, part2, part3)) + with BytesIO() as dst: + with LZMAFile(dst, "w") as f: + f.write(part1) + self.assertEqual(f.mode, 'wb') + with LZMAFile(dst, "a") as f: + f.write(part2) + self.assertEqual(f.mode, 'wb') + with LZMAFile(dst, "a") as f: + f.write(part3) + self.assertEqual(f.mode, 'wb') + self.assertEqual(dst.getvalue(), expected) + + def test_write_to_file(self): + try: + with LZMAFile(TESTFN, "w") as f: + f.write(INPUT) + self.assertEqual(f.name, TESTFN) + self.assertIsInstance(f.fileno(), int) + self.assertEqual(f.mode, 'wb') + self.assertIs(f.readable(), False) + self.assertIs(f.writable(), True) + self.assertIs(f.seekable(), False) + self.assertIs(f.closed, False) + self.assertIs(f.closed, True) + with self.assertRaises(ValueError): + f.name + self.assertRaises(ValueError, f.fileno) + self.assertEqual(f.mode, 'wb') + self.assertRaises(ValueError, f.readable) + self.assertRaises(ValueError, f.writable) + self.assertRaises(ValueError, f.seekable) + + expected = lzma.compress(INPUT) + with open(TESTFN, "rb") as f: + self.assertEqual(f.read(), expected) + finally: + unlink(TESTFN) + + def test_write_to_file_with_bytes_filename(self): + bytes_filename = os.fsencode(TESTFN) + try: + with LZMAFile(bytes_filename, "w") as f: + f.write(INPUT) + self.assertEqual(f.name, bytes_filename) + expected = lzma.compress(INPUT) + with open(TESTFN, "rb") as f: + self.assertEqual(f.read(), expected) + finally: + unlink(TESTFN) + + def test_write_to_fileobj(self): + try: + with open(TESTFN, "wb") as raw: + with LZMAFile(raw, "w") as f: + f.write(INPUT) + self.assertEqual(f.name, raw.name) + self.assertEqual(f.fileno(), raw.fileno()) + self.assertEqual(f.mode, 'wb') + self.assertIs(f.readable(), False) + self.assertIs(f.writable(), True) + self.assertIs(f.seekable(), False) + self.assertIs(f.closed, False) + self.assertIs(f.closed, True) + with self.assertRaises(ValueError): + f.name + self.assertRaises(ValueError, f.fileno) + self.assertEqual(f.mode, 'wb') + self.assertRaises(ValueError, f.readable) + self.assertRaises(ValueError, f.writable) + self.assertRaises(ValueError, f.seekable) + + expected = lzma.compress(INPUT) + with open(TESTFN, "rb") as f: + self.assertEqual(f.read(), expected) + finally: + unlink(TESTFN) + + def test_write_to_fileobj_with_int_name(self): + try: + fd = os.open(TESTFN, os.O_WRONLY | os.O_CREAT | os.O_TRUNC) + with open(fd, 'wb') as raw: + with LZMAFile(raw, "w") as f: + f.write(INPUT) + self.assertEqual(f.name, raw.name) + self.assertEqual(f.fileno(), raw.fileno()) + self.assertEqual(f.mode, 'wb') + self.assertIs(f.readable(), False) + self.assertIs(f.writable(), True) + self.assertIs(f.seekable(), False) + self.assertIs(f.closed, False) + self.assertIs(f.closed, True) + with self.assertRaises(ValueError): + f.name + self.assertRaises(ValueError, f.fileno) + self.assertEqual(f.mode, 'wb') + self.assertRaises(ValueError, f.readable) + self.assertRaises(ValueError, f.writable) + self.assertRaises(ValueError, f.seekable) + + expected = lzma.compress(INPUT) + with open(TESTFN, "rb") as f: + self.assertEqual(f.read(), expected) + finally: + unlink(TESTFN) + + def test_write_append_to_file(self): + part1 = INPUT[:1024] + part2 = INPUT[1024:1536] + part3 = INPUT[1536:] + expected = b"".join(lzma.compress(x) for x in (part1, part2, part3)) + try: + with LZMAFile(TESTFN, "w") as f: + f.write(part1) + self.assertEqual(f.mode, 'wb') + with LZMAFile(TESTFN, "a") as f: + f.write(part2) + self.assertEqual(f.mode, 'wb') + with LZMAFile(TESTFN, "a") as f: + f.write(part3) + self.assertEqual(f.mode, 'wb') + with open(TESTFN, "rb") as f: + self.assertEqual(f.read(), expected) + finally: + unlink(TESTFN) + + def test_write_bad_args(self): + f = LZMAFile(BytesIO(), "w") + f.close() + self.assertRaises(ValueError, f.write, b"foo") + with LZMAFile(BytesIO(COMPRESSED_XZ), "r") as f: + self.assertRaises(ValueError, f.write, b"bar") + with LZMAFile(BytesIO(), "w") as f: + self.assertRaises(TypeError, f.write, None) + self.assertRaises(TypeError, f.write, "text") + self.assertRaises(TypeError, f.write, 789) + + def test_writelines(self): + with BytesIO(INPUT) as f: + lines = f.readlines() + with BytesIO() as dst: + with LZMAFile(dst, "w") as f: + f.writelines(lines) + expected = lzma.compress(INPUT) + self.assertEqual(dst.getvalue(), expected) + + def test_seek_forward(self): + with LZMAFile(BytesIO(COMPRESSED_XZ)) as f: + f.seek(555) + self.assertEqual(f.read(), INPUT[555:]) + + def test_seek_forward_across_streams(self): + with LZMAFile(BytesIO(COMPRESSED_XZ * 2)) as f: + f.seek(len(INPUT) + 123) + self.assertEqual(f.read(), INPUT[123:]) + + def test_seek_forward_relative_to_current(self): + with LZMAFile(BytesIO(COMPRESSED_XZ)) as f: + f.read(100) + f.seek(1236, 1) + self.assertEqual(f.read(), INPUT[1336:]) + + def test_seek_forward_relative_to_end(self): + with LZMAFile(BytesIO(COMPRESSED_XZ)) as f: + f.seek(-555, 2) + self.assertEqual(f.read(), INPUT[-555:]) + + def test_seek_backward(self): + with LZMAFile(BytesIO(COMPRESSED_XZ)) as f: + f.read(1001) + f.seek(211) + self.assertEqual(f.read(), INPUT[211:]) + + def test_seek_backward_across_streams(self): + with LZMAFile(BytesIO(COMPRESSED_XZ * 2)) as f: + f.read(len(INPUT) + 333) + f.seek(737) + self.assertEqual(f.read(), INPUT[737:] + INPUT) + + def test_seek_backward_relative_to_end(self): + with LZMAFile(BytesIO(COMPRESSED_XZ)) as f: + f.seek(-150, 2) + self.assertEqual(f.read(), INPUT[-150:]) + + def test_seek_past_end(self): + with LZMAFile(BytesIO(COMPRESSED_XZ)) as f: + f.seek(len(INPUT) + 9001) + self.assertEqual(f.tell(), len(INPUT)) + self.assertEqual(f.read(), b"") + + def test_seek_past_start(self): + with LZMAFile(BytesIO(COMPRESSED_XZ)) as f: + f.seek(-88) + self.assertEqual(f.tell(), 0) + self.assertEqual(f.read(), INPUT) + + def test_seek_bad_args(self): + f = LZMAFile(BytesIO(COMPRESSED_XZ)) + f.close() + self.assertRaises(ValueError, f.seek, 0) + with LZMAFile(BytesIO(), "w") as f: + self.assertRaises(ValueError, f.seek, 0) + with LZMAFile(BytesIO(COMPRESSED_XZ)) as f: + self.assertRaises(ValueError, f.seek, 0, 3) + # io.BufferedReader raises TypeError instead of ValueError + self.assertRaises((TypeError, ValueError), f.seek, 9, ()) + self.assertRaises(TypeError, f.seek, None) + self.assertRaises(TypeError, f.seek, b"derp") + + def test_tell(self): + with LZMAFile(BytesIO(COMPRESSED_XZ)) as f: + pos = 0 + while True: + self.assertEqual(f.tell(), pos) + result = f.read(183) + if not result: + break + pos += len(result) + self.assertEqual(f.tell(), len(INPUT)) + with LZMAFile(BytesIO(), "w") as f: + for pos in range(0, len(INPUT), 144): + self.assertEqual(f.tell(), pos) + f.write(INPUT[pos:pos+144]) + self.assertEqual(f.tell(), len(INPUT)) + + def test_tell_bad_args(self): + f = LZMAFile(BytesIO(COMPRESSED_XZ)) + f.close() + self.assertRaises(ValueError, f.tell) + + # TODO: RUSTPYTHON + @unittest.expectedFailure + def test_issue21872(self): + # sometimes decompress data incompletely + + # --------------------- + # when max_length == -1 + # --------------------- + d1 = LZMADecompressor() + entire = d1.decompress(ISSUE_21872_DAT, max_length=-1) + self.assertEqual(len(entire), 13160) + self.assertTrue(d1.eof) + + # --------------------- + # when max_length > 0 + # --------------------- + d2 = LZMADecompressor() + + # When this value of max_length is used, the input and output + # buffers are exhausted at the same time, and lzs's internal + # state still have 11 bytes can be output. + out1 = d2.decompress(ISSUE_21872_DAT, max_length=13149) + self.assertFalse(d2.needs_input) # ensure needs_input mechanism works + self.assertFalse(d2.eof) + + # simulate needs_input mechanism + # output internal state's 11 bytes + out2 = d2.decompress(b'') + self.assertEqual(len(out2), 11) + self.assertTrue(d2.eof) + self.assertEqual(out1 + out2, entire) + + def test_issue44439(self): + q = array.array('Q', [1, 2, 3, 4, 5]) + LENGTH = len(q) * q.itemsize + + with LZMAFile(BytesIO(), 'w') as f: + self.assertEqual(f.write(q), LENGTH) + self.assertEqual(f.tell(), LENGTH) + + +class OpenTestCase(unittest.TestCase): + + def test_binary_modes(self): + with lzma.open(BytesIO(COMPRESSED_XZ), "rb") as f: + self.assertEqual(f.read(), INPUT) + with BytesIO() as bio: + with lzma.open(bio, "wb") as f: + f.write(INPUT) + file_data = lzma.decompress(bio.getvalue()) + self.assertEqual(file_data, INPUT) + with lzma.open(bio, "ab") as f: + f.write(INPUT) + file_data = lzma.decompress(bio.getvalue()) + self.assertEqual(file_data, INPUT * 2) + + def test_text_modes(self): + uncompressed = INPUT.decode("ascii") + uncompressed_raw = uncompressed.replace("\n", os.linesep) + with lzma.open(BytesIO(COMPRESSED_XZ), "rt", encoding="ascii") as f: + self.assertEqual(f.read(), uncompressed) + with BytesIO() as bio: + with lzma.open(bio, "wt", encoding="ascii") as f: + f.write(uncompressed) + file_data = lzma.decompress(bio.getvalue()).decode("ascii") + self.assertEqual(file_data, uncompressed_raw) + with lzma.open(bio, "at", encoding="ascii") as f: + f.write(uncompressed) + file_data = lzma.decompress(bio.getvalue()).decode("ascii") + self.assertEqual(file_data, uncompressed_raw * 2) + + def test_filename(self): + with TempFile(TESTFN): + with lzma.open(TESTFN, "wb") as f: + f.write(INPUT) + with open(TESTFN, "rb") as f: + file_data = lzma.decompress(f.read()) + self.assertEqual(file_data, INPUT) + with lzma.open(TESTFN, "rb") as f: + self.assertEqual(f.read(), INPUT) + with lzma.open(TESTFN, "ab") as f: + f.write(INPUT) + with lzma.open(TESTFN, "rb") as f: + self.assertEqual(f.read(), INPUT * 2) + + # TODO: RUSTPYTHON + @unittest.expectedFailure + def test_with_pathlike_filename(self): + filename = FakePath(TESTFN) + with TempFile(filename): + with lzma.open(filename, "wb") as f: + f.write(INPUT) + self.assertEqual(f.name, TESTFN) + with open(filename, "rb") as f: + file_data = lzma.decompress(f.read()) + self.assertEqual(file_data, INPUT) + with lzma.open(filename, "rb") as f: + self.assertEqual(f.read(), INPUT) + self.assertEqual(f.name, TESTFN) + + def test_bad_params(self): + # Test invalid parameter combinations. + with self.assertRaises(ValueError): + lzma.open(TESTFN, "") + with self.assertRaises(ValueError): + lzma.open(TESTFN, "rbt") + with self.assertRaises(ValueError): + lzma.open(TESTFN, "rb", encoding="utf-8") + with self.assertRaises(ValueError): + lzma.open(TESTFN, "rb", errors="ignore") + with self.assertRaises(ValueError): + lzma.open(TESTFN, "rb", newline="\n") + + # TODO: RUSTPYTHON + @unittest.expectedFailure + def test_format_and_filters(self): + # Test non-default format and filter chain. + options = {"format": lzma.FORMAT_RAW, "filters": FILTERS_RAW_1} + with lzma.open(BytesIO(COMPRESSED_RAW_1), "rb", **options) as f: + self.assertEqual(f.read(), INPUT) + with BytesIO() as bio: + with lzma.open(bio, "wb", **options) as f: + f.write(INPUT) + file_data = lzma.decompress(bio.getvalue(), **options) + self.assertEqual(file_data, INPUT) + + def test_encoding(self): + # Test non-default encoding. + uncompressed = INPUT.decode("ascii") + uncompressed_raw = uncompressed.replace("\n", os.linesep) + with BytesIO() as bio: + with lzma.open(bio, "wt", encoding="utf-16-le") as f: + f.write(uncompressed) + file_data = lzma.decompress(bio.getvalue()).decode("utf-16-le") + self.assertEqual(file_data, uncompressed_raw) + bio.seek(0) + with lzma.open(bio, "rt", encoding="utf-16-le") as f: + self.assertEqual(f.read(), uncompressed) + + def test_encoding_error_handler(self): + # Test with non-default encoding error handler. + with BytesIO(lzma.compress(b"foo\xffbar")) as bio: + with lzma.open(bio, "rt", encoding="ascii", errors="ignore") as f: + self.assertEqual(f.read(), "foobar") + + # TODO: RUSTPYTHON + @unittest.expectedFailure + def test_newline(self): + # Test with explicit newline (universal newline mode disabled). + text = INPUT.decode("ascii") + with BytesIO() as bio: + with lzma.open(bio, "wt", encoding="ascii", newline="\n") as f: + f.write(text) + bio.seek(0) + with lzma.open(bio, "rt", encoding="ascii", newline="\r") as f: + self.assertEqual(f.readlines(), [text]) + + def test_x_mode(self): + self.addCleanup(unlink, TESTFN) + for mode in ("x", "xb", "xt"): + unlink(TESTFN) + encoding = "ascii" if "t" in mode else None + with lzma.open(TESTFN, mode, encoding=encoding): + pass + with self.assertRaises(FileExistsError): + with lzma.open(TESTFN, mode): + pass + + +class MiscellaneousTestCase(unittest.TestCase): + + # TODO: RUSTPYTHON + @unittest.expectedFailure + def test_is_check_supported(self): + # CHECK_NONE and CHECK_CRC32 should always be supported, + # regardless of the options liblzma was compiled with. + self.assertTrue(lzma.is_check_supported(lzma.CHECK_NONE)) + self.assertTrue(lzma.is_check_supported(lzma.CHECK_CRC32)) + + # The .xz format spec cannot store check IDs above this value. + self.assertFalse(lzma.is_check_supported(lzma.CHECK_ID_MAX + 1)) + + # This value should not be a valid check ID. + self.assertFalse(lzma.is_check_supported(lzma.CHECK_UNKNOWN)) + + # TODO: RUSTPYTHON + @unittest.expectedFailure + def test__encode_filter_properties(self): + with self.assertRaises(TypeError): + lzma._encode_filter_properties(b"not a dict") + with self.assertRaises(ValueError): + lzma._encode_filter_properties({"id": 0x100}) + with self.assertRaises(ValueError): + lzma._encode_filter_properties({"id": lzma.FILTER_LZMA2, "junk": 12}) + with self.assertRaises(lzma.LZMAError): + lzma._encode_filter_properties({"id": lzma.FILTER_DELTA, + "dist": 9001}) + + # Test with parameters used by zipfile module. + props = lzma._encode_filter_properties({ + "id": lzma.FILTER_LZMA1, + "pb": 2, + "lp": 0, + "lc": 3, + "dict_size": 8 << 20, + }) + self.assertEqual(props, b"]\x00\x00\x80\x00") + + # TODO: RUSTPYTHON + @unittest.expectedFailure + def test__decode_filter_properties(self): + with self.assertRaises(TypeError): + lzma._decode_filter_properties(lzma.FILTER_X86, {"should be": bytes}) + with self.assertRaises(lzma.LZMAError): + lzma._decode_filter_properties(lzma.FILTER_DELTA, b"too long") + + # Test with parameters used by zipfile module. + filterspec = lzma._decode_filter_properties( + lzma.FILTER_LZMA1, b"]\x00\x00\x80\x00") + self.assertEqual(filterspec["id"], lzma.FILTER_LZMA1) + self.assertEqual(filterspec["pb"], 2) + self.assertEqual(filterspec["lp"], 0) + self.assertEqual(filterspec["lc"], 3) + self.assertEqual(filterspec["dict_size"], 8 << 20) + + # see gh-104282 + filters = [lzma.FILTER_X86, lzma.FILTER_POWERPC, + lzma.FILTER_IA64, lzma.FILTER_ARM, + lzma.FILTER_ARMTHUMB, lzma.FILTER_SPARC] + for f in filters: + filterspec = lzma._decode_filter_properties(f, b"") + self.assertEqual(filterspec, {"id": f}) + + # TODO: RUSTPYTHON + @unittest.expectedFailure + def test_filter_properties_roundtrip(self): + spec1 = lzma._decode_filter_properties( + lzma.FILTER_LZMA1, b"]\x00\x00\x80\x00") + reencoded = lzma._encode_filter_properties(spec1) + spec2 = lzma._decode_filter_properties(lzma.FILTER_LZMA1, reencoded) + self.assertEqual(spec1, spec2) + + +# Test data: + +INPUT = b""" +LAERTES + + O, fear me not. + I stay too long: but here my father comes. + + Enter POLONIUS + + A double blessing is a double grace, + Occasion smiles upon a second leave. + +LORD POLONIUS + + Yet here, Laertes! aboard, aboard, for shame! + The wind sits in the shoulder of your sail, + And you are stay'd for. There; my blessing with thee! + And these few precepts in thy memory + See thou character. Give thy thoughts no tongue, + Nor any unproportioned thought his act. + Be thou familiar, but by no means vulgar. + Those friends thou hast, and their adoption tried, + Grapple them to thy soul with hoops of steel; + But do not dull thy palm with entertainment + Of each new-hatch'd, unfledged comrade. Beware + Of entrance to a quarrel, but being in, + Bear't that the opposed may beware of thee. + Give every man thy ear, but few thy voice; + Take each man's censure, but reserve thy judgment. + Costly thy habit as thy purse can buy, + But not express'd in fancy; rich, not gaudy; + For the apparel oft proclaims the man, + And they in France of the best rank and station + Are of a most select and generous chief in that. + Neither a borrower nor a lender be; + For loan oft loses both itself and friend, + And borrowing dulls the edge of husbandry. + This above all: to thine ownself be true, + And it must follow, as the night the day, + Thou canst not then be false to any man. + Farewell: my blessing season this in thee! + +LAERTES + + Most humbly do I take my leave, my lord. + +LORD POLONIUS + + The time invites you; go; your servants tend. + +LAERTES + + Farewell, Ophelia; and remember well + What I have said to you. + +OPHELIA + + 'Tis in my memory lock'd, + And you yourself shall keep the key of it. + +LAERTES + + Farewell. +""" + +COMPRESSED_BOGUS = b"this is not a valid lzma stream" + +COMPRESSED_XZ = ( + b"\xfd7zXZ\x00\x00\x04\xe6\xd6\xb4F\x02\x00!\x01\x16\x00\x00\x00t/\xe5\xa3" + b"\xe0\x07\x80\x03\xdf]\x00\x05\x14\x07bX\x19\xcd\xddn\x98\x15\xe4\xb4\x9d" + b"o\x1d\xc4\xe5\n\x03\xcc2h\xc7\\\x86\xff\xf8\xe2\xfc\xe7\xd9\xfe6\xb8(" + b"\xa8wd\xc2\"u.n\x1e\xc3\xf2\x8e\x8d\x8f\x02\x17/\xa6=\xf0\xa2\xdf/M\x89" + b"\xbe\xde\xa7\x1cz\x18-]\xd5\xef\x13\x8frZ\x15\x80\x8c\xf8\x8do\xfa\x12" + b"\x9b#z/\xef\xf0\xfaF\x01\x82\xa3M\x8e\xa1t\xca6 BF$\xe5Q\xa4\x98\xee\xde" + b"l\xe8\x7f\xf0\x9d,bn\x0b\x13\xd4\xa8\x81\xe4N\xc8\x86\x153\xf5x2\xa2O" + b"\x13@Q\xa1\x00/\xa5\xd0O\x97\xdco\xae\xf7z\xc4\xcdS\xb6t<\x16\xf2\x9cI#" + b"\x89ud\xc66Y\xd9\xee\xe6\xce\x12]\xe5\xf0\xaa\x96-Pe\xade:\x04\t\x1b\xf7" + b"\xdb7\n\x86\x1fp\xc8J\xba\xf4\xf0V\xa9\xdc\xf0\x02%G\xf9\xdf=?\x15\x1b" + b"\xe1(\xce\x82=\xd6I\xac3\x12\x0cR\xb7\xae\r\xb1i\x03\x95\x01\xbd\xbe\xfa" + b"\x02s\x01P\x9d\x96X\xb12j\xc8L\xa8\x84b\xf6\xc3\xd4c-H\x93oJl\xd0iQ\xe4k" + b"\x84\x0b\xc1\xb7\xbc\xb1\x17\x88\xb1\xca?@\xf6\x07\xea\xe6x\xf1H12P\x0f" + b"\x8a\xc9\xeauw\xe3\xbe\xaai\xa9W\xd0\x80\xcd#cb5\x99\xd8]\xa9d\x0c\xbd" + b"\xa2\xdcWl\xedUG\xbf\x89yF\xf77\x81v\xbd5\x98\xbeh8\x18W\x08\xf0\x1b\x99" + b"5:\x1a?rD\x96\xa1\x04\x0f\xae\xba\x85\xeb\x9d5@\xf5\x83\xd37\x83\x8ac" + b"\x06\xd4\x97i\xcdt\x16S\x82k\xf6K\x01vy\x88\x91\x9b6T\xdae\r\xfd]:k\xbal" + b"\xa9\xbba\xc34\xf9r\xeb}r\xdb\xc7\xdb*\x8f\x03z\xdc8h\xcc\xc9\xd3\xbcl" + b"\xa5-\xcb\xeaK\xa2\xc5\x15\xc0\xe3\xc1\x86Z\xfb\xebL\xe13\xcf\x9c\xe3" + b"\x1d\xc9\xed\xc2\x06\xcc\xce!\x92\xe5\xfe\x9c^\xa59w \x9bP\xa3PK\x08d" + b"\xf9\xe2Z}\xa7\xbf\xed\xeb%$\x0c\x82\xb8/\xb0\x01\xa9&,\xf7qh{Q\x96)\xf2" + b"q\x96\xc3\x80\xb4\x12\xb0\xba\xe6o\xf4!\xb4[\xd4\x8aw\x10\xf7t\x0c\xb3" + b"\xd9\xd5\xc3`^\x81\x11??\\\xa4\x99\x85R\xd4\x8e\x83\xc9\x1eX\xbfa\xf1" + b"\xac\xb0\xea\xea\xd7\xd0\xab\x18\xe2\xf2\xed\xe1\xb7\xc9\x18\xcbS\xe4>" + b"\xc9\x95H\xe8\xcb\t\r%\xeb\xc7$.o\xf1\xf3R\x17\x1db\xbb\xd8U\xa5^\xccS" + b"\x16\x01\x87\xf3/\x93\xd1\xf0v\xc0r\xd7\xcc\xa2Gkz\xca\x80\x0e\xfd\xd0" + b"\x8b\xbb\xd2Ix\xb3\x1ey\xca-0\xe3z^\xd6\xd6\x8f_\xf1\x9dP\x9fi\xa7\xd1" + b"\xe8\x90\x84\xdc\xbf\xcdky\x8e\xdc\x81\x7f\xa3\xb2+\xbf\x04\xef\xd8\\" + b"\xc4\xdf\xe1\xb0\x01\xe9\x93\xe3Y\xf1\x1dY\xe8h\x81\xcf\xf1w\xcc\xb4\xef" + b" \x8b|\x04\xea\x83ej\xbe\x1f\xd4z\x9c`\xd3\x1a\x92A\x06\xe5\x8f\xa9\x13" + b"\t\x9e=\xfa\x1c\xe5_\x9f%v\x1bo\x11ZO\xd8\xf4\t\xddM\x16-\x04\xfc\x18<\"" + b"CM\xddg~b\xf6\xef\x8e\x0c\xd0\xde|\xa0'\x8a\x0c\xd6x\xae!J\xa6F\x88\x15u" + b"\x008\x17\xbc7y\xb3\xd8u\xac_\x85\x8d\xe7\xc1@\x9c\xecqc\xa3#\xad\xf1" + b"\x935\xb5)_\r\xec3]\x0fo]5\xd0my\x07\x9b\xee\x81\xb5\x0f\xcfK+\x00\xc0" + b"\xe4b\x10\xe4\x0c\x1a \x9b\xe0\x97t\xf6\xa1\x9e\x850\xba\x0c\x9a\x8d\xc8" + b"\x8f\x07\xd7\xae\xc8\xf9+i\xdc\xb9k\xb0>f\x19\xb8\r\xa8\xf8\x1f$\xa5{p" + b"\xc6\x880\xce\xdb\xcf\xca_\x86\xac\x88h6\x8bZ%'\xd0\n\xbf\x0f\x9c\"\xba" + b"\xe5\x86\x9f\x0f7X=mNX[\xcc\x19FU\xc9\x860\xbc\x90a+* \xae_$\x03\x1e\xd3" + b"\xcd_\xa0\x9c\xde\xaf46q\xa5\xc9\x92\xd7\xca\xe3`\x9d\x85}\xb4\xff\xb3" + b"\x83\xfb\xb6\xca\xae`\x0bw\x7f\xfc\xd8\xacVe\x19\xc8\x17\x0bZ\xad\x88" + b"\xeb#\x97\x03\x13\xb1d\x0f{\x0c\x04w\x07\r\x97\xbd\xd6\xc1\xc3B:\x95\x08" + b"^\x10V\xaeaH\x02\xd9\xe3\n\\\x01X\xf6\x9c\x8a\x06u#%\xbe*\xa1\x18v\x85" + b"\xec!\t4\x00\x00\x00\x00Vj?uLU\xf3\xa6\x00\x01\xfb\x07\x81\x0f\x00\x00tw" + b"\x99P\xb1\xc4g\xfb\x02\x00\x00\x00\x00\x04YZ" +) + +COMPRESSED_ALONE = ( + b"]\x00\x00\x80\x00\xff\xff\xff\xff\xff\xff\xff\xff\x00\x05\x14\x07bX\x19" + b"\xcd\xddn\x98\x15\xe4\xb4\x9do\x1d\xc4\xe5\n\x03\xcc2h\xc7\\\x86\xff\xf8" + b"\xe2\xfc\xe7\xd9\xfe6\xb8(\xa8wd\xc2\"u.n\x1e\xc3\xf2\x8e\x8d\x8f\x02" + b"\x17/\xa6=\xf0\xa2\xdf/M\x89\xbe\xde\xa7\x1cz\x18-]\xd5\xef\x13\x8frZ" + b"\x15\x80\x8c\xf8\x8do\xfa\x12\x9b#z/\xef\xf0\xfaF\x01\x82\xa3M\x8e\xa1t" + b"\xca6 BF$\xe5Q\xa4\x98\xee\xdel\xe8\x7f\xf0\x9d,bn\x0b\x13\xd4\xa8\x81" + b"\xe4N\xc8\x86\x153\xf5x2\xa2O\x13@Q\xa1\x00/\xa5\xd0O\x97\xdco\xae\xf7z" + b"\xc4\xcdS\xb6t<\x16\xf2\x9cI#\x89ud\xc66Y\xd9\xee\xe6\xce\x12]\xe5\xf0" + b"\xaa\x96-Pe\xade:\x04\t\x1b\xf7\xdb7\n\x86\x1fp\xc8J\xba\xf4\xf0V\xa9" + b"\xdc\xf0\x02%G\xf9\xdf=?\x15\x1b\xe1(\xce\x82=\xd6I\xac3\x12\x0cR\xb7" + b"\xae\r\xb1i\x03\x95\x01\xbd\xbe\xfa\x02s\x01P\x9d\x96X\xb12j\xc8L\xa8" + b"\x84b\xf8\x1epl\xeajr\xd1=\t\x03\xdd\x13\x1b3!E\xf9vV\xdaF\xf3\xd7\xb4" + b"\x0c\xa9P~\xec\xdeE\xe37\xf6\x1d\xc6\xbb\xddc%\xb6\x0fI\x07\xf0;\xaf\xe7" + b"\xa0\x8b\xa7Z\x99(\xe9\xe2\xf0o\x18>`\xe1\xaa\xa8\xd9\xa1\xb2}\xe7\x8d" + b"\x834T\xb6\xef\xc1\xde\xe3\x98\xbcD\x03MA@\xd8\xed\xdc\xc8\x93\x03\x1a" + b"\x93\x0b\x7f\x94\x12\x0b\x02Sa\x18\xc9\xc5\x9bTJE}\xf6\xc8g\x17#ZV\x01" + b"\xc9\x9dc\x83\x0e>0\x16\x90S\xb8/\x03y_\x18\xfa(\xd7\x0br\xa2\xb0\xba?" + b"\x8c\xe6\x83@\x84\xdf\x02:\xc5z\x9e\xa6\x84\xc9\xf5BeyX\x83\x1a\xf1 :\t" + b"\xf7\x19\xfexD\\&G\xf3\x85Y\xa2J\xf9\x0bv{\x89\xf6\xe7)A\xaf\x04o\x00" + b"\x075\xd3\xe0\x7f\x97\x98F\x0f?v\x93\xedVtTf\xb5\x97\x83\xed\x19\xd7\x1a" + b"'k\xd7\xd9\xc5\\Y\xd1\xdc\x07\x15|w\xbc\xacd\x87\x08d\xec\xa7\xf6\x82" + b"\xfc\xb3\x93\xeb\xb9 \x8d\xbc ,\xb3X\xb0\xd2s\xd7\xd1\xffv\x05\xdf}\xa2" + b"\x96\xfb%\n\xdf\xa2\x7f\x08.\xa16\n\xe0\x19\x93\x7fh\n\x1c\x8c\x0f \x11" + b"\xc6Bl\x95\x19U}\xe4s\xb5\x10H\xea\x86pB\xe88\x95\xbe\x8cZ\xdb\xe4\x94A" + b"\x92\xb9;z\xaa\xa7{\x1c5!\xc0\xaf\xc1A\xf9\xda\xf0$\xb0\x02qg\xc8\xc7/|" + b"\xafr\x99^\x91\x88\xbf\x03\xd9=\xd7n\xda6{>8\n\xc7:\xa9'\xba.\x0b\xe2" + b"\xb5\x1d\x0e\n\x9a\x8e\x06\x8f:\xdd\x82'[\xc3\"wD$\xa7w\xecq\x8c,1\x93" + b"\xd0,\xae2w\x93\x12$Jd\x19mg\x02\x93\x9cA\x95\x9d&\xca8i\x9c\xb0;\xe7NQ" + b"\x1frh\x8beL;\xb0m\xee\x07Q\x9b\xc6\xd8\x03\xb5\xdeN\xd4\xfe\x98\xd0\xdc" + b"\x1a[\x04\xde\x1a\xf6\x91j\xf8EOli\x8eB^\x1d\x82\x07\xb2\xb5R]\xb7\xd7" + b"\xe9\xa6\xc3.\xfb\xf0-\xb4e\x9b\xde\x03\x88\xc6\xc1iN\x0e\x84wbQ\xdf~" + b"\xe9\xa4\x884\x96kM\xbc)T\xf3\x89\x97\x0f\x143\xe7)\xa0\xb3B\x00\xa8\xaf" + b"\x82^\xcb\xc7..\xdb\xc7\t\x9dH\xee5\xe9#\xe6NV\x94\xcb$Kk\xe3\x7f\r\xe3t" + b"\x12\xcf'\xefR\x8b\xf42\xcf-LH\xac\xe5\x1f0~?SO\xeb\xc1E\x1a\x1c]\xf2" + b"\xc4<\x11\x02\x10Z0a*?\xe4r\xff\xfb\xff\xf6\x14nG\xead^\xd6\xef8\xb6uEI" + b"\x99\nV\xe2\xb3\x95\x8e\x83\xf6i!\xb5&1F\xb1DP\xf4 SO3D!w\x99_G\x7f+\x90" + b".\xab\xbb]\x91>\xc9#h;\x0f5J\x91K\xf4^-[\x9e\x8a\\\x94\xca\xaf\xf6\x19" + b"\xd4\xa1\x9b\xc4\xb8p\xa1\xae\x15\xe9r\x84\xe0\xcar.l []\x8b\xaf+0\xf2g" + b"\x01aKY\xdfI\xcf,\n\xe8\xf0\xe7V\x80_#\xb2\xf2\xa9\x06\x8c>w\xe2W,\xf4" + b"\x8c\r\xf963\xf5J\xcc2\x05=kT\xeaUti\xe5_\xce\x1b\xfa\x8dl\x02h\xef\xa8" + b"\xfbf\x7f\xff\xf0\x19\xeax" +) + +FILTERS_RAW_1 = [{"id": lzma.FILTER_LZMA2, "preset": 3}] +COMPRESSED_RAW_1 = ( + b"\xe0\x07\x80\x03\xfd]\x00\x05\x14\x07bX\x19\xcd\xddn\x96cyq\xa1\xdd\xee" + b"\xf8\xfam\xe3'\x88\xd3\xff\xe4\x9e \xceQ\x91\xa4\x14I\xf6\xb9\x9dVL8\x15" + b"_\x0e\x12\xc3\xeb\xbc\xa5\xcd\nW\x1d$=R;\x1d\xf8k8\t\xb1{\xd4\xc5+\x9d" + b"\x87c\xe5\xef\x98\xb4\xd7S3\xcd\xcc\xd2\xed\xa4\x0em\xe5\xf4\xdd\xd0b" + b"\xbe4*\xaa\x0b\xc5\x08\x10\x85+\x81.\x17\xaf9\xc9b\xeaZrA\xe20\x7fs\"r" + b"\xdaG\x81\xde\x90cu\xa5\xdb\xa9.A\x08l\xb0<\xf6\x03\xddOi\xd0\xc5\xb4" + b"\xec\xecg4t6\"\xa6\xb8o\xb5?\x18^}\xb6}\x03[:\xeb\x03\xa9\n[\x89l\x19g" + b"\x16\xc82\xed\x0b\xfb\x86n\xa2\x857@\x93\xcd6T\xc3u\xb0\t\xf9\x1b\x918" + b"\xfc[\x1b\x1e4\xb3\x14\x06PCV\xa8\"\xf5\x81x~\xe9\xb5N\x9cK\x9f\xc6\xc3%" + b"\xc8k:{6\xe7\xf7\xbd\x05\x02\xb4\xc4\xc3\xd3\xfd\xc3\xa8\\\xfc@\xb1F_" + b"\xc8\x90\xd9sU\x98\xad8\x05\x07\xde7J\x8bM\xd0\xb3;X\xec\x87\xef\xae\xb3" + b"eO,\xb1z,d\x11y\xeejlB\x02\x1d\xf28\x1f#\x896\xce\x0b\xf0\xf5\xa9PK\x0f" + b"\xb3\x13P\xd8\x88\xd2\xa1\x08\x04C?\xdb\x94_\x9a\"\xe9\xe3e\x1d\xde\x9b" + b"\xa1\xe8>H\x98\x10;\xc5\x03#\xb5\x9d4\x01\xe7\xc5\xba%v\xa49\x97A\xe0\"" + b"\x8c\xc22\xe3i\xc1\x9d\xab3\xdf\xbe\xfdDm7\x1b\x9d\xab\xb5\x15o:J\x92" + b"\xdb\x816\x17\xc2O\x99\x1b\x0e\x8d\xf3\tQ\xed\x8e\x95S/\x16M\xb2S\x04" + b"\x0f\xc3J\xc6\xc7\xe4\xcb\xc5\xf4\xe7d\x14\xe4=^B\xfb\xd3E\xd3\x1e\xcd" + b"\x91\xa5\xd0G\x8f.\xf6\xf9\x0bb&\xd9\x9f\xc2\xfdj\xa2\x9e\xc4\\\x0e\x1dC" + b"v\xe8\xd2\x8a?^H\xec\xae\xeb>\xfe\xb8\xab\xd4IqY\x8c\xd4K7\x11\xf4D\xd0W" + b"\xa5\xbe\xeaO\xbf\xd0\x04\xfdl\x10\xae5\xd4U\x19\x06\xf9{\xaa\xe0\x81" + b"\x0f\xcf\xa3k{\x95\xbd\x19\xa2\xf8\xe4\xa3\x08O*\xf1\xf1B-\xc7(\x0eR\xfd" + b"@E\x9f\xd3\x1e:\xfdV\xb7\x04Y\x94\xeb]\x83\xc4\xa5\xd7\xc0gX\x98\xcf\x0f" + b"\xcd3\x00]n\x17\xec\xbd\xa3Y\x86\xc5\xf3u\xf6*\xbdT\xedA$A\xd9A\xe7\x98" + b"\xef\x14\x02\x9a\xfdiw\xec\xa0\x87\x11\xd9%\xc5\xeb\x8a=\xae\xc0\xc4\xc6" + b"D\x80\x8f\xa8\xd1\xbbq\xb2\xc0\xa0\xf5Cqp\xeeL\xe3\xe5\xdc \x84\"\xe9" + b"\x80t\x83\x05\xba\xf1\xc5~\x93\xc9\xf0\x01c\xceix\x9d\xed\xc5)l\x16)\xd1" + b"\x03@l\x04\x7f\x87\xa5yn\x1b\x01D\xaa:\xd2\x96\xb4\xb3?\xb0\xf9\xce\x07" + b"\xeb\x81\x00\xe4\xc3\xf5%_\xae\xd4\xf9\xeb\xe2\rh\xb2#\xd67Q\x16D\x82hn" + b"\xd1\xa3_?q\xf0\xe2\xac\xf317\x9e\xd0_\x83|\xf1\xca\xb7\x95S\xabW\x12" + b"\xff\xddt\xf69L\x01\xf2|\xdaW\xda\xees\x98L\x18\xb8_\xe8$\x82\xea\xd6" + b"\xd1F\xd4\x0b\xcdk\x01vf\x88h\xc3\xae\xb91\xc7Q\x9f\xa5G\xd9\xcc\x1f\xe3" + b"5\xb1\xdcy\x7fI\x8bcw\x8e\x10rIp\x02:\x19p_\xc8v\xcea\"\xc1\xd9\x91\x03" + b"\xbfe\xbe\xa6\xb3\xa8\x14\x18\xc3\xabH*m}\xc2\xc1\x9a}>l%\xce\x84\x99" + b"\xb3d\xaf\xd3\x82\x15\xdf\xc1\xfc5fOg\x9b\xfc\x8e^&\t@\xce\x9f\x06J\xb8" + b"\xb5\x86\x1d\xda{\x9f\xae\xb0\xff\x02\x81r\x92z\x8cM\xb7ho\xc9^\x9c\xb6" + b"\x9c\xae\xd1\xc9\xf4\xdfU7\xd6\\!\xea\x0b\x94k\xb9Ud~\x98\xe7\x86\x8az" + b"\x10;\xe3\x1d\xe5PG\xf8\xa4\x12\x05w\x98^\xc4\xb1\xbb\xfb\xcf\xe0\x7f" + b"\x033Sf\x0c \xb1\xf6@\x94\xe5\xa3\xb2\xa7\x10\x9a\xc0\x14\xc3s\xb5xRD" + b"\xf4`W\xd9\xe5\xd3\xcf\x91\rTZ-X\xbe\xbf\xb5\xe2\xee|\x1a\xbf\xfb\x08" + b"\x91\xe1\xfc\x9a\x18\xa3\x8b\xd6^\x89\xf5[\xef\x87\xd1\x06\x1c7\xd6\xa2" + b"\t\tQ5/@S\xc05\xd2VhAK\x03VC\r\x9b\x93\xd6M\xf1xO\xaaO\xed\xb9<\x0c\xdae" + b"*\xd0\x07Hk6\x9fG+\xa1)\xcd\x9cl\x87\xdb\xe1\xe7\xefK}\x875\xab\xa0\x19u" + b"\xf6*F\xb32\x00\x00\x00" +) + +FILTERS_RAW_2 = [{"id": lzma.FILTER_DELTA, "dist": 2}, + {"id": lzma.FILTER_LZMA2, + "preset": lzma.PRESET_DEFAULT | lzma.PRESET_EXTREME}] +COMPRESSED_RAW_2 = ( + b"\xe0\x07\x80\x05\x91]\x00\x05\x14\x06-\xd4\xa8d?\xef\xbe\xafH\xee\x042" + b"\xcb.\xb5g\x8f\xfb\x14\xab\xa5\x9f\x025z\xa4\xdd\xd8\t[}W\xf8\x0c\x1dmH" + b"\xfa\x05\xfcg\xba\xe5\x01Q\x0b\x83R\xb6A\x885\xc0\xba\xee\n\x1cv~\xde:o" + b"\x06:J\xa7\x11Cc\xea\xf7\xe5*o\xf7\x83\\l\xbdE\x19\x1f\r\xa8\x10\xb42" + b"\x0caU{\xd7\xb8w\xdc\xbe\x1b\xfc8\xb4\xcc\xd38\\\xf6\x13\xf6\xe7\x98\xfa" + b"\xc7[\x17_9\x86%\xa8\xf8\xaa\xb8\x8dfs#\x1e=\xed<\x92\x10\\t\xff\x86\xfb" + b"=\x9e7\x18\x1dft\\\xb5\x01\x95Q\xc5\x19\xb38\xe0\xd4\xaa\x07\xc3\x7f\xd8" + b"\xa2\x00>-\xd3\x8e\xa1#\xfa\x83ArAm\xdbJ~\x93\xa3B\x82\xe0\xc7\xcc(\x08`" + b"WK\xad\x1b\x94kaj\x04 \xde\xfc\xe1\xed\xb0\x82\x91\xefS\x84%\x86\xfbi" + b"\x99X\xf1B\xe7\x90;E\xfde\x98\xda\xca\xd6T\xb4bg\xa4\n\x9aj\xd1\x83\x9e]" + b"\"\x7fM\xb5\x0fr\xd2\\\xa5j~P\x10GH\xbfN*Z\x10.\x81\tpE\x8a\x08\xbe1\xbd" + b"\xcd\xa9\xe1\x8d\x1f\x04\xf9\x0eH\xb9\xae\xd6\xc3\xc1\xa5\xa9\x95P\xdc~" + b"\xff\x01\x930\xa9\x04\xf6\x03\xfe\xb5JK\xc3]\xdd9\xb1\xd3\xd7F\xf5\xd1" + b"\x1e\xa0\x1c_\xed[\x0c\xae\xd4\x8b\x946\xeb\xbf\xbb\xe3$kS{\xb5\x80,f:Sj" + b"\x0f\x08z\x1c\xf5\xe8\xe6\xae\x98\xb0Q~r\x0f\xb0\x05?\xb6\x90\x19\x02&" + b"\xcb\x80\t\xc4\xea\x9c|x\xce\x10\x9c\xc5|\xcbdhh+\x0c'\xc5\x81\xc33\xb5" + b"\x14q\xd6\xc5\xe3`Z#\xdc\x8a\xab\xdd\xea\x08\xc2I\xe7\x02l{\xec\x196\x06" + b"\x91\x8d\xdc\xd5\xb3x\xe1hz%\xd1\xf8\xa5\xdd\x98!\x8c\x1c\xc1\x17RUa\xbb" + b"\x95\x0f\xe4X\xea1\x0c\xf1=R\xbe\xc60\xe3\xa4\x9a\x90bd\x97$]B\x01\xdd" + b"\x1f\xe3h2c\x1e\xa0L`4\xc6x\xa3Z\x8a\r\x14]T^\xd8\x89\x1b\x92\r;\xedY" + b"\x0c\xef\x8d9z\xf3o\xb6)f\xa9]$n\rp\x93\xd0\x10\xa4\x08\xb8\xb2\x8b\xb6" + b"\x8f\x80\xae;\xdcQ\xf1\xfa\x9a\x06\x8e\xa5\x0e\x8cK\x9c @\xaa:UcX\n!\xc6" + b"\x02\x12\xcb\x1b\"=\x16.\x1f\x176\xf2g=\xe1Wn\xe9\xe1\xd4\xf1O\xad\x15" + b"\x86\xe9\xa3T\xaf\xa9\xd7D\xb5\xd1W3pnt\x11\xc7VOj\xb7M\xc4i\xa1\xf1$3" + b"\xbb\xdc\x8af\xb0\xc5Y\r\xd1\xfb\xf2\xe7K\xe6\xc5hwO\xfe\x8c2^&\x07\xd5" + b"\x1fV\x19\xfd\r\x14\xd2i=yZ\xe6o\xaf\xc6\xb6\x92\x9d\xc4\r\xb3\xafw\xac%" + b"\xcfc\x1a\xf1`]\xf2\x1a\x9e\x808\xedm\xedQ\xb2\xfe\xe4h`[q\xae\xe0\x0f" + b"\xba0g\xb6\"N\xc3\xfb\xcfR\x11\xc5\x18)(\xc40\\\xa3\x02\xd9G!\xce\x1b" + b"\xc1\x96x\xb5\xc8z\x1f\x01\xb4\xaf\xde\xc2\xcd\x07\xe7H\xb3y\xa8M\n\\A\t" + b"ar\xddM\x8b\x9a\xea\x84\x9b!\xf1\x8d\xb1\xf1~\x1e\r\xa5H\xba\xf1\x84o" + b"\xda\x87\x01h\xe9\xa2\xbe\xbeqN\x9d\x84\x0b!WG\xda\xa1\xa5A\xb7\xc7`j" + b"\x15\xf2\xe9\xdd?\x015B\xd2~E\x06\x11\xe0\x91!\x05^\x80\xdd\xa8y\x15}" + b"\xa1)\xb1)\x81\x18\xf4\xf4\xf8\xc0\xefD\xe3\xdb2f\x1e\x12\xabu\xc9\x97" + b"\xcd\x1e\xa7\x0c\x02x4_6\x03\xc4$t\xf39\x94\x1d=\xcb\xbfv\\\xf5\xa3\x1d" + b"\x9d8jk\x95\x13)ff\xf9n\xc4\xa9\xe3\x01\xb8\xda\xfb\xab\xdfM\x99\xfb\x05" + b"\xe0\xe9\xb0I\xf4E\xab\xe2\x15\xa3\x035\xe7\xdeT\xee\x82p\xb4\x88\xd3" + b"\x893\x9c/\xc0\xd6\x8fou;\xf6\x95PR\xa9\xb2\xc1\xefFj\xe2\xa7$\xf7h\xf1" + b"\xdfK(\xc9c\xba7\xe8\xe3)\xdd\xb2,\x83\xfb\x84\x18.y\x18Qi\x88\xf8`h-" + b"\xef\xd5\xed\x8c\t\xd8\xc3^\x0f\x00\xb7\xd0[!\xafM\x9b\xd7.\x07\xd8\xfb" + b"\xd9\xe2-S+\xaa8,\xa0\x03\x1b \xea\xa8\x00\xc3\xab~\xd0$e\xa5\x7f\xf7" + b"\x95P]\x12\x19i\xd9\x7fo\x0c\xd8g^\rE\xa5\x80\x18\xc5\x01\x80\xaek`\xff~" + b"\xb6y\xe7+\xe5\x11^D\xa7\x85\x18\"!\xd6\xd2\xa7\xf4\x1eT\xdb\x02\xe15" + b"\x02Y\xbc\x174Z\xe7\x9cH\x1c\xbf\x0f\xc6\xe9f]\xcf\x8cx\xbc\xe5\x15\x94" + b"\xfc3\xbc\xa7TUH\xf1\x84\x1b\xf7\xa9y\xc07\x84\xf8X\xd8\xef\xfc \x1c\xd8" + b"( /\xf2\xb7\xec\xc1\\\x8c\xf6\x95\xa1\x03J\x83vP8\xe1\xe3\xbb~\xc24kA" + b"\x98y\xa1\xf2P\xe9\x9d\xc9J\xf8N\x99\xb4\xceaO\xde\x16\x1e\xc2\x19\xa7" + b"\x03\xd2\xe0\x8f:\x15\xf3\x84\x9e\xee\xe6e\xb8\x02q\xc7AC\x1emw\xfd\t" + b"\x9a\x1eu\xc1\xa9\xcaCwUP\x00\xa5\xf78L4w!\x91L2 \x87\xd0\xf2\x06\x81j" + b"\x80;\x03V\x06\x87\x92\xcb\x90lv@E\x8d\x8d\xa5\xa6\xe7Z[\xdf\xd6E\x03`>" + b"\x8f\xde\xa1bZ\x84\xd0\xa9`\x05\x0e{\x80;\xe3\xbef\x8d\x1d\xebk1.\xe3" + b"\xe9N\x15\xf7\xd4(\xfa\xbb\x15\xbdu\xf7\x7f\x86\xae!\x03L\x1d\xb5\xc1" + b"\xb9\x11\xdb\xd0\x93\xe4\x02\xe1\xd2\xcbBjc_\xe8}d\xdb\xc3\xa0Y\xbe\xc9/" + b"\x95\x01\xa3,\xe6bl@\x01\xdbp\xc2\xce\x14\x168\xc2q\xe3uH\x89X\xa4\xa9" + b"\x19\x1d\xc1}\x7fOX\x19\x9f\xdd\xbe\x85\x83\xff\x96\x1ee\x82O`CF=K\xeb$I" + b"\x17_\xefX\x8bJ'v\xde\x1f+\xd9.v\xf8Tv\x17\xf2\x9f5\x19\xe1\xb9\x91\xa8S" + b"\x86\xbd\x1a\"(\xa5x\x8dC\x03X\x81\x91\xa8\x11\xc4pS\x13\xbc\xf2'J\xae!" + b"\xef\xef\x84G\t\x8d\xc4\x10\x132\x00oS\x9e\xe0\xe4d\x8f\xb8y\xac\xa6\x9f" + b",\xb8f\x87\r\xdf\x9eE\x0f\xe1\xd0\\L\x00\xb2\xe1h\x84\xef}\x98\xa8\x11" + b"\xccW#\\\x83\x7fo\xbbz\x8f\x00" +) + +FILTERS_RAW_3 = [{"id": lzma.FILTER_IA64, "start_offset": 0x100}, + {"id": lzma.FILTER_LZMA2}] +COMPRESSED_RAW_3 = ( + b"\xe0\x07\x80\x03\xdf]\x00\x05\x14\x07bX\x19\xcd\xddn\x98\x15\xe4\xb4\x9d" + b"o\x1d\xc4\xe5\n\x03\xcc2h\xc7\\\x86\xff\xf8\xe2\xfc\xe7\xd9\xfe6\xb8(" + b"\xa8wd\xc2\"u.n\x1e\xc3\xf2\x8e\x8d\x8f\x02\x17/\xa6=\xf0\xa2\xdf/M\x89" + b"\xbe\xde\xa7\x1cz\x18-]\xd5\xef\x13\x8frZ\x15\x80\x8c\xf8\x8do\xfa\x12" + b"\x9b#z/\xef\xf0\xfaF\x01\x82\xa3M\x8e\xa1t\xca6 BF$\xe5Q\xa4\x98\xee\xde" + b"l\xe8\x7f\xf0\x9d,bn\x0b\x13\xd4\xa8\x81\xe4N\xc8\x86\x153\xf5x2\xa2O" + b"\x13@Q\xa1\x00/\xa5\xd0O\x97\xdco\xae\xf7z\xc4\xcdS\xb6t<\x16\xf2\x9cI#" + b"\x89ud\xc66Y\xd9\xee\xe6\xce\x12]\xe5\xf0\xaa\x96-Pe\xade:\x04\t\x1b\xf7" + b"\xdb7\n\x86\x1fp\xc8J\xba\xf4\xf0V\xa9\xdc\xf0\x02%G\xf9\xdf=?\x15\x1b" + b"\xe1(\xce\x82=\xd6I\xac3\x12\x0cR\xb7\xae\r\xb1i\x03\x95\x01\xbd\xbe\xfa" + b"\x02s\x01P\x9d\x96X\xb12j\xc8L\xa8\x84b\xf6\xc3\xd4c-H\x93oJl\xd0iQ\xe4k" + b"\x84\x0b\xc1\xb7\xbc\xb1\x17\x88\xb1\xca?@\xf6\x07\xea\xe6x\xf1H12P\x0f" + b"\x8a\xc9\xeauw\xe3\xbe\xaai\xa9W\xd0\x80\xcd#cb5\x99\xd8]\xa9d\x0c\xbd" + b"\xa2\xdcWl\xedUG\xbf\x89yF\xf77\x81v\xbd5\x98\xbeh8\x18W\x08\xf0\x1b\x99" + b"5:\x1a?rD\x96\xa1\x04\x0f\xae\xba\x85\xeb\x9d5@\xf5\x83\xd37\x83\x8ac" + b"\x06\xd4\x97i\xcdt\x16S\x82k\xf6K\x01vy\x88\x91\x9b6T\xdae\r\xfd]:k\xbal" + b"\xa9\xbba\xc34\xf9r\xeb}r\xdb\xc7\xdb*\x8f\x03z\xdc8h\xcc\xc9\xd3\xbcl" + b"\xa5-\xcb\xeaK\xa2\xc5\x15\xc0\xe3\xc1\x86Z\xfb\xebL\xe13\xcf\x9c\xe3" + b"\x1d\xc9\xed\xc2\x06\xcc\xce!\x92\xe5\xfe\x9c^\xa59w \x9bP\xa3PK\x08d" + b"\xf9\xe2Z}\xa7\xbf\xed\xeb%$\x0c\x82\xb8/\xb0\x01\xa9&,\xf7qh{Q\x96)\xf2" + b"q\x96\xc3\x80\xb4\x12\xb0\xba\xe6o\xf4!\xb4[\xd4\x8aw\x10\xf7t\x0c\xb3" + b"\xd9\xd5\xc3`^\x81\x11??\\\xa4\x99\x85R\xd4\x8e\x83\xc9\x1eX\xbfa\xf1" + b"\xac\xb0\xea\xea\xd7\xd0\xab\x18\xe2\xf2\xed\xe1\xb7\xc9\x18\xcbS\xe4>" + b"\xc9\x95H\xe8\xcb\t\r%\xeb\xc7$.o\xf1\xf3R\x17\x1db\xbb\xd8U\xa5^\xccS" + b"\x16\x01\x87\xf3/\x93\xd1\xf0v\xc0r\xd7\xcc\xa2Gkz\xca\x80\x0e\xfd\xd0" + b"\x8b\xbb\xd2Ix\xb3\x1ey\xca-0\xe3z^\xd6\xd6\x8f_\xf1\x9dP\x9fi\xa7\xd1" + b"\xe8\x90\x84\xdc\xbf\xcdky\x8e\xdc\x81\x7f\xa3\xb2+\xbf\x04\xef\xd8\\" + b"\xc4\xdf\xe1\xb0\x01\xe9\x93\xe3Y\xf1\x1dY\xe8h\x81\xcf\xf1w\xcc\xb4\xef" + b" \x8b|\x04\xea\x83ej\xbe\x1f\xd4z\x9c`\xd3\x1a\x92A\x06\xe5\x8f\xa9\x13" + b"\t\x9e=\xfa\x1c\xe5_\x9f%v\x1bo\x11ZO\xd8\xf4\t\xddM\x16-\x04\xfc\x18<\"" + b"CM\xddg~b\xf6\xef\x8e\x0c\xd0\xde|\xa0'\x8a\x0c\xd6x\xae!J\xa6F\x88\x15u" + b"\x008\x17\xbc7y\xb3\xd8u\xac_\x85\x8d\xe7\xc1@\x9c\xecqc\xa3#\xad\xf1" + b"\x935\xb5)_\r\xec3]\x0fo]5\xd0my\x07\x9b\xee\x81\xb5\x0f\xcfK+\x00\xc0" + b"\xe4b\x10\xe4\x0c\x1a \x9b\xe0\x97t\xf6\xa1\x9e\x850\xba\x0c\x9a\x8d\xc8" + b"\x8f\x07\xd7\xae\xc8\xf9+i\xdc\xb9k\xb0>f\x19\xb8\r\xa8\xf8\x1f$\xa5{p" + b"\xc6\x880\xce\xdb\xcf\xca_\x86\xac\x88h6\x8bZ%'\xd0\n\xbf\x0f\x9c\"\xba" + b"\xe5\x86\x9f\x0f7X=mNX[\xcc\x19FU\xc9\x860\xbc\x90a+* \xae_$\x03\x1e\xd3" + b"\xcd_\xa0\x9c\xde\xaf46q\xa5\xc9\x92\xd7\xca\xe3`\x9d\x85}\xb4\xff\xb3" + b"\x83\xfb\xb6\xca\xae`\x0bw\x7f\xfc\xd8\xacVe\x19\xc8\x17\x0bZ\xad\x88" + b"\xeb#\x97\x03\x13\xb1d\x0f{\x0c\x04w\x07\r\x97\xbd\xd6\xc1\xc3B:\x95\x08" + b"^\x10V\xaeaH\x02\xd9\xe3\n\\\x01X\xf6\x9c\x8a\x06u#%\xbe*\xa1\x18v\x85" + b"\xec!\t4\x00\x00\x00" +) + +FILTERS_RAW_4 = [{"id": lzma.FILTER_DELTA, "dist": 4}, + {"id": lzma.FILTER_X86, "start_offset": 0x40}, + {"id": lzma.FILTER_LZMA2, "preset": 4, "lc": 2}] +COMPRESSED_RAW_4 = ( + b"\xe0\x07\x80\x06\x0e\\\x00\x05\x14\x07bW\xaah\xdd\x10\xdc'\xd6\x90,\xc6v" + b"Jq \x14l\xb7\x83xB\x0b\x97f=&fx\xba\n>Tn\xbf\x8f\xfb\x1dF\xca\xc3v_\xca?" + b"\xfbV<\x92#\xd4w\xa6\x8a\xeb\xf6\x03\xc0\x01\x94\xd8\x9e\x13\x12\x98\xd1" + b"*\xfa]c\xe8\x1e~\xaf\xb5]Eg\xfb\x9e\x01\"8\xb2\x90\x06=~\xe9\x91W\xcd" + b"\xecD\x12\xc7\xfa\xe1\x91\x06\xc7\x99\xb9\xe3\x901\x87\x19u\x0f\x869\xff" + b"\xc1\xb0hw|\xb0\xdcl\xcck\xb16o7\x85\xee{Y_b\xbf\xbc$\xf3=\x8d\x8bw\xe5Z" + b"\x08@\xc4kmE\xad\xfb\xf6*\xd8\xad\xa1\xfb\xc5{\xdej,)\x1emB\x1f<\xaeca" + b"\x80(\xee\x07 \xdf\xe9\xf8\xeb\x0e-\x97\x86\x90c\xf9\xea'B\xf7`\xd7\xb0" + b"\x92\xbd\xa0\x82]\xbd\x0e\x0eB\x19\xdc\x96\xc6\x19\xd86D\xf0\xd5\x831" + b"\x03\xb7\x1c\xf7&5\x1a\x8f PZ&j\xf8\x98\x1bo\xcc\x86\x9bS\xd3\xa5\xcdu" + b"\xf9$\xcc\x97o\xe5V~\xfb\x97\xb5\x0b\x17\x9c\xfdxW\x10\xfep4\x80\xdaHDY" + b"\xfa)\xfet\xb5\"\xd4\xd3F\x81\xf4\x13\x1f\xec\xdf\xa5\x13\xfc\"\x91x\xb7" + b"\x99\xce\xc8\x92\n\xeb[\x10l*Y\xd8\xb1@\x06\xc8o\x8d7r\xebu\xfd5\x0e\x7f" + b"\xf1$U{\t}\x1fQ\xcfxN\x9d\x9fXX\xe9`\x83\xc1\x06\xf4\x87v-f\x11\xdb/\\" + b"\x06\xff\xd7)B\xf3g\x06\x88#2\x1eB244\x7f4q\t\xc893?mPX\x95\xa6a\xfb)d" + b"\x9b\xfc\x98\x9aj\x04\xae\x9b\x9d\x19w\xba\xf92\xfaA\x11\\\x17\x97C3\xa4" + b"\xbc!\x88\xcdo[\xec:\x030\x91.\x85\xe0@\\4\x16\x12\x9d\xcaJv\x97\xb04" + b"\xack\xcbkf\xa3ss\xfc\x16^\x8ce\x85a\xa5=&\xecr\xb3p\xd1E\xd5\x80y\xc7" + b"\xda\xf6\xfek\xbcT\xbfH\xee\x15o\xc5\x8c\x830\xec\x1d\x01\xae\x0c-e\\" + b"\x91\x90\x94\xb2\xf8\x88\x91\xe8\x0b\xae\xa7>\x98\xf6\x9ck\xd2\xc6\x08" + b"\xe6\xab\t\x98\xf2!\xa0\x8c^\xacqA\x99<\x1cEG\x97\xc8\xf1\xb6\xb9\x82" + b"\x8d\xf7\x08s\x98a\xff\xe3\xcc\x92\x0e\xd2\xb6U\xd7\xd9\x86\x7fa\xe5\x1c" + b"\x8dTG@\t\x1e\x0e7*\xfc\xde\xbc]6N\xf7\xf1\x84\x9e\x9f\xcf\xe9\x1e\xb5'" + b"\xf4<\xdf\x99sq\xd0\x9d\xbd\x99\x0b\xb4%p4\xbf{\xbb\x8a\xd2\x0b\xbc=M" + b"\x94H:\xf5\xa8\xd6\xa4\xc90\xc2D\xb9\xd3\xa8\xb0S\x87 `\xa2\xeb\xf3W\xce" + b" 7\xf9N#\r\xe6\xbe\t\x9d\xe7\x811\xf9\x10\xc1\xc2\x14\xf6\xfc\xcba\xb7" + b"\xb1\x7f\x95l\xe4\tjA\xec:\x10\xe5\xfe\xc2\\=D\xe2\x0c\x0b3]\xf7\xc1\xf7" + b"\xbceZ\xb1A\xea\x16\xe5\xfddgFQ\xed\xaf\x04\xa3\xd3\xf8\xa2q\x19B\xd4r" + b"\xc5\x0c\x9a\x14\x94\xea\x91\xc4o\xe4\xbb\xb4\x99\xf4@\xd1\xe6\x0c\xe3" + b"\xc6d\xa0Q\n\xf2/\xd8\xb8S5\x8a\x18:\xb5g\xac\x95D\xce\x17\x07\xd4z\xda" + b"\x90\xe65\x07\x19H!\t\xfdu\x16\x8e\x0eR\x19\xf4\x8cl\x0c\xf9Q\xf1\x80" + b"\xe3\xbf\xd7O\xf8\x8c\x18\x0b\x9c\xf1\x1fb\xe1\tR\xb2\xf1\xe1A\xea \xcf-" + b"IGE\xf1\x14\x98$\x83\x15\xc9\xd8j\xbf\x19\x0f\xd5\xd1\xaa\xb3\xf3\xa5I2s" + b"\x8d\x145\xca\xd5\xd93\x9c\xb8D0\xe6\xaa%\xd0\xc0P}JO^h\x8e\x08\xadlV." + b"\x18\x88\x13\x05o\xb0\x07\xeaw\xe0\xb6\xa4\xd5*\xe4r\xef\x07G+\xc1\xbei[" + b"w\xe8\xab@_\xef\x15y\xe5\x12\xc9W\x1b.\xad\x85-\xc2\xf7\xe3mU6g\x8eSA" + b"\x01(\xd3\xdb\x16\x13=\xde\x92\xf9,D\xb8\x8a\xb2\xb4\xc9\xc3\xefnE\xe8\\" + b"\xa6\xe2Y\xd2\xcf\xcb\x8c\xb6\xd5\xe9\x1d\x1e\x9a\x8b~\xe2\xa6\rE\x84uV" + b"\xed\xc6\x99\xddm<\x10[\x0fu\x1f\xc1\x1d1\n\xcfw\xb2%!\xf0[\xce\x87\x83B" + b"\x08\xaa,\x08%d\xcef\x94\"\xd9g.\xc83\xcbXY+4\xec\x85qA\n\x1d=9\xf0*\xb1" + b"\x1f/\xf3s\xd61b\x7f@\xfb\x9d\xe3FQ\\\xbd\x82\x1e\x00\xf3\xce\xd3\xe1" + b"\xca,E\xfd7[\xab\xb6\xb7\xac!mA}\xbd\x9d3R5\x9cF\xabH\xeb\x92)cc\x13\xd0" + b"\xbd\xee\xe9n{\x1dIJB\xa5\xeb\x11\xe8`w&`\x8b}@Oxe\t\x8a\x07\x02\x95\xf2" + b"\xed\xda|\xb1e\xbe\xaa\xbbg\x19@\xe1Y\x878\x84\x0f\x8c\xe3\xc98\xf2\x9e" + b"\xd5N\xb5J\xef\xab!\xe2\x8dq\xe1\xe5q\xc5\xee\x11W\xb7\xe4k*\x027\xa0" + b"\xa3J\xf4\xd8m\xd0q\x94\xcb\x07\n:\xb6`.\xe4\x9c\x15+\xc0)\xde\x80X\xd4" + b"\xcfQm\x01\xc2cP\x1cA\x85'\xc9\xac\x8b\xe6\xb2)\xe6\x84t\x1c\x92\xe4Z" + b"\x1cR\xb0\x9e\x96\xd1\xfb\x1c\xa6\x8b\xcb`\x10\x12]\xf2gR\x9bFT\xe0\xc8H" + b"S\xfb\xac<\x04\xc7\xc1\xe8\xedP\xf4\x16\xdb\xc0\xd7e\xc2\x17J^\x1f\xab" + b"\xff[\x08\x19\xb4\xf5\xfb\x19\xb4\x04\xe5c~']\xcb\xc2A\xec\x90\xd0\xed" + b"\x06,\xc5K{\x86\x03\xb1\xcdMx\xdeQ\x8c3\xf9\x8a\xea=\x89\xaba\xd2\xc89a" + b"\xd72\xf0\xc3\x19\x8a\xdfs\xd4\xfd\xbb\x81b\xeaE\"\xd8\xf4d\x0cD\xf7IJ!" + b"\xe5d\xbbG\xe9\xcam\xaa\x0f_r\x95\x91NBq\xcaP\xce\xa7\xa9\xb5\x10\x94eP!" + b"|\x856\xcd\xbfIir\xb8e\x9bjP\x97q\xabwS7\x1a\x0ehM\xe7\xca\x86?\xdeP}y~" + b"\x0f\x95I\xfc\x13\xe1r\xa9k\x88\xcb" + b"\xfd\xc3v\xe2\xb9\x8a\x02\x8eq\x92I\xf8\xf6\xf1\x03s\x9b\xb8\xe3\"\xe3" + b"\xa9\xa5>D\xb8\x96;\xe7\x92\xd133\xe8\xdd'e\xc9.\xdc;\x17\x1f\xf5H\x13q" + b"\xa4W\x0c\xdb~\x98\x01\xeb\xdf\xe32\x13\x0f\xddx\n6\xa0\t\x10\xb6\xbb" + b"\xb0\xc3\x18\xb6;\x9fj[\xd9\xd5\xc9\x06\x8a\x87\xcd\xe5\xee\xfc\x9c-%@" + b"\xee\xe0\xeb\xd2\xe3\xe8\xfb\xc0\x122\\\xc7\xaf\xc2\xa1Oth\xb3\x8f\x82" + b"\xb3\x18\xa8\x07\xd5\xee_\xbe\xe0\x1cA\x1e_\r\x9a\xb0\x17W&\xa2D\x91\x94" + b"\x1a\xb2\xef\xf2\xdc\x85;X\xb0,\xeb>-7S\xe5\xca\x07)\x1fp\x7f\xcaQBL\xca" + b"\xf3\xb9d\xfc\xb5su\xb0\xc8\x95\x90\xeb*)\xa0v\xe4\x9a{FW\xf4l\xde\xcdj" + b"\x00" +) + +ISSUE_21872_DAT = ( + b']\x00\x00@\x00h3\x00\x00\x00\x00\x00\x00\x00\x00`D\x0c\x99\xc8' + b'\xd1\xbbZ^\xc43+\x83\xcd\xf1\xc6g\xec-\x061F\xb1\xbb\xc7\x17%-\xea' + b'\xfap\xfb\x8fs\x128\xb2,\x88\xe4\xc0\x12|*x\xd0\xa2\xc4b\x1b!\x02c' + b'\xab\xd9\x87U\xb8n \xfaVJ\x9a"\xb78\xff%_\x17`?@*\xc2\x82' + b"\xf2^\x1b\xb8\x04&\xc0\xbb\x03g\x9d\xca\xe9\xa4\xc9\xaf'\xe5\x8e}" + b'F\xdd\x11\xf3\x86\xbe\x1fN\x95\\\xef\xa2Mz-\xcb\x9a\xe3O@' + b"\x19\x07\xf6\xee\x9e\x9ag\xc6\xa5w\rnG'\x99\xfd\xfeGI\xb0" + b'\xbb\xf9\xc2\xe1\xff\xc5r\xcf\x85y[\x01\xa1\xbd\xcc/\xa3\x1b\x83\xaa' + b'\xc6\xf9\x99\x0c\xb6_\xc9MQ+x\xa2F\xda]\xdd\xe8\xfb\x1a&' + b',\xc4\x19\x1df\x81\x1e\x90\xf3\xb8Hgr\x85v\xbe\xa3qx\x01Y\xb5\x9fF' + b"\x13\x18\x01\xe69\x9b\xc8'\x1e\x9d\xd6\xe4F\x84\xac\xf8d<\x11\xd5" + b'\\\x0b\xeb\x0e\x82\xab\xb1\xe6\x1fka\xe1i\xc4 C\xb1"4)\xd6\xa7`\x02' + b'\xec\x11\x8c\xf0\x14\xb0\x1d\x1c\xecy\xf0\xb7|\x11j\x85X\xb2!\x1c' + b'\xac\xb5N\xc7\x85j\x9ev\xf5\xe6\x0b\xc1]c\xc15\x16\x9f\xd5\x99' + b"\xfei^\xd2G\x9b\xbdl\xab:\xbe,\xa9'4\x82\xe5\xee\xb3\xc1" + b'$\x93\x95\xa8Y\x16\xf5\xbf\xacw\x91\x04\x1d\x18\x06\xe9\xc5\xfdk\x06' + b'\xe8\xfck\xc5\x86>\x8b~\xa4\xcb\xf1\xb3\x04\xf1\x04G5\xe2\xcc]' + b'\x16\xbf\x140d\x18\xe2\xedw#(3\xca\xa1\x80bX\x7f\xb3\x84' + b'\x9d\xdb\xe7\x08\x97\xcd\x16\xb9\xf1\xd5r+m\x1e\xcb3q\xc5\x9e\x92' + b"\x7f\x8e*\xc7\xde\xe9\xe26\xcds\xb1\x10-\xf6r\x02?\x9d\xddCgJN'" + b'\x11M\xfa\nQ\n\xe6`m\xb8N\xbbq\x8el\x0b\x02\xc7:q\x04G\xa1T' + b'\xf1\xfe!0\x85~\xe5\x884\xe9\x89\xfb\x13J8\x15\xe42\xb6\xad' + b'\x877A\x9a\xa6\xbft]\xd0\xe35M\xb0\x0cK\xc8\xf6\x88\xae\xed\xa9,j7' + b'\x81\x13\xa0(\xcb\xe1\xe9l2\x7f\xcd\xda\x95(\xa70B\xbd\xf4\xe3' + b'hp\x94\xbdJ\xd7\t\xc7g\xffo?\x89?\xf8}\x7f\xbc\x1c\x87' + b'\x14\xc0\xcf\x8cV:\x9a\x0e\xd0\xb2\x1ck\xffk\xb9\xe0=\xc7\x8d/' + b'\xb8\xff\x7f\x1d\x87`\x19.\x98X*~\xa7j\xb9\x0b"\xf4\xe4;V`\xb9\xd7' + b'\x03\x1e\xd0t0\xd3\xde\x1fd\xb9\xe2)\x16\x81}\xb1\\b\x7fJ' + b'\x92\xf4\xff\n+V!\xe9\xde\x98\xa0\x8fK\xdf7\xb9\xc0\x12\x1f\xe2' + b'\xe9\xb0`\xae\x14\r\xa7\xc4\x81~\xd8\x8d\xc5\x06\xd8m\xb0Y\x8a)' + b'\x06/\xbb\xf9\xac\xeaP\xe0\x91\x05m[\xe5z\xe6Z\xf3\x9f\xc7\xd0' + b'\xd3\x8b\xf3\x8a\x1b\xfa\xe4Pf\xbc0\x17\x10\xa9\xd0\x95J{\xb3\xc3' + b'\xfdW\x9bop\x0f\xbe\xaee\xa3]\x93\x9c\xda\xb75<\xf6g!\xcc\xb1\xfc\\' + b'7\x152Mc\x17\x84\x9d\xcd35\r0\xacL-\xf3\xfb\xcb\x96\x1e\xe9U\x7f' + b'\xd7\xca\xb0\xcc\x89\x0c*\xce\x14\xd1P\xf1\x03\xb6.~9o?\xe8' + b'\r\x86\xe0\x92\x87}\xa3\x84\x03P\xe0\xc2\x7f\n;m\x9d\x9e\xb4|' + b'\x8c\x18\xc0#0\xfe3\x07<\xda\xd8\xcf^\xd4Hi\xd6\xb3\x0bT' + b'\x1dF\x88\x85q}\x02\xc6&\xc4\xae\xce\x9cU\xfa\x0f\xcc\xb6\x1f\x11' + b'drw\x9eN\x19\xbd\xffz\x0f\xf0\x04s\xadR\xc1\xc0\xbfl\xf1\xba\xf95^' + b'e\xb1\xfbVY\xd9\x9f\x1c\xbf*\xc4\xa86\x08+\xd6\x88[\xc4_rc\xf0f' + b'\xb8\xd4\xec\x1dx\x19|\xbf\xa7\xe0\x82\x0b\x8c~\x10L/\x90\xd6\xfb' + b'\x81\xdb\x98\xcc\x02\x14\xa5C\xb2\xa7i\xfd\xcd\x1fO\xf7\xe9\x89t\xf0' + b'\x17\xa5\x1c\xad\xfe' + b'"\xd1v`\x82\xd5$\x89\xcf^\xd52.\xafd\xe8d@\xaa\xd5Y|\x90\x84' + b'j\xdb}\x84riV\x8e\xf0X4rB\xf2NPS[\x8e\x88\xd4\x0fI\xb8' + b'\xdd\xcb\x1d\xf2(\xdf;9\x9e|\xef^0;.*[\x9fl\x7f\xa2_X\xaff!\xbb\x03' + b'\xff\x19\x8f\x88\xb5\xb6\x884\xa3\x05\xde3D{\xe3\xcb\xce\xe4t]' + b'\x875\xe3Uf\xae\xea\x88\x1c\x03b\n\xb1,Q\xec\xcf\x08\t\xde@\x83\xaa<' + b',-\xe4\xee\x9b\x843\xe5\x007\tK\xac\x057\xd6*X\xa3\xc6~\xba\xe6O' + b'\x81kz"\xbe\xe43sL\xf1\xfa;\xf4^\x1e\xb4\x80\xe2\xbd\xaa\x17Z\xe1f' + b'\xda\xa6\xb9\x07:]}\x9fa\x0b?\xba\xe7\xf15\x04M\xe3\n}M\xa4\xcb\r' + b'2\x8a\x88\xa9\xa7\x92\x93\x84\x81Yo\x00\xcc\xc4\xab\x9aT\x96\x0b\xbe' + b'U\xac\x1d\x8d\x1b\x98"\xf8\x8f\xf1u\xc1n\xcc\xfcA\xcc\x90\xb7i' + b'\x83\x9c\x9c~\x1d4\xa2\xf0*J\xe7t\x12\xb4\xe3\xa0u\xd7\x95Z' + b'\xf7\xafG\x96~ST,\xa7\rC\x06\xf4\xf0\xeb`2\x9e>Q\x0e\xf6\xf5\xc5' + b'\x9b\xb5\xaf\xbe\xa3\x8f\xc0\xa3hu\x14\x12 \x97\x99\x04b\x8e\xc7\x1b' + b'VKc\xc1\xf3 \xde\x85-:\xdc\x1f\xac\xce*\x06\xb3\x80;`' + b'\xdb\xdd\x97\xfdg\xbf\xe7\xa8S\x08}\xf55e7\xb8/\xf0!\xc8' + b"Y\xa8\x9a\x07'\xe2\xde\r\x02\xe1\xb2\x0c\xf4C\xcd\xf9\xcb(\xe8\x90" + b'\xd3bTD\x15_\xf6\xc3\xfb\xb3E\xfc\xd6\x98{\xc6\\fz\x81\xa99\x85\xcb' + b'\xa5\xb1\x1d\x94bqW\x1a!;z~\x18\x88\xe8i\xdb\x1b\x8d\x8d' + b'\x06\xaa\x0e\x99s+5k\x00\xe4\xffh\xfe\xdbt\xa6\x1bU\xde\xa3' + b'\xef\xcb\x86\x9e\x81\x16j\n\x9d\xbc\xbbC\x80?\x010\xc7Jj;' + b'\xc4\xe5\x86\xd5\x0e0d#\xc6;\xb8\xd1\xc7c\xb5&8?\xd9J\xe5\xden\xb9' + b'\xe9cb4\xbb\xe6\x14\xe0\xe7l\x1b\x85\x94\x1fh\xf1n\xdeZ\xbe' + b'\x88\xff\xc2e\xca\xdc,B-\x8ac\xc9\xdf\xf5|&\xe4LL\xf0\x1f\xaa8\xbd' + b'\xc26\x94bVi\xd3\x0c\x1c\xb6\xbb\x99F\x8f\x0e\xcc\x8e4\xc6/^W\xf5?' + b'\xdc\x84(\x14dO\x9aD6\x0f4\xa3,\x0c\x0bS\x9fJ\xe1\xacc^\x8a0\t\x80D[' + b'\xb8\xe6\x86\xb0\xe8\xd4\xf9\x1en\xf1\xf5^\xeb\xb8\xb8\xf8' + b')\xa8\xbf\xaa\x84\x86\xb1a \x95\x16\x08\x1c\xbb@\xbd+\r/\xfb' + b'\x92\xfbh\xf1\x8d3\xf9\x92\xde`\xf1\x86\x03\xaa+\xd9\xd9\xc6P\xaf' + b'\xe3-\xea\xa5\x0fB\xca\xde\xd5n^\xe3/\xbf\xa6w\xc8\x0e' + b'\xc6\xfd\x97_+D{\x03\x15\xe8s\xb1\xc8HAG\xcf\xf4\x1a\xdd' + b'\xad\x11\xbf\x157q+\xdeW\x89g"X\x82\xfd~\xf7\xab4\xf6`\xab\xf1q' + b')\x82\x10K\xe9sV\xfe\xe45\xafs*\x14\xa7;\xac{\x06\x9d<@\x93G' + b'j\x1d\xefL\xe9\xd8\x92\x19&\xa1\x16\x19\x04\tu5\x01]\xf6\xf4' + b'\xcd\\\xd8A|I\xd4\xeb\x05\x88C\xc6e\xacQ\xe9*\x97~\x9au\xf8Xy' + b"\x17P\x10\x9f\n\x8c\xe2fZEu>\x9b\x1e\x91\x0b'`\xbd\xc0\xa8\x86c\x1d" + b'Z\xe2\xdc8j\x95\xffU\x90\x1e\xf4o\xbc\xe5\xe3e>\xd2R\xc0b#\xbc\x15' + b'H-\xb9!\xde\x9d\x90k\xdew\x9b{\x99\xde\xf7/K)A\xfd\xf5\xe6:\xda' + b'UM\xcc\xbb\xa2\x0b\x9a\x93\xf5{9\xc0 \xd2((6i\xc0\xbbu\xd8\x9e\x8d' + b'\xf8\x04q\x10\xd4\x14\x9e7-\xb9B\xea\x01Q8\xc8v\x9a\x12A\x88Cd\x92' + b"\x1c\x8c!\xf4\x94\x87'\xe3\xcd\xae\xf7\xd8\x93\xfa\xde\xa8b\x9e\xee2" + b'K\xdb\x00l\x9d\t\xb1|D\x05U\xbb\xf4>\xf1w\x887\xd1}W\x9d|g|1\xb0\x13' + b"\xa3 \xe5\xbfm@\xc06+\xb7\t\xcf\x15D\x9a \x1fM\x1f\xd2\xb5'\xa9\xbb" + b'~Co\x82\xfa\xc2\t\xe6f\xfc\xbeI\xae1\x8e\xbe\xb8\xcf\x86\x17' + b'\x9f\xe2`\xbd\xaf\xba\xb9\xbc\x1b\xa3\xcd\x82\x8fwc\xefd\xa9\xd5\x14' + b'\xe2C\xafUE\xb6\x11MJH\xd0=\x05\xd4*I\xff"\r\x1b^\xcaS6=\xec@\xd5' + b'\x11,\xe0\x87Gr\xaa[\xb8\xbc>n\xbd\x81\x0c\x07<\xe9\x92(' + b'\xb2\xff\xac}\xe7\xb6\x15\x90\x9f~4\x9a\xe6\xd6\xd8s\xed\x99tf' + b'\xa0f\xf8\xf1\x87\t\x96/)\x85\xb6\n\xd7\xb2w\x0b\xbc\xba\x99\xee' + b'Q\xeen\x1d\xad\x03\xc3s\xd1\xfd\xa2\xc6\xb7\x9a\x9c(G<6\xad[~H ' + b'\x16\x89\x89\xd0\xc3\xd2\xca~\xac\xea\xa5\xed\xe5\xfb\r:' + b'\x8e\xa6\xf1e\xbb\xba\xbd\xe0(\xa3\x89_\x01(\xb5c\xcc\x9f\x1fg' + b'v\xfd\x17\xb3\x08S=S\xee\xfc\x85>\x91\x8d\x8d\nYR\xb3G\xd1A\xa2\xb1' + b'\xec\xb0\x01\xd2\xcd\xf9\xfe\x82\x06O\xb3\xecd\xa9c\xe0\x8eP\x90\xce' + b'\xe0\xcd\xd8\xd8\xdc\x9f\xaa\x01"[Q~\xe4\x88\xa1#\xc1\x12C\xcf' + b'\xbe\x80\x11H\xbf\x86\xd8\xbem\xcfWFQ(X\x01DK\xdfB\xaa\x10.-' + b'\xd5\x9e|\x86\x15\x86N]\xc7Z\x17\xcd=\xd7)M\xde\x15\xa4LTi\xa0\x15' + b'\xd1\xe7\xbdN\xa4?\xd1\xe7\x02\xfe4\xe4O\x89\x98&\x96\x0f\x02\x9c' + b'\x9e\x19\xaa\x13u7\xbd0\xdc\xd8\x93\xf4BNE\x1d\x93\x82\x81\x16' + b'\xe5y\xcf\x98D\xca\x9a\xe2\xfd\xcdL\xcc\xd1\xfc_\x0b\x1c\xa0]\xdc' + b'\xa91 \xc9c\xd8\xbf\x97\xcfp\xe6\x19-\xad\xff\xcc\xd1N(\xe8' + b'\xeb#\x182\x96I\xf7l\xf3r\x00' +) + + +if __name__ == "__main__": + unittest.main() \ No newline at end of file diff --git a/Lib/test/test_math.py b/Lib/test/test_math.py index fa79456ed4..16d12f9688 100644 --- a/Lib/test/test_math.py +++ b/Lib/test/test_math.py @@ -33,8 +33,8 @@ else: file = __file__ test_dir = os.path.dirname(file) or os.curdir -math_testcases = os.path.join(test_dir, 'math_testcases.txt') -test_file = os.path.join(test_dir, 'cmath_testcases.txt') +math_testcases = os.path.join(test_dir, 'mathdata', 'math_testcases.txt') +test_file = os.path.join(test_dir, 'mathdata', 'cmath_testcases.txt') def to_ulps(x): @@ -1374,6 +1374,7 @@ def test_sumprod_accuracy(self): self.assertEqual(sumprod([True, False] * 10, [0.1] * 20), 1.0) self.assertEqual(sumprod([1.0, 10E100, 1.0, -10E100], [1.0]*4), 2.0) + @unittest.skip("TODO: RUSTPYTHON, Taking a few minutes.") @support.requires_resource('cpu') def test_sumprod_stress(self): sumprod = math.sumprod @@ -2079,7 +2080,6 @@ def test_testfile(self): self.fail('Failures in test_testfile:\n ' + '\n '.join(failures)) - @unittest.skip("TODO: RUSTPYTHON, Currently hangs. Function never finishes.") @requires_IEEE_754 def test_mtestfile(self): fail_fmt = "{}: {}({!r}): {}" @@ -2628,9 +2628,247 @@ def test_fractions(self): self.assertAllNotClose(fraction_examples, rel_tol=1e-9) +class FMATests(unittest.TestCase): + """ Tests for math.fma. """ + + def test_fma_nan_results(self): + # Selected representative values. + values = [ + -math.inf, -1e300, -2.3, -1e-300, -0.0, + 0.0, 1e-300, 2.3, 1e300, math.inf, math.nan + ] + + # If any input is a NaN, the result should be a NaN, too. + for a, b in itertools.product(values, repeat=2): + self.assertIsNaN(math.fma(math.nan, a, b)) + self.assertIsNaN(math.fma(a, math.nan, b)) + self.assertIsNaN(math.fma(a, b, math.nan)) + + def test_fma_infinities(self): + # Cases involving infinite inputs or results. + positives = [1e-300, 2.3, 1e300, math.inf] + finites = [-1e300, -2.3, -1e-300, -0.0, 0.0, 1e-300, 2.3, 1e300] + non_nans = [-math.inf, -2.3, -0.0, 0.0, 2.3, math.inf] + + # ValueError due to inf * 0 computation. + for c in non_nans: + for infinity in [math.inf, -math.inf]: + for zero in [0.0, -0.0]: + with self.assertRaises(ValueError): + math.fma(infinity, zero, c) + with self.assertRaises(ValueError): + math.fma(zero, infinity, c) + + # ValueError when a*b and c both infinite of opposite signs. + for b in positives: + with self.assertRaises(ValueError): + math.fma(math.inf, b, -math.inf) + with self.assertRaises(ValueError): + math.fma(math.inf, -b, math.inf) + with self.assertRaises(ValueError): + math.fma(-math.inf, -b, -math.inf) + with self.assertRaises(ValueError): + math.fma(-math.inf, b, math.inf) + with self.assertRaises(ValueError): + math.fma(b, math.inf, -math.inf) + with self.assertRaises(ValueError): + math.fma(-b, math.inf, math.inf) + with self.assertRaises(ValueError): + math.fma(-b, -math.inf, -math.inf) + with self.assertRaises(ValueError): + math.fma(b, -math.inf, math.inf) + + # Infinite result when a*b and c both infinite of the same sign. + for b in positives: + self.assertEqual(math.fma(math.inf, b, math.inf), math.inf) + self.assertEqual(math.fma(math.inf, -b, -math.inf), -math.inf) + self.assertEqual(math.fma(-math.inf, -b, math.inf), math.inf) + self.assertEqual(math.fma(-math.inf, b, -math.inf), -math.inf) + self.assertEqual(math.fma(b, math.inf, math.inf), math.inf) + self.assertEqual(math.fma(-b, math.inf, -math.inf), -math.inf) + self.assertEqual(math.fma(-b, -math.inf, math.inf), math.inf) + self.assertEqual(math.fma(b, -math.inf, -math.inf), -math.inf) + + # Infinite result when a*b finite, c infinite. + for a, b in itertools.product(finites, finites): + self.assertEqual(math.fma(a, b, math.inf), math.inf) + self.assertEqual(math.fma(a, b, -math.inf), -math.inf) + + # Infinite result when a*b infinite, c finite. + for b, c in itertools.product(positives, finites): + self.assertEqual(math.fma(math.inf, b, c), math.inf) + self.assertEqual(math.fma(-math.inf, b, c), -math.inf) + self.assertEqual(math.fma(-math.inf, -b, c), math.inf) + self.assertEqual(math.fma(math.inf, -b, c), -math.inf) + + self.assertEqual(math.fma(b, math.inf, c), math.inf) + self.assertEqual(math.fma(b, -math.inf, c), -math.inf) + self.assertEqual(math.fma(-b, -math.inf, c), math.inf) + self.assertEqual(math.fma(-b, math.inf, c), -math.inf) + + # gh-73468: On some platforms, libc fma() doesn't implement IEE 754-2008 + # properly: it doesn't use the right sign when the result is zero. + @unittest.skipIf( + sys.platform.startswith(("freebsd", "wasi", "netbsd")) + or (sys.platform == "android" and platform.machine() == "x86_64"), + f"this platform doesn't implement IEE 754-2008 properly") + def test_fma_zero_result(self): + nonnegative_finites = [0.0, 1e-300, 2.3, 1e300] + + # Zero results from exact zero inputs. + for b in nonnegative_finites: + self.assertIsPositiveZero(math.fma(0.0, b, 0.0)) + self.assertIsPositiveZero(math.fma(0.0, b, -0.0)) + self.assertIsNegativeZero(math.fma(0.0, -b, -0.0)) + self.assertIsPositiveZero(math.fma(0.0, -b, 0.0)) + self.assertIsPositiveZero(math.fma(-0.0, -b, 0.0)) + self.assertIsPositiveZero(math.fma(-0.0, -b, -0.0)) + self.assertIsNegativeZero(math.fma(-0.0, b, -0.0)) + self.assertIsPositiveZero(math.fma(-0.0, b, 0.0)) + + self.assertIsPositiveZero(math.fma(b, 0.0, 0.0)) + self.assertIsPositiveZero(math.fma(b, 0.0, -0.0)) + self.assertIsNegativeZero(math.fma(-b, 0.0, -0.0)) + self.assertIsPositiveZero(math.fma(-b, 0.0, 0.0)) + self.assertIsPositiveZero(math.fma(-b, -0.0, 0.0)) + self.assertIsPositiveZero(math.fma(-b, -0.0, -0.0)) + self.assertIsNegativeZero(math.fma(b, -0.0, -0.0)) + self.assertIsPositiveZero(math.fma(b, -0.0, 0.0)) + + # Exact zero result from nonzero inputs. + self.assertIsPositiveZero(math.fma(2.0, 2.0, -4.0)) + self.assertIsPositiveZero(math.fma(2.0, -2.0, 4.0)) + self.assertIsPositiveZero(math.fma(-2.0, -2.0, -4.0)) + self.assertIsPositiveZero(math.fma(-2.0, 2.0, 4.0)) + + # Underflow to zero. + tiny = 1e-300 + self.assertIsPositiveZero(math.fma(tiny, tiny, 0.0)) + self.assertIsNegativeZero(math.fma(tiny, -tiny, 0.0)) + self.assertIsPositiveZero(math.fma(-tiny, -tiny, 0.0)) + self.assertIsNegativeZero(math.fma(-tiny, tiny, 0.0)) + self.assertIsPositiveZero(math.fma(tiny, tiny, -0.0)) + self.assertIsNegativeZero(math.fma(tiny, -tiny, -0.0)) + self.assertIsPositiveZero(math.fma(-tiny, -tiny, -0.0)) + self.assertIsNegativeZero(math.fma(-tiny, tiny, -0.0)) + + # Corner case where rounding the multiplication would + # give the wrong result. + x = float.fromhex('0x1p-500') + y = float.fromhex('0x1p-550') + z = float.fromhex('0x1p-1000') + self.assertIsNegativeZero(math.fma(x-y, x+y, -z)) + self.assertIsPositiveZero(math.fma(y-x, x+y, z)) + self.assertIsNegativeZero(math.fma(y-x, -(x+y), -z)) + self.assertIsPositiveZero(math.fma(x-y, -(x+y), z)) + + def test_fma_overflow(self): + a = b = float.fromhex('0x1p512') + c = float.fromhex('0x1p1023') + # Overflow from multiplication. + with self.assertRaises(OverflowError): + math.fma(a, b, 0.0) + self.assertEqual(math.fma(a, b/2.0, 0.0), c) + # Overflow from the addition. + with self.assertRaises(OverflowError): + math.fma(a, b/2.0, c) + # No overflow, even though a*b overflows a float. + self.assertEqual(math.fma(a, b, -c), c) + + # Extreme case: a * b is exactly at the overflow boundary, so the + # tiniest offset makes a difference between overflow and a finite + # result. + a = float.fromhex('0x1.ffffffc000000p+511') + b = float.fromhex('0x1.0000002000000p+512') + c = float.fromhex('0x0.0000000000001p-1022') + with self.assertRaises(OverflowError): + math.fma(a, b, 0.0) + with self.assertRaises(OverflowError): + math.fma(a, b, c) + self.assertEqual(math.fma(a, b, -c), + float.fromhex('0x1.fffffffffffffp+1023')) + + # Another extreme case: here a*b is about as large as possible subject + # to math.fma(a, b, c) being finite. + a = float.fromhex('0x1.ae565943785f9p+512') + b = float.fromhex('0x1.3094665de9db8p+512') + c = float.fromhex('0x1.fffffffffffffp+1023') + self.assertEqual(math.fma(a, b, -c), c) + + def test_fma_single_round(self): + a = float.fromhex('0x1p-50') + self.assertEqual(math.fma(a - 1.0, a + 1.0, 1.0), a*a) + + def test_random(self): + # A collection of randomly generated inputs for which the naive FMA + # (with two rounds) gives a different result from a singly-rounded FMA. + + # tuples (a, b, c, expected) + test_values = [ + ('0x1.694adde428b44p-1', '0x1.371b0d64caed7p-1', + '0x1.f347e7b8deab8p-4', '0x1.19f10da56c8adp-1'), + ('0x1.605401ccc6ad6p-2', '0x1.ce3a40bf56640p-2', + '0x1.96e3bf7bf2e20p-2', '0x1.1af6d8aa83101p-1'), + ('0x1.e5abd653a67d4p-2', '0x1.a2e400209b3e6p-1', + '0x1.a90051422ce13p-1', '0x1.37d68cc8c0fbbp+0'), + ('0x1.f94e8efd54700p-2', '0x1.123065c812cebp-1', + '0x1.458f86fb6ccd0p-1', '0x1.ccdcee26a3ff3p-1'), + ('0x1.bd926f1eedc96p-1', '0x1.eee9ca68c5740p-1', + '0x1.960c703eb3298p-2', '0x1.3cdcfb4fdb007p+0'), + ('0x1.27348350fbccdp-1', '0x1.3b073914a53f1p-1', + '0x1.e300da5c2b4cbp-1', '0x1.4c51e9a3c4e29p+0'), + ('0x1.2774f00b3497bp-1', '0x1.7038ec336bff0p-2', + '0x1.2f6f2ccc3576bp-1', '0x1.99ad9f9c2688bp-1'), + ('0x1.51d5a99300e5cp-1', '0x1.5cd74abd445a1p-1', + '0x1.8880ab0bbe530p-1', '0x1.3756f96b91129p+0'), + ('0x1.73cb965b821b8p-2', '0x1.218fd3d8d5371p-1', + '0x1.d1ea966a1f758p-2', '0x1.5217b8fd90119p-1'), + ('0x1.4aa98e890b046p-1', '0x1.954d85dff1041p-1', + '0x1.122b59317ebdfp-1', '0x1.0bf644b340cc5p+0'), + ('0x1.e28f29e44750fp-1', '0x1.4bcc4fdcd18fep-1', + '0x1.fd47f81298259p-1', '0x1.9b000afbc9995p+0'), + ('0x1.d2e850717fe78p-3', '0x1.1dd7531c303afp-1', + '0x1.e0869746a2fc2p-2', '0x1.316df6eb26439p-1'), + ('0x1.cf89c75ee6fbap-2', '0x1.b23decdc66825p-1', + '0x1.3d1fe76ac6168p-1', '0x1.00d8ea4c12abbp+0'), + ('0x1.3265ae6f05572p-2', '0x1.16d7ec285f7a2p-1', + '0x1.0b8405b3827fbp-1', '0x1.5ef33c118a001p-1'), + ('0x1.c4d1bf55ec1a5p-1', '0x1.bc59618459e12p-2', + '0x1.ce5b73dc1773dp-1', '0x1.496cf6164f99bp+0'), + ('0x1.d350026ac3946p-1', '0x1.9a234e149a68cp-2', + '0x1.f5467b1911fd6p-2', '0x1.b5cee3225caa5p-1'), + ] + for a_hex, b_hex, c_hex, expected_hex in test_values: + a = float.fromhex(a_hex) + b = float.fromhex(b_hex) + c = float.fromhex(c_hex) + expected = float.fromhex(expected_hex) + self.assertEqual(math.fma(a, b, c), expected) + self.assertEqual(math.fma(b, a, c), expected) + + # Custom assertions. + def assertIsNaN(self, value): + self.assertTrue( + math.isnan(value), + msg="Expected a NaN, got {!r}".format(value) + ) + + def assertIsPositiveZero(self, value): + self.assertTrue( + value == 0 and math.copysign(1, value) > 0, + msg="Expected a positive zero, got {!r}".format(value) + ) + + def assertIsNegativeZero(self, value): + self.assertTrue( + value == 0 and math.copysign(1, value) < 0, + msg="Expected a negative zero, got {!r}".format(value) + ) + + def load_tests(loader, tests, pattern): from doctest import DocFileSuite - tests.addTest(DocFileSuite("ieee754.txt")) + tests.addTest(DocFileSuite(os.path.join("mathdata", "ieee754.txt"))) return tests if __name__ == '__main__': diff --git a/Lib/test/test_os.py b/Lib/test/test_os.py index f10de0b7af..3b19d3d3cd 100644 --- a/Lib/test/test_os.py +++ b/Lib/test/test_os.py @@ -1980,7 +1980,6 @@ def get_urandom_subprocess(self, count): self.assertEqual(len(stdout), count) return stdout - @unittest.expectedFailureIfWindows("TODO: RUSTPYTHON (ModuleNotFoundError: No module named 'os'") def test_urandom_subprocess(self): data1 = self.get_urandom_subprocess(16) data2 = self.get_urandom_subprocess(16) diff --git a/Lib/test/test_pickle.py b/Lib/test/test_pickle.py index d61570ce10..e01ddcf0a8 100644 --- a/Lib/test/test_pickle.py +++ b/Lib/test/test_pickle.py @@ -664,6 +664,9 @@ def test_exceptions(self): BaseExceptionGroup, ExceptionGroup): continue + # TODO: RUSTPYTHON: fix name mapping for _IncompleteInputError + if exc is _IncompleteInputError: + continue if exc is not OSError and issubclass(exc, OSError): self.assertEqual(reverse_mapping('builtins', name), ('exceptions', 'OSError')) diff --git a/Lib/test/test_pkgutil.py b/Lib/test/test_pkgutil.py index 3a10ec8fc3..ece4cf2d05 100644 --- a/Lib/test/test_pkgutil.py +++ b/Lib/test/test_pkgutil.py @@ -491,33 +491,6 @@ def test_nested(self): self.assertEqual(c, 1) self.assertEqual(d, 2) - -class ImportlibMigrationTests(unittest.TestCase): - # With full PEP 302 support in the standard import machinery, the - # PEP 302 emulation in this module is in the process of being - # deprecated in favour of importlib proper - - def check_deprecated(self): - return check_warnings( - ("This emulation is deprecated and slated for removal in " - "Python 3.12; use 'importlib' instead", - DeprecationWarning)) - - def test_importer_deprecated(self): - with self.check_deprecated(): - pkgutil.ImpImporter("") - - def test_loader_deprecated(self): - with self.check_deprecated(): - pkgutil.ImpLoader("", "", "", "") - - def test_get_loader_avoids_emulation(self): - with check_warnings() as w: - self.assertIsNotNone(pkgutil.get_loader("sys")) - self.assertIsNotNone(pkgutil.get_loader("os")) - self.assertIsNotNone(pkgutil.get_loader("test.support")) - self.assertEqual(len(w.warnings), 0) - @unittest.skipIf(__name__ == '__main__', 'not compatible with __main__') def test_get_loader_handles_missing_loader_attribute(self): global __loader__ diff --git a/Lib/test/test_poll.py b/Lib/test/test_poll.py index 338eb00f0c..a9bfb755c3 100644 --- a/Lib/test/test_poll.py +++ b/Lib/test/test_poll.py @@ -152,8 +152,6 @@ def test_poll2(self): else: self.fail('Unexpected return value from select.poll: %s' % fdlist) - # TODO: RUSTPYTHON int overflow - @unittest.expectedFailure def test_poll3(self): # test int overflow pollster = select.poll() diff --git a/Lib/test/test_posix.py b/Lib/test/test_posix.py index 7e9e261d78..30d6b6d3c3 100644 --- a/Lib/test/test_posix.py +++ b/Lib/test/test_posix.py @@ -1730,8 +1730,6 @@ def test_no_such_executable(self): self.assertEqual(pid2, pid) self.assertNotEqual(status, 0) - # TODO: RUSTPYTHON: TypeError: '_Environ' object is not a mapping - @unittest.expectedFailure def test_specify_environment(self): envfile = os_helper.TESTFN self.addCleanup(os_helper.unlink, envfile) @@ -1765,8 +1763,6 @@ def test_empty_file_actions(self): ) support.wait_process(pid, exitcode=0) - # TODO: RUSTPYTHON: TypeError: Unexpected keyword argument resetids - @unittest.expectedFailure def test_resetids_explicit_default(self): pid = self.spawn_func( sys.executable, @@ -1776,8 +1772,6 @@ def test_resetids_explicit_default(self): ) support.wait_process(pid, exitcode=0) - # TODO: RUSTPYTHON: TypeError: Unexpected keyword argument resetids - @unittest.expectedFailure def test_resetids(self): pid = self.spawn_func( sys.executable, @@ -1787,8 +1781,6 @@ def test_resetids(self): ) support.wait_process(pid, exitcode=0) - # TODO: RUSTPYTHON: TypeError: Unexpected keyword argument setpgroup - @unittest.expectedFailure def test_setpgroup(self): pid = self.spawn_func( sys.executable, @@ -1819,8 +1811,6 @@ def test_setsigmask(self): ) support.wait_process(pid, exitcode=0) - # TODO: RUSTPYTHON: TypeError: Unexpected keyword argument setsigmask - @unittest.expectedFailure def test_setsigmask_wrong_type(self): with self.assertRaises(TypeError): self.spawn_func(sys.executable, @@ -1836,8 +1826,6 @@ def test_setsigmask_wrong_type(self): os.environ, setsigmask=[signal.NSIG, signal.NSIG+1]) - # TODO: RUSTPYTHON: TypeError: Unexpected keyword argument setsid - @unittest.expectedFailure def test_setsid(self): rfd, wfd = os.pipe() self.addCleanup(os.close, rfd) @@ -1902,7 +1890,6 @@ def test_setsigdef_wrong_type(self): [sys.executable, "-c", "pass"], os.environ, setsigdef=[signal.NSIG, signal.NSIG+1]) - # TODO: RUSTPYTHON: TypeError: Unexpected keyword argument scheduler @unittest.expectedFailure @requires_sched @unittest.skipIf(sys.platform.startswith(('freebsd', 'netbsd')), @@ -1924,7 +1911,6 @@ def test_setscheduler_only_param(self): ) support.wait_process(pid, exitcode=0) - # TODO: RUSTPYTHON: TypeError: Unexpected keyword argument scheduler @unittest.expectedFailure @requires_sched @unittest.skipIf(sys.platform.startswith(('freebsd', 'netbsd')), diff --git a/Lib/test/test_pprint.py b/Lib/test/test_pprint.py index 6ea7e7db2c..4e6fed1ab9 100644 --- a/Lib/test/test_pprint.py +++ b/Lib/test/test_pprint.py @@ -7,8 +7,8 @@ import itertools import pprint import random +import re import test.support -import test.test_set import types import unittest @@ -535,7 +535,10 @@ def test_dataclass_with_repr(self): def test_dataclass_no_repr(self): dc = dataclass3() formatted = pprint.pformat(dc, width=10) - self.assertRegex(formatted, r"") + self.assertRegex( + formatted, + fr"<{re.escape(__name__)}.dataclass3 object at \w+>", + ) def test_recursive_dataclass(self): dc = dataclass4(None) @@ -619,9 +622,6 @@ def test_set_reprs(self): self.assertEqual(pprint.pformat(frozenset3(range(7)), width=20), 'frozenset3({0, 1, 2, 3, 4, 5, 6})') - @unittest.expectedFailure - #See http://bugs.python.org/issue13907 - @test.support.cpython_only def test_set_of_sets_reprs(self): # This test creates a complex arrangement of frozensets and # compares the pretty-printed repr against a string hard-coded in @@ -632,204 +632,106 @@ def test_set_of_sets_reprs(self): # partial ordering (subset relationships), the output of the # list.sort() method is undefined for lists of sets." # - # In a nutshell, the test assumes frozenset({0}) will always - # sort before frozenset({1}), but: - # # >>> frozenset({0}) < frozenset({1}) # False # >>> frozenset({1}) < frozenset({0}) # False # - # Consequently, this test is fragile and - # implementation-dependent. Small changes to Python's sort - # algorithm cause the test to fail when it should pass. - # XXX Or changes to the dictionary implementation... - - cube_repr_tgt = """\ -{frozenset(): frozenset({frozenset({2}), frozenset({0}), frozenset({1})}), - frozenset({0}): frozenset({frozenset(), - frozenset({0, 2}), - frozenset({0, 1})}), - frozenset({1}): frozenset({frozenset(), - frozenset({1, 2}), - frozenset({0, 1})}), - frozenset({2}): frozenset({frozenset(), - frozenset({1, 2}), - frozenset({0, 2})}), - frozenset({1, 2}): frozenset({frozenset({2}), - frozenset({1}), - frozenset({0, 1, 2})}), - frozenset({0, 2}): frozenset({frozenset({2}), - frozenset({0}), - frozenset({0, 1, 2})}), - frozenset({0, 1}): frozenset({frozenset({0}), - frozenset({1}), - frozenset({0, 1, 2})}), - frozenset({0, 1, 2}): frozenset({frozenset({1, 2}), - frozenset({0, 2}), - frozenset({0, 1})})}""" - cube = test.test_set.cube(3) - self.assertEqual(pprint.pformat(cube), cube_repr_tgt) - cubo_repr_tgt = """\ -{frozenset({frozenset({0, 2}), frozenset({0})}): frozenset({frozenset({frozenset({0, - 2}), - frozenset({0, - 1, - 2})}), - frozenset({frozenset({0}), - frozenset({0, - 1})}), - frozenset({frozenset(), - frozenset({0})}), - frozenset({frozenset({2}), - frozenset({0, - 2})})}), - frozenset({frozenset({0, 1}), frozenset({1})}): frozenset({frozenset({frozenset({0, - 1}), - frozenset({0, - 1, - 2})}), - frozenset({frozenset({0}), - frozenset({0, - 1})}), - frozenset({frozenset({1}), - frozenset({1, - 2})}), - frozenset({frozenset(), - frozenset({1})})}), - frozenset({frozenset({1, 2}), frozenset({1})}): frozenset({frozenset({frozenset({1, - 2}), - frozenset({0, - 1, - 2})}), - frozenset({frozenset({2}), - frozenset({1, - 2})}), - frozenset({frozenset(), - frozenset({1})}), - frozenset({frozenset({1}), - frozenset({0, - 1})})}), - frozenset({frozenset({1, 2}), frozenset({2})}): frozenset({frozenset({frozenset({1, - 2}), - frozenset({0, - 1, - 2})}), - frozenset({frozenset({1}), - frozenset({1, - 2})}), - frozenset({frozenset({2}), - frozenset({0, - 2})}), - frozenset({frozenset(), - frozenset({2})})}), - frozenset({frozenset(), frozenset({0})}): frozenset({frozenset({frozenset({0}), - frozenset({0, - 1})}), - frozenset({frozenset({0}), - frozenset({0, - 2})}), - frozenset({frozenset(), - frozenset({1})}), - frozenset({frozenset(), - frozenset({2})})}), - frozenset({frozenset(), frozenset({1})}): frozenset({frozenset({frozenset(), - frozenset({0})}), - frozenset({frozenset({1}), - frozenset({1, - 2})}), - frozenset({frozenset(), - frozenset({2})}), - frozenset({frozenset({1}), - frozenset({0, - 1})})}), - frozenset({frozenset({2}), frozenset()}): frozenset({frozenset({frozenset({2}), - frozenset({1, - 2})}), - frozenset({frozenset(), - frozenset({0})}), - frozenset({frozenset(), - frozenset({1})}), - frozenset({frozenset({2}), - frozenset({0, - 2})})}), - frozenset({frozenset({0, 1, 2}), frozenset({0, 1})}): frozenset({frozenset({frozenset({1, - 2}), - frozenset({0, - 1, - 2})}), - frozenset({frozenset({0, - 2}), - frozenset({0, - 1, - 2})}), - frozenset({frozenset({0}), - frozenset({0, - 1})}), - frozenset({frozenset({1}), - frozenset({0, - 1})})}), - frozenset({frozenset({0}), frozenset({0, 1})}): frozenset({frozenset({frozenset(), - frozenset({0})}), - frozenset({frozenset({0, - 1}), - frozenset({0, - 1, - 2})}), - frozenset({frozenset({0}), - frozenset({0, - 2})}), - frozenset({frozenset({1}), - frozenset({0, - 1})})}), - frozenset({frozenset({2}), frozenset({0, 2})}): frozenset({frozenset({frozenset({0, - 2}), - frozenset({0, - 1, - 2})}), - frozenset({frozenset({2}), - frozenset({1, - 2})}), - frozenset({frozenset({0}), - frozenset({0, - 2})}), - frozenset({frozenset(), - frozenset({2})})}), - frozenset({frozenset({0, 1, 2}), frozenset({0, 2})}): frozenset({frozenset({frozenset({1, - 2}), - frozenset({0, - 1, - 2})}), - frozenset({frozenset({0, - 1}), - frozenset({0, - 1, - 2})}), - frozenset({frozenset({0}), - frozenset({0, - 2})}), - frozenset({frozenset({2}), - frozenset({0, - 2})})}), - frozenset({frozenset({1, 2}), frozenset({0, 1, 2})}): frozenset({frozenset({frozenset({0, - 2}), - frozenset({0, - 1, - 2})}), - frozenset({frozenset({0, - 1}), - frozenset({0, - 1, - 2})}), - frozenset({frozenset({2}), - frozenset({1, - 2})}), - frozenset({frozenset({1}), - frozenset({1, - 2})})})}""" - - cubo = test.test_set.linegraph(cube) - self.assertEqual(pprint.pformat(cubo), cubo_repr_tgt) + # In this test we list all possible invariants of the result + # for unordered frozensets. + # + # This test has a long history, see: + # - https://github.com/python/cpython/commit/969fe57baa0eb80332990f9cda936a33e13fabef + # - https://github.com/python/cpython/issues/58115 + # - https://github.com/python/cpython/issues/111147 + + import textwrap + + # Single-line, always ordered: + fs0 = frozenset() + fs1 = frozenset(('abc', 'xyz')) + data = frozenset((fs0, fs1)) + self.assertEqual(pprint.pformat(data), + 'frozenset({%r, %r})' % (fs0, fs1)) + self.assertEqual(pprint.pformat(data), repr(data)) + + fs2 = frozenset(('one', 'two')) + data = {fs2: frozenset((fs0, fs1))} + self.assertEqual(pprint.pformat(data), + "{%r: frozenset({%r, %r})}" % (fs2, fs0, fs1)) + self.assertEqual(pprint.pformat(data), repr(data)) + + # Single-line, unordered: + fs1 = frozenset(("xyz", "qwerty")) + fs2 = frozenset(("abcd", "spam")) + fs = frozenset((fs1, fs2)) + self.assertEqual(pprint.pformat(fs), repr(fs)) + + # Multiline, unordered: + def check(res, invariants): + self.assertIn(res, [textwrap.dedent(i).strip() for i in invariants]) + + # Inner-most frozensets are singleline, result is multiline, unordered: + fs1 = frozenset(('regular string', 'other string')) + fs2 = frozenset(('third string', 'one more string')) + check( + pprint.pformat(frozenset((fs1, fs2))), + [ + """ + frozenset({%r, + %r}) + """ % (fs1, fs2), + """ + frozenset({%r, + %r}) + """ % (fs2, fs1), + ], + ) + + # Everything is multiline, unordered: + check( + pprint.pformat( + frozenset(( + frozenset(( + "xyz very-very long string", + "qwerty is also absurdly long", + )), + frozenset(( + "abcd is even longer that before", + "spam is not so long", + )), + )), + ), + [ + """ + frozenset({frozenset({'abcd is even longer that before', + 'spam is not so long'}), + frozenset({'qwerty is also absurdly long', + 'xyz very-very long string'})}) + """, + + """ + frozenset({frozenset({'abcd is even longer that before', + 'spam is not so long'}), + frozenset({'xyz very-very long string', + 'qwerty is also absurdly long'})}) + """, + + """ + frozenset({frozenset({'qwerty is also absurdly long', + 'xyz very-very long string'}), + frozenset({'abcd is even longer that before', + 'spam is not so long'})}) + """, + + """ + frozenset({frozenset({'qwerty is also absurdly long', + 'xyz very-very long string'}), + frozenset({'spam is not so long', + 'abcd is even longer that before'})}) + """, + ], + ) def test_depth(self): nested_tuple = (1, (2, (3, (4, (5, 6))))) diff --git a/Lib/test/test_property.py b/Lib/test/test_property.py index 5312925d93..8411e903b1 100644 --- a/Lib/test/test_property.py +++ b/Lib/test/test_property.py @@ -100,32 +100,24 @@ def test_property_decorator_subclass(self): self.assertRaises(PropertySet, setattr, sub, "spam", None) self.assertRaises(PropertyDel, delattr, sub, "spam") - # TODO: RUSTPYTHON - @unittest.expectedFailure @unittest.skipIf(sys.flags.optimize >= 2, "Docstrings are omitted with -O2 and above") def test_property_decorator_subclass_doc(self): sub = SubClass() self.assertEqual(sub.__class__.spam.__doc__, "SubClass.getter") - # TODO: RUSTPYTHON - @unittest.expectedFailure @unittest.skipIf(sys.flags.optimize >= 2, "Docstrings are omitted with -O2 and above") def test_property_decorator_baseclass_doc(self): base = BaseClass() self.assertEqual(base.__class__.spam.__doc__, "BaseClass.getter") - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_property_decorator_doc(self): base = PropertyDocBase() sub = PropertyDocSub() self.assertEqual(base.__class__.spam.__doc__, "spam spam spam") self.assertEqual(sub.__class__.spam.__doc__, "spam spam spam") - # TODO: RUSTPYTHON - @unittest.expectedFailure @unittest.skipIf(sys.flags.optimize >= 2, "Docstrings are omitted with -O2 and above") def test_property_getter_doc_override(self): @@ -136,8 +128,6 @@ def test_property_getter_doc_override(self): self.assertEqual(newgetter.spam, 8) self.assertEqual(newgetter.__class__.spam.__doc__, "new docstring") - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_property___isabstractmethod__descriptor(self): for val in (True, False, [], [1], '', '1'): class C(object): @@ -169,8 +159,6 @@ def test_property_builtin_doc_writable(self): p.__doc__ = 'extended' self.assertEqual(p.__doc__, 'extended') - # TODO: RUSTPYTHON - @unittest.expectedFailure @unittest.skipIf(sys.flags.optimize >= 2, "Docstrings are omitted with -O2 and above") def test_property_decorator_doc_writable(self): @@ -268,8 +256,6 @@ def spam(self): else: raise Exception("AttributeError not raised") - # TODO: RUSTPYTHON - @unittest.expectedFailure @unittest.skipIf(sys.flags.optimize >= 2, "Docstrings are omitted with -O2 and above") def test_docstring_copy(self): @@ -282,8 +268,6 @@ def spam(self): Foo.spam.__doc__, "spam wrapped in property subclass") - # TODO: RUSTPYTHON - @unittest.expectedFailure @unittest.skipIf(sys.flags.optimize >= 2, "Docstrings are omitted with -O2 and above") def test_property_setter_copies_getter_docstring(self): @@ -317,8 +301,6 @@ def spam(self, value): FooSub.spam.__doc__, "spam wrapped in property subclass") - # TODO: RUSTPYTHON - @unittest.expectedFailure @unittest.skipIf(sys.flags.optimize >= 2, "Docstrings are omitted with -O2 and above") def test_property_new_getter_new_docstring(self): @@ -358,20 +340,14 @@ def _format_exc_msg(self, msg): def setUpClass(cls): cls.obj = cls.cls() - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_get_property(self): with self.assertRaisesRegex(AttributeError, self._format_exc_msg("has no getter")): self.obj.foo - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_set_property(self): with self.assertRaisesRegex(AttributeError, self._format_exc_msg("has no setter")): self.obj.foo = None - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_del_property(self): with self.assertRaisesRegex(AttributeError, self._format_exc_msg("has no deleter")): del self.obj.foo diff --git a/Lib/test/test_queue.py b/Lib/test/test_queue.py index cfa6003a86..93cbe1fe23 100644 --- a/Lib/test/test_queue.py +++ b/Lib/test/test_queue.py @@ -2,6 +2,7 @@ # to ensure the Queue locks remain stable. import itertools import random +import sys import threading import time import unittest @@ -10,6 +11,8 @@ from test.support import import_helper from test.support import threading_helper +# queue module depends on threading primitives +threading_helper.requires_working_threading(module=True) py_queue = import_helper.import_fresh_module('queue', blocked=['_queue']) c_queue = import_helper.import_fresh_module('queue', fresh=['_queue']) @@ -239,6 +242,418 @@ def test_shrinking_queue(self): with self.assertRaises(self.queue.Full): q.put_nowait(4) + def test_shutdown_empty(self): + q = self.type2test() + q.shutdown() + with self.assertRaises(self.queue.ShutDown): + q.put("data") + with self.assertRaises(self.queue.ShutDown): + q.get() + + def test_shutdown_nonempty(self): + q = self.type2test() + q.put("data") + q.shutdown() + q.get() + with self.assertRaises(self.queue.ShutDown): + q.get() + + def test_shutdown_immediate(self): + q = self.type2test() + q.put("data") + q.shutdown(immediate=True) + with self.assertRaises(self.queue.ShutDown): + q.get() + + def test_shutdown_allowed_transitions(self): + # allowed transitions would be from alive via shutdown to immediate + q = self.type2test() + self.assertFalse(q.is_shutdown) + + q.shutdown() + self.assertTrue(q.is_shutdown) + + q.shutdown(immediate=True) + self.assertTrue(q.is_shutdown) + + q.shutdown(immediate=False) + + def _shutdown_all_methods_in_one_thread(self, immediate): + q = self.type2test(2) + q.put("L") + q.put_nowait("O") + q.shutdown(immediate) + + with self.assertRaises(self.queue.ShutDown): + q.put("E") + with self.assertRaises(self.queue.ShutDown): + q.put_nowait("W") + if immediate: + with self.assertRaises(self.queue.ShutDown): + q.get() + with self.assertRaises(self.queue.ShutDown): + q.get_nowait() + with self.assertRaises(ValueError): + q.task_done() + q.join() + else: + self.assertIn(q.get(), "LO") + q.task_done() + self.assertIn(q.get(), "LO") + q.task_done() + q.join() + # on shutdown(immediate=False) + # when queue is empty, should raise ShutDown Exception + with self.assertRaises(self.queue.ShutDown): + q.get() # p.get(True) + with self.assertRaises(self.queue.ShutDown): + q.get_nowait() # p.get(False) + with self.assertRaises(self.queue.ShutDown): + q.get(True, 1.0) + + def test_shutdown_all_methods_in_one_thread(self): + return self._shutdown_all_methods_in_one_thread(False) + + def test_shutdown_immediate_all_methods_in_one_thread(self): + return self._shutdown_all_methods_in_one_thread(True) + + def _write_msg_thread(self, q, n, results, + i_when_exec_shutdown, event_shutdown, + barrier_start): + # All `write_msg_threads` + # put several items into the queue. + for i in range(0, i_when_exec_shutdown//2): + q.put((i, 'LOYD')) + # Wait for the barrier to be complete. + barrier_start.wait() + + for i in range(i_when_exec_shutdown//2, n): + try: + q.put((i, "YDLO")) + except self.queue.ShutDown: + results.append(False) + break + + # Trigger queue shutdown. + if i == i_when_exec_shutdown: + # Only one thread should call shutdown(). + if not event_shutdown.is_set(): + event_shutdown.set() + results.append(True) + + def _read_msg_thread(self, q, results, barrier_start): + # Get at least one item. + q.get(True) + q.task_done() + # Wait for the barrier to be complete. + barrier_start.wait() + while True: + try: + q.get(False) + q.task_done() + except self.queue.ShutDown: + results.append(True) + break + except self.queue.Empty: + pass + + def _shutdown_thread(self, q, results, event_end, immediate): + event_end.wait() + q.shutdown(immediate) + results.append(q.qsize() == 0) + + def _join_thread(self, q, barrier_start): + # Wait for the barrier to be complete. + barrier_start.wait() + q.join() + + def _shutdown_all_methods_in_many_threads(self, immediate): + # Run a 'multi-producers/consumers queue' use case, + # with enough items into the queue. + # When shutdown, all running threads will be joined. + q = self.type2test() + ps = [] + res_puts = [] + res_gets = [] + res_shutdown = [] + write_threads = 4 + read_threads = 6 + join_threads = 2 + nb_msgs = 1024*64 + nb_msgs_w = nb_msgs // write_threads + when_exec_shutdown = nb_msgs_w // 2 + # Use of a Barrier to ensure that + # - all write threads put all their items into the queue, + # - all read thread get at least one item from the queue, + # and keep on running until shutdown. + # The join thread is started only when shutdown is immediate. + nparties = write_threads + read_threads + if immediate: + nparties += join_threads + barrier_start = threading.Barrier(nparties) + ev_exec_shutdown = threading.Event() + lprocs = [ + (self._write_msg_thread, write_threads, (q, nb_msgs_w, res_puts, + when_exec_shutdown, ev_exec_shutdown, + barrier_start)), + (self._read_msg_thread, read_threads, (q, res_gets, barrier_start)), + (self._shutdown_thread, 1, (q, res_shutdown, ev_exec_shutdown, immediate)), + ] + if immediate: + lprocs.append((self._join_thread, join_threads, (q, barrier_start))) + # start all threads. + for func, n, args in lprocs: + for i in range(n): + ps.append(threading.Thread(target=func, args=args)) + ps[-1].start() + for thread in ps: + thread.join() + + self.assertTrue(True in res_puts) + self.assertEqual(res_gets.count(True), read_threads) + if immediate: + self.assertListEqual(res_shutdown, [True]) + self.assertTrue(q.empty()) + + def test_shutdown_all_methods_in_many_threads(self): + return self._shutdown_all_methods_in_many_threads(False) + + def test_shutdown_immediate_all_methods_in_many_threads(self): + return self._shutdown_all_methods_in_many_threads(True) + + def _get(self, q, go, results, shutdown=False): + go.wait() + try: + msg = q.get() + results.append(not shutdown) + return not shutdown + except self.queue.ShutDown: + results.append(shutdown) + return shutdown + + def _get_shutdown(self, q, go, results): + return self._get(q, go, results, True) + + def _get_task_done(self, q, go, results): + go.wait() + try: + msg = q.get() + q.task_done() + results.append(True) + return msg + except self.queue.ShutDown: + results.append(False) + return False + + def _put(self, q, msg, go, results, shutdown=False): + go.wait() + try: + q.put(msg) + results.append(not shutdown) + return not shutdown + except self.queue.ShutDown: + results.append(shutdown) + return shutdown + + def _put_shutdown(self, q, msg, go, results): + return self._put(q, msg, go, results, True) + + def _join(self, q, results, shutdown=False): + try: + q.join() + results.append(not shutdown) + return not shutdown + except self.queue.ShutDown: + results.append(shutdown) + return shutdown + + def _join_shutdown(self, q, results): + return self._join(q, results, True) + + def _shutdown_get(self, immediate): + q = self.type2test(2) + results = [] + go = threading.Event() + q.put("Y") + q.put("D") + # queue full + + if immediate: + thrds = ( + (self._get_shutdown, (q, go, results)), + (self._get_shutdown, (q, go, results)), + ) + else: + thrds = ( + # on shutdown(immediate=False) + # one of these threads should raise Shutdown + (self._get, (q, go, results)), + (self._get, (q, go, results)), + (self._get, (q, go, results)), + ) + threads = [] + for func, params in thrds: + threads.append(threading.Thread(target=func, args=params)) + threads[-1].start() + q.shutdown(immediate) + go.set() + for t in threads: + t.join() + if immediate: + self.assertListEqual(results, [True, True]) + else: + self.assertListEqual(sorted(results), [False] + [True]*(len(thrds)-1)) + + def test_shutdown_get(self): + return self._shutdown_get(False) + + def test_shutdown_immediate_get(self): + return self._shutdown_get(True) + + def _shutdown_put(self, immediate): + q = self.type2test(2) + results = [] + go = threading.Event() + q.put("Y") + q.put("D") + # queue fulled + + thrds = ( + (self._put_shutdown, (q, "E", go, results)), + (self._put_shutdown, (q, "W", go, results)), + ) + threads = [] + for func, params in thrds: + threads.append(threading.Thread(target=func, args=params)) + threads[-1].start() + q.shutdown() + go.set() + for t in threads: + t.join() + + self.assertEqual(results, [True]*len(thrds)) + + def test_shutdown_put(self): + return self._shutdown_put(False) + + def test_shutdown_immediate_put(self): + return self._shutdown_put(True) + + def _shutdown_join(self, immediate): + q = self.type2test() + results = [] + q.put("Y") + go = threading.Event() + nb = q.qsize() + + thrds = ( + (self._join, (q, results)), + (self._join, (q, results)), + ) + threads = [] + for func, params in thrds: + threads.append(threading.Thread(target=func, args=params)) + threads[-1].start() + if not immediate: + res = [] + for i in range(nb): + threads.append(threading.Thread(target=self._get_task_done, args=(q, go, res))) + threads[-1].start() + q.shutdown(immediate) + go.set() + for t in threads: + t.join() + + self.assertEqual(results, [True]*len(thrds)) + + def test_shutdown_immediate_join(self): + return self._shutdown_join(True) + + def test_shutdown_join(self): + return self._shutdown_join(False) + + def _shutdown_put_join(self, immediate): + q = self.type2test(2) + results = [] + go = threading.Event() + q.put("Y") + # queue not fulled + + thrds = ( + (self._put_shutdown, (q, "E", go, results)), + (self._join, (q, results)), + ) + threads = [] + for func, params in thrds: + threads.append(threading.Thread(target=func, args=params)) + threads[-1].start() + self.assertEqual(q.unfinished_tasks, 1) + + q.shutdown(immediate) + go.set() + + if immediate: + with self.assertRaises(self.queue.ShutDown): + q.get_nowait() + else: + result = q.get() + self.assertEqual(result, "Y") + q.task_done() + + for t in threads: + t.join() + + self.assertEqual(results, [True]*len(thrds)) + + def test_shutdown_immediate_put_join(self): + return self._shutdown_put_join(True) + + def test_shutdown_put_join(self): + return self._shutdown_put_join(False) + + def test_shutdown_get_task_done_join(self): + q = self.type2test(2) + results = [] + go = threading.Event() + q.put("Y") + q.put("D") + self.assertEqual(q.unfinished_tasks, q.qsize()) + + thrds = ( + (self._get_task_done, (q, go, results)), + (self._get_task_done, (q, go, results)), + (self._join, (q, results)), + (self._join, (q, results)), + ) + threads = [] + for func, params in thrds: + threads.append(threading.Thread(target=func, args=params)) + threads[-1].start() + go.set() + q.shutdown(False) + for t in threads: + t.join() + + self.assertEqual(results, [True]*len(thrds)) + + def test_shutdown_pending_get(self): + def get(): + try: + results.append(q.get()) + except Exception as e: + results.append(e) + + q = self.type2test() + results = [] + get_thread = threading.Thread(target=get) + get_thread.start() + q.shutdown(immediate=False) + get_thread.join(timeout=10.0) + self.assertFalse(get_thread.is_alive()) + self.assertEqual(len(results), 1) + self.assertIsInstance(results[0], self.queue.ShutDown) + + class QueueTest(BaseQueueTestMixin): def setUp(self): @@ -289,6 +704,7 @@ class CPriorityQueueTest(PriorityQueueTest, unittest.TestCase): # A Queue subclass that can provoke failure at a moment's notice :) class FailingQueueException(Exception): pass + class FailingQueueTest(BlockingTestMixin): def setUp(self): diff --git a/Lib/test/test_reprlib.py b/Lib/test/test_reprlib.py index 611fb9d1e4..396be4b104 100644 --- a/Lib/test/test_reprlib.py +++ b/Lib/test/test_reprlib.py @@ -9,6 +9,7 @@ import importlib import importlib.util import unittest +import textwrap from test.support import verbose from test.support.os_helper import create_empty_file @@ -25,6 +26,29 @@ def nestedTuple(nesting): class ReprTests(unittest.TestCase): + def test_init_kwargs(self): + example_kwargs = { + "maxlevel": 101, + "maxtuple": 102, + "maxlist": 103, + "maxarray": 104, + "maxdict": 105, + "maxset": 106, + "maxfrozenset": 107, + "maxdeque": 108, + "maxstring": 109, + "maxlong": 110, + "maxother": 111, + "fillvalue": "x" * 112, + "indent": "x" * 113, + } + r1 = Repr() + for attr, val in example_kwargs.items(): + setattr(r1, attr, val) + r2 = Repr(**example_kwargs) + for attr in example_kwargs: + self.assertEqual(getattr(r1, attr), getattr(r2, attr), msg=attr) + def test_string(self): eq = self.assertEqual eq(r("abc"), "'abc'") @@ -51,6 +75,13 @@ def test_tuple(self): expected = repr(t3)[:-2] + "...)" eq(r2.repr(t3), expected) + # modified fillvalue: + r3 = Repr() + r3.fillvalue = '+++' + r3.maxtuple = 2 + expected = repr(t3)[:-2] + "+++)" + eq(r3.repr(t3), expected) + def test_container(self): from array import array from collections import deque @@ -223,6 +254,382 @@ def test_unsortable(self): r(y) r(z) + def test_valid_indent(self): + test_cases = [ + { + 'object': (), + 'tests': ( + (dict(indent=None), '()'), + (dict(indent=False), '()'), + (dict(indent=True), '()'), + (dict(indent=0), '()'), + (dict(indent=1), '()'), + (dict(indent=4), '()'), + (dict(indent=4, maxlevel=2), '()'), + (dict(indent=''), '()'), + (dict(indent='-->'), '()'), + (dict(indent='....'), '()'), + ), + }, + { + 'object': '', + 'tests': ( + (dict(indent=None), "''"), + (dict(indent=False), "''"), + (dict(indent=True), "''"), + (dict(indent=0), "''"), + (dict(indent=1), "''"), + (dict(indent=4), "''"), + (dict(indent=4, maxlevel=2), "''"), + (dict(indent=''), "''"), + (dict(indent='-->'), "''"), + (dict(indent='....'), "''"), + ), + }, + { + 'object': [1, 'spam', {'eggs': True, 'ham': []}], + 'tests': ( + (dict(indent=None), '''\ + [1, 'spam', {'eggs': True, 'ham': []}]'''), + (dict(indent=False), '''\ + [ + 1, + 'spam', + { + 'eggs': True, + 'ham': [], + }, + ]'''), + (dict(indent=True), '''\ + [ + 1, + 'spam', + { + 'eggs': True, + 'ham': [], + }, + ]'''), + (dict(indent=0), '''\ + [ + 1, + 'spam', + { + 'eggs': True, + 'ham': [], + }, + ]'''), + (dict(indent=1), '''\ + [ + 1, + 'spam', + { + 'eggs': True, + 'ham': [], + }, + ]'''), + (dict(indent=4), '''\ + [ + 1, + 'spam', + { + 'eggs': True, + 'ham': [], + }, + ]'''), + (dict(indent=4, maxlevel=2), '''\ + [ + 1, + 'spam', + { + 'eggs': True, + 'ham': [], + }, + ]'''), + (dict(indent=''), '''\ + [ + 1, + 'spam', + { + 'eggs': True, + 'ham': [], + }, + ]'''), + (dict(indent='-->'), '''\ + [ + -->1, + -->'spam', + -->{ + -->-->'eggs': True, + -->-->'ham': [], + -->}, + ]'''), + (dict(indent='....'), '''\ + [ + ....1, + ....'spam', + ....{ + ........'eggs': True, + ........'ham': [], + ....}, + ]'''), + ), + }, + { + 'object': { + 1: 'two', + b'three': [ + (4.5, 6.7), + [set((8, 9)), frozenset((10, 11))], + ], + }, + 'tests': ( + (dict(indent=None), '''\ + {1: 'two', b'three': [(4.5, 6.7), [{8, 9}, frozenset({10, 11})]]}'''), + (dict(indent=False), '''\ + { + 1: 'two', + b'three': [ + ( + 4.5, + 6.7, + ), + [ + { + 8, + 9, + }, + frozenset({ + 10, + 11, + }), + ], + ], + }'''), + (dict(indent=True), '''\ + { + 1: 'two', + b'three': [ + ( + 4.5, + 6.7, + ), + [ + { + 8, + 9, + }, + frozenset({ + 10, + 11, + }), + ], + ], + }'''), + (dict(indent=0), '''\ + { + 1: 'two', + b'three': [ + ( + 4.5, + 6.7, + ), + [ + { + 8, + 9, + }, + frozenset({ + 10, + 11, + }), + ], + ], + }'''), + (dict(indent=1), '''\ + { + 1: 'two', + b'three': [ + ( + 4.5, + 6.7, + ), + [ + { + 8, + 9, + }, + frozenset({ + 10, + 11, + }), + ], + ], + }'''), + (dict(indent=4), '''\ + { + 1: 'two', + b'three': [ + ( + 4.5, + 6.7, + ), + [ + { + 8, + 9, + }, + frozenset({ + 10, + 11, + }), + ], + ], + }'''), + (dict(indent=4, maxlevel=2), '''\ + { + 1: 'two', + b'three': [ + (...), + [...], + ], + }'''), + (dict(indent=''), '''\ + { + 1: 'two', + b'three': [ + ( + 4.5, + 6.7, + ), + [ + { + 8, + 9, + }, + frozenset({ + 10, + 11, + }), + ], + ], + }'''), + (dict(indent='-->'), '''\ + { + -->1: 'two', + -->b'three': [ + -->-->( + -->-->-->4.5, + -->-->-->6.7, + -->-->), + -->-->[ + -->-->-->{ + -->-->-->-->8, + -->-->-->-->9, + -->-->-->}, + -->-->-->frozenset({ + -->-->-->-->10, + -->-->-->-->11, + -->-->-->}), + -->-->], + -->], + }'''), + (dict(indent='....'), '''\ + { + ....1: 'two', + ....b'three': [ + ........( + ............4.5, + ............6.7, + ........), + ........[ + ............{ + ................8, + ................9, + ............}, + ............frozenset({ + ................10, + ................11, + ............}), + ........], + ....], + }'''), + ), + }, + ] + for test_case in test_cases: + with self.subTest(test_object=test_case['object']): + for repr_settings, expected_repr in test_case['tests']: + with self.subTest(repr_settings=repr_settings): + r = Repr() + for attribute, value in repr_settings.items(): + setattr(r, attribute, value) + resulting_repr = r.repr(test_case['object']) + expected_repr = textwrap.dedent(expected_repr) + self.assertEqual(resulting_repr, expected_repr) + + def test_invalid_indent(self): + test_object = [1, 'spam', {'eggs': True, 'ham': []}] + test_cases = [ + (-1, (ValueError, '[Nn]egative|[Pp]ositive')), + (-4, (ValueError, '[Nn]egative|[Pp]ositive')), + ((), (TypeError, None)), + ([], (TypeError, None)), + ((4,), (TypeError, None)), + ([4,], (TypeError, None)), + (object(), (TypeError, None)), + ] + for indent, (expected_error, expected_msg) in test_cases: + with self.subTest(indent=indent): + r = Repr() + r.indent = indent + expected_msg = expected_msg or f'{type(indent)}' + with self.assertRaisesRegex(expected_error, expected_msg): + r.repr(test_object) + + def test_shadowed_stdlib_array(self): + # Issue #113570: repr() should not be fooled by an array + class array: + def __repr__(self): + return "not array.array" + + self.assertEqual(r(array()), "not array.array") + + def test_shadowed_builtin(self): + # Issue #113570: repr() should not be fooled + # by a shadowed builtin function + class list: + def __repr__(self): + return "not builtins.list" + + self.assertEqual(r(list()), "not builtins.list") + + def test_custom_repr(self): + class MyRepr(Repr): + + def repr_TextIOWrapper(self, obj, level): + if obj.name in {'', '', ''}: + return obj.name + return repr(obj) + + aRepr = MyRepr() + self.assertEqual(aRepr.repr(sys.stdin), "") + + def test_custom_repr_class_with_spaces(self): + class TypeWithSpaces: + pass + + t = TypeWithSpaces() + type(t).__name__ = "type with spaces" + self.assertEqual(type(t).__name__, "type with spaces") + + class MyRepr(Repr): + def repr_type_with_spaces(self, obj, level): + return "Type With Spaces" + + + aRepr = MyRepr() + self.assertEqual(aRepr.repr(t), "Type With Spaces") + def write_file(path, text): with open(path, 'w', encoding='ASCII') as fp: fp.write(text) @@ -408,5 +815,27 @@ def test_assigned_attributes(self): for name in assigned: self.assertIs(getattr(wrapper, name), getattr(wrapped, name)) + def test__wrapped__(self): + class X: + def __repr__(self): + return 'X()' + f = __repr__ # save reference to check it later + __repr__ = recursive_repr()(__repr__) + + self.assertIs(X.f, X.__repr__.__wrapped__) + + # TODO: RUSTPYTHON: AttributeError: 'TypeVar' object has no attribute '__name__' + @unittest.expectedFailure + def test__type_params__(self): + class My: + @recursive_repr() + def __repr__[T: str](self, default: T = '') -> str: + return default + + type_params = My().__repr__.__type_params__ + self.assertEqual(len(type_params), 1) + self.assertEqual(type_params[0].__name__, 'T') + self.assertEqual(type_params[0].__bound__, str) + if __name__ == "__main__": unittest.main() diff --git a/Lib/test/test_sched.py b/Lib/test/test_sched.py index 7ae7baae85..eb52ac7983 100644 --- a/Lib/test/test_sched.py +++ b/Lib/test/test_sched.py @@ -58,6 +58,7 @@ def test_enterabs(self): scheduler.run() self.assertEqual(l, [0.01, 0.02, 0.03, 0.04, 0.05]) + @threading_helper.requires_working_threading() def test_enter_concurrent(self): q = queue.Queue() fun = q.put @@ -91,10 +92,23 @@ def test_priority(self): l = [] fun = lambda x: l.append(x) scheduler = sched.scheduler(time.time, time.sleep) - for priority in [1, 2, 3, 4, 5]: - z = scheduler.enterabs(0.01, priority, fun, (priority,)) - scheduler.run() - self.assertEqual(l, [1, 2, 3, 4, 5]) + + cases = [ + ([1, 2, 3, 4, 5], [1, 2, 3, 4, 5]), + ([5, 4, 3, 2, 1], [1, 2, 3, 4, 5]), + ([2, 5, 3, 1, 4], [1, 2, 3, 4, 5]), + ([1, 2, 3, 2, 1], [1, 1, 2, 2, 3]), + ] + for priorities, expected in cases: + with self.subTest(priorities=priorities, expected=expected): + for priority in priorities: + scheduler.enterabs(0.01, priority, fun, (priority,)) + scheduler.run() + self.assertEqual(l, expected) + + # Cleanup: + self.assertTrue(scheduler.empty()) + l.clear() def test_cancel(self): l = [] @@ -111,6 +125,7 @@ def test_cancel(self): scheduler.run() self.assertEqual(l, [0.02, 0.03, 0.04]) + @threading_helper.requires_working_threading() def test_cancel_concurrent(self): q = queue.Queue() fun = q.put diff --git a/Lib/test/test_set.py b/Lib/test/test_set.py index 75447336e4..8ef2871c61 100644 --- a/Lib/test/test_set.py +++ b/Lib/test/test_set.py @@ -582,8 +582,6 @@ def test_ixor(self): else: self.assertNotIn(c, self.s) - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_inplace_on_self(self): t = self.s.copy() t |= t diff --git a/Lib/test/test_shutil.py b/Lib/test/test_shutil.py index 16416547c1..b64ccb37a5 100644 --- a/Lib/test/test_shutil.py +++ b/Lib/test/test_shutil.py @@ -2000,13 +2000,9 @@ def check_unpack_tarball(self, format): ('Python 3.14', DeprecationWarning)): self.check_unpack_archive(format) - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_unpack_archive_tar(self): self.check_unpack_tarball('tar') - # TODO: RUSTPYTHON - @unittest.expectedFailure @support.requires_zlib() def test_unpack_archive_gztar(self): self.check_unpack_tarball('gztar') diff --git a/Lib/test/test_smtpd.py b/Lib/test/test_smtpd.py deleted file mode 100644 index d2e150d535..0000000000 --- a/Lib/test/test_smtpd.py +++ /dev/null @@ -1,1018 +0,0 @@ -import unittest -import textwrap -from test import support, mock_socket -from test.support import socket_helper -from test.support import warnings_helper -import socket -import io - -import warnings -with warnings.catch_warnings(): - warnings.simplefilter('ignore', DeprecationWarning) - import smtpd - import asyncore - - -class DummyServer(smtpd.SMTPServer): - def __init__(self, *args, **kwargs): - smtpd.SMTPServer.__init__(self, *args, **kwargs) - self.messages = [] - if self._decode_data: - self.return_status = 'return status' - else: - self.return_status = b'return status' - - def process_message(self, peer, mailfrom, rcpttos, data, **kw): - self.messages.append((peer, mailfrom, rcpttos, data)) - if data == self.return_status: - return '250 Okish' - if 'mail_options' in kw and 'SMTPUTF8' in kw['mail_options']: - return '250 SMTPUTF8 message okish' - - -class DummyDispatcherBroken(Exception): - pass - - -class BrokenDummyServer(DummyServer): - def listen(self, num): - raise DummyDispatcherBroken() - - -class SMTPDServerTest(unittest.TestCase): - def setUp(self): - smtpd.socket = asyncore.socket = mock_socket - - def test_process_message_unimplemented(self): - server = smtpd.SMTPServer((socket_helper.HOST, 0), ('b', 0), - decode_data=True) - conn, addr = server.accept() - channel = smtpd.SMTPChannel(server, conn, addr, decode_data=True) - - def write_line(line): - channel.socket.queue_recv(line) - channel.handle_read() - - write_line(b'HELO example') - write_line(b'MAIL From:eggs@example') - write_line(b'RCPT To:spam@example') - write_line(b'DATA') - self.assertRaises(NotImplementedError, write_line, b'spam\r\n.\r\n') - - def test_decode_data_and_enable_SMTPUTF8_raises(self): - self.assertRaises( - ValueError, - smtpd.SMTPServer, - (socket_helper.HOST, 0), - ('b', 0), - enable_SMTPUTF8=True, - decode_data=True) - - def tearDown(self): - asyncore.close_all() - asyncore.socket = smtpd.socket = socket - - -class DebuggingServerTest(unittest.TestCase): - - def setUp(self): - smtpd.socket = asyncore.socket = mock_socket - - def send_data(self, channel, data, enable_SMTPUTF8=False): - def write_line(line): - channel.socket.queue_recv(line) - channel.handle_read() - write_line(b'EHLO example') - if enable_SMTPUTF8: - write_line(b'MAIL From:eggs@example BODY=8BITMIME SMTPUTF8') - else: - write_line(b'MAIL From:eggs@example') - write_line(b'RCPT To:spam@example') - write_line(b'DATA') - write_line(data) - write_line(b'.') - - def test_process_message_with_decode_data_true(self): - server = smtpd.DebuggingServer((socket_helper.HOST, 0), ('b', 0), - decode_data=True) - conn, addr = server.accept() - channel = smtpd.SMTPChannel(server, conn, addr, decode_data=True) - with support.captured_stdout() as s: - self.send_data(channel, b'From: test\n\nhello\n') - stdout = s.getvalue() - self.assertEqual(stdout, textwrap.dedent("""\ - ---------- MESSAGE FOLLOWS ---------- - From: test - X-Peer: peer-address - - hello - ------------ END MESSAGE ------------ - """)) - - def test_process_message_with_decode_data_false(self): - server = smtpd.DebuggingServer((socket_helper.HOST, 0), ('b', 0)) - conn, addr = server.accept() - channel = smtpd.SMTPChannel(server, conn, addr) - with support.captured_stdout() as s: - self.send_data(channel, b'From: test\n\nh\xc3\xa9llo\xff\n') - stdout = s.getvalue() - self.assertEqual(stdout, textwrap.dedent("""\ - ---------- MESSAGE FOLLOWS ---------- - b'From: test' - b'X-Peer: peer-address' - b'' - b'h\\xc3\\xa9llo\\xff' - ------------ END MESSAGE ------------ - """)) - - def test_process_message_with_enable_SMTPUTF8_true(self): - server = smtpd.DebuggingServer((socket_helper.HOST, 0), ('b', 0), - enable_SMTPUTF8=True) - conn, addr = server.accept() - channel = smtpd.SMTPChannel(server, conn, addr, enable_SMTPUTF8=True) - with support.captured_stdout() as s: - self.send_data(channel, b'From: test\n\nh\xc3\xa9llo\xff\n') - stdout = s.getvalue() - self.assertEqual(stdout, textwrap.dedent("""\ - ---------- MESSAGE FOLLOWS ---------- - b'From: test' - b'X-Peer: peer-address' - b'' - b'h\\xc3\\xa9llo\\xff' - ------------ END MESSAGE ------------ - """)) - - def test_process_SMTPUTF8_message_with_enable_SMTPUTF8_true(self): - server = smtpd.DebuggingServer((socket_helper.HOST, 0), ('b', 0), - enable_SMTPUTF8=True) - conn, addr = server.accept() - channel = smtpd.SMTPChannel(server, conn, addr, enable_SMTPUTF8=True) - with support.captured_stdout() as s: - self.send_data(channel, b'From: test\n\nh\xc3\xa9llo\xff\n', - enable_SMTPUTF8=True) - stdout = s.getvalue() - self.assertEqual(stdout, textwrap.dedent("""\ - ---------- MESSAGE FOLLOWS ---------- - mail options: ['BODY=8BITMIME', 'SMTPUTF8'] - b'From: test' - b'X-Peer: peer-address' - b'' - b'h\\xc3\\xa9llo\\xff' - ------------ END MESSAGE ------------ - """)) - - def tearDown(self): - asyncore.close_all() - asyncore.socket = smtpd.socket = socket - - -class TestFamilyDetection(unittest.TestCase): - def setUp(self): - smtpd.socket = asyncore.socket = mock_socket - - def tearDown(self): - asyncore.close_all() - asyncore.socket = smtpd.socket = socket - - @unittest.skipUnless(socket_helper.IPV6_ENABLED, "IPv6 not enabled") - def test_socket_uses_IPv6(self): - server = smtpd.SMTPServer((socket_helper.HOSTv6, 0), (socket_helper.HOSTv4, 0)) - self.assertEqual(server.socket.family, socket.AF_INET6) - - def test_socket_uses_IPv4(self): - server = smtpd.SMTPServer((socket_helper.HOSTv4, 0), (socket_helper.HOSTv6, 0)) - self.assertEqual(server.socket.family, socket.AF_INET) - - -class TestRcptOptionParsing(unittest.TestCase): - error_response = (b'555 RCPT TO parameters not recognized or not ' - b'implemented\r\n') - - def setUp(self): - smtpd.socket = asyncore.socket = mock_socket - self.old_debugstream = smtpd.DEBUGSTREAM - self.debug = smtpd.DEBUGSTREAM = io.StringIO() - - def tearDown(self): - asyncore.close_all() - asyncore.socket = smtpd.socket = socket - smtpd.DEBUGSTREAM = self.old_debugstream - - def write_line(self, channel, line): - channel.socket.queue_recv(line) - channel.handle_read() - - def test_params_rejected(self): - server = DummyServer((socket_helper.HOST, 0), ('b', 0)) - conn, addr = server.accept() - channel = smtpd.SMTPChannel(server, conn, addr) - self.write_line(channel, b'EHLO example') - self.write_line(channel, b'MAIL from: size=20') - self.write_line(channel, b'RCPT to: foo=bar') - self.assertEqual(channel.socket.last, self.error_response) - - def test_nothing_accepted(self): - server = DummyServer((socket_helper.HOST, 0), ('b', 0)) - conn, addr = server.accept() - channel = smtpd.SMTPChannel(server, conn, addr) - self.write_line(channel, b'EHLO example') - self.write_line(channel, b'MAIL from: size=20') - self.write_line(channel, b'RCPT to: ') - self.assertEqual(channel.socket.last, b'250 OK\r\n') - - -class TestMailOptionParsing(unittest.TestCase): - error_response = (b'555 MAIL FROM parameters not recognized or not ' - b'implemented\r\n') - - def setUp(self): - smtpd.socket = asyncore.socket = mock_socket - self.old_debugstream = smtpd.DEBUGSTREAM - self.debug = smtpd.DEBUGSTREAM = io.StringIO() - - def tearDown(self): - asyncore.close_all() - asyncore.socket = smtpd.socket = socket - smtpd.DEBUGSTREAM = self.old_debugstream - - def write_line(self, channel, line): - channel.socket.queue_recv(line) - channel.handle_read() - - def test_with_decode_data_true(self): - server = DummyServer((socket_helper.HOST, 0), ('b', 0), decode_data=True) - conn, addr = server.accept() - channel = smtpd.SMTPChannel(server, conn, addr, decode_data=True) - self.write_line(channel, b'EHLO example') - for line in [ - b'MAIL from: size=20 SMTPUTF8', - b'MAIL from: size=20 SMTPUTF8 BODY=8BITMIME', - b'MAIL from: size=20 BODY=UNKNOWN', - b'MAIL from: size=20 body=8bitmime', - ]: - self.write_line(channel, line) - self.assertEqual(channel.socket.last, self.error_response) - self.write_line(channel, b'MAIL from: size=20') - self.assertEqual(channel.socket.last, b'250 OK\r\n') - - def test_with_decode_data_false(self): - server = DummyServer((socket_helper.HOST, 0), ('b', 0)) - conn, addr = server.accept() - channel = smtpd.SMTPChannel(server, conn, addr) - self.write_line(channel, b'EHLO example') - for line in [ - b'MAIL from: size=20 SMTPUTF8', - b'MAIL from: size=20 SMTPUTF8 BODY=8BITMIME', - ]: - self.write_line(channel, line) - self.assertEqual(channel.socket.last, self.error_response) - self.write_line( - channel, - b'MAIL from: size=20 SMTPUTF8 BODY=UNKNOWN') - self.assertEqual( - channel.socket.last, - b'501 Error: BODY can only be one of 7BIT, 8BITMIME\r\n') - self.write_line( - channel, b'MAIL from: size=20 body=8bitmime') - self.assertEqual(channel.socket.last, b'250 OK\r\n') - - def test_with_enable_smtputf8_true(self): - server = DummyServer((socket_helper.HOST, 0), ('b', 0), enable_SMTPUTF8=True) - conn, addr = server.accept() - channel = smtpd.SMTPChannel(server, conn, addr, enable_SMTPUTF8=True) - self.write_line(channel, b'EHLO example') - self.write_line( - channel, - b'MAIL from: size=20 body=8bitmime smtputf8') - self.assertEqual(channel.socket.last, b'250 OK\r\n') - - -class SMTPDChannelTest(unittest.TestCase): - def setUp(self): - smtpd.socket = asyncore.socket = mock_socket - self.old_debugstream = smtpd.DEBUGSTREAM - self.debug = smtpd.DEBUGSTREAM = io.StringIO() - self.server = DummyServer((socket_helper.HOST, 0), ('b', 0), - decode_data=True) - conn, addr = self.server.accept() - self.channel = smtpd.SMTPChannel(self.server, conn, addr, - decode_data=True) - - def tearDown(self): - asyncore.close_all() - asyncore.socket = smtpd.socket = socket - smtpd.DEBUGSTREAM = self.old_debugstream - - def write_line(self, line): - self.channel.socket.queue_recv(line) - self.channel.handle_read() - - def test_broken_connect(self): - self.assertRaises( - DummyDispatcherBroken, BrokenDummyServer, - (socket_helper.HOST, 0), ('b', 0), decode_data=True) - - def test_decode_data_and_enable_SMTPUTF8_raises(self): - self.assertRaises( - ValueError, smtpd.SMTPChannel, - self.server, self.channel.conn, self.channel.addr, - enable_SMTPUTF8=True, decode_data=True) - - def test_server_accept(self): - self.server.handle_accept() - - def test_missing_data(self): - self.write_line(b'') - self.assertEqual(self.channel.socket.last, - b'500 Error: bad syntax\r\n') - - def test_EHLO(self): - self.write_line(b'EHLO example') - self.assertEqual(self.channel.socket.last, b'250 HELP\r\n') - - def test_EHLO_bad_syntax(self): - self.write_line(b'EHLO') - self.assertEqual(self.channel.socket.last, - b'501 Syntax: EHLO hostname\r\n') - - def test_EHLO_duplicate(self): - self.write_line(b'EHLO example') - self.write_line(b'EHLO example') - self.assertEqual(self.channel.socket.last, - b'503 Duplicate HELO/EHLO\r\n') - - def test_EHLO_HELO_duplicate(self): - self.write_line(b'EHLO example') - self.write_line(b'HELO example') - self.assertEqual(self.channel.socket.last, - b'503 Duplicate HELO/EHLO\r\n') - - def test_HELO(self): - name = smtpd.socket.getfqdn() - self.write_line(b'HELO example') - self.assertEqual(self.channel.socket.last, - '250 {}\r\n'.format(name).encode('ascii')) - - def test_HELO_EHLO_duplicate(self): - self.write_line(b'HELO example') - self.write_line(b'EHLO example') - self.assertEqual(self.channel.socket.last, - b'503 Duplicate HELO/EHLO\r\n') - - def test_HELP(self): - self.write_line(b'HELP') - self.assertEqual(self.channel.socket.last, - b'250 Supported commands: EHLO HELO MAIL RCPT ' + \ - b'DATA RSET NOOP QUIT VRFY\r\n') - - def test_HELP_command(self): - self.write_line(b'HELP MAIL') - self.assertEqual(self.channel.socket.last, - b'250 Syntax: MAIL FROM:
\r\n') - - def test_HELP_command_unknown(self): - self.write_line(b'HELP SPAM') - self.assertEqual(self.channel.socket.last, - b'501 Supported commands: EHLO HELO MAIL RCPT ' + \ - b'DATA RSET NOOP QUIT VRFY\r\n') - - def test_HELO_bad_syntax(self): - self.write_line(b'HELO') - self.assertEqual(self.channel.socket.last, - b'501 Syntax: HELO hostname\r\n') - - def test_HELO_duplicate(self): - self.write_line(b'HELO example') - self.write_line(b'HELO example') - self.assertEqual(self.channel.socket.last, - b'503 Duplicate HELO/EHLO\r\n') - - def test_HELO_parameter_rejected_when_extensions_not_enabled(self): - self.extended_smtp = False - self.write_line(b'HELO example') - self.write_line(b'MAIL from: SIZE=1234') - self.assertEqual(self.channel.socket.last, - b'501 Syntax: MAIL FROM:
\r\n') - - def test_MAIL_allows_space_after_colon(self): - self.write_line(b'HELO example') - self.write_line(b'MAIL from: ') - self.assertEqual(self.channel.socket.last, - b'250 OK\r\n') - - def test_extended_MAIL_allows_space_after_colon(self): - self.write_line(b'EHLO example') - self.write_line(b'MAIL from: size=20') - self.assertEqual(self.channel.socket.last, - b'250 OK\r\n') - - def test_NOOP(self): - self.write_line(b'NOOP') - self.assertEqual(self.channel.socket.last, b'250 OK\r\n') - - def test_HELO_NOOP(self): - self.write_line(b'HELO example') - self.write_line(b'NOOP') - self.assertEqual(self.channel.socket.last, b'250 OK\r\n') - - def test_NOOP_bad_syntax(self): - self.write_line(b'NOOP hi') - self.assertEqual(self.channel.socket.last, - b'501 Syntax: NOOP\r\n') - - def test_QUIT(self): - self.write_line(b'QUIT') - self.assertEqual(self.channel.socket.last, b'221 Bye\r\n') - - def test_HELO_QUIT(self): - self.write_line(b'HELO example') - self.write_line(b'QUIT') - self.assertEqual(self.channel.socket.last, b'221 Bye\r\n') - - def test_QUIT_arg_ignored(self): - self.write_line(b'QUIT bye bye') - self.assertEqual(self.channel.socket.last, b'221 Bye\r\n') - - def test_bad_state(self): - self.channel.smtp_state = 'BAD STATE' - self.write_line(b'HELO example') - self.assertEqual(self.channel.socket.last, - b'451 Internal confusion\r\n') - - def test_command_too_long(self): - self.write_line(b'HELO example') - self.write_line(b'MAIL from: ' + - b'a' * self.channel.command_size_limit + - b'@example') - self.assertEqual(self.channel.socket.last, - b'500 Error: line too long\r\n') - - def test_MAIL_command_limit_extended_with_SIZE(self): - self.write_line(b'EHLO example') - fill_len = self.channel.command_size_limit - len('MAIL from:<@example>') - self.write_line(b'MAIL from:<' + - b'a' * fill_len + - b'@example> SIZE=1234') - self.assertEqual(self.channel.socket.last, b'250 OK\r\n') - - self.write_line(b'MAIL from:<' + - b'a' * (fill_len + 26) + - b'@example> SIZE=1234') - self.assertEqual(self.channel.socket.last, - b'500 Error: line too long\r\n') - - def test_MAIL_command_rejects_SMTPUTF8_by_default(self): - self.write_line(b'EHLO example') - self.write_line( - b'MAIL from: BODY=8BITMIME SMTPUTF8') - self.assertEqual(self.channel.socket.last[0:1], b'5') - - def test_data_longer_than_default_data_size_limit(self): - # Hack the default so we don't have to generate so much data. - self.channel.data_size_limit = 1048 - self.write_line(b'HELO example') - self.write_line(b'MAIL From:eggs@example') - self.write_line(b'RCPT To:spam@example') - self.write_line(b'DATA') - self.write_line(b'A' * self.channel.data_size_limit + - b'A\r\n.') - self.assertEqual(self.channel.socket.last, - b'552 Error: Too much mail data\r\n') - - def test_MAIL_size_parameter(self): - self.write_line(b'EHLO example') - self.write_line(b'MAIL FROM: SIZE=512') - self.assertEqual(self.channel.socket.last, - b'250 OK\r\n') - - def test_MAIL_invalid_size_parameter(self): - self.write_line(b'EHLO example') - self.write_line(b'MAIL FROM: SIZE=invalid') - self.assertEqual(self.channel.socket.last, - b'501 Syntax: MAIL FROM:
[SP ]\r\n') - - def test_MAIL_RCPT_unknown_parameters(self): - self.write_line(b'EHLO example') - self.write_line(b'MAIL FROM: ham=green') - self.assertEqual(self.channel.socket.last, - b'555 MAIL FROM parameters not recognized or not implemented\r\n') - - self.write_line(b'MAIL FROM:') - self.write_line(b'RCPT TO: ham=green') - self.assertEqual(self.channel.socket.last, - b'555 RCPT TO parameters not recognized or not implemented\r\n') - - def test_MAIL_size_parameter_larger_than_default_data_size_limit(self): - self.channel.data_size_limit = 1048 - self.write_line(b'EHLO example') - self.write_line(b'MAIL FROM: SIZE=2096') - self.assertEqual(self.channel.socket.last, - b'552 Error: message size exceeds fixed maximum message size\r\n') - - def test_need_MAIL(self): - self.write_line(b'HELO example') - self.write_line(b'RCPT to:spam@example') - self.assertEqual(self.channel.socket.last, - b'503 Error: need MAIL command\r\n') - - def test_MAIL_syntax_HELO(self): - self.write_line(b'HELO example') - self.write_line(b'MAIL from eggs@example') - self.assertEqual(self.channel.socket.last, - b'501 Syntax: MAIL FROM:
\r\n') - - def test_MAIL_syntax_EHLO(self): - self.write_line(b'EHLO example') - self.write_line(b'MAIL from eggs@example') - self.assertEqual(self.channel.socket.last, - b'501 Syntax: MAIL FROM:
[SP ]\r\n') - - def test_MAIL_missing_address(self): - self.write_line(b'HELO example') - self.write_line(b'MAIL from:') - self.assertEqual(self.channel.socket.last, - b'501 Syntax: MAIL FROM:
\r\n') - - def test_MAIL_chevrons(self): - self.write_line(b'HELO example') - self.write_line(b'MAIL from:') - self.assertEqual(self.channel.socket.last, b'250 OK\r\n') - - def test_MAIL_empty_chevrons(self): - self.write_line(b'EHLO example') - self.write_line(b'MAIL from:<>') - self.assertEqual(self.channel.socket.last, b'250 OK\r\n') - - def test_MAIL_quoted_localpart(self): - self.write_line(b'EHLO example') - self.write_line(b'MAIL from: <"Fred Blogs"@example.com>') - self.assertEqual(self.channel.socket.last, b'250 OK\r\n') - self.assertEqual(self.channel.mailfrom, '"Fred Blogs"@example.com') - - def test_MAIL_quoted_localpart_no_angles(self): - self.write_line(b'EHLO example') - self.write_line(b'MAIL from: "Fred Blogs"@example.com') - self.assertEqual(self.channel.socket.last, b'250 OK\r\n') - self.assertEqual(self.channel.mailfrom, '"Fred Blogs"@example.com') - - def test_MAIL_quoted_localpart_with_size(self): - self.write_line(b'EHLO example') - self.write_line(b'MAIL from: <"Fred Blogs"@example.com> SIZE=1000') - self.assertEqual(self.channel.socket.last, b'250 OK\r\n') - self.assertEqual(self.channel.mailfrom, '"Fred Blogs"@example.com') - - def test_MAIL_quoted_localpart_with_size_no_angles(self): - self.write_line(b'EHLO example') - self.write_line(b'MAIL from: "Fred Blogs"@example.com SIZE=1000') - self.assertEqual(self.channel.socket.last, b'250 OK\r\n') - self.assertEqual(self.channel.mailfrom, '"Fred Blogs"@example.com') - - def test_nested_MAIL(self): - self.write_line(b'HELO example') - self.write_line(b'MAIL from:eggs@example') - self.write_line(b'MAIL from:spam@example') - self.assertEqual(self.channel.socket.last, - b'503 Error: nested MAIL command\r\n') - - def test_VRFY(self): - self.write_line(b'VRFY eggs@example') - self.assertEqual(self.channel.socket.last, - b'252 Cannot VRFY user, but will accept message and attempt ' + \ - b'delivery\r\n') - - def test_VRFY_syntax(self): - self.write_line(b'VRFY') - self.assertEqual(self.channel.socket.last, - b'501 Syntax: VRFY
\r\n') - - def test_EXPN_not_implemented(self): - self.write_line(b'EXPN') - self.assertEqual(self.channel.socket.last, - b'502 EXPN not implemented\r\n') - - def test_no_HELO_MAIL(self): - self.write_line(b'MAIL from:') - self.assertEqual(self.channel.socket.last, - b'503 Error: send HELO first\r\n') - - def test_need_RCPT(self): - self.write_line(b'HELO example') - self.write_line(b'MAIL From:eggs@example') - self.write_line(b'DATA') - self.assertEqual(self.channel.socket.last, - b'503 Error: need RCPT command\r\n') - - def test_RCPT_syntax_HELO(self): - self.write_line(b'HELO example') - self.write_line(b'MAIL From: eggs@example') - self.write_line(b'RCPT to eggs@example') - self.assertEqual(self.channel.socket.last, - b'501 Syntax: RCPT TO:
\r\n') - - def test_RCPT_syntax_EHLO(self): - self.write_line(b'EHLO example') - self.write_line(b'MAIL From: eggs@example') - self.write_line(b'RCPT to eggs@example') - self.assertEqual(self.channel.socket.last, - b'501 Syntax: RCPT TO:
[SP ]\r\n') - - def test_RCPT_lowercase_to_OK(self): - self.write_line(b'HELO example') - self.write_line(b'MAIL From: eggs@example') - self.write_line(b'RCPT to: ') - self.assertEqual(self.channel.socket.last, b'250 OK\r\n') - - def test_no_HELO_RCPT(self): - self.write_line(b'RCPT to eggs@example') - self.assertEqual(self.channel.socket.last, - b'503 Error: send HELO first\r\n') - - def test_data_dialog(self): - self.write_line(b'HELO example') - self.write_line(b'MAIL From:eggs@example') - self.assertEqual(self.channel.socket.last, b'250 OK\r\n') - self.write_line(b'RCPT To:spam@example') - self.assertEqual(self.channel.socket.last, b'250 OK\r\n') - - self.write_line(b'DATA') - self.assertEqual(self.channel.socket.last, - b'354 End data with .\r\n') - self.write_line(b'data\r\nmore\r\n.') - self.assertEqual(self.channel.socket.last, b'250 OK\r\n') - self.assertEqual(self.server.messages, - [(('peer-address', 'peer-port'), - 'eggs@example', - ['spam@example'], - 'data\nmore')]) - - def test_DATA_syntax(self): - self.write_line(b'HELO example') - self.write_line(b'MAIL From:eggs@example') - self.write_line(b'RCPT To:spam@example') - self.write_line(b'DATA spam') - self.assertEqual(self.channel.socket.last, b'501 Syntax: DATA\r\n') - - def test_no_HELO_DATA(self): - self.write_line(b'DATA spam') - self.assertEqual(self.channel.socket.last, - b'503 Error: send HELO first\r\n') - - def test_data_transparency_section_4_5_2(self): - self.write_line(b'HELO example') - self.write_line(b'MAIL From:eggs@example') - self.write_line(b'RCPT To:spam@example') - self.write_line(b'DATA') - self.write_line(b'..\r\n.\r\n') - self.assertEqual(self.channel.received_data, '.') - - def test_multiple_RCPT(self): - self.write_line(b'HELO example') - self.write_line(b'MAIL From:eggs@example') - self.write_line(b'RCPT To:spam@example') - self.write_line(b'RCPT To:ham@example') - self.write_line(b'DATA') - self.write_line(b'data\r\n.') - self.assertEqual(self.server.messages, - [(('peer-address', 'peer-port'), - 'eggs@example', - ['spam@example','ham@example'], - 'data')]) - - def test_manual_status(self): - # checks that the Channel is able to return a custom status message - self.write_line(b'HELO example') - self.write_line(b'MAIL From:eggs@example') - self.write_line(b'RCPT To:spam@example') - self.write_line(b'DATA') - self.write_line(b'return status\r\n.') - self.assertEqual(self.channel.socket.last, b'250 Okish\r\n') - - def test_RSET(self): - self.write_line(b'HELO example') - self.write_line(b'MAIL From:eggs@example') - self.write_line(b'RCPT To:spam@example') - self.write_line(b'RSET') - self.assertEqual(self.channel.socket.last, b'250 OK\r\n') - self.write_line(b'MAIL From:foo@example') - self.write_line(b'RCPT To:eggs@example') - self.write_line(b'DATA') - self.write_line(b'data\r\n.') - self.assertEqual(self.server.messages, - [(('peer-address', 'peer-port'), - 'foo@example', - ['eggs@example'], - 'data')]) - - def test_HELO_RSET(self): - self.write_line(b'HELO example') - self.write_line(b'RSET') - self.assertEqual(self.channel.socket.last, b'250 OK\r\n') - - def test_RSET_syntax(self): - self.write_line(b'RSET hi') - self.assertEqual(self.channel.socket.last, b'501 Syntax: RSET\r\n') - - def test_unknown_command(self): - self.write_line(b'UNKNOWN_CMD') - self.assertEqual(self.channel.socket.last, - b'500 Error: command "UNKNOWN_CMD" not ' + \ - b'recognized\r\n') - - def test_attribute_deprecations(self): - with warnings_helper.check_warnings(('', DeprecationWarning)): - spam = self.channel._SMTPChannel__server - with warnings_helper.check_warnings(('', DeprecationWarning)): - self.channel._SMTPChannel__server = 'spam' - with warnings_helper.check_warnings(('', DeprecationWarning)): - spam = self.channel._SMTPChannel__line - with warnings_helper.check_warnings(('', DeprecationWarning)): - self.channel._SMTPChannel__line = 'spam' - with warnings_helper.check_warnings(('', DeprecationWarning)): - spam = self.channel._SMTPChannel__state - with warnings_helper.check_warnings(('', DeprecationWarning)): - self.channel._SMTPChannel__state = 'spam' - with warnings_helper.check_warnings(('', DeprecationWarning)): - spam = self.channel._SMTPChannel__greeting - with warnings_helper.check_warnings(('', DeprecationWarning)): - self.channel._SMTPChannel__greeting = 'spam' - with warnings_helper.check_warnings(('', DeprecationWarning)): - spam = self.channel._SMTPChannel__mailfrom - with warnings_helper.check_warnings(('', DeprecationWarning)): - self.channel._SMTPChannel__mailfrom = 'spam' - with warnings_helper.check_warnings(('', DeprecationWarning)): - spam = self.channel._SMTPChannel__rcpttos - with warnings_helper.check_warnings(('', DeprecationWarning)): - self.channel._SMTPChannel__rcpttos = 'spam' - with warnings_helper.check_warnings(('', DeprecationWarning)): - spam = self.channel._SMTPChannel__data - with warnings_helper.check_warnings(('', DeprecationWarning)): - self.channel._SMTPChannel__data = 'spam' - with warnings_helper.check_warnings(('', DeprecationWarning)): - spam = self.channel._SMTPChannel__fqdn - with warnings_helper.check_warnings(('', DeprecationWarning)): - self.channel._SMTPChannel__fqdn = 'spam' - with warnings_helper.check_warnings(('', DeprecationWarning)): - spam = self.channel._SMTPChannel__peer - with warnings_helper.check_warnings(('', DeprecationWarning)): - self.channel._SMTPChannel__peer = 'spam' - with warnings_helper.check_warnings(('', DeprecationWarning)): - spam = self.channel._SMTPChannel__conn - with warnings_helper.check_warnings(('', DeprecationWarning)): - self.channel._SMTPChannel__conn = 'spam' - with warnings_helper.check_warnings(('', DeprecationWarning)): - spam = self.channel._SMTPChannel__addr - with warnings_helper.check_warnings(('', DeprecationWarning)): - self.channel._SMTPChannel__addr = 'spam' - -@unittest.skipUnless(socket_helper.IPV6_ENABLED, "IPv6 not enabled") -class SMTPDChannelIPv6Test(SMTPDChannelTest): - def setUp(self): - smtpd.socket = asyncore.socket = mock_socket - self.old_debugstream = smtpd.DEBUGSTREAM - self.debug = smtpd.DEBUGSTREAM = io.StringIO() - self.server = DummyServer((socket_helper.HOSTv6, 0), ('b', 0), - decode_data=True) - conn, addr = self.server.accept() - self.channel = smtpd.SMTPChannel(self.server, conn, addr, - decode_data=True) - -class SMTPDChannelWithDataSizeLimitTest(unittest.TestCase): - - def setUp(self): - smtpd.socket = asyncore.socket = mock_socket - self.old_debugstream = smtpd.DEBUGSTREAM - self.debug = smtpd.DEBUGSTREAM = io.StringIO() - self.server = DummyServer((socket_helper.HOST, 0), ('b', 0), - decode_data=True) - conn, addr = self.server.accept() - # Set DATA size limit to 32 bytes for easy testing - self.channel = smtpd.SMTPChannel(self.server, conn, addr, 32, - decode_data=True) - - def tearDown(self): - asyncore.close_all() - asyncore.socket = smtpd.socket = socket - smtpd.DEBUGSTREAM = self.old_debugstream - - def write_line(self, line): - self.channel.socket.queue_recv(line) - self.channel.handle_read() - - def test_data_limit_dialog(self): - self.write_line(b'HELO example') - self.write_line(b'MAIL From:eggs@example') - self.assertEqual(self.channel.socket.last, b'250 OK\r\n') - self.write_line(b'RCPT To:spam@example') - self.assertEqual(self.channel.socket.last, b'250 OK\r\n') - - self.write_line(b'DATA') - self.assertEqual(self.channel.socket.last, - b'354 End data with .\r\n') - self.write_line(b'data\r\nmore\r\n.') - self.assertEqual(self.channel.socket.last, b'250 OK\r\n') - self.assertEqual(self.server.messages, - [(('peer-address', 'peer-port'), - 'eggs@example', - ['spam@example'], - 'data\nmore')]) - - def test_data_limit_dialog_too_much_data(self): - self.write_line(b'HELO example') - self.write_line(b'MAIL From:eggs@example') - self.assertEqual(self.channel.socket.last, b'250 OK\r\n') - self.write_line(b'RCPT To:spam@example') - self.assertEqual(self.channel.socket.last, b'250 OK\r\n') - - self.write_line(b'DATA') - self.assertEqual(self.channel.socket.last, - b'354 End data with .\r\n') - self.write_line(b'This message is longer than 32 bytes\r\n.') - self.assertEqual(self.channel.socket.last, - b'552 Error: Too much mail data\r\n') - - -class SMTPDChannelWithDecodeDataFalse(unittest.TestCase): - - def setUp(self): - smtpd.socket = asyncore.socket = mock_socket - self.old_debugstream = smtpd.DEBUGSTREAM - self.debug = smtpd.DEBUGSTREAM = io.StringIO() - self.server = DummyServer((socket_helper.HOST, 0), ('b', 0)) - conn, addr = self.server.accept() - self.channel = smtpd.SMTPChannel(self.server, conn, addr) - - def tearDown(self): - asyncore.close_all() - asyncore.socket = smtpd.socket = socket - smtpd.DEBUGSTREAM = self.old_debugstream - - def write_line(self, line): - self.channel.socket.queue_recv(line) - self.channel.handle_read() - - def test_ascii_data(self): - self.write_line(b'HELO example') - self.write_line(b'MAIL From:eggs@example') - self.write_line(b'RCPT To:spam@example') - self.write_line(b'DATA') - self.write_line(b'plain ascii text') - self.write_line(b'.') - self.assertEqual(self.channel.received_data, b'plain ascii text') - - def test_utf8_data(self): - self.write_line(b'HELO example') - self.write_line(b'MAIL From:eggs@example') - self.write_line(b'RCPT To:spam@example') - self.write_line(b'DATA') - self.write_line(b'utf8 enriched text: \xc5\xbc\xc5\xba\xc4\x87') - self.write_line(b'and some plain ascii') - self.write_line(b'.') - self.assertEqual( - self.channel.received_data, - b'utf8 enriched text: \xc5\xbc\xc5\xba\xc4\x87\n' - b'and some plain ascii') - - -class SMTPDChannelWithDecodeDataTrue(unittest.TestCase): - - def setUp(self): - smtpd.socket = asyncore.socket = mock_socket - self.old_debugstream = smtpd.DEBUGSTREAM - self.debug = smtpd.DEBUGSTREAM = io.StringIO() - self.server = DummyServer((socket_helper.HOST, 0), ('b', 0), - decode_data=True) - conn, addr = self.server.accept() - # Set decode_data to True - self.channel = smtpd.SMTPChannel(self.server, conn, addr, - decode_data=True) - - def tearDown(self): - asyncore.close_all() - asyncore.socket = smtpd.socket = socket - smtpd.DEBUGSTREAM = self.old_debugstream - - def write_line(self, line): - self.channel.socket.queue_recv(line) - self.channel.handle_read() - - def test_ascii_data(self): - self.write_line(b'HELO example') - self.write_line(b'MAIL From:eggs@example') - self.write_line(b'RCPT To:spam@example') - self.write_line(b'DATA') - self.write_line(b'plain ascii text') - self.write_line(b'.') - self.assertEqual(self.channel.received_data, 'plain ascii text') - - def test_utf8_data(self): - self.write_line(b'HELO example') - self.write_line(b'MAIL From:eggs@example') - self.write_line(b'RCPT To:spam@example') - self.write_line(b'DATA') - self.write_line(b'utf8 enriched text: \xc5\xbc\xc5\xba\xc4\x87') - self.write_line(b'and some plain ascii') - self.write_line(b'.') - self.assertEqual( - self.channel.received_data, - 'utf8 enriched text: żźć\nand some plain ascii') - - -class SMTPDChannelTestWithEnableSMTPUTF8True(unittest.TestCase): - def setUp(self): - smtpd.socket = asyncore.socket = mock_socket - self.old_debugstream = smtpd.DEBUGSTREAM - self.debug = smtpd.DEBUGSTREAM = io.StringIO() - self.server = DummyServer((socket_helper.HOST, 0), ('b', 0), - enable_SMTPUTF8=True) - conn, addr = self.server.accept() - self.channel = smtpd.SMTPChannel(self.server, conn, addr, - enable_SMTPUTF8=True) - - def tearDown(self): - asyncore.close_all() - asyncore.socket = smtpd.socket = socket - smtpd.DEBUGSTREAM = self.old_debugstream - - def write_line(self, line): - self.channel.socket.queue_recv(line) - self.channel.handle_read() - - def test_MAIL_command_accepts_SMTPUTF8_when_announced(self): - self.write_line(b'EHLO example') - self.write_line( - 'MAIL from: BODY=8BITMIME SMTPUTF8'.encode( - 'utf-8') - ) - self.assertEqual(self.channel.socket.last, b'250 OK\r\n') - - def test_process_smtputf8_message(self): - self.write_line(b'EHLO example') - for mail_parameters in [b'', b'BODY=8BITMIME SMTPUTF8']: - self.write_line(b'MAIL from: ' + mail_parameters) - self.assertEqual(self.channel.socket.last[0:3], b'250') - self.write_line(b'rcpt to:') - self.assertEqual(self.channel.socket.last[0:3], b'250') - self.write_line(b'data') - self.assertEqual(self.channel.socket.last[0:3], b'354') - self.write_line(b'c\r\n.') - if mail_parameters == b'': - self.assertEqual(self.channel.socket.last, b'250 OK\r\n') - else: - self.assertEqual(self.channel.socket.last, - b'250 SMTPUTF8 message okish\r\n') - - def test_utf8_data(self): - self.write_line(b'EHLO example') - self.write_line( - 'MAIL From: naïve@examplé BODY=8BITMIME SMTPUTF8'.encode('utf-8')) - self.assertEqual(self.channel.socket.last[0:3], b'250') - self.write_line('RCPT To:späm@examplé'.encode('utf-8')) - self.assertEqual(self.channel.socket.last[0:3], b'250') - self.write_line(b'DATA') - self.assertEqual(self.channel.socket.last[0:3], b'354') - self.write_line(b'utf8 enriched text: \xc5\xbc\xc5\xba\xc4\x87') - self.write_line(b'.') - self.assertEqual( - self.channel.received_data, - b'utf8 enriched text: \xc5\xbc\xc5\xba\xc4\x87') - - def test_MAIL_command_limit_extended_with_SIZE_and_SMTPUTF8(self): - self.write_line(b'ehlo example') - fill_len = (512 + 26 + 10) - len('mail from:<@example>') - self.write_line(b'MAIL from:<' + - b'a' * (fill_len + 1) + - b'@example>') - self.assertEqual(self.channel.socket.last, - b'500 Error: line too long\r\n') - self.write_line(b'MAIL from:<' + - b'a' * fill_len + - b'@example>') - self.assertEqual(self.channel.socket.last, b'250 OK\r\n') - - def test_multiple_emails_with_extended_command_length(self): - self.write_line(b'ehlo example') - fill_len = (512 + 26 + 10) - len('mail from:<@example>') - for char in [b'a', b'b', b'c']: - self.write_line(b'MAIL from:<' + char * fill_len + b'a@example>') - self.assertEqual(self.channel.socket.last[0:3], b'500') - self.write_line(b'MAIL from:<' + char * fill_len + b'@example>') - self.assertEqual(self.channel.socket.last[0:3], b'250') - self.write_line(b'rcpt to:') - self.assertEqual(self.channel.socket.last[0:3], b'250') - self.write_line(b'data') - self.assertEqual(self.channel.socket.last[0:3], b'354') - self.write_line(b'test\r\n.') - self.assertEqual(self.channel.socket.last[0:3], b'250') - - -class MiscTestCase(unittest.TestCase): - def test__all__(self): - not_exported = { - "program", "Devnull", "DEBUGSTREAM", "NEWLINE", "COMMASPACE", - "DATA_SIZE_DEFAULT", "usage", "Options", "parseargs", - } - support.check__all__(self, smtpd, not_exported=not_exported) - - -if __name__ == "__main__": - unittest.main() diff --git a/Lib/test/test_sqlite3/test_dbapi.py b/Lib/test/test_sqlite3/test_dbapi.py index bbc151fcb7..56cd8f8a68 100644 --- a/Lib/test/test_sqlite3/test_dbapi.py +++ b/Lib/test/test_sqlite3/test_dbapi.py @@ -591,7 +591,7 @@ def test_connection_bad_reinit(self): ((v,) for v in range(3))) -@unittest.skip("TODO: RUSTPYHON") +@unittest.skip("TODO: RUSTPYTHON") class UninitialisedConnectionTests(unittest.TestCase): def setUp(self): self.cx = sqlite.Connection.__new__(sqlite.Connection) diff --git a/Lib/test/test_stat.py b/Lib/test/test_stat.py index c1edea8491..b6e9c24a80 100644 --- a/Lib/test/test_stat.py +++ b/Lib/test/test_stat.py @@ -240,40 +240,10 @@ class TestFilemodeCStat(TestFilemode, unittest.TestCase): statmod = c_stat # TODO: RUSTPYTHON - @unittest.expectedFailure - def test_devices(self): - super().test_devices() - - # TODO: RUSTPYTHON - @unittest.expectedFailure - def test_directory(self): - super().test_directory() - - # TODO: RUSTPYTHON - @unittest.expectedFailure - def test_file_attribute_constants(self): - super().test_file_attribute_constants() - - # TODO: RUSTPYTHON - @unittest.expectedFailure - def test_link(self): - super().test_link() - - # TODO: RUSTPYTHON - @unittest.expectedFailure - def test_mode(self): - super().test_mode() - - # TODO: RUSTPYTHON - @unittest.expectedFailure - def test_module_attributes(self): - super().test_module_attributes() - - # TODO: RUSTPYTHON - @unittest.expectedFailure - def test_socket(self): - super().test_socket() - + if sys.platform == "win32": + @unittest.expectedFailure + def test_link(self): + super().test_link() class TestFilemodePyStat(TestFilemode, unittest.TestCase): statmod = py_stat diff --git a/Lib/test/test_strftime.py b/Lib/test/test_strftime.py index 08ccebb9ed..f5024d8e6d 100644 --- a/Lib/test/test_strftime.py +++ b/Lib/test/test_strftime.py @@ -63,7 +63,6 @@ def setUp(self): setlocale(LC_TIME, 'C') self.addCleanup(setlocale, LC_TIME, saved_locale) - @unittest.skip("TODO: RUSTPYTHON, thread 'main' panicked at 'a Display implementation returned an error unexpectedly: Error'") def test_strftime(self): now = time.time() self._update_variables(now) diff --git a/Lib/test/test_struct.py b/Lib/test/test_struct.py index 6cb7b610e5..ef5602d083 100644 --- a/Lib/test/test_struct.py +++ b/Lib/test/test_struct.py @@ -9,7 +9,7 @@ import weakref from test import support -from test.support import import_helper +from test.support import import_helper, suppress_immortalization from test.support.script_helper import assert_python_ok ISBIGENDIAN = sys.byteorder == "big" @@ -96,6 +96,13 @@ def test_new_features(self): ('10s', b'helloworld', b'helloworld', b'helloworld', 0), ('11s', b'helloworld', b'helloworld\0', b'helloworld\0', 1), ('20s', b'helloworld', b'helloworld'+10*b'\0', b'helloworld'+10*b'\0', 1), + ('0p', b'helloworld', b'', b'', 1), + ('1p', b'helloworld', b'\x00', b'\x00', 1), + ('2p', b'helloworld', b'\x01h', b'\x01h', 1), + ('10p', b'helloworld', b'\x09helloworl', b'\x09helloworl', 1), + ('11p', b'helloworld', b'\x0Ahelloworld', b'\x0Ahelloworld', 0), + ('12p', b'helloworld', b'\x0Ahelloworld\0', b'\x0Ahelloworld\0', 1), + ('20p', b'helloworld', b'\x0Ahelloworld'+9*b'\0', b'\x0Ahelloworld'+9*b'\0', 1), ('b', 7, b'\7', b'\7', 0), ('b', -7, b'\371', b'\371', 0), ('B', 7, b'\7', b'\7', 0), @@ -339,6 +346,7 @@ def assertStructError(func, *args, **kwargs): def test_p_code(self): # Test p ("Pascal string") code. for code, input, expected, expectedback in [ + ('0p', b'abc', b'', b''), ('p', b'abc', b'\x00', b''), ('1p', b'abc', b'\x00', b''), ('2p', b'abc', b'\x01a', b'a'), @@ -523,6 +531,9 @@ def __bool__(self): for c in [b'\x01', b'\x7f', b'\xff', b'\x0f', b'\xf0']: self.assertTrue(struct.unpack('>?', c)[0]) + self.assertTrue(struct.unpack('': + for int_type in 'BHILQ': + test_error_msg(prefix, int_type, True) + for int_type in 'bhilq': + test_error_msg(prefix, int_type, False) + + int_type = 'N' + test_error_msg('@', int_type, True) + + int_type = 'n' + test_error_msg('@', int_type, False) + + @support.cpython_only + def test_issue98248_error_propagation(self): + class Div0: + def __index__(self): + 1 / 0 + + def test_error_propagation(fmt_str): + with self.subTest(format_str=fmt_str, exception="ZeroDivisionError"): + with self.assertRaises(ZeroDivisionError): + struct.pack(fmt_str, Div0()) + + for prefix in '@=<>': + for int_type in 'BHILQbhilq': + test_error_propagation(prefix + int_type) + + test_error_propagation('N') + test_error_propagation('n') + + # TODO: RUSTPYTHON + @unittest.expectedFailure + def test_struct_subclass_instantiation(self): + # Regression test for https://github.com/python/cpython/issues/112358 + class MyStruct(struct.Struct): + def __init__(self): + super().__init__('>h') + + my_struct = MyStruct() + self.assertEqual(my_struct.pack(12345), b'\x30\x39') + + def test_repr(self): + s = struct.Struct('=i2H') + self.assertEqual(repr(s), f'Struct({s.format!r})') class UnpackIteratorTest(unittest.TestCase): """ @@ -741,8 +818,6 @@ def _check_iterator(it): with self.assertRaises(struct.error): s.iter_unpack(b"12") - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_uninstantiable(self): iter_unpack_type = type(struct.Struct(">ibcp").iter_unpack(b"")) self.assertRaises(TypeError, iter_unpack_type) @@ -895,4 +970,4 @@ def test_half_float(self): if __name__ == '__main__': - unittest.main() + unittest.main() \ No newline at end of file diff --git a/Lib/test/test_sys.py b/Lib/test/test_sys.py index 590d9c8df8..1ce2e9fc0f 100644 --- a/Lib/test/test_sys.py +++ b/Lib/test/test_sys.py @@ -167,8 +167,6 @@ def test_excepthook_bytes_filename(self): self.assertIn(""" text\n""", err) self.assertTrue(err.endswith("SyntaxError: msg\n")) - # TODO: RUSTPYTHON, print argument error to stderr in sys.excepthook instead of throwing - @unittest.expectedFailure def test_excepthook(self): with test.support.captured_output("stderr") as stderr: sys.excepthook(1, '1', 1) @@ -260,8 +258,6 @@ def test_getdefaultencoding(self): # testing sys.settrace() is done in test_sys_settrace.py # testing sys.setprofile() is done in test_sys_setprofile.py - # TODO: RUSTPYTHON, AttributeError: module 'sys' has no attribute 'setswitchinterval' - @unittest.expectedFailure def test_switchinterval(self): self.assertRaises(TypeError, sys.setswitchinterval) self.assertRaises(TypeError, sys.setswitchinterval, "a") diff --git a/Lib/test/test_syslog.py b/Lib/test/test_syslog.py index 96945bfd8b..b378d62e5c 100644 --- a/Lib/test/test_syslog.py +++ b/Lib/test/test_syslog.py @@ -55,8 +55,6 @@ def test_openlog_noargs(self): syslog.openlog() syslog.syslog('test message from python test_syslog') - # TODO: RUSTPYTHON; AttributeError: module 'sys' has no attribute 'getswitchinterval' - @unittest.expectedFailure @threading_helper.requires_working_threading() def test_syslog_threaded(self): start = threading.Event() diff --git a/Lib/test/test_tarfile.py b/Lib/test/test_tarfile.py index 63f7b347ad..0fed89c773 100644 --- a/Lib/test/test_tarfile.py +++ b/Lib/test/test_tarfile.py @@ -31,6 +31,8 @@ import lzma except ImportError: lzma = None +# XXX: RUSTPYTHON; xz is not supported yet +lzma = None def sha256sum(data): return sha256(data).hexdigest() diff --git a/Lib/test/test_threading.py b/Lib/test/test_threading.py index 92ff3dc380..94f21d1c38 100644 --- a/Lib/test/test_threading.py +++ b/Lib/test/test_threading.py @@ -412,8 +412,6 @@ def child(): b"Woke up, sleep function is: ") self.assertEqual(err, b"") - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_enumerate_after_join(self): # Try hard to trigger #1703448: a thread is still returned in # threading.enumerate() after it has been join()ed. @@ -1745,8 +1743,6 @@ def run_last(): self.assertFalse(err) self.assertEqual(out.strip(), b'parrot') - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_atexit_called_once(self): rc, out, err = assert_python_ok("-c", """if True: import threading diff --git a/Lib/test/test_time.py b/Lib/test/test_time.py index 3af18efae2..3886aae934 100644 --- a/Lib/test/test_time.py +++ b/Lib/test/test_time.py @@ -236,7 +236,6 @@ def _bounds_checking(self, func): def test_strftime_bounding_check(self): self._bounds_checking(lambda tup: time.strftime('', tup)) - @unittest.skip("TODO: RUSTPYTHON, thread 'main' panicked at 'a Display implementation returned an error unexpectedly: Error'") def test_strftime_format_check(self): # Test that strftime does not crash on invalid format strings # that may trigger a buffer overread. When not triggered, @@ -459,7 +458,6 @@ def test_mktime(self): # Issue #13309: passing extreme values to mktime() or localtime() # borks the glibc's internal timezone data. - @unittest.skip("TODO: RUSTPYTHON, thread 'main' panicked at 'a Display implementation returned an error unexpectedly: Error'") @unittest.skipUnless(platform.libc_ver()[0] != 'glibc', "disabled because of a bug in glibc. Issue #13309") def test_mktime_error(self): diff --git a/Lib/test/test_tools/__init__.py b/Lib/test/test_tools/__init__.py new file mode 100644 index 0000000000..c4395c7c0a --- /dev/null +++ b/Lib/test/test_tools/__init__.py @@ -0,0 +1,43 @@ +"""Support functions for testing scripts in the Tools directory.""" +import contextlib +import importlib +import os.path +import unittest +from test import support +from test.support import import_helper + + +if not support.has_subprocess_support: + raise unittest.SkipTest("test module requires subprocess") + + +basepath = os.path.normpath( + os.path.dirname( # + os.path.dirname( # Lib + os.path.dirname( # test + os.path.dirname(__file__))))) # test_tools + +toolsdir = os.path.join(basepath, 'Tools') +scriptsdir = os.path.join(toolsdir, 'scripts') + +def skip_if_missing(tool=None): + if tool: + tooldir = os.path.join(toolsdir, tool) + else: + tool = 'scripts' + tooldir = scriptsdir + if not os.path.isdir(tooldir): + raise unittest.SkipTest(f'{tool} directory could not be found') + +@contextlib.contextmanager +def imports_under_tool(name, *subdirs): + tooldir = os.path.join(toolsdir, name, *subdirs) + with import_helper.DirsOnSysPath(tooldir) as cm: + yield cm + +def import_tool(toolname): + with import_helper.DirsOnSysPath(scriptsdir): + return importlib.import_module(toolname) + +def load_tests(*args): + return support.load_package_tests(os.path.dirname(__file__), *args) diff --git a/Lib/test/test_tools/__main__.py b/Lib/test/test_tools/__main__.py new file mode 100644 index 0000000000..b6f13e534e --- /dev/null +++ b/Lib/test/test_tools/__main__.py @@ -0,0 +1,4 @@ +from test.test_tools import load_tests +import unittest + +unittest.main() diff --git a/Lib/test/test_tools/i18n_data/ascii-escapes.pot b/Lib/test/test_tools/i18n_data/ascii-escapes.pot new file mode 100644 index 0000000000..18d868b6a2 --- /dev/null +++ b/Lib/test/test_tools/i18n_data/ascii-escapes.pot @@ -0,0 +1,45 @@ +# SOME DESCRIPTIVE TITLE. +# Copyright (C) YEAR ORGANIZATION +# FIRST AUTHOR , YEAR. +# +msgid "" +msgstr "" +"Project-Id-Version: PACKAGE VERSION\n" +"POT-Creation-Date: 2000-01-01 00:00+0000\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: FULL NAME \n" +"Language-Team: LANGUAGE \n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=UTF-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: pygettext.py 1.5\n" + + +#: escapes.py:5 +msgid "" +"\"\t\n" +"\r\\" +msgstr "" + +#: escapes.py:8 +msgid "" +"\000\001\002\003\004\005\006\007\010\t\n" +"\013\014\r\016\017\020\021\022\023\024\025\026\027\030\031\032\033\034\035\036\037" +msgstr "" + +#: escapes.py:13 +msgid " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~" +msgstr "" + +#: escapes.py:17 +msgid "\177" +msgstr "" + +#: escapes.py:20 +msgid "€   ÿ" +msgstr "" + +#: escapes.py:23 +msgid "α ㄱ 𓂀" +msgstr "" + diff --git a/Lib/test/test_tools/i18n_data/docstrings.pot b/Lib/test/test_tools/i18n_data/docstrings.pot new file mode 100644 index 0000000000..5af1d41422 --- /dev/null +++ b/Lib/test/test_tools/i18n_data/docstrings.pot @@ -0,0 +1,40 @@ +# SOME DESCRIPTIVE TITLE. +# Copyright (C) YEAR ORGANIZATION +# FIRST AUTHOR , YEAR. +# +msgid "" +msgstr "" +"Project-Id-Version: PACKAGE VERSION\n" +"POT-Creation-Date: 2000-01-01 00:00+0000\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: FULL NAME \n" +"Language-Team: LANGUAGE \n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=UTF-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: pygettext.py 1.5\n" + + +#: docstrings.py:7 +#, docstring +msgid "" +msgstr "" + +#: docstrings.py:18 +#, docstring +msgid "" +"multiline\n" +" docstring\n" +" " +msgstr "" + +#: docstrings.py:25 +#, docstring +msgid "docstring1" +msgstr "" + +#: docstrings.py:30 +#, docstring +msgid "Hello, {}!" +msgstr "" + diff --git a/Lib/test/test_tools/i18n_data/docstrings.py b/Lib/test/test_tools/i18n_data/docstrings.py new file mode 100644 index 0000000000..85d7f159d3 --- /dev/null +++ b/Lib/test/test_tools/i18n_data/docstrings.py @@ -0,0 +1,41 @@ +# Test docstring extraction +from gettext import gettext as _ + + +# Empty docstring +def test(x): + """""" + + +# Leading empty line +def test2(x): + + """docstring""" # XXX This should be extracted but isn't. + + +# XXX Multiline docstrings should be cleaned with `inspect.cleandoc`. +def test3(x): + """multiline + docstring + """ + + +# Multiple docstrings - only the first should be extracted +def test4(x): + """docstring1""" + """docstring2""" + + +def test5(x): + """Hello, {}!""".format("world!") # XXX This should not be extracted. + + +# Nested docstrings +def test6(x): + def inner(y): + """nested docstring""" # XXX This should be extracted but isn't. + + +class Outer: + class Inner: + "nested class docstring" # XXX This should be extracted but isn't. diff --git a/Lib/test/test_tools/i18n_data/escapes.pot b/Lib/test/test_tools/i18n_data/escapes.pot new file mode 100644 index 0000000000..2c7899d59d --- /dev/null +++ b/Lib/test/test_tools/i18n_data/escapes.pot @@ -0,0 +1,45 @@ +# SOME DESCRIPTIVE TITLE. +# Copyright (C) YEAR ORGANIZATION +# FIRST AUTHOR , YEAR. +# +msgid "" +msgstr "" +"Project-Id-Version: PACKAGE VERSION\n" +"POT-Creation-Date: 2000-01-01 00:00+0000\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: FULL NAME \n" +"Language-Team: LANGUAGE \n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=UTF-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: pygettext.py 1.5\n" + + +#: escapes.py:5 +msgid "" +"\"\t\n" +"\r\\" +msgstr "" + +#: escapes.py:8 +msgid "" +"\000\001\002\003\004\005\006\007\010\t\n" +"\013\014\r\016\017\020\021\022\023\024\025\026\027\030\031\032\033\034\035\036\037" +msgstr "" + +#: escapes.py:13 +msgid " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~" +msgstr "" + +#: escapes.py:17 +msgid "\177" +msgstr "" + +#: escapes.py:20 +msgid "\302\200 \302\240 \303\277" +msgstr "" + +#: escapes.py:23 +msgid "\316\261 \343\204\261 \360\223\202\200" +msgstr "" + diff --git a/Lib/test/test_tools/i18n_data/escapes.py b/Lib/test/test_tools/i18n_data/escapes.py new file mode 100644 index 0000000000..900bd97a70 --- /dev/null +++ b/Lib/test/test_tools/i18n_data/escapes.py @@ -0,0 +1,23 @@ +import gettext as _ + + +# Special characters that are always escaped in the POT file +_('"\t\n\r\\') + +# All ascii characters 0-31 +_('\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n' + '\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15' + '\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f') + +# All ascii characters 32-126 +_(' !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ' + '[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~') + +# ascii char 127 +_('\x7f') + +# some characters in the 128-255 range +_('\x80 \xa0 ÿ') + +# some characters >= 256 encoded as 2, 3 and 4 bytes, respectively +_('α ㄱ 𓂀') diff --git a/Lib/test/test_tools/i18n_data/fileloc.pot b/Lib/test/test_tools/i18n_data/fileloc.pot new file mode 100644 index 0000000000..dbd28687a7 --- /dev/null +++ b/Lib/test/test_tools/i18n_data/fileloc.pot @@ -0,0 +1,35 @@ +# SOME DESCRIPTIVE TITLE. +# Copyright (C) YEAR ORGANIZATION +# FIRST AUTHOR , YEAR. +# +msgid "" +msgstr "" +"Project-Id-Version: PACKAGE VERSION\n" +"POT-Creation-Date: 2000-01-01 00:00+0000\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: FULL NAME \n" +"Language-Team: LANGUAGE \n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=UTF-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: pygettext.py 1.5\n" + + +#: fileloc.py:5 fileloc.py:6 +msgid "foo" +msgstr "" + +#: fileloc.py:9 +msgid "bar" +msgstr "" + +#: fileloc.py:14 fileloc.py:18 +#, docstring +msgid "docstring" +msgstr "" + +#: fileloc.py:22 fileloc.py:26 +#, docstring +msgid "baz" +msgstr "" + diff --git a/Lib/test/test_tools/i18n_data/fileloc.py b/Lib/test/test_tools/i18n_data/fileloc.py new file mode 100644 index 0000000000..c5d4d0595f --- /dev/null +++ b/Lib/test/test_tools/i18n_data/fileloc.py @@ -0,0 +1,26 @@ +# Test file locations +from gettext import gettext as _ + +# Duplicate strings +_('foo') +_('foo') + +# Duplicate strings on the same line should only add one location to the output +_('bar'), _('bar') + + +# Duplicate docstrings +class A: + """docstring""" + + +def f(): + """docstring""" + + +# Duplicate message and docstring +_('baz') + + +def g(): + """baz""" diff --git a/Lib/test/test_tools/i18n_data/messages.pot b/Lib/test/test_tools/i18n_data/messages.pot new file mode 100644 index 0000000000..ddfbd18349 --- /dev/null +++ b/Lib/test/test_tools/i18n_data/messages.pot @@ -0,0 +1,67 @@ +# SOME DESCRIPTIVE TITLE. +# Copyright (C) YEAR ORGANIZATION +# FIRST AUTHOR , YEAR. +# +msgid "" +msgstr "" +"Project-Id-Version: PACKAGE VERSION\n" +"POT-Creation-Date: 2000-01-01 00:00+0000\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: FULL NAME \n" +"Language-Team: LANGUAGE \n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=UTF-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: pygettext.py 1.5\n" + + +#: messages.py:5 +msgid "" +msgstr "" + +#: messages.py:8 messages.py:9 +msgid "parentheses" +msgstr "" + +#: messages.py:12 +msgid "Hello, world!" +msgstr "" + +#: messages.py:15 +msgid "" +"Hello,\n" +" multiline!\n" +msgstr "" + +#: messages.py:29 +msgid "Hello, {}!" +msgstr "" + +#: messages.py:33 +msgid "1" +msgstr "" + +#: messages.py:33 +msgid "2" +msgstr "" + +#: messages.py:34 messages.py:35 +msgid "A" +msgstr "" + +#: messages.py:34 messages.py:35 +msgid "B" +msgstr "" + +#: messages.py:36 +msgid "set" +msgstr "" + +#: messages.py:42 +msgid "nested string" +msgstr "" + +#: messages.py:47 +msgid "baz" +msgstr "" + diff --git a/Lib/test/test_tools/i18n_data/messages.py b/Lib/test/test_tools/i18n_data/messages.py new file mode 100644 index 0000000000..f220294b8d --- /dev/null +++ b/Lib/test/test_tools/i18n_data/messages.py @@ -0,0 +1,64 @@ +# Test message extraction +from gettext import gettext as _ + +# Empty string +_("") + +# Extra parentheses +(_("parentheses")) +((_("parentheses"))) + +# Multiline strings +_("Hello, " + "world!") + +_("""Hello, + multiline! +""") + +# Invalid arguments +_() +_(None) +_(1) +_(False) +_(x="kwargs are not allowed") +_("foo", "bar") +_("something", x="something else") + +# .format() +_("Hello, {}!").format("world") # valid +_("Hello, {}!".format("world")) # invalid + +# Nested structures +_("1"), _("2") +arr = [_("A"), _("B")] +obj = {'a': _("A"), 'b': _("B")} +{{{_('set')}}} + + +# Nested functions and classes +def test(): + _("nested string") # XXX This should be extracted but isn't. + [_("nested string")] + + +class Foo: + def bar(self): + return _("baz") + + +def bar(x=_('default value')): # XXX This should be extracted but isn't. + pass + + +def baz(x=[_('default value')]): # XXX This should be extracted but isn't. + pass + + +# Shadowing _() +def _(x): + pass + + +def _(x="don't extract me"): + pass diff --git a/Lib/test/test_tools/msgfmt_data/fuzzy.json b/Lib/test/test_tools/msgfmt_data/fuzzy.json new file mode 100644 index 0000000000..fe51488c70 --- /dev/null +++ b/Lib/test/test_tools/msgfmt_data/fuzzy.json @@ -0,0 +1 @@ +[] diff --git a/Lib/test/test_tools/msgfmt_data/fuzzy.mo b/Lib/test/test_tools/msgfmt_data/fuzzy.mo new file mode 100644 index 0000000000..4b144831cf Binary files /dev/null and b/Lib/test/test_tools/msgfmt_data/fuzzy.mo differ diff --git a/Lib/test/test_tools/msgfmt_data/fuzzy.po b/Lib/test/test_tools/msgfmt_data/fuzzy.po new file mode 100644 index 0000000000..05e8354948 --- /dev/null +++ b/Lib/test/test_tools/msgfmt_data/fuzzy.po @@ -0,0 +1,23 @@ +# Fuzzy translations are not written to the .mo file. +#, fuzzy +msgid "foo" +msgstr "bar" + +# comment +#, fuzzy +msgctxt "abc" +msgid "foo" +msgstr "bar" + +#, fuzzy +# comment +msgctxt "xyz" +msgid "foo" +msgstr "bar" + +#, fuzzy +msgctxt "abc" +msgid "One email sent." +msgid_plural "%d emails sent." +msgstr[0] "One email sent." +msgstr[1] "%d emails sent." diff --git a/Lib/test/test_tools/msgfmt_data/general.json b/Lib/test/test_tools/msgfmt_data/general.json new file mode 100644 index 0000000000..0586113985 --- /dev/null +++ b/Lib/test/test_tools/msgfmt_data/general.json @@ -0,0 +1,58 @@ +[ + [ + "", + "Project-Id-Version: PACKAGE VERSION\nPO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\nLast-Translator: FULL NAME \nLanguage-Team: LANGUAGE \nMIME-Version: 1.0\nContent-Type: text/plain; charset=UTF-8\nContent-Transfer-Encoding: 8bit\n" + ], + [ + "\n newlines \n", + "\n translated \n" + ], + [ + "\"escapes\"", + "\"translated\"" + ], + [ + "Multilinestring", + "Multilinetranslation" + ], + [ + "abc\u0004foo", + "bar" + ], + [ + "bar", + "baz" + ], + [ + "xyz\u0004foo", + "bar" + ], + [ + [ + "One email sent.", + 0 + ], + "One email sent." + ], + [ + [ + "One email sent.", + 1 + ], + "%d emails sent." + ], + [ + [ + "abc\u0004One email sent.", + 0 + ], + "One email sent." + ], + [ + [ + "abc\u0004One email sent.", + 1 + ], + "%d emails sent." + ] +] diff --git a/Lib/test/test_tools/msgfmt_data/general.mo b/Lib/test/test_tools/msgfmt_data/general.mo new file mode 100644 index 0000000000..ee905cbb3e Binary files /dev/null and b/Lib/test/test_tools/msgfmt_data/general.mo differ diff --git a/Lib/test/test_tools/msgfmt_data/general.po b/Lib/test/test_tools/msgfmt_data/general.po new file mode 100644 index 0000000000..8f84042682 --- /dev/null +++ b/Lib/test/test_tools/msgfmt_data/general.po @@ -0,0 +1,47 @@ +msgid "" +msgstr "" +"Project-Id-Version: PACKAGE VERSION\n" +"POT-Creation-Date: 2024-10-26 18:06+0200\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: FULL NAME \n" +"Language-Team: LANGUAGE \n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=UTF-8\n" +"Content-Transfer-Encoding: 8bit\n" + +msgid "foo" +msgstr "" + +msgid "bar" +msgstr "baz" + +msgctxt "abc" +msgid "foo" +msgstr "bar" + +# comment +msgctxt "xyz" +msgid "foo" +msgstr "bar" + +msgid "Multiline" +"string" +msgstr "Multiline" +"translation" + +msgid "\"escapes\"" +msgstr "\"translated\"" + +msgid "\n newlines \n" +msgstr "\n translated \n" + +msgid "One email sent." +msgid_plural "%d emails sent." +msgstr[0] "One email sent." +msgstr[1] "%d emails sent." + +msgctxt "abc" +msgid "One email sent." +msgid_plural "%d emails sent." +msgstr[0] "One email sent." +msgstr[1] "%d emails sent." diff --git a/Lib/test/test_tools/test_freeze.py b/Lib/test/test_tools/test_freeze.py new file mode 100644 index 0000000000..0e7ed67de7 --- /dev/null +++ b/Lib/test/test_tools/test_freeze.py @@ -0,0 +1,37 @@ +"""Sanity-check tests for the "freeze" tool.""" + +import sys +import textwrap +import unittest + +from test import support +from test.support import os_helper +from test.test_tools import imports_under_tool, skip_if_missing + +skip_if_missing('freeze') +with imports_under_tool('freeze', 'test'): + import freeze as helper + +@support.requires_zlib() +@unittest.skipIf(sys.platform.startswith('win'), 'not supported on Windows') +@unittest.skipIf(sys.platform == 'darwin' and sys._framework, + 'not supported for frameworks builds on macOS') +@support.skip_if_buildbot('not all buildbots have enough space') +# gh-103053: Skip test if Python is built with Profile Guided Optimization +# (PGO), since the test is just too slow in this case. +@unittest.skipIf(support.check_cflags_pgo(), + 'test is too slow with PGO') +class TestFreeze(unittest.TestCase): + + @support.requires_resource('cpu') # Building Python is slow + def test_freeze_simple_script(self): + script = textwrap.dedent(""" + import sys + print('running...') + sys.exit(0) + """) + with os_helper.temp_dir() as outdir: + outdir, scriptfile, python = helper.prepare(script, outdir) + executable = helper.freeze(python, scriptfile, outdir) + text = helper.run(executable) + self.assertEqual(text, 'running...') diff --git a/Lib/test/test_tools/test_i18n.py b/Lib/test/test_tools/test_i18n.py new file mode 100644 index 0000000000..ffa1b1178e --- /dev/null +++ b/Lib/test/test_tools/test_i18n.py @@ -0,0 +1,444 @@ +"""Tests to cover the Tools/i18n package""" + +import os +import re +import sys +import unittest +from textwrap import dedent +from pathlib import Path + +from test.support.script_helper import assert_python_ok +from test.test_tools import skip_if_missing, toolsdir +from test.support.os_helper import temp_cwd, temp_dir + + +skip_if_missing() + +DATA_DIR = Path(__file__).resolve().parent / 'i18n_data' + + +def normalize_POT_file(pot): + """Normalize the POT creation timestamp, charset and + file locations to make the POT file easier to compare. + + """ + # Normalize the creation date. + date_pattern = re.compile(r'"POT-Creation-Date: .+?\\n"') + header = r'"POT-Creation-Date: 2000-01-01 00:00+0000\\n"' + pot = re.sub(date_pattern, header, pot) + + # Normalize charset to UTF-8 (currently there's no way to specify the output charset). + charset_pattern = re.compile(r'"Content-Type: text/plain; charset=.+?\\n"') + charset = r'"Content-Type: text/plain; charset=UTF-8\\n"' + pot = re.sub(charset_pattern, charset, pot) + + # Normalize file location path separators in case this test is + # running on Windows (which uses '\'). + fileloc_pattern = re.compile(r'#:.+') + + def replace(match): + return match[0].replace(os.sep, "/") + pot = re.sub(fileloc_pattern, replace, pot) + return pot + + +class Test_pygettext(unittest.TestCase): + """Tests for the pygettext.py tool""" + + script = Path(toolsdir, 'i18n', 'pygettext.py') + + def get_header(self, data): + """ utility: return the header of a .po file as a dictionary """ + headers = {} + for line in data.split('\n'): + if not line or line.startswith(('#', 'msgid', 'msgstr')): + continue + line = line.strip('"') + key, val = line.split(':', 1) + headers[key] = val.strip() + return headers + + def get_msgids(self, data): + """ utility: return all msgids in .po file as a list of strings """ + msgids = [] + reading_msgid = False + cur_msgid = [] + for line in data.split('\n'): + if reading_msgid: + if line.startswith('"'): + cur_msgid.append(line.strip('"')) + else: + msgids.append('\n'.join(cur_msgid)) + cur_msgid = [] + reading_msgid = False + continue + if line.startswith('msgid '): + line = line[len('msgid '):] + cur_msgid.append(line.strip('"')) + reading_msgid = True + else: + if reading_msgid: + msgids.append('\n'.join(cur_msgid)) + + return msgids + + def assert_POT_equal(self, expected, actual): + """Check if two POT files are equal""" + self.maxDiff = None + self.assertEqual(normalize_POT_file(expected), normalize_POT_file(actual)) + + def extract_from_str(self, module_content, *, args=(), strict=True): + """Return all msgids extracted from module_content.""" + filename = 'test.py' + with temp_cwd(None): + with open(filename, 'w', encoding='utf-8') as fp: + fp.write(module_content) + res = assert_python_ok('-Xutf8', self.script, *args, filename) + if strict: + self.assertEqual(res.err, b'') + with open('messages.pot', encoding='utf-8') as fp: + data = fp.read() + return self.get_msgids(data) + + def extract_docstrings_from_str(self, module_content): + """Return all docstrings extracted from module_content.""" + return self.extract_from_str(module_content, args=('--docstrings',), strict=False) + + def test_header(self): + """Make sure the required fields are in the header, according to: + http://www.gnu.org/software/gettext/manual/gettext.html#Header-Entry + """ + with temp_cwd(None) as cwd: + assert_python_ok('-Xutf8', self.script) + with open('messages.pot', encoding='utf-8') as fp: + data = fp.read() + header = self.get_header(data) + + self.assertIn("Project-Id-Version", header) + self.assertIn("POT-Creation-Date", header) + self.assertIn("PO-Revision-Date", header) + self.assertIn("Last-Translator", header) + self.assertIn("Language-Team", header) + self.assertIn("MIME-Version", header) + self.assertIn("Content-Type", header) + self.assertIn("Content-Transfer-Encoding", header) + self.assertIn("Generated-By", header) + + # not clear if these should be required in POT (template) files + #self.assertIn("Report-Msgid-Bugs-To", header) + #self.assertIn("Language", header) + + #"Plural-Forms" is optional + + @unittest.skipIf(sys.platform.startswith('aix'), + 'bpo-29972: broken test on AIX') + def test_POT_Creation_Date(self): + """ Match the date format from xgettext for POT-Creation-Date """ + from datetime import datetime + with temp_cwd(None) as cwd: + assert_python_ok('-Xutf8', self.script) + with open('messages.pot', encoding='utf-8') as fp: + data = fp.read() + header = self.get_header(data) + creationDate = header['POT-Creation-Date'] + + # peel off the escaped newline at the end of string + if creationDate.endswith('\\n'): + creationDate = creationDate[:-len('\\n')] + + # This will raise if the date format does not exactly match. + datetime.strptime(creationDate, '%Y-%m-%d %H:%M%z') + + def test_funcdocstring(self): + for doc in ('"""doc"""', "r'''doc'''", "R'doc'", 'u"doc"'): + with self.subTest(doc): + msgids = self.extract_docstrings_from_str(dedent('''\ + def foo(bar): + %s + ''' % doc)) + self.assertIn('doc', msgids) + + def test_funcdocstring_bytes(self): + msgids = self.extract_docstrings_from_str(dedent('''\ + def foo(bar): + b"""doc""" + ''')) + self.assertFalse([msgid for msgid in msgids if 'doc' in msgid]) + + def test_funcdocstring_fstring(self): + msgids = self.extract_docstrings_from_str(dedent('''\ + def foo(bar): + f"""doc""" + ''')) + self.assertFalse([msgid for msgid in msgids if 'doc' in msgid]) + + def test_classdocstring(self): + for doc in ('"""doc"""', "r'''doc'''", "R'doc'", 'u"doc"'): + with self.subTest(doc): + msgids = self.extract_docstrings_from_str(dedent('''\ + class C: + %s + ''' % doc)) + self.assertIn('doc', msgids) + + def test_classdocstring_bytes(self): + msgids = self.extract_docstrings_from_str(dedent('''\ + class C: + b"""doc""" + ''')) + self.assertFalse([msgid for msgid in msgids if 'doc' in msgid]) + + def test_classdocstring_fstring(self): + msgids = self.extract_docstrings_from_str(dedent('''\ + class C: + f"""doc""" + ''')) + self.assertFalse([msgid for msgid in msgids if 'doc' in msgid]) + + def test_moduledocstring(self): + for doc in ('"""doc"""', "r'''doc'''", "R'doc'", 'u"doc"'): + with self.subTest(doc): + msgids = self.extract_docstrings_from_str(dedent('''\ + %s + ''' % doc)) + self.assertIn('doc', msgids) + + def test_moduledocstring_bytes(self): + msgids = self.extract_docstrings_from_str(dedent('''\ + b"""doc""" + ''')) + self.assertFalse([msgid for msgid in msgids if 'doc' in msgid]) + + def test_moduledocstring_fstring(self): + msgids = self.extract_docstrings_from_str(dedent('''\ + f"""doc""" + ''')) + self.assertFalse([msgid for msgid in msgids if 'doc' in msgid]) + + def test_msgid(self): + msgids = self.extract_docstrings_from_str( + '''_("""doc""" r'str' u"ing")''') + self.assertIn('docstring', msgids) + + def test_msgid_bytes(self): + msgids = self.extract_docstrings_from_str('_(b"""doc""")') + self.assertFalse([msgid for msgid in msgids if 'doc' in msgid]) + + def test_msgid_fstring(self): + msgids = self.extract_docstrings_from_str('_(f"""doc""")') + self.assertFalse([msgid for msgid in msgids if 'doc' in msgid]) + + def test_funcdocstring_annotated_args(self): + """ Test docstrings for functions with annotated args """ + msgids = self.extract_docstrings_from_str(dedent('''\ + def foo(bar: str): + """doc""" + ''')) + self.assertIn('doc', msgids) + + def test_funcdocstring_annotated_return(self): + """ Test docstrings for functions with annotated return type """ + msgids = self.extract_docstrings_from_str(dedent('''\ + def foo(bar) -> str: + """doc""" + ''')) + self.assertIn('doc', msgids) + + def test_funcdocstring_defvalue_args(self): + """ Test docstring for functions with default arg values """ + msgids = self.extract_docstrings_from_str(dedent('''\ + def foo(bar=()): + """doc""" + ''')) + self.assertIn('doc', msgids) + + def test_funcdocstring_multiple_funcs(self): + """ Test docstring extraction for multiple functions combining + annotated args, annotated return types and default arg values + """ + msgids = self.extract_docstrings_from_str(dedent('''\ + def foo1(bar: tuple=()) -> str: + """doc1""" + + def foo2(bar: List[1:2]) -> (lambda x: x): + """doc2""" + + def foo3(bar: 'func'=lambda x: x) -> {1: 2}: + """doc3""" + ''')) + self.assertIn('doc1', msgids) + self.assertIn('doc2', msgids) + self.assertIn('doc3', msgids) + + def test_classdocstring_early_colon(self): + """ Test docstring extraction for a class with colons occurring within + the parentheses. + """ + msgids = self.extract_docstrings_from_str(dedent('''\ + class D(L[1:2], F({1: 2}), metaclass=M(lambda x: x)): + """doc""" + ''')) + self.assertIn('doc', msgids) + + def test_calls_in_fstrings(self): + msgids = self.extract_docstrings_from_str(dedent('''\ + f"{_('foo bar')}" + ''')) + self.assertIn('foo bar', msgids) + + def test_calls_in_fstrings_raw(self): + msgids = self.extract_docstrings_from_str(dedent('''\ + rf"{_('foo bar')}" + ''')) + self.assertIn('foo bar', msgids) + + def test_calls_in_fstrings_nested(self): + msgids = self.extract_docstrings_from_str(dedent('''\ + f"""{f'{_("foo bar")}'}""" + ''')) + self.assertIn('foo bar', msgids) + + def test_calls_in_fstrings_attribute(self): + msgids = self.extract_docstrings_from_str(dedent('''\ + f"{obj._('foo bar')}" + ''')) + self.assertIn('foo bar', msgids) + + def test_calls_in_fstrings_with_call_on_call(self): + msgids = self.extract_docstrings_from_str(dedent('''\ + f"{type(str)('foo bar')}" + ''')) + self.assertNotIn('foo bar', msgids) + + def test_calls_in_fstrings_with_format(self): + msgids = self.extract_docstrings_from_str(dedent('''\ + f"{_('foo {bar}').format(bar='baz')}" + ''')) + self.assertIn('foo {bar}', msgids) + + def test_calls_in_fstrings_with_wrong_input_1(self): + msgids = self.extract_docstrings_from_str(dedent('''\ + f"{_(f'foo {bar}')}" + ''')) + self.assertFalse([msgid for msgid in msgids if 'foo {bar}' in msgid]) + + def test_calls_in_fstrings_with_wrong_input_2(self): + msgids = self.extract_docstrings_from_str(dedent('''\ + f"{_(1)}" + ''')) + self.assertNotIn(1, msgids) + + def test_calls_in_fstring_with_multiple_args(self): + msgids = self.extract_docstrings_from_str(dedent('''\ + f"{_('foo', 'bar')}" + ''')) + self.assertNotIn('foo', msgids) + self.assertNotIn('bar', msgids) + + def test_calls_in_fstring_with_keyword_args(self): + msgids = self.extract_docstrings_from_str(dedent('''\ + f"{_('foo', bar='baz')}" + ''')) + self.assertNotIn('foo', msgids) + self.assertNotIn('bar', msgids) + self.assertNotIn('baz', msgids) + + def test_calls_in_fstring_with_partially_wrong_expression(self): + msgids = self.extract_docstrings_from_str(dedent('''\ + f"{_(f'foo') + _('bar')}" + ''')) + self.assertNotIn('foo', msgids) + self.assertIn('bar', msgids) + + def test_function_and_class_names(self): + """Test that function and class names are not mistakenly extracted.""" + msgids = self.extract_from_str(dedent('''\ + def _(x): + pass + + def _(x="foo"): + pass + + async def _(x): + pass + + class _(object): + pass + ''')) + self.assertEqual(msgids, ['']) + + def test_pygettext_output(self): + """Test that the pygettext output exactly matches snapshots.""" + for input_file, output_file, output in extract_from_snapshots(): + with self.subTest(input_file=input_file): + expected = output_file.read_text(encoding='utf-8') + self.assert_POT_equal(expected, output) + + def test_files_list(self): + """Make sure the directories are inspected for source files + bpo-31920 + """ + text1 = 'Text to translate1' + text2 = 'Text to translate2' + text3 = 'Text to ignore' + with temp_cwd(None), temp_dir(None) as sdir: + pymod = Path(sdir, 'pypkg', 'pymod.py') + pymod.parent.mkdir() + pymod.write_text(f'_({text1!r})', encoding='utf-8') + + pymod2 = Path(sdir, 'pkg.py', 'pymod2.py') + pymod2.parent.mkdir() + pymod2.write_text(f'_({text2!r})', encoding='utf-8') + + pymod3 = Path(sdir, 'CVS', 'pymod3.py') + pymod3.parent.mkdir() + pymod3.write_text(f'_({text3!r})', encoding='utf-8') + + assert_python_ok('-Xutf8', self.script, sdir) + data = Path('messages.pot').read_text(encoding='utf-8') + self.assertIn(f'msgid "{text1}"', data) + self.assertIn(f'msgid "{text2}"', data) + self.assertNotIn(text3, data) + + +def extract_from_snapshots(): + snapshots = { + 'messages.py': ('--docstrings',), + 'fileloc.py': ('--docstrings',), + 'docstrings.py': ('--docstrings',), + # == Test character escaping + # Escape ascii and unicode: + 'escapes.py': ('--escape',), + # Escape only ascii and let unicode pass through: + ('escapes.py', 'ascii-escapes.pot'): (), + } + + for filename, args in snapshots.items(): + if isinstance(filename, tuple): + filename, output_file = filename + output_file = DATA_DIR / output_file + input_file = DATA_DIR / filename + else: + input_file = DATA_DIR / filename + output_file = input_file.with_suffix('.pot') + contents = input_file.read_bytes() + with temp_cwd(None): + Path(input_file.name).write_bytes(contents) + assert_python_ok('-Xutf8', Test_pygettext.script, *args, + input_file.name) + yield (input_file, output_file, + Path('messages.pot').read_text(encoding='utf-8')) + + +def update_POT_snapshots(): + for _, output_file, output in extract_from_snapshots(): + output = normalize_POT_file(output) + output_file.write_text(output, encoding='utf-8') + + +if __name__ == '__main__': + # To regenerate POT files + if len(sys.argv) > 1 and sys.argv[1] == '--snapshot-update': + update_POT_snapshots() + sys.exit(0) + unittest.main() diff --git a/Lib/test/test_tools/test_makefile.py b/Lib/test/test_tools/test_makefile.py new file mode 100644 index 0000000000..4c7588d4d9 --- /dev/null +++ b/Lib/test/test_tools/test_makefile.py @@ -0,0 +1,81 @@ +""" +Tests for `Makefile`. +""" + +import os +import unittest +from test import support +import sysconfig + +MAKEFILE = sysconfig.get_makefile_filename() + +if not support.check_impl_detail(cpython=True): + raise unittest.SkipTest('cpython only') +if not os.path.exists(MAKEFILE) or not os.path.isfile(MAKEFILE): + raise unittest.SkipTest('Makefile could not be found') + + +class TestMakefile(unittest.TestCase): + def list_test_dirs(self): + result = [] + found_testsubdirs = False + with open(MAKEFILE, 'r', encoding='utf-8') as f: + for line in f: + if line.startswith('TESTSUBDIRS='): + found_testsubdirs = True + result.append( + line.removeprefix('TESTSUBDIRS=').replace( + '\\', '', + ).strip(), + ) + continue + if found_testsubdirs: + if '\t' not in line: + break + result.append(line.replace('\\', '').strip()) + return result + + @unittest.skipUnless(support.TEST_MODULES_ENABLED, "requires test modules") + def test_makefile_test_folders(self): + test_dirs = self.list_test_dirs() + idle_test = 'idlelib/idle_test' + self.assertIn(idle_test, test_dirs) + + used = set([idle_test]) + for dirpath, dirs, files in os.walk(support.TEST_HOME_DIR): + dirname = os.path.basename(dirpath) + # Skip temporary dirs: + if dirname == '__pycache__' or dirname.startswith('.'): + dirs.clear() # do not process subfolders + continue + # Skip empty dirs: + if not dirs and not files: + continue + # Skip dirs with hidden-only files: + if files and all( + filename.startswith('.') or filename == '__pycache__' + for filename in files + ): + continue + + relpath = os.path.relpath(dirpath, support.STDLIB_DIR) + with self.subTest(relpath=relpath): + self.assertIn( + relpath, + test_dirs, + msg=( + f"{relpath!r} is not included in the Makefile's list " + "of test directories to install" + ) + ) + used.add(relpath) + + # Don't check the wheel dir when Python is built --with-wheel-pkg-dir + if sysconfig.get_config_var('WHEEL_PKG_DIR'): + test_dirs.remove('test/wheeldata') + used.discard('test/wheeldata') + + # Check that there are no extra entries: + unique_test_dirs = set(test_dirs) + self.assertSetEqual(unique_test_dirs, used) + self.assertEqual(len(test_dirs), len(unique_test_dirs)) diff --git a/Lib/test/test_tools/test_makeunicodedata.py b/Lib/test/test_tools/test_makeunicodedata.py new file mode 100644 index 0000000000..f31375117e --- /dev/null +++ b/Lib/test/test_tools/test_makeunicodedata.py @@ -0,0 +1,122 @@ +import unittest +from test.test_tools import skip_if_missing, imports_under_tool +from test import support +from test.support.hypothesis_helper import hypothesis + +st = hypothesis.strategies +given = hypothesis.given +example = hypothesis.example + + +skip_if_missing("unicode") +with imports_under_tool("unicode"): + from dawg import Dawg, build_compression_dawg, lookup, inverse_lookup + + +@st.composite +def char_name_db(draw, min_length=1, max_length=30): + m = draw(st.integers(min_value=min_length, max_value=max_length)) + names = draw( + st.sets(st.text("abcd", min_size=1, max_size=10), min_size=m, max_size=m) + ) + characters = draw(st.sets(st.characters(), min_size=m, max_size=m)) + return list(zip(names, characters)) + + +class TestDawg(unittest.TestCase): + """Tests for the directed acyclic word graph data structure that is used + to store the unicode character names in unicodedata. Tests ported from PyPy + """ + + def test_dawg_direct_simple(self): + dawg = Dawg() + dawg.insert("a", -4) + dawg.insert("c", -2) + dawg.insert("cat", -1) + dawg.insert("catarr", 0) + dawg.insert("catnip", 1) + dawg.insert("zcatnip", 5) + packed, data, inverse = dawg.finish() + + self.assertEqual(lookup(packed, data, b"a"), -4) + self.assertEqual(lookup(packed, data, b"c"), -2) + self.assertEqual(lookup(packed, data, b"cat"), -1) + self.assertEqual(lookup(packed, data, b"catarr"), 0) + self.assertEqual(lookup(packed, data, b"catnip"), 1) + self.assertEqual(lookup(packed, data, b"zcatnip"), 5) + self.assertRaises(KeyError, lookup, packed, data, b"b") + self.assertRaises(KeyError, lookup, packed, data, b"catni") + self.assertRaises(KeyError, lookup, packed, data, b"catnipp") + + self.assertEqual(inverse_lookup(packed, inverse, -4), b"a") + self.assertEqual(inverse_lookup(packed, inverse, -2), b"c") + self.assertEqual(inverse_lookup(packed, inverse, -1), b"cat") + self.assertEqual(inverse_lookup(packed, inverse, 0), b"catarr") + self.assertEqual(inverse_lookup(packed, inverse, 1), b"catnip") + self.assertEqual(inverse_lookup(packed, inverse, 5), b"zcatnip") + self.assertRaises(KeyError, inverse_lookup, packed, inverse, 12) + + def test_forbid_empty_dawg(self): + dawg = Dawg() + self.assertRaises(ValueError, dawg.finish) + + @given(char_name_db()) + @example([("abc", "a"), ("abd", "b")]) + @example( + [ + ("bab", "1"), + ("a", ":"), + ("ad", "@"), + ("b", "<"), + ("aacc", "?"), + ("dab", "D"), + ("aa", "0"), + ("ab", "F"), + ("aaa", "7"), + ("cbd", "="), + ("abad", ";"), + ("ac", "B"), + ("abb", "4"), + ("bb", "2"), + ("aab", "9"), + ("caaaaba", "E"), + ("ca", ">"), + ("bbaaa", "5"), + ("d", "3"), + ("baac", "8"), + ("c", "6"), + ("ba", "A"), + ] + ) + @example( + [ + ("bcdac", "9"), + ("acc", "g"), + ("d", "d"), + ("daabdda", "0"), + ("aba", ";"), + ("c", "6"), + ("aa", "7"), + ("abbd", "c"), + ("badbd", "?"), + ("bbd", "f"), + ("cc", "@"), + ("bb", "8"), + ("daca", ">"), + ("ba", ":"), + ("baac", "3"), + ("dbdddac", "a"), + ("a", "2"), + ("cabd", "b"), + ("b", "="), + ("abd", "4"), + ("adcbd", "5"), + ("abc", "e"), + ("ab", "1"), + ] + ) + def test_dawg(self, data): + # suppress debug prints + with support.captured_stdout() as output: + # it's enough to build it, building will also check the result + build_compression_dawg(data) diff --git a/Lib/test/test_tools/test_msgfmt.py b/Lib/test/test_tools/test_msgfmt.py new file mode 100644 index 0000000000..8cd31680f7 --- /dev/null +++ b/Lib/test/test_tools/test_msgfmt.py @@ -0,0 +1,159 @@ +"""Tests for the Tools/i18n/msgfmt.py tool.""" + +import json +import sys +import unittest +from gettext import GNUTranslations +from pathlib import Path + +from test.support.os_helper import temp_cwd +from test.support.script_helper import assert_python_failure, assert_python_ok +from test.test_tools import skip_if_missing, toolsdir + + +skip_if_missing('i18n') + +data_dir = (Path(__file__).parent / 'msgfmt_data').resolve() +script_dir = Path(toolsdir) / 'i18n' +msgfmt = script_dir / 'msgfmt.py' + + +def compile_messages(po_file, mo_file): + assert_python_ok(msgfmt, '-o', mo_file, po_file) + + +class CompilationTest(unittest.TestCase): + + def test_compilation(self): + self.maxDiff = None + with temp_cwd(): + for po_file in data_dir.glob('*.po'): + with self.subTest(po_file=po_file): + mo_file = po_file.with_suffix('.mo') + with open(mo_file, 'rb') as f: + expected = GNUTranslations(f) + + tmp_mo_file = mo_file.name + compile_messages(po_file, tmp_mo_file) + with open(tmp_mo_file, 'rb') as f: + actual = GNUTranslations(f) + + self.assertDictEqual(actual._catalog, expected._catalog) + + def test_translations(self): + with open(data_dir / 'general.mo', 'rb') as f: + t = GNUTranslations(f) + + self.assertEqual(t.gettext('foo'), 'foo') + self.assertEqual(t.gettext('bar'), 'baz') + self.assertEqual(t.pgettext('abc', 'foo'), 'bar') + self.assertEqual(t.pgettext('xyz', 'foo'), 'bar') + self.assertEqual(t.gettext('Multilinestring'), 'Multilinetranslation') + self.assertEqual(t.gettext('"escapes"'), '"translated"') + self.assertEqual(t.gettext('\n newlines \n'), '\n translated \n') + self.assertEqual(t.ngettext('One email sent.', '%d emails sent.', 1), + 'One email sent.') + self.assertEqual(t.ngettext('One email sent.', '%d emails sent.', 2), + '%d emails sent.') + self.assertEqual(t.npgettext('abc', 'One email sent.', + '%d emails sent.', 1), + 'One email sent.') + self.assertEqual(t.npgettext('abc', 'One email sent.', + '%d emails sent.', 2), + '%d emails sent.') + + def test_invalid_msgid_plural(self): + with temp_cwd(): + Path('invalid.po').write_text('''\ +msgid_plural "plural" +msgstr[0] "singular" +''') + + res = assert_python_failure(msgfmt, 'invalid.po') + err = res.err.decode('utf-8') + self.assertIn('msgid_plural not preceded by msgid', err) + + def test_plural_without_msgid_plural(self): + with temp_cwd(): + Path('invalid.po').write_text('''\ +msgid "foo" +msgstr[0] "bar" +''') + + res = assert_python_failure(msgfmt, 'invalid.po') + err = res.err.decode('utf-8') + self.assertIn('plural without msgid_plural', err) + + def test_indexed_msgstr_without_msgid_plural(self): + with temp_cwd(): + Path('invalid.po').write_text('''\ +msgid "foo" +msgid_plural "foos" +msgstr "bar" +''') + + res = assert_python_failure(msgfmt, 'invalid.po') + err = res.err.decode('utf-8') + self.assertIn('indexed msgstr required for plural', err) + + def test_generic_syntax_error(self): + with temp_cwd(): + Path('invalid.po').write_text('''\ +"foo" +''') + + res = assert_python_failure(msgfmt, 'invalid.po') + err = res.err.decode('utf-8') + self.assertIn('Syntax error', err) + +class CLITest(unittest.TestCase): + + def test_help(self): + for option in ('--help', '-h'): + res = assert_python_ok(msgfmt, option) + err = res.err.decode('utf-8') + self.assertIn('Generate binary message catalog from textual translation description.', err) + + def test_version(self): + for option in ('--version', '-V'): + res = assert_python_ok(msgfmt, option) + out = res.out.decode('utf-8').strip() + self.assertEqual('msgfmt.py 1.2', out) + + def test_invalid_option(self): + res = assert_python_failure(msgfmt, '--invalid-option') + err = res.err.decode('utf-8') + self.assertIn('Generate binary message catalog from textual translation description.', err) + self.assertIn('option --invalid-option not recognized', err) + + def test_no_input_file(self): + res = assert_python_ok(msgfmt) + err = res.err.decode('utf-8').replace('\r\n', '\n') + self.assertIn('No input file given\n' + "Try `msgfmt --help' for more information.", err) + + def test_nonexistent_file(self): + assert_python_failure(msgfmt, 'nonexistent.po') + + +def update_catalog_snapshots(): + for po_file in data_dir.glob('*.po'): + mo_file = po_file.with_suffix('.mo') + compile_messages(po_file, mo_file) + # Create a human-readable JSON file which is + # easier to review than the binary .mo file. + with open(mo_file, 'rb') as f: + translations = GNUTranslations(f) + catalog_file = po_file.with_suffix('.json') + with open(catalog_file, 'w') as f: + data = translations._catalog.items() + data = sorted(data, key=lambda x: (isinstance(x[0], tuple), x[0])) + json.dump(data, f, indent=4) + f.write('\n') + + +if __name__ == '__main__': + if len(sys.argv) > 1 and sys.argv[1] == '--snapshot-update': + update_catalog_snapshots() + sys.exit(0) + unittest.main() diff --git a/Lib/test/test_tools/test_reindent.py b/Lib/test/test_tools/test_reindent.py new file mode 100644 index 0000000000..64e31c2b77 --- /dev/null +++ b/Lib/test/test_tools/test_reindent.py @@ -0,0 +1,35 @@ +"""Tests for scripts in the Tools directory. + +This file contains regression tests for some of the scripts found in the +Tools directory of a Python checkout or tarball, such as reindent.py. +""" + +import os +import unittest +from test.support.script_helper import assert_python_ok +from test.support import findfile + +from test.test_tools import toolsdir, skip_if_missing + +skip_if_missing() + +class ReindentTests(unittest.TestCase): + script = os.path.join(toolsdir, 'patchcheck', 'reindent.py') + + def test_noargs(self): + assert_python_ok(self.script) + + def test_help(self): + rc, out, err = assert_python_ok(self.script, '-h') + self.assertEqual(out, b'') + self.assertGreater(err, b'') + + def test_reindent_file_with_bad_encoding(self): + bad_coding_path = findfile('bad_coding.py', subdir='tokenizedata') + rc, out, err = assert_python_ok(self.script, '-r', bad_coding_path) + self.assertEqual(out, b'') + self.assertNotEqual(err, b'') + + +if __name__ == '__main__': + unittest.main() diff --git a/Lib/test/test_tools/test_sundry.py b/Lib/test/test_tools/test_sundry.py new file mode 100644 index 0000000000..d0b702d392 --- /dev/null +++ b/Lib/test/test_tools/test_sundry.py @@ -0,0 +1,30 @@ +"""Tests for scripts in the Tools/scripts directory. + +This file contains extremely basic regression tests for the scripts found in +the Tools directory of a Python checkout or tarball which don't have separate +tests of their own. +""" + +import os +import unittest +from test.support import import_helper + +from test.test_tools import scriptsdir, import_tool, skip_if_missing + +skip_if_missing() + +class TestSundryScripts(unittest.TestCase): + # import logging registers "atfork" functions which keep indirectly the + # logging module dictionary alive. Mock the function to be able to unload + # cleanly the logging module. + @import_helper.mock_register_at_fork + def test_sundry(self, mock_os): + for fn in os.listdir(scriptsdir): + if not fn.endswith('.py'): + continue + name = fn[:-3] + import_tool(name) + + +if __name__ == '__main__': + unittest.main() diff --git a/Lib/test/test_traceback.py b/Lib/test/test_traceback.py index 28a8697235..30a13587a2 100644 --- a/Lib/test/test_traceback.py +++ b/Lib/test/test_traceback.py @@ -93,8 +93,6 @@ def test_caret(self): self.assertEqual(err[1].find("("), err[2].find("^")) # in the right place self.assertEqual(err[2].count("^"), 1) - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_nocaret(self): exc = SyntaxError("error", ("x.py", 23, None, "bad syntax")) err = traceback.format_exception_only(SyntaxError, exc) @@ -748,8 +746,6 @@ def outer_raise(): self.assertIn('inner_raise() # Marker', blocks[2]) self.check_zero_div(blocks[2]) - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_syntax_error_offset_at_eol(self): # See #10186. def e(): @@ -808,8 +804,6 @@ def __str__(self): exp = f'.{X.__qualname__}: I am X\n' self.assertEqual(exp, err) - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_syntax_error_various_offsets(self): for offset in range(-5, 10): for add in [0, 2]: diff --git a/Lib/test/test_tuple.py b/Lib/test/test_tuple.py index 26b238e086..d2a2ed310b 100644 --- a/Lib/test/test_tuple.py +++ b/Lib/test/test_tuple.py @@ -77,8 +77,6 @@ def f(): # We expect tuples whose base components have deterministic hashes to # have deterministic hashes too - and, indeed, the same hashes across # platforms with hash codes of the same bit width. - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_hash_exact(self): def check_one_exact(t, e32, e64): got = hash(t) diff --git a/Lib/test/test_typing.py b/Lib/test/test_typing.py index b6a167f998..a4f2e897b1 100644 --- a/Lib/test/test_typing.py +++ b/Lib/test/test_typing.py @@ -1,64 +1,80 @@ import contextlib import collections +import collections.abc +from collections import defaultdict +from functools import lru_cache, wraps, reduce +import gc +import inspect +import itertools +import operator import pickle import re import sys -from unittest import TestCase, main, skipUnless, skip -# TODO: RUSTPYTHON -import unittest +from unittest import TestCase, main, skip +from unittest.mock import patch from copy import copy, deepcopy -from typing import Any, NoReturn -from typing import TypeVar, AnyStr +from typing import Any, NoReturn, Never, assert_never +from typing import overload, get_overloads, clear_overloads +from typing import TypeVar, TypeVarTuple, Unpack, AnyStr from typing import T, KT, VT # Not in __all__. from typing import Union, Optional, Literal from typing import Tuple, List, Dict, MutableMapping from typing import Callable from typing import Generic, ClassVar, Final, final, Protocol -from typing import cast, runtime_checkable +from typing import assert_type, cast, runtime_checkable from typing import get_type_hints -from typing import get_origin, get_args -from typing import is_typeddict +from typing import get_origin, get_args, get_protocol_members +from typing import is_typeddict, is_protocol +from typing import reveal_type +from typing import dataclass_transform from typing import no_type_check, no_type_check_decorator from typing import Type -from typing import NewType -from typing import NamedTuple, TypedDict +from typing import NamedTuple, NotRequired, Required, ReadOnly, TypedDict from typing import IO, TextIO, BinaryIO from typing import Pattern, Match from typing import Annotated, ForwardRef +from typing import Self, LiteralString from typing import TypeAlias from typing import ParamSpec, Concatenate, ParamSpecArgs, ParamSpecKwargs -from typing import TypeGuard +from typing import TypeGuard, TypeIs, NoDefault import abc +import textwrap import typing import weakref import types -from test import mod_generics_cache -from test import _typed_dict_helper +from test.support import captured_stderr, cpython_only, infinite_recursion, requires_docstrings, import_helper +from test.support.testcase import ExtraAssertions +from test.typinganndata import ann_module695, mod_generics_cache, _typed_dict_helper +# TODO: RUSTPYTHON +import unittest -class BaseTestCase(TestCase): +CANNOT_SUBCLASS_TYPE = 'Cannot subclass special typing classes' +NOT_A_BASE_TYPE = "type 'typing.%s' is not an acceptable base type" +CANNOT_SUBCLASS_INSTANCE = 'Cannot subclass an instance of %s' - def assertIsSubclass(self, cls, class_or_tuple, msg=None): - if not issubclass(cls, class_or_tuple): - message = '%r is not a subclass of %r' % (cls, class_or_tuple) - if msg is not None: - message += ' : %s' % msg - raise self.failureException(message) - def assertNotIsSubclass(self, cls, class_or_tuple, msg=None): - if issubclass(cls, class_or_tuple): - message = '%r is a subclass of %r' % (cls, class_or_tuple) - if msg is not None: - message += ' : %s' % msg - raise self.failureException(message) +class BaseTestCase(TestCase, ExtraAssertions): def clear_caches(self): for f in typing._cleanups: f() +def all_pickle_protocols(test_func): + """Runs `test_func` with various values for `proto` argument.""" + + @wraps(test_func) + def wrapper(self): + for proto in range(pickle.HIGHEST_PROTOCOL + 1): + with self.subTest(pickle_proto=proto): + test_func(self, proto=proto) + + return wrapper + + class Employee: pass @@ -81,28 +97,59 @@ def test_any_instance_type_error(self): with self.assertRaises(TypeError): isinstance(42, Any) - def test_any_subclass_type_error(self): - with self.assertRaises(TypeError): - issubclass(Employee, Any) - with self.assertRaises(TypeError): - issubclass(Any, Employee) - def test_repr(self): self.assertEqual(repr(Any), 'typing.Any') + class Sub(Any): pass + self.assertEqual( + repr(Sub), + f".Sub'>", + ) + def test_errors(self): with self.assertRaises(TypeError): - issubclass(42, Any) + isinstance(42, Any) with self.assertRaises(TypeError): Any[int] # Any is not a generic type. - def test_cannot_subclass(self): - with self.assertRaises(TypeError): - class A(Any): - pass - with self.assertRaises(TypeError): - class A(type(Any)): - pass + def test_can_subclass(self): + class Mock(Any): pass + self.assertTrue(issubclass(Mock, Any)) + self.assertIsInstance(Mock(), Mock) + + class Something: pass + self.assertFalse(issubclass(Something, Any)) + self.assertNotIsInstance(Something(), Mock) + + class MockSomething(Something, Mock): pass + self.assertTrue(issubclass(MockSomething, Any)) + self.assertTrue(issubclass(MockSomething, MockSomething)) + self.assertTrue(issubclass(MockSomething, Something)) + self.assertTrue(issubclass(MockSomething, Mock)) + ms = MockSomething() + self.assertIsInstance(ms, MockSomething) + self.assertIsInstance(ms, Something) + self.assertIsInstance(ms, Mock) + + def test_subclassing_with_custom_constructor(self): + class Sub(Any): + def __init__(self, *args, **kwargs): pass + # The instantiation must not fail. + Sub(0, s="") + + def test_multiple_inheritance_with_custom_constructors(self): + class Foo: + def __init__(self, x): + self.x = x + + class Bar(Any, Foo): + def __init__(self, x, y): + self.y = y + super().__init__(x) + + b = Bar(1, 2) + self.assertEqual(b.x, 1) + self.assertEqual(b.y, 2) def test_cannot_instantiate(self): with self.assertRaises(TypeError): @@ -117,48 +164,270 @@ def test_any_works_with_alias(self): typing.IO[Any] -class NoReturnTests(BaseTestCase): +class BottomTypeTestsMixin: + bottom_type: ClassVar[Any] + + def test_equality(self): + self.assertEqual(self.bottom_type, self.bottom_type) + self.assertIs(self.bottom_type, self.bottom_type) + self.assertNotEqual(self.bottom_type, None) + + def test_get_origin(self): + self.assertIs(get_origin(self.bottom_type), None) + + def test_instance_type_error(self): + with self.assertRaises(TypeError): + isinstance(42, self.bottom_type) + + def test_subclass_type_error(self): + with self.assertRaises(TypeError): + issubclass(Employee, self.bottom_type) + with self.assertRaises(TypeError): + issubclass(NoReturn, self.bottom_type) - def test_noreturn_instance_type_error(self): + def test_not_generic(self): with self.assertRaises(TypeError): - isinstance(42, NoReturn) + self.bottom_type[int] + + def test_cannot_subclass(self): + with self.assertRaisesRegex(TypeError, + 'Cannot subclass ' + re.escape(str(self.bottom_type))): + class A(self.bottom_type): + pass + with self.assertRaisesRegex(TypeError, CANNOT_SUBCLASS_TYPE): + class B(type(self.bottom_type)): + pass - def test_noreturn_subclass_type_error(self): + def test_cannot_instantiate(self): with self.assertRaises(TypeError): - issubclass(Employee, NoReturn) + self.bottom_type() with self.assertRaises(TypeError): - issubclass(NoReturn, Employee) + type(self.bottom_type)() + + +class NoReturnTests(BottomTypeTestsMixin, BaseTestCase): + bottom_type = NoReturn def test_repr(self): self.assertEqual(repr(NoReturn), 'typing.NoReturn') - def test_not_generic(self): + def test_get_type_hints(self): + def some(arg: NoReturn) -> NoReturn: ... + def some_str(arg: 'NoReturn') -> 'typing.NoReturn': ... + + expected = {'arg': NoReturn, 'return': NoReturn} + for target in [some, some_str]: + with self.subTest(target=target): + self.assertEqual(gth(target), expected) + + def test_not_equality(self): + self.assertNotEqual(NoReturn, Never) + self.assertNotEqual(Never, NoReturn) + + +class NeverTests(BottomTypeTestsMixin, BaseTestCase): + bottom_type = Never + + def test_repr(self): + self.assertEqual(repr(Never), 'typing.Never') + + def test_get_type_hints(self): + def some(arg: Never) -> Never: ... + def some_str(arg: 'Never') -> 'typing.Never': ... + + expected = {'arg': Never, 'return': Never} + for target in [some, some_str]: + with self.subTest(target=target): + self.assertEqual(gth(target), expected) + + +class AssertNeverTests(BaseTestCase): + def test_exception(self): + with self.assertRaises(AssertionError): + assert_never(None) + + value = "some value" + with self.assertRaisesRegex(AssertionError, value): + assert_never(value) + + # Make sure a huge value doesn't get printed in its entirety + huge_value = "a" * 10000 + with self.assertRaises(AssertionError) as cm: + assert_never(huge_value) + self.assertLess( + len(cm.exception.args[0]), + typing._ASSERT_NEVER_REPR_MAX_LENGTH * 2, + ) + + +class SelfTests(BaseTestCase): + def test_equality(self): + self.assertEqual(Self, Self) + self.assertIs(Self, Self) + self.assertNotEqual(Self, None) + + def test_basics(self): + class Foo: + def bar(self) -> Self: ... + class FooStr: + def bar(self) -> 'Self': ... + class FooStrTyping: + def bar(self) -> 'typing.Self': ... + + for target in [Foo, FooStr, FooStrTyping]: + with self.subTest(target=target): + self.assertEqual(gth(target.bar), {'return': Self}) + self.assertIs(get_origin(Self), None) + + def test_repr(self): + self.assertEqual(repr(Self), 'typing.Self') + + def test_cannot_subscript(self): with self.assertRaises(TypeError): - NoReturn[int] + Self[int] def test_cannot_subclass(self): - with self.assertRaises(TypeError): - class A(NoReturn): + with self.assertRaisesRegex(TypeError, CANNOT_SUBCLASS_TYPE): + class C(type(Self)): pass + with self.assertRaisesRegex(TypeError, + r'Cannot subclass typing\.Self'): + class D(Self): + pass + + def test_cannot_init(self): + with self.assertRaises(TypeError): + Self() + with self.assertRaises(TypeError): + type(Self)() + + def test_no_isinstance(self): + with self.assertRaises(TypeError): + isinstance(1, Self) + with self.assertRaises(TypeError): + issubclass(int, Self) + + def test_alias(self): + # TypeAliases are not actually part of the spec + alias_1 = Tuple[Self, Self] + alias_2 = List[Self] + alias_3 = ClassVar[Self] + self.assertEqual(get_args(alias_1), (Self, Self)) + self.assertEqual(get_args(alias_2), (Self,)) + self.assertEqual(get_args(alias_3), (Self,)) + + +class LiteralStringTests(BaseTestCase): + def test_equality(self): + self.assertEqual(LiteralString, LiteralString) + self.assertIs(LiteralString, LiteralString) + self.assertNotEqual(LiteralString, None) + + def test_basics(self): + class Foo: + def bar(self) -> LiteralString: ... + class FooStr: + def bar(self) -> 'LiteralString': ... + class FooStrTyping: + def bar(self) -> 'typing.LiteralString': ... + + for target in [Foo, FooStr, FooStrTyping]: + with self.subTest(target=target): + self.assertEqual(gth(target.bar), {'return': LiteralString}) + self.assertIs(get_origin(LiteralString), None) + + def test_repr(self): + self.assertEqual(repr(LiteralString), 'typing.LiteralString') + + def test_cannot_subscript(self): with self.assertRaises(TypeError): - class A(type(NoReturn)): + LiteralString[int] + + def test_cannot_subclass(self): + with self.assertRaisesRegex(TypeError, CANNOT_SUBCLASS_TYPE): + class C(type(LiteralString)): + pass + with self.assertRaisesRegex(TypeError, + r'Cannot subclass typing\.LiteralString'): + class D(LiteralString): pass - def test_cannot_instantiate(self): + def test_cannot_init(self): + with self.assertRaises(TypeError): + LiteralString() + with self.assertRaises(TypeError): + type(LiteralString)() + + def test_no_isinstance(self): with self.assertRaises(TypeError): - NoReturn() + isinstance(1, LiteralString) with self.assertRaises(TypeError): - type(NoReturn)() + issubclass(int, LiteralString) + def test_alias(self): + alias_1 = Tuple[LiteralString, LiteralString] + alias_2 = List[LiteralString] + alias_3 = ClassVar[LiteralString] + self.assertEqual(get_args(alias_1), (LiteralString, LiteralString)) + self.assertEqual(get_args(alias_2), (LiteralString,)) + self.assertEqual(get_args(alias_3), (LiteralString,)) class TypeVarTests(BaseTestCase): - def test_basic_plain(self): T = TypeVar('T') # T equals itself. self.assertEqual(T, T) # T is an instance of TypeVar self.assertIsInstance(T, TypeVar) + self.assertEqual(T.__name__, 'T') + self.assertEqual(T.__constraints__, ()) + self.assertIs(T.__bound__, None) + self.assertIs(T.__covariant__, False) + self.assertIs(T.__contravariant__, False) + self.assertIs(T.__infer_variance__, False) + self.assertEqual(T.__module__, __name__) + + def test_basic_with_exec(self): + ns = {} + exec('from typing import TypeVar; T = TypeVar("T", bound=float)', ns, ns) + T = ns['T'] + self.assertIsInstance(T, TypeVar) + self.assertEqual(T.__name__, 'T') + self.assertEqual(T.__constraints__, ()) + self.assertIs(T.__bound__, float) + self.assertIs(T.__covariant__, False) + self.assertIs(T.__contravariant__, False) + self.assertIs(T.__infer_variance__, False) + self.assertIs(T.__module__, None) + + def test_attributes(self): + T_bound = TypeVar('T_bound', bound=int) + self.assertEqual(T_bound.__name__, 'T_bound') + self.assertEqual(T_bound.__constraints__, ()) + self.assertIs(T_bound.__bound__, int) + + T_constraints = TypeVar('T_constraints', int, str) + self.assertEqual(T_constraints.__name__, 'T_constraints') + self.assertEqual(T_constraints.__constraints__, (int, str)) + self.assertIs(T_constraints.__bound__, None) + + T_co = TypeVar('T_co', covariant=True) + self.assertEqual(T_co.__name__, 'T_co') + self.assertIs(T_co.__covariant__, True) + self.assertIs(T_co.__contravariant__, False) + self.assertIs(T_co.__infer_variance__, False) + + T_contra = TypeVar('T_contra', contravariant=True) + self.assertEqual(T_contra.__name__, 'T_contra') + self.assertIs(T_contra.__covariant__, False) + self.assertIs(T_contra.__contravariant__, True) + self.assertIs(T_contra.__infer_variance__, False) + + T_infer = TypeVar('T_infer', infer_variance=True) + self.assertEqual(T_infer.__name__, 'T_infer') + self.assertIs(T_infer.__covariant__, False) + self.assertIs(T_infer.__contravariant__, False) + self.assertIs(T_infer.__infer_variance__, True) def test_typevar_instance_type_error(self): T = TypeVar('T') @@ -218,15 +487,13 @@ def test_no_redefinition(self): self.assertNotEqual(TypeVar('T'), TypeVar('T')) self.assertNotEqual(TypeVar('T', int, str), TypeVar('T', int, str)) - def test_cannot_subclass_vars(self): - with self.assertRaises(TypeError): - class V(TypeVar('T')): - pass - - def test_cannot_subclass_var_itself(self): - with self.assertRaises(TypeError): - class V(TypeVar): - pass + def test_cannot_subclass(self): + with self.assertRaisesRegex(TypeError, NOT_A_BASE_TYPE % 'TypeVar'): + class V(TypeVar): pass + T = TypeVar("T") + with self.assertRaisesRegex(TypeError, + CANNOT_SUBCLASS_INSTANCE % 'TypeVar'): + class W(T): pass def test_cannot_instantiate_vars(self): with self.assertRaises(TypeError): @@ -234,9 +501,12 @@ def test_cannot_instantiate_vars(self): def test_bound_errors(self): with self.assertRaises(TypeError): - TypeVar('X', bound=42) + TypeVar('X', bound=Union) with self.assertRaises(TypeError): TypeVar('X', str, float, bound=Employee) + with self.assertRaisesRegex(TypeError, + r"Bound must be a type\. Got \(1, 2\)\."): + TypeVar('X', bound=(1, 2)) def test_missing__name__(self): # See bpo-39942 @@ -249,166 +519,1767 @@ def test_no_bivariant(self): with self.assertRaises(ValueError): TypeVar('T', covariant=True, contravariant=True) + def test_cannot_combine_explicit_and_infer(self): + with self.assertRaises(ValueError): + TypeVar('T', covariant=True, infer_variance=True) + with self.assertRaises(ValueError): + TypeVar('T', contravariant=True, infer_variance=True) + + def test_var_substitution(self): + T = TypeVar('T') + subst = T.__typing_subst__ + self.assertIs(subst(int), int) + self.assertEqual(subst(list[int]), list[int]) + self.assertEqual(subst(List[int]), List[int]) + self.assertEqual(subst(List), List) + self.assertIs(subst(Any), Any) + self.assertIs(subst(None), type(None)) + self.assertIs(subst(T), T) + self.assertEqual(subst(int|str), int|str) + self.assertEqual(subst(Union[int, str]), Union[int, str]) + def test_bad_var_substitution(self): T = TypeVar('T') - for arg in (), (int, str): + bad_args = ( + (), (int, str), Union, + Generic, Generic[T], Protocol, Protocol[T], + Final, Final[int], ClassVar, ClassVar[int], + ) + for arg in bad_args: with self.subTest(arg=arg): + with self.assertRaises(TypeError): + T.__typing_subst__(arg) with self.assertRaises(TypeError): List[T][arg] with self.assertRaises(TypeError): list[T][arg] + def test_many_weakrefs(self): + # gh-108295: this used to segfault + for cls in (ParamSpec, TypeVarTuple, TypeVar): + with self.subTest(cls=cls): + vals = weakref.WeakValueDictionary() + + for x in range(10): + vals[x] = cls(str(x)) + del vals + + def test_constructor(self): + T = TypeVar(name="T") + self.assertEqual(T.__name__, "T") + self.assertEqual(T.__constraints__, ()) + self.assertIs(T.__bound__, None) + self.assertIs(T.__default__, typing.NoDefault) + self.assertIs(T.__covariant__, False) + self.assertIs(T.__contravariant__, False) + self.assertIs(T.__infer_variance__, False) + + T = TypeVar(name="T", bound=type) + self.assertEqual(T.__name__, "T") + self.assertEqual(T.__constraints__, ()) + self.assertIs(T.__bound__, type) + self.assertIs(T.__default__, typing.NoDefault) + self.assertIs(T.__covariant__, False) + self.assertIs(T.__contravariant__, False) + self.assertIs(T.__infer_variance__, False) + + T = TypeVar(name="T", default=()) + self.assertEqual(T.__name__, "T") + self.assertEqual(T.__constraints__, ()) + self.assertIs(T.__bound__, None) + self.assertIs(T.__default__, ()) + self.assertIs(T.__covariant__, False) + self.assertIs(T.__contravariant__, False) + self.assertIs(T.__infer_variance__, False) + + T = TypeVar(name="T", covariant=True) + self.assertEqual(T.__name__, "T") + self.assertEqual(T.__constraints__, ()) + self.assertIs(T.__bound__, None) + self.assertIs(T.__default__, typing.NoDefault) + self.assertIs(T.__covariant__, True) + self.assertIs(T.__contravariant__, False) + self.assertIs(T.__infer_variance__, False) + + T = TypeVar(name="T", contravariant=True) + self.assertEqual(T.__name__, "T") + self.assertEqual(T.__constraints__, ()) + self.assertIs(T.__bound__, None) + self.assertIs(T.__default__, typing.NoDefault) + self.assertIs(T.__covariant__, False) + self.assertIs(T.__contravariant__, True) + self.assertIs(T.__infer_variance__, False) + + T = TypeVar(name="T", infer_variance=True) + self.assertEqual(T.__name__, "T") + self.assertEqual(T.__constraints__, ()) + self.assertIs(T.__bound__, None) + self.assertIs(T.__default__, typing.NoDefault) + self.assertIs(T.__covariant__, False) + self.assertIs(T.__contravariant__, False) + self.assertIs(T.__infer_variance__, True) + +class TypeParameterDefaultsTests(BaseTestCase): + def test_typevar(self): + T = TypeVar('T', default=int) + self.assertEqual(T.__default__, int) + self.assertTrue(T.has_default()) + self.assertIsInstance(T, TypeVar) -class UnionTests(BaseTestCase): + class A(Generic[T]): ... + Alias = Optional[T] - def test_basics(self): - u = Union[int, float] - self.assertNotEqual(u, Union) + # TODO: RUSTPYTHON + @unittest.expectedFailure + def test_typevar_none(self): + U = TypeVar('U') + U_None = TypeVar('U_None', default=None) + self.assertIs(U.__default__, NoDefault) + self.assertFalse(U.has_default()) + self.assertIs(U_None.__default__, None) + self.assertTrue(U_None.has_default()) - def test_subclass_error(self): - with self.assertRaises(TypeError): - issubclass(int, Union) - with self.assertRaises(TypeError): - issubclass(Union, int) - with self.assertRaises(TypeError): - issubclass(Union[int, str], int) + class X[T]: ... + T, = X.__type_params__ + self.assertIs(T.__default__, NoDefault) + self.assertFalse(T.has_default()) - def test_union_any(self): - u = Union[Any] - self.assertEqual(u, Any) - u1 = Union[int, Any] - u2 = Union[Any, int] - u3 = Union[Any, object] - self.assertEqual(u1, u2) - self.assertNotEqual(u1, Any) - self.assertNotEqual(u2, Any) - self.assertNotEqual(u3, Any) + def test_paramspec(self): + P = ParamSpec('P', default=(str, int)) + self.assertEqual(P.__default__, (str, int)) + self.assertTrue(P.has_default()) + self.assertIsInstance(P, ParamSpec) - def test_union_object(self): - u = Union[object] - self.assertEqual(u, object) - u1 = Union[int, object] - u2 = Union[object, int] - self.assertEqual(u1, u2) - self.assertNotEqual(u1, object) - self.assertNotEqual(u2, object) + class A(Generic[P]): ... + Alias = typing.Callable[P, None] - def test_unordered(self): - u1 = Union[int, float] - u2 = Union[float, int] - self.assertEqual(u1, u2) + P_default = ParamSpec('P_default', default=...) + self.assertIs(P_default.__default__, ...) - def test_single_class_disappears(self): - t = Union[Employee] - self.assertIs(t, Employee) + def test_paramspec_none(self): + U = ParamSpec('U') + U_None = ParamSpec('U_None', default=None) + self.assertIs(U.__default__, NoDefault) + self.assertFalse(U.has_default()) + self.assertIs(U_None.__default__, None) + self.assertTrue(U_None.has_default()) - def test_base_class_kept(self): - u = Union[Employee, Manager] - self.assertNotEqual(u, Employee) - self.assertIn(Employee, u.__args__) - self.assertIn(Manager, u.__args__) + class X[**P]: ... + P, = X.__type_params__ + self.assertIs(P.__default__, NoDefault) + self.assertFalse(P.has_default()) - def test_union_union(self): - u = Union[int, float] - v = Union[u, Employee] - self.assertEqual(v, Union[int, float, Employee]) + def test_typevartuple(self): + Ts = TypeVarTuple('Ts', default=Unpack[Tuple[str, int]]) + self.assertEqual(Ts.__default__, Unpack[Tuple[str, int]]) + self.assertTrue(Ts.has_default()) + self.assertIsInstance(Ts, TypeVarTuple) - def test_repr(self): - self.assertEqual(repr(Union), 'typing.Union') - u = Union[Employee, int] - self.assertEqual(repr(u), 'typing.Union[%s.Employee, int]' % __name__) - u = Union[int, Employee] - self.assertEqual(repr(u), 'typing.Union[int, %s.Employee]' % __name__) - T = TypeVar('T') - u = Union[T, int][int] - self.assertEqual(repr(u), repr(int)) - u = Union[List[int], int] - self.assertEqual(repr(u), 'typing.Union[typing.List[int], int]') - u = Union[list[int], dict[str, float]] - self.assertEqual(repr(u), 'typing.Union[list[int], dict[str, float]]') - u = Union[int | float] - self.assertEqual(repr(u), 'typing.Union[int, float]') + class A(Generic[Unpack[Ts]]): ... + Alias = Optional[Unpack[Ts]] - u = Union[None, str] - self.assertEqual(repr(u), 'typing.Optional[str]') - u = Union[str, None] - self.assertEqual(repr(u), 'typing.Optional[str]') - u = Union[None, str, int] - self.assertEqual(repr(u), 'typing.Union[NoneType, str, int]') - u = Optional[str] - self.assertEqual(repr(u), 'typing.Optional[str]') + def test_typevartuple_specialization(self): + T = TypeVar("T") + Ts = TypeVarTuple('Ts', default=Unpack[Tuple[str, int]]) + self.assertEqual(Ts.__default__, Unpack[Tuple[str, int]]) + class A(Generic[T, Unpack[Ts]]): ... + self.assertEqual(A[float].__args__, (float, str, int)) + self.assertEqual(A[float, range].__args__, (float, range)) + self.assertEqual(A[float, *tuple[int, ...]].__args__, (float, *tuple[int, ...])) + + def test_typevar_and_typevartuple_specialization(self): + T = TypeVar("T") + U = TypeVar("U", default=float) + Ts = TypeVarTuple('Ts', default=Unpack[Tuple[str, int]]) + self.assertEqual(Ts.__default__, Unpack[Tuple[str, int]]) + class A(Generic[T, U, Unpack[Ts]]): ... + self.assertEqual(A[int].__args__, (int, float, str, int)) + self.assertEqual(A[int, str].__args__, (int, str, str, int)) + self.assertEqual(A[int, str, range].__args__, (int, str, range)) + self.assertEqual(A[int, str, *tuple[int, ...]].__args__, (int, str, *tuple[int, ...])) - def test_cannot_subclass(self): - with self.assertRaises(TypeError): - class C(Union): - pass - with self.assertRaises(TypeError): - class C(type(Union)): - pass - with self.assertRaises(TypeError): - class C(Union[int, str]): - pass + def test_no_default_after_typevar_tuple(self): + T = TypeVar("T", default=int) + Ts = TypeVarTuple("Ts") + Ts_default = TypeVarTuple("Ts_default", default=Unpack[Tuple[str, int]]) - def test_cannot_instantiate(self): - with self.assertRaises(TypeError): - Union() - with self.assertRaises(TypeError): - type(Union)() - u = Union[int, float] with self.assertRaises(TypeError): - u() - with self.assertRaises(TypeError): - type(u)() + class X(Generic[*Ts, T]): ... - def test_union_generalization(self): - self.assertFalse(Union[str, typing.Iterable[int]] == str) - self.assertFalse(Union[str, typing.Iterable[int]] == typing.Iterable[int]) - self.assertIn(str, Union[str, typing.Iterable[int]].__args__) - self.assertIn(typing.Iterable[int], Union[str, typing.Iterable[int]].__args__) + with self.assertRaises(TypeError): + class Y(Generic[*Ts_default, T]): ... - def test_union_compare_other(self): - self.assertNotEqual(Union, object) - self.assertNotEqual(Union, Any) - self.assertNotEqual(ClassVar, Union) - self.assertNotEqual(Optional, Union) - self.assertNotEqual([None], Optional) - self.assertNotEqual(Optional, typing.Mapping) - self.assertNotEqual(Optional[typing.MutableMapping], Union) + def test_allow_default_after_non_default_in_alias(self): + T_default = TypeVar('T_default', default=int) + T = TypeVar('T') + Ts = TypeVarTuple('Ts') - def test_optional(self): - o = Optional[int] - u = Union[int, None] - self.assertEqual(o, u) + a1 = Callable[[T_default], T] + self.assertEqual(a1.__args__, (T_default, T)) - def test_empty(self): - with self.assertRaises(TypeError): - Union[()] + a2 = dict[T_default, T] + self.assertEqual(a2.__args__, (T_default, T)) - def test_no_eval_union(self): - u = Union[int, str] - def f(x: u): ... - self.assertIs(get_type_hints(f)['x'], u) + a3 = typing.Dict[T_default, T] + self.assertEqual(a3.__args__, (T_default, T)) - def test_function_repr_union(self): - def fun() -> int: ... - self.assertEqual(repr(Union[fun, int]), 'typing.Union[fun, int]') + a4 = Callable[*Ts, T] + self.assertEqual(a4.__args__, (*Ts, T)) - def test_union_str_pattern(self): - # Shouldn't crash; see http://bugs.python.org/issue25390 - A = Union[str, Pattern] - A + def test_paramspec_specialization(self): + T = TypeVar("T") + P = ParamSpec('P', default=[str, int]) + self.assertEqual(P.__default__, [str, int]) + class A(Generic[T, P]): ... + self.assertEqual(A[float].__args__, (float, (str, int))) + self.assertEqual(A[float, [range]].__args__, (float, (range,))) - def test_etree(self): - # See https://github.com/python/typing/issues/229 - # (Only relevant for Python 2.) - from xml.etree.ElementTree import Element + def test_typevar_and_paramspec_specialization(self): + T = TypeVar("T") + U = TypeVar("U", default=float) + P = ParamSpec('P', default=[str, int]) + self.assertEqual(P.__default__, [str, int]) + class A(Generic[T, U, P]): ... + self.assertEqual(A[float].__args__, (float, float, (str, int))) + self.assertEqual(A[float, int].__args__, (float, int, (str, int))) + self.assertEqual(A[float, int, [range]].__args__, (float, int, (range,))) + + def test_paramspec_and_typevar_specialization(self): + T = TypeVar("T") + P = ParamSpec('P', default=[str, int]) + U = TypeVar("U", default=float) + self.assertEqual(P.__default__, [str, int]) + class A(Generic[T, P, U]): ... + self.assertEqual(A[float].__args__, (float, (str, int), float)) + self.assertEqual(A[float, [range]].__args__, (float, (range,), float)) + self.assertEqual(A[float, [range], int].__args__, (float, (range,), int)) + + def test_typevartuple_none(self): + U = TypeVarTuple('U') + U_None = TypeVarTuple('U_None', default=None) + self.assertIs(U.__default__, NoDefault) + self.assertFalse(U.has_default()) + self.assertIs(U_None.__default__, None) + self.assertTrue(U_None.has_default()) + + class X[**Ts]: ... + Ts, = X.__type_params__ + self.assertIs(Ts.__default__, NoDefault) + self.assertFalse(Ts.has_default()) + + def test_no_default_after_non_default(self): + DefaultStrT = TypeVar('DefaultStrT', default=str) + T = TypeVar('T') + + with self.assertRaisesRegex( + TypeError, r"Type parameter ~T without a default follows type parameter with a default" + ): + Test = Generic[DefaultStrT, T] + + def test_need_more_params(self): + DefaultStrT = TypeVar('DefaultStrT', default=str) + T = TypeVar('T') + U = TypeVar('U') + + class A(Generic[T, U, DefaultStrT]): ... + A[int, bool] + A[int, bool, str] + + with self.assertRaisesRegex( + TypeError, r"Too few arguments for .+; actual 1, expected at least 2" + ): + Test = A[int] + + def test_pickle(self): + global U, U_co, U_contra, U_default # pickle wants to reference the class by name + U = TypeVar('U') + U_co = TypeVar('U_co', covariant=True) + U_contra = TypeVar('U_contra', contravariant=True) + U_default = TypeVar('U_default', default=int) + for proto in range(pickle.HIGHEST_PROTOCOL): + for typevar in (U, U_co, U_contra, U_default): + z = pickle.loads(pickle.dumps(typevar, proto)) + self.assertEqual(z.__name__, typevar.__name__) + self.assertEqual(z.__covariant__, typevar.__covariant__) + self.assertEqual(z.__contravariant__, typevar.__contravariant__) + self.assertEqual(z.__bound__, typevar.__bound__) + self.assertEqual(z.__default__, typevar.__default__) + + + +def template_replace(templates: list[str], replacements: dict[str, list[str]]) -> list[tuple[str]]: + """Renders templates with possible combinations of replacements. + + Example 1: Suppose that: + templates = ["dog_breed are awesome", "dog_breed are cool"] + replacements = ["dog_breed": ["Huskies", "Beagles"]] + Then we would return: + [ + ("Huskies are awesome", "Huskies are cool"), + ("Beagles are awesome", "Beagles are cool") + ] + + Example 2: Suppose that: + templates = ["Huskies are word1 but also word2"] + replacements = {"word1": ["playful", "cute"], + "word2": ["feisty", "tiring"]} + Then we would return: + [ + ("Huskies are playful but also feisty"), + ("Huskies are playful but also tiring"), + ("Huskies are cute but also feisty"), + ("Huskies are cute but also tiring") + ] + + Note that if any of the replacements do not occur in any template: + templates = ["Huskies are word1", "Beagles!"] + replacements = {"word1": ["playful", "cute"], + "word2": ["feisty", "tiring"]} + Then we do not generate duplicates, returning: + [ + ("Huskies are playful", "Beagles!"), + ("Huskies are cute", "Beagles!") + ] + """ + # First, build a structure like: + # [ + # [("word1", "playful"), ("word1", "cute")], + # [("word2", "feisty"), ("word2", "tiring")] + # ] + replacement_combos = [] + for original, possible_replacements in replacements.items(): + original_replacement_tuples = [] + for replacement in possible_replacements: + original_replacement_tuples.append((original, replacement)) + replacement_combos.append(original_replacement_tuples) + + # Second, generate rendered templates, including possible duplicates. + rendered_templates = [] + for replacement_combo in itertools.product(*replacement_combos): + # replacement_combo would be e.g. + # [("word1", "playful"), ("word2", "feisty")] + templates_with_replacements = [] + for template in templates: + for original, replacement in replacement_combo: + template = template.replace(original, replacement) + templates_with_replacements.append(template) + rendered_templates.append(tuple(templates_with_replacements)) + + # Finally, remove the duplicates (but keep the order). + rendered_templates_no_duplicates = [] + for x in rendered_templates: + # Inefficient, but should be fine for our purposes. + if x not in rendered_templates_no_duplicates: + rendered_templates_no_duplicates.append(x) + + return rendered_templates_no_duplicates + + +class TemplateReplacementTests(BaseTestCase): + + def test_two_templates_two_replacements_yields_correct_renders(self): + actual = template_replace( + templates=["Cats are word1", "Dogs are word2"], + replacements={ + "word1": ["small", "cute"], + "word2": ["big", "fluffy"], + }, + ) + expected = [ + ("Cats are small", "Dogs are big"), + ("Cats are small", "Dogs are fluffy"), + ("Cats are cute", "Dogs are big"), + ("Cats are cute", "Dogs are fluffy"), + ] + self.assertEqual(actual, expected) + + def test_no_duplicates_if_replacement_not_in_templates(self): + actual = template_replace( + templates=["Cats are word1", "Dogs!"], + replacements={ + "word1": ["small", "cute"], + "word2": ["big", "fluffy"], + }, + ) + expected = [ + ("Cats are small", "Dogs!"), + ("Cats are cute", "Dogs!"), + ] + self.assertEqual(actual, expected) + + + +class GenericAliasSubstitutionTests(BaseTestCase): + """Tests for type variable substitution in generic aliases. + + For variadic cases, these tests should be regarded as the source of truth, + since we hadn't realised the full complexity of variadic substitution + at the time of finalizing PEP 646. For full discussion, see + https://github.com/python/cpython/issues/91162. + """ + + def test_one_parameter(self): + T = TypeVar('T') + Ts = TypeVarTuple('Ts') + Ts2 = TypeVarTuple('Ts2') + + class C(Generic[T]): pass + + generics = ['C', 'list', 'List'] + tuple_types = ['tuple', 'Tuple'] + + tests = [ + # Alias # Args # Expected result + ('generic[T]', '[()]', 'TypeError'), + ('generic[T]', '[int]', 'generic[int]'), + ('generic[T]', '[int, str]', 'TypeError'), + ('generic[T]', '[tuple_type[int, ...]]', 'generic[tuple_type[int, ...]]'), + ('generic[T]', '[*tuple_type[int]]', 'generic[int]'), + ('generic[T]', '[*tuple_type[()]]', 'TypeError'), + ('generic[T]', '[*tuple_type[int, str]]', 'TypeError'), + ('generic[T]', '[*tuple_type[int, ...]]', 'TypeError'), + ('generic[T]', '[*Ts]', 'TypeError'), + ('generic[T]', '[T, *Ts]', 'TypeError'), + ('generic[T]', '[*Ts, T]', 'TypeError'), + # Raises TypeError because C is not variadic. + # (If C _were_ variadic, it'd be fine.) + ('C[T, *tuple_type[int, ...]]', '[int]', 'TypeError'), + # Should definitely raise TypeError: list only takes one argument. + ('list[T, *tuple_type[int, ...]]', '[int]', 'list[int, *tuple_type[int, ...]]'), + ('List[T, *tuple_type[int, ...]]', '[int]', 'TypeError'), + # Should raise, because more than one `TypeVarTuple` is not supported. + ('generic[*Ts, *Ts2]', '[int]', 'TypeError'), + ] + + for alias_template, args_template, expected_template in tests: + rendered_templates = template_replace( + templates=[alias_template, args_template, expected_template], + replacements={'generic': generics, 'tuple_type': tuple_types} + ) + for alias_str, args_str, expected_str in rendered_templates: + with self.subTest(alias=alias_str, args=args_str, expected=expected_str): + if expected_str == 'TypeError': + with self.assertRaises(TypeError): + eval(alias_str + args_str) + else: + self.assertEqual( + eval(alias_str + args_str), + eval(expected_str) + ) + + + # TODO: RUSTPYTHON + @unittest.expectedFailure + def test_two_parameters(self): + T1 = TypeVar('T1') + T2 = TypeVar('T2') + Ts = TypeVarTuple('Ts') + + class C(Generic[T1, T2]): pass + + generics = ['C', 'dict', 'Dict'] + tuple_types = ['tuple', 'Tuple'] + + tests = [ + # Alias # Args # Expected result + ('generic[T1, T2]', '[()]', 'TypeError'), + ('generic[T1, T2]', '[int]', 'TypeError'), + ('generic[T1, T2]', '[int, str]', 'generic[int, str]'), + ('generic[T1, T2]', '[int, str, bool]', 'TypeError'), + ('generic[T1, T2]', '[*tuple_type[int]]', 'TypeError'), + ('generic[T1, T2]', '[*tuple_type[int, str]]', 'generic[int, str]'), + ('generic[T1, T2]', '[*tuple_type[int, str, bool]]', 'TypeError'), + + ('generic[T1, T2]', '[int, *tuple_type[str]]', 'generic[int, str]'), + ('generic[T1, T2]', '[*tuple_type[int], str]', 'generic[int, str]'), + ('generic[T1, T2]', '[*tuple_type[int], *tuple_type[str]]', 'generic[int, str]'), + ('generic[T1, T2]', '[*tuple_type[int, str], *tuple_type[()]]', 'generic[int, str]'), + ('generic[T1, T2]', '[*tuple_type[()], *tuple_type[int, str]]', 'generic[int, str]'), + ('generic[T1, T2]', '[*tuple_type[int], *tuple_type[()]]', 'TypeError'), + ('generic[T1, T2]', '[*tuple_type[()], *tuple_type[int]]', 'TypeError'), + ('generic[T1, T2]', '[*tuple_type[int, str], *tuple_type[float]]', 'TypeError'), + ('generic[T1, T2]', '[*tuple_type[int], *tuple_type[str, float]]', 'TypeError'), + ('generic[T1, T2]', '[*tuple_type[int, str], *tuple_type[float, bool]]', 'TypeError'), + + ('generic[T1, T2]', '[tuple_type[int, ...]]', 'TypeError'), + ('generic[T1, T2]', '[tuple_type[int, ...], tuple_type[str, ...]]', 'generic[tuple_type[int, ...], tuple_type[str, ...]]'), + ('generic[T1, T2]', '[*tuple_type[int, ...]]', 'TypeError'), + ('generic[T1, T2]', '[int, *tuple_type[str, ...]]', 'TypeError'), + ('generic[T1, T2]', '[*tuple_type[int, ...], str]', 'TypeError'), + ('generic[T1, T2]', '[*tuple_type[int, ...], *tuple_type[str, ...]]', 'TypeError'), + ('generic[T1, T2]', '[*Ts]', 'TypeError'), + ('generic[T1, T2]', '[T, *Ts]', 'TypeError'), + ('generic[T1, T2]', '[*Ts, T]', 'TypeError'), + # This one isn't technically valid - none of the things that + # `generic` can be (defined in `generics` above) are variadic, so we + # shouldn't really be able to do `generic[T1, *tuple_type[int, ...]]`. + # So even if type checkers shouldn't allow it, we allow it at + # runtime, in accordance with a general philosophy of "Keep the + # runtime lenient so people can experiment with typing constructs". + ('generic[T1, *tuple_type[int, ...]]', '[str]', 'generic[str, *tuple_type[int, ...]]'), + ] + + for alias_template, args_template, expected_template in tests: + rendered_templates = template_replace( + templates=[alias_template, args_template, expected_template], + replacements={'generic': generics, 'tuple_type': tuple_types} + ) + for alias_str, args_str, expected_str in rendered_templates: + with self.subTest(alias=alias_str, args=args_str, expected=expected_str): + if expected_str == 'TypeError': + with self.assertRaises(TypeError): + eval(alias_str + args_str) + else: + self.assertEqual( + eval(alias_str + args_str), + eval(expected_str) + ) + + def test_three_parameters(self): + T1 = TypeVar('T1') + T2 = TypeVar('T2') + T3 = TypeVar('T3') + + class C(Generic[T1, T2, T3]): pass + + generics = ['C'] + tuple_types = ['tuple', 'Tuple'] + + tests = [ + # Alias # Args # Expected result + ('generic[T1, bool, T2]', '[int, str]', 'generic[int, bool, str]'), + ('generic[T1, bool, T2]', '[*tuple_type[int, str]]', 'generic[int, bool, str]'), + ] + + for alias_template, args_template, expected_template in tests: + rendered_templates = template_replace( + templates=[alias_template, args_template, expected_template], + replacements={'generic': generics, 'tuple_type': tuple_types} + ) + for alias_str, args_str, expected_str in rendered_templates: + with self.subTest(alias=alias_str, args=args_str, expected=expected_str): + if expected_str == 'TypeError': + with self.assertRaises(TypeError): + eval(alias_str + args_str) + else: + self.assertEqual( + eval(alias_str + args_str), + eval(expected_str) + ) + + # TODO: RUSTPYTHON + @unittest.expectedFailure + def test_variadic_parameters(self): + T1 = TypeVar('T1') + T2 = TypeVar('T2') + Ts = TypeVarTuple('Ts') + + generics = ['C', 'tuple', 'Tuple'] + tuple_types = ['tuple', 'Tuple'] + + tests = [ + # Alias # Args # Expected result + ('generic[*Ts]', '[()]', 'generic[()]'), + ('generic[*Ts]', '[int]', 'generic[int]'), + ('generic[*Ts]', '[int, str]', 'generic[int, str]'), + ('generic[*Ts]', '[*tuple_type[int]]', 'generic[int]'), + ('generic[*Ts]', '[*tuple_type[*Ts]]', 'generic[*Ts]'), + ('generic[*Ts]', '[*tuple_type[int, str]]', 'generic[int, str]'), + ('generic[*Ts]', '[str, *tuple_type[int, ...], bool]', 'generic[str, *tuple_type[int, ...], bool]'), + ('generic[*Ts]', '[tuple_type[int, ...]]', 'generic[tuple_type[int, ...]]'), + ('generic[*Ts]', '[tuple_type[int, ...], tuple_type[str, ...]]', 'generic[tuple_type[int, ...], tuple_type[str, ...]]'), + ('generic[*Ts]', '[*tuple_type[int, ...]]', 'generic[*tuple_type[int, ...]]'), + ('generic[*Ts]', '[*tuple_type[int, ...], *tuple_type[str, ...]]', 'TypeError'), + + ('generic[*Ts]', '[*Ts]', 'generic[*Ts]'), + ('generic[*Ts]', '[T, *Ts]', 'generic[T, *Ts]'), + ('generic[*Ts]', '[*Ts, T]', 'generic[*Ts, T]'), + ('generic[T, *Ts]', '[()]', 'TypeError'), + ('generic[T, *Ts]', '[int]', 'generic[int]'), + ('generic[T, *Ts]', '[int, str]', 'generic[int, str]'), + ('generic[T, *Ts]', '[int, str, bool]', 'generic[int, str, bool]'), + ('generic[list[T], *Ts]', '[()]', 'TypeError'), + ('generic[list[T], *Ts]', '[int]', 'generic[list[int]]'), + ('generic[list[T], *Ts]', '[int, str]', 'generic[list[int], str]'), + ('generic[list[T], *Ts]', '[int, str, bool]', 'generic[list[int], str, bool]'), + + ('generic[*Ts, T]', '[()]', 'TypeError'), + ('generic[*Ts, T]', '[int]', 'generic[int]'), + ('generic[*Ts, T]', '[int, str]', 'generic[int, str]'), + ('generic[*Ts, T]', '[int, str, bool]', 'generic[int, str, bool]'), + ('generic[*Ts, list[T]]', '[()]', 'TypeError'), + ('generic[*Ts, list[T]]', '[int]', 'generic[list[int]]'), + ('generic[*Ts, list[T]]', '[int, str]', 'generic[int, list[str]]'), + ('generic[*Ts, list[T]]', '[int, str, bool]', 'generic[int, str, list[bool]]'), + + ('generic[T1, T2, *Ts]', '[()]', 'TypeError'), + ('generic[T1, T2, *Ts]', '[int]', 'TypeError'), + ('generic[T1, T2, *Ts]', '[int, str]', 'generic[int, str]'), + ('generic[T1, T2, *Ts]', '[int, str, bool]', 'generic[int, str, bool]'), + ('generic[T1, T2, *Ts]', '[int, str, bool, bytes]', 'generic[int, str, bool, bytes]'), + + ('generic[*Ts, T1, T2]', '[()]', 'TypeError'), + ('generic[*Ts, T1, T2]', '[int]', 'TypeError'), + ('generic[*Ts, T1, T2]', '[int, str]', 'generic[int, str]'), + ('generic[*Ts, T1, T2]', '[int, str, bool]', 'generic[int, str, bool]'), + ('generic[*Ts, T1, T2]', '[int, str, bool, bytes]', 'generic[int, str, bool, bytes]'), + + ('generic[T1, *Ts, T2]', '[()]', 'TypeError'), + ('generic[T1, *Ts, T2]', '[int]', 'TypeError'), + ('generic[T1, *Ts, T2]', '[int, str]', 'generic[int, str]'), + ('generic[T1, *Ts, T2]', '[int, str, bool]', 'generic[int, str, bool]'), + ('generic[T1, *Ts, T2]', '[int, str, bool, bytes]', 'generic[int, str, bool, bytes]'), + + ('generic[T, *Ts]', '[*tuple_type[int, ...]]', 'generic[int, *tuple_type[int, ...]]'), + ('generic[T, *Ts]', '[str, *tuple_type[int, ...]]', 'generic[str, *tuple_type[int, ...]]'), + ('generic[T, *Ts]', '[*tuple_type[int, ...], str]', 'generic[int, *tuple_type[int, ...], str]'), + ('generic[*Ts, T]', '[*tuple_type[int, ...]]', 'generic[*tuple_type[int, ...], int]'), + ('generic[*Ts, T]', '[str, *tuple_type[int, ...]]', 'generic[str, *tuple_type[int, ...], int]'), + ('generic[*Ts, T]', '[*tuple_type[int, ...], str]', 'generic[*tuple_type[int, ...], str]'), + ('generic[T1, *Ts, T2]', '[*tuple_type[int, ...]]', 'generic[int, *tuple_type[int, ...], int]'), + ('generic[T, str, *Ts]', '[*tuple_type[int, ...]]', 'generic[int, str, *tuple_type[int, ...]]'), + ('generic[*Ts, str, T]', '[*tuple_type[int, ...]]', 'generic[*tuple_type[int, ...], str, int]'), + ('generic[list[T], *Ts]', '[*tuple_type[int, ...]]', 'generic[list[int], *tuple_type[int, ...]]'), + ('generic[*Ts, list[T]]', '[*tuple_type[int, ...]]', 'generic[*tuple_type[int, ...], list[int]]'), + + ('generic[T, *tuple_type[int, ...]]', '[str]', 'generic[str, *tuple_type[int, ...]]'), + ('generic[T1, T2, *tuple_type[int, ...]]', '[str, bool]', 'generic[str, bool, *tuple_type[int, ...]]'), + ('generic[T1, *tuple_type[int, ...], T2]', '[str, bool]', 'generic[str, *tuple_type[int, ...], bool]'), + ('generic[T1, *tuple_type[int, ...], T2]', '[str, bool, float]', 'TypeError'), + + ('generic[T1, *tuple_type[T2, ...]]', '[int, str]', 'generic[int, *tuple_type[str, ...]]'), + ('generic[*tuple_type[T1, ...], T2]', '[int, str]', 'generic[*tuple_type[int, ...], str]'), + ('generic[T1, *tuple_type[generic[*Ts], ...]]', '[int, str, bool]', 'generic[int, *tuple_type[generic[str, bool], ...]]'), + ('generic[*tuple_type[generic[*Ts], ...], T1]', '[int, str, bool]', 'generic[*tuple_type[generic[int, str], ...], bool]'), + ] + + for alias_template, args_template, expected_template in tests: + rendered_templates = template_replace( + templates=[alias_template, args_template, expected_template], + replacements={'generic': generics, 'tuple_type': tuple_types} + ) + for alias_str, args_str, expected_str in rendered_templates: + with self.subTest(alias=alias_str, args=args_str, expected=expected_str): + if expected_str == 'TypeError': + with self.assertRaises(TypeError): + eval(alias_str + args_str) + else: + self.assertEqual( + eval(alias_str + args_str), + eval(expected_str) + ) + + + + + +class UnpackTests(BaseTestCase): + + def test_accepts_single_type(self): + # (*tuple[int],) + Unpack[Tuple[int]] + + def test_dir(self): + dir_items = set(dir(Unpack[Tuple[int]])) + for required_item in [ + '__args__', '__parameters__', '__origin__', + ]: + with self.subTest(required_item=required_item): + self.assertIn(required_item, dir_items) + + def test_rejects_multiple_types(self): + with self.assertRaises(TypeError): + Unpack[Tuple[int], Tuple[str]] + # We can't do the equivalent for `*` here - + # *(Tuple[int], Tuple[str]) is just plain tuple unpacking, + # which is valid. + + def test_rejects_multiple_parameterization(self): + with self.assertRaises(TypeError): + (*tuple[int],)[0][tuple[int]] + with self.assertRaises(TypeError): + Unpack[Tuple[int]][Tuple[int]] + + def test_cannot_be_called(self): + with self.assertRaises(TypeError): + Unpack() + + def test_usage_with_kwargs(self): + Movie = TypedDict('Movie', {'name': str, 'year': int}) + def foo(**kwargs: Unpack[Movie]): ... + self.assertEqual(repr(foo.__annotations__['kwargs']), + f"typing.Unpack[{__name__}.Movie]") + + # TODO: RUSTPYTHON + @unittest.expectedFailure + def test_builtin_tuple(self): + Ts = TypeVarTuple("Ts") + + class Old(Generic[*Ts]): ... + class New[*Ts]: ... + + PartOld = Old[int, *Ts] + self.assertEqual(PartOld[str].__args__, (int, str)) + self.assertEqual(PartOld[*tuple[str]].__args__, (int, str)) + self.assertEqual(PartOld[*Tuple[str]].__args__, (int, str)) + self.assertEqual(PartOld[Unpack[tuple[str]]].__args__, (int, str)) + self.assertEqual(PartOld[Unpack[Tuple[str]]].__args__, (int, str)) + + PartNew = New[int, *Ts] + self.assertEqual(PartNew[str].__args__, (int, str)) + self.assertEqual(PartNew[*tuple[str]].__args__, (int, str)) + self.assertEqual(PartNew[*Tuple[str]].__args__, (int, str)) + self.assertEqual(PartNew[Unpack[tuple[str]]].__args__, (int, str)) + self.assertEqual(PartNew[Unpack[Tuple[str]]].__args__, (int, str)) + + # TODO: RUSTPYTHON + @unittest.expectedFailure + def test_unpack_wrong_type(self): + Ts = TypeVarTuple("Ts") + class Gen[*Ts]: ... + PartGen = Gen[int, *Ts] + + bad_unpack_param = re.escape("Unpack[...] must be used with a tuple type") + with self.assertRaisesRegex(TypeError, bad_unpack_param): + PartGen[Unpack[list[int]]] + with self.assertRaisesRegex(TypeError, bad_unpack_param): + PartGen[Unpack[List[int]]] + +class TypeVarTupleTests(BaseTestCase): + + def test_name(self): + Ts = TypeVarTuple('Ts') + self.assertEqual(Ts.__name__, 'Ts') + Ts2 = TypeVarTuple('Ts2') + self.assertEqual(Ts2.__name__, 'Ts2') + + def test_module(self): + Ts = TypeVarTuple('Ts') + self.assertEqual(Ts.__module__, __name__) + + def test_exec(self): + ns = {} + exec('from typing import TypeVarTuple; Ts = TypeVarTuple("Ts")', ns) + Ts = ns['Ts'] + self.assertEqual(Ts.__name__, 'Ts') + self.assertIs(Ts.__module__, None) + + def test_instance_is_equal_to_itself(self): + Ts = TypeVarTuple('Ts') + self.assertEqual(Ts, Ts) + + def test_different_instances_are_different(self): + self.assertNotEqual(TypeVarTuple('Ts'), TypeVarTuple('Ts')) + + def test_instance_isinstance_of_typevartuple(self): + Ts = TypeVarTuple('Ts') + self.assertIsInstance(Ts, TypeVarTuple) + + def test_cannot_call_instance(self): + Ts = TypeVarTuple('Ts') + with self.assertRaises(TypeError): + Ts() + + def test_unpacked_typevartuple_is_equal_to_itself(self): + Ts = TypeVarTuple('Ts') + self.assertEqual((*Ts,)[0], (*Ts,)[0]) + self.assertEqual(Unpack[Ts], Unpack[Ts]) + + def test_parameterised_tuple_is_equal_to_itself(self): + Ts = TypeVarTuple('Ts') + self.assertEqual(tuple[*Ts], tuple[*Ts]) + self.assertEqual(Tuple[Unpack[Ts]], Tuple[Unpack[Ts]]) + + def tests_tuple_arg_ordering_matters(self): + Ts1 = TypeVarTuple('Ts1') + Ts2 = TypeVarTuple('Ts2') + self.assertNotEqual( + tuple[*Ts1, *Ts2], + tuple[*Ts2, *Ts1], + ) + self.assertNotEqual( + Tuple[Unpack[Ts1], Unpack[Ts2]], + Tuple[Unpack[Ts2], Unpack[Ts1]], + ) + + def test_tuple_args_and_parameters_are_correct(self): + Ts = TypeVarTuple('Ts') + t1 = tuple[*Ts] + self.assertEqual(t1.__args__, (*Ts,)) + self.assertEqual(t1.__parameters__, (Ts,)) + t2 = Tuple[Unpack[Ts]] + self.assertEqual(t2.__args__, (Unpack[Ts],)) + self.assertEqual(t2.__parameters__, (Ts,)) + + def test_var_substitution(self): + Ts = TypeVarTuple('Ts') + T = TypeVar('T') + T2 = TypeVar('T2') + class G1(Generic[*Ts]): pass + class G2(Generic[Unpack[Ts]]): pass + + for A in G1, G2, Tuple, tuple: + B = A[*Ts] + self.assertEqual(B[()], A[()]) + self.assertEqual(B[float], A[float]) + self.assertEqual(B[float, str], A[float, str]) + + C = A[Unpack[Ts]] + self.assertEqual(C[()], A[()]) + self.assertEqual(C[float], A[float]) + self.assertEqual(C[float, str], A[float, str]) + + D = list[A[*Ts]] + self.assertEqual(D[()], list[A[()]]) + self.assertEqual(D[float], list[A[float]]) + self.assertEqual(D[float, str], list[A[float, str]]) + + E = List[A[Unpack[Ts]]] + self.assertEqual(E[()], List[A[()]]) + self.assertEqual(E[float], List[A[float]]) + self.assertEqual(E[float, str], List[A[float, str]]) + + F = A[T, *Ts, T2] + with self.assertRaises(TypeError): + F[()] + with self.assertRaises(TypeError): + F[float] + self.assertEqual(F[float, str], A[float, str]) + self.assertEqual(F[float, str, int], A[float, str, int]) + self.assertEqual(F[float, str, int, bytes], A[float, str, int, bytes]) + + G = A[T, Unpack[Ts], T2] + with self.assertRaises(TypeError): + G[()] + with self.assertRaises(TypeError): + G[float] + self.assertEqual(G[float, str], A[float, str]) + self.assertEqual(G[float, str, int], A[float, str, int]) + self.assertEqual(G[float, str, int, bytes], A[float, str, int, bytes]) + + H = tuple[list[T], A[*Ts], list[T2]] + with self.assertRaises(TypeError): + H[()] + with self.assertRaises(TypeError): + H[float] + if A != Tuple: + self.assertEqual(H[float, str], + tuple[list[float], A[()], list[str]]) + self.assertEqual(H[float, str, int], + tuple[list[float], A[str], list[int]]) + self.assertEqual(H[float, str, int, bytes], + tuple[list[float], A[str, int], list[bytes]]) + + I = Tuple[List[T], A[Unpack[Ts]], List[T2]] + with self.assertRaises(TypeError): + I[()] + with self.assertRaises(TypeError): + I[float] + if A != Tuple: + self.assertEqual(I[float, str], + Tuple[List[float], A[()], List[str]]) + self.assertEqual(I[float, str, int], + Tuple[List[float], A[str], List[int]]) + self.assertEqual(I[float, str, int, bytes], + Tuple[List[float], A[str, int], List[bytes]]) + + def test_bad_var_substitution(self): + Ts = TypeVarTuple('Ts') + T = TypeVar('T') + T2 = TypeVar('T2') + class G1(Generic[*Ts]): pass + class G2(Generic[Unpack[Ts]]): pass + + for A in G1, G2, Tuple, tuple: + B = A[Ts] + with self.assertRaises(TypeError): + B[int, str] + + C = A[T, T2] + with self.assertRaises(TypeError): + C[*Ts] + with self.assertRaises(TypeError): + C[Unpack[Ts]] + + B = A[T, *Ts, str, T2] + with self.assertRaises(TypeError): + B[int, *Ts] + with self.assertRaises(TypeError): + B[int, *Ts, *Ts] + + C = A[T, Unpack[Ts], str, T2] + with self.assertRaises(TypeError): + C[int, Unpack[Ts]] + with self.assertRaises(TypeError): + C[int, Unpack[Ts], Unpack[Ts]] + + def test_repr_is_correct(self): + Ts = TypeVarTuple('Ts') + + class G1(Generic[*Ts]): pass + class G2(Generic[Unpack[Ts]]): pass + + self.assertEqual(repr(Ts), 'Ts') + + self.assertEqual(repr((*Ts,)[0]), 'typing.Unpack[Ts]') + self.assertEqual(repr(Unpack[Ts]), 'typing.Unpack[Ts]') + + self.assertEqual(repr(tuple[*Ts]), 'tuple[typing.Unpack[Ts]]') + self.assertEqual(repr(Tuple[Unpack[Ts]]), 'typing.Tuple[typing.Unpack[Ts]]') + + self.assertEqual(repr(*tuple[*Ts]), '*tuple[typing.Unpack[Ts]]') + self.assertEqual(repr(Unpack[Tuple[Unpack[Ts]]]), 'typing.Unpack[typing.Tuple[typing.Unpack[Ts]]]') + + def test_variadic_class_repr_is_correct(self): + Ts = TypeVarTuple('Ts') + class A(Generic[*Ts]): pass + class B(Generic[Unpack[Ts]]): pass + + self.assertEndsWith(repr(A[()]), 'A[()]') + self.assertEndsWith(repr(B[()]), 'B[()]') + self.assertEndsWith(repr(A[float]), 'A[float]') + self.assertEndsWith(repr(B[float]), 'B[float]') + self.assertEndsWith(repr(A[float, str]), 'A[float, str]') + self.assertEndsWith(repr(B[float, str]), 'B[float, str]') + + self.assertEndsWith(repr(A[*tuple[int, ...]]), + 'A[*tuple[int, ...]]') + self.assertEndsWith(repr(B[Unpack[Tuple[int, ...]]]), + 'B[typing.Unpack[typing.Tuple[int, ...]]]') + + self.assertEndsWith(repr(A[float, *tuple[int, ...]]), + 'A[float, *tuple[int, ...]]') + self.assertEndsWith(repr(A[float, Unpack[Tuple[int, ...]]]), + 'A[float, typing.Unpack[typing.Tuple[int, ...]]]') + + self.assertEndsWith(repr(A[*tuple[int, ...], str]), + 'A[*tuple[int, ...], str]') + self.assertEndsWith(repr(B[Unpack[Tuple[int, ...]], str]), + 'B[typing.Unpack[typing.Tuple[int, ...]], str]') + + self.assertEndsWith(repr(A[float, *tuple[int, ...], str]), + 'A[float, *tuple[int, ...], str]') + self.assertEndsWith(repr(B[float, Unpack[Tuple[int, ...]], str]), + 'B[float, typing.Unpack[typing.Tuple[int, ...]], str]') + + def test_variadic_class_alias_repr_is_correct(self): + Ts = TypeVarTuple('Ts') + class A(Generic[Unpack[Ts]]): pass + + B = A[*Ts] + self.assertEndsWith(repr(B), 'A[typing.Unpack[Ts]]') + self.assertEndsWith(repr(B[()]), 'A[()]') + self.assertEndsWith(repr(B[float]), 'A[float]') + self.assertEndsWith(repr(B[float, str]), 'A[float, str]') + + C = A[Unpack[Ts]] + self.assertEndsWith(repr(C), 'A[typing.Unpack[Ts]]') + self.assertEndsWith(repr(C[()]), 'A[()]') + self.assertEndsWith(repr(C[float]), 'A[float]') + self.assertEndsWith(repr(C[float, str]), 'A[float, str]') + + D = A[*Ts, int] + self.assertEndsWith(repr(D), 'A[typing.Unpack[Ts], int]') + self.assertEndsWith(repr(D[()]), 'A[int]') + self.assertEndsWith(repr(D[float]), 'A[float, int]') + self.assertEndsWith(repr(D[float, str]), 'A[float, str, int]') + + E = A[Unpack[Ts], int] + self.assertEndsWith(repr(E), 'A[typing.Unpack[Ts], int]') + self.assertEndsWith(repr(E[()]), 'A[int]') + self.assertEndsWith(repr(E[float]), 'A[float, int]') + self.assertEndsWith(repr(E[float, str]), 'A[float, str, int]') + + F = A[int, *Ts] + self.assertEndsWith(repr(F), 'A[int, typing.Unpack[Ts]]') + self.assertEndsWith(repr(F[()]), 'A[int]') + self.assertEndsWith(repr(F[float]), 'A[int, float]') + self.assertEndsWith(repr(F[float, str]), 'A[int, float, str]') + + G = A[int, Unpack[Ts]] + self.assertEndsWith(repr(G), 'A[int, typing.Unpack[Ts]]') + self.assertEndsWith(repr(G[()]), 'A[int]') + self.assertEndsWith(repr(G[float]), 'A[int, float]') + self.assertEndsWith(repr(G[float, str]), 'A[int, float, str]') + + H = A[int, *Ts, str] + self.assertEndsWith(repr(H), 'A[int, typing.Unpack[Ts], str]') + self.assertEndsWith(repr(H[()]), 'A[int, str]') + self.assertEndsWith(repr(H[float]), 'A[int, float, str]') + self.assertEndsWith(repr(H[float, str]), 'A[int, float, str, str]') + + I = A[int, Unpack[Ts], str] + self.assertEndsWith(repr(I), 'A[int, typing.Unpack[Ts], str]') + self.assertEndsWith(repr(I[()]), 'A[int, str]') + self.assertEndsWith(repr(I[float]), 'A[int, float, str]') + self.assertEndsWith(repr(I[float, str]), 'A[int, float, str, str]') + + J = A[*Ts, *tuple[str, ...]] + self.assertEndsWith(repr(J), 'A[typing.Unpack[Ts], *tuple[str, ...]]') + self.assertEndsWith(repr(J[()]), 'A[*tuple[str, ...]]') + self.assertEndsWith(repr(J[float]), 'A[float, *tuple[str, ...]]') + self.assertEndsWith(repr(J[float, str]), 'A[float, str, *tuple[str, ...]]') + + K = A[Unpack[Ts], Unpack[Tuple[str, ...]]] + self.assertEndsWith(repr(K), 'A[typing.Unpack[Ts], typing.Unpack[typing.Tuple[str, ...]]]') + self.assertEndsWith(repr(K[()]), 'A[typing.Unpack[typing.Tuple[str, ...]]]') + self.assertEndsWith(repr(K[float]), 'A[float, typing.Unpack[typing.Tuple[str, ...]]]') + self.assertEndsWith(repr(K[float, str]), 'A[float, str, typing.Unpack[typing.Tuple[str, ...]]]') + + def test_cannot_subclass(self): + with self.assertRaisesRegex(TypeError, NOT_A_BASE_TYPE % 'TypeVarTuple'): + class C(TypeVarTuple): pass + Ts = TypeVarTuple('Ts') + with self.assertRaisesRegex(TypeError, + CANNOT_SUBCLASS_INSTANCE % 'TypeVarTuple'): + class D(Ts): pass + with self.assertRaisesRegex(TypeError, CANNOT_SUBCLASS_TYPE): + class E(type(Unpack)): pass + with self.assertRaisesRegex(TypeError, CANNOT_SUBCLASS_TYPE): + class F(type(*Ts)): pass + with self.assertRaisesRegex(TypeError, CANNOT_SUBCLASS_TYPE): + class G(type(Unpack[Ts])): pass + with self.assertRaisesRegex(TypeError, + r'Cannot subclass typing\.Unpack'): + class H(Unpack): pass + with self.assertRaisesRegex(TypeError, r'Cannot subclass typing.Unpack\[Ts\]'): + class I(*Ts): pass + with self.assertRaisesRegex(TypeError, r'Cannot subclass typing.Unpack\[Ts\]'): + class J(Unpack[Ts]): pass + + def test_variadic_class_args_are_correct(self): + T = TypeVar('T') + Ts = TypeVarTuple('Ts') + class A(Generic[*Ts]): pass + class B(Generic[Unpack[Ts]]): pass + + C = A[()] + D = B[()] + self.assertEqual(C.__args__, ()) + self.assertEqual(D.__args__, ()) + + E = A[int] + F = B[int] + self.assertEqual(E.__args__, (int,)) + self.assertEqual(F.__args__, (int,)) + + G = A[int, str] + H = B[int, str] + self.assertEqual(G.__args__, (int, str)) + self.assertEqual(H.__args__, (int, str)) + + I = A[T] + J = B[T] + self.assertEqual(I.__args__, (T,)) + self.assertEqual(J.__args__, (T,)) + + K = A[*Ts] + L = B[Unpack[Ts]] + self.assertEqual(K.__args__, (*Ts,)) + self.assertEqual(L.__args__, (Unpack[Ts],)) + + M = A[T, *Ts] + N = B[T, Unpack[Ts]] + self.assertEqual(M.__args__, (T, *Ts)) + self.assertEqual(N.__args__, (T, Unpack[Ts])) + + O = A[*Ts, T] + P = B[Unpack[Ts], T] + self.assertEqual(O.__args__, (*Ts, T)) + self.assertEqual(P.__args__, (Unpack[Ts], T)) + + def test_variadic_class_origin_is_correct(self): + Ts = TypeVarTuple('Ts') + + class C(Generic[*Ts]): pass + self.assertIs(C[int].__origin__, C) + self.assertIs(C[T].__origin__, C) + self.assertIs(C[Unpack[Ts]].__origin__, C) + + class D(Generic[Unpack[Ts]]): pass + self.assertIs(D[int].__origin__, D) + self.assertIs(D[T].__origin__, D) + self.assertIs(D[Unpack[Ts]].__origin__, D) + + def test_get_type_hints_on_unpack_args(self): + Ts = TypeVarTuple('Ts') + + def func1(*args: *Ts): pass + self.assertEqual(gth(func1), {'args': Unpack[Ts]}) + + def func2(*args: *tuple[int, str]): pass + self.assertEqual(gth(func2), {'args': Unpack[tuple[int, str]]}) + + class CustomVariadic(Generic[*Ts]): pass + + def func3(*args: *CustomVariadic[int, str]): pass + self.assertEqual(gth(func3), {'args': Unpack[CustomVariadic[int, str]]}) + + def test_get_type_hints_on_unpack_args_string(self): + Ts = TypeVarTuple('Ts') + + def func1(*args: '*Ts'): pass + self.assertEqual(gth(func1, localns={'Ts': Ts}), + {'args': Unpack[Ts]}) + + def func2(*args: '*tuple[int, str]'): pass + self.assertEqual(gth(func2), {'args': Unpack[tuple[int, str]]}) + + class CustomVariadic(Generic[*Ts]): pass + + def func3(*args: '*CustomVariadic[int, str]'): pass + self.assertEqual(gth(func3, localns={'CustomVariadic': CustomVariadic}), + {'args': Unpack[CustomVariadic[int, str]]}) + + + def test_tuple_args_are_correct(self): + Ts = TypeVarTuple('Ts') + + self.assertEqual(tuple[*Ts].__args__, (*Ts,)) + self.assertEqual(Tuple[Unpack[Ts]].__args__, (Unpack[Ts],)) + + self.assertEqual(tuple[*Ts, int].__args__, (*Ts, int)) + self.assertEqual(Tuple[Unpack[Ts], int].__args__, (Unpack[Ts], int)) + + self.assertEqual(tuple[int, *Ts].__args__, (int, *Ts)) + self.assertEqual(Tuple[int, Unpack[Ts]].__args__, (int, Unpack[Ts])) + + self.assertEqual(tuple[int, *Ts, str].__args__, + (int, *Ts, str)) + self.assertEqual(Tuple[int, Unpack[Ts], str].__args__, + (int, Unpack[Ts], str)) + + self.assertEqual(tuple[*Ts, int].__args__, (*Ts, int)) + self.assertEqual(Tuple[Unpack[Ts]].__args__, (Unpack[Ts],)) + + def test_callable_args_are_correct(self): + Ts = TypeVarTuple('Ts') + Ts1 = TypeVarTuple('Ts1') + Ts2 = TypeVarTuple('Ts2') + + # TypeVarTuple in the arguments + + a = Callable[[*Ts], None] + b = Callable[[Unpack[Ts]], None] + self.assertEqual(a.__args__, (*Ts, type(None))) + self.assertEqual(b.__args__, (Unpack[Ts], type(None))) + + c = Callable[[int, *Ts], None] + d = Callable[[int, Unpack[Ts]], None] + self.assertEqual(c.__args__, (int, *Ts, type(None))) + self.assertEqual(d.__args__, (int, Unpack[Ts], type(None))) + + e = Callable[[*Ts, int], None] + f = Callable[[Unpack[Ts], int], None] + self.assertEqual(e.__args__, (*Ts, int, type(None))) + self.assertEqual(f.__args__, (Unpack[Ts], int, type(None))) + + g = Callable[[str, *Ts, int], None] + h = Callable[[str, Unpack[Ts], int], None] + self.assertEqual(g.__args__, (str, *Ts, int, type(None))) + self.assertEqual(h.__args__, (str, Unpack[Ts], int, type(None))) + + # TypeVarTuple as the return + + i = Callable[[None], *Ts] + j = Callable[[None], Unpack[Ts]] + self.assertEqual(i.__args__, (type(None), *Ts)) + self.assertEqual(j.__args__, (type(None), Unpack[Ts])) + + k = Callable[[None], tuple[int, *Ts]] + l = Callable[[None], Tuple[int, Unpack[Ts]]] + self.assertEqual(k.__args__, (type(None), tuple[int, *Ts])) + self.assertEqual(l.__args__, (type(None), Tuple[int, Unpack[Ts]])) + + m = Callable[[None], tuple[*Ts, int]] + n = Callable[[None], Tuple[Unpack[Ts], int]] + self.assertEqual(m.__args__, (type(None), tuple[*Ts, int])) + self.assertEqual(n.__args__, (type(None), Tuple[Unpack[Ts], int])) + + o = Callable[[None], tuple[str, *Ts, int]] + p = Callable[[None], Tuple[str, Unpack[Ts], int]] + self.assertEqual(o.__args__, (type(None), tuple[str, *Ts, int])) + self.assertEqual(p.__args__, (type(None), Tuple[str, Unpack[Ts], int])) + + # TypeVarTuple in both + + q = Callable[[*Ts], *Ts] + r = Callable[[Unpack[Ts]], Unpack[Ts]] + self.assertEqual(q.__args__, (*Ts, *Ts)) + self.assertEqual(r.__args__, (Unpack[Ts], Unpack[Ts])) + + s = Callable[[*Ts1], *Ts2] + u = Callable[[Unpack[Ts1]], Unpack[Ts2]] + self.assertEqual(s.__args__, (*Ts1, *Ts2)) + self.assertEqual(u.__args__, (Unpack[Ts1], Unpack[Ts2])) + + def test_variadic_class_with_duplicate_typevartuples_fails(self): + Ts1 = TypeVarTuple('Ts1') + Ts2 = TypeVarTuple('Ts2') + + with self.assertRaises(TypeError): + class C(Generic[*Ts1, *Ts1]): pass + with self.assertRaises(TypeError): + class D(Generic[Unpack[Ts1], Unpack[Ts1]]): pass + + with self.assertRaises(TypeError): + class E(Generic[*Ts1, *Ts2, *Ts1]): pass + with self.assertRaises(TypeError): + class F(Generic[Unpack[Ts1], Unpack[Ts2], Unpack[Ts1]]): pass + + def test_type_concatenation_in_variadic_class_argument_list_succeeds(self): + Ts = TypeVarTuple('Ts') + class C(Generic[Unpack[Ts]]): pass + + C[int, *Ts] + C[int, Unpack[Ts]] + + C[*Ts, int] + C[Unpack[Ts], int] + + C[int, *Ts, str] + C[int, Unpack[Ts], str] + + C[int, bool, *Ts, float, str] + C[int, bool, Unpack[Ts], float, str] + + def test_type_concatenation_in_tuple_argument_list_succeeds(self): + Ts = TypeVarTuple('Ts') + + tuple[int, *Ts] + tuple[*Ts, int] + tuple[int, *Ts, str] + tuple[int, bool, *Ts, float, str] + + Tuple[int, Unpack[Ts]] + Tuple[Unpack[Ts], int] + Tuple[int, Unpack[Ts], str] + Tuple[int, bool, Unpack[Ts], float, str] + + def test_variadic_class_definition_using_packed_typevartuple_fails(self): + Ts = TypeVarTuple('Ts') + with self.assertRaises(TypeError): + class C(Generic[Ts]): pass + + def test_variadic_class_definition_using_concrete_types_fails(self): + Ts = TypeVarTuple('Ts') + with self.assertRaises(TypeError): + class F(Generic[*Ts, int]): pass + with self.assertRaises(TypeError): + class E(Generic[Unpack[Ts], int]): pass + + def test_variadic_class_with_2_typevars_accepts_2_or_more_args(self): + Ts = TypeVarTuple('Ts') + T1 = TypeVar('T1') + T2 = TypeVar('T2') + + class A(Generic[T1, T2, *Ts]): pass + A[int, str] + A[int, str, float] + A[int, str, float, bool] + + class B(Generic[T1, T2, Unpack[Ts]]): pass + B[int, str] + B[int, str, float] + B[int, str, float, bool] + + class C(Generic[T1, *Ts, T2]): pass + C[int, str] + C[int, str, float] + C[int, str, float, bool] + + class D(Generic[T1, Unpack[Ts], T2]): pass + D[int, str] + D[int, str, float] + D[int, str, float, bool] + + class E(Generic[*Ts, T1, T2]): pass + E[int, str] + E[int, str, float] + E[int, str, float, bool] + + class F(Generic[Unpack[Ts], T1, T2]): pass + F[int, str] + F[int, str, float] + F[int, str, float, bool] + + def test_variadic_args_annotations_are_correct(self): + Ts = TypeVarTuple('Ts') + + def f(*args: Unpack[Ts]): pass + def g(*args: *Ts): pass + self.assertEqual(f.__annotations__, {'args': Unpack[Ts]}) + self.assertEqual(g.__annotations__, {'args': (*Ts,)[0]}) + + + def test_variadic_args_with_ellipsis_annotations_are_correct(self): + def a(*args: *tuple[int, ...]): pass + self.assertEqual(a.__annotations__, + {'args': (*tuple[int, ...],)[0]}) + + def b(*args: Unpack[Tuple[int, ...]]): pass + self.assertEqual(b.__annotations__, + {'args': Unpack[Tuple[int, ...]]}) + + + def test_concatenation_in_variadic_args_annotations_are_correct(self): + Ts = TypeVarTuple('Ts') + + # Unpacking using `*`, native `tuple` type + + def a(*args: *tuple[int, *Ts]): pass + self.assertEqual( + a.__annotations__, + {'args': (*tuple[int, *Ts],)[0]}, + ) + + def b(*args: *tuple[*Ts, int]): pass + self.assertEqual( + b.__annotations__, + {'args': (*tuple[*Ts, int],)[0]}, + ) + + def c(*args: *tuple[str, *Ts, int]): pass + self.assertEqual( + c.__annotations__, + {'args': (*tuple[str, *Ts, int],)[0]}, + ) + + def d(*args: *tuple[int, bool, *Ts, float, str]): pass + self.assertEqual( + d.__annotations__, + {'args': (*tuple[int, bool, *Ts, float, str],)[0]}, + ) + + # Unpacking using `Unpack`, `Tuple` type from typing.py + + def e(*args: Unpack[Tuple[int, Unpack[Ts]]]): pass + self.assertEqual( + e.__annotations__, + {'args': Unpack[Tuple[int, Unpack[Ts]]]}, + ) + + def f(*args: Unpack[Tuple[Unpack[Ts], int]]): pass + self.assertEqual( + f.__annotations__, + {'args': Unpack[Tuple[Unpack[Ts], int]]}, + ) + + def g(*args: Unpack[Tuple[str, Unpack[Ts], int]]): pass + self.assertEqual( + g.__annotations__, + {'args': Unpack[Tuple[str, Unpack[Ts], int]]}, + ) + + def h(*args: Unpack[Tuple[int, bool, Unpack[Ts], float, str]]): pass + self.assertEqual( + h.__annotations__, + {'args': Unpack[Tuple[int, bool, Unpack[Ts], float, str]]}, + ) + + def test_variadic_class_same_args_results_in_equalty(self): + Ts = TypeVarTuple('Ts') + class C(Generic[*Ts]): pass + class D(Generic[Unpack[Ts]]): pass + + self.assertEqual(C[int], C[int]) + self.assertEqual(D[int], D[int]) + + Ts1 = TypeVarTuple('Ts1') + Ts2 = TypeVarTuple('Ts2') + + self.assertEqual( + C[*Ts1], + C[*Ts1], + ) + self.assertEqual( + D[Unpack[Ts1]], + D[Unpack[Ts1]], + ) + + self.assertEqual( + C[*Ts1, *Ts2], + C[*Ts1, *Ts2], + ) + self.assertEqual( + D[Unpack[Ts1], Unpack[Ts2]], + D[Unpack[Ts1], Unpack[Ts2]], + ) + + self.assertEqual( + C[int, *Ts1, *Ts2], + C[int, *Ts1, *Ts2], + ) + self.assertEqual( + D[int, Unpack[Ts1], Unpack[Ts2]], + D[int, Unpack[Ts1], Unpack[Ts2]], + ) + + def test_variadic_class_arg_ordering_matters(self): + Ts = TypeVarTuple('Ts') + class C(Generic[*Ts]): pass + class D(Generic[Unpack[Ts]]): pass + + self.assertNotEqual( + C[int, str], + C[str, int], + ) + self.assertNotEqual( + D[int, str], + D[str, int], + ) + + Ts1 = TypeVarTuple('Ts1') + Ts2 = TypeVarTuple('Ts2') + + self.assertNotEqual( + C[*Ts1, *Ts2], + C[*Ts2, *Ts1], + ) + self.assertNotEqual( + D[Unpack[Ts1], Unpack[Ts2]], + D[Unpack[Ts2], Unpack[Ts1]], + ) + + def test_variadic_class_arg_typevartuple_identity_matters(self): + Ts = TypeVarTuple('Ts') + Ts1 = TypeVarTuple('Ts1') + Ts2 = TypeVarTuple('Ts2') + + class C(Generic[*Ts]): pass + class D(Generic[Unpack[Ts]]): pass + + self.assertNotEqual(C[*Ts1], C[*Ts2]) + self.assertNotEqual(D[Unpack[Ts1]], D[Unpack[Ts2]]) + +class TypeVarTuplePicklingTests(BaseTestCase): + # These are slightly awkward tests to run, because TypeVarTuples are only + # picklable if defined in the global scope. We therefore need to push + # various things defined in these tests into the global scope with `global` + # statements at the start of each test. + + # TODO: RUSTPYTHON + @all_pickle_protocols + def test_pickling_then_unpickling_results_in_same_identity(self, proto): + global global_Ts1 # See explanation at start of class. + global_Ts1 = TypeVarTuple('global_Ts1') + global_Ts2 = pickle.loads(pickle.dumps(global_Ts1, proto)) + self.assertIs(global_Ts1, global_Ts2) + + @all_pickle_protocols + def test_pickling_then_unpickling_unpacked_results_in_same_identity(self, proto): + global global_Ts # See explanation at start of class. + global_Ts = TypeVarTuple('global_Ts') + + unpacked1 = (*global_Ts,)[0] + unpacked2 = pickle.loads(pickle.dumps(unpacked1, proto)) + self.assertIs(unpacked1, unpacked2) + + unpacked3 = Unpack[global_Ts] + unpacked4 = pickle.loads(pickle.dumps(unpacked3, proto)) + self.assertIs(unpacked3, unpacked4) + + @all_pickle_protocols + def test_pickling_then_unpickling_tuple_with_typevartuple_equality( + self, proto + ): + global global_T, global_Ts # See explanation at start of class. + global_T = TypeVar('global_T') + global_Ts = TypeVarTuple('global_Ts') + + tuples = [ + tuple[*global_Ts], + Tuple[Unpack[global_Ts]], + + tuple[T, *global_Ts], + Tuple[T, Unpack[global_Ts]], + + tuple[int, *global_Ts], + Tuple[int, Unpack[global_Ts]], + ] + for t in tuples: + t2 = pickle.loads(pickle.dumps(t, proto)) + self.assertEqual(t, t2) + + + +class UnionTests(BaseTestCase): + + def test_basics(self): + u = Union[int, float] + self.assertNotEqual(u, Union) + + def test_union_isinstance(self): + self.assertTrue(isinstance(42, Union[int, str])) + self.assertTrue(isinstance('abc', Union[int, str])) + self.assertFalse(isinstance(3.14, Union[int, str])) + self.assertTrue(isinstance(42, Union[int, list[int]])) + self.assertTrue(isinstance(42, Union[int, Any])) + + def test_union_isinstance_type_error(self): + with self.assertRaises(TypeError): + isinstance(42, Union[str, list[int]]) + with self.assertRaises(TypeError): + isinstance(42, Union[list[int], int]) + with self.assertRaises(TypeError): + isinstance(42, Union[list[int], str]) + with self.assertRaises(TypeError): + isinstance(42, Union[str, Any]) + with self.assertRaises(TypeError): + isinstance(42, Union[Any, int]) + with self.assertRaises(TypeError): + isinstance(42, Union[Any, str]) + + def test_optional_isinstance(self): + self.assertTrue(isinstance(42, Optional[int])) + self.assertTrue(isinstance(None, Optional[int])) + self.assertFalse(isinstance('abc', Optional[int])) + + def test_optional_isinstance_type_error(self): + with self.assertRaises(TypeError): + isinstance(42, Optional[list[int]]) + with self.assertRaises(TypeError): + isinstance(None, Optional[list[int]]) + with self.assertRaises(TypeError): + isinstance(42, Optional[Any]) + with self.assertRaises(TypeError): + isinstance(None, Optional[Any]) + + def test_union_issubclass(self): + self.assertTrue(issubclass(int, Union[int, str])) + self.assertTrue(issubclass(str, Union[int, str])) + self.assertFalse(issubclass(float, Union[int, str])) + self.assertTrue(issubclass(int, Union[int, list[int]])) + self.assertTrue(issubclass(int, Union[int, Any])) + self.assertFalse(issubclass(int, Union[str, Any])) + self.assertTrue(issubclass(int, Union[Any, int])) + self.assertFalse(issubclass(int, Union[Any, str])) + + def test_union_issubclass_type_error(self): + with self.assertRaises(TypeError): + issubclass(int, Union) + with self.assertRaises(TypeError): + issubclass(Union, int) + with self.assertRaises(TypeError): + issubclass(Union[int, str], int) + with self.assertRaises(TypeError): + issubclass(int, Union[str, list[int]]) + with self.assertRaises(TypeError): + issubclass(int, Union[list[int], int]) + with self.assertRaises(TypeError): + issubclass(int, Union[list[int], str]) + + def test_optional_issubclass(self): + self.assertTrue(issubclass(int, Optional[int])) + self.assertTrue(issubclass(type(None), Optional[int])) + self.assertFalse(issubclass(str, Optional[int])) + self.assertTrue(issubclass(Any, Optional[Any])) + self.assertTrue(issubclass(type(None), Optional[Any])) + self.assertFalse(issubclass(int, Optional[Any])) + + def test_optional_issubclass_type_error(self): + with self.assertRaises(TypeError): + issubclass(list[int], Optional[list[int]]) + with self.assertRaises(TypeError): + issubclass(type(None), Optional[list[int]]) + with self.assertRaises(TypeError): + issubclass(int, Optional[list[int]]) + + def test_union_any(self): + u = Union[Any] + self.assertEqual(u, Any) + u1 = Union[int, Any] + u2 = Union[Any, int] + u3 = Union[Any, object] + self.assertEqual(u1, u2) + self.assertNotEqual(u1, Any) + self.assertNotEqual(u2, Any) + self.assertNotEqual(u3, Any) + + def test_union_object(self): + u = Union[object] + self.assertEqual(u, object) + u1 = Union[int, object] + u2 = Union[object, int] + self.assertEqual(u1, u2) + self.assertNotEqual(u1, object) + self.assertNotEqual(u2, object) + + def test_unordered(self): + u1 = Union[int, float] + u2 = Union[float, int] + self.assertEqual(u1, u2) + + def test_single_class_disappears(self): + t = Union[Employee] + self.assertIs(t, Employee) + + def test_base_class_kept(self): + u = Union[Employee, Manager] + self.assertNotEqual(u, Employee) + self.assertIn(Employee, u.__args__) + self.assertIn(Manager, u.__args__) + + def test_union_union(self): + u = Union[int, float] + v = Union[u, Employee] + self.assertEqual(v, Union[int, float, Employee]) + + def test_union_of_unhashable(self): + class UnhashableMeta(type): + __hash__ = None + + class A(metaclass=UnhashableMeta): ... + class B(metaclass=UnhashableMeta): ... + + self.assertEqual(Union[A, B].__args__, (A, B)) + union1 = Union[A, B] + with self.assertRaises(TypeError): + hash(union1) + + union2 = Union[int, B] + with self.assertRaises(TypeError): + hash(union2) + + union3 = Union[A, int] + with self.assertRaises(TypeError): + hash(union3) + + def test_repr(self): + self.assertEqual(repr(Union), 'typing.Union') + u = Union[Employee, int] + self.assertEqual(repr(u), 'typing.Union[%s.Employee, int]' % __name__) + u = Union[int, Employee] + self.assertEqual(repr(u), 'typing.Union[int, %s.Employee]' % __name__) + T = TypeVar('T') + u = Union[T, int][int] + self.assertEqual(repr(u), repr(int)) + u = Union[List[int], int] + self.assertEqual(repr(u), 'typing.Union[typing.List[int], int]') + u = Union[list[int], dict[str, float]] + self.assertEqual(repr(u), 'typing.Union[list[int], dict[str, float]]') + u = Union[int | float] + self.assertEqual(repr(u), 'typing.Union[int, float]') + + u = Union[None, str] + self.assertEqual(repr(u), 'typing.Optional[str]') + u = Union[str, None] + self.assertEqual(repr(u), 'typing.Optional[str]') + u = Union[None, str, int] + self.assertEqual(repr(u), 'typing.Union[NoneType, str, int]') + u = Optional[str] + self.assertEqual(repr(u), 'typing.Optional[str]') + + def test_dir(self): + dir_items = set(dir(Union[str, int])) + for required_item in [ + '__args__', '__parameters__', '__origin__', + ]: + with self.subTest(required_item=required_item): + self.assertIn(required_item, dir_items) + + def test_cannot_subclass(self): + with self.assertRaisesRegex(TypeError, + r'Cannot subclass typing\.Union'): + class C(Union): + pass + with self.assertRaisesRegex(TypeError, CANNOT_SUBCLASS_TYPE): + class D(type(Union)): + pass + with self.assertRaisesRegex(TypeError, + r'Cannot subclass typing\.Union\[int, str\]'): + class E(Union[int, str]): + pass + + def test_cannot_instantiate(self): + with self.assertRaises(TypeError): + Union() + with self.assertRaises(TypeError): + type(Union)() + u = Union[int, float] + with self.assertRaises(TypeError): + u() + with self.assertRaises(TypeError): + type(u)() + + def test_union_generalization(self): + self.assertFalse(Union[str, typing.Iterable[int]] == str) + self.assertFalse(Union[str, typing.Iterable[int]] == typing.Iterable[int]) + self.assertIn(str, Union[str, typing.Iterable[int]].__args__) + self.assertIn(typing.Iterable[int], Union[str, typing.Iterable[int]].__args__) + + def test_union_compare_other(self): + self.assertNotEqual(Union, object) + self.assertNotEqual(Union, Any) + self.assertNotEqual(ClassVar, Union) + self.assertNotEqual(Optional, Union) + self.assertNotEqual([None], Optional) + self.assertNotEqual(Optional, typing.Mapping) + self.assertNotEqual(Optional[typing.MutableMapping], Union) + + def test_optional(self): + o = Optional[int] + u = Union[int, None] + self.assertEqual(o, u) + + def test_empty(self): + with self.assertRaises(TypeError): + Union[()] + + def test_no_eval_union(self): + u = Union[int, str] + def f(x: u): ... + self.assertIs(get_type_hints(f)['x'], u) + + def test_function_repr_union(self): + def fun() -> int: ... + self.assertEqual(repr(Union[fun, int]), 'typing.Union[fun, int]') + + def test_union_str_pattern(self): + # Shouldn't crash; see http://bugs.python.org/issue25390 + A = Union[str, Pattern] + A + + def test_etree(self): + # See https://github.com/python/typing/issues/229 + # (Only relevant for Python 2.) + from xml.etree.ElementTree import Element Union[Element, str] # Shouldn't crash - def Elem(*args): - return Element(*args) + def Elem(*args): + return Element(*args) + + Union[Elem, str] # Nor should this + + def test_union_of_literals(self): + self.assertEqual(Union[Literal[1], Literal[2]].__args__, + (Literal[1], Literal[2])) + self.assertEqual(Union[Literal[1], Literal[1]], + Literal[1]) + + self.assertEqual(Union[Literal[False], Literal[0]].__args__, + (Literal[False], Literal[0])) + self.assertEqual(Union[Literal[True], Literal[1]].__args__, + (Literal[True], Literal[1])) + + import enum + class Ints(enum.IntEnum): + A = 0 + B = 1 - Union[Elem, str] # Nor should this + self.assertEqual(Union[Literal[Ints.A], Literal[Ints.A]], + Literal[Ints.A]) + self.assertEqual(Union[Literal[Ints.B], Literal[Ints.B]], + Literal[Ints.B]) + + self.assertEqual(Union[Literal[Ints.A], Literal[Ints.B]].__args__, + (Literal[Ints.A], Literal[Ints.B])) + + self.assertEqual(Union[Literal[0], Literal[Ints.A], Literal[False]].__args__, + (Literal[0], Literal[Ints.A], Literal[False])) + self.assertEqual(Union[Literal[1], Literal[Ints.B], Literal[True]].__args__, + (Literal[1], Literal[Ints.B], Literal[True])) class TupleTests(BaseTestCase): @@ -476,6 +2347,15 @@ def test_eq_hash(self): self.assertNotEqual(C, Callable[..., int]) self.assertNotEqual(C, Callable) + def test_dir(self): + Callable = self.Callable + dir_items = set(dir(Callable[..., int])) + for required_item in [ + '__args__', '__parameters__', '__origin__', + ]: + with self.subTest(required_item=required_item): + self.assertIn(required_item, dir_items) + def test_cannot_instantiate(self): Callable = self.Callable with self.assertRaises(TypeError): @@ -505,13 +2385,13 @@ def test_callable_instance_type_error(self): def f(): pass with self.assertRaises(TypeError): - self.assertIsInstance(f, Callable[[], None]) + isinstance(f, Callable[[], None]) with self.assertRaises(TypeError): - self.assertIsInstance(f, Callable[[], Any]) + isinstance(f, Callable[[], Any]) with self.assertRaises(TypeError): - self.assertNotIsInstance(None, Callable[[], None]) + isinstance(None, Callable[[], None]) with self.assertRaises(TypeError): - self.assertNotIsInstance(None, Callable[[], Any]) + isinstance(None, Callable[[], Any]) def test_repr(self): Callable = self.Callable @@ -558,14 +2438,29 @@ def test_weakref(self): self.assertEqual(weakref.ref(alias)(), alias) def test_pickle(self): + global T_pickle, P_pickle, TS_pickle # needed for pickling Callable = self.Callable - alias = Callable[[int, str], float] - for proto in range(pickle.HIGHEST_PROTOCOL + 1): - s = pickle.dumps(alias, proto) - loaded = pickle.loads(s) - self.assertEqual(alias.__origin__, loaded.__origin__) - self.assertEqual(alias.__args__, loaded.__args__) - self.assertEqual(alias.__parameters__, loaded.__parameters__) + T_pickle = TypeVar('T_pickle') + P_pickle = ParamSpec('P_pickle') + TS_pickle = TypeVarTuple('TS_pickle') + + samples = [ + Callable[[int, str], float], + Callable[P_pickle, int], + Callable[P_pickle, T_pickle], + Callable[Concatenate[int, P_pickle], int], + Callable[Concatenate[*TS_pickle, P_pickle], int], + ] + for alias in samples: + for proto in range(pickle.HIGHEST_PROTOCOL + 1): + with self.subTest(alias=alias, proto=proto): + s = pickle.dumps(alias, proto) + loaded = pickle.loads(s) + self.assertEqual(alias.__origin__, loaded.__origin__) + self.assertEqual(alias.__args__, loaded.__args__) + self.assertEqual(alias.__parameters__, loaded.__parameters__) + + del T_pickle, P_pickle, TS_pickle # cleaning up global state def test_var_substitution(self): Callable = self.Callable @@ -574,8 +2469,7 @@ def test_var_substitution(self): C2 = Callable[[KT, T], VT] C3 = Callable[..., T] self.assertEqual(C1[str], Callable[[int, str], str]) - if Callable is typing.Callable: - self.assertEqual(C1[None], Callable[[int, type(None)], type(None)]) + self.assertEqual(C1[None], Callable[[int, type(None)], type(None)]) self.assertEqual(C2[int, float, str], Callable[[int, float], str]) self.assertEqual(C3[int], Callable[..., int]) self.assertEqual(C3[NoReturn], Callable[..., NoReturn]) @@ -592,6 +2486,16 @@ def test_var_substitution(self): self.assertEqual(C5[int, str, float], Callable[[typing.List[int], tuple[str, int], float], int]) + def test_type_subst_error(self): + Callable = self.Callable + P = ParamSpec('P') + T = TypeVar('T') + + pat = "Expected a list of types, an ellipsis, ParamSpec, or Concatenate." + + with self.assertRaisesRegex(TypeError, pat): + Callable[P, T][0, int] + def test_type_erasure(self): Callable = self.Callable class C1(Callable): @@ -649,8 +2553,7 @@ def test_concatenate(self): self.assertEqual(C[[], int], Callable[[int], int]) self.assertEqual(C[Concatenate[str, P2], int], Callable[Concatenate[int, str, P2], int]) - with self.assertRaises(TypeError): - C[..., int] + self.assertEqual(C[..., int], Callable[Concatenate[int, ...], int]) C = Callable[Concatenate[int, P], int] self.assertEqual(repr(C), @@ -661,8 +2564,49 @@ def test_concatenate(self): self.assertEqual(C[[]], Callable[[int], int]) self.assertEqual(C[Concatenate[str, P2]], Callable[Concatenate[int, str, P2], int]) - with self.assertRaises(TypeError): - C[...] + self.assertEqual(C[...], Callable[Concatenate[int, ...], int]) + + def test_nested_paramspec(self): + # Since Callable has some special treatment, we want to be sure + # that substituion works correctly, see gh-103054 + Callable = self.Callable + P = ParamSpec('P') + P2 = ParamSpec('P2') + T = TypeVar('T') + T2 = TypeVar('T2') + Ts = TypeVarTuple('Ts') + class My(Generic[P, T]): + pass + + self.assertEqual(My.__parameters__, (P, T)) + + C1 = My[[int, T2], Callable[P2, T2]] + self.assertEqual(C1.__args__, ((int, T2), Callable[P2, T2])) + self.assertEqual(C1.__parameters__, (T2, P2)) + self.assertEqual(C1[str, [list[int], bytes]], + My[[int, str], Callable[[list[int], bytes], str]]) + + C2 = My[[Callable[[T2], int], list[T2]], str] + self.assertEqual(C2.__args__, ((Callable[[T2], int], list[T2]), str)) + self.assertEqual(C2.__parameters__, (T2,)) + self.assertEqual(C2[list[str]], + My[[Callable[[list[str]], int], list[list[str]]], str]) + + C3 = My[[Callable[P2, T2], T2], T2] + self.assertEqual(C3.__args__, ((Callable[P2, T2], T2), T2)) + self.assertEqual(C3.__parameters__, (P2, T2)) + self.assertEqual(C3[[], int], + My[[Callable[[], int], int], int]) + self.assertEqual(C3[[str, bool], int], + My[[Callable[[str, bool], int], int], int]) + self.assertEqual(C3[[str, bool], T][int], + My[[Callable[[str, bool], int], int], int]) + + C4 = My[[Callable[[int, *Ts, str], T2], T2], T2] + self.assertEqual(C4.__args__, ((Callable[[int, *Ts, str], T2], T2), T2)) + self.assertEqual(C4.__parameters__, (Ts, T2)) + self.assertEqual(C4[bool, bytes, float], + My[[Callable[[int, bool, bytes, str], float], float], float]) def test_errors(self): Callable = self.Callable @@ -676,6 +2620,7 @@ def test_errors(self): with self.assertRaisesRegex(TypeError, "few arguments for"): C1[int] + class TypingCallableTests(BaseCallableTests, BaseTestCase): Callable = typing.Callable @@ -690,25 +2635,11 @@ def test_consistency(self): class CollectionsCallableTests(BaseCallableTests, BaseTestCase): Callable = collections.abc.Callable - # TODO: RUSTPYTHON - @unittest.expectedFailure - def test_errors(self): # TODO: RUSTPYTHON, remove when this passes - super().test_errors() # TODO: RUSTPYTHON, remove when this passes - - # TODO: RUSTPYTHON, AssertionError: 'collections.abc.Callable[__main__.ParamSpec, typing.TypeVar]' != 'collections.abc.Callable[~P, ~T]' - @unittest.expectedFailure - def test_paramspec(self): # TODO: RUSTPYTHON, remove when this passes - super().test_paramspec() # TODO: RUSTPYTHON, remove when this passes # TODO: RUSTPYTHON @unittest.expectedFailure - def test_concatenate(self): # TODO: RUSTPYTHON, remove when this passes - super().test_concatenate() # TODO: RUSTPYTHON, remove when this passes - - # TODO: RUSTPYTHON might be fixed by updating typing to 3.12 - @unittest.expectedFailure - def test_repr(self): # TODO: RUSTPYTHON, remove when this passes - super().test_repr() # TODO: RUSTPYTHON, remove when this passes + def test_errors(self): + super().test_errors() class LiteralTests(BaseTestCase): @@ -723,9 +2654,16 @@ def test_basics(self): Literal[Literal[1, 2], Literal[4, 5]] Literal[b"foo", u"bar"] + def test_enum(self): + import enum + class My(enum.Enum): + A = 'A' + + self.assertEqual(Literal[My.A].__args__, (My.A,)) + def test_illegal_parameters_do_not_raise_runtime_errors(self): # Type checkers should reject these types, but we do not - # raise errors at runtime to maintain maximium flexibility. + # raise errors at runtime to maintain maximum flexibility. Literal[int] Literal[3j + 2, ..., ()] Literal[{"foo": 3, "bar": 4}] @@ -743,6 +2681,14 @@ def test_repr(self): self.assertEqual(repr(Literal[None]), "typing.Literal[None]") self.assertEqual(repr(Literal[1, 2, 3, 3]), "typing.Literal[1, 2, 3]") + def test_dir(self): + dir_items = set(dir(Literal[1, 2, 3])) + for required_item in [ + '__args__', '__parameters__', '__origin__', + ]: + with self.subTest(required_item=required_item): + self.assertIn(required_item, dir_items) + def test_cannot_init(self): with self.assertRaises(TypeError): Literal() @@ -804,939 +2750,694 @@ def test_flatten(self): self.assertEqual(l, Literal[1, 2, 3]) self.assertEqual(l.__args__, (1, 2, 3)) + def test_does_not_flatten_enum(self): + import enum + class Ints(enum.IntEnum): + A = 1 + B = 2 + + l = Literal[ + Literal[Ints.A], + Literal[Ints.B], + Literal[1], + Literal[2], + ] + self.assertEqual(l.__args__, (Ints.A, Ints.B, 1, 2)) -XK = TypeVar('XK', str, bytes) -XV = TypeVar('XV') - - -class SimpleMapping(Generic[XK, XV]): - - def __getitem__(self, key: XK) -> XV: - ... - - def __setitem__(self, key: XK, value: XV): - ... - - def get(self, key: XK, default: XV = None) -> XV: - ... - - -class MySimpleMapping(SimpleMapping[XK, XV]): - - def __init__(self): - self.store = {} - - def __getitem__(self, key: str): - return self.store[key] - - def __setitem__(self, key: str, value): - self.store[key] = value - - def get(self, key: str, default=None): - try: - return self.store[key] - except KeyError: - return default - - -class Coordinate(Protocol): - x: int - y: int - -@runtime_checkable -class Point(Coordinate, Protocol): - label: str - -class MyPoint: - x: int - y: int - label: str - -class XAxis(Protocol): - x: int - -class YAxis(Protocol): - y: int - -@runtime_checkable -class Position(XAxis, YAxis, Protocol): - pass - -@runtime_checkable -class Proto(Protocol): - attr: int - def meth(self, arg: str) -> int: - ... - -class Concrete(Proto): - pass - -class Other: - attr: int = 1 - def meth(self, arg: str) -> int: - if arg == 'this': - return 1 - return 0 - -class NT(NamedTuple): - x: int - y: int - -@runtime_checkable -class HasCallProtocol(Protocol): - __call__: typing.Callable - - -class ProtocolTests(BaseTestCase): - # TODO: RUSTPYTHON - @unittest.expectedFailure - def test_basic_protocol(self): - @runtime_checkable - class P(Protocol): - def meth(self): - pass - - class C: pass - - class D: - def meth(self): - pass - - def f(): - pass - - self.assertIsSubclass(D, P) - self.assertIsInstance(D(), P) - self.assertNotIsSubclass(C, P) - self.assertNotIsInstance(C(), P) - self.assertNotIsSubclass(types.FunctionType, P) - self.assertNotIsInstance(f, P) - - # TODO: RUSTPYTHON - @unittest.expectedFailure - def test_everything_implements_empty_protocol(self): - @runtime_checkable - class Empty(Protocol): - pass - - class C: - pass - - def f(): - pass - - for thing in (object, type, tuple, C, types.FunctionType): - self.assertIsSubclass(thing, Empty) - for thing in (object(), 1, (), typing, f): - self.assertIsInstance(thing, Empty) - - def test_function_implements_protocol(self): - def f(): - pass - - self.assertIsInstance(f, HasCallProtocol) - - def test_no_inheritance_from_nominal(self): - class C: pass - - class BP(Protocol): pass - - with self.assertRaises(TypeError): - class P(C, Protocol): - pass - with self.assertRaises(TypeError): - class P(Protocol, C): - pass - with self.assertRaises(TypeError): - class P(BP, C, Protocol): - pass - - class D(BP, C): pass - - class E(C, BP): pass - - self.assertNotIsInstance(D(), E) - self.assertNotIsInstance(E(), D) - - # TODO: RUSTPYTHON - @unittest.expectedFailure - def test_no_instantiation(self): - class P(Protocol): pass - - with self.assertRaises(TypeError): - P() - - class C(P): pass - - self.assertIsInstance(C(), C) - with self.assertRaises(TypeError): - C(42) - - T = TypeVar('T') - - class PG(Protocol[T]): pass - - with self.assertRaises(TypeError): - PG() - with self.assertRaises(TypeError): - PG[int]() - with self.assertRaises(TypeError): - PG[T]() - - class CG(PG[T]): pass - - self.assertIsInstance(CG[int](), CG) - with self.assertRaises(TypeError): - CG[int](42) - - def test_cannot_instantiate_abstract(self): - @runtime_checkable - class P(Protocol): - @abc.abstractmethod - def ameth(self) -> int: - raise NotImplementedError - - class B(P): - pass - - class C(B): - def ameth(self) -> int: - return 26 - - with self.assertRaises(TypeError): - B() - self.assertIsInstance(C(), P) - - # TODO: RUSTPYTHON - @unittest.expectedFailure - def test_subprotocols_extending(self): - class P1(Protocol): - def meth1(self): - pass - - @runtime_checkable - class P2(P1, Protocol): - def meth2(self): - pass - - class C: - def meth1(self): - pass - - def meth2(self): - pass - class C1: - def meth1(self): - pass +XK = TypeVar('XK', str, bytes) +XV = TypeVar('XV') - class C2: - def meth2(self): - pass - self.assertNotIsInstance(C1(), P2) - self.assertNotIsInstance(C2(), P2) - self.assertNotIsSubclass(C1, P2) - self.assertNotIsSubclass(C2, P2) - self.assertIsInstance(C(), P2) - self.assertIsSubclass(C, P2) +class SimpleMapping(Generic[XK, XV]): - # TODO: RUSTPYTHON - @unittest.expectedFailure - def test_subprotocols_merging(self): - class P1(Protocol): - def meth1(self): - pass + def __getitem__(self, key: XK) -> XV: + ... - class P2(Protocol): - def meth2(self): - pass + def __setitem__(self, key: XK, value: XV): + ... - @runtime_checkable - class P(P1, P2, Protocol): - pass + def get(self, key: XK, default: XV = None) -> XV: + ... - class C: - def meth1(self): - pass - def meth2(self): - pass +class MySimpleMapping(SimpleMapping[XK, XV]): - class C1: - def meth1(self): - pass + def __init__(self): + self.store = {} - class C2: - def meth2(self): - pass + def __getitem__(self, key: str): + return self.store[key] - self.assertNotIsInstance(C1(), P) - self.assertNotIsInstance(C2(), P) - self.assertNotIsSubclass(C1, P) - self.assertNotIsSubclass(C2, P) - self.assertIsInstance(C(), P) - self.assertIsSubclass(C, P) + def __setitem__(self, key: str, value): + self.store[key] = value - # TODO: RUSTPYTHON - @unittest.expectedFailure - def test_protocols_issubclass(self): - T = TypeVar('T') + def get(self, key: str, default=None): + try: + return self.store[key] + except KeyError: + return default - @runtime_checkable - class P(Protocol): - def x(self): ... - @runtime_checkable - class PG(Protocol[T]): - def x(self): ... +class Coordinate(Protocol): + x: int + y: int - class BadP(Protocol): - def x(self): ... +@runtime_checkable +class Point(Coordinate, Protocol): + label: str - class BadPG(Protocol[T]): - def x(self): ... +class MyPoint: + x: int + y: int + label: str - class C: - def x(self): ... +class XAxis(Protocol): + x: int - self.assertIsSubclass(C, P) - self.assertIsSubclass(C, PG) - self.assertIsSubclass(BadP, PG) +class YAxis(Protocol): + y: int - with self.assertRaises(TypeError): - issubclass(C, PG[T]) - with self.assertRaises(TypeError): - issubclass(C, PG[C]) - with self.assertRaises(TypeError): - issubclass(C, BadP) - with self.assertRaises(TypeError): - issubclass(C, BadPG) - with self.assertRaises(TypeError): - issubclass(P, PG[T]) - with self.assertRaises(TypeError): - issubclass(PG, PG[int]) +@runtime_checkable +class Position(XAxis, YAxis, Protocol): + pass - # TODO: RUSTPYTHON - @unittest.expectedFailure - def test_protocols_issubclass_non_callable(self): - class C: - x = 1 +@runtime_checkable +class Proto(Protocol): + attr: int + def meth(self, arg: str) -> int: + ... - @runtime_checkable - class PNonCall(Protocol): - x = 1 +class Concrete(Proto): + pass - with self.assertRaises(TypeError): - issubclass(C, PNonCall) - self.assertIsInstance(C(), PNonCall) - PNonCall.register(C) - with self.assertRaises(TypeError): - issubclass(C, PNonCall) - self.assertIsInstance(C(), PNonCall) +class Other: + attr: int = 1 + def meth(self, arg: str) -> int: + if arg == 'this': + return 1 + return 0 - # check that non-protocol subclasses are not affected - class D(PNonCall): ... +class NT(NamedTuple): + x: int + y: int - self.assertNotIsSubclass(C, D) - self.assertNotIsInstance(C(), D) - D.register(C) - self.assertIsSubclass(C, D) - self.assertIsInstance(C(), D) - with self.assertRaises(TypeError): - issubclass(D, PNonCall) +@runtime_checkable +class HasCallProtocol(Protocol): + __call__: typing.Callable - def test_protocols_isinstance(self): - T = TypeVar('T') +class ProtocolTests(BaseTestCase): + def test_basic_protocol(self): @runtime_checkable class P(Protocol): - def meth(x): ... - - @runtime_checkable - class PG(Protocol[T]): - def meth(x): ... - - class BadP(Protocol): - def meth(x): ... + def meth(self): + pass - class BadPG(Protocol[T]): - def meth(x): ... + class C: pass - class C: - def meth(x): ... + class D: + def meth(self): + pass - self.assertIsInstance(C(), P) - self.assertIsInstance(C(), PG) - with self.assertRaises(TypeError): - isinstance(C(), PG[T]) - with self.assertRaises(TypeError): - isinstance(C(), PG[C]) - with self.assertRaises(TypeError): - isinstance(C(), BadP) - with self.assertRaises(TypeError): - isinstance(C(), BadPG) + def f(): + pass - # TODO: RUSTPYTHON - @unittest.expectedFailure - def test_protocols_isinstance_py36(self): - class APoint: - def __init__(self, x, y, label): - self.x = x - self.y = y - self.label = label + self.assertIsSubclass(D, P) + self.assertIsInstance(D(), P) + self.assertNotIsSubclass(C, P) + self.assertNotIsInstance(C(), P) + self.assertNotIsSubclass(types.FunctionType, P) + self.assertNotIsInstance(f, P) - class BPoint: - label = 'B' + def test_runtime_checkable_generic_non_protocol(self): + # Make sure this doesn't raise AttributeError + with self.assertRaisesRegex( + TypeError, + "@runtime_checkable can be only applied to protocol classes", + ): + @runtime_checkable + class Foo[T]: ... - def __init__(self, x, y): - self.x = x - self.y = y + def test_runtime_checkable_generic(self): + @runtime_checkable + class Foo[T](Protocol): + def meth(self) -> T: ... - class C: - def __init__(self, attr): - self.attr = attr + class Impl: + def meth(self) -> int: ... - def meth(self, arg): - return 0 + self.assertIsSubclass(Impl, Foo) - class Bad: pass - - self.assertIsInstance(APoint(1, 2, 'A'), Point) - self.assertIsInstance(BPoint(1, 2), Point) - self.assertNotIsInstance(MyPoint(), Point) - self.assertIsInstance(BPoint(1, 2), Position) - self.assertIsInstance(Other(), Proto) - self.assertIsInstance(Concrete(), Proto) - self.assertIsInstance(C(42), Proto) - self.assertNotIsInstance(Bad(), Proto) - self.assertNotIsInstance(Bad(), Point) - self.assertNotIsInstance(Bad(), Position) - self.assertNotIsInstance(Bad(), Concrete) - self.assertNotIsInstance(Other(), Concrete) - self.assertIsInstance(NT(1, 2), Position) - - def test_protocols_isinstance_init(self): - T = TypeVar('T') + class NotImpl: + def method(self) -> int: ... - @runtime_checkable - class P(Protocol): - x = 1 + self.assertNotIsSubclass(NotImpl, Foo) + # TODO: RUSTPYTHON + @unittest.expectedFailure + def test_pep695_generics_can_be_runtime_checkable(self): @runtime_checkable - class PG(Protocol[T]): - x = 1 + class HasX(Protocol): + x: int - class C: + class Bar[T]: + x: T def __init__(self, x): self.x = x - self.assertIsInstance(C(1), P) - self.assertIsInstance(C(1), PG) + class Capybara[T]: + y: str + def __init__(self, y): + self.y = y - def test_protocol_checks_after_subscript(self): - class P(Protocol[T]): pass - class C(P[T]): pass - class Other1: pass - class Other2: pass - CA = C[Any] + self.assertIsInstance(Bar(1), HasX) + self.assertNotIsInstance(Capybara('a'), HasX) - self.assertNotIsInstance(Other1(), C) - self.assertNotIsSubclass(Other2, C) - class D1(C[Any]): pass - class D2(C[Any]): pass - CI = C[int] + def test_everything_implements_empty_protocol(self): + @runtime_checkable + class Empty(Protocol): + pass - self.assertIsInstance(D1(), C) - self.assertIsSubclass(D2, C) + class C: + pass - # TODO: RUSTPYTHON - @unittest.expectedFailure - def test_protocols_support_register(self): - @runtime_checkable - class P(Protocol): - x = 1 + def f(): + pass - class PM(Protocol): - def meth(self): pass + for thing in (object, type, tuple, C, types.FunctionType): + self.assertIsSubclass(thing, Empty) + for thing in (object(), 1, (), typing, f): + self.assertIsInstance(thing, Empty) - class D(PM): pass + def test_function_implements_protocol(self): + def f(): + pass - class C: pass + self.assertIsInstance(f, HasCallProtocol) - D.register(C) - P.register(C) - self.assertIsInstance(C(), P) - self.assertIsInstance(C(), D) + def test_no_inheritance_from_nominal(self): + class C: pass - def test_none_on_non_callable_doesnt_block_implementation(self): - @runtime_checkable - class P(Protocol): - x = 1 + class BP(Protocol): pass - class A: - x = 1 + with self.assertRaises(TypeError): + class P(C, Protocol): + pass + with self.assertRaises(TypeError): + class Q(Protocol, C): + pass + with self.assertRaises(TypeError): + class R(BP, C, Protocol): + pass - class B(A): - x = None + class D(BP, C): pass - class C: - def __init__(self): - self.x = None + class E(C, BP): pass - self.assertIsInstance(B(), P) - self.assertIsInstance(C(), P) + self.assertNotIsInstance(D(), E) + self.assertNotIsInstance(E(), D) # TODO: RUSTPYTHON @unittest.expectedFailure - def test_none_on_callable_blocks_implementation(self): - @runtime_checkable - class P(Protocol): - def x(self): ... - - class A: - def x(self): ... - - class B(A): - x = None + def test_no_instantiation(self): + class P(Protocol): pass - class C: - def __init__(self): - self.x = None + with self.assertRaises(TypeError): + P() - self.assertNotIsInstance(B(), P) - self.assertNotIsInstance(C(), P) + class C(P): pass - # TODO: RUSTPYTHON - @unittest.expectedFailure - def test_non_protocol_subclasses(self): - class P(Protocol): - x = 1 + self.assertIsInstance(C(), C) + with self.assertRaises(TypeError): + C(42) - @runtime_checkable - class PR(Protocol): - def meth(self): pass + T = TypeVar('T') - class NonP(P): - x = 1 + class PG(Protocol[T]): pass - class NonPR(PR): pass + with self.assertRaises(TypeError): + PG() + with self.assertRaises(TypeError): + PG[int]() + with self.assertRaises(TypeError): + PG[T]() - class C: - x = 1 + class CG(PG[T]): pass - class D: - def meth(self): pass + self.assertIsInstance(CG[int](), CG) + with self.assertRaises(TypeError): + CG[int](42) - self.assertNotIsInstance(C(), NonP) - self.assertNotIsInstance(D(), NonPR) - self.assertNotIsSubclass(C, NonP) - self.assertNotIsSubclass(D, NonPR) - self.assertIsInstance(NonPR(), PR) - self.assertIsSubclass(NonPR, PR) + def test_protocol_defining_init_does_not_get_overridden(self): + # check that P.__init__ doesn't get clobbered + # see https://bugs.python.org/issue44807 - def test_custom_subclasshook(self): class P(Protocol): - x = 1 + x: int + def __init__(self, x: int) -> None: + self.x = x + class C: pass - class OKClass: pass + c = C() + P.__init__(c, 1) + self.assertEqual(c.x, 1) - class BadClass: - x = 1 - class C(P): - @classmethod - def __subclasshook__(cls, other): - return other.__name__.startswith("OK") + def test_concrete_class_inheriting_init_from_protocol(self): + class P(Protocol): + x: int + def __init__(self, x: int) -> None: + self.x = x + + class C(P): pass - self.assertIsInstance(OKClass(), C) - self.assertNotIsInstance(BadClass(), C) - self.assertIsSubclass(OKClass, C) - self.assertNotIsSubclass(BadClass, C) + c = C(1) + self.assertIsInstance(c, C) + self.assertEqual(c.x, 1) - def test_issubclass_fails_correctly(self): + def test_cannot_instantiate_abstract(self): @runtime_checkable class P(Protocol): - x = 1 + @abc.abstractmethod + def ameth(self) -> int: + raise NotImplementedError - class C: pass + class B(P): + pass + + class C(B): + def ameth(self) -> int: + return 26 with self.assertRaises(TypeError): - issubclass(C(), P) + B() + self.assertIsInstance(C(), P) - def test_defining_generic_protocols(self): - T = TypeVar('T') - S = TypeVar('S') + def test_subprotocols_extending(self): + class P1(Protocol): + def meth1(self): + pass @runtime_checkable - class PR(Protocol[T, S]): - def meth(self): pass - - class P(PR[int, T], Protocol[T]): - y = 1 + class P2(P1, Protocol): + def meth2(self): + pass - with self.assertRaises(TypeError): - PR[int] - with self.assertRaises(TypeError): - P[int, str] + class C: + def meth1(self): + pass - class C(PR[int, T]): pass + def meth2(self): + pass - self.assertIsInstance(C[str](), C) + class C1: + def meth1(self): + pass - # TODO: RUSTPYTHON - @unittest.expectedFailure - def test_defining_generic_protocols_old_style(self): - T = TypeVar('T') - S = TypeVar('S') + class C2: + def meth2(self): + pass - @runtime_checkable - class PR(Protocol, Generic[T, S]): - def meth(self): pass + self.assertNotIsInstance(C1(), P2) + self.assertNotIsInstance(C2(), P2) + self.assertNotIsSubclass(C1, P2) + self.assertNotIsSubclass(C2, P2) + self.assertIsInstance(C(), P2) + self.assertIsSubclass(C, P2) - class P(PR[int, str], Protocol): - y = 1 + def test_subprotocols_merging(self): + class P1(Protocol): + def meth1(self): + pass - with self.assertRaises(TypeError): - issubclass(PR[int, str], PR) - self.assertIsSubclass(P, PR) - with self.assertRaises(TypeError): - PR[int] + class P2(Protocol): + def meth2(self): + pass - class P1(Protocol, Generic[T]): - def bar(self, x: T) -> str: ... + @runtime_checkable + class P(P1, P2, Protocol): + pass - class P2(Generic[T], Protocol): - def bar(self, x: T) -> str: ... + class C: + def meth1(self): + pass - @runtime_checkable - class PSub(P1[str], Protocol): - x = 1 + def meth2(self): + pass - class Test: - x = 1 + class C1: + def meth1(self): + pass - def bar(self, x: str) -> str: - return x + class C2: + def meth2(self): + pass - self.assertIsInstance(Test(), PSub) + self.assertNotIsInstance(C1(), P) + self.assertNotIsInstance(C2(), P) + self.assertNotIsSubclass(C1, P) + self.assertNotIsSubclass(C2, P) + self.assertIsInstance(C(), P) + self.assertIsSubclass(C, P) - def test_init_called(self): + def test_protocols_issubclass(self): T = TypeVar('T') - class P(Protocol[T]): pass + @runtime_checkable + class P(Protocol): + def x(self): ... - class C(P[T]): - def __init__(self): - self.test = 'OK' + @runtime_checkable + class PG(Protocol[T]): + def x(self): ... - self.assertEqual(C[int]().test, 'OK') + class BadP(Protocol): + def x(self): ... - class B: - def __init__(self): - self.test = 'OK' + class BadPG(Protocol[T]): + def x(self): ... - class D1(B, P[T]): - pass + class C: + def x(self): ... - self.assertEqual(D1[int]().test, 'OK') + self.assertIsSubclass(C, P) + self.assertIsSubclass(C, PG) + self.assertIsSubclass(BadP, PG) - class D2(P[T], B): - pass + no_subscripted_generics = ( + "Subscripted generics cannot be used with class and instance checks" + ) - self.assertEqual(D2[int]().test, 'OK') + with self.assertRaisesRegex(TypeError, no_subscripted_generics): + issubclass(C, PG[T]) + with self.assertRaisesRegex(TypeError, no_subscripted_generics): + issubclass(C, PG[C]) - # TODO: RUSTPYTHON - @unittest.expectedFailure - def test_new_called(self): - T = TypeVar('T') + only_runtime_checkable_protocols = ( + "Instance and class checks can only be used with " + "@runtime_checkable protocols" + ) - class P(Protocol[T]): pass + with self.assertRaisesRegex(TypeError, only_runtime_checkable_protocols): + issubclass(C, BadP) + with self.assertRaisesRegex(TypeError, only_runtime_checkable_protocols): + issubclass(C, BadPG) - class C(P[T]): - def __new__(cls, *args): - self = super().__new__(cls, *args) - self.test = 'OK' - return self + with self.assertRaisesRegex(TypeError, no_subscripted_generics): + issubclass(P, PG[T]) + with self.assertRaisesRegex(TypeError, no_subscripted_generics): + issubclass(PG, PG[int]) - self.assertEqual(C[int]().test, 'OK') - with self.assertRaises(TypeError): - C[int](42) - with self.assertRaises(TypeError): - C[int](a=42) + only_classes_allowed = r"issubclass\(\) arg 1 must be a class" - # TODO: RUSTPYTHON the last line breaks any tests that use unittest.mock - # See https://github.com/RustPython/RustPython/issues/5190#issuecomment-2010535802 - # It's possible that updating typing to 3.12 will resolve this - @unittest.skip("TODO: RUSTPYTHON this test breaks other tests that use unittest.mock") - def test_protocols_bad_subscripts(self): - T = TypeVar('T') - S = TypeVar('S') - with self.assertRaises(TypeError): - class P(Protocol[T, T]): pass - with self.assertRaises(TypeError): - class P(Protocol[int]): pass - with self.assertRaises(TypeError): - class P(Protocol[T], Protocol[S]): pass - with self.assertRaises(TypeError): - class P(typing.Mapping[T, S], Protocol[T]): pass + with self.assertRaisesRegex(TypeError, only_classes_allowed): + issubclass(1, P) + with self.assertRaisesRegex(TypeError, only_classes_allowed): + issubclass(1, PG) + with self.assertRaisesRegex(TypeError, only_classes_allowed): + issubclass(1, BadP) + with self.assertRaisesRegex(TypeError, only_classes_allowed): + issubclass(1, BadPG) - def test_generic_protocols_repr(self): - T = TypeVar('T') - S = TypeVar('S') - class P(Protocol[T, S]): pass + def test_implicit_issubclass_between_two_protocols(self): + @runtime_checkable + class CallableMembersProto(Protocol): + def meth(self): ... - self.assertTrue(repr(P[T, S]).endswith('P[~T, ~S]')) - self.assertTrue(repr(P[int, str]).endswith('P[int, str]')) + # All the below protocols should be considered "subclasses" + # of CallableMembersProto at runtime, + # even though none of them explicitly subclass CallableMembersProto - def test_generic_protocols_eq(self): - T = TypeVar('T') - S = TypeVar('S') + class IdenticalProto(Protocol): + def meth(self): ... - class P(Protocol[T, S]): pass + class SupersetProto(Protocol): + def meth(self): ... + def meth2(self): ... - self.assertEqual(P, P) - self.assertEqual(P[int, T], P[int, T]) - self.assertEqual(P[T, T][Tuple[T, S]][int, str], - P[Tuple[int, str], Tuple[int, str]]) + class NonCallableMembersProto(Protocol): + meth: Callable[[], None] - def test_generic_protocols_special_from_generic(self): - T = TypeVar('T') + class NonCallableMembersSupersetProto(Protocol): + meth: Callable[[], None] + meth2: Callable[[str, int], bool] - class P(Protocol[T]): pass + class MixedMembersProto1(Protocol): + meth: Callable[[], None] + def meth2(self): ... - self.assertEqual(P.__parameters__, (T,)) - self.assertEqual(P[int].__parameters__, ()) - self.assertEqual(P[int].__args__, (int,)) - self.assertIs(P[int].__origin__, P) + class MixedMembersProto2(Protocol): + def meth(self): ... + meth2: Callable[[str, int], bool] - # TODO: RUSTPYTHON - @unittest.expectedFailure - def test_generic_protocols_special_from_protocol(self): - @runtime_checkable - class PR(Protocol): - x = 1 + for proto in ( + IdenticalProto, SupersetProto, NonCallableMembersProto, + NonCallableMembersSupersetProto, MixedMembersProto1, MixedMembersProto2 + ): + with self.subTest(proto=proto.__name__): + self.assertIsSubclass(proto, CallableMembersProto) - class P(Protocol): - def meth(self): - pass + # These two shouldn't be considered subclasses of CallableMembersProto, however, + # since they don't have the `meth` protocol member - T = TypeVar('T') + class EmptyProtocol(Protocol): ... + class UnrelatedProtocol(Protocol): + def wut(self): ... - class PG(Protocol[T]): - x = 1 + self.assertNotIsSubclass(EmptyProtocol, CallableMembersProto) + self.assertNotIsSubclass(UnrelatedProtocol, CallableMembersProto) - def meth(self): - pass + # These aren't protocols at all (despite having annotations), + # so they should only be considered subclasses of CallableMembersProto + # if they *actually have an attribute* matching the `meth` member + # (just having an annotation is insufficient) - self.assertTrue(P._is_protocol) - self.assertTrue(PR._is_protocol) - self.assertTrue(PG._is_protocol) - self.assertFalse(P._is_runtime_protocol) - self.assertTrue(PR._is_runtime_protocol) - self.assertTrue(PG[int]._is_protocol) - self.assertEqual(typing._get_protocol_attrs(P), {'meth'}) - self.assertEqual(typing._get_protocol_attrs(PR), {'x'}) - self.assertEqual(frozenset(typing._get_protocol_attrs(PG)), - frozenset({'x', 'meth'})) + class AnnotatedButNotAProtocol: + meth: Callable[[], None] - def test_no_runtime_deco_on_nominal(self): - with self.assertRaises(TypeError): - @runtime_checkable - class C: pass + class NotAProtocolButAnImplicitSubclass: + def meth(self): pass - class Proto(Protocol): - x = 1 + class NotAProtocolButAnImplicitSubclass2: + meth: Callable[[], None] + def meth(self): pass - with self.assertRaises(TypeError): - @runtime_checkable - class Concrete(Proto): - pass + class NotAProtocolButAnImplicitSubclass3: + meth: Callable[[], None] + meth2: Callable[[int, str], bool] + def meth(self): pass + def meth2(self, x, y): return True + + self.assertNotIsSubclass(AnnotatedButNotAProtocol, CallableMembersProto) + self.assertIsSubclass(NotAProtocolButAnImplicitSubclass, CallableMembersProto) + self.assertIsSubclass(NotAProtocolButAnImplicitSubclass2, CallableMembersProto) + self.assertIsSubclass(NotAProtocolButAnImplicitSubclass3, CallableMembersProto) # TODO: RUSTPYTHON - @unittest.expectedFailure - def test_none_treated_correctly(self): - @runtime_checkable - class P(Protocol): - x = None # type: int + @unittest.skip("TODO: RUSTPYTHON (no gc)") + def test_isinstance_checks_not_at_whim_of_gc(self): + self.addCleanup(gc.enable) + gc.disable() - class B(object): pass + with self.assertRaisesRegex( + TypeError, + "Protocols can only inherit from other protocols" + ): + class Foo(collections.abc.Mapping, Protocol): + pass - self.assertNotIsInstance(B(), P) + self.assertNotIsInstance([], collections.abc.Mapping) + def test_issubclass_and_isinstance_on_Protocol_itself(self): class C: - x = 1 - - class D: - x = None - - self.assertIsInstance(C(), P) - self.assertIsInstance(D(), P) + def x(self): pass - class CI: - def __init__(self): - self.x = 1 + self.assertNotIsSubclass(object, Protocol) + self.assertNotIsInstance(object(), Protocol) - class DI: - def __init__(self): - self.x = None + self.assertNotIsSubclass(str, Protocol) + self.assertNotIsInstance('foo', Protocol) - self.assertIsInstance(C(), P) - self.assertIsInstance(D(), P) + self.assertNotIsSubclass(C, Protocol) + self.assertNotIsInstance(C(), Protocol) - def test_protocols_in_unions(self): - class P(Protocol): - x = None # type: int + only_classes_allowed = r"issubclass\(\) arg 1 must be a class" - Alias = typing.Union[typing.Iterable, P] - Alias2 = typing.Union[P, typing.Iterable] - self.assertEqual(Alias, Alias2) + with self.assertRaisesRegex(TypeError, only_classes_allowed): + issubclass(1, Protocol) + with self.assertRaisesRegex(TypeError, only_classes_allowed): + issubclass('foo', Protocol) + with self.assertRaisesRegex(TypeError, only_classes_allowed): + issubclass(C(), Protocol) - def test_protocols_pickleable(self): - global P, CP # pickle wants to reference the class by name T = TypeVar('T') @runtime_checkable - class P(Protocol[T]): - x = 1 + class EmptyProtocol(Protocol): pass - class CP(P[int]): - pass + @runtime_checkable + class SupportsStartsWith(Protocol): + def startswith(self, x: str) -> bool: ... - c = CP() - c.foo = 42 - c.bar = 'abc' - for proto in range(pickle.HIGHEST_PROTOCOL + 1): - z = pickle.dumps(c, proto) - x = pickle.loads(z) - self.assertEqual(x.foo, 42) - self.assertEqual(x.bar, 'abc') - self.assertEqual(x.x, 1) - self.assertEqual(x.__dict__, {'foo': 42, 'bar': 'abc'}) - s = pickle.dumps(P) - D = pickle.loads(s) + @runtime_checkable + class SupportsX(Protocol[T]): + def x(self): ... - class E: - x = 1 + for proto in EmptyProtocol, SupportsStartsWith, SupportsX: + with self.subTest(proto=proto.__name__): + self.assertIsSubclass(proto, Protocol) - self.assertIsInstance(E(), D) + # gh-105237 / PR #105239: + # check that the presence of Protocol subclasses + # where `issubclass(X, )` evaluates to True + # doesn't influence the result of `issubclass(X, Protocol)` - # TODO: RUSTPYTHON - @unittest.expectedFailure - def test_supports_int(self): - self.assertIsSubclass(int, typing.SupportsInt) - self.assertNotIsSubclass(str, typing.SupportsInt) + self.assertIsSubclass(object, EmptyProtocol) + self.assertIsInstance(object(), EmptyProtocol) + self.assertNotIsSubclass(object, Protocol) + self.assertNotIsInstance(object(), Protocol) - # TODO: RUSTPYTHON - @unittest.expectedFailure - def test_supports_float(self): - self.assertIsSubclass(float, typing.SupportsFloat) - self.assertNotIsSubclass(str, typing.SupportsFloat) + self.assertIsSubclass(str, SupportsStartsWith) + self.assertIsInstance('foo', SupportsStartsWith) + self.assertNotIsSubclass(str, Protocol) + self.assertNotIsInstance('foo', Protocol) - # TODO: RUSTPYTHON - @unittest.expectedFailure - def test_supports_complex(self): + self.assertIsSubclass(C, SupportsX) + self.assertIsInstance(C(), SupportsX) + self.assertNotIsSubclass(C, Protocol) + self.assertNotIsInstance(C(), Protocol) - # Note: complex itself doesn't have __complex__. + def test_protocols_issubclass_non_callable(self): class C: - def __complex__(self): - return 0j + x = 1 + + @runtime_checkable + class PNonCall(Protocol): + x = 1 + + non_callable_members_illegal = ( + "Protocols with non-method members don't support issubclass()" + ) + + with self.assertRaisesRegex(TypeError, non_callable_members_illegal): + issubclass(C, PNonCall) + + self.assertIsInstance(C(), PNonCall) + PNonCall.register(C) - self.assertIsSubclass(C, typing.SupportsComplex) - self.assertNotIsSubclass(str, typing.SupportsComplex) + with self.assertRaisesRegex(TypeError, non_callable_members_illegal): + issubclass(C, PNonCall) - # TODO: RUSTPYTHON - @unittest.expectedFailure - def test_supports_bytes(self): + self.assertIsInstance(C(), PNonCall) - # Note: bytes itself doesn't have __bytes__. - class B: - def __bytes__(self): - return b'' + # check that non-protocol subclasses are not affected + class D(PNonCall): ... - self.assertIsSubclass(B, typing.SupportsBytes) - self.assertNotIsSubclass(str, typing.SupportsBytes) + self.assertNotIsSubclass(C, D) + self.assertNotIsInstance(C(), D) + D.register(C) + self.assertIsSubclass(C, D) + self.assertIsInstance(C(), D) - # TODO: RUSTPYTHON - @unittest.expectedFailure - def test_supports_abs(self): - self.assertIsSubclass(float, typing.SupportsAbs) - self.assertIsSubclass(int, typing.SupportsAbs) - self.assertNotIsSubclass(str, typing.SupportsAbs) + with self.assertRaisesRegex(TypeError, non_callable_members_illegal): + issubclass(D, PNonCall) # TODO: RUSTPYTHON @unittest.expectedFailure - def test_supports_round(self): - issubclass(float, typing.SupportsRound) - self.assertIsSubclass(float, typing.SupportsRound) - self.assertIsSubclass(int, typing.SupportsRound) - self.assertNotIsSubclass(str, typing.SupportsRound) + def test_no_weird_caching_with_issubclass_after_isinstance(self): + @runtime_checkable + class Spam(Protocol): + x: int - def test_reversible(self): - self.assertIsSubclass(list, typing.Reversible) - self.assertNotIsSubclass(int, typing.Reversible) + class Eggs: + def __init__(self) -> None: + self.x = 42 - # TODO: RUSTPYTHON - @unittest.expectedFailure - def test_supports_index(self): - self.assertIsSubclass(int, typing.SupportsIndex) - self.assertNotIsSubclass(str, typing.SupportsIndex) + self.assertIsInstance(Eggs(), Spam) - # TODO: RUSTPYTHON - @unittest.expectedFailure - def test_bundled_protocol_instance_works(self): - self.assertIsInstance(0, typing.SupportsAbs) - class C1(typing.SupportsInt): - def __int__(self) -> int: - return 42 - class C2(C1): - pass - c = C2() - self.assertIsInstance(c, C1) + # gh-104555: If we didn't override ABCMeta.__subclasscheck__ in _ProtocolMeta, + # TypeError wouldn't be raised here, + # as the cached result of the isinstance() check immediately above + # would mean the issubclass() call would short-circuit + # before we got to the "raise TypeError" line + with self.assertRaisesRegex( + TypeError, + "Protocols with non-method members don't support issubclass()" + ): + issubclass(Eggs, Spam) - # TODO: RUSTPYTHON - @unittest.expectedFailure - def test_collections_protocols_allowed(self): + def test_no_weird_caching_with_issubclass_after_isinstance_2(self): @runtime_checkable - class Custom(collections.abc.Iterable, Protocol): - def close(self): ... - - class A: pass - class B: - def __iter__(self): - return [] - def close(self): - return 0 + class Spam(Protocol): + x: int - self.assertIsSubclass(B, Custom) - self.assertNotIsSubclass(A, Custom) + class Eggs: ... - def test_builtin_protocol_allowlist(self): - with self.assertRaises(TypeError): - class CustomProtocol(TestCase, Protocol): - pass + self.assertNotIsInstance(Eggs(), Spam) - class CustomContextManager(typing.ContextManager, Protocol): - pass + # gh-104555: If we didn't override ABCMeta.__subclasscheck__ in _ProtocolMeta, + # TypeError wouldn't be raised here, + # as the cached result of the isinstance() check immediately above + # would mean the issubclass() call would short-circuit + # before we got to the "raise TypeError" line + with self.assertRaisesRegex( + TypeError, + "Protocols with non-method members don't support issubclass()" + ): + issubclass(Eggs, Spam) - def test_non_runtime_protocol_isinstance_check(self): - class P(Protocol): + def test_no_weird_caching_with_issubclass_after_isinstance_3(self): + @runtime_checkable + class Spam(Protocol): x: int - with self.assertRaisesRegex(TypeError, "@runtime_checkable"): - isinstance(1, P) + class Eggs: + def __getattr__(self, attr): + if attr == "x": + return 42 + raise AttributeError(attr) - def test_super_call_init(self): - class P(Protocol): - x: int + self.assertNotIsInstance(Eggs(), Spam) - class Foo(P): - def __init__(self): - super().__init__() + # gh-104555: If we didn't override ABCMeta.__subclasscheck__ in _ProtocolMeta, + # TypeError wouldn't be raised here, + # as the cached result of the isinstance() check immediately above + # would mean the issubclass() call would short-circuit + # before we got to the "raise TypeError" line + with self.assertRaisesRegex( + TypeError, + "Protocols with non-method members don't support issubclass()" + ): + issubclass(Eggs, Spam) + + # TODO: RUSTPYTHON + @unittest.expectedFailure + def test_no_weird_caching_with_issubclass_after_isinstance_pep695(self): + @runtime_checkable + class Spam[T](Protocol): + x: T - Foo() # Previously triggered RecursionError + class Eggs[T]: + def __init__(self, x: T) -> None: + self.x = x + + self.assertIsInstance(Eggs(42), Spam) + + # gh-104555: If we didn't override ABCMeta.__subclasscheck__ in _ProtocolMeta, + # TypeError wouldn't be raised here, + # as the cached result of the isinstance() check immediately above + # would mean the issubclass() call would short-circuit + # before we got to the "raise TypeError" line + with self.assertRaisesRegex( + TypeError, + "Protocols with non-method members don't support issubclass()" + ): + issubclass(Eggs, Spam) + + # FIXME(arihant2math): start more porting from test_protocols_isinstance class GenericTests(BaseTestCase): @@ -1778,7 +3479,28 @@ class NewGeneric(Generic): ... with self.assertRaises(TypeError): class MyGeneric(Generic[T], Generic[S]): ... with self.assertRaises(TypeError): - class MyGeneric(List[T], Generic[S]): ... + class MyGeneric2(List[T], Generic[S]): ... + with self.assertRaises(TypeError): + Generic[()] + class D(Generic[T]): pass + with self.assertRaises(TypeError): + D[()] + + def test_generic_subclass_checks(self): + for typ in [list[int], List[int], + tuple[int, str], Tuple[int, str], + typing.Callable[..., None], + collections.abc.Callable[..., None]]: + with self.subTest(typ=typ): + self.assertRaises(TypeError, issubclass, typ, object) + self.assertRaises(TypeError, issubclass, typ, type) + self.assertRaises(TypeError, issubclass, typ, typ) + self.assertRaises(TypeError, issubclass, object, typ) + + # isinstance is fine: + self.assertTrue(isinstance(typ, object)) + # but, not when the right arg is also a generic: + self.assertRaises(TypeError, isinstance, typ, typ) def test_init(self): T = TypeVar('T') @@ -1886,6 +3608,18 @@ class C(B[int]): c.bar = 'abc' self.assertEqual(c.__dict__, {'bar': 'abc'}) + # TODO: RUSTPYTHON + @unittest.expectedFailure + def test_setattr_exceptions(self): + class Immutable[T]: + def __setattr__(self, key, value): + raise RuntimeError("immutable") + + # gh-115165: This used to cause RuntimeError to be raised + # when we tried to set `__orig_class__` on the `Immutable` instance + # returned by the `Immutable[int]()` call + self.assertIsInstance(Immutable[int](), Immutable) + def test_subscripted_generics_as_proxies(self): T = TypeVar('T') class C(Generic[T]): @@ -2019,6 +3753,8 @@ class Meta(type): ... self.assertEqual(Union[T, int][Meta], Union[Meta, int]) self.assertEqual(Callable[..., Meta].__args__, (Ellipsis, Meta)) + # TODO: RUSTPYTHON + @unittest.expectedFailure def test_generic_hashes(self): class A(Generic[T]): ... @@ -2054,13 +3790,12 @@ class A(Generic[T]): self.assertNotEqual(typing.FrozenSet[A[str]], typing.FrozenSet[mod_generics_cache.B.A[str]]) - if sys.version_info[:2] > (3, 2): - self.assertTrue(repr(Tuple[A[str]]).endswith('.A[str]]')) - self.assertTrue(repr(Tuple[B.A[str]]).endswith('.B.A[str]]')) - self.assertTrue(repr(Tuple[mod_generics_cache.A[str]]) - .endswith('mod_generics_cache.A[str]]')) - self.assertTrue(repr(Tuple[mod_generics_cache.B.A[str]]) - .endswith('mod_generics_cache.B.A[str]]')) + self.assertTrue(repr(Tuple[A[str]]).endswith('.A[str]]')) + self.assertTrue(repr(Tuple[B.A[str]]).endswith('.B.A[str]]')) + self.assertTrue(repr(Tuple[mod_generics_cache.A[str]]) + .endswith('mod_generics_cache.A[str]]')) + self.assertTrue(repr(Tuple[mod_generics_cache.B.A[str]]) + .endswith('mod_generics_cache.B.A[str]]')) def test_extended_generic_rules_eq(self): T = TypeVar('T') @@ -2075,9 +3810,6 @@ def test_extended_generic_rules_eq(self): class Base: ... class Derived(Base): ... self.assertEqual(Union[T, Base][Union[Base, Derived]], Union[Base, Derived]) - with self.assertRaises(TypeError): - Union[T, int][1] - self.assertEqual(Callable[[T], T][KT], Callable[[KT], KT]) self.assertEqual(Callable[..., List[T]][int], Callable[..., List[int]]) @@ -2118,6 +3850,131 @@ def barfoo(x: AT): ... def barfoo2(x: CT): ... self.assertIs(get_type_hints(barfoo2, globals(), locals())['x'], CT) + def test_generic_pep585_forward_ref(self): + # See https://bugs.python.org/issue41370 + + class C1: + a: list['C1'] + self.assertEqual( + get_type_hints(C1, globals(), locals()), + {'a': list[C1]} + ) + + class C2: + a: dict['C1', list[List[list['C2']]]] + self.assertEqual( + get_type_hints(C2, globals(), locals()), + {'a': dict[C1, list[List[list[C2]]]]} + ) + + # Test stringified annotations + scope = {} + exec(textwrap.dedent(''' + from __future__ import annotations + class C3: + a: List[list["C2"]] + '''), scope) + C3 = scope['C3'] + self.assertEqual(C3.__annotations__['a'], "List[list['C2']]") + self.assertEqual( + get_type_hints(C3, globals(), locals()), + {'a': List[list[C2]]} + ) + + # Test recursive types + X = list["X"] + def f(x: X): ... + self.assertEqual( + get_type_hints(f, globals(), locals()), + {'x': list[list[ForwardRef('X')]]} + ) + + def test_pep695_generic_class_with_future_annotations(self): + original_globals = dict(ann_module695.__dict__) + + hints_for_A = get_type_hints(ann_module695.A) + A_type_params = ann_module695.A.__type_params__ + self.assertIs(hints_for_A["x"], A_type_params[0]) + self.assertEqual(hints_for_A["y"].__args__[0], Unpack[A_type_params[1]]) + self.assertIs(hints_for_A["z"].__args__[0], A_type_params[2]) + + # should not have changed as a result of the get_type_hints() calls! + self.assertEqual(ann_module695.__dict__, original_globals) + + # TODO: RUSTPYTHON + @unittest.expectedFailure + def test_pep695_generic_class_with_future_annotations_and_local_shadowing(self): + hints_for_B = get_type_hints(ann_module695.B) + self.assertEqual(hints_for_B, {"x": int, "y": str, "z": bytes}) + + def test_pep695_generic_class_with_future_annotations_name_clash_with_global_vars(self): + hints_for_C = get_type_hints(ann_module695.C) + self.assertEqual( + set(hints_for_C.values()), + set(ann_module695.C.__type_params__) + ) + + def test_pep_695_generic_function_with_future_annotations(self): + hints_for_generic_function = get_type_hints(ann_module695.generic_function) + func_t_params = ann_module695.generic_function.__type_params__ + self.assertEqual( + hints_for_generic_function.keys(), {"x", "y", "z", "zz", "return"} + ) + self.assertIs(hints_for_generic_function["x"], func_t_params[0]) + self.assertEqual(hints_for_generic_function["y"], Unpack[func_t_params[1]]) + self.assertIs(hints_for_generic_function["z"].__origin__, func_t_params[2]) + self.assertIs(hints_for_generic_function["zz"].__origin__, func_t_params[2]) + + def test_pep_695_generic_function_with_future_annotations_name_clash_with_global_vars(self): + self.assertEqual( + set(get_type_hints(ann_module695.generic_function_2).values()), + set(ann_module695.generic_function_2.__type_params__) + ) + + def test_pep_695_generic_method_with_future_annotations(self): + hints_for_generic_method = get_type_hints(ann_module695.D.generic_method) + params = { + param.__name__: param + for param in ann_module695.D.generic_method.__type_params__ + } + self.assertEqual( + hints_for_generic_method, + {"x": params["Foo"], "y": params["Bar"], "return": types.NoneType} + ) + + def test_pep_695_generic_method_with_future_annotations_name_clash_with_global_vars(self): + self.assertEqual( + set(get_type_hints(ann_module695.D.generic_method_2).values()), + set(ann_module695.D.generic_method_2.__type_params__) + ) + + # TODO: RUSTPYTHON + @unittest.expectedFailure + def test_pep_695_generics_with_future_annotations_nested_in_function(self): + results = ann_module695.nested() + + self.assertEqual( + set(results.hints_for_E.values()), + set(results.E.__type_params__) + ) + self.assertEqual( + set(results.hints_for_E_meth.values()), + set(results.E.generic_method.__type_params__) + ) + self.assertNotEqual( + set(results.hints_for_E_meth.values()), + set(results.E.__type_params__) + ) + self.assertEqual( + set(results.hints_for_E_meth.values()).intersection(results.E.__type_params__), + set() + ) + + self.assertEqual( + set(results.hints_for_generic_func.values()), + set(results.generic_func.__type_params__) + ) + def test_extended_generic_rules_subclassing(self): class T1(Tuple[T, KT]): ... class T2(Tuple[T, ...]): ... @@ -2152,8 +4009,6 @@ def test_fail_with_bare_union(self): List[Union] with self.assertRaises(TypeError): Tuple[Optional] - with self.assertRaises(TypeError): - ClassVar[ClassVar] with self.assertRaises(TypeError): List[ClassVar[int]] @@ -2179,24 +4034,27 @@ class MyDict(typing.Dict[T, T]): ... class MyDef(typing.DefaultDict[str, T]): ... self.assertIs(MyDef[int]().__class__, MyDef) self.assertEqual(MyDef[int]().__orig_class__, MyDef[int]) - # ChainMap was added in 3.3 - if sys.version_info >= (3, 3): - class MyChain(typing.ChainMap[str, T]): ... - self.assertIs(MyChain[int]().__class__, MyChain) - self.assertEqual(MyChain[int]().__orig_class__, MyChain[int]) + class MyChain(typing.ChainMap[str, T]): ... + self.assertIs(MyChain[int]().__class__, MyChain) + self.assertEqual(MyChain[int]().__orig_class__, MyChain[int]) def test_all_repr_eq_any(self): objs = (getattr(typing, el) for el in typing.__all__) for obj in objs: self.assertNotEqual(repr(obj), '') self.assertEqual(obj, obj) - if getattr(obj, '__parameters__', None) and len(obj.__parameters__) == 1: + if (getattr(obj, '__parameters__', None) + and not isinstance(obj, typing.TypeVar) + and isinstance(obj.__parameters__, tuple) + and len(obj.__parameters__) == 1): self.assertEqual(obj[Any].__args__, (Any,)) if isinstance(obj, type): for base in obj.__mro__: self.assertNotEqual(repr(base), '') self.assertEqual(base, base) + # TODO: RUSTPYTHON + @unittest.expectedFailure def test_pickle(self): global C # pickle wants to reference the class by name T = TypeVar('T') @@ -2217,7 +4075,8 @@ class C(B[int]): self.assertEqual(x.bar, 'abc') self.assertEqual(x.__dict__, {'foo': 42, 'bar': 'abc'}) samples = [Any, Union, Tuple, Callable, ClassVar, - Union[int, str], ClassVar[List], Tuple[int, ...], Callable[[str], bytes], + Union[int, str], ClassVar[List], Tuple[int, ...], Tuple[()], + Callable[[str], bytes], typing.DefaultDict, typing.FrozenSet[int]] for s in samples: for proto in range(pickle.HIGHEST_PROTOCOL + 1): @@ -2232,10 +4091,23 @@ class C(B[int]): x = pickle.loads(z) self.assertEqual(s, x) + # Test ParamSpec args and kwargs + global PP + PP = ParamSpec('PP') + for thing in [PP.args, PP.kwargs]: + for proto in range(pickle.HIGHEST_PROTOCOL + 1): + with self.subTest(thing=thing, proto=proto): + self.assertEqual( + pickle.loads(pickle.dumps(thing, proto)), + thing, + ) + del PP + def test_copy_and_deepcopy(self): T = TypeVar('T') class Node(Generic[T]): ... - things = [Union[T, int], Tuple[T, int], Callable[..., T], Callable[[int], int], + things = [Union[T, int], Tuple[T, int], Tuple[()], + Callable[..., T], Callable[[int], int], Tuple[Any, Any], Node[T], Node[int], Node[Any], typing.Iterable[T], typing.Iterable[Any], typing.Iterable[int], typing.Dict[int, str], typing.Dict[T, Any], ClassVar[int], ClassVar[List[T]], Tuple['T', 'T'], @@ -2247,22 +4119,30 @@ class Node(Generic[T]): ... def test_immutability_by_copy_and_pickle(self): # Special forms like Union, Any, etc., generic aliases to containers like List, # Mapping, etc., and type variabcles are considered immutable by copy and pickle. - global TP, TPB, TPV # for pickle + global TP, TPB, TPV, PP # for pickle TP = TypeVar('TP') TPB = TypeVar('TPB', bound=int) TPV = TypeVar('TPV', bytes, str) - for X in [TP, TPB, TPV, List, typing.Mapping, ClassVar, typing.Iterable, + PP = ParamSpec('PP') + for X in [TP, TPB, TPV, PP, + List, typing.Mapping, ClassVar, typing.Iterable, Union, Any, Tuple, Callable]: - self.assertIs(copy(X), X) - self.assertIs(deepcopy(X), X) - self.assertIs(pickle.loads(pickle.dumps(X)), X) + with self.subTest(thing=X): + self.assertIs(copy(X), X) + self.assertIs(deepcopy(X), X) + for proto in range(pickle.HIGHEST_PROTOCOL + 1): + self.assertIs(pickle.loads(pickle.dumps(X, proto)), X) + del TP, TPB, TPV, PP + # Check that local type variables are copyable. TL = TypeVar('TL') TLB = TypeVar('TLB', bound=int) TLV = TypeVar('TLV', bytes, str) - for X in [TL, TLB, TLV]: - self.assertIs(copy(X), X) - self.assertIs(deepcopy(X), X) + PL = ParamSpec('PL') + for X in [TL, TLB, TLV, PL]: + with self.subTest(thing=X): + self.assertIs(copy(X), X) + self.assertIs(deepcopy(X), X) def test_copy_generic_instances(self): T = TypeVar('T') @@ -2292,7 +4172,7 @@ def test_weakref_all(self): T = TypeVar('T') things = [Any, Union[T, int], Callable[..., T], Tuple[Any, Any], Optional[List[int]], typing.Mapping[int, str], - typing.re.Match[bytes], typing.Iterable['whatever']] + typing.Match[bytes], typing.Iterable['whatever']] for t in things: self.assertEqual(weakref.ref(t)(), t) @@ -2359,6 +4239,51 @@ class Y(C[int]): self.assertEqual(Y.__qualname__, 'GenericTests.test_repr_2..Y') + def test_repr_3(self): + T = TypeVar('T') + T1 = TypeVar('T1') + P = ParamSpec('P') + P2 = ParamSpec('P2') + Ts = TypeVarTuple('Ts') + + class MyCallable(Generic[P, T]): + pass + + class DoubleSpec(Generic[P, P2, T]): + pass + + class TsP(Generic[*Ts, P]): + pass + + object_to_expected_repr = { + MyCallable[P, T]: "MyCallable[~P, ~T]", + MyCallable[Concatenate[T1, P], T]: "MyCallable[typing.Concatenate[~T1, ~P], ~T]", + MyCallable[[], bool]: "MyCallable[[], bool]", + MyCallable[[int], bool]: "MyCallable[[int], bool]", + MyCallable[[int, str], bool]: "MyCallable[[int, str], bool]", + MyCallable[[int, list[int]], bool]: "MyCallable[[int, list[int]], bool]", + MyCallable[Concatenate[*Ts, P], T]: "MyCallable[typing.Concatenate[typing.Unpack[Ts], ~P], ~T]", + + DoubleSpec[P2, P, T]: "DoubleSpec[~P2, ~P, ~T]", + DoubleSpec[[int], [str], bool]: "DoubleSpec[[int], [str], bool]", + DoubleSpec[[int, int], [str, str], bool]: "DoubleSpec[[int, int], [str, str], bool]", + + TsP[*Ts, P]: "TsP[typing.Unpack[Ts], ~P]", + TsP[int, str, list[int], []]: "TsP[int, str, list[int], []]", + TsP[int, [str, list[int]]]: "TsP[int, [str, list[int]]]", + + # These lines are just too long to fit: + MyCallable[Concatenate[*Ts, P], int][int, str, [bool, float]]: + "MyCallable[[int, str, bool, float], int]", + } + + for obj, expected_repr in object_to_expected_repr.items(): + with self.subTest(obj=obj, expected_repr=expected_repr): + self.assertRegex( + repr(obj), + fr"^{re.escape(MyCallable.__module__)}.*\.{re.escape(expected_repr)}$", + ) + def test_eq_1(self): self.assertEqual(Generic, Generic) self.assertEqual(Generic[T], Generic[T]) @@ -2377,6 +4302,8 @@ class B(Generic[T]): self.assertEqual(A[T], A[T]) self.assertNotEqual(A[T], B[T]) + # TODO: RUSTPYTHON + @unittest.expectedFailure def test_multiple_inheritance(self): class A(Generic[T, VT]): @@ -2396,6 +4323,75 @@ class B(Generic[S]): ... class C(List[int], B): ... self.assertEqual(C.__mro__, (C, list, B, Generic, object)) + def test_multiple_inheritance_non_type_with___mro_entries__(self): + class GoodEntries: + def __mro_entries__(self, bases): + return (object,) + + class A(List[int], GoodEntries()): ... + + self.assertEqual(A.__mro__, (A, list, Generic, object)) + + def test_multiple_inheritance_non_type_without___mro_entries__(self): + # Error should be from the type machinery, not from typing.py + with self.assertRaisesRegex(TypeError, r"^bases must be types"): + class A(List[int], object()): ... + + def test_multiple_inheritance_non_type_bad___mro_entries__(self): + class BadEntries: + def __mro_entries__(self, bases): + return None + + # Error should be from the type machinery, not from typing.py + with self.assertRaisesRegex( + TypeError, + r"^__mro_entries__ must return a tuple", + ): + class A(List[int], BadEntries()): ... + + def test_multiple_inheritance___mro_entries___returns_non_type(self): + class BadEntries: + def __mro_entries__(self, bases): + return (object(),) + + # Error should be from the type machinery, not from typing.py + with self.assertRaisesRegex( + TypeError, + r"^bases must be types", + ): + class A(List[int], BadEntries()): ... + + def test_multiple_inheritance_with_genericalias(self): + class A(typing.Sized, list[int]): ... + + self.assertEqual( + A.__mro__, + (A, collections.abc.Sized, Generic, list, object), + ) + + def test_multiple_inheritance_with_genericalias_2(self): + T = TypeVar("T") + + class BaseSeq(typing.Sequence[T]): ... + class MySeq(List[T], BaseSeq[T]): ... + + self.assertEqual( + MySeq.__mro__, + ( + MySeq, + list, + BaseSeq, + collections.abc.Sequence, + collections.abc.Reversible, + collections.abc.Collection, + collections.abc.Sized, + collections.abc.Iterable, + collections.abc.Container, + Generic, + object, + ), + ) + def test_init_subclass_super_called(self): class FinalException(Exception): pass @@ -2412,7 +4408,7 @@ class Test(Generic[T], Final): class Subclass(Test): pass with self.assertRaises(FinalException): - class Subclass(Test[int]): + class Subclass2(Test[int]): pass def test_nested(self): @@ -2480,11 +4476,11 @@ class D(C): self.assertEqual(D.__parameters__, ()) - with self.assertRaises(Exception): + with self.assertRaises(TypeError): D[int] - with self.assertRaises(Exception): + with self.assertRaises(TypeError): D[Any] - with self.assertRaises(Exception): + with self.assertRaises(TypeError): D[T] def test_new_with_args(self): @@ -2567,6 +4563,7 @@ def test_subclass_special_form(self): Literal[1, 2], Concatenate[int, ParamSpec("P")], TypeGuard[int], + TypeIs[range], ): with self.subTest(msg=obj): with self.assertRaisesRegex( @@ -2575,11 +4572,66 @@ def test_subclass_special_form(self): class Foo(obj): pass + def test_complex_subclasses(self): + T_co = TypeVar("T_co", covariant=True) + + class Base(Generic[T_co]): + ... + + T = TypeVar("T") + + # see gh-94607: this fails in that bug + class Sub(Base, Generic[T]): + ... + + def test_parameter_detection(self): + self.assertEqual(List[T].__parameters__, (T,)) + self.assertEqual(List[List[T]].__parameters__, (T,)) + class A: + __parameters__ = (T,) + # Bare classes should be skipped + for a in (List, list): + for b in (A, int, TypeVar, TypeVarTuple, ParamSpec, types.GenericAlias, types.UnionType): + with self.subTest(generic=a, sub=b): + with self.assertRaisesRegex(TypeError, '.* is not a generic class'): + a[b][str] + # Duck-typing anything that looks like it has __parameters__. + # These tests are optional and failure is okay. + self.assertEqual(List[A()].__parameters__, (T,)) + # C version of GenericAlias + self.assertEqual(list[A()].__parameters__, (T,)) + + def test_non_generic_subscript(self): + T = TypeVar('T') + class G(Generic[T]): + pass + class A: + __parameters__ = (T,) + + for s in (int, G, A, List, list, + TypeVar, TypeVarTuple, ParamSpec, + types.GenericAlias, types.UnionType): + + for t in Tuple, tuple: + with self.subTest(tuple=t, sub=s): + self.assertEqual(t[s, T][int], t[s, int]) + self.assertEqual(t[T, s][int], t[int, s]) + a = t[s] + with self.assertRaises(TypeError): + a[int] + + for c in Callable, collections.abc.Callable: + with self.subTest(callable=c, sub=s): + self.assertEqual(c[[s], T][int], c[[s], int]) + self.assertEqual(c[[T], s][int], c[[int], s]) + a = c[[s], s] + with self.assertRaises(TypeError): + a[int] + + class ClassVarTests(BaseTestCase): def test_basics(self): - with self.assertRaises(TypeError): - ClassVar[1] with self.assertRaises(TypeError): ClassVar[int, str] with self.assertRaises(TypeError): @@ -2593,11 +4645,19 @@ def test_repr(self): self.assertEqual(repr(cv), 'typing.ClassVar[%s.Employee]' % __name__) def test_cannot_subclass(self): - with self.assertRaises(TypeError): + with self.assertRaisesRegex(TypeError, CANNOT_SUBCLASS_TYPE): class C(type(ClassVar)): pass - with self.assertRaises(TypeError): - class C(type(ClassVar[int])): + with self.assertRaisesRegex(TypeError, CANNOT_SUBCLASS_TYPE): + class D(type(ClassVar[int])): + pass + with self.assertRaisesRegex(TypeError, + r'Cannot subclass typing\.ClassVar'): + class E(ClassVar): + pass + with self.assertRaisesRegex(TypeError, + r'Cannot subclass typing\.ClassVar\[int\]'): + class F(ClassVar[int]): pass def test_cannot_init(self): @@ -2614,12 +4674,11 @@ def test_no_isinstance(self): with self.assertRaises(TypeError): issubclass(int, ClassVar) + class FinalTests(BaseTestCase): def test_basics(self): Final[int] # OK - with self.assertRaises(TypeError): - Final[1] with self.assertRaises(TypeError): Final[int, str] with self.assertRaises(TypeError): @@ -2627,40 +4686,254 @@ def test_basics(self): with self.assertRaises(TypeError): Optional[Final[int]] - def test_repr(self): - self.assertEqual(repr(Final), 'typing.Final') - cv = Final[int] - self.assertEqual(repr(cv), 'typing.Final[int]') - cv = Final[Employee] - self.assertEqual(repr(cv), 'typing.Final[%s.Employee]' % __name__) - cv = Final[tuple[int]] - self.assertEqual(repr(cv), 'typing.Final[tuple[int]]') + def test_repr(self): + self.assertEqual(repr(Final), 'typing.Final') + cv = Final[int] + self.assertEqual(repr(cv), 'typing.Final[int]') + cv = Final[Employee] + self.assertEqual(repr(cv), 'typing.Final[%s.Employee]' % __name__) + cv = Final[tuple[int]] + self.assertEqual(repr(cv), 'typing.Final[tuple[int]]') + + def test_cannot_subclass(self): + with self.assertRaisesRegex(TypeError, CANNOT_SUBCLASS_TYPE): + class C(type(Final)): + pass + with self.assertRaisesRegex(TypeError, CANNOT_SUBCLASS_TYPE): + class D(type(Final[int])): + pass + with self.assertRaisesRegex(TypeError, + r'Cannot subclass typing\.Final'): + class E(Final): + pass + with self.assertRaisesRegex(TypeError, + r'Cannot subclass typing\.Final\[int\]'): + class F(Final[int]): + pass + + def test_cannot_init(self): + with self.assertRaises(TypeError): + Final() + with self.assertRaises(TypeError): + type(Final)() + with self.assertRaises(TypeError): + type(Final[Optional[int]])() + + def test_no_isinstance(self): + with self.assertRaises(TypeError): + isinstance(1, Final[int]) + with self.assertRaises(TypeError): + issubclass(int, Final) + + +class FinalDecoratorTests(BaseTestCase): + def test_final_unmodified(self): + def func(x): ... + self.assertIs(func, final(func)) + + # TODO: RUSTPYTHON + @unittest.expectedFailure + def test_dunder_final(self): + @final + def func(): ... + @final + class Cls: ... + self.assertIs(True, func.__final__) + self.assertIs(True, Cls.__final__) + + class Wrapper: + __slots__ = ("func",) + def __init__(self, func): + self.func = func + def __call__(self, *args, **kwargs): + return self.func(*args, **kwargs) + + # Check that no error is thrown if the attribute + # is not writable. + @final + @Wrapper + def wrapped(): ... + self.assertIsInstance(wrapped, Wrapper) + self.assertIs(False, hasattr(wrapped, "__final__")) + + class Meta(type): + @property + def __final__(self): return "can't set me" + @final + class WithMeta(metaclass=Meta): ... + self.assertEqual(WithMeta.__final__, "can't set me") + + # Builtin classes throw TypeError if you try to set an + # attribute. + final(int) + self.assertIs(False, hasattr(int, "__final__")) + + # Make sure it works with common builtin decorators + class Methods: + @final + @classmethod + def clsmethod(cls): ... + + @final + @staticmethod + def stmethod(): ... + + # The other order doesn't work because property objects + # don't allow attribute assignment. + @property + @final + def prop(self): ... + + @final + @lru_cache() + def cached(self): ... + + # Use getattr_static because the descriptor returns the + # underlying function, which doesn't have __final__. + self.assertIs( + True, + inspect.getattr_static(Methods, "clsmethod").__final__ + ) + self.assertIs( + True, + inspect.getattr_static(Methods, "stmethod").__final__ + ) + self.assertIs(True, Methods.prop.fget.__final__) + self.assertIs(True, Methods.cached.__final__) + + +class OverrideDecoratorTests(BaseTestCase): + # TODO: RUSTPYTHON + @unittest.expectedFailure + def test_override(self): + class Base: + def normal_method(self): ... + @classmethod + def class_method_good_order(cls): ... + @classmethod + def class_method_bad_order(cls): ... + @staticmethod + def static_method_good_order(): ... + @staticmethod + def static_method_bad_order(): ... + + class Derived(Base): + @override + def normal_method(self): + return 42 + + @classmethod + @override + def class_method_good_order(cls): + return 42 + @override + @classmethod + def class_method_bad_order(cls): + return 42 + + @staticmethod + @override + def static_method_good_order(): + return 42 + @override + @staticmethod + def static_method_bad_order(): + return 42 - def test_cannot_subclass(self): - with self.assertRaises(TypeError): - class C(type(Final)): - pass - with self.assertRaises(TypeError): - class C(type(Final[int])): - pass + self.assertIsSubclass(Derived, Base) + instance = Derived() + self.assertEqual(instance.normal_method(), 42) + self.assertIs(True, Derived.normal_method.__override__) + self.assertIs(True, instance.normal_method.__override__) + + self.assertEqual(Derived.class_method_good_order(), 42) + self.assertIs(True, Derived.class_method_good_order.__override__) + self.assertEqual(Derived.class_method_bad_order(), 42) + self.assertIs(False, hasattr(Derived.class_method_bad_order, "__override__")) + + self.assertEqual(Derived.static_method_good_order(), 42) + self.assertIs(True, Derived.static_method_good_order.__override__) + self.assertEqual(Derived.static_method_bad_order(), 42) + self.assertIs(False, hasattr(Derived.static_method_bad_order, "__override__")) + + # Base object is not changed: + self.assertIs(False, hasattr(Base.normal_method, "__override__")) + self.assertIs(False, hasattr(Base.class_method_good_order, "__override__")) + self.assertIs(False, hasattr(Base.class_method_bad_order, "__override__")) + self.assertIs(False, hasattr(Base.static_method_good_order, "__override__")) + self.assertIs(False, hasattr(Base.static_method_bad_order, "__override__")) - def test_cannot_init(self): - with self.assertRaises(TypeError): - Final() - with self.assertRaises(TypeError): - type(Final)() - with self.assertRaises(TypeError): - type(Final[Optional[int]])() + # TODO: RUSTPYTHON + @unittest.expectedFailure + def test_property(self): + class Base: + @property + def correct(self) -> int: + return 1 + @property + def wrong(self) -> int: + return 1 + + class Child(Base): + @property + @override + def correct(self) -> int: + return 2 + @override + @property + def wrong(self) -> int: + return 2 + + instance = Child() + self.assertEqual(instance.correct, 2) + self.assertTrue(Child.correct.fget.__override__) + self.assertEqual(instance.wrong, 2) + self.assertFalse(hasattr(Child.wrong, "__override__")) + self.assertFalse(hasattr(Child.wrong.fset, "__override__")) - def test_no_isinstance(self): - with self.assertRaises(TypeError): - isinstance(1, Final[int]) - with self.assertRaises(TypeError): - issubclass(int, Final) + # TODO: RUSTPYTHON + @unittest.expectedFailure + def test_silent_failure(self): + class CustomProp: + __slots__ = ('fget',) + def __init__(self, fget): + self.fget = fget + def __get__(self, obj, objtype=None): + return self.fget(obj) + + class WithOverride: + @override # must not fail on object with `__slots__` + @CustomProp + def some(self): + return 1 + + self.assertEqual(WithOverride.some, 1) + self.assertFalse(hasattr(WithOverride.some, "__override__")) - def test_final_unmodified(self): - def func(x): ... - self.assertIs(func, final(func)) + # TODO: RUSTPYTHON + @unittest.expectedFailure + def test_multiple_decorators(self): + def with_wraps(f): # similar to `lru_cache` definition + @wraps(f) + def wrapper(*args, **kwargs): + return f(*args, **kwargs) + return wrapper + + class WithOverride: + @override + @with_wraps + def on_top(self, a: int) -> int: + return a + 1 + @with_wraps + @override + def on_bottom(self, a: int) -> int: + return a + 2 + + instance = WithOverride() + self.assertEqual(instance.on_top(1), 2) + self.assertTrue(instance.on_top.__override__) + self.assertEqual(instance.on_bottom(1), 3) + self.assertTrue(instance.on_bottom.__override__) class CastTests(BaseTestCase): @@ -2681,8 +4954,36 @@ def test_errors(self): cast('hello', 42) -class ForwardRefTests(BaseTestCase): +class AssertTypeTests(BaseTestCase): + + def test_basics(self): + arg = 42 + self.assertIs(assert_type(arg, int), arg) + self.assertIs(assert_type(arg, str | float), arg) + self.assertIs(assert_type(arg, AnyStr), arg) + self.assertIs(assert_type(arg, None), arg) + + def test_errors(self): + # Bogus calls are not expected to fail. + arg = 42 + self.assertIs(assert_type(arg, 42), arg) + self.assertIs(assert_type(arg, 'hello'), arg) + + + +# We need this to make sure that `@no_type_check` respects `__module__` attr: +from test.typinganndata import ann_module8 + +@no_type_check +class NoTypeCheck_Outer: + Inner = ann_module8.NoTypeCheck_Outer.Inner + +@no_type_check +class NoTypeCheck_WithFunction: + NoTypeCheck_function = ann_module8.NoTypeCheck_function + +class ForwardRefTests(BaseTestCase): def test_basics(self): class Node(Generic[T]): @@ -2708,16 +5009,15 @@ def add_right(self, node: 'Node[T]' = None): t = Node[int] both_hints = get_type_hints(t.add_both, globals(), locals()) self.assertEqual(both_hints['left'], Optional[Node[T]]) - self.assertEqual(both_hints['right'], Optional[Node[T]]) - self.assertEqual(both_hints['left'], both_hints['right']) - self.assertEqual(both_hints['stuff'], Optional[int]) + self.assertEqual(both_hints['right'], Node[T]) + self.assertEqual(both_hints['stuff'], int) self.assertNotIn('blah', both_hints) left_hints = get_type_hints(t.add_left, globals(), locals()) self.assertEqual(left_hints['node'], Optional[Node[T]]) right_hints = get_type_hints(t.add_right, globals(), locals()) - self.assertEqual(right_hints['node'], Optional[Node[T]]) + self.assertEqual(right_hints['node'], Node[T]) def test_forwardref_instance_type_error(self): fr = typing.ForwardRef('int') @@ -2811,6 +5111,8 @@ def fun(x: a): def test_forward_repr(self): self.assertEqual(repr(List['int']), "typing.List[ForwardRef('int')]") + self.assertEqual(repr(List[ForwardRef('int', module='mod')]), + "typing.List[ForwardRef('int', module='mod')]") def test_union_forward(self): @@ -2866,10 +5168,11 @@ def fun(x: a): pass def cmp(o1, o2): return o1 == o2 - r1 = namespace1() - r2 = namespace2() - self.assertIsNot(r1, r2) - self.assertRaises(RecursionError, cmp, r1, r2) + with infinite_recursion(25): + r1 = namespace1() + r2 = namespace2() + self.assertIsNot(r1, r2) + self.assertRaises(RecursionError, cmp, r1, r2) def test_union_forward_recursion(self): ValueList = List['Value'] @@ -2924,12 +5227,16 @@ def test_special_forms_forward(self): class C: a: Annotated['ClassVar[int]', (3, 5)] = 4 b: Annotated['Final[int]', "const"] = 4 + x: 'ClassVar' = 4 + y: 'Final' = 4 class CF: b: List['Final[int]'] = 4 self.assertEqual(get_type_hints(C, globals())['a'], ClassVar[int]) self.assertEqual(get_type_hints(C, globals())['b'], Final[int]) + self.assertEqual(get_type_hints(C, globals())['x'], ClassVar) + self.assertEqual(get_type_hints(C, globals())['y'], Final) with self.assertRaises(TypeError): get_type_hints(CF, globals()), @@ -2946,13 +5253,11 @@ def foo(a: 'Node[T'): with self.assertRaises(SyntaxError): get_type_hints(foo) - def test_type_error(self): - - def foo(a: Tuple['42']): - pass - - with self.assertRaises(TypeError): - get_type_hints(foo) + def test_syntax_error_empty_string(self): + for form in [typing.List, typing.Set, typing.Type, typing.Deque]: + with self.subTest(form=form): + with self.assertRaises(SyntaxError): + form[''] def test_name_error(self): @@ -2989,9 +5294,100 @@ def meth(self, x: int): ... @no_type_check class D(C): c = C + # verify that @no_type_check never affects bases self.assertEqual(get_type_hints(C.meth), {'x': int}) + # and never child classes: + class Child(D): + def foo(self, x: int): ... + + self.assertEqual(get_type_hints(Child.foo), {'x': int}) + + def test_no_type_check_nested_types(self): + # See https://bugs.python.org/issue46571 + class Other: + o: int + class B: # Has the same `__name__`` as `A.B` and different `__qualname__` + o: int + @no_type_check + class A: + a: int + class B: + b: int + class C: + c: int + class D: + d: int + + Other = Other + + for klass in [A, A.B, A.B.C, A.D]: + with self.subTest(klass=klass): + self.assertTrue(klass.__no_type_check__) + self.assertEqual(get_type_hints(klass), {}) + + for not_modified in [Other, B]: + with self.subTest(not_modified=not_modified): + with self.assertRaises(AttributeError): + not_modified.__no_type_check__ + self.assertNotEqual(get_type_hints(not_modified), {}) + + def test_no_type_check_class_and_static_methods(self): + @no_type_check + class Some: + @staticmethod + def st(x: int) -> int: ... + @classmethod + def cl(cls, y: int) -> int: ... + + self.assertTrue(Some.st.__no_type_check__) + self.assertEqual(get_type_hints(Some.st), {}) + self.assertTrue(Some.cl.__no_type_check__) + self.assertEqual(get_type_hints(Some.cl), {}) + + def test_no_type_check_other_module(self): + self.assertTrue(NoTypeCheck_Outer.__no_type_check__) + with self.assertRaises(AttributeError): + ann_module8.NoTypeCheck_Outer.__no_type_check__ + with self.assertRaises(AttributeError): + ann_module8.NoTypeCheck_Outer.Inner.__no_type_check__ + + self.assertTrue(NoTypeCheck_WithFunction.__no_type_check__) + with self.assertRaises(AttributeError): + ann_module8.NoTypeCheck_function.__no_type_check__ + + def test_no_type_check_foreign_functions(self): + # We should not modify this function: + def some(*args: int) -> int: + ... + + @no_type_check + class A: + some_alias = some + some_class = classmethod(some) + some_static = staticmethod(some) + + with self.assertRaises(AttributeError): + some.__no_type_check__ + self.assertEqual(get_type_hints(some), {'args': int, 'return': int}) + + # TODO: RUSTPYTHON + @unittest.expectedFailure + def test_no_type_check_lambda(self): + @no_type_check + class A: + # Corner case: `lambda` is both an assignment and a function: + bar: Callable[[int], int] = lambda arg: arg + + self.assertTrue(A.bar.__no_type_check__) + self.assertEqual(get_type_hints(A.bar), {}) + + def test_no_type_check_TypeError(self): + # This simply should not fail with + # `TypeError: can't set attributes of built-in/extension type 'dict'` + no_type_check(dict) + def test_no_type_check_forward_ref_as_string(self): class C: foo: typing.ClassVar[int] = 7 @@ -3006,21 +5402,15 @@ class F: for clazz in [C, D, E, F]: self.assertEqual(get_type_hints(clazz), expected_result) - def test_nested_classvar_fails_forward_ref_check(self): - class E: - foo: 'typing.ClassVar[typing.ClassVar[int]]' = 7 - class F: - foo: ClassVar['ClassVar[int]'] = 7 - - for clazz in [E, F]: - with self.assertRaises(TypeError): - get_type_hints(clazz) - def test_meta_no_type_check(self): - - @no_type_check_decorator - def magic_decorator(func): - return func + depr_msg = ( + "'typing.no_type_check_decorator' is deprecated " + "and slated for removal in Python 3.15" + ) + with self.assertWarnsRegex(DeprecationWarning, depr_msg): + @no_type_check_decorator + def magic_decorator(func): + return func self.assertEqual(magic_decorator.__name__, 'magic_decorator') @@ -3057,13 +5447,66 @@ def test_final_forward_ref(self): self.assertNotEqual(gth(Loop, globals())['attr'], Final[int]) self.assertNotEqual(gth(Loop, globals())['attr'], Final) + def test_or(self): + X = ForwardRef('X') + # __or__/__ror__ itself + self.assertEqual(X | "x", Union[X, "x"]) + self.assertEqual("x" | X, Union["x", X]) + + +class InternalsTests(BaseTestCase): + def test_deprecation_for_no_type_params_passed_to__evaluate(self): + with self.assertWarnsRegex( + DeprecationWarning, + ( + "Failing to pass a value to the 'type_params' parameter " + "of 'typing._eval_type' is deprecated" + ) + ) as cm: + self.assertEqual(typing._eval_type(list["int"], globals(), {}), list[int]) + + self.assertEqual(cm.filename, __file__) + + f = ForwardRef("int") + + with self.assertWarnsRegex( + DeprecationWarning, + ( + "Failing to pass a value to the 'type_params' parameter " + "of 'typing.ForwardRef._evaluate' is deprecated" + ) + ) as cm: + self.assertIs(f._evaluate(globals(), {}, recursive_guard=frozenset()), int) + + self.assertEqual(cm.filename, __file__) + + def test_collect_parameters(self): + typing = import_helper.import_fresh_module("typing") + with self.assertWarnsRegex( + DeprecationWarning, + "The private _collect_parameters function is deprecated" + ) as cm: + typing._collect_parameters + self.assertEqual(cm.filename, __file__) + + +@lru_cache() +def cached_func(x, y): + return 3 * x + y + + +class MethodHolder: + @classmethod + def clsmethod(cls): ... + @staticmethod + def stmethod(): ... + def method(self): ... + class OverloadTests(BaseTestCase): def test_overload_fails(self): - from typing import overload - - with self.assertRaises(RuntimeError): + with self.assertRaises(NotImplementedError): @overload def blah(): @@ -3072,8 +5515,6 @@ def blah(): blah() def test_overload_succeeds(self): - from typing import overload - @overload def blah(): pass @@ -3083,9 +5524,80 @@ def blah(): blah() + @cpython_only # gh-98713 + def test_overload_on_compiled_functions(self): + with patch("typing._overload_registry", + defaultdict(lambda: defaultdict(dict))): + # The registry starts out empty: + self.assertEqual(typing._overload_registry, {}) + + # This should just not fail: + overload(sum) + overload(print) + + # No overloads are recorded (but, it still has a side-effect): + self.assertEqual(typing.get_overloads(sum), []) + self.assertEqual(typing.get_overloads(print), []) + + def set_up_overloads(self): + def blah(): + pass + + overload1 = blah + overload(blah) + + def blah(): + pass + + overload2 = blah + overload(blah) + + def blah(): + pass + + return blah, [overload1, overload2] + + # Make sure we don't clear the global overload registry + @patch("typing._overload_registry", + defaultdict(lambda: defaultdict(dict))) + def test_overload_registry(self): + # The registry starts out empty + self.assertEqual(typing._overload_registry, {}) + + impl, overloads = self.set_up_overloads() + self.assertNotEqual(typing._overload_registry, {}) + self.assertEqual(list(get_overloads(impl)), overloads) + + def some_other_func(): pass + overload(some_other_func) + other_overload = some_other_func + def some_other_func(): pass + self.assertEqual(list(get_overloads(some_other_func)), [other_overload]) + # Unrelated function still has no overloads: + def not_overloaded(): pass + self.assertEqual(list(get_overloads(not_overloaded)), []) + + # Make sure that after we clear all overloads, the registry is + # completely empty. + clear_overloads() + self.assertEqual(typing._overload_registry, {}) + self.assertEqual(get_overloads(impl), []) + + # Querying a function with no overloads shouldn't change the registry. + def the_only_one(): pass + self.assertEqual(get_overloads(the_only_one), []) + self.assertEqual(typing._overload_registry, {}) + + def test_overload_registry_repeated(self): + for _ in range(2): + impl, overloads = self.set_up_overloads() + + self.assertEqual(list(get_overloads(impl)), overloads) + +from test.typinganndata import ( + ann_module, ann_module2, ann_module3, ann_module5, ann_module6, +) -ASYNCIO_TESTS = """ -import asyncio T_a = TypeVar('T_a') @@ -3118,19 +5630,7 @@ async def __aenter__(self) -> int: return 42 async def __aexit__(self, etype, eval, tb): return None -""" - -try: - exec(ASYNCIO_TESTS) -except ImportError: - ASYNCIO = False # multithreading is not enabled -else: - ASYNCIO = True -# Definitions needed for features introduced in Python 3.6 - -from test import ann_module, ann_module2, ann_module3, ann_module5, ann_module6 -from typing import AsyncContextManager class A: y: float @@ -3177,20 +5677,60 @@ class Point2D(TypedDict): x: int y: int +class Point2DGeneric(Generic[T], TypedDict): + a: T + b: T + class Bar(_typed_dict_helper.Foo, total=False): b: int +class BarGeneric(_typed_dict_helper.FooGeneric[T], total=False): + b: int + class LabelPoint2D(Point2D, Label): ... class Options(TypedDict, total=False): log_level: int log_path: str -class HasForeignBaseClass(mod_generics_cache.A): - some_xrepr: 'XRepr' - other_a: 'mod_generics_cache.A' +class TotalMovie(TypedDict): + title: str + year: NotRequired[int] + +class NontotalMovie(TypedDict, total=False): + title: Required[str] + year: int + +class ParentNontotalMovie(TypedDict, total=False): + title: Required[str] + +class ChildTotalMovie(ParentNontotalMovie): + year: NotRequired[int] + +class ParentDeeplyAnnotatedMovie(TypedDict): + title: Annotated[Annotated[Required[str], "foobar"], "another level"] -async def g_with(am: AsyncContextManager[int]): +class ChildDeeplyAnnotatedMovie(ParentDeeplyAnnotatedMovie): + year: NotRequired[Annotated[int, 2000]] + +class AnnotatedMovie(TypedDict): + title: Annotated[Required[str], "foobar"] + year: NotRequired[Annotated[int, 2000]] + +class DeeplyAnnotatedMovie(TypedDict): + title: Annotated[Annotated[Required[str], "foobar"], "another level"] + year: NotRequired[Annotated[int, 2000]] + +class WeirdlyQuotedMovie(TypedDict): + title: Annotated['Annotated[Required[str], "foobar"]', "another level"] + year: NotRequired['Annotated[int, 2000]'] + +# TODO: RUSTPYTHON +# class HasForeignBaseClass(mod_generics_cache.A): +# some_xrepr: 'XRepr' +# other_a: 'mod_generics_cache.A' + +async def g_with(am: typing.AsyncContextManager[int]): x: int async with am as x: return x @@ -3202,6 +5742,7 @@ async def g_with(am: AsyncContextManager[int]): gth = get_type_hints + class ForRefExample: @ann_module.dec def func(self: 'ForRefExample'): @@ -3240,6 +5781,8 @@ def test_get_type_hints_modules_forwardref(self): 'default_b': Optional[mod_generics_cache.B]} self.assertEqual(gth(mod_generics_cache), mgc_hints) + # TODO: RUSTPYTHON + @unittest.expectedFailure def test_get_type_hints_classes(self): self.assertEqual(gth(ann_module.C), # gth will find the right globalns {'y': Optional[ann_module.C]}) @@ -3349,7 +5892,7 @@ def foobar(x: list[ForwardRef('X')]): ... BA = Tuple[Annotated[T, (1, 0)], ...] def barfoo(x: BA): ... self.assertEqual(get_type_hints(barfoo, globals(), locals())['x'], Tuple[T, ...]) - self.assertIs( + self.assertEqual( get_type_hints(barfoo, globals(), locals(), include_extras=True)['x'], BA ) @@ -3357,7 +5900,7 @@ def barfoo(x: BA): ... BA = tuple[Annotated[T, (1, 0)], ...] def barfoo(x: BA): ... self.assertEqual(get_type_hints(barfoo, globals(), locals())['x'], tuple[T, ...]) - self.assertIs( + self.assertEqual( get_type_hints(barfoo, globals(), locals(), include_extras=True)['x'], BA ) @@ -3422,6 +5965,18 @@ def __iand__(self, other: Const["MySet[T]"]) -> "MySet[T]": {'other': MySet[T], 'return': MySet[T]} ) + def test_get_type_hints_annotated_with_none_default(self): + # See: https://bugs.python.org/issue46195 + def annotated_with_none_default(x: Annotated[int, 'data'] = None): ... + self.assertEqual( + get_type_hints(annotated_with_none_default), + {'x': int}, + ) + self.assertEqual( + get_type_hints(annotated_with_none_default, include_extras=True), + {'x': Annotated[int, 'data']}, + ) + def test_get_type_hints_classes_str_annotations(self): class Foo: y = str @@ -3447,10 +6002,82 @@ class BadType(BadBase): self.assertNotIn('bad', sys.modules) self.assertEqual(get_type_hints(BadType), {'foo': tuple, 'bar': list}) + def test_forward_ref_and_final(self): + # https://bugs.python.org/issue45166 + hints = get_type_hints(ann_module5) + self.assertEqual(hints, {'name': Final[str]}) + + hints = get_type_hints(ann_module5.MyClass) + self.assertEqual(hints, {'value': Final}) + + def test_top_level_class_var(self): + # https://bugs.python.org/issue45166 + with self.assertRaisesRegex( + TypeError, + r'typing.ClassVar\[int\] is not valid as type argument', + ): + get_type_hints(ann_module6) + + # TODO: RUSTPYTHON + @unittest.expectedFailure + def test_get_type_hints_typeddict(self): + self.assertEqual(get_type_hints(TotalMovie), {'title': str, 'year': int}) + self.assertEqual(get_type_hints(TotalMovie, include_extras=True), { + 'title': str, + 'year': NotRequired[int], + }) + + self.assertEqual(get_type_hints(AnnotatedMovie), {'title': str, 'year': int}) + self.assertEqual(get_type_hints(AnnotatedMovie, include_extras=True), { + 'title': Annotated[Required[str], "foobar"], + 'year': NotRequired[Annotated[int, 2000]], + }) + + self.assertEqual(get_type_hints(DeeplyAnnotatedMovie), {'title': str, 'year': int}) + self.assertEqual(get_type_hints(DeeplyAnnotatedMovie, include_extras=True), { + 'title': Annotated[Required[str], "foobar", "another level"], + 'year': NotRequired[Annotated[int, 2000]], + }) + + self.assertEqual(get_type_hints(WeirdlyQuotedMovie), {'title': str, 'year': int}) + self.assertEqual(get_type_hints(WeirdlyQuotedMovie, include_extras=True), { + 'title': Annotated[Required[str], "foobar", "another level"], + 'year': NotRequired[Annotated[int, 2000]], + }) + + self.assertEqual(get_type_hints(_typed_dict_helper.VeryAnnotated), {'a': int}) + self.assertEqual(get_type_hints(_typed_dict_helper.VeryAnnotated, include_extras=True), { + 'a': Annotated[Required[int], "a", "b", "c"] + }) + + self.assertEqual(get_type_hints(ChildTotalMovie), {"title": str, "year": int}) + self.assertEqual(get_type_hints(ChildTotalMovie, include_extras=True), { + "title": Required[str], "year": NotRequired[int] + }) + + self.assertEqual(get_type_hints(ChildDeeplyAnnotatedMovie), {"title": str, "year": int}) + self.assertEqual(get_type_hints(ChildDeeplyAnnotatedMovie, include_extras=True), { + "title": Annotated[Required[str], "foobar", "another level"], + "year": NotRequired[Annotated[int, 2000]] + }) + + def test_get_type_hints_collections_abc_callable(self): + # https://github.com/python/cpython/issues/91621 + P = ParamSpec('P') + def f(x: collections.abc.Callable[[int], int]): ... + def g(x: collections.abc.Callable[..., int]): ... + def h(x: collections.abc.Callable[P, int]): ... + + self.assertEqual(get_type_hints(f), {'x': collections.abc.Callable[[int], int]}) + self.assertEqual(get_type_hints(g), {'x': collections.abc.Callable[..., int]}) + self.assertEqual(get_type_hints(h), {'x': collections.abc.Callable[P, int]}) + + class GetUtilitiesTestCase(TestCase): def test_get_origin(self): T = TypeVar('T') + Ts = TypeVarTuple('Ts') P = ParamSpec('P') class C(Generic[T]): pass self.assertIs(get_origin(C[int]), C) @@ -3472,27 +6099,46 @@ class C(Generic[T]): pass self.assertIs(get_origin(list | str), types.UnionType) self.assertIs(get_origin(P.args), P) self.assertIs(get_origin(P.kwargs), P) + self.assertIs(get_origin(Required[int]), Required) + self.assertIs(get_origin(NotRequired[int]), NotRequired) + self.assertIs(get_origin((*Ts,)[0]), Unpack) + self.assertIs(get_origin(Unpack[Ts]), Unpack) + self.assertIs(get_origin((*tuple[*Ts],)[0]), tuple) + self.assertIs(get_origin(Unpack[Tuple[Unpack[Ts]]]), Unpack) + # TODO: RUSTPYTHON + @unittest.expectedFailure def test_get_args(self): T = TypeVar('T') class C(Generic[T]): pass self.assertEqual(get_args(C[int]), (int,)) self.assertEqual(get_args(C[T]), (T,)) + self.assertEqual(get_args(typing.SupportsAbs[int]), (int,)) # Protocol + self.assertEqual(get_args(typing.SupportsAbs[T]), (T,)) + self.assertEqual(get_args(Point2DGeneric[int]), (int,)) # TypedDict + self.assertEqual(get_args(Point2DGeneric[T]), (T,)) + self.assertEqual(get_args(T), ()) self.assertEqual(get_args(int), ()) + self.assertEqual(get_args(Any), ()) + self.assertEqual(get_args(Self), ()) + self.assertEqual(get_args(LiteralString), ()) self.assertEqual(get_args(ClassVar[int]), (int,)) self.assertEqual(get_args(Union[int, str]), (int, str)) self.assertEqual(get_args(Literal[42, 43]), (42, 43)) self.assertEqual(get_args(Final[List[int]]), (List[int],)) + self.assertEqual(get_args(Optional[int]), (int, type(None))) + self.assertEqual(get_args(Union[int, None]), (int, type(None))) self.assertEqual(get_args(Union[int, Tuple[T, int]][str]), (int, Tuple[str, int])) self.assertEqual(get_args(typing.Dict[int, Tuple[T, T]][Optional[int]]), (int, Tuple[Optional[int], Optional[int]])) self.assertEqual(get_args(Callable[[], T][int]), ([], int)) self.assertEqual(get_args(Callable[..., int]), (..., int)) + self.assertEqual(get_args(Callable[[int], str]), ([int], str)) self.assertEqual(get_args(Union[int, Callable[[Tuple[T, ...]], str]]), (int, Callable[[Tuple[T, ...]], str])) self.assertEqual(get_args(Tuple[int, ...]), (int, ...)) - self.assertEqual(get_args(Tuple[()]), ((),)) + self.assertEqual(get_args(Tuple[()]), ()) self.assertEqual(get_args(Annotated[T, 'one', 2, ['three']]), (T, 'one', 2, ['three'])) self.assertEqual(get_args(List), ()) self.assertEqual(get_args(Tuple), ()) @@ -3505,26 +6151,31 @@ class C(Generic[T]): pass self.assertEqual(get_args(collections.abc.Callable[[int], str]), get_args(Callable[[int], str])) P = ParamSpec('P') + self.assertEqual(get_args(P), ()) + self.assertEqual(get_args(P.args), ()) + self.assertEqual(get_args(P.kwargs), ()) self.assertEqual(get_args(Callable[P, int]), (P, int)) + self.assertEqual(get_args(collections.abc.Callable[P, int]), (P, int)) self.assertEqual(get_args(Callable[Concatenate[int, P], int]), (Concatenate[int, P], int)) + self.assertEqual(get_args(collections.abc.Callable[Concatenate[int, P], int]), + (Concatenate[int, P], int)) + self.assertEqual(get_args(Concatenate[int, str, P]), (int, str, P)) self.assertEqual(get_args(list | str), (list, str)) + self.assertEqual(get_args(Required[int]), (int,)) + self.assertEqual(get_args(NotRequired[int]), (int,)) + self.assertEqual(get_args(TypeAlias), ()) + self.assertEqual(get_args(TypeGuard[int]), (int,)) + self.assertEqual(get_args(TypeIs[range]), (range,)) + Ts = TypeVarTuple('Ts') + self.assertEqual(get_args(Ts), ()) + self.assertEqual(get_args((*Ts,)[0]), (Ts,)) + self.assertEqual(get_args(Unpack[Ts]), (Ts,)) + self.assertEqual(get_args(tuple[*Ts]), (*Ts,)) + self.assertEqual(get_args(tuple[Unpack[Ts]]), (Unpack[Ts],)) + self.assertEqual(get_args((*tuple[*Ts],)[0]), (*Ts,)) + self.assertEqual(get_args(Unpack[tuple[Unpack[Ts]]]), (tuple[Unpack[Ts]],)) - def test_forward_ref_and_final(self): - # https://bugs.python.org/issue45166 - hints = get_type_hints(ann_module5) - self.assertEqual(hints, {'name': Final[str]}) - - hints = get_type_hints(ann_module5.MyClass) - self.assertEqual(hints, {'value': Final}) - - def test_top_level_class_var(self): - # https://bugs.python.org/issue45166 - with self.assertRaisesRegex( - TypeError, - r'typing.ClassVar\[int\] is not valid as type argument', - ): - get_type_hints(ann_module6) class CollectionsAbcTests(BaseTestCase): @@ -3549,27 +6200,17 @@ def test_iterator(self): self.assertIsInstance(it, typing.Iterator) self.assertNotIsInstance(42, typing.Iterator) - @skipUnless(ASYNCIO, 'Python 3.5 and multithreading required') def test_awaitable(self): - ns = {} - exec( - "async def foo() -> typing.Awaitable[int]:\n" - " return await AwaitableWrapper(42)\n", - globals(), ns) - foo = ns['foo'] + async def foo() -> typing.Awaitable[int]: + return await AwaitableWrapper(42) g = foo() self.assertIsInstance(g, typing.Awaitable) self.assertNotIsInstance(foo, typing.Awaitable) g.send(None) # Run foo() till completion, to avoid warning. - @skipUnless(ASYNCIO, 'Python 3.5 and multithreading required') def test_coroutine(self): - ns = {} - exec( - "async def foo():\n" - " return\n", - globals(), ns) - foo = ns['foo'] + async def foo(): + return g = foo() self.assertIsInstance(g, typing.Coroutine) with self.assertRaises(TypeError): @@ -3580,7 +6221,6 @@ def test_coroutine(self): except StopIteration: pass - @skipUnless(ASYNCIO, 'Python 3.5 and multithreading required') def test_async_iterable(self): base_it = range(10) # type: Iterator[int] it = AsyncIteratorWrapper(base_it) @@ -3588,7 +6228,6 @@ def test_async_iterable(self): self.assertIsInstance(it, typing.AsyncIterable) self.assertNotIsInstance(42, typing.AsyncIterable) - @skipUnless(ASYNCIO, 'Python 3.5 and multithreading required') def test_async_iterator(self): base_it = range(10) # type: Iterator[int] it = AsyncIteratorWrapper(base_it) @@ -3634,8 +6273,14 @@ def test_mutablesequence(self): self.assertNotIsInstance((), typing.MutableSequence) def test_bytestring(self): - self.assertIsInstance(b'', typing.ByteString) - self.assertIsInstance(bytearray(b''), typing.ByteString) + with self.assertWarns(DeprecationWarning): + self.assertIsInstance(b'', typing.ByteString) + with self.assertWarns(DeprecationWarning): + self.assertIsInstance(bytearray(b''), typing.ByteString) + with self.assertWarns(DeprecationWarning): + class Foo(typing.ByteString): ... + with self.assertWarns(DeprecationWarning): + class Bar(typing.ByteString, typing.Awaitable): ... def test_list(self): self.assertIsSubclass(list, typing.List) @@ -3742,7 +6387,6 @@ class MyOrdDict(typing.OrderedDict[str, int]): self.assertIsSubclass(MyOrdDict, collections.OrderedDict) self.assertNotIsSubclass(collections.OrderedDict, MyOrdDict) - @skipUnless(sys.version_info >= (3, 3), 'ChainMap was added in 3.3') def test_chainmap_instantiation(self): self.assertIs(type(typing.ChainMap()), collections.ChainMap) self.assertIs(type(typing.ChainMap[KT, VT]()), collections.ChainMap) @@ -3750,7 +6394,6 @@ def test_chainmap_instantiation(self): class CM(typing.ChainMap[KT, VT]): ... self.assertIs(type(CM[int, str]()), CM) - @skipUnless(sys.version_info >= (3, 3), 'ChainMap was added in 3.3') def test_chainmap_subclass(self): class MyChainMap(typing.ChainMap[str, int]): @@ -3832,6 +6475,17 @@ def foo(): g = foo() self.assertIsSubclass(type(g), typing.Generator) + def test_generator_default(self): + g1 = typing.Generator[int] + g2 = typing.Generator[int, None, None] + self.assertEqual(get_args(g1), (int, type(None), type(None))) + self.assertEqual(get_args(g1), get_args(g2)) + + g3 = typing.Generator[int, float] + g4 = typing.Generator[int, float, None] + self.assertEqual(get_args(g3), (int, float, type(None))) + self.assertEqual(get_args(g3), get_args(g4)) + def test_no_generator_instantiation(self): with self.assertRaises(TypeError): typing.Generator() @@ -3841,10 +6495,9 @@ def test_no_generator_instantiation(self): typing.Generator[int, int, int]() def test_async_generator(self): - ns = {} - exec("async def f():\n" - " yield 42\n", globals(), ns) - g = ns['f']() + async def f(): + yield 42 + g = f() self.assertIsSubclass(type(g), typing.AsyncGenerator) def test_no_async_generator_instantiation(self): @@ -3855,8 +6508,6 @@ def test_no_async_generator_instantiation(self): with self.assertRaises(TypeError): typing.AsyncGenerator[int, int]() - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_subclassing(self): class MMA(typing.MutableMapping): @@ -3878,7 +6529,7 @@ def __len__(self): return 0 self.assertEqual(len(MMC()), 0) - assert callable(MMC.update) + self.assertTrue(callable(MMC.update)) self.assertIsInstance(MMC(), typing.Mapping) class MMB(typing.MutableMapping[KT, VT]): @@ -3933,9 +6584,8 @@ def asend(self, value): def athrow(self, typ, val=None, tb=None): pass - ns = {} - exec('async def g(): yield 0', globals(), ns) - g = ns['g'] + async def g(): yield 0 + self.assertIsSubclass(G, typing.AsyncGenerator) self.assertIsSubclass(G, typing.AsyncIterable) self.assertIsSubclass(G, collections.abc.AsyncGenerator) @@ -4020,7 +6670,17 @@ def manager(): self.assertIsInstance(cm, typing.ContextManager) self.assertNotIsInstance(42, typing.ContextManager) - @skipUnless(ASYNCIO, 'Python 3.5 required') + # TODO: RUSTPYTHON + @unittest.expectedFailure + def test_contextmanager_type_params(self): + cm1 = typing.ContextManager[int] + self.assertEqual(get_args(cm1), (int, bool | None)) + cm2 = typing.ContextManager[int, None] + self.assertEqual(get_args(cm2), (int, types.NoneType)) + + type gen_cm[T1, T2] = typing.ContextManager[T1, T2] + self.assertEqual(get_args(gen_cm.__value__[int, None]), (int, types.NoneType)) + def test_async_contextmanager(self): class NotACM: pass @@ -4032,11 +6692,17 @@ def manager(): cm = manager() self.assertNotIsInstance(cm, typing.AsyncContextManager) - self.assertEqual(typing.AsyncContextManager[int].__args__, (int,)) + self.assertEqual(typing.AsyncContextManager[int].__args__, (int, bool | None)) with self.assertRaises(TypeError): isinstance(42, typing.AsyncContextManager[int]) with self.assertRaises(TypeError): - typing.AsyncContextManager[int, str] + typing.AsyncContextManager[int, str, float] + + def test_asynccontextmanager_type_params(self): + cm1 = typing.AsyncContextManager[int] + self.assertEqual(get_args(cm1), (int, bool | None)) + cm2 = typing.AsyncContextManager[int, None] + self.assertEqual(get_args(cm2), (int, types.NoneType)) class TypeTests(BaseTestCase): @@ -4074,16 +6740,24 @@ def foo(a: A) -> Optional[BaseException]: else: return a() - assert isinstance(foo(KeyboardInterrupt), KeyboardInterrupt) - assert foo(None) is None + self.assertIsInstance(foo(KeyboardInterrupt), KeyboardInterrupt) + self.assertIsNone(foo(None)) + + +class TestModules(TestCase): + func_names = ['_idfunc'] + + def test_c_functions(self): + for fname in self.func_names: + self.assertEqual(getattr(typing, fname).__module__, '_typing') class NewTypeTests(BaseTestCase): @classmethod def setUpClass(cls): global UserId - UserId = NewType('UserId', int) - cls.UserName = NewType(cls.__qualname__ + '.UserName', str) + UserId = typing.NewType('UserId', int) + cls.UserName = typing.NewType(cls.__qualname__ + '.UserName', str) @classmethod def tearDownClass(cls): @@ -4091,9 +6765,6 @@ def tearDownClass(cls): del UserId del cls.UserName - def tearDown(self): - self.clear_caches() - def test_basic(self): self.assertIsInstance(UserId(5), int) self.assertIsInstance(self.UserName('Joe'), str) @@ -4109,11 +6780,11 @@ class D(UserId): def test_or(self): for cls in (int, self.UserName): with self.subTest(cls=cls): - self.assertEqual(UserId | cls, Union[UserId, cls]) - self.assertEqual(cls | UserId, Union[cls, UserId]) + self.assertEqual(UserId | cls, typing.Union[UserId, cls]) + self.assertEqual(cls | UserId, typing.Union[cls, UserId]) - self.assertEqual(get_args(UserId | cls), (UserId, cls)) - self.assertEqual(get_args(cls | UserId), (cls, UserId)) + self.assertEqual(typing.get_args(UserId | cls), (UserId, cls)) + self.assertEqual(typing.get_args(cls | UserId), (cls, UserId)) def test_special_attrs(self): self.assertEqual(UserId.__name__, 'UserId') @@ -4134,7 +6805,7 @@ def test_repr(self): f'{__name__}.{self.__class__.__qualname__}.UserName') def test_pickle(self): - UserAge = NewType('UserAge', float) + UserAge = typing.NewType('UserAge', float) for proto in range(pickle.HIGHEST_PROTOCOL + 1): with self.subTest(proto=proto): pickled = pickle.dumps(UserId, proto) @@ -4154,6 +6825,18 @@ def test_missing__name__(self): ) exec(code, {}) + def test_error_message_when_subclassing(self): + with self.assertRaisesRegex( + TypeError, + re.escape( + "Cannot subclass an instance of NewType. Perhaps you were looking for: " + "`ProUserId = NewType('ProUserId', UserId)`" + ) + ): + class ProUserId(UserId): + ... + + class NamedTupleTests(BaseTestCase): class NestedEmployee(NamedTuple): @@ -4176,14 +6859,6 @@ def test_basics(self): self.assertEqual(Emp.__annotations__, collections.OrderedDict([('name', str), ('id', int)])) - def test_namedtuple_pyversion(self): - if sys.version_info[:2] < (3, 6): - with self.assertRaises(TypeError): - NamedTuple('Name', one=int, other=str) - with self.assertRaises(TypeError): - class NotYet(NamedTuple): - whatever = 0 - def test_annotation_usage(self): tim = CoolEmployee('Tim', 9000) self.assertIsInstance(tim, CoolEmployee) @@ -4239,22 +6914,120 @@ class A: with self.assertRaises(TypeError): class X(NamedTuple, A): x: int + with self.assertRaises(TypeError): + class Y(NamedTuple, tuple): + x: int + with self.assertRaises(TypeError): + class Z(NamedTuple, NamedTuple): + x: int + class B(NamedTuple): + x: int + with self.assertRaises(TypeError): + class C(NamedTuple, B): + y: str + + def test_generic(self): + class X(NamedTuple, Generic[T]): + x: T + self.assertEqual(X.__bases__, (tuple, Generic)) + self.assertEqual(X.__orig_bases__, (NamedTuple, Generic[T])) + self.assertEqual(X.__mro__, (X, tuple, Generic, object)) + + class Y(Generic[T], NamedTuple): + x: T + self.assertEqual(Y.__bases__, (Generic, tuple)) + self.assertEqual(Y.__orig_bases__, (Generic[T], NamedTuple)) + self.assertEqual(Y.__mro__, (Y, Generic, tuple, object)) + + for G in X, Y: + with self.subTest(type=G): + self.assertEqual(G.__parameters__, (T,)) + self.assertEqual(G[T].__args__, (T,)) + self.assertEqual(get_args(G[T]), (T,)) + A = G[int] + self.assertIs(A.__origin__, G) + self.assertEqual(A.__args__, (int,)) + self.assertEqual(get_args(A), (int,)) + self.assertEqual(A.__parameters__, ()) + + a = A(3) + self.assertIs(type(a), G) + self.assertEqual(a.x, 3) + + with self.assertRaises(TypeError): + G[int, str] + + # TODO: RUSTPYTHON + @unittest.expectedFailure + def test_generic_pep695(self): + class X[T](NamedTuple): + x: T + T, = X.__type_params__ + self.assertIsInstance(T, TypeVar) + self.assertEqual(T.__name__, 'T') + self.assertEqual(X.__bases__, (tuple, Generic)) + self.assertEqual(X.__orig_bases__, (NamedTuple, Generic[T])) + self.assertEqual(X.__mro__, (X, tuple, Generic, object)) + self.assertEqual(X.__parameters__, (T,)) + self.assertEqual(X[str].__args__, (str,)) + self.assertEqual(X[str].__parameters__, ()) + + def test_non_generic_subscript(self): + # For backward compatibility, subscription works + # on arbitrary NamedTuple types. + class Group(NamedTuple): + key: T + group: list[T] + A = Group[int] + self.assertEqual(A.__origin__, Group) + self.assertEqual(A.__parameters__, ()) + self.assertEqual(A.__args__, (int,)) + a = A(1, [2]) + self.assertIs(type(a), Group) + self.assertEqual(a, (1, [2])) + + def test_namedtuple_keyword_usage(self): + with self.assertWarnsRegex( + DeprecationWarning, + "Creating NamedTuple classes using keyword arguments is deprecated" + ): + LocalEmployee = NamedTuple("LocalEmployee", name=str, age=int) - def test_namedtuple_keyword_usage(self): - LocalEmployee = NamedTuple("LocalEmployee", name=str, age=int) nick = LocalEmployee('Nick', 25) self.assertIsInstance(nick, tuple) self.assertEqual(nick.name, 'Nick') self.assertEqual(LocalEmployee.__name__, 'LocalEmployee') self.assertEqual(LocalEmployee._fields, ('name', 'age')) self.assertEqual(LocalEmployee.__annotations__, dict(name=str, age=int)) - with self.assertRaises(TypeError): + + with self.assertRaisesRegex( + TypeError, + "Either list of fields or keywords can be provided to NamedTuple, not both" + ): NamedTuple('Name', [('x', int)], y=str) - with self.assertRaises(TypeError): - NamedTuple('Name', x=1, y='a') + + with self.assertRaisesRegex( + TypeError, + "Either list of fields or keywords can be provided to NamedTuple, not both" + ): + NamedTuple('Name', [], y=str) + + with self.assertRaisesRegex( + TypeError, + ( + r"Cannot pass `None` as the 'fields' parameter " + r"and also specify fields using keyword arguments" + ) + ): + NamedTuple('Name', None, x=int) def test_namedtuple_special_keyword_names(self): - NT = NamedTuple("NT", cls=type, self=object, typename=str, fields=list) + with self.assertWarnsRegex( + DeprecationWarning, + "Creating NamedTuple classes using keyword arguments is deprecated" + ): + NT = NamedTuple("NT", cls=type, self=object, typename=str, fields=list) + self.assertEqual(NT.__name__, 'NT') self.assertEqual(NT._fields, ('cls', 'self', 'typename', 'fields')) a = NT(cls=str, self=42, typename='foo', fields=[('bar', tuple)]) @@ -4264,31 +7037,67 @@ def test_namedtuple_special_keyword_names(self): self.assertEqual(a.fields, [('bar', tuple)]) def test_empty_namedtuple(self): - NT = NamedTuple('NT') + expected_warning = re.escape( + "Failing to pass a value for the 'fields' parameter is deprecated " + "and will be disallowed in Python 3.15. " + "To create a NamedTuple class with 0 fields " + "using the functional syntax, " + "pass an empty list, e.g. `NT1 = NamedTuple('NT1', [])`." + ) + with self.assertWarnsRegex(DeprecationWarning, fr"^{expected_warning}$"): + NT1 = NamedTuple('NT1') + + expected_warning = re.escape( + "Passing `None` as the 'fields' parameter is deprecated " + "and will be disallowed in Python 3.15. " + "To create a NamedTuple class with 0 fields " + "using the functional syntax, " + "pass an empty list, e.g. `NT2 = NamedTuple('NT2', [])`." + ) + with self.assertWarnsRegex(DeprecationWarning, fr"^{expected_warning}$"): + NT2 = NamedTuple('NT2', None) + + NT3 = NamedTuple('NT2', []) class CNT(NamedTuple): pass # empty body - for struct in [NT, CNT]: + for struct in NT1, NT2, NT3, CNT: with self.subTest(struct=struct): self.assertEqual(struct._fields, ()) self.assertEqual(struct._field_defaults, {}) self.assertEqual(struct.__annotations__, {}) self.assertIsInstance(struct(), struct) + # TODO: RUSTPYTHON + @unittest.expectedFailure def test_namedtuple_errors(self): with self.assertRaises(TypeError): NamedTuple.__new__() - with self.assertRaises(TypeError): + + with self.assertRaisesRegex( + TypeError, + "missing 1 required positional argument" + ): NamedTuple() - with self.assertRaises(TypeError): + + with self.assertRaisesRegex( + TypeError, + "takes from 1 to 2 positional arguments but 3 were given" + ): NamedTuple('Emp', [('name', str)], None) - with self.assertRaises(ValueError): + + with self.assertRaisesRegex( + ValueError, + "Field names cannot start with an underscore" + ): NamedTuple('Emp', [('_name', str)]) - with self.assertRaises(TypeError): + + with self.assertRaisesRegex( + TypeError, + "missing 1 required positional argument: 'typename'" + ): NamedTuple(typename='Emp', name=str, id=int) - with self.assertRaises(TypeError): - NamedTuple('Emp', fields=[('name', str), ('id', int)]) def test_copy_and_pickle(self): global Emp # pickle wants to reference the class by name @@ -4310,6 +7119,101 @@ def test_copy_and_pickle(self): self.assertEqual(jane2, jane) self.assertIsInstance(jane2, cls) + def test_orig_bases(self): + T = TypeVar('T') + + class SimpleNamedTuple(NamedTuple): + pass + + class GenericNamedTuple(NamedTuple, Generic[T]): + pass + + self.assertEqual(SimpleNamedTuple.__orig_bases__, (NamedTuple,)) + self.assertEqual(GenericNamedTuple.__orig_bases__, (NamedTuple, Generic[T])) + + CallNamedTuple = NamedTuple('CallNamedTuple', []) + + self.assertEqual(CallNamedTuple.__orig_bases__, (NamedTuple,)) + + def test_setname_called_on_values_in_class_dictionary(self): + class Vanilla: + def __set_name__(self, owner, name): + self.name = name + + class Foo(NamedTuple): + attr = Vanilla() + + foo = Foo() + self.assertEqual(len(foo), 0) + self.assertNotIn('attr', Foo._fields) + self.assertIsInstance(foo.attr, Vanilla) + self.assertEqual(foo.attr.name, "attr") + + class Bar(NamedTuple): + attr: Vanilla = Vanilla() + + bar = Bar() + self.assertEqual(len(bar), 1) + self.assertIn('attr', Bar._fields) + self.assertIsInstance(bar.attr, Vanilla) + self.assertEqual(bar.attr.name, "attr") + + # TODO: RUSTPYTHON + @unittest.expectedFailure + def test_setname_raises_the_same_as_on_other_classes(self): + class CustomException(BaseException): pass + + class Annoying: + def __set_name__(self, owner, name): + raise CustomException + + annoying = Annoying() + + with self.assertRaises(CustomException) as cm: + class NormalClass: + attr = annoying + normal_exception = cm.exception + + with self.assertRaises(CustomException) as cm: + class NamedTupleClass(NamedTuple): + attr = annoying + namedtuple_exception = cm.exception + + self.assertIs(type(namedtuple_exception), CustomException) + self.assertIs(type(namedtuple_exception), type(normal_exception)) + + self.assertEqual(len(namedtuple_exception.__notes__), 1) + self.assertEqual( + len(namedtuple_exception.__notes__), len(normal_exception.__notes__) + ) + + expected_note = ( + "Error calling __set_name__ on 'Annoying' instance " + "'attr' in 'NamedTupleClass'" + ) + self.assertEqual(namedtuple_exception.__notes__[0], expected_note) + self.assertEqual( + namedtuple_exception.__notes__[0], + normal_exception.__notes__[0].replace("NormalClass", "NamedTupleClass") + ) + + def test_strange_errors_when_accessing_set_name_itself(self): + class CustomException(Exception): pass + + class Meta(type): + def __getattribute__(self, attr): + if attr == "__set_name__": + raise CustomException + return object.__getattribute__(self, attr) + + class VeryAnnoying(metaclass=Meta): pass + + very_annoying = VeryAnnoying() + + with self.assertRaises(CustomException): + class Foo(NamedTuple): + attr = very_annoying + class TypedDictTests(BaseTestCase): def test_basics_functional_syntax(self): @@ -4326,33 +7230,10 @@ def test_basics_functional_syntax(self): self.assertEqual(Emp.__bases__, (dict,)) self.assertEqual(Emp.__annotations__, {'name': str, 'id': int}) self.assertEqual(Emp.__total__, True) - - def test_basics_keywords_syntax(self): - Emp = TypedDict('Emp', name=str, id=int) - self.assertIsSubclass(Emp, dict) - self.assertIsSubclass(Emp, typing.MutableMapping) - self.assertNotIsSubclass(Emp, collections.abc.Sequence) - jim = Emp(name='Jim', id=1) - self.assertIs(type(jim), dict) - self.assertEqual(jim['name'], 'Jim') - self.assertEqual(jim['id'], 1) - self.assertEqual(Emp.__name__, 'Emp') - self.assertEqual(Emp.__module__, __name__) - self.assertEqual(Emp.__bases__, (dict,)) - self.assertEqual(Emp.__annotations__, {'name': str, 'id': int}) - self.assertEqual(Emp.__total__, True) - - def test_typeddict_special_keyword_names(self): - TD = TypedDict("TD", cls=type, self=object, typename=str, _typename=int, fields=list, _fields=dict) - self.assertEqual(TD.__name__, 'TD') - self.assertEqual(TD.__annotations__, {'cls': type, 'self': object, 'typename': str, '_typename': int, 'fields': list, '_fields': dict}) - a = TD(cls=str, self=42, typename='foo', _typename=53, fields=[('bar', tuple)], _fields={'baz', set}) - self.assertEqual(a['cls'], str) - self.assertEqual(a['self'], 42) - self.assertEqual(a['typename'], 'foo') - self.assertEqual(a['_typename'], 53) - self.assertEqual(a['fields'], [('bar', tuple)]) - self.assertEqual(a['_fields'], {'baz', set}) + self.assertEqual(Emp.__required_keys__, {'name', 'id'}) + self.assertIsInstance(Emp.__required_keys__, frozenset) + self.assertEqual(Emp.__optional_keys__, set()) + self.assertIsInstance(Emp.__optional_keys__, frozenset) def test_typeddict_create_errors(self): with self.assertRaises(TypeError): @@ -4361,11 +7242,10 @@ def test_typeddict_create_errors(self): TypedDict() with self.assertRaises(TypeError): TypedDict('Emp', [('name', str)], None) - with self.assertRaises(TypeError): - TypedDict(_typename='Emp', name=str, id=int) + TypedDict(_typename='Emp') with self.assertRaises(TypeError): - TypedDict('Emp', _fields={'name': str, 'id': int}) + TypedDict('Emp', name=str, id=int) def test_typeddict_errors(self): Emp = TypedDict('Emp', {'name': str, 'id': int}) @@ -4377,10 +7257,6 @@ def test_typeddict_errors(self): isinstance(jim, Emp) with self.assertRaises(TypeError): issubclass(dict, Emp) - with self.assertRaises(TypeError): - TypedDict('Hi', x=1) - with self.assertRaises(TypeError): - TypedDict('Hi', [('x', int), ('y', 1)]) with self.assertRaises(TypeError): TypedDict('Hi', [('x', int)], y=int) @@ -4399,7 +7275,7 @@ def test_py36_class_syntax_usage(self): def test_pickle(self): global EmpD # pickle wants to reference the class by name - EmpD = TypedDict('EmpD', name=str, id=int) + EmpD = TypedDict('EmpD', {'name': str, 'id': int}) jane = EmpD({'name': 'jane', 'id': 37}) for proto in range(pickle.HIGHEST_PROTOCOL + 1): z = pickle.dumps(jane, proto) @@ -4410,8 +7286,19 @@ def test_pickle(self): EmpDnew = pickle.loads(ZZ) self.assertEqual(EmpDnew({'name': 'jane', 'id': 37}), jane) + def test_pickle_generic(self): + point = Point2DGeneric(a=5.0, b=3.0) + for proto in range(pickle.HIGHEST_PROTOCOL + 1): + z = pickle.dumps(point, proto) + point2 = pickle.loads(z) + self.assertEqual(point2, point) + self.assertEqual(point2, {'a': 5.0, 'b': 3.0}) + ZZ = pickle.dumps(Point2DGeneric, proto) + Point2DGenericNew = pickle.loads(ZZ) + self.assertEqual(Point2DGenericNew({'a': 5.0, 'b': 3.0}), point) + def test_optional(self): - EmpD = TypedDict('EmpD', name=str, id=int) + EmpD = TypedDict('EmpD', {'name': str, 'id': int}) self.assertEqual(typing.Optional[EmpD], typing.Union[None, EmpD]) self.assertNotEqual(typing.List[EmpD], typing.Tuple[EmpD]) @@ -4422,7 +7309,9 @@ def test_total(self): self.assertEqual(D(x=1), {'x': 1}) self.assertEqual(D.__total__, False) self.assertEqual(D.__required_keys__, frozenset()) + self.assertIsInstance(D.__required_keys__, frozenset) self.assertEqual(D.__optional_keys__, {'x'}) + self.assertIsInstance(D.__optional_keys__, frozenset) self.assertEqual(Options(), {}) self.assertEqual(Options(log_level=2), {'log_level': 2}) @@ -4430,12 +7319,25 @@ def test_total(self): self.assertEqual(Options.__required_keys__, frozenset()) self.assertEqual(Options.__optional_keys__, {'log_level', 'log_path'}) + def test_total_inherits_non_total(self): + class TD1(TypedDict, total=False): + a: int + + self.assertIs(TD1.__total__, False) + + class TD2(TD1): + b: str + + self.assertIs(TD2.__total__, True) + def test_optional_keys(self): class Point2Dor3D(Point2D, total=False): z: int - assert Point2Dor3D.__required_keys__ == frozenset(['x', 'y']) - assert Point2Dor3D.__optional_keys__ == frozenset(['z']) + self.assertEqual(Point2Dor3D.__required_keys__, frozenset(['x', 'y'])) + self.assertIsInstance(Point2Dor3D.__required_keys__, frozenset) + self.assertEqual(Point2Dor3D.__optional_keys__, frozenset(['z'])) + self.assertIsInstance(Point2Dor3D.__optional_keys__, frozenset) def test_keys_inheritance(self): class BaseAnimal(TypedDict): @@ -4448,26 +7350,102 @@ class Animal(BaseAnimal, total=False): class Cat(Animal): fur_color: str - assert BaseAnimal.__required_keys__ == frozenset(['name']) - assert BaseAnimal.__optional_keys__ == frozenset([]) - assert BaseAnimal.__annotations__ == {'name': str} + self.assertEqual(BaseAnimal.__required_keys__, frozenset(['name'])) + self.assertEqual(BaseAnimal.__optional_keys__, frozenset([])) + self.assertEqual(BaseAnimal.__annotations__, {'name': str}) - assert Animal.__required_keys__ == frozenset(['name']) - assert Animal.__optional_keys__ == frozenset(['tail', 'voice']) - assert Animal.__annotations__ == { + self.assertEqual(Animal.__required_keys__, frozenset(['name'])) + self.assertEqual(Animal.__optional_keys__, frozenset(['tail', 'voice'])) + self.assertEqual(Animal.__annotations__, { 'name': str, 'tail': bool, 'voice': str, - } + }) - assert Cat.__required_keys__ == frozenset(['name', 'fur_color']) - assert Cat.__optional_keys__ == frozenset(['tail', 'voice']) - assert Cat.__annotations__ == { + self.assertEqual(Cat.__required_keys__, frozenset(['name', 'fur_color'])) + self.assertEqual(Cat.__optional_keys__, frozenset(['tail', 'voice'])) + self.assertEqual(Cat.__annotations__, { 'fur_color': str, 'name': str, 'tail': bool, 'voice': str, - } + }) + + def test_keys_inheritance_with_same_name(self): + class NotTotal(TypedDict, total=False): + a: int + + class Total(NotTotal): + a: int + + self.assertEqual(NotTotal.__required_keys__, frozenset()) + self.assertEqual(NotTotal.__optional_keys__, frozenset(['a'])) + self.assertEqual(Total.__required_keys__, frozenset(['a'])) + self.assertEqual(Total.__optional_keys__, frozenset()) + + class Base(TypedDict): + a: NotRequired[int] + b: Required[int] + + class Child(Base): + a: Required[int] + b: NotRequired[int] + + self.assertEqual(Base.__required_keys__, frozenset(['b'])) + self.assertEqual(Base.__optional_keys__, frozenset(['a'])) + self.assertEqual(Child.__required_keys__, frozenset(['a'])) + self.assertEqual(Child.__optional_keys__, frozenset(['b'])) + + def test_multiple_inheritance_with_same_key(self): + class Base1(TypedDict): + a: NotRequired[int] + + class Base2(TypedDict): + a: Required[str] + + class Child(Base1, Base2): + pass + + # Last base wins + self.assertEqual(Child.__annotations__, {'a': Required[str]}) + self.assertEqual(Child.__required_keys__, frozenset(['a'])) + self.assertEqual(Child.__optional_keys__, frozenset()) + + def test_required_notrequired_keys(self): + self.assertEqual(NontotalMovie.__required_keys__, + frozenset({"title"})) + self.assertEqual(NontotalMovie.__optional_keys__, + frozenset({"year"})) + + self.assertEqual(TotalMovie.__required_keys__, + frozenset({"title"})) + self.assertEqual(TotalMovie.__optional_keys__, + frozenset({"year"})) + + self.assertEqual(_typed_dict_helper.VeryAnnotated.__required_keys__, + frozenset()) + self.assertEqual(_typed_dict_helper.VeryAnnotated.__optional_keys__, + frozenset({"a"})) + + self.assertEqual(AnnotatedMovie.__required_keys__, + frozenset({"title"})) + self.assertEqual(AnnotatedMovie.__optional_keys__, + frozenset({"year"})) + + self.assertEqual(WeirdlyQuotedMovie.__required_keys__, + frozenset({"title"})) + self.assertEqual(WeirdlyQuotedMovie.__optional_keys__, + frozenset({"year"})) + + self.assertEqual(ChildTotalMovie.__required_keys__, + frozenset({"title"})) + self.assertEqual(ChildTotalMovie.__optional_keys__, + frozenset({"year"})) + + self.assertEqual(ChildDeeplyAnnotatedMovie.__required_keys__, + frozenset({"title"})) + self.assertEqual(ChildDeeplyAnnotatedMovie.__optional_keys__, + frozenset({"year"})) def test_multiple_inheritance(self): class One(TypedDict): @@ -4557,31 +7535,428 @@ class Wrong(*bases): pass def test_is_typeddict(self): - assert is_typeddict(Point2D) is True - assert is_typeddict(Union[str, int]) is False + self.assertIs(is_typeddict(Point2D), True) + self.assertIs(is_typeddict(Union[str, int]), False) # classes, not instances - assert is_typeddict(Point2D()) is False + self.assertIs(is_typeddict(Point2D()), False) + call_based = TypedDict('call_based', {'a': int}) + self.assertIs(is_typeddict(call_based), True) + self.assertIs(is_typeddict(call_based()), False) + + T = TypeVar("T") + class BarGeneric(TypedDict, Generic[T]): + a: T + self.assertIs(is_typeddict(BarGeneric), True) + self.assertIs(is_typeddict(BarGeneric[int]), False) + self.assertIs(is_typeddict(BarGeneric()), False) + + class NewGeneric[T](TypedDict): + a: T + self.assertIs(is_typeddict(NewGeneric), True) + self.assertIs(is_typeddict(NewGeneric[int]), False) + self.assertIs(is_typeddict(NewGeneric()), False) + + # The TypedDict constructor is not itself a TypedDict + self.assertIs(is_typeddict(TypedDict), False) + # TODO: RUSTPYTHON + @unittest.expectedFailure def test_get_type_hints(self): self.assertEqual( get_type_hints(Bar), {'a': typing.Optional[int], 'b': int} ) - # TODO: RUSTPYTHON - @unittest.expectedFailure - def test_non_generic_subscript(self): - # For backward compatibility, subscription works - # on arbitrary TypedDict types. - class TD(TypedDict): - a: T - A = TD[int] - self.assertEqual(A.__origin__, TD) - self.assertEqual(A.__parameters__, ()) - self.assertEqual(A.__args__, (int,)) - a = A(a = 1) - self.assertIs(type(a), dict) - self.assertEqual(a, {'a': 1}) + # TODO: RUSTPYTHON + @unittest.expectedFailure + def test_get_type_hints_generic(self): + self.assertEqual( + get_type_hints(BarGeneric), + {'a': typing.Optional[T], 'b': int} + ) + + class FooBarGeneric(BarGeneric[int]): + c: str + + self.assertEqual( + get_type_hints(FooBarGeneric), + {'a': typing.Optional[T], 'b': int, 'c': str} + ) + + # TODO: RUSTPYTHON + @unittest.expectedFailure + def test_pep695_generic_typeddict(self): + class A[T](TypedDict): + a: T + + T, = A.__type_params__ + self.assertIsInstance(T, TypeVar) + self.assertEqual(T.__name__, 'T') + self.assertEqual(A.__bases__, (Generic, dict)) + self.assertEqual(A.__orig_bases__, (TypedDict, Generic[T])) + self.assertEqual(A.__mro__, (A, Generic, dict, object)) + self.assertEqual(A.__parameters__, (T,)) + self.assertEqual(A[str].__parameters__, ()) + self.assertEqual(A[str].__args__, (str,)) + + # TODO: RUSTPYTHON + @unittest.expectedFailure + def test_generic_inheritance(self): + class A(TypedDict, Generic[T]): + a: T + + self.assertEqual(A.__bases__, (Generic, dict)) + self.assertEqual(A.__orig_bases__, (TypedDict, Generic[T])) + self.assertEqual(A.__mro__, (A, Generic, dict, object)) + self.assertEqual(A.__parameters__, (T,)) + self.assertEqual(A[str].__parameters__, ()) + self.assertEqual(A[str].__args__, (str,)) + + class A2(Generic[T], TypedDict): + a: T + + self.assertEqual(A2.__bases__, (Generic, dict)) + self.assertEqual(A2.__orig_bases__, (Generic[T], TypedDict)) + self.assertEqual(A2.__mro__, (A2, Generic, dict, object)) + self.assertEqual(A2.__parameters__, (T,)) + self.assertEqual(A2[str].__parameters__, ()) + self.assertEqual(A2[str].__args__, (str,)) + + class B(A[KT], total=False): + b: KT + + self.assertEqual(B.__bases__, (Generic, dict)) + self.assertEqual(B.__orig_bases__, (A[KT],)) + self.assertEqual(B.__mro__, (B, Generic, dict, object)) + self.assertEqual(B.__parameters__, (KT,)) + self.assertEqual(B.__total__, False) + self.assertEqual(B.__optional_keys__, frozenset(['b'])) + self.assertEqual(B.__required_keys__, frozenset(['a'])) + + self.assertEqual(B[str].__parameters__, ()) + self.assertEqual(B[str].__args__, (str,)) + self.assertEqual(B[str].__origin__, B) + + class C(B[int]): + c: int + + self.assertEqual(C.__bases__, (Generic, dict)) + self.assertEqual(C.__orig_bases__, (B[int],)) + self.assertEqual(C.__mro__, (C, Generic, dict, object)) + self.assertEqual(C.__parameters__, ()) + self.assertEqual(C.__total__, True) + self.assertEqual(C.__optional_keys__, frozenset(['b'])) + self.assertEqual(C.__required_keys__, frozenset(['a', 'c'])) + self.assertEqual(C.__annotations__, { + 'a': T, + 'b': KT, + 'c': int, + }) + with self.assertRaises(TypeError): + C[str] + + + class Point3D(Point2DGeneric[T], Generic[T, KT]): + c: KT + + self.assertEqual(Point3D.__bases__, (Generic, dict)) + self.assertEqual(Point3D.__orig_bases__, (Point2DGeneric[T], Generic[T, KT])) + self.assertEqual(Point3D.__mro__, (Point3D, Generic, dict, object)) + self.assertEqual(Point3D.__parameters__, (T, KT)) + self.assertEqual(Point3D.__total__, True) + self.assertEqual(Point3D.__optional_keys__, frozenset()) + self.assertEqual(Point3D.__required_keys__, frozenset(['a', 'b', 'c'])) + self.assertEqual(Point3D.__annotations__, { + 'a': T, + 'b': T, + 'c': KT, + }) + self.assertEqual(Point3D[int, str].__origin__, Point3D) + + with self.assertRaises(TypeError): + Point3D[int] + + with self.assertRaises(TypeError): + class Point3D(Point2DGeneric[T], Generic[KT]): + c: KT + + def test_implicit_any_inheritance(self): + class A(TypedDict, Generic[T]): + a: T + + class B(A[KT], total=False): + b: KT + + class WithImplicitAny(B): + c: int + + self.assertEqual(WithImplicitAny.__bases__, (Generic, dict,)) + self.assertEqual(WithImplicitAny.__mro__, (WithImplicitAny, Generic, dict, object)) + # Consistent with GenericTests.test_implicit_any + self.assertEqual(WithImplicitAny.__parameters__, ()) + self.assertEqual(WithImplicitAny.__total__, True) + self.assertEqual(WithImplicitAny.__optional_keys__, frozenset(['b'])) + self.assertEqual(WithImplicitAny.__required_keys__, frozenset(['a', 'c'])) + self.assertEqual(WithImplicitAny.__annotations__, { + 'a': T, + 'b': KT, + 'c': int, + }) + with self.assertRaises(TypeError): + WithImplicitAny[str] + + # TODO: RUSTPYTHON + @unittest.expectedFailure + def test_non_generic_subscript(self): + # For backward compatibility, subscription works + # on arbitrary TypedDict types. + class TD(TypedDict): + a: T + A = TD[int] + self.assertEqual(A.__origin__, TD) + self.assertEqual(A.__parameters__, ()) + self.assertEqual(A.__args__, (int,)) + a = A(a = 1) + self.assertIs(type(a), dict) + self.assertEqual(a, {'a': 1}) + + def test_orig_bases(self): + T = TypeVar('T') + + class Parent(TypedDict): + pass + + class Child(Parent): + pass + + class OtherChild(Parent): + pass + + class MixedChild(Child, OtherChild, Parent): + pass + + class GenericParent(TypedDict, Generic[T]): + pass + + class GenericChild(GenericParent[int]): + pass + + class OtherGenericChild(GenericParent[str]): + pass + + class MixedGenericChild(GenericChild, OtherGenericChild, GenericParent[float]): + pass + + class MultipleGenericBases(GenericParent[int], GenericParent[float]): + pass + + CallTypedDict = TypedDict('CallTypedDict', {}) + + self.assertEqual(Parent.__orig_bases__, (TypedDict,)) + self.assertEqual(Child.__orig_bases__, (Parent,)) + self.assertEqual(OtherChild.__orig_bases__, (Parent,)) + self.assertEqual(MixedChild.__orig_bases__, (Child, OtherChild, Parent,)) + self.assertEqual(GenericParent.__orig_bases__, (TypedDict, Generic[T])) + self.assertEqual(GenericChild.__orig_bases__, (GenericParent[int],)) + self.assertEqual(OtherGenericChild.__orig_bases__, (GenericParent[str],)) + self.assertEqual(MixedGenericChild.__orig_bases__, (GenericChild, OtherGenericChild, GenericParent[float])) + self.assertEqual(MultipleGenericBases.__orig_bases__, (GenericParent[int], GenericParent[float])) + self.assertEqual(CallTypedDict.__orig_bases__, (TypedDict,)) + + def test_zero_fields_typeddicts(self): + T1 = TypedDict("T1", {}) + class T2(TypedDict): pass + class T3[tvar](TypedDict): pass + S = TypeVar("S") + class T4(TypedDict, Generic[S]): pass + + expected_warning = re.escape( + "Failing to pass a value for the 'fields' parameter is deprecated " + "and will be disallowed in Python 3.15. " + "To create a TypedDict class with 0 fields " + "using the functional syntax, " + "pass an empty dictionary, e.g. `T5 = TypedDict('T5', {})`." + ) + with self.assertWarnsRegex(DeprecationWarning, fr"^{expected_warning}$"): + T5 = TypedDict('T5') + + expected_warning = re.escape( + "Passing `None` as the 'fields' parameter is deprecated " + "and will be disallowed in Python 3.15. " + "To create a TypedDict class with 0 fields " + "using the functional syntax, " + "pass an empty dictionary, e.g. `T6 = TypedDict('T6', {})`." + ) + with self.assertWarnsRegex(DeprecationWarning, fr"^{expected_warning}$"): + T6 = TypedDict('T6', None) + + for klass in T1, T2, T3, T4, T5, T6: + with self.subTest(klass=klass.__name__): + self.assertEqual(klass.__annotations__, {}) + self.assertEqual(klass.__required_keys__, set()) + self.assertEqual(klass.__optional_keys__, set()) + self.assertIsInstance(klass(), dict) + + def test_readonly_inheritance(self): + class Base1(TypedDict): + a: ReadOnly[int] + + class Child1(Base1): + b: str + + self.assertEqual(Child1.__readonly_keys__, frozenset({'a'})) + self.assertEqual(Child1.__mutable_keys__, frozenset({'b'})) + + class Base2(TypedDict): + a: int + + class Child2(Base2): + b: ReadOnly[str] + + self.assertEqual(Child2.__readonly_keys__, frozenset({'b'})) + self.assertEqual(Child2.__mutable_keys__, frozenset({'a'})) + + def test_cannot_make_mutable_key_readonly(self): + class Base(TypedDict): + a: int + + with self.assertRaises(TypeError): + class Child(Base): + a: ReadOnly[int] + + def test_can_make_readonly_key_mutable(self): + class Base(TypedDict): + a: ReadOnly[int] + + class Child(Base): + a: int + + self.assertEqual(Child.__readonly_keys__, frozenset()) + self.assertEqual(Child.__mutable_keys__, frozenset({'a'})) + + # TODO: RUSTPYTHON + @unittest.expectedFailure + def test_combine_qualifiers(self): + class AllTheThings(TypedDict): + a: Annotated[Required[ReadOnly[int]], "why not"] + b: Required[Annotated[ReadOnly[int], "why not"]] + c: ReadOnly[NotRequired[Annotated[int, "why not"]]] + d: NotRequired[Annotated[int, "why not"]] + + self.assertEqual(AllTheThings.__required_keys__, frozenset({'a', 'b'})) + self.assertEqual(AllTheThings.__optional_keys__, frozenset({'c', 'd'})) + self.assertEqual(AllTheThings.__readonly_keys__, frozenset({'a', 'b', 'c'})) + self.assertEqual(AllTheThings.__mutable_keys__, frozenset({'d'})) + + self.assertEqual( + get_type_hints(AllTheThings, include_extras=False), + {'a': int, 'b': int, 'c': int, 'd': int}, + ) + self.assertEqual( + get_type_hints(AllTheThings, include_extras=True), + { + 'a': Annotated[Required[ReadOnly[int]], 'why not'], + 'b': Required[Annotated[ReadOnly[int], 'why not']], + 'c': ReadOnly[NotRequired[Annotated[int, 'why not']]], + 'd': NotRequired[Annotated[int, 'why not']], + }, + ) + + +class RequiredTests(BaseTestCase): + + def test_basics(self): + with self.assertRaises(TypeError): + Required[NotRequired] + with self.assertRaises(TypeError): + Required[int, str] + with self.assertRaises(TypeError): + Required[int][str] + + def test_repr(self): + self.assertEqual(repr(Required), 'typing.Required') + cv = Required[int] + self.assertEqual(repr(cv), 'typing.Required[int]') + cv = Required[Employee] + self.assertEqual(repr(cv), f'typing.Required[{__name__}.Employee]') + + def test_cannot_subclass(self): + with self.assertRaisesRegex(TypeError, CANNOT_SUBCLASS_TYPE): + class C(type(Required)): + pass + with self.assertRaisesRegex(TypeError, CANNOT_SUBCLASS_TYPE): + class D(type(Required[int])): + pass + with self.assertRaisesRegex(TypeError, + r'Cannot subclass typing\.Required'): + class E(Required): + pass + with self.assertRaisesRegex(TypeError, + r'Cannot subclass typing\.Required\[int\]'): + class F(Required[int]): + pass + + def test_cannot_init(self): + with self.assertRaises(TypeError): + Required() + with self.assertRaises(TypeError): + type(Required)() + with self.assertRaises(TypeError): + type(Required[Optional[int]])() + + def test_no_isinstance(self): + with self.assertRaises(TypeError): + isinstance(1, Required[int]) + with self.assertRaises(TypeError): + issubclass(int, Required) + + +class NotRequiredTests(BaseTestCase): + + def test_basics(self): + with self.assertRaises(TypeError): + NotRequired[Required] + with self.assertRaises(TypeError): + NotRequired[int, str] + with self.assertRaises(TypeError): + NotRequired[int][str] + + def test_repr(self): + self.assertEqual(repr(NotRequired), 'typing.NotRequired') + cv = NotRequired[int] + self.assertEqual(repr(cv), 'typing.NotRequired[int]') + cv = NotRequired[Employee] + self.assertEqual(repr(cv), f'typing.NotRequired[{__name__}.Employee]') + + def test_cannot_subclass(self): + with self.assertRaisesRegex(TypeError, CANNOT_SUBCLASS_TYPE): + class C(type(NotRequired)): + pass + with self.assertRaisesRegex(TypeError, CANNOT_SUBCLASS_TYPE): + class D(type(NotRequired[int])): + pass + with self.assertRaisesRegex(TypeError, + r'Cannot subclass typing\.NotRequired'): + class E(NotRequired): + pass + with self.assertRaisesRegex(TypeError, + r'Cannot subclass typing\.NotRequired\[int\]'): + class F(NotRequired[int]): + pass + + def test_cannot_init(self): + with self.assertRaises(TypeError): + NotRequired() + with self.assertRaises(TypeError): + type(NotRequired)() + with self.assertRaises(TypeError): + type(NotRequired[Optional[int]])() + + def test_no_isinstance(self): + with self.assertRaises(TypeError): + isinstance(1, NotRequired[int]) + with self.assertRaises(TypeError): + issubclass(int, NotRequired) class IOTests(BaseTestCase): @@ -4610,14 +7985,6 @@ def stuff(a: BinaryIO) -> bytes: a = stuff.__annotations__['a'] self.assertEqual(a.__parameters__, ()) - def test_io_submodule(self): - from typing.io import IO, TextIO, BinaryIO, __all__, __name__ - self.assertIs(IO, typing.IO) - self.assertIs(TextIO, typing.TextIO) - self.assertIs(BinaryIO, typing.BinaryIO) - self.assertEqual(set(__all__), set(['IO', 'TextIO', 'BinaryIO'])) - self.assertEqual(__name__, 'typing.io') - class RETests(BaseTestCase): # Much of this is really testing _TypeAlias. @@ -4662,31 +8029,29 @@ def test_repr(self): self.assertEqual(repr(Match[str]), 'typing.Match[str]') self.assertEqual(repr(Match[bytes]), 'typing.Match[bytes]') - def test_re_submodule(self): - from typing.re import Match, Pattern, __all__, __name__ - self.assertIs(Match, typing.Match) - self.assertIs(Pattern, typing.Pattern) - self.assertEqual(set(__all__), set(['Match', 'Pattern'])) - self.assertEqual(__name__, 'typing.re') - # TODO: RUSTPYTHON @unittest.expectedFailure def test_cannot_subclass(self): - with self.assertRaises(TypeError) as ex: - + with self.assertRaisesRegex( + TypeError, + r"type 're\.Match' is not an acceptable base type", + ): class A(typing.Match): pass + with self.assertRaisesRegex( + TypeError, + r"type 're\.Pattern' is not an acceptable base type", + ): + class B(typing.Pattern): + pass - self.assertEqual(str(ex.exception), - "type 're.Match' is not an acceptable base type") class AnnotatedTests(BaseTestCase): def test_new(self): with self.assertRaisesRegex( - TypeError, - 'Type Annotated cannot be instantiated', + TypeError, 'Cannot instantiate typing.Annotated', ): Annotated() @@ -4700,12 +8065,91 @@ def test_repr(self): "typing.Annotated[typing.List[int], 4, 5]" ) + def test_dir(self): + dir_items = set(dir(Annotated[int, 4])) + for required_item in [ + '__args__', '__parameters__', '__origin__', + '__metadata__', + ]: + with self.subTest(required_item=required_item): + self.assertIn(required_item, dir_items) + def test_flatten(self): A = Annotated[Annotated[int, 4], 5] self.assertEqual(A, Annotated[int, 4, 5]) self.assertEqual(A.__metadata__, (4, 5)) self.assertEqual(A.__origin__, int) + def test_deduplicate_from_union(self): + # Regular: + self.assertEqual(get_args(Annotated[int, 1] | int), + (Annotated[int, 1], int)) + self.assertEqual(get_args(Union[Annotated[int, 1], int]), + (Annotated[int, 1], int)) + self.assertEqual(get_args(Annotated[int, 1] | Annotated[int, 2] | int), + (Annotated[int, 1], Annotated[int, 2], int)) + self.assertEqual(get_args(Union[Annotated[int, 1], Annotated[int, 2], int]), + (Annotated[int, 1], Annotated[int, 2], int)) + self.assertEqual(get_args(Annotated[int, 1] | Annotated[str, 1] | int), + (Annotated[int, 1], Annotated[str, 1], int)) + self.assertEqual(get_args(Union[Annotated[int, 1], Annotated[str, 1], int]), + (Annotated[int, 1], Annotated[str, 1], int)) + + # Duplicates: + self.assertEqual(Annotated[int, 1] | Annotated[int, 1] | int, + Annotated[int, 1] | int) + self.assertEqual(Union[Annotated[int, 1], Annotated[int, 1], int], + Union[Annotated[int, 1], int]) + + # Unhashable metadata: + self.assertEqual(get_args(str | Annotated[int, {}] | Annotated[int, set()] | int), + (str, Annotated[int, {}], Annotated[int, set()], int)) + self.assertEqual(get_args(Union[str, Annotated[int, {}], Annotated[int, set()], int]), + (str, Annotated[int, {}], Annotated[int, set()], int)) + self.assertEqual(get_args(str | Annotated[int, {}] | Annotated[str, {}] | int), + (str, Annotated[int, {}], Annotated[str, {}], int)) + self.assertEqual(get_args(Union[str, Annotated[int, {}], Annotated[str, {}], int]), + (str, Annotated[int, {}], Annotated[str, {}], int)) + + self.assertEqual(get_args(Annotated[int, 1] | str | Annotated[str, {}] | int), + (Annotated[int, 1], str, Annotated[str, {}], int)) + self.assertEqual(get_args(Union[Annotated[int, 1], str, Annotated[str, {}], int]), + (Annotated[int, 1], str, Annotated[str, {}], int)) + + import dataclasses + @dataclasses.dataclass + class ValueRange: + lo: int + hi: int + v = ValueRange(1, 2) + self.assertEqual(get_args(Annotated[int, v] | None), + (Annotated[int, v], types.NoneType)) + self.assertEqual(get_args(Union[Annotated[int, v], None]), + (Annotated[int, v], types.NoneType)) + self.assertEqual(get_args(Optional[Annotated[int, v]]), + (Annotated[int, v], types.NoneType)) + + # Unhashable metadata duplicated: + self.assertEqual(Annotated[int, {}] | Annotated[int, {}] | int, + Annotated[int, {}] | int) + self.assertEqual(Annotated[int, {}] | Annotated[int, {}] | int, + int | Annotated[int, {}]) + self.assertEqual(Union[Annotated[int, {}], Annotated[int, {}], int], + Union[Annotated[int, {}], int]) + self.assertEqual(Union[Annotated[int, {}], Annotated[int, {}], int], + Union[int, Annotated[int, {}]]) + + def test_order_in_union(self): + expr1 = Annotated[int, 1] | str | Annotated[str, {}] | int + for args in itertools.permutations(get_args(expr1)): + with self.subTest(args=args): + self.assertEqual(expr1, reduce(operator.or_, args)) + + expr2 = Union[Annotated[int, 1], str, Annotated[str, {}], int] + for args in itertools.permutations(get_args(expr2)): + with self.subTest(args=args): + self.assertEqual(expr2, Union[args]) + def test_specialize(self): L = Annotated[List[T], "my decoration"] LI = Annotated[List[int], "my decoration"] @@ -4726,6 +8170,16 @@ def test_hash_eq(self): {Annotated[int, 4, 5], Annotated[int, 4, 5], Annotated[T, 4, 5]}, {Annotated[int, 4, 5], Annotated[T, 4, 5]} ) + # Unhashable `metadata` raises `TypeError`: + a1 = Annotated[int, []] + with self.assertRaises(TypeError): + hash(a1) + + class A: + __hash__ = None + a2 = Annotated[int, A()] + with self.assertRaises(TypeError): + hash(a2) def test_instantiate(self): class C: @@ -4751,6 +8205,17 @@ def test_instantiate_generic(self): self.assertEqual(MyCount([4, 4, 5]), {4: 2, 5: 1}) self.assertEqual(MyCount[int]([4, 4, 5]), {4: 2, 5: 1}) + def test_instantiate_immutable(self): + class C: + def __setattr__(self, key, value): + raise Exception("should be ignored") + + A = Annotated[C, "a decoration"] + # gh-115165: This used to cause RuntimeError to be raised + # when we tried to set `__orig_class__` on the `C` instance + # returned by the `A()` call + self.assertIsInstance(A(), C) + def test_cannot_instantiate_forward(self): A = Annotated["int", (5, 6)] with self.assertRaises(TypeError): @@ -4782,15 +8247,33 @@ class C: self.assertEqual(get_type_hints(C, globals())['classvar'], ClassVar[int]) self.assertEqual(get_type_hints(C, globals())['const'], Final[int]) - def test_hash_eq(self): - self.assertEqual(len({Annotated[int, 4, 5], Annotated[int, 4, 5]}), 1) - self.assertNotEqual(Annotated[int, 4, 5], Annotated[int, 5, 4]) - self.assertNotEqual(Annotated[int, 4, 5], Annotated[str, 4, 5]) - self.assertNotEqual(Annotated[int, 4], Annotated[int, 4, 4]) - self.assertEqual( - {Annotated[int, 4, 5], Annotated[int, 4, 5], Annotated[T, 4, 5]}, - {Annotated[int, 4, 5], Annotated[T, 4, 5]} - ) + def test_special_forms_nesting(self): + # These are uncommon types and are to ensure runtime + # is lax on validation. See gh-89547 for more context. + class CF: + x: ClassVar[Final[int]] + + class FC: + x: Final[ClassVar[int]] + + class ACF: + x: Annotated[ClassVar[Final[int]], "a decoration"] + + class CAF: + x: ClassVar[Annotated[Final[int], "a decoration"]] + + class AFC: + x: Annotated[Final[ClassVar[int]], "a decoration"] + + class FAC: + x: Final[Annotated[ClassVar[int], "a decoration"]] + + self.assertEqual(get_type_hints(CF, globals())['x'], ClassVar[Final[int]]) + self.assertEqual(get_type_hints(FC, globals())['x'], Final[ClassVar[int]]) + self.assertEqual(get_type_hints(ACF, globals())['x'], ClassVar[Final[int]]) + self.assertEqual(get_type_hints(CAF, globals())['x'], ClassVar[Final[int]]) + self.assertEqual(get_type_hints(AFC, globals())['x'], Final[ClassVar[int]]) + self.assertEqual(get_type_hints(FAC, globals())['x'], Final[ClassVar[int]]) def test_cannot_subclass(self): with self.assertRaisesRegex(TypeError, "Cannot subclass .*Annotated"): @@ -4868,6 +8351,120 @@ def test_subst(self): with self.assertRaises(TypeError): LI[None] + def test_typevar_subst(self): + dec = "a decoration" + Ts = TypeVarTuple('Ts') + T = TypeVar('T') + T1 = TypeVar('T1') + T2 = TypeVar('T2') + + A = Annotated[tuple[*Ts], dec] + self.assertEqual(A[int], Annotated[tuple[int], dec]) + self.assertEqual(A[str, int], Annotated[tuple[str, int], dec]) + with self.assertRaises(TypeError): + Annotated[*Ts, dec] + + B = Annotated[Tuple[Unpack[Ts]], dec] + self.assertEqual(B[int], Annotated[Tuple[int], dec]) + self.assertEqual(B[str, int], Annotated[Tuple[str, int], dec]) + with self.assertRaises(TypeError): + Annotated[Unpack[Ts], dec] + + C = Annotated[tuple[T, *Ts], dec] + self.assertEqual(C[int], Annotated[tuple[int], dec]) + self.assertEqual(C[int, str], Annotated[tuple[int, str], dec]) + self.assertEqual( + C[int, str, float], + Annotated[tuple[int, str, float], dec] + ) + with self.assertRaises(TypeError): + C[()] + + D = Annotated[Tuple[T, Unpack[Ts]], dec] + self.assertEqual(D[int], Annotated[Tuple[int], dec]) + self.assertEqual(D[int, str], Annotated[Tuple[int, str], dec]) + self.assertEqual( + D[int, str, float], + Annotated[Tuple[int, str, float], dec] + ) + with self.assertRaises(TypeError): + D[()] + + E = Annotated[tuple[*Ts, T], dec] + self.assertEqual(E[int], Annotated[tuple[int], dec]) + self.assertEqual(E[int, str], Annotated[tuple[int, str], dec]) + self.assertEqual( + E[int, str, float], + Annotated[tuple[int, str, float], dec] + ) + with self.assertRaises(TypeError): + E[()] + + F = Annotated[Tuple[Unpack[Ts], T], dec] + self.assertEqual(F[int], Annotated[Tuple[int], dec]) + self.assertEqual(F[int, str], Annotated[Tuple[int, str], dec]) + self.assertEqual( + F[int, str, float], + Annotated[Tuple[int, str, float], dec] + ) + with self.assertRaises(TypeError): + F[()] + + G = Annotated[tuple[T1, *Ts, T2], dec] + self.assertEqual(G[int, str], Annotated[tuple[int, str], dec]) + self.assertEqual( + G[int, str, float], + Annotated[tuple[int, str, float], dec] + ) + self.assertEqual( + G[int, str, bool, float], + Annotated[tuple[int, str, bool, float], dec] + ) + with self.assertRaises(TypeError): + G[int] + + H = Annotated[Tuple[T1, Unpack[Ts], T2], dec] + self.assertEqual(H[int, str], Annotated[Tuple[int, str], dec]) + self.assertEqual( + H[int, str, float], + Annotated[Tuple[int, str, float], dec] + ) + self.assertEqual( + H[int, str, bool, float], + Annotated[Tuple[int, str, bool, float], dec] + ) + with self.assertRaises(TypeError): + H[int] + + # Now let's try creating an alias from an alias. + + Ts2 = TypeVarTuple('Ts2') + T3 = TypeVar('T3') + T4 = TypeVar('T4') + + # G is Annotated[tuple[T1, *Ts, T2], dec]. + I = G[T3, *Ts2, T4] + J = G[T3, Unpack[Ts2], T4] + + for x, y in [ + (I, Annotated[tuple[T3, *Ts2, T4], dec]), + (J, Annotated[tuple[T3, Unpack[Ts2], T4], dec]), + (I[int, str], Annotated[tuple[int, str], dec]), + (J[int, str], Annotated[tuple[int, str], dec]), + (I[int, str, float], Annotated[tuple[int, str, float], dec]), + (J[int, str, float], Annotated[tuple[int, str, float], dec]), + (I[int, str, bool, float], + Annotated[tuple[int, str, bool, float], dec]), + (J[int, str, bool, float], + Annotated[tuple[int, str, bool, float], dec]), + ]: + self.assertEqual(x, y) + + with self.assertRaises(TypeError): + I[int] + with self.assertRaises(TypeError): + J[int] + def test_annotated_in_other_types(self): X = List[Annotated[T, 5]] self.assertEqual(X[int], List[Annotated[int, 5]]) @@ -4877,6 +8474,40 @@ class X(Annotated[int, (1, 10)]): ... self.assertEqual(X.__mro__, (X, int, object), "Annotated should be transparent.") + def test_annotated_cached_with_types(self): + class A(str): ... + class B(str): ... + + field_a1 = Annotated[str, A("X")] + field_a2 = Annotated[str, B("X")] + a1_metadata = field_a1.__metadata__[0] + a2_metadata = field_a2.__metadata__[0] + + self.assertIs(type(a1_metadata), A) + self.assertEqual(a1_metadata, A("X")) + self.assertIs(type(a2_metadata), B) + self.assertEqual(a2_metadata, B("X")) + self.assertIsNot(type(a1_metadata), type(a2_metadata)) + + field_b1 = Annotated[str, A("Y")] + field_b2 = Annotated[str, B("Y")] + b1_metadata = field_b1.__metadata__[0] + b2_metadata = field_b2.__metadata__[0] + + self.assertIs(type(b1_metadata), A) + self.assertEqual(b1_metadata, A("Y")) + self.assertIs(type(b2_metadata), B) + self.assertEqual(b2_metadata, B("Y")) + self.assertIsNot(type(b1_metadata), type(b2_metadata)) + + field_c1 = Annotated[int, 1] + field_c2 = Annotated[int, 1.0] + field_c3 = Annotated[int, True] + + self.assertIs(type(field_c1.__metadata__[0]), int) + self.assertIs(type(field_c2.__metadata__[0]), float) + self.assertIs(type(field_c3.__metadata__[0]), bool) + class TypeAliasTests(BaseTestCase): def test_canonical_usage_with_variable_annotation(self): @@ -4906,12 +8537,13 @@ def test_no_issubclass(self): issubclass(TypeAlias, Employee) def test_cannot_subclass(self): - with self.assertRaises(TypeError): + with self.assertRaisesRegex(TypeError, + r'Cannot subclass typing\.TypeAlias'): class C(TypeAlias): pass with self.assertRaises(TypeError): - class C(type(TypeAlias)): + class D(type(TypeAlias)): pass def test_repr(self): @@ -4922,15 +8554,24 @@ def test_cannot_subscript(self): TypeAlias[int] + class ParamSpecTests(BaseTestCase): def test_basic_plain(self): P = ParamSpec('P') self.assertEqual(P, P) self.assertIsInstance(P, ParamSpec) + self.assertEqual(P.__name__, 'P') + self.assertEqual(P.__module__, __name__) + + def test_basic_with_exec(self): + ns = {} + exec('from typing import ParamSpec; P = ParamSpec("P")', ns, ns) + P = ns['P'] + self.assertIsInstance(P, ParamSpec) + self.assertEqual(P.__name__, 'P') + self.assertIs(P.__module__, None) - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_valid_uses(self): P = ParamSpec('P') T = TypeVar('T') @@ -4948,6 +8589,8 @@ def test_valid_uses(self): self.assertEqual(C4.__args__, (P, T)) self.assertEqual(C4.__parameters__, (P, T)) + # TODO: RUSTPYTHON + @unittest.expectedFailure def test_args_kwargs(self): P = ParamSpec('P') P_2 = ParamSpec('P_2') @@ -4967,6 +8610,7 @@ def test_args_kwargs(self): self.assertEqual(repr(P.args), "P.args") self.assertEqual(repr(P.kwargs), "P.kwargs") + def test_stringized(self): P = ParamSpec('P') class C(Generic[P]): @@ -5047,32 +8691,208 @@ class X(Generic[P, P2]): self.assertEqual(G1.__args__, ((int, str), (bytes,))) self.assertEqual(G2.__args__, ((int,), (str, bytes))) + def test_typevartuple_and_paramspecs_in_user_generics(self): + Ts = TypeVarTuple("Ts") + P = ParamSpec("P") + + class X(Generic[*Ts, P]): + f: Callable[P, int] + g: Tuple[*Ts] + + G1 = X[int, [bytes]] + self.assertEqual(G1.__args__, (int, (bytes,))) + G2 = X[int, str, [bytes]] + self.assertEqual(G2.__args__, (int, str, (bytes,))) + G3 = X[[bytes]] + self.assertEqual(G3.__args__, ((bytes,),)) + G4 = X[[]] + self.assertEqual(G4.__args__, ((),)) + with self.assertRaises(TypeError): + X[()] + + class Y(Generic[P, *Ts]): + f: Callable[P, int] + g: Tuple[*Ts] + + G1 = Y[[bytes], int] + self.assertEqual(G1.__args__, ((bytes,), int)) + G2 = Y[[bytes], int, str] + self.assertEqual(G2.__args__, ((bytes,), int, str)) + G3 = Y[[bytes]] + self.assertEqual(G3.__args__, ((bytes,),)) + G4 = Y[[]] + self.assertEqual(G4.__args__, ((),)) + with self.assertRaises(TypeError): + Y[()] + + def test_typevartuple_and_paramspecs_in_generic_aliases(self): + P = ParamSpec('P') + T = TypeVar('T') + Ts = TypeVarTuple('Ts') + + for C in Callable, collections.abc.Callable: + with self.subTest(generic=C): + A = C[P, Tuple[*Ts]] + B = A[[int, str], bytes, float] + self.assertEqual(B.__args__, (int, str, Tuple[bytes, float])) + + class X(Generic[T, P]): + pass + + A = X[Tuple[*Ts], P] + B = A[bytes, float, [int, str]] + self.assertEqual(B.__args__, (Tuple[bytes, float], (int, str,))) + + class Y(Generic[P, T]): + pass + + A = Y[P, Tuple[*Ts]] + B = A[[int, str], bytes, float] + self.assertEqual(B.__args__, ((int, str,), Tuple[bytes, float])) + + def test_var_substitution(self): + P = ParamSpec("P") + subst = P.__typing_subst__ + self.assertEqual(subst((int, str)), (int, str)) + self.assertEqual(subst([int, str]), (int, str)) + self.assertEqual(subst([None]), (type(None),)) + self.assertIs(subst(...), ...) + self.assertIs(subst(P), P) + self.assertEqual(subst(Concatenate[int, P]), Concatenate[int, P]) + def test_bad_var_substitution(self): T = TypeVar('T') P = ParamSpec('P') bad_args = (42, int, None, T, int|str, Union[int, str]) for arg in bad_args: with self.subTest(arg=arg): + with self.assertRaises(TypeError): + P.__typing_subst__(arg) with self.assertRaises(TypeError): typing.Callable[P, T][arg, str] with self.assertRaises(TypeError): collections.abc.Callable[P, T][arg, str] - def test_no_paramspec_in__parameters__(self): - # ParamSpec should not be found in __parameters__ - # of generics. Usages outside Callable, Concatenate - # and Generic are invalid. - T = TypeVar("T") - P = ParamSpec("P") - self.assertNotIn(P, List[P].__parameters__) - self.assertIn(T, Tuple[T, P].__parameters__) + def test_type_var_subst_for_other_type_vars(self): + T = TypeVar('T') + T2 = TypeVar('T2') + P = ParamSpec('P') + P2 = ParamSpec('P2') + Ts = TypeVarTuple('Ts') + + class Base(Generic[P]): + pass + + A1 = Base[T] + self.assertEqual(A1.__parameters__, (T,)) + self.assertEqual(A1.__args__, ((T,),)) + self.assertEqual(A1[int], Base[int]) + + A2 = Base[[T]] + self.assertEqual(A2.__parameters__, (T,)) + self.assertEqual(A2.__args__, ((T,),)) + self.assertEqual(A2[int], Base[int]) + + A3 = Base[[int, T]] + self.assertEqual(A3.__parameters__, (T,)) + self.assertEqual(A3.__args__, ((int, T),)) + self.assertEqual(A3[str], Base[[int, str]]) + + A4 = Base[[T, int, T2]] + self.assertEqual(A4.__parameters__, (T, T2)) + self.assertEqual(A4.__args__, ((T, int, T2),)) + self.assertEqual(A4[str, bool], Base[[str, int, bool]]) + + A5 = Base[[*Ts, int]] + self.assertEqual(A5.__parameters__, (Ts,)) + self.assertEqual(A5.__args__, ((*Ts, int),)) + self.assertEqual(A5[str, bool], Base[[str, bool, int]]) + + A5_2 = Base[[int, *Ts]] + self.assertEqual(A5_2.__parameters__, (Ts,)) + self.assertEqual(A5_2.__args__, ((int, *Ts),)) + self.assertEqual(A5_2[str, bool], Base[[int, str, bool]]) + + A6 = Base[[T, *Ts]] + self.assertEqual(A6.__parameters__, (T, Ts)) + self.assertEqual(A6.__args__, ((T, *Ts),)) + self.assertEqual(A6[int, str, bool], Base[[int, str, bool]]) + + A7 = Base[[T, T]] + self.assertEqual(A7.__parameters__, (T,)) + self.assertEqual(A7.__args__, ((T, T),)) + self.assertEqual(A7[int], Base[[int, int]]) + + A8 = Base[[T, list[T]]] + self.assertEqual(A8.__parameters__, (T,)) + self.assertEqual(A8.__args__, ((T, list[T]),)) + self.assertEqual(A8[int], Base[[int, list[int]]]) + + A9 = Base[[Tuple[*Ts], *Ts]] + self.assertEqual(A9.__parameters__, (Ts,)) + self.assertEqual(A9.__args__, ((Tuple[*Ts], *Ts),)) + self.assertEqual(A9[int, str], Base[Tuple[int, str], int, str]) + + A10 = Base[P2] + self.assertEqual(A10.__parameters__, (P2,)) + self.assertEqual(A10.__args__, (P2,)) + self.assertEqual(A10[[int, str]], Base[[int, str]]) + + class DoubleP(Generic[P, P2]): + pass + + B1 = DoubleP[P, P2] + self.assertEqual(B1.__parameters__, (P, P2)) + self.assertEqual(B1.__args__, (P, P2)) + self.assertEqual(B1[[int, str], [bool]], DoubleP[[int, str], [bool]]) + self.assertEqual(B1[[], []], DoubleP[[], []]) + + B2 = DoubleP[[int, str], P2] + self.assertEqual(B2.__parameters__, (P2,)) + self.assertEqual(B2.__args__, ((int, str), P2)) + self.assertEqual(B2[[bool, bool]], DoubleP[[int, str], [bool, bool]]) + self.assertEqual(B2[[]], DoubleP[[int, str], []]) + + B3 = DoubleP[P, [bool, bool]] + self.assertEqual(B3.__parameters__, (P,)) + self.assertEqual(B3.__args__, (P, (bool, bool))) + self.assertEqual(B3[[int, str]], DoubleP[[int, str], [bool, bool]]) + self.assertEqual(B3[[]], DoubleP[[], [bool, bool]]) + + B4 = DoubleP[[T, int], [bool, T2]] + self.assertEqual(B4.__parameters__, (T, T2)) + self.assertEqual(B4.__args__, ((T, int), (bool, T2))) + self.assertEqual(B4[str, float], DoubleP[[str, int], [bool, float]]) + + B5 = DoubleP[[*Ts, int], [bool, T2]] + self.assertEqual(B5.__parameters__, (Ts, T2)) + self.assertEqual(B5.__args__, ((*Ts, int), (bool, T2))) + self.assertEqual(B5[str, bytes, float], + DoubleP[[str, bytes, int], [bool, float]]) + + B6 = DoubleP[[T, int], [bool, *Ts]] + self.assertEqual(B6.__parameters__, (T, Ts)) + self.assertEqual(B6.__args__, ((T, int), (bool, *Ts))) + self.assertEqual(B6[str, bytes, float], + DoubleP[[str, int], [bool, bytes, float]]) + + class PandT(Generic[P, T]): + pass - # Test for consistency with builtin generics. - self.assertNotIn(P, list[P].__parameters__) - self.assertIn(T, tuple[T, P].__parameters__) + C1 = PandT[P, T] + self.assertEqual(C1.__parameters__, (P, T)) + self.assertEqual(C1.__args__, (P, T)) + self.assertEqual(C1[[int, str], bool], PandT[[int, str], bool]) - self.assertNotIn(P, (list[P] | int).__parameters__) - self.assertIn(T, (tuple[T, P] | int).__parameters__) + C2 = PandT[[int, T], T] + self.assertEqual(C2.__parameters__, (T,)) + self.assertEqual(C2.__args__, ((int, T), T)) + self.assertEqual(C2[str], PandT[[int, str], str]) + + C3 = PandT[[int, *Ts], T] + self.assertEqual(C3.__parameters__, (Ts, T)) + self.assertEqual(C3.__args__, ((int, *Ts), T)) + self.assertEqual(C3[str, bool, bytes], PandT[[int, str, bool], bytes]) def test_paramspec_in_nested_generics(self): # Although ParamSpec should not be found in __parameters__ of most @@ -5113,6 +8933,25 @@ def test_paramspec_gets_copied(self): self.assertEqual(C2[Concatenate[str, P2]].__parameters__, (P2,)) self.assertEqual(C2[Concatenate[T, P2]].__parameters__, (T, P2)) + def test_cannot_subclass(self): + with self.assertRaisesRegex(TypeError, NOT_A_BASE_TYPE % 'ParamSpec'): + class C(ParamSpec): pass + with self.assertRaisesRegex(TypeError, NOT_A_BASE_TYPE % 'ParamSpecArgs'): + class D(ParamSpecArgs): pass + with self.assertRaisesRegex(TypeError, NOT_A_BASE_TYPE % 'ParamSpecKwargs'): + class E(ParamSpecKwargs): pass + P = ParamSpec('P') + with self.assertRaisesRegex(TypeError, + CANNOT_SUBCLASS_INSTANCE % 'ParamSpec'): + class F(P): pass + with self.assertRaisesRegex(TypeError, + CANNOT_SUBCLASS_INSTANCE % 'ParamSpecArgs'): + class G(P.args): pass + with self.assertRaisesRegex(TypeError, + CANNOT_SUBCLASS_INSTANCE % 'ParamSpecKwargs'): + class H(P.kwargs): pass + + class ConcatenateTests(BaseTestCase): def test_basics(self): @@ -5121,6 +8960,15 @@ class MyClass: ... c = Concatenate[MyClass, P] self.assertNotEqual(c, Concatenate) + def test_dir(self): + P = ParamSpec('P') + dir_items = set(dir(Concatenate[int, P])) + for required_item in [ + '__args__', '__parameters__', '__origin__', + ]: + with self.subTest(required_item=required_item): + self.assertIn(required_item, dir_items) + def test_valid_uses(self): P = ParamSpec('P') T = TypeVar('T') @@ -5139,6 +8987,18 @@ def test_valid_uses(self): self.assertEqual(C4.__args__, (Concatenate[int, T, P], T)) self.assertEqual(C4.__parameters__, (T, P)) + def test_invalid_uses(self): + with self.assertRaisesRegex(TypeError, 'Concatenate of no types'): + Concatenate[()] + with self.assertRaisesRegex( + TypeError, + ( + 'The last parameter to Concatenate should be a ' + 'ParamSpec variable or ellipsis' + ), + ): + Concatenate[int] + def test_var_substitution(self): T = TypeVar('T') P = ParamSpec('P') @@ -5149,8 +9009,7 @@ def test_var_substitution(self): self.assertEqual(C[int, []], (int,)) self.assertEqual(C[int, Concatenate[str, P2]], Concatenate[int, str, P2]) - with self.assertRaises(TypeError): - C[int, ...] + self.assertEqual(C[int, ...], Concatenate[int, ...]) C = Concatenate[int, P] self.assertEqual(C[P2], Concatenate[int, P2]) @@ -5158,8 +9017,8 @@ def test_var_substitution(self): self.assertEqual(C[str, float], (int, str, float)) self.assertEqual(C[[]], (int,)) self.assertEqual(C[Concatenate[str, P2]], Concatenate[int, str, P2]) - with self.assertRaises(TypeError): - C[...] + self.assertEqual(C[...], Concatenate[int, ...]) + class TypeGuardTests(BaseTestCase): def test_basics(self): @@ -5168,6 +9027,9 @@ def test_basics(self): def foo(arg) -> TypeGuard[int]: ... self.assertEqual(gth(foo), {'return': TypeGuard[int]}) + with self.assertRaises(TypeError): + TypeGuard[int, str] + def test_repr(self): self.assertEqual(repr(TypeGuard), 'typing.TypeGuard') cv = TypeGuard[int] @@ -5178,11 +9040,19 @@ def test_repr(self): self.assertEqual(repr(cv), 'typing.TypeGuard[tuple[int]]') def test_cannot_subclass(self): - with self.assertRaises(TypeError): + with self.assertRaisesRegex(TypeError, CANNOT_SUBCLASS_TYPE): class C(type(TypeGuard)): pass - with self.assertRaises(TypeError): - class C(type(TypeGuard[int])): + with self.assertRaisesRegex(TypeError, CANNOT_SUBCLASS_TYPE): + class D(type(TypeGuard[int])): + pass + with self.assertRaisesRegex(TypeError, + r'Cannot subclass typing\.TypeGuard'): + class E(TypeGuard): + pass + with self.assertRaisesRegex(TypeError, + r'Cannot subclass typing\.TypeGuard\[int\]'): + class F(TypeGuard[int]): pass def test_cannot_init(self): @@ -5200,14 +9070,61 @@ def test_no_isinstance(self): issubclass(int, TypeGuard) +class TypeIsTests(BaseTestCase): + def test_basics(self): + TypeIs[int] # OK + + def foo(arg) -> TypeIs[int]: ... + self.assertEqual(gth(foo), {'return': TypeIs[int]}) + + with self.assertRaises(TypeError): + TypeIs[int, str] + + def test_repr(self): + self.assertEqual(repr(TypeIs), 'typing.TypeIs') + cv = TypeIs[int] + self.assertEqual(repr(cv), 'typing.TypeIs[int]') + cv = TypeIs[Employee] + self.assertEqual(repr(cv), 'typing.TypeIs[%s.Employee]' % __name__) + cv = TypeIs[tuple[int]] + self.assertEqual(repr(cv), 'typing.TypeIs[tuple[int]]') + + def test_cannot_subclass(self): + with self.assertRaisesRegex(TypeError, CANNOT_SUBCLASS_TYPE): + class C(type(TypeIs)): + pass + with self.assertRaisesRegex(TypeError, CANNOT_SUBCLASS_TYPE): + class D(type(TypeIs[int])): + pass + with self.assertRaisesRegex(TypeError, + r'Cannot subclass typing\.TypeIs'): + class E(TypeIs): + pass + with self.assertRaisesRegex(TypeError, + r'Cannot subclass typing\.TypeIs\[int\]'): + class F(TypeIs[int]): + pass + + def test_cannot_init(self): + with self.assertRaises(TypeError): + TypeIs() + with self.assertRaises(TypeError): + type(TypeIs)() + with self.assertRaises(TypeError): + type(TypeIs[Optional[int]])() + + def test_no_isinstance(self): + with self.assertRaises(TypeError): + isinstance(1, TypeIs[int]) + with self.assertRaises(TypeError): + issubclass(int, TypeIs) + + SpecialAttrsP = typing.ParamSpec('SpecialAttrsP') SpecialAttrsT = typing.TypeVar('SpecialAttrsT', int, float, complex) class SpecialAttrsTests(BaseTestCase): - - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_special_attrs(self): cls_to_check = { # ABC classes @@ -5251,7 +9168,7 @@ def test_special_attrs(self): typing.ValuesView: 'ValuesView', # Subscribed ABC classes typing.AbstractSet[Any]: 'AbstractSet', - typing.AsyncContextManager[Any]: 'AsyncContextManager', + typing.AsyncContextManager[Any, Any]: 'AsyncContextManager', typing.AsyncGenerator[Any, Any]: 'AsyncGenerator', typing.AsyncIterable[Any]: 'AsyncIterable', typing.AsyncIterator[Any]: 'AsyncIterator', @@ -5261,7 +9178,7 @@ def test_special_attrs(self): typing.ChainMap[Any, Any]: 'ChainMap', typing.Collection[Any]: 'Collection', typing.Container[Any]: 'Container', - typing.ContextManager[Any]: 'ContextManager', + typing.ContextManager[Any, Any]: 'ContextManager', typing.Coroutine[Any, Any, Any]: 'Coroutine', typing.Counter[Any]: 'Counter', typing.DefaultDict[Any, Any]: 'DefaultDict', @@ -5297,13 +9214,17 @@ def test_special_attrs(self): typing.Literal: 'Literal', typing.NewType: 'NewType', typing.NoReturn: 'NoReturn', + typing.Never: 'Never', typing.Optional: 'Optional', typing.TypeAlias: 'TypeAlias', typing.TypeGuard: 'TypeGuard', + typing.TypeIs: 'TypeIs', typing.TypeVar: 'TypeVar', typing.Union: 'Union', + typing.Self: 'Self', # Subscribed special forms typing.Annotated[Any, "Annotation"]: 'Annotated', + typing.Annotated[int, 'Annotation']: 'Annotated', typing.ClassVar[Any]: 'ClassVar', typing.Concatenate[Any, SpecialAttrsP]: 'Concatenate', typing.Final[Any]: 'Final', @@ -5312,6 +9233,7 @@ def test_special_attrs(self): typing.Literal[True, 2]: 'Literal', typing.Optional[Any]: 'Optional', typing.TypeGuard[Any]: 'TypeGuard', + typing.TypeIs[Any]: 'TypeIs', typing.Union[Any]: 'Any', typing.Union[int, float]: 'Union', # Incompatible special forms (tested in test_special_attrs2) @@ -5345,7 +9267,7 @@ def test_special_attrs2(self): self.assertEqual(fr.__module__, 'typing') # Forward refs are currently unpicklable. for proto in range(pickle.HIGHEST_PROTOCOL + 1): - with self.assertRaises(TypeError) as exc: + with self.assertRaises(TypeError): pickle.dumps(fr, proto) self.assertEqual(SpecialAttrsTests.TypeName.__name__, 'TypeName') @@ -5390,10 +9312,157 @@ class Foo(Generic[T]): def bar(self): pass baz = 3 + __magic__ = 4 + # The class attributes of the original class should be visible even # in dir() of the GenericAlias. See bpo-45755. - self.assertIn('bar', dir(Foo[int])) - self.assertIn('baz', dir(Foo[int])) + dir_items = set(dir(Foo[int])) + for required_item in [ + 'bar', 'baz', + '__args__', '__parameters__', '__origin__', + ]: + with self.subTest(required_item=required_item): + self.assertIn(required_item, dir_items) + self.assertNotIn('__magic__', dir_items) + + +class RevealTypeTests(BaseTestCase): + def test_reveal_type(self): + obj = object() + with captured_stderr() as stderr: + self.assertIs(obj, reveal_type(obj)) + self.assertEqual(stderr.getvalue(), "Runtime type is 'object'\n") + + +class DataclassTransformTests(BaseTestCase): + def test_decorator(self): + def create_model(*, frozen: bool = False, kw_only: bool = True): + return lambda cls: cls + + decorated = dataclass_transform(kw_only_default=True, order_default=False)(create_model) + + class CustomerModel: + id: int + + self.assertIs(decorated, create_model) + self.assertEqual( + decorated.__dataclass_transform__, + { + "eq_default": True, + "order_default": False, + "kw_only_default": True, + "frozen_default": False, + "field_specifiers": (), + "kwargs": {}, + } + ) + self.assertIs( + decorated(frozen=True, kw_only=False)(CustomerModel), + CustomerModel + ) + + def test_base_class(self): + class ModelBase: + def __init_subclass__(cls, *, frozen: bool = False): ... + + Decorated = dataclass_transform( + eq_default=True, + order_default=True, + # Arbitrary unrecognized kwargs are accepted at runtime. + make_everything_awesome=True, + )(ModelBase) + + class CustomerModel(Decorated, frozen=True): + id: int + + self.assertIs(Decorated, ModelBase) + self.assertEqual( + Decorated.__dataclass_transform__, + { + "eq_default": True, + "order_default": True, + "kw_only_default": False, + "frozen_default": False, + "field_specifiers": (), + "kwargs": {"make_everything_awesome": True}, + } + ) + self.assertIsSubclass(CustomerModel, Decorated) + + def test_metaclass(self): + class Field: ... + + class ModelMeta(type): + def __new__( + cls, name, bases, namespace, *, init: bool = True, + ): + return super().__new__(cls, name, bases, namespace) + + Decorated = dataclass_transform( + order_default=True, frozen_default=True, field_specifiers=(Field,) + )(ModelMeta) + + class ModelBase(metaclass=Decorated): ... + + class CustomerModel(ModelBase, init=False): + id: int + + self.assertIs(Decorated, ModelMeta) + self.assertEqual( + Decorated.__dataclass_transform__, + { + "eq_default": True, + "order_default": True, + "kw_only_default": False, + "frozen_default": True, + "field_specifiers": (Field,), + "kwargs": {}, + } + ) + self.assertIsInstance(CustomerModel, Decorated) + + +class NoDefaultTests(BaseTestCase): + # TODO: RUSTPYTHON + @unittest.expectedFailure + def test_pickling(self): + for proto in range(pickle.HIGHEST_PROTOCOL + 1): + s = pickle.dumps(NoDefault, proto) + loaded = pickle.loads(s) + self.assertIs(NoDefault, loaded) + + def test_constructor(self): + self.assertIs(NoDefault, type(NoDefault)()) + with self.assertRaises(TypeError): + type(NoDefault)(1) + + def test_repr(self): + self.assertEqual(repr(NoDefault), 'typing.NoDefault') + + @requires_docstrings + def test_doc(self): + self.assertIsInstance(NoDefault.__doc__, str) + + def test_class(self): + self.assertIs(NoDefault.__class__, type(NoDefault)) + + def test_no_call(self): + with self.assertRaises(TypeError): + NoDefault() + + # TODO: RUSTPYTHON + @unittest.expectedFailure + def test_no_attributes(self): + with self.assertRaises(AttributeError): + NoDefault.foo = 3 + with self.assertRaises(AttributeError): + NoDefault.foo + + # TypeError is consistent with the behavior of NoneType + with self.assertRaises(TypeError): + type(NoDefault).foo = 3 + with self.assertRaises(AttributeError): + type(NoDefault).foo class AllTests(BaseTestCase): @@ -5409,7 +9478,7 @@ def test_all(self): # Context managers. self.assertIn('ContextManager', a) self.assertIn('AsyncContextManager', a) - # Check that io and re are not exported. + # Check that former namespaces io and re are not exported. self.assertNotIn('io', a) self.assertNotIn('re', a) # Spot-check that stdlib modules aren't exported. @@ -5422,6 +9491,10 @@ def test_all(self): self.assertIn('SupportsComplex', a) def test_all_exported_names(self): + # ensure all dynamically created objects are actualised + for name in typing.__all__: + getattr(typing, name) + actual_all = set(typing.__all__) computed_all = { k for k, v in vars(typing).items() @@ -5429,10 +9502,6 @@ def test_all_exported_names(self): if k in actual_all or ( # avoid private names not k.startswith('_') and - # avoid things in the io / re typing submodules - k not in typing.io.__all__ and - k not in typing.re.__all__ and - k not in {'io', 're'} and # there's a few types and metaclasses that aren't exported not k.endswith(('Meta', '_contra', '_co')) and not k.upper() == k and @@ -5443,6 +9512,45 @@ def test_all_exported_names(self): self.assertSetEqual(computed_all, actual_all) +class TypeIterationTests(BaseTestCase): + _UNITERABLE_TYPES = ( + Any, + Union, + Union[str, int], + Union[str, T], + List, + Tuple, + Callable, + Callable[..., T], + Callable[[T], str], + Annotated, + Annotated[T, ''], + ) + + # TODO: RUSTPYTHON + @unittest.expectedFailure + def test_cannot_iterate(self): + expected_error_regex = "object is not iterable" + for test_type in self._UNITERABLE_TYPES: + with self.subTest(type=test_type): + with self.assertRaisesRegex(TypeError, expected_error_regex): + iter(test_type) + with self.assertRaisesRegex(TypeError, expected_error_regex): + list(test_type) + with self.assertRaisesRegex(TypeError, expected_error_regex): + for _ in test_type: + pass + + def test_is_not_instance_of_iterable(self): + for type_to_test in self._UNITERABLE_TYPES: + self.assertNotIsInstance(type_to_test, collections.abc.Iterable) + + +def load_tests(loader, tests, pattern): + import doctest + tests.addTests(doctest.DocTestSuite(typing)) + return tests + if __name__ == '__main__': main() diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py index 4da63c54d4..1a8a8f7ee9 100644 --- a/Lib/test/test_unicode.py +++ b/Lib/test/test_unicode.py @@ -378,8 +378,6 @@ def test_rindex(self): self.assertRaises(ValueError, ('a' * 100).rindex, '\U00100304a') self.assertRaises(ValueError, ('\u0102' * 100).rindex, '\U00100304\u0102') - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_maketrans_translate(self): # these work with plain translate() self.checkequalnofix('bbbc', 'abababc', 'translate', @@ -818,16 +816,6 @@ def test_isidentifier(self): self.assertFalse("©".isidentifier()) self.assertFalse("0".isidentifier()) - @support.cpython_only - @support.requires_legacy_unicode_capi() - @unittest.skipIf(_testcapi is None, 'need _testcapi module') - def test_isidentifier_legacy(self): - u = '𝖀𝖓𝖎𝖈𝖔𝖉𝖊' - self.assertTrue(u.isidentifier()) - with warnings_helper.check_warnings(): - warnings.simplefilter('ignore', DeprecationWarning) - self.assertTrue(_testcapi.unicode_legacy_string(u).isidentifier()) - def test_isprintable(self): self.assertTrue("".isprintable()) self.assertTrue(" ".isprintable()) @@ -2522,26 +2510,6 @@ def test_getnewargs(self): self.assertEqual(args[0], text) self.assertEqual(len(args), 1) - @support.cpython_only - @support.requires_legacy_unicode_capi() - @unittest.skipIf(_testcapi is None, 'need _testcapi module') - def test_resize(self): - for length in range(1, 100, 7): - # generate a fresh string (refcount=1) - text = 'a' * length + 'b' - - # fill wstr internal field - with self.assertWarns(DeprecationWarning): - abc = _testcapi.getargs_u(text) - self.assertEqual(abc, text) - - # resize text: wstr field must be cleared and then recomputed - text += 'c' - with self.assertWarns(DeprecationWarning): - abcdef = _testcapi.getargs_u(text) - self.assertNotEqual(abc, abcdef) - self.assertEqual(abcdef, text) - def test_compare(self): # Issue #17615 N = 10 diff --git a/Lib/test/test_unicodedata.py b/Lib/test/test_unicodedata.py index c9e0b234ef..29da4a25a3 100644 --- a/Lib/test/test_unicodedata.py +++ b/Lib/test/test_unicodedata.py @@ -179,8 +179,6 @@ def test_decomposition(self): self.assertRaises(TypeError, self.db.decomposition) self.assertRaises(TypeError, self.db.decomposition, 'xx') - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_mirrored(self): self.assertEqual(self.db.mirrored('\uFFFE'), 0) self.assertEqual(self.db.mirrored('a'), 0) diff --git a/Lib/test/test_wave.py b/Lib/test/test_wave.py new file mode 100644 index 0000000000..5e771c8de9 --- /dev/null +++ b/Lib/test/test_wave.py @@ -0,0 +1,227 @@ +import unittest +from test import audiotests +from test import support +import io +import struct +import sys +import wave + + +class WaveTest(audiotests.AudioWriteTests, + audiotests.AudioTestsWithSourceFile): + module = wave + + +class WavePCM8Test(WaveTest, unittest.TestCase): + sndfilename = 'pluck-pcm8.wav' + sndfilenframes = 3307 + nchannels = 2 + sampwidth = 1 + framerate = 11025 + nframes = 48 + comptype = 'NONE' + compname = 'not compressed' + frames = bytes.fromhex("""\ + 827F CB80 B184 0088 4B86 C883 3F81 837E 387A 3473 A96B 9A66 \ + 6D64 4662 8E60 6F60 D762 7B68 936F 5877 177B 757C 887B 5F7B \ + 917A BE7B 3C7C E67F 4F84 C389 418E D192 6E97 0296 FF94 0092 \ + C98E D28D 6F8F 4E8F 648C E38A 888A AB8B D18E 0B91 368E C48A \ + """) + + +class WavePCM16Test(WaveTest, unittest.TestCase): + sndfilename = 'pluck-pcm16.wav' + sndfilenframes = 3307 + nchannels = 2 + sampwidth = 2 + framerate = 11025 + nframes = 48 + comptype = 'NONE' + compname = 'not compressed' + frames = bytes.fromhex("""\ + 022EFFEA 4B5C00F9 311404EF 80DC0843 CBDF06B2 48AA03F3 BFE701B2 036BFE7C \ + B857FA3E B4B2F34F 2999EBCA 1A5FE6D7 EDFCE491 C626E279 0E05E0B8 EF27E02D \ + 5754E275 FB31E843 1373EF89 D827F72C 978BFB7A F5F7FC11 0866FB9C DF30FB42 \ + 117FFA36 3EE4FB5D BC75FCB6 66D5FF5F CF16040E 43220978 C1BC0EC8 511F12A4 \ + EEDF1755 82061666 7FFF1446 80001296 499C0EB2 52BA0DB9 EFB70F5C CE400FBC \ + E4B50CEB 63440A5A 08CA0A1F 2BBA0B0B 51460E47 8BCB113C B6F50EEA 44150A59 \ + """) + if sys.byteorder != 'big': + frames = wave._byteswap(frames, 2) + + +class WavePCM24Test(WaveTest, unittest.TestCase): + sndfilename = 'pluck-pcm24.wav' + sndfilenframes = 3307 + nchannels = 2 + sampwidth = 3 + framerate = 11025 + nframes = 48 + comptype = 'NONE' + compname = 'not compressed' + frames = bytes.fromhex("""\ + 022D65FFEB9D 4B5A0F00FA54 3113C304EE2B 80DCD6084303 \ + CBDEC006B261 48A99803F2F8 BFE82401B07D 036BFBFE7B5D \ + B85756FA3EC9 B4B055F3502B 299830EBCB62 1A5CA7E6D99A \ + EDFA3EE491BD C625EBE27884 0E05A9E0B6CF EF2929E02922 \ + 5758D8E27067 FB3557E83E16 1377BFEF8402 D82C5BF7272A \ + 978F16FB7745 F5F865FC1013 086635FB9C4E DF30FCFB40EE \ + 117FE0FA3438 3EE6B8FB5AC3 BC77A3FCB2F4 66D6DAFF5F32 \ + CF13B9041275 431D69097A8C C1BB600EC74E 5120B912A2BA \ + EEDF641754C0 8207001664B7 7FFFFF14453F 8000001294E6 \ + 499C1B0EB3B2 52B73E0DBCA0 EFB2B20F5FD8 CE3CDB0FBE12 \ + E4B49C0CEA2D 6344A80A5A7C 08C8FE0A1FFE 2BB9860B0A0E \ + 51486F0E44E1 8BCC64113B05 B6F4EC0EEB36 4413170A5B48 \ + """) + if sys.byteorder != 'big': + frames = wave._byteswap(frames, 3) + + +class WavePCM24ExtTest(WaveTest, unittest.TestCase): + sndfilename = 'pluck-pcm24-ext.wav' + sndfilenframes = 3307 + nchannels = 2 + sampwidth = 3 + framerate = 11025 + nframes = 48 + comptype = 'NONE' + compname = 'not compressed' + frames = bytes.fromhex("""\ + 022D65FFEB9D 4B5A0F00FA54 3113C304EE2B 80DCD6084303 \ + CBDEC006B261 48A99803F2F8 BFE82401B07D 036BFBFE7B5D \ + B85756FA3EC9 B4B055F3502B 299830EBCB62 1A5CA7E6D99A \ + EDFA3EE491BD C625EBE27884 0E05A9E0B6CF EF2929E02922 \ + 5758D8E27067 FB3557E83E16 1377BFEF8402 D82C5BF7272A \ + 978F16FB7745 F5F865FC1013 086635FB9C4E DF30FCFB40EE \ + 117FE0FA3438 3EE6B8FB5AC3 BC77A3FCB2F4 66D6DAFF5F32 \ + CF13B9041275 431D69097A8C C1BB600EC74E 5120B912A2BA \ + EEDF641754C0 8207001664B7 7FFFFF14453F 8000001294E6 \ + 499C1B0EB3B2 52B73E0DBCA0 EFB2B20F5FD8 CE3CDB0FBE12 \ + E4B49C0CEA2D 6344A80A5A7C 08C8FE0A1FFE 2BB9860B0A0E \ + 51486F0E44E1 8BCC64113B05 B6F4EC0EEB36 4413170A5B48 \ + """) + if sys.byteorder != 'big': + frames = wave._byteswap(frames, 3) + + +class WavePCM32Test(WaveTest, unittest.TestCase): + sndfilename = 'pluck-pcm32.wav' + sndfilenframes = 3307 + nchannels = 2 + sampwidth = 4 + framerate = 11025 + nframes = 48 + comptype = 'NONE' + compname = 'not compressed' + frames = bytes.fromhex("""\ + 022D65BCFFEB9D92 4B5A0F8000FA549C 3113C34004EE2BC0 80DCD680084303E0 \ + CBDEC0C006B26140 48A9980003F2F8FC BFE8248001B07D92 036BFB60FE7B5D34 \ + B8575600FA3EC920 B4B05500F3502BC0 29983000EBCB6240 1A5CA7A0E6D99A60 \ + EDFA3E80E491BD40 C625EB80E27884A0 0E05A9A0E0B6CFE0 EF292940E0292280 \ + 5758D800E2706700 FB3557D8E83E1640 1377BF00EF840280 D82C5B80F7272A80 \ + 978F1600FB774560 F5F86510FC101364 086635A0FB9C4E20 DF30FC40FB40EE28 \ + 117FE0A0FA3438B0 3EE6B840FB5AC3F0 BC77A380FCB2F454 66D6DA80FF5F32B4 \ + CF13B980041275B0 431D6980097A8C00 C1BB60000EC74E00 5120B98012A2BAA0 \ + EEDF64C01754C060 820700001664B780 7FFFFFFF14453F40 800000001294E6E0 \ + 499C1B000EB3B270 52B73E000DBCA020 EFB2B2E00F5FD880 CE3CDB400FBE1270 \ + E4B49CC00CEA2D90 6344A8800A5A7CA0 08C8FE800A1FFEE0 2BB986C00B0A0E00 \ + 51486F800E44E190 8BCC6480113B0580 B6F4EC000EEB3630 441317800A5B48A0 \ + """) + if sys.byteorder != 'big': + frames = wave._byteswap(frames, 4) + + +class MiscTestCase(unittest.TestCase): + def test__all__(self): + not_exported = {'WAVE_FORMAT_PCM', 'WAVE_FORMAT_EXTENSIBLE', 'KSDATAFORMAT_SUBTYPE_PCM'} + support.check__all__(self, wave, not_exported=not_exported) + + def test_read_deprecations(self): + filename = support.findfile('pluck-pcm8.wav', subdir='audiodata') + with wave.open(filename) as reader: + with self.assertWarns(DeprecationWarning): + with self.assertRaises(wave.Error): + reader.getmark('mark') + with self.assertWarns(DeprecationWarning): + self.assertIsNone(reader.getmarkers()) + + def test_write_deprecations(self): + with io.BytesIO(b'') as tmpfile: + with wave.open(tmpfile, 'wb') as writer: + writer.setnchannels(1) + writer.setsampwidth(1) + writer.setframerate(1) + writer.setcomptype('NONE', 'not compressed') + + with self.assertWarns(DeprecationWarning): + with self.assertRaises(wave.Error): + writer.setmark(0, 0, 'mark') + with self.assertWarns(DeprecationWarning): + with self.assertRaises(wave.Error): + writer.getmark('mark') + with self.assertWarns(DeprecationWarning): + self.assertIsNone(writer.getmarkers()) + + +class WaveLowLevelTest(unittest.TestCase): + + def test_read_no_chunks(self): + b = b'SPAM' + with self.assertRaises(EOFError): + wave.open(io.BytesIO(b)) + + def test_read_no_riff_chunk(self): + b = b'SPAM' + struct.pack(' None: ... + + +def generic_function_2[Eggs, **Spam](x: Eggs, y: Spam): pass + + +class D: + Foo = int + Bar = str + + def generic_method[Foo, **Bar]( + self, x: Foo, y: Bar + ) -> None: ... + + def generic_method_2[Eggs, **Spam](self, x: Eggs, y: Spam): pass + + +def nested(): + from types import SimpleNamespace + from typing import get_type_hints + + Eggs = bytes + Spam = memoryview + + + class E[Eggs, **Spam]: + x: Eggs + y: Spam + + def generic_method[Eggs, **Spam](self, x: Eggs, y: Spam): pass + + + def generic_function[Eggs, **Spam](x: Eggs, y: Spam): pass + + + return SimpleNamespace( + E=E, + hints_for_E=get_type_hints(E), + hints_for_E_meth=get_type_hints(E.generic_method), + generic_func=generic_function, + hints_for_generic_func=get_type_hints(generic_function) + ) diff --git a/Lib/test/typinganndata/mod_generics_cache.py b/Lib/test/typinganndata/mod_generics_cache.py new file mode 100644 index 0000000000..62deea9859 --- /dev/null +++ b/Lib/test/typinganndata/mod_generics_cache.py @@ -0,0 +1,26 @@ +"""Module for testing the behavior of generics across different modules.""" + +from typing import TypeVar, Generic, Optional, TypeAliasType + +# TODO: RUSTPYTHON + +# default_a: Optional['A'] = None +# default_b: Optional['B'] = None + +# T = TypeVar('T') + + +# class A(Generic[T]): +# some_b: 'B' + + +# class B(Generic[T]): +# class A(Generic[T]): +# pass + +# my_inner_a1: 'B.A' +# my_inner_a2: A +# my_outer_a: 'A' # unless somebody calls get_type_hints with localns=B.__dict__ + +# type Alias = int +# OldStyle = TypeAliasType("OldStyle", int) diff --git a/Lib/timeit.py b/Lib/timeit.py index f323e65572..258dedccd0 100755 --- a/Lib/timeit.py +++ b/Lib/timeit.py @@ -50,9 +50,9 @@ """ import gc +import itertools import sys import time -import itertools __all__ = ["Timer", "timeit", "repeat", "default_timer"] @@ -77,9 +77,11 @@ def inner(_it, _timer{init}): return _t1 - _t0 """ + def reindent(src, indent): """Helper to reindent a multi-line statement.""" - return src.replace("\n", "\n" + " "*indent) + return src.replace("\n", "\n" + " " * indent) + class Timer: """Class for timing execution speed of small code snippets. @@ -166,7 +168,7 @@ def timeit(self, number=default_number): To be precise, this executes the setup statement once, and then returns the time it takes to execute the main statement - a number of times, as a float measured in seconds. The + a number of times, as float seconds if using the default timer. The argument is the number of times through the loop, defaulting to one million. The main statement, the setup statement and the timer function to be used are passed to the constructor. @@ -230,16 +232,19 @@ def autorange(self, callback=None): return (number, time_taken) i *= 10 + def timeit(stmt="pass", setup="pass", timer=default_timer, number=default_number, globals=None): """Convenience function to create Timer object and call timeit method.""" return Timer(stmt, setup, timer, globals).timeit(number) + def repeat(stmt="pass", setup="pass", timer=default_timer, repeat=default_repeat, number=default_number, globals=None): """Convenience function to create Timer object and call repeat method.""" return Timer(stmt, setup, timer, globals).repeat(repeat, number) + def main(args=None, *, _wrap_timer=None): """Main program, used when run as a script. @@ -261,10 +266,9 @@ def main(args=None, *, _wrap_timer=None): args = sys.argv[1:] import getopt try: - opts, args = getopt.getopt(args, "n:u:s:r:tcpvh", + opts, args = getopt.getopt(args, "n:u:s:r:pvh", ["number=", "setup=", "repeat=", - "time", "clock", "process", - "verbose", "unit=", "help"]) + "process", "verbose", "unit=", "help"]) except getopt.error as err: print(err) print("use -h/--help for command line help") @@ -272,7 +276,7 @@ def main(args=None, *, _wrap_timer=None): timer = default_timer stmt = "\n".join(args) or "pass" - number = 0 # auto-determine + number = 0 # auto-determine setup = [] repeat = default_repeat verbose = 0 @@ -289,7 +293,7 @@ def main(args=None, *, _wrap_timer=None): time_unit = a else: print("Unrecognized unit. Please select nsec, usec, msec, or sec.", - file=sys.stderr) + file=sys.stderr) return 2 if o in ("-r", "--repeat"): repeat = int(a) @@ -323,7 +327,7 @@ def callback(number, time_taken): msg = "{num} loop{s} -> {secs:.{prec}g} secs" plural = (number != 1) print(msg.format(num=number, s='s' if plural else '', - secs=time_taken, prec=precision)) + secs=time_taken, prec=precision)) try: number, _ = t.autorange(callback) except: @@ -374,5 +378,6 @@ def format_time(dt): UserWarning, '', 0) return None + if __name__ == "__main__": sys.exit(main()) diff --git a/Lib/tkinter/__init__.py b/Lib/tkinter/__init__.py index 8b2502b4c0..df3b936ccd 100644 --- a/Lib/tkinter/__init__.py +++ b/Lib/tkinter/__init__.py @@ -2451,7 +2451,9 @@ def __init__(self, screenName=None, baseName=None, className='Tk', self.tk = None if baseName is None: import os - baseName = os.path.basename(sys.argv[0]) + # TODO: RUSTPYTHON + # baseName = os.path.basename(sys.argv[0]) + baseName = "" # sys.argv[0] baseName, ext = os.path.splitext(baseName) if ext not in ('.py', '.pyc'): baseName = baseName + ext diff --git a/Lib/typing.py b/Lib/typing.py index 75ec2a6a2e..a7397356d6 100644 --- a/Lib/typing.py +++ b/Lib/typing.py @@ -1,34 +1,46 @@ """ -The typing module: Support for gradual typing as defined by PEP 484. - -At large scale, the structure of the module is following: -* Imports and exports, all public names should be explicitly added to __all__. -* Internal helper functions: these should never be used in code outside this module. -* _SpecialForm and its instances (special forms): - Any, NoReturn, ClassVar, Union, Optional, Concatenate -* Classes whose instances can be type arguments in addition to types: - ForwardRef, TypeVar and ParamSpec -* The core of internal generics API: _GenericAlias and _VariadicGenericAlias, the latter is - currently only used by Tuple and Callable. All subscripted types like X[int], Union[int, str], - etc., are instances of either of these classes. -* The public counterpart of the generics API consists of two classes: Generic and Protocol. -* Public helper functions: get_type_hints, overload, cast, no_type_check, - no_type_check_decorator. -* Generic aliases for collections.abc ABCs and few additional protocols. +The typing module: Support for gradual typing as defined by PEP 484 and subsequent PEPs. + +Among other things, the module includes the following: +* Generic, Protocol, and internal machinery to support generic aliases. + All subscripted types like X[int], Union[int, str] are generic aliases. +* Various "special forms" that have unique meanings in type annotations: + NoReturn, Never, ClassVar, Self, Concatenate, Unpack, and others. +* Classes whose instances can be type arguments to generic classes and functions: + TypeVar, ParamSpec, TypeVarTuple. +* Public helper functions: get_type_hints, overload, cast, final, and others. +* Several protocols to support duck-typing: + SupportsFloat, SupportsIndex, SupportsAbs, and others. * Special types: NewType, NamedTuple, TypedDict. -* Wrapper submodules for re and io related types. +* Deprecated aliases for builtin types and collections.abc ABCs. + +Any name not present in __all__ is an implementation detail +that may be changed without notice. Use at your own risk! """ from abc import abstractmethod, ABCMeta import collections +from collections import defaultdict import collections.abc +import copyreg import functools import operator -import re as stdlib_re # Avoid confusion with the re we export. import sys import types from types import WrapperDescriptorType, MethodWrapperType, MethodDescriptorType, GenericAlias +from _typing import ( + _idfunc, + TypeVar, + ParamSpec, + TypeVarTuple, + ParamSpecArgs, + ParamSpecKwargs, + TypeAliasType, + Generic, + NoDefault, +) + # Please keep __all__ alphabetized within each category. __all__ = [ # Super-special typing primitives. @@ -47,6 +59,7 @@ 'Tuple', 'Type', 'TypeVar', + 'TypeVarTuple', 'Union', # ABCs (from collections.abc). @@ -108,30 +121,45 @@ # One-off things. 'AnyStr', + 'assert_type', + 'assert_never', 'cast', + 'clear_overloads', + 'dataclass_transform', 'final', 'get_args', 'get_origin', + 'get_overloads', + 'get_protocol_members', 'get_type_hints', + 'is_protocol', 'is_typeddict', + 'LiteralString', + 'Never', 'NewType', 'no_type_check', 'no_type_check_decorator', + 'NoDefault', 'NoReturn', + 'NotRequired', 'overload', + 'override', 'ParamSpecArgs', 'ParamSpecKwargs', + 'ReadOnly', + 'Required', + 'reveal_type', 'runtime_checkable', + 'Self', 'Text', 'TYPE_CHECKING', 'TypeAlias', 'TypeGuard', + 'TypeIs', + 'TypeAliasType', + 'Unpack', ] -# The pseudo-submodules 're' and 'io' are part of the public -# namespace, but excluded from __all__ because they might stomp on -# legitimate imports of those modules. - def _type_convert(arg, module=None, *, allow_special_forms=False): """For converting None to type(None), and strings to ForwardRef.""" @@ -148,7 +176,7 @@ def _type_check(arg, msg, is_argument=True, module=None, *, allow_special_forms= As a special case, accept None and return type(None) instead. Also wrap strings into ForwardRef instances. Consider several corner cases, for example plain special forms like Union are not valid, while Union[int, str] is OK, etc. - The msg argument is a human-readable error message, e.g:: + The msg argument is a human-readable error message, e.g.:: "Union[arg, ...]: arg should be a type." @@ -164,14 +192,13 @@ def _type_check(arg, msg, is_argument=True, module=None, *, allow_special_forms= if (isinstance(arg, _GenericAlias) and arg.__origin__ in invalid_generic_forms): raise TypeError(f"{arg} is not valid as type argument") - if arg in (Any, NoReturn, Final, TypeAlias): + if arg in (Any, LiteralString, NoReturn, Never, Self, TypeAlias): + return arg + if allow_special_forms and arg in (ClassVar, Final): return arg if isinstance(arg, _SpecialForm) or arg in (Generic, Protocol): raise TypeError(f"Plain {arg} is not valid as type argument") - if isinstance(arg, (type, TypeVar, ForwardRef, types.UnionType, ParamSpec, - ParamSpecArgs, ParamSpecKwargs)): - return arg - if not callable(arg): + if type(arg) is tuple: raise TypeError(f"{msg} Got {arg!r:.100}.") return arg @@ -181,6 +208,30 @@ def _is_param_expr(arg): (tuple, list, ParamSpec, _ConcatenateGenericAlias)) +def _should_unflatten_callable_args(typ, args): + """Internal helper for munging collections.abc.Callable's __args__. + + The canonical representation for a Callable's __args__ flattens the + argument types, see https://github.com/python/cpython/issues/86361. + + For example:: + + >>> import collections.abc + >>> P = ParamSpec('P') + >>> collections.abc.Callable[[int, int], str].__args__ == (int, int, str) + True + >>> collections.abc.Callable[P, str].__args__ == (P, str) + True + + As a result, if we need to reconstruct the Callable from its __args__, + we need to unflatten it. + """ + return ( + typ.__origin__ is collections.abc.Callable + and not (len(args) == 2 and _is_param_expr(args[0])) + ) + + def _type_repr(obj): """Return the repr() of an object, special-casing types (internal helper). @@ -189,99 +240,160 @@ def _type_repr(obj): typically enough to uniquely identify a type. For everything else, we fall back on repr(obj). """ - if isinstance(obj, types.GenericAlias): - return repr(obj) + # When changing this function, don't forget about + # `_collections_abc._type_repr`, which does the same thing + # and must be consistent with this one. if isinstance(obj, type): if obj.__module__ == 'builtins': return obj.__qualname__ return f'{obj.__module__}.{obj.__qualname__}' if obj is ...: - return('...') + return '...' if isinstance(obj, types.FunctionType): return obj.__name__ + if isinstance(obj, tuple): + # Special case for `repr` of types with `ParamSpec`: + return '[' + ', '.join(_type_repr(t) for t in obj) + ']' return repr(obj) -def _collect_type_vars(types_, typevar_types=None): - """Collect all type variable contained - in types in order of first appearance (lexicographic order). For example:: +def _collect_type_parameters(args, *, enforce_default_ordering: bool = True): + """Collect all type parameters in args + in order of first appearance (lexicographic order). - _collect_type_vars((T, List[S, T])) == (T, S) + For example:: + + >>> P = ParamSpec('P') + >>> T = TypeVar('T') + >>> _collect_type_parameters((T, Callable[P, T])) + (~T, ~P) """ - if typevar_types is None: - typevar_types = TypeVar - tvars = [] - for t in types_: - if isinstance(t, typevar_types) and t not in tvars: - tvars.append(t) - if isinstance(t, (_GenericAlias, GenericAlias, types.UnionType)): - tvars.extend([t for t in t.__parameters__ if t not in tvars]) - return tuple(tvars) + # required type parameter cannot appear after parameter with default + default_encountered = False + # or after TypeVarTuple + type_var_tuple_encountered = False + parameters = [] + for t in args: + if isinstance(t, type): + # We don't want __parameters__ descriptor of a bare Python class. + pass + elif isinstance(t, tuple): + # `t` might be a tuple, when `ParamSpec` is substituted with + # `[T, int]`, or `[int, *Ts]`, etc. + for x in t: + for collected in _collect_type_parameters([x]): + if collected not in parameters: + parameters.append(collected) + elif hasattr(t, '__typing_subst__'): + if t not in parameters: + if enforce_default_ordering: + if type_var_tuple_encountered and t.has_default(): + raise TypeError('Type parameter with a default' + ' follows TypeVarTuple') + + if t.has_default(): + default_encountered = True + elif default_encountered: + raise TypeError(f'Type parameter {t!r} without a default' + ' follows type parameter with a default') + + parameters.append(t) + else: + if _is_unpacked_typevartuple(t): + type_var_tuple_encountered = True + for x in getattr(t, '__parameters__', ()): + if x not in parameters: + parameters.append(x) + return tuple(parameters) -def _check_generic(cls, parameters, elen): +def _check_generic_specialization(cls, arguments): """Check correct count for parameters of a generic cls (internal helper). + This gives a nice error message in case of count mismatch. """ - if not elen: + expected_len = len(cls.__parameters__) + if not expected_len: raise TypeError(f"{cls} is not a generic class") - alen = len(parameters) - if alen != elen: - raise TypeError(f"Too {'many' if alen > elen else 'few'} arguments for {cls};" - f" actual {alen}, expected {elen}") - -def _prepare_paramspec_params(cls, params): - """Prepares the parameters for a Generic containing ParamSpec - variables (internal helper). - """ - # Special case where Z[[int, str, bool]] == Z[int, str, bool] in PEP 612. - if (len(cls.__parameters__) == 1 - and params and not _is_param_expr(params[0])): - assert isinstance(cls.__parameters__[0], ParamSpec) - return (params,) - else: - _check_generic(cls, params, len(cls.__parameters__)) - _params = [] - # Convert lists to tuples to help other libraries cache the results. - for p, tvar in zip(params, cls.__parameters__): - if isinstance(tvar, ParamSpec) and isinstance(p, list): - p = tuple(p) - _params.append(p) - return tuple(_params) - -def _deduplicate(params): - # Weed out strict duplicates, preserving the first of each occurrence. - all_params = set(params) - if len(all_params) < len(params): - new_params = [] - for t in params: - if t in all_params: - new_params.append(t) - all_params.remove(t) - params = new_params - assert not all_params, all_params - return params + actual_len = len(arguments) + if actual_len != expected_len: + # deal with defaults + if actual_len < expected_len: + # If the parameter at index `actual_len` in the parameters list + # has a default, then all parameters after it must also have + # one, because we validated as much in _collect_type_parameters(). + # That means that no error needs to be raised here, despite + # the number of arguments being passed not matching the number + # of parameters: all parameters that aren't explicitly + # specialized in this call are parameters with default values. + if cls.__parameters__[actual_len].has_default(): + return + + expected_len -= sum(p.has_default() for p in cls.__parameters__) + expect_val = f"at least {expected_len}" + else: + expect_val = expected_len + raise TypeError(f"Too {'many' if actual_len > expected_len else 'few'} arguments" + f" for {cls}; actual {actual_len}, expected {expect_val}") + + +def _unpack_args(*args): + newargs = [] + for arg in args: + subargs = getattr(arg, '__typing_unpacked_tuple_args__', None) + if subargs is not None and not (subargs and subargs[-1] is ...): + newargs.extend(subargs) + else: + newargs.append(arg) + return newargs + +def _deduplicate(params, *, unhashable_fallback=False): + # Weed out strict duplicates, preserving the first of each occurrence. + try: + return dict.fromkeys(params) + except TypeError: + if not unhashable_fallback: + raise + # Happens for cases like `Annotated[dict, {'x': IntValidator()}]` + return _deduplicate_unhashable(params) + +def _deduplicate_unhashable(unhashable_params): + new_unhashable = [] + for t in unhashable_params: + if t not in new_unhashable: + new_unhashable.append(t) + return new_unhashable + +def _compare_args_orderless(first_args, second_args): + first_unhashable = _deduplicate_unhashable(first_args) + second_unhashable = _deduplicate_unhashable(second_args) + t = list(second_unhashable) + try: + for elem in first_unhashable: + t.remove(elem) + except ValueError: + return False + return not t def _remove_dups_flatten(parameters): - """An internal helper for Union creation and substitution: flatten Unions - among parameters, then remove duplicates. + """Internal helper for Union creation and substitution. + + Flatten Unions among parameters, then remove duplicates. """ # Flatten out Union[Union[...], ...]. params = [] for p in parameters: if isinstance(p, (_UnionGenericAlias, types.UnionType)): params.extend(p.__args__) - elif isinstance(p, tuple) and len(p) > 0 and p[0] is Union: - params.extend(p[1:]) else: params.append(p) - return tuple(_deduplicate(params)) + return tuple(_deduplicate(params, unhashable_fallback=True)) def _flatten_literal_params(parameters): - """An internal helper for Literal creation: flatten Literals among parameters""" + """Internal helper for Literal creation: flatten Literals among parameters.""" params = [] for p in parameters: if isinstance(p, _LiteralGenericAlias): @@ -292,20 +404,29 @@ def _flatten_literal_params(parameters): _cleanups = [] +_caches = {} def _tp_cache(func=None, /, *, typed=False): - """Internal wrapper caching __getitem__ of generic types with a fallback to - original function for non-hashable arguments. + """Internal wrapper caching __getitem__ of generic types. + + For non-hashable arguments, the original function is used as a fallback. """ def decorator(func): - cached = functools.lru_cache(typed=typed)(func) - _cleanups.append(cached.cache_clear) + # The callback 'inner' references the newly created lru_cache + # indirectly by performing a lookup in the global '_caches' dictionary. + # This breaks a reference that can be problematic when combined with + # C API extensions that leak references to types. See GH-98253. + + cache = functools.lru_cache(typed=typed)(func) + _caches[func] = cache + _cleanups.append(cache.cache_clear) + del cache @functools.wraps(func) def inner(*args, **kwds): try: - return cached(*args, **kwds) + return _caches[func](*args, **kwds) except TypeError: pass # All real errors (not unhashable args) are raised below. return func(*args, **kwds) @@ -316,16 +437,61 @@ def inner(*args, **kwds): return decorator -def _eval_type(t, globalns, localns, recursive_guard=frozenset()): + +def _deprecation_warning_for_no_type_params_passed(funcname: str) -> None: + import warnings + + depr_message = ( + f"Failing to pass a value to the 'type_params' parameter " + f"of {funcname!r} is deprecated, as it leads to incorrect behaviour " + f"when calling {funcname} on a stringified annotation " + f"that references a PEP 695 type parameter. " + f"It will be disallowed in Python 3.15." + ) + warnings.warn(depr_message, category=DeprecationWarning, stacklevel=3) + + +class _Sentinel: + __slots__ = () + def __repr__(self): + return '' + + +_sentinel = _Sentinel() + + +def _eval_type(t, globalns, localns, type_params=_sentinel, *, recursive_guard=frozenset()): """Evaluate all forward references in the given type t. + For use of globalns and localns see the docstring for get_type_hints(). recursive_guard is used to prevent infinite recursion with a recursive ForwardRef. """ + if type_params is _sentinel: + _deprecation_warning_for_no_type_params_passed("typing._eval_type") + type_params = () if isinstance(t, ForwardRef): - return t._evaluate(globalns, localns, recursive_guard) + return t._evaluate(globalns, localns, type_params, recursive_guard=recursive_guard) if isinstance(t, (_GenericAlias, GenericAlias, types.UnionType)): - ev_args = tuple(_eval_type(a, globalns, localns, recursive_guard) for a in t.__args__) + if isinstance(t, GenericAlias): + args = tuple( + ForwardRef(arg) if isinstance(arg, str) else arg + for arg in t.__args__ + ) + is_unpacked = t.__unpacked__ + if _should_unflatten_callable_args(t, args): + t = t.__origin__[(args[:-1], args[-1])] + else: + t = t.__origin__[args] + if is_unpacked: + t = Unpack[t] + + ev_args = tuple( + _eval_type( + a, globalns, localns, type_params, recursive_guard=recursive_guard + ) + for a in t.__args__ + ) if ev_args == t.__args__: return t if isinstance(t, GenericAlias): @@ -338,28 +504,36 @@ def _eval_type(t, globalns, localns, recursive_guard=frozenset()): class _Final: - """Mixin to prohibit subclassing""" + """Mixin to prohibit subclassing.""" __slots__ = ('__weakref__',) - def __init_subclass__(self, /, *args, **kwds): + def __init_subclass__(cls, /, *args, **kwds): if '_root' not in kwds: raise TypeError("Cannot subclass special typing classes") -class _Immutable: - """Mixin to indicate that object should not be copied.""" - __slots__ = () - def __copy__(self): - return self +class _NotIterable: + """Mixin to prevent iteration, without being compatible with Iterable. + + That is, we could do:: - def __deepcopy__(self, memo): - return self + def __iter__(self): raise TypeError() + + But this would make users of this mixin duck type-compatible with + collections.abc.Iterable - isinstance(foo, Iterable) would be True. + + Luckily, we can instead prevent iteration by setting __iter__ to None, which + is treated specially. + """ + + __slots__ = () + __iter__ = None # Internal indicator of special typing constructs. # See __doc__ instance attribute for specific docs. -class _SpecialForm(_Final, _root=True): +class _SpecialForm(_Final, _NotIterable, _root=True): __slots__ = ('_name', '__doc__', '_getitem') def __init__(self, getitem): @@ -402,15 +576,26 @@ def __getitem__(self, parameters): return self._getitem(self, parameters) -class _LiteralSpecialForm(_SpecialForm, _root=True): +class _TypedCacheSpecialForm(_SpecialForm, _root=True): def __getitem__(self, parameters): if not isinstance(parameters, tuple): parameters = (parameters,) return self._getitem(self, *parameters) -@_SpecialForm -def Any(self, parameters): +class _AnyMeta(type): + def __instancecheck__(self, obj): + if self is Any: + raise TypeError("typing.Any cannot be used with isinstance()") + return super().__instancecheck__(obj) + + def __repr__(self): + if self is Any: + return "typing.Any" + return super().__repr__() # respect to subclasses + + +class Any(metaclass=_AnyMeta): """Special type indicating an unconstrained type. - Any is compatible with every type. @@ -419,43 +604,128 @@ def Any(self, parameters): Note that all the above statements are true from the point of view of static type checkers. At runtime, Any should not be used with instance - or class checks. + checks. """ - raise TypeError(f"{self} is not subscriptable") + + def __new__(cls, *args, **kwargs): + if cls is Any: + raise TypeError("Any cannot be instantiated") + return super().__new__(cls) + @_SpecialForm def NoReturn(self, parameters): """Special type indicating functions that never return. + + Example:: + + from typing import NoReturn + + def stop() -> NoReturn: + raise Exception('no way') + + NoReturn can also be used as a bottom type, a type that + has no values. Starting in Python 3.11, the Never type should + be used for this concept instead. Type checkers should treat the two + equivalently. + """ + raise TypeError(f"{self} is not subscriptable") + +# This is semantically identical to NoReturn, but it is implemented +# separately so that type checkers can distinguish between the two +# if they want. +@_SpecialForm +def Never(self, parameters): + """The bottom type, a type that has no members. + + This can be used to define a function that should never be + called, or a function that never returns:: + + from typing import Never + + def never_call_me(arg: Never) -> None: + pass + + def int_or_str(arg: int | str) -> None: + never_call_me(arg) # type checker error + match arg: + case int(): + print("It's an int") + case str(): + print("It's a str") + case _: + never_call_me(arg) # OK, arg is of type Never + """ + raise TypeError(f"{self} is not subscriptable") + + +@_SpecialForm +def Self(self, parameters): + """Used to spell the type of "self" in classes. + Example:: - from typing import NoReturn + from typing import Self - def stop() -> NoReturn: - raise Exception('no way') + class Foo: + def return_self(self) -> Self: + ... + return self - This type is invalid in other positions, e.g., ``List[NoReturn]`` - will fail in static type checkers. + This is especially useful for: + - classmethods that are used as alternative constructors + - annotating an `__enter__` method which returns self """ raise TypeError(f"{self} is not subscriptable") + +@_SpecialForm +def LiteralString(self, parameters): + """Represents an arbitrary literal string. + + Example:: + + from typing import LiteralString + + def run_query(sql: LiteralString) -> None: + ... + + def caller(arbitrary_string: str, literal_string: LiteralString) -> None: + run_query("SELECT * FROM students") # OK + run_query(literal_string) # OK + run_query("SELECT * FROM " + literal_string) # OK + run_query(arbitrary_string) # type checker error + run_query( # type checker error + f"SELECT * FROM students WHERE name = {arbitrary_string}" + ) + + Only string literals and other LiteralStrings are compatible + with LiteralString. This provides a tool to help prevent + security issues such as SQL injection. + """ + raise TypeError(f"{self} is not subscriptable") + + @_SpecialForm def ClassVar(self, parameters): """Special type construct to mark class variables. An annotation wrapped in ClassVar indicates that a given attribute is intended to be used as a class variable and - should not be set on instances of that class. Usage:: + should not be set on instances of that class. + + Usage:: - class Starship: - stats: ClassVar[Dict[str, int]] = {} # class variable - damage: int = 10 # instance variable + class Starship: + stats: ClassVar[dict[str, int]] = {} # class variable + damage: int = 10 # instance variable ClassVar accepts only types and cannot be further subscribed. Note that ClassVar is not a class itself, and should not be used with isinstance() or issubclass(). """ - item = _type_check(parameters, f'{self} accepts only single type.') + item = _type_check(parameters, f'{self} accepts only single type.', allow_special_forms=True) return _GenericAlias(self, (item,)) @_SpecialForm @@ -463,45 +733,50 @@ def Final(self, parameters): """Special typing construct to indicate final names to type checkers. A final name cannot be re-assigned or overridden in a subclass. - For example: - MAX_SIZE: Final = 9000 - MAX_SIZE += 1 # Error reported by type checker + For example:: + + MAX_SIZE: Final = 9000 + MAX_SIZE += 1 # Error reported by type checker - class Connection: - TIMEOUT: Final[int] = 10 + class Connection: + TIMEOUT: Final[int] = 10 - class FastConnector(Connection): - TIMEOUT = 1 # Error reported by type checker + class FastConnector(Connection): + TIMEOUT = 1 # Error reported by type checker There is no runtime checking of these properties. """ - item = _type_check(parameters, f'{self} accepts only single type.') + item = _type_check(parameters, f'{self} accepts only single type.', allow_special_forms=True) return _GenericAlias(self, (item,)) @_SpecialForm def Union(self, parameters): """Union type; Union[X, Y] means either X or Y. - To define a union, use e.g. Union[int, str]. Details: + On Python 3.10 and higher, the | operator + can also be used to denote unions; + X | Y means the same thing to the type checker as Union[X, Y]. + + To define a union, use e.g. Union[int, str]. Details: - The arguments must be types and there must be at least one. - None as an argument is a special case and is replaced by type(None). - Unions of unions are flattened, e.g.:: - Union[Union[int, str], float] == Union[int, str, float] + assert Union[Union[int, str], float] == Union[int, str, float] - Unions of a single argument vanish, e.g.:: - Union[int] == int # The constructor actually returns int + assert Union[int] == int # The constructor actually returns int - Redundant arguments are skipped, e.g.:: - Union[int, str, int] == Union[int, str] + assert Union[int, str, int] == Union[int, str] - When comparing unions, the argument order is ignored, e.g.:: - Union[int, str] == Union[str, int] + assert Union[int, str] == Union[str, int] - You cannot subclass or instantiate a union. - You can use Optional[X] as a shorthand for Union[X, None]. @@ -519,33 +794,39 @@ def Union(self, parameters): return _UnionGenericAlias(self, parameters, name="Optional") return _UnionGenericAlias(self, parameters) -@_SpecialForm -def Optional(self, parameters): - """Optional type. +def _make_union(left, right): + """Used from the C implementation of TypeVar. - Optional[X] is equivalent to Union[X, None]. + TypeVar.__or__ calls this instead of returning types.UnionType + because we want to allow unions between TypeVars and strings + (forward references). """ + return Union[left, right] + +@_SpecialForm +def Optional(self, parameters): + """Optional[X] is equivalent to Union[X, None].""" arg = _type_check(parameters, f"{self} requires a single type.") return Union[arg, type(None)] -@_LiteralSpecialForm +@_TypedCacheSpecialForm @_tp_cache(typed=True) def Literal(self, *parameters): """Special typing form to define literal types (a.k.a. value types). This form can be used to indicate to type checkers that the corresponding variable or function parameter has a value equivalent to the provided - literal (or one of several literals): + literal (or one of several literals):: - def validate_simple(data: Any) -> Literal[True]: # always returns True - ... + def validate_simple(data: Any) -> Literal[True]: # always returns True + ... - MODE = Literal['r', 'rb', 'w', 'wb'] - def open_helper(file: str, mode: MODE) -> str: - ... + MODE = Literal['r', 'rb', 'w', 'wb'] + def open_helper(file: str, mode: MODE) -> str: + ... - open_helper('/some/path', 'r') # Passes type check - open_helper('/other/path', 'typo') # Error in type checker + open_helper('/some/path', 'r') # Passes type check + open_helper('/other/path', 'typo') # Error in type checker Literal[...] cannot be subclassed. At runtime, an arbitrary value is allowed as type argument to Literal[...], but type checkers may @@ -565,7 +846,9 @@ def open_helper(file: str, mode: MODE) -> str: @_SpecialForm def TypeAlias(self, parameters): - """Special marker indicating that an assignment should + """Special form for marking type aliases. + + Use TypeAlias to indicate that an assignment should be recognized as a proper type alias definition by type checkers. @@ -580,13 +863,15 @@ def TypeAlias(self, parameters): @_SpecialForm def Concatenate(self, parameters): - """Used in conjunction with ``ParamSpec`` and ``Callable`` to represent a - higher order function which adds, removes or transforms parameters of a - callable. + """Special form for annotating higher-order functions. + + ``Concatenate`` can be used in conjunction with ``ParamSpec`` and + ``Callable`` to represent a higher-order function which adds, removes or + transforms the parameters of a callable. For example:: - Callable[Concatenate[int, P], int] + Callable[Concatenate[int, P], int] See PEP 612 for detailed information. """ @@ -594,56 +879,62 @@ def Concatenate(self, parameters): raise TypeError("Cannot take a Concatenate of no types.") if not isinstance(parameters, tuple): parameters = (parameters,) - if not isinstance(parameters[-1], ParamSpec): + if not (parameters[-1] is ... or isinstance(parameters[-1], ParamSpec)): raise TypeError("The last parameter to Concatenate should be a " - "ParamSpec variable.") + "ParamSpec variable or ellipsis.") msg = "Concatenate[arg, ...]: each arg must be a type." parameters = (*(_type_check(p, msg) for p in parameters[:-1]), parameters[-1]) - return _ConcatenateGenericAlias(self, parameters, - _typevar_types=(TypeVar, ParamSpec), - _paramspec_tvars=True) + return _ConcatenateGenericAlias(self, parameters) @_SpecialForm def TypeGuard(self, parameters): - """Special typing form used to annotate the return type of a user-defined - type guard function. ``TypeGuard`` only accepts a single type argument. + """Special typing construct for marking user-defined type predicate functions. + + ``TypeGuard`` can be used to annotate the return type of a user-defined + type predicate function. ``TypeGuard`` only accepts a single type argument. At runtime, functions marked this way should return a boolean. ``TypeGuard`` aims to benefit *type narrowing* -- a technique used by static type checkers to determine a more precise type of an expression within a program's code flow. Usually type narrowing is done by analyzing conditional code flow and applying the narrowing to a block of code. The - conditional expression here is sometimes referred to as a "type guard". + conditional expression here is sometimes referred to as a "type predicate". Sometimes it would be convenient to use a user-defined boolean function - as a type guard. Such a function should use ``TypeGuard[...]`` as its - return type to alert static type checkers to this intention. + as a type predicate. Such a function should use ``TypeGuard[...]`` or + ``TypeIs[...]`` as its return type to alert static type checkers to + this intention. ``TypeGuard`` should be used over ``TypeIs`` when narrowing + from an incompatible type (e.g., ``list[object]`` to ``list[int]``) or when + the function does not return ``True`` for all instances of the narrowed type. - Using ``-> TypeGuard`` tells the static type checker that for a given - function: + Using ``-> TypeGuard[NarrowedType]`` tells the static type checker that + for a given function: 1. The return value is a boolean. 2. If the return value is ``True``, the type of its argument - is the type inside ``TypeGuard``. + is ``NarrowedType``. - For example:: + For example:: + + def is_str_list(val: list[object]) -> TypeGuard[list[str]]: + '''Determines whether all objects in the list are strings''' + return all(isinstance(x, str) for x in val) - def is_str(val: Union[str, float]): - # "isinstance" type guard - if isinstance(val, str): - # Type of ``val`` is narrowed to ``str`` - ... - else: - # Else, type of ``val`` is narrowed to ``float``. - ... + def func1(val: list[object]): + if is_str_list(val): + # Type of ``val`` is narrowed to ``list[str]``. + print(" ".join(val)) + else: + # Type of ``val`` remains as ``list[object]``. + print("Not a list of strings!") Strict type narrowing is not enforced -- ``TypeB`` need not be a narrower form of ``TypeA`` (it can even be a wider form) and this may lead to type-unsafe results. The main reason is to allow for things like - narrowing ``List[object]`` to ``List[str]`` even though the latter is not - a subtype of the former, since ``List`` is invariant. The responsibility of - writing type-safe type guards is left to the user. + narrowing ``list[object]`` to ``list[str]`` even though the latter is not + a subtype of the former, since ``list`` is invariant. The responsibility of + writing type-safe type predicates is left to the user. ``TypeGuard`` also works with type variables. For more information, see PEP 647 (User-Defined Type Guards). @@ -652,6 +943,75 @@ def is_str(val: Union[str, float]): return _GenericAlias(self, (item,)) +@_SpecialForm +def TypeIs(self, parameters): + """Special typing construct for marking user-defined type predicate functions. + + ``TypeIs`` can be used to annotate the return type of a user-defined + type predicate function. ``TypeIs`` only accepts a single type argument. + At runtime, functions marked this way should return a boolean and accept + at least one argument. + + ``TypeIs`` aims to benefit *type narrowing* -- a technique used by static + type checkers to determine a more precise type of an expression within a + program's code flow. Usually type narrowing is done by analyzing + conditional code flow and applying the narrowing to a block of code. The + conditional expression here is sometimes referred to as a "type predicate". + + Sometimes it would be convenient to use a user-defined boolean function + as a type predicate. Such a function should use ``TypeIs[...]`` or + ``TypeGuard[...]`` as its return type to alert static type checkers to + this intention. ``TypeIs`` usually has more intuitive behavior than + ``TypeGuard``, but it cannot be used when the input and output types + are incompatible (e.g., ``list[object]`` to ``list[int]``) or when the + function does not return ``True`` for all instances of the narrowed type. + + Using ``-> TypeIs[NarrowedType]`` tells the static type checker that for + a given function: + + 1. The return value is a boolean. + 2. If the return value is ``True``, the type of its argument + is the intersection of the argument's original type and + ``NarrowedType``. + 3. If the return value is ``False``, the type of its argument + is narrowed to exclude ``NarrowedType``. + + For example:: + + from typing import assert_type, final, TypeIs + + class Parent: pass + class Child(Parent): pass + @final + class Unrelated: pass + + def is_parent(val: object) -> TypeIs[Parent]: + return isinstance(val, Parent) + + def run(arg: Child | Unrelated): + if is_parent(arg): + # Type of ``arg`` is narrowed to the intersection + # of ``Parent`` and ``Child``, which is equivalent to + # ``Child``. + assert_type(arg, Child) + else: + # Type of ``arg`` is narrowed to exclude ``Parent``, + # so only ``Unrelated`` is left. + assert_type(arg, Unrelated) + + The type inside ``TypeIs`` must be consistent with the type of the + function's argument; if it is not, static type checkers will raise + an error. An incorrectly written ``TypeIs`` function can lead to + unsound behavior in the type system; it is the user's responsibility + to write such functions in a type-safe manner. + + ``TypeIs`` also works with type variables. For more information, see + PEP 742 (Narrowing types with ``TypeIs``). + """ + item = _type_check(parameters, f'{self} accepts only single type.') + return _GenericAlias(self, (item,)) + + class ForwardRef(_Final, _root=True): """Internal wrapper to hold a forward reference.""" @@ -663,10 +1023,19 @@ class ForwardRef(_Final, _root=True): def __init__(self, arg, is_argument=True, module=None, *, is_class=False): if not isinstance(arg, str): raise TypeError(f"Forward reference must be a string -- got {arg!r}") + + # If we do `def f(*args: *Ts)`, then we'll have `arg = '*Ts'`. + # Unfortunately, this isn't a valid expression on its own, so we + # do the unpacking manually. + if arg.startswith('*'): + arg_to_compile = f'({arg},)[0]' # E.g. (*Ts,)[0] or (*tuple[int, int],)[0] + else: + arg_to_compile = arg try: - code = compile(arg, '', 'eval') + code = compile(arg_to_compile, '', 'eval') except SyntaxError: raise SyntaxError(f"Forward reference must be an expression -- got {arg!r}") + self.__forward_arg__ = arg self.__forward_code__ = code self.__forward_evaluated__ = False @@ -675,7 +1044,10 @@ def __init__(self, arg, is_argument=True, module=None, *, is_class=False): self.__forward_is_class__ = is_class self.__forward_module__ = module - def _evaluate(self, globalns, localns, recursive_guard): + def _evaluate(self, globalns, localns, type_params=_sentinel, *, recursive_guard): + if type_params is _sentinel: + _deprecation_warning_for_no_type_params_passed("typing.ForwardRef._evaluate") + type_params = () if self.__forward_arg__ in recursive_guard: return self if not self.__forward_evaluated__ or localns is not globalns: @@ -689,6 +1061,22 @@ def _evaluate(self, globalns, localns, recursive_guard): globalns = getattr( sys.modules.get(self.__forward_module__, None), '__dict__', globalns ) + + # type parameters require some special handling, + # as they exist in their own scope + # but `eval()` does not have a dedicated parameter for that scope. + # For classes, names in type parameter scopes should override + # names in the global scope (which here are called `localns`!), + # but should in turn be overridden by names in the class scope + # (which here are called `globalns`!) + if type_params: + globalns, localns = dict(globalns), dict(localns) + for param in type_params: + param_name = param.__name__ + if not self.__forward_is_class__ or param_name not in globalns: + globalns[param_name] = param + localns.pop(param_name, None) + type_ = _type_check( eval(self.__forward_code__, globalns, localns), "Forward references must evaluate to types.", @@ -696,7 +1084,11 @@ def _evaluate(self, globalns, localns, recursive_guard): allow_special_forms=self.__forward_is_class__, ) self.__forward_value__ = _eval_type( - type_, globalns, localns, recursive_guard | {self.__forward_arg__} + type_, + globalns, + localns, + type_params, + recursive_guard=(recursive_guard | {self.__forward_arg__}), ) self.__forward_evaluated__ = True return self.__forward_value__ @@ -713,236 +1105,205 @@ def __eq__(self, other): def __hash__(self): return hash((self.__forward_arg__, self.__forward_module__)) - def __repr__(self): - return f'ForwardRef({self.__forward_arg__!r})' - -class _TypeVarLike: - """Mixin for TypeVar-like types (TypeVar and ParamSpec).""" - def __init__(self, bound, covariant, contravariant): - """Used to setup TypeVars and ParamSpec's bound, covariant and - contravariant attributes. - """ - if covariant and contravariant: - raise ValueError("Bivariant types are not supported.") - self.__covariant__ = bool(covariant) - self.__contravariant__ = bool(contravariant) - if bound: - self.__bound__ = _type_check(bound, "Bound must be a type.") - else: - self.__bound__ = None - - def __or__(self, right): - return Union[self, right] + def __or__(self, other): + return Union[self, other] - def __ror__(self, left): - return Union[left, self] + def __ror__(self, other): + return Union[other, self] def __repr__(self): - if self.__covariant__: - prefix = '+' - elif self.__contravariant__: - prefix = '-' + if self.__forward_module__ is None: + module_repr = '' else: - prefix = '~' - return prefix + self.__name__ - - def __reduce__(self): - return self.__name__ + module_repr = f', module={self.__forward_module__!r}' + return f'ForwardRef({self.__forward_arg__!r}{module_repr})' -class TypeVar( _Final, _Immutable, _TypeVarLike, _root=True): - """Type variable. +def _is_unpacked_typevartuple(x: Any) -> bool: + return ((not isinstance(x, type)) and + getattr(x, '__typing_is_unpacked_typevartuple__', False)) - Usage:: - - T = TypeVar('T') # Can be anything - A = TypeVar('A', str, bytes) # Must be str or bytes - - Type variables exist primarily for the benefit of static type - checkers. They serve as the parameters for generic types as well - as for generic function definitions. See class Generic for more - information on generic types. Generic functions work as follows: - - def repeat(x: T, n: int) -> List[T]: - '''Return a list containing n references to x.''' - return [x]*n - - def longest(x: A, y: A) -> A: - '''Return the longest of two strings.''' - return x if len(x) >= len(y) else y - - The latter example's signature is essentially the overloading - of (str, str) -> str and (bytes, bytes) -> bytes. Also note - that if the arguments are instances of some subclass of str, - the return type is still plain str. - - At runtime, isinstance(x, T) and issubclass(C, T) will raise TypeError. - - Type variables defined with covariant=True or contravariant=True - can be used to declare covariant or contravariant generic types. - See PEP 484 for more details. By default generic types are invariant - in all type variables. - - Type variables can be introspected. e.g.: - - T.__name__ == 'T' - T.__constraints__ == () - T.__covariant__ == False - T.__contravariant__ = False - A.__constraints__ == (str, bytes) - - Note that only type variables defined in global scope can be pickled. - """ - - __slots__ = ('__name__', '__bound__', '__constraints__', - '__covariant__', '__contravariant__', '__dict__') - - def __init__(self, name, *constraints, bound=None, - covariant=False, contravariant=False): - self.__name__ = name - super().__init__(bound, covariant, contravariant) - if constraints and bound is not None: - raise TypeError("Constraints cannot be combined with bound=...") - if constraints and len(constraints) == 1: - raise TypeError("A single constraint is not allowed") - msg = "TypeVar(name, constraint, ...): constraints must be types." - self.__constraints__ = tuple(_type_check(t, msg) for t in constraints) - try: - def_mod = sys._getframe(1).f_globals.get('__name__', '__main__') # for pickling - except (AttributeError, ValueError): - def_mod = None - if def_mod != 'typing': - self.__module__ = def_mod - - -class ParamSpecArgs(_Final, _Immutable, _root=True): - """The args for a ParamSpec object. - - Given a ParamSpec object P, P.args is an instance of ParamSpecArgs. - ParamSpecArgs objects have a reference back to their ParamSpec: +def _is_typevar_like(x: Any) -> bool: + return isinstance(x, (TypeVar, ParamSpec)) or _is_unpacked_typevartuple(x) - P.args.__origin__ is P - This type is meant for runtime introspection and has no special meaning to - static type checkers. - """ - def __init__(self, origin): - self.__origin__ = origin +def _typevar_subst(self, arg): + msg = "Parameters to generic types must be types." + arg = _type_check(arg, msg, is_argument=True) + if ((isinstance(arg, _GenericAlias) and arg.__origin__ is Unpack) or + (isinstance(arg, GenericAlias) and getattr(arg, '__unpacked__', False))): + raise TypeError(f"{arg} is not valid as type argument") + return arg - def __repr__(self): - return f"{self.__origin__.__name__}.args" - def __eq__(self, other): - if not isinstance(other, ParamSpecArgs): - return NotImplemented - return self.__origin__ == other.__origin__ +def _typevartuple_prepare_subst(self, alias, args): + params = alias.__parameters__ + typevartuple_index = params.index(self) + for param in params[typevartuple_index + 1:]: + if isinstance(param, TypeVarTuple): + raise TypeError(f"More than one TypeVarTuple parameter in {alias}") + + alen = len(args) + plen = len(params) + left = typevartuple_index + right = plen - typevartuple_index - 1 + var_tuple_index = None + fillarg = None + for k, arg in enumerate(args): + if not isinstance(arg, type): + subargs = getattr(arg, '__typing_unpacked_tuple_args__', None) + if subargs and len(subargs) == 2 and subargs[-1] is ...: + if var_tuple_index is not None: + raise TypeError("More than one unpacked arbitrary-length tuple argument") + var_tuple_index = k + fillarg = subargs[0] + if var_tuple_index is not None: + left = min(left, var_tuple_index) + right = min(right, alen - var_tuple_index - 1) + elif left + right > alen: + raise TypeError(f"Too few arguments for {alias};" + f" actual {alen}, expected at least {plen-1}") + if left == alen - right and self.has_default(): + replacement = _unpack_args(self.__default__) + else: + replacement = args[left: alen - right] + + return ( + *args[:left], + *([fillarg]*(typevartuple_index - left)), + replacement, + *([fillarg]*(plen - right - left - typevartuple_index - 1)), + *args[alen - right:], + ) + + +def _paramspec_subst(self, arg): + if isinstance(arg, (list, tuple)): + arg = tuple(_type_check(a, "Expected a type.") for a in arg) + elif not _is_param_expr(arg): + raise TypeError(f"Expected a list of types, an ellipsis, " + f"ParamSpec, or Concatenate. Got {arg}") + return arg -class ParamSpecKwargs(_Final, _Immutable, _root=True): - """The kwargs for a ParamSpec object. +def _paramspec_prepare_subst(self, alias, args): + params = alias.__parameters__ + i = params.index(self) + if i == len(args) and self.has_default(): + args = [*args, self.__default__] + if i >= len(args): + raise TypeError(f"Too few arguments for {alias}") + # Special case where Z[[int, str, bool]] == Z[int, str, bool] in PEP 612. + if len(params) == 1 and not _is_param_expr(args[0]): + assert i == 0 + args = (args,) + # Convert lists to tuples to help other libraries cache the results. + elif isinstance(args[i], list): + args = (*args[:i], tuple(args[i]), *args[i+1:]) + return args - Given a ParamSpec object P, P.kwargs is an instance of ParamSpecKwargs. - ParamSpecKwargs objects have a reference back to their ParamSpec: +@_tp_cache +def _generic_class_getitem(cls, args): + """Parameterizes a generic class. - P.kwargs.__origin__ is P + At least, parameterizing a generic class is the *main* thing this method + does. For example, for some generic class `Foo`, this is called when we + do `Foo[int]` - there, with `cls=Foo` and `args=int`. - This type is meant for runtime introspection and has no special meaning to - static type checkers. + However, note that this method is also called when defining generic + classes in the first place with `class Foo(Generic[T]): ...`. """ - def __init__(self, origin): - self.__origin__ = origin - - def __repr__(self): - return f"{self.__origin__.__name__}.kwargs" - - def __eq__(self, other): - if not isinstance(other, ParamSpecKwargs): - return NotImplemented - return self.__origin__ == other.__origin__ + if not isinstance(args, tuple): + args = (args,) + args = tuple(_type_convert(p) for p in args) + is_generic_or_protocol = cls in (Generic, Protocol) -class ParamSpec(_Final, _Immutable, _TypeVarLike, _root=True): - """Parameter specification variable. - - Usage:: - - P = ParamSpec('P') - - Parameter specification variables exist primarily for the benefit of static - type checkers. They are used to forward the parameter types of one - callable to another callable, a pattern commonly found in higher order - functions and decorators. They are only valid when used in ``Concatenate``, - or as the first argument to ``Callable``, or as parameters for user-defined - Generics. See class Generic for more information on generic types. An - example for annotating a decorator:: - - T = TypeVar('T') - P = ParamSpec('P') - - def add_logging(f: Callable[P, T]) -> Callable[P, T]: - '''A type-safe decorator to add logging to a function.''' - def inner(*args: P.args, **kwargs: P.kwargs) -> T: - logging.info(f'{f.__name__} was called') - return f(*args, **kwargs) - return inner - - @add_logging - def add_two(x: float, y: float) -> float: - '''Add two numbers together.''' - return x + y - - Parameter specification variables defined with covariant=True or - contravariant=True can be used to declare covariant or contravariant - generic types. These keyword arguments are valid, but their actual semantics - are yet to be decided. See PEP 612 for details. - - Parameter specification variables can be introspected. e.g.: - - P.__name__ == 'T' - P.__bound__ == None - P.__covariant__ == False - P.__contravariant__ == False - - Note that only parameter specification variables defined in global scope can - be pickled. - """ + if is_generic_or_protocol: + # Generic and Protocol can only be subscripted with unique type variables. + if not args: + raise TypeError( + f"Parameter list to {cls.__qualname__}[...] cannot be empty" + ) + if not all(_is_typevar_like(p) for p in args): + raise TypeError( + f"Parameters to {cls.__name__}[...] must all be type variables " + f"or parameter specification variables.") + if len(set(args)) != len(args): + raise TypeError( + f"Parameters to {cls.__name__}[...] must all be unique") + else: + # Subscripting a regular Generic subclass. + for param in cls.__parameters__: + prepare = getattr(param, '__typing_prepare_subst__', None) + if prepare is not None: + args = prepare(cls, args) + _check_generic_specialization(cls, args) - __slots__ = ('__name__', '__bound__', '__covariant__', '__contravariant__', - '__dict__') + new_args = [] + for param, new_arg in zip(cls.__parameters__, args): + if isinstance(param, TypeVarTuple): + new_args.extend(new_arg) + else: + new_args.append(new_arg) + args = tuple(new_args) - @property - def args(self): - return ParamSpecArgs(self) + return _GenericAlias(cls, args) - @property - def kwargs(self): - return ParamSpecKwargs(self) - def __init__(self, name, *, bound=None, covariant=False, contravariant=False): - self.__name__ = name - super().__init__(bound, covariant, contravariant) - try: - def_mod = sys._getframe(1).f_globals.get('__name__', '__main__') - except (AttributeError, ValueError): - def_mod = None - if def_mod != 'typing': - self.__module__ = def_mod +def _generic_init_subclass(cls, *args, **kwargs): + super(Generic, cls).__init_subclass__(*args, **kwargs) + tvars = [] + if '__orig_bases__' in cls.__dict__: + error = Generic in cls.__orig_bases__ + else: + error = (Generic in cls.__bases__ and + cls.__name__ != 'Protocol' and + type(cls) != _TypedDictMeta) + if error: + raise TypeError("Cannot inherit from plain Generic") + if '__orig_bases__' in cls.__dict__: + tvars = _collect_type_parameters(cls.__orig_bases__) + # Look for Generic[T1, ..., Tn]. + # If found, tvars must be a subset of it. + # If not found, tvars is it. + # Also check for and reject plain Generic, + # and reject multiple Generic[...]. + gvars = None + for base in cls.__orig_bases__: + if (isinstance(base, _GenericAlias) and + base.__origin__ is Generic): + if gvars is not None: + raise TypeError( + "Cannot inherit from Generic[...] multiple times.") + gvars = base.__parameters__ + if gvars is not None: + tvarset = set(tvars) + gvarset = set(gvars) + if not tvarset <= gvarset: + s_vars = ', '.join(str(t) for t in tvars if t not in gvarset) + s_args = ', '.join(str(g) for g in gvars) + raise TypeError(f"Some type variables ({s_vars}) are" + f" not listed in Generic[{s_args}]") + tvars = gvars + cls.__parameters__ = tuple(tvars) def _is_dunder(attr): return attr.startswith('__') and attr.endswith('__') class _BaseGenericAlias(_Final, _root=True): - """The central part of internal API. + """The central part of the internal API. This represents a generic version of type 'origin' with type arguments 'params'. There are two kind of these aliases: user defined and special. The special ones are wrappers around builtin collections and ABCs in collections.abc. These must - have 'name' always set. If 'inst' is False, then the alias can't be instantiated, + have 'name' always set. If 'inst' is False, then the alias can't be instantiated; this is used by e.g. typing.List and typing.Dict. """ + def __init__(self, origin, *, inst=True, name=None): self._inst = inst self._name = name @@ -956,7 +1317,9 @@ def __call__(self, *args, **kwargs): result = self.__origin__(*args, **kwargs) try: result.__orig_class__ = self - except AttributeError: + # Some objects raise TypeError (or something even more exotic) + # if you try to set attributes on them; we guard against that here + except Exception: pass return result @@ -964,9 +1327,29 @@ def __mro_entries__(self, bases): res = [] if self.__origin__ not in bases: res.append(self.__origin__) + + # Check if any base that occurs after us in `bases` is either itself a + # subclass of Generic, or something which will add a subclass of Generic + # to `__bases__` via its `__mro_entries__`. If not, add Generic + # ourselves. The goal is to ensure that Generic (or a subclass) will + # appear exactly once in the final bases tuple. If we let it appear + # multiple times, we risk "can't form a consistent MRO" errors. i = bases.index(self) for b in bases[i+1:]: - if isinstance(b, _BaseGenericAlias) or issubclass(b, Generic): + if isinstance(b, _BaseGenericAlias): + break + if not isinstance(b, type): + meth = getattr(b, "__mro_entries__", None) + new_bases = meth(bases) if meth else None + if ( + isinstance(new_bases, tuple) and + any( + isinstance(b2, type) and issubclass(b2, Generic) + for b2 in new_bases + ) + ): + break + elif issubclass(b, Generic): break else: res.append(Generic) @@ -983,8 +1366,7 @@ def __getattr__(self, attr): raise AttributeError(attr) def __setattr__(self, attr, val): - if _is_dunder(attr) or attr in {'_name', '_inst', '_nparams', - '_typevar_types', '_paramspec_tvars'}: + if _is_dunder(attr) or attr in {'_name', '_inst', '_nparams', '_defaults'}: super().__setattr__(attr, val) else: setattr(self.__origin__, attr, val) @@ -1000,6 +1382,7 @@ def __dir__(self): return list(set(super().__dir__() + [attr for attr in dir(self.__origin__) if not _is_dunder(attr)])) + # Special typing constructs Union, Optional, Generic, Callable and Tuple # use three special attributes for internal bookkeeping of generic types: # * __parameters__ is a tuple of unique free type parameters of a generic @@ -1012,18 +1395,42 @@ def __dir__(self): class _GenericAlias(_BaseGenericAlias, _root=True): - def __init__(self, origin, params, *, inst=True, name=None, - _typevar_types=TypeVar, - _paramspec_tvars=False): + # The type of parameterized generics. + # + # That is, for example, `type(List[int])` is `_GenericAlias`. + # + # Objects which are instances of this class include: + # * Parameterized container types, e.g. `Tuple[int]`, `List[int]`. + # * Note that native container types, e.g. `tuple`, `list`, use + # `types.GenericAlias` instead. + # * Parameterized classes: + # class C[T]: pass + # # C[int] is a _GenericAlias + # * `Callable` aliases, generic `Callable` aliases, and + # parameterized `Callable` aliases: + # T = TypeVar('T') + # # _CallableGenericAlias inherits from _GenericAlias. + # A = Callable[[], None] # _CallableGenericAlias + # B = Callable[[T], None] # _CallableGenericAlias + # C = B[int] # _CallableGenericAlias + # * Parameterized `Final`, `ClassVar`, `TypeGuard`, and `TypeIs`: + # # All _GenericAlias + # Final[int] + # ClassVar[float] + # TypeGuard[bool] + # TypeIs[range] + + def __init__(self, origin, args, *, inst=True, name=None): super().__init__(origin, inst=inst, name=name) - if not isinstance(params, tuple): - params = (params,) + if not isinstance(args, tuple): + args = (args,) self.__args__ = tuple(... if a is _TypingEllipsis else - () if a is _TypingEmpty else - a for a in params) - self.__parameters__ = _collect_type_vars(params, typevar_types=_typevar_types) - self._typevar_types = _typevar_types - self._paramspec_tvars = _paramspec_tvars + a for a in args) + enforce_default_ordering = origin in (Generic, Protocol) + self.__parameters__ = _collect_type_parameters( + args, + enforce_default_ordering=enforce_default_ordering, + ) if not name: self.__module__ = origin.__module__ @@ -1043,53 +1450,140 @@ def __ror__(self, left): return Union[left, self] @_tp_cache - def __getitem__(self, params): + def __getitem__(self, args): + # Parameterizes an already-parameterized object. + # + # For example, we arrive here doing something like: + # T1 = TypeVar('T1') + # T2 = TypeVar('T2') + # T3 = TypeVar('T3') + # class A(Generic[T1]): pass + # B = A[T2] # B is a _GenericAlias + # C = B[T3] # Invokes _GenericAlias.__getitem__ + # + # We also arrive here when parameterizing a generic `Callable` alias: + # T = TypeVar('T') + # C = Callable[[T], None] + # C[int] # Invokes _GenericAlias.__getitem__ + if self.__origin__ in (Generic, Protocol): # Can't subscript Generic[...] or Protocol[...]. raise TypeError(f"Cannot subscript already-subscripted {self}") - if not isinstance(params, tuple): - params = (params,) - params = tuple(_type_convert(p) for p in params) - if (self._paramspec_tvars - and any(isinstance(t, ParamSpec) for t in self.__parameters__)): - params = _prepare_paramspec_params(self, params) - else: - _check_generic(self, params, len(self.__parameters__)) + if not self.__parameters__: + raise TypeError(f"{self} is not a generic class") - subst = dict(zip(self.__parameters__, params)) + # Preprocess `args`. + if not isinstance(args, tuple): + args = (args,) + args = _unpack_args(*(_type_convert(p) for p in args)) + new_args = self._determine_new_args(args) + r = self.copy_with(new_args) + return r + + def _determine_new_args(self, args): + # Determines new __args__ for __getitem__. + # + # For example, suppose we had: + # T1 = TypeVar('T1') + # T2 = TypeVar('T2') + # class A(Generic[T1, T2]): pass + # T3 = TypeVar('T3') + # B = A[int, T3] + # C = B[str] + # `B.__args__` is `(int, T3)`, so `C.__args__` should be `(int, str)`. + # Unfortunately, this is harder than it looks, because if `T3` is + # anything more exotic than a plain `TypeVar`, we need to consider + # edge cases. + + params = self.__parameters__ + # In the example above, this would be {T3: str} + for param in params: + prepare = getattr(param, '__typing_prepare_subst__', None) + if prepare is not None: + args = prepare(self, args) + alen = len(args) + plen = len(params) + if alen != plen: + raise TypeError(f"Too {'many' if alen > plen else 'few'} arguments for {self};" + f" actual {alen}, expected {plen}") + new_arg_by_param = dict(zip(params, args)) + return tuple(self._make_substitution(self.__args__, new_arg_by_param)) + + def _make_substitution(self, args, new_arg_by_param): + """Create a list of new type arguments.""" new_args = [] - for arg in self.__args__: - if isinstance(arg, self._typevar_types): - if isinstance(arg, ParamSpec): - arg = subst[arg] - if not _is_param_expr(arg): - raise TypeError(f"Expected a list of types, an ellipsis, " - f"ParamSpec, or Concatenate. Got {arg}") + for old_arg in args: + if isinstance(old_arg, type): + new_args.append(old_arg) + continue + + substfunc = getattr(old_arg, '__typing_subst__', None) + if substfunc: + new_arg = substfunc(new_arg_by_param[old_arg]) + else: + subparams = getattr(old_arg, '__parameters__', ()) + if not subparams: + new_arg = old_arg else: - arg = subst[arg] - elif isinstance(arg, (_GenericAlias, GenericAlias, types.UnionType)): - subparams = arg.__parameters__ - if subparams: - subargs = tuple(subst[x] for x in subparams) - arg = arg[subargs] - # Required to flatten out the args for CallableGenericAlias - if self.__origin__ == collections.abc.Callable and isinstance(arg, tuple): - new_args.extend(arg) + subargs = [] + for x in subparams: + if isinstance(x, TypeVarTuple): + subargs.extend(new_arg_by_param[x]) + else: + subargs.append(new_arg_by_param[x]) + new_arg = old_arg[tuple(subargs)] + + if self.__origin__ == collections.abc.Callable and isinstance(new_arg, tuple): + # Consider the following `Callable`. + # C = Callable[[int], str] + # Here, `C.__args__` should be (int, str) - NOT ([int], str). + # That means that if we had something like... + # P = ParamSpec('P') + # T = TypeVar('T') + # C = Callable[P, T] + # D = C[[int, str], float] + # ...we need to be careful; `new_args` should end up as + # `(int, str, float)` rather than `([int, str], float)`. + new_args.extend(new_arg) + elif _is_unpacked_typevartuple(old_arg): + # Consider the following `_GenericAlias`, `B`: + # class A(Generic[*Ts]): ... + # B = A[T, *Ts] + # If we then do: + # B[float, int, str] + # The `new_arg` corresponding to `T` will be `float`, and the + # `new_arg` corresponding to `*Ts` will be `(int, str)`. We + # should join all these types together in a flat list + # `(float, int, str)` - so again, we should `extend`. + new_args.extend(new_arg) + elif isinstance(old_arg, tuple): + # Corner case: + # P = ParamSpec('P') + # T = TypeVar('T') + # class Base(Generic[P]): ... + # Can be substituted like this: + # X = Base[[int, T]] + # In this case, `old_arg` will be a tuple: + new_args.append( + tuple(self._make_substitution(old_arg, new_arg_by_param)), + ) else: - new_args.append(arg) - return self.copy_with(tuple(new_args)) + new_args.append(new_arg) + return new_args - def copy_with(self, params): - return self.__class__(self.__origin__, params, name=self._name, inst=self._inst, - _typevar_types=self._typevar_types, - _paramspec_tvars=self._paramspec_tvars) + def copy_with(self, args): + return self.__class__(self.__origin__, args, name=self._name, inst=self._inst) def __repr__(self): if self._name: name = 'typing.' + self._name else: name = _type_repr(self.__origin__) - args = ", ".join([_type_repr(a) for a in self.__args__]) + if self.__args__: + args = ", ".join([_type_repr(a) for a in self.__args__]) + else: + # To ensure the repr is eval-able. + args = "()" return f'{name}[{args}]' def __reduce__(self): @@ -1117,17 +1611,21 @@ def __mro_entries__(self, bases): return () return (self.__origin__,) + def __iter__(self): + yield Unpack[self] + # _nparams is the number of accepted parameters, e.g. 0 for Hashable, # 1 for List and 2 for Dict. It may be -1 if variable number of # parameters are accepted (needs custom __getitem__). -class _SpecialGenericAlias(_BaseGenericAlias, _root=True): - def __init__(self, origin, nparams, *, inst=True, name=None): +class _SpecialGenericAlias(_NotIterable, _BaseGenericAlias, _root=True): + def __init__(self, origin, nparams, *, inst=True, name=None, defaults=()): if name is None: name = origin.__name__ super().__init__(origin, inst=inst, name=name) self._nparams = nparams + self._defaults = defaults if origin.__module__ == 'builtins': self.__doc__ = f'A generic version of {origin.__qualname__}.' else: @@ -1139,7 +1637,22 @@ def __getitem__(self, params): params = (params,) msg = "Parameters to generic types must be types." params = tuple(_type_check(p, msg) for p in params) - _check_generic(self, params, self._nparams) + if (self._defaults + and len(params) < self._nparams + and len(params) + len(self._defaults) >= self._nparams + ): + params = (*params, *self._defaults[len(params) - self._nparams:]) + actual_len = len(params) + + if actual_len != self._nparams: + if self._defaults: + expected = f"at least {self._nparams - len(self._defaults)}" + else: + expected = str(self._nparams) + if not self._nparams: + raise TypeError(f"{self} is not a generic class") + raise TypeError(f"Too {'many' if actual_len > self._nparams else 'few'} arguments for {self};" + f" actual {actual_len}, expected {expected}") return self.copy_with(params) def copy_with(self, params): @@ -1165,7 +1678,23 @@ def __or__(self, right): def __ror__(self, left): return Union[left, self] -class _CallableGenericAlias(_GenericAlias, _root=True): + +class _DeprecatedGenericAlias(_SpecialGenericAlias, _root=True): + def __init__( + self, origin, nparams, *, removal_version, inst=True, name=None + ): + super().__init__(origin, nparams, inst=inst, name=name) + self._removal_version = removal_version + + def __instancecheck__(self, inst): + import warnings + warnings._deprecated( + f"{self.__module__}.{self._name}", remove=self._removal_version + ) + return super().__instancecheck__(inst) + + +class _CallableGenericAlias(_NotIterable, _GenericAlias, _root=True): def __repr__(self): assert self._name == 'Callable' args = self.__args__ @@ -1185,9 +1714,7 @@ def __reduce__(self): class _CallableType(_SpecialGenericAlias, _root=True): def copy_with(self, params): return _CallableGenericAlias(self.__origin__, params, - name=self._name, inst=self._inst, - _typevar_types=(TypeVar, ParamSpec), - _paramspec_tvars=True) + name=self._name, inst=self._inst) def __getitem__(self, params): if not isinstance(params, tuple) or len(params) != 2: @@ -1220,27 +1747,28 @@ def __getitem_inner__(self, params): class _TupleType(_SpecialGenericAlias, _root=True): @_tp_cache def __getitem__(self, params): - if params == (): - return self.copy_with((_TypingEmpty,)) if not isinstance(params, tuple): params = (params,) - if len(params) == 2 and params[1] is ...: + if len(params) >= 2 and params[-1] is ...: msg = "Tuple[t, ...]: t must be a type." - p = _type_check(params[0], msg) - return self.copy_with((p, _TypingEllipsis)) + params = tuple(_type_check(p, msg) for p in params[:-1]) + return self.copy_with((*params, _TypingEllipsis)) msg = "Tuple[t0, t1, ...]: each t must be a type." params = tuple(_type_check(p, msg) for p in params) return self.copy_with(params) -class _UnionGenericAlias(_GenericAlias, _root=True): +class _UnionGenericAlias(_NotIterable, _GenericAlias, _root=True): def copy_with(self, params): return Union[params] def __eq__(self, other): if not isinstance(other, (_UnionGenericAlias, types.UnionType)): return NotImplemented - return set(self.__args__) == set(other.__args__) + try: # fast path + return set(self.__args__) == set(other.__args__) + except TypeError: # not hashable, slow path + return _compare_args_orderless(self.__args__, other.__args__) def __hash__(self): return hash(frozenset(self.__args__)) @@ -1255,12 +1783,16 @@ def __repr__(self): return super().__repr__() def __instancecheck__(self, obj): - return self.__subclasscheck__(type(obj)) + for arg in self.__args__: + if isinstance(obj, arg): + return True + return False def __subclasscheck__(self, cls): for arg in self.__args__: if issubclass(cls, arg): return True + return False def __reduce__(self): func, (origin, args) = super().__reduce__() @@ -1272,7 +1804,6 @@ def _value_and_type_iter(parameters): class _LiteralGenericAlias(_GenericAlias, _root=True): - def __eq__(self, other): if not isinstance(other, _LiteralGenericAlias): return NotImplemented @@ -1289,118 +1820,108 @@ def copy_with(self, params): return (*params[:-1], *params[-1]) if isinstance(params[-1], _ConcatenateGenericAlias): params = (*params[:-1], *params[-1].__args__) - elif not isinstance(params[-1], ParamSpec): - raise TypeError("The last parameter to Concatenate should be a " - "ParamSpec variable.") return super().copy_with(params) -class Generic: - """Abstract base class for generic types. +@_SpecialForm +def Unpack(self, parameters): + """Type unpack operator. - A generic type is typically declared by inheriting from - this class parameterized with one or more type variables. - For example, a generic mapping type might be defined as:: + The type unpack operator takes the child types from some container type, + such as `tuple[int, str]` or a `TypeVarTuple`, and 'pulls them out'. - class Mapping(Generic[KT, VT]): - def __getitem__(self, key: KT) -> VT: - ... - # Etc. + For example:: - This class can then be used as follows:: + # For some generic class `Foo`: + Foo[Unpack[tuple[int, str]]] # Equivalent to Foo[int, str] - def lookup_name(mapping: Mapping[KT, VT], key: KT, default: VT) -> VT: - try: - return mapping[key] - except KeyError: - return default - """ - __slots__ = () - _is_protocol = False + Ts = TypeVarTuple('Ts') + # Specifies that `Bar` is generic in an arbitrary number of types. + # (Think of `Ts` as a tuple of an arbitrary number of individual + # `TypeVar`s, which the `Unpack` is 'pulling out' directly into the + # `Generic[]`.) + class Bar(Generic[Unpack[Ts]]): ... + Bar[int] # Valid + Bar[int, str] # Also valid - @_tp_cache - def __class_getitem__(cls, params): - if not isinstance(params, tuple): - params = (params,) - if not params and cls is not Tuple: - raise TypeError( - f"Parameter list to {cls.__qualname__}[...] cannot be empty") - params = tuple(_type_convert(p) for p in params) - if cls in (Generic, Protocol): - # Generic and Protocol can only be subscripted with unique type variables. - if not all(isinstance(p, (TypeVar, ParamSpec)) for p in params): - raise TypeError( - f"Parameters to {cls.__name__}[...] must all be type variables " - f"or parameter specification variables.") - if len(set(params)) != len(params): - raise TypeError( - f"Parameters to {cls.__name__}[...] must all be unique") - else: - # Subscripting a regular Generic subclass. - if any(isinstance(t, ParamSpec) for t in cls.__parameters__): - params = _prepare_paramspec_params(cls, params) - else: - _check_generic(cls, params, len(cls.__parameters__)) - return _GenericAlias(cls, params, - _typevar_types=(TypeVar, ParamSpec), - _paramspec_tvars=True) + From Python 3.11, this can also be done using the `*` operator:: - def __init_subclass__(cls, *args, **kwargs): - super().__init_subclass__(*args, **kwargs) - tvars = [] - if '__orig_bases__' in cls.__dict__: - error = Generic in cls.__orig_bases__ - else: - error = Generic in cls.__bases__ and cls.__name__ != 'Protocol' - if error: - raise TypeError("Cannot inherit from plain Generic") - if '__orig_bases__' in cls.__dict__: - tvars = _collect_type_vars(cls.__orig_bases__, (TypeVar, ParamSpec)) - # Look for Generic[T1, ..., Tn]. - # If found, tvars must be a subset of it. - # If not found, tvars is it. - # Also check for and reject plain Generic, - # and reject multiple Generic[...]. - gvars = None - for base in cls.__orig_bases__: - if (isinstance(base, _GenericAlias) and - base.__origin__ is Generic): - if gvars is not None: - raise TypeError( - "Cannot inherit from Generic[...] multiple types.") - gvars = base.__parameters__ - if gvars is not None: - tvarset = set(tvars) - gvarset = set(gvars) - if not tvarset <= gvarset: - s_vars = ', '.join(str(t) for t in tvars if t not in gvarset) - s_args = ', '.join(str(g) for g in gvars) - raise TypeError(f"Some type variables ({s_vars}) are" - f" not listed in Generic[{s_args}]") - tvars = gvars - cls.__parameters__ = tuple(tvars) - - -class _TypingEmpty: - """Internal placeholder for () or []. Used by TupleMeta and CallableMeta - to allow empty list/tuple in specific places, without allowing them - to sneak in where prohibited. + Foo[*tuple[int, str]] + class Bar(Generic[*Ts]): ... + + And from Python 3.12, it can be done using built-in syntax for generics:: + + Foo[*tuple[int, str]] + class Bar[*Ts]: ... + + The operator can also be used along with a `TypedDict` to annotate + `**kwargs` in a function signature:: + + class Movie(TypedDict): + name: str + year: int + + # This function expects two keyword arguments - *name* of type `str` and + # *year* of type `int`. + def foo(**kwargs: Unpack[Movie]): ... + + Note that there is only some runtime checking of this operator. Not + everything the runtime allows may be accepted by static type checkers. + + For more information, see PEPs 646 and 692. """ + item = _type_check(parameters, f'{self} accepts only single type.') + return _UnpackGenericAlias(origin=self, args=(item,)) + + +class _UnpackGenericAlias(_GenericAlias, _root=True): + def __repr__(self): + # `Unpack` only takes one argument, so __args__ should contain only + # a single item. + return f'typing.Unpack[{_type_repr(self.__args__[0])}]' + + def __getitem__(self, args): + if self.__typing_is_unpacked_typevartuple__: + return args + return super().__getitem__(args) + + @property + def __typing_unpacked_tuple_args__(self): + assert self.__origin__ is Unpack + assert len(self.__args__) == 1 + arg, = self.__args__ + if isinstance(arg, (_GenericAlias, types.GenericAlias)): + if arg.__origin__ is not tuple: + raise TypeError("Unpack[...] must be used with a tuple type") + return arg.__args__ + return None + + @property + def __typing_is_unpacked_typevartuple__(self): + assert self.__origin__ is Unpack + assert len(self.__args__) == 1 + return isinstance(self.__args__[0], TypeVarTuple) class _TypingEllipsis: """Internal placeholder for ... (ellipsis).""" -_TYPING_INTERNALS = ['__parameters__', '__orig_bases__', '__orig_class__', - '_is_protocol', '_is_runtime_protocol'] +_TYPING_INTERNALS = frozenset({ + '__parameters__', '__orig_bases__', '__orig_class__', + '_is_protocol', '_is_runtime_protocol', '__protocol_attrs__', + '__non_callable_proto_members__', '__type_params__', +}) -_SPECIAL_NAMES = ['__abstractmethods__', '__annotations__', '__dict__', '__doc__', - '__init__', '__module__', '__new__', '__slots__', - '__subclasshook__', '__weakref__', '__class_getitem__'] +_SPECIAL_NAMES = frozenset({ + '__abstractmethods__', '__annotations__', '__dict__', '__doc__', + '__init__', '__module__', '__new__', '__slots__', + '__subclasshook__', '__weakref__', '__class_getitem__', + '__match_args__', '__static_attributes__', '__firstlineno__', +}) # These special attributes will be not collected as protocol members. -EXCLUDED_ATTRIBUTES = _TYPING_INTERNALS + _SPECIAL_NAMES + ['_MutableMapping__marker'] +EXCLUDED_ATTRIBUTES = _TYPING_INTERNALS | _SPECIAL_NAMES | {'_MutableMapping__marker'} def _get_protocol_attrs(cls): @@ -1411,20 +1932,15 @@ def _get_protocol_attrs(cls): """ attrs = set() for base in cls.__mro__[:-1]: # without object - if base.__name__ in ('Protocol', 'Generic'): + if base.__name__ in {'Protocol', 'Generic'}: continue annotations = getattr(base, '__annotations__', {}) - for attr in list(base.__dict__.keys()) + list(annotations.keys()): + for attr in (*base.__dict__, *annotations): if not attr.startswith('_abc_') and attr not in EXCLUDED_ATTRIBUTES: attrs.add(attr) return attrs -def _is_callable_members_only(cls): - # PEP 544 prohibits using issubclass() with protocols that have non-method members. - return all(callable(getattr(cls, attr, None)) for attr in _get_protocol_attrs(cls)) - - def _no_init_or_replace_init(self, *args, **kwargs): cls = type(self) @@ -1455,59 +1971,193 @@ def _no_init_or_replace_init(self, *args, **kwargs): def _caller(depth=1, default='__main__'): + try: + return sys._getframemodulename(depth + 1) or default + except AttributeError: # For platforms without _getframemodulename() + pass try: return sys._getframe(depth + 1).f_globals.get('__name__', default) except (AttributeError, ValueError): # For platforms without _getframe() - return None - + pass + return None -def _allow_reckless_class_checks(depth=3): +def _allow_reckless_class_checks(depth=2): """Allow instance and class checks for special stdlib modules. The abc and functools modules indiscriminately call isinstance() and issubclass() on the whole MRO of a user class, which may contain protocols. """ - try: - return sys._getframe(depth).f_globals['__name__'] in ['abc', 'functools'] - except (AttributeError, ValueError): # For platforms without _getframe(). - return True + # XXX: RUSTPYTHON; https://github.com/python/cpython/pull/136115 + return _caller(depth) in {'abc', '_py_abc', 'functools', None} _PROTO_ALLOWLIST = { 'collections.abc': [ 'Callable', 'Awaitable', 'Iterable', 'Iterator', 'AsyncIterable', - 'Hashable', 'Sized', 'Container', 'Collection', 'Reversible', + 'AsyncIterator', 'Hashable', 'Sized', 'Container', 'Collection', + 'Reversible', 'Buffer', ], 'contextlib': ['AbstractContextManager', 'AbstractAsyncContextManager'], } +@functools.cache +def _lazy_load_getattr_static(): + # Import getattr_static lazily so as not to slow down the import of typing.py + # Cache the result so we don't slow down _ProtocolMeta.__instancecheck__ unnecessarily + from inspect import getattr_static + return getattr_static + + +_cleanups.append(_lazy_load_getattr_static.cache_clear) + +def _pickle_psargs(psargs): + return ParamSpecArgs, (psargs.__origin__,) + +copyreg.pickle(ParamSpecArgs, _pickle_psargs) + +def _pickle_pskwargs(pskwargs): + return ParamSpecKwargs, (pskwargs.__origin__,) + +copyreg.pickle(ParamSpecKwargs, _pickle_pskwargs) + +del _pickle_psargs, _pickle_pskwargs + + +# Preload these once, as globals, as a micro-optimisation. +# This makes a significant difference to the time it takes +# to do `isinstance()`/`issubclass()` checks +# against runtime-checkable protocols with only one callable member. +_abc_instancecheck = ABCMeta.__instancecheck__ +_abc_subclasscheck = ABCMeta.__subclasscheck__ + + +def _type_check_issubclass_arg_1(arg): + """Raise TypeError if `arg` is not an instance of `type` + in `issubclass(arg, )`. + + In most cases, this is verified by type.__subclasscheck__. + Checking it again unnecessarily would slow down issubclass() checks, + so, we don't perform this check unless we absolutely have to. + + For various error paths, however, + we want to ensure that *this* error message is shown to the user + where relevant, rather than a typing.py-specific error message. + """ + if not isinstance(arg, type): + # Same error message as for issubclass(1, int). + raise TypeError('issubclass() arg 1 must be a class') + + class _ProtocolMeta(ABCMeta): - # This metaclass is really unfortunate and exists only because of - # the lack of __instancehook__. + # This metaclass is somewhat unfortunate, + # but is necessary for several reasons... + def __new__(mcls, name, bases, namespace, /, **kwargs): + if name == "Protocol" and bases == (Generic,): + pass + elif Protocol in bases: + for base in bases: + if not ( + base in {object, Generic} + or base.__name__ in _PROTO_ALLOWLIST.get(base.__module__, []) + or ( + issubclass(base, Generic) + and getattr(base, "_is_protocol", False) + ) + ): + raise TypeError( + f"Protocols can only inherit from other protocols, " + f"got {base!r}" + ) + return super().__new__(mcls, name, bases, namespace, **kwargs) + + def __init__(cls, *args, **kwargs): + super().__init__(*args, **kwargs) + if getattr(cls, "_is_protocol", False): + cls.__protocol_attrs__ = _get_protocol_attrs(cls) + + def __subclasscheck__(cls, other): + if cls is Protocol: + return type.__subclasscheck__(cls, other) + if ( + getattr(cls, '_is_protocol', False) + and not _allow_reckless_class_checks() + ): + if not getattr(cls, '_is_runtime_protocol', False): + _type_check_issubclass_arg_1(other) + raise TypeError( + "Instance and class checks can only be used with " + "@runtime_checkable protocols" + ) + if ( + # this attribute is set by @runtime_checkable: + cls.__non_callable_proto_members__ + and cls.__dict__.get("__subclasshook__") is _proto_hook + ): + _type_check_issubclass_arg_1(other) + non_method_attrs = sorted(cls.__non_callable_proto_members__) + raise TypeError( + "Protocols with non-method members don't support issubclass()." + f" Non-method members: {str(non_method_attrs)[1:-1]}." + ) + return _abc_subclasscheck(cls, other) + def __instancecheck__(cls, instance): # We need this method for situations where attributes are # assigned in __init__. + if cls is Protocol: + return type.__instancecheck__(cls, instance) + if not getattr(cls, "_is_protocol", False): + # i.e., it's a concrete subclass of a protocol + return _abc_instancecheck(cls, instance) + if ( - getattr(cls, '_is_protocol', False) and not getattr(cls, '_is_runtime_protocol', False) and - not _allow_reckless_class_checks(depth=2) + not _allow_reckless_class_checks() ): raise TypeError("Instance and class checks can only be used with" " @runtime_checkable protocols") - if ((not getattr(cls, '_is_protocol', False) or - _is_callable_members_only(cls)) and - issubclass(instance.__class__, cls)): + if _abc_instancecheck(cls, instance): + return True + + getattr_static = _lazy_load_getattr_static() + for attr in cls.__protocol_attrs__: + try: + val = getattr_static(instance, attr) + except AttributeError: + break + # this attribute is set by @runtime_checkable: + if val is None and attr not in cls.__non_callable_proto_members__: + break + else: return True - if cls._is_protocol: - if all(hasattr(instance, attr) and - # All *methods* can be blocked by setting them to None. - (not callable(getattr(cls, attr, None)) or - getattr(instance, attr) is not None) - for attr in _get_protocol_attrs(cls)): - return True - return super().__instancecheck__(instance) + + return False + + +@classmethod +def _proto_hook(cls, other): + if not cls.__dict__.get('_is_protocol', False): + return NotImplemented + + for attr in cls.__protocol_attrs__: + for base in other.__mro__: + # Check if the members appears in the class dictionary... + if attr in base.__dict__: + if base.__dict__[attr] is None: + return NotImplemented + break + + # ...or in annotations, if it is a sub-protocol. + annotations = getattr(base, '__annotations__', {}) + if (isinstance(annotations, collections.abc.Mapping) and + attr in annotations and + issubclass(other, Generic) and getattr(other, '_is_protocol', False)): + break + else: + return NotImplemented + return True class Protocol(Generic, metaclass=_ProtocolMeta): @@ -1520,7 +2170,9 @@ def meth(self) -> int: ... Such classes are primarily used with static type checkers that recognize - structural subtyping (static duck-typing), for example:: + structural subtyping (static duck-typing). + + For example:: class C: def meth(self) -> int: @@ -1536,10 +2188,11 @@ def func(x: Proto) -> int: only the presence of given attributes, ignoring their type signatures. Protocol classes can be generic, they are defined as:: - class GenProto(Protocol[T]): + class GenProto[T](Protocol): def meth(self) -> T: ... """ + __slots__ = () _is_protocol = True _is_runtime_protocol = False @@ -1552,75 +2205,30 @@ def __init_subclass__(cls, *args, **kwargs): cls._is_protocol = any(b is Protocol for b in cls.__bases__) # Set (or override) the protocol subclass hook. - def _proto_hook(other): - if not cls.__dict__.get('_is_protocol', False): - return NotImplemented - - # First, perform various sanity checks. - if not getattr(cls, '_is_runtime_protocol', False): - if _allow_reckless_class_checks(): - return NotImplemented - raise TypeError("Instance and class checks can only be used with" - " @runtime_checkable protocols") - if not _is_callable_members_only(cls): - if _allow_reckless_class_checks(): - return NotImplemented - raise TypeError("Protocols with non-method members" - " don't support issubclass()") - if not isinstance(other, type): - # Same error message as for issubclass(1, int). - raise TypeError('issubclass() arg 1 must be a class') - - # Second, perform the actual structural compatibility check. - for attr in _get_protocol_attrs(cls): - for base in other.__mro__: - # Check if the members appears in the class dictionary... - if attr in base.__dict__: - if base.__dict__[attr] is None: - return NotImplemented - break - - # ...or in annotations, if it is a sub-protocol. - annotations = getattr(base, '__annotations__', {}) - if (isinstance(annotations, collections.abc.Mapping) and - attr in annotations and - issubclass(other, Generic) and other._is_protocol): - break - else: - return NotImplemented - return True - if '__subclasshook__' not in cls.__dict__: cls.__subclasshook__ = _proto_hook - # We have nothing more to do for non-protocols... - if not cls._is_protocol: - return - - # ... otherwise check consistency of bases, and prohibit instantiation. - for base in cls.__bases__: - if not (base in (object, Generic) or - base.__module__ in _PROTO_ALLOWLIST and - base.__name__ in _PROTO_ALLOWLIST[base.__module__] or - issubclass(base, Generic) and base._is_protocol): - raise TypeError('Protocols can only inherit from other' - ' protocols, got %r' % base) - cls.__init__ = _no_init_or_replace_init + # Prohibit instantiation for protocol classes + if cls._is_protocol and cls.__init__ is Protocol.__init__: + cls.__init__ = _no_init_or_replace_init -class _AnnotatedAlias(_GenericAlias, _root=True): +class _AnnotatedAlias(_NotIterable, _GenericAlias, _root=True): """Runtime representation of an annotated type. At its core 'Annotated[t, dec1, dec2, ...]' is an alias for the type 't' - with extra annotations. The alias behaves like a normal typing alias, - instantiating is the same as instantiating the underlying type, binding + with extra annotations. The alias behaves like a normal typing alias. + Instantiating is the same as instantiating the underlying type; binding it to types is also the same. + + The metadata itself is stored in a '__metadata__' attribute as a tuple. """ + def __init__(self, origin, metadata): if isinstance(origin, _AnnotatedAlias): metadata = origin.__metadata__ + metadata origin = origin.__origin__ - super().__init__(origin, origin) + super().__init__(origin, origin, name='Annotated') self.__metadata__ = metadata def copy_with(self, params): @@ -1653,9 +2261,14 @@ def __getattr__(self, attr): return 'Annotated' return super().__getattr__(attr) + def __mro_entries__(self, bases): + return (self.__origin__,) + -class Annotated: - """Add context specific metadata to a type. +@_TypedCacheSpecialForm +@_tp_cache(typed=True) +def Annotated(self, *params): + """Add context-specific metadata to a type. Example: Annotated[int, runtime_check.Unsigned] indicates to the hypothetical runtime_check module that this type is an unsigned int. @@ -1667,44 +2280,51 @@ class Annotated: Details: - It's an error to call `Annotated` with less than two arguments. - - Nested Annotated are flattened:: + - Access the metadata via the ``__metadata__`` attribute:: + + assert Annotated[int, '$'].__metadata__ == ('$',) + + - Nested Annotated types are flattened:: - Annotated[Annotated[T, Ann1, Ann2], Ann3] == Annotated[T, Ann1, Ann2, Ann3] + assert Annotated[Annotated[T, Ann1, Ann2], Ann3] == Annotated[T, Ann1, Ann2, Ann3] - Instantiating an annotated type is equivalent to instantiating the underlying type:: - Annotated[C, Ann1](5) == C(5) + assert Annotated[C, Ann1](5) == C(5) - Annotated can be used as a generic type alias:: - Optimized = Annotated[T, runtime.Optimize()] - Optimized[int] == Annotated[int, runtime.Optimize()] + type Optimized[T] = Annotated[T, runtime.Optimize()] + # type checker will treat Optimized[int] + # as equivalent to Annotated[int, runtime.Optimize()] - OptimizedList = Annotated[List[T], runtime.Optimize()] - OptimizedList[int] == Annotated[List[int], runtime.Optimize()] - """ + type OptimizedList[T] = Annotated[list[T], runtime.Optimize()] + # type checker will treat OptimizedList[int] + # as equivalent to Annotated[list[int], runtime.Optimize()] - __slots__ = () + - Annotated cannot be used with an unpacked TypeVarTuple:: - def __new__(cls, *args, **kwargs): - raise TypeError("Type Annotated cannot be instantiated.") + type Variadic[*Ts] = Annotated[*Ts, Ann1] # NOT valid - @_tp_cache - def __class_getitem__(cls, params): - if not isinstance(params, tuple) or len(params) < 2: - raise TypeError("Annotated[...] should be used " - "with at least two arguments (a type and an " - "annotation).") - msg = "Annotated[t, ...]: t must be a type." - origin = _type_check(params[0], msg, allow_special_forms=True) - metadata = tuple(params[1:]) - return _AnnotatedAlias(origin, metadata) + This would be equivalent to:: - def __init_subclass__(cls, *args, **kwargs): - raise TypeError( - "Cannot subclass {}.Annotated".format(cls.__module__) - ) + Annotated[T1, T2, T3, ..., Ann1] + + where T1, T2 etc. are TypeVars, which would be invalid, because + only one type should be passed to Annotated. + """ + if len(params) < 2: + raise TypeError("Annotated[...] should be used " + "with at least two arguments (a type and an " + "annotation).") + if _is_unpacked_typevartuple(params[0]): + raise TypeError("Annotated[...] should not be used with an " + "unpacked TypeVarTuple") + msg = "Annotated[t, ...]: t must be a type." + origin = _type_check(params[0], msg, allow_special_forms=True) + metadata = tuple(params[1:]) + return _AnnotatedAlias(origin, metadata) def runtime_checkable(cls): @@ -1714,6 +2334,7 @@ def runtime_checkable(cls): Raise TypeError if applied to a non-protocol class. This allows a simple-minded structural check very similar to one trick ponies in collections.abc such as Iterable. + For example:: @runtime_checkable @@ -1725,10 +2346,26 @@ def close(self): ... Warning: this will check only the presence of the required methods, not their type signatures! """ - if not issubclass(cls, Generic) or not cls._is_protocol: + if not issubclass(cls, Generic) or not getattr(cls, '_is_protocol', False): raise TypeError('@runtime_checkable can be only applied to protocol classes,' ' got %r' % cls) cls._is_runtime_protocol = True + # PEP 544 prohibits using issubclass() + # with protocols that have non-method members. + # See gh-113320 for why we compute this attribute here, + # rather than in `_ProtocolMeta.__init__` + cls.__non_callable_proto_members__ = set() + for attr in cls.__protocol_attrs__: + try: + is_callable = callable(getattr(cls, attr, None)) + except Exception as e: + raise TypeError( + f"Failed to determine whether protocol member {attr!r} " + "is a method member" + ) from e + else: + if not is_callable: + cls.__non_callable_proto_members__.add(attr) return cls @@ -1743,24 +2380,20 @@ def cast(typ, val): return val -def _get_defaults(func): - """Internal helper to extract the default arguments, by name.""" - try: - code = func.__code__ - except AttributeError: - # Some built-in functions don't have __code__, __defaults__, etc. - return {} - pos_count = code.co_argcount - arg_names = code.co_varnames - arg_names = arg_names[:pos_count] - defaults = func.__defaults__ or () - kwdefaults = func.__kwdefaults__ - res = dict(kwdefaults) if kwdefaults else {} - pos_offset = pos_count - len(defaults) - for name, value in zip(arg_names[pos_offset:], defaults): - assert name not in res - res[name] = value - return res +def assert_type(val, typ, /): + """Ask a static type checker to confirm that the value is of the given type. + + At runtime this does nothing: it returns the first argument unchanged with no + checks or side effects, no matter the actual type of the argument. + + When a static type checker encounters a call to assert_type(), it + emits an error if the value is not of the specified type:: + + def greet(name: str) -> None: + assert_type(name, str) # OK + assert_type(name, int) # type checker error + """ + return val _allowed_types = (types.FunctionType, types.BuiltinFunctionType, @@ -1772,8 +2405,7 @@ def get_type_hints(obj, globalns=None, localns=None, include_extras=False): """Return type hints for an object. This is often the same as obj.__annotations__, but it handles - forward references encoded as string literals, adds Optional[t] if a - default value equal to None is set and recursively replaces all + forward references encoded as string literals and recursively replaces all 'Annotated[T, ...]' with 'T' (unless 'include_extras=True'). The argument may be a module, class, method, or function. The annotations @@ -1800,7 +2432,6 @@ def get_type_hints(obj, globalns=None, localns=None, include_extras=False): - If two dict arguments are passed, they specify globals and locals, respectively. """ - if getattr(obj, '__no_type_check__', None): return {} # Classes require a special treatment. @@ -1828,7 +2459,7 @@ def get_type_hints(obj, globalns=None, localns=None, include_extras=False): value = type(None) if isinstance(value, str): value = ForwardRef(value, is_argument=False, is_class=True) - value = _eval_type(value, base_globals, base_locals) + value = _eval_type(value, base_globals, base_locals, base.__type_params__) hints[name] = value return hints if include_extras else {k: _strip_annotations(t) for k, t in hints.items()} @@ -1853,8 +2484,8 @@ def get_type_hints(obj, globalns=None, localns=None, include_extras=False): else: raise TypeError('{!r} is not a module, class, method, ' 'or function.'.format(obj)) - defaults = _get_defaults(obj) hints = dict(hints) + type_params = getattr(obj, "__type_params__", ()) for name, value in hints.items(): if value is None: value = type(None) @@ -1866,18 +2497,16 @@ def get_type_hints(obj, globalns=None, localns=None, include_extras=False): is_argument=not isinstance(obj, types.ModuleType), is_class=False, ) - value = _eval_type(value, globalns, localns) - if name in defaults and defaults[name] is None: - value = Optional[value] - hints[name] = value + hints[name] = _eval_type(value, globalns, localns, type_params) return hints if include_extras else {k: _strip_annotations(t) for k, t in hints.items()} def _strip_annotations(t): - """Strips the annotations from a given type. - """ + """Strip the annotations from a given type.""" if isinstance(t, _AnnotatedAlias): return _strip_annotations(t.__origin__) + if hasattr(t, "__origin__") and t.__origin__ in (Required, NotRequired, ReadOnly): + return _strip_annotations(t.__args__[0]) if isinstance(t, _GenericAlias): stripped_args = tuple(_strip_annotations(a) for a in t.__args__) if stripped_args == t.__args__: @@ -1900,17 +2529,20 @@ def _strip_annotations(t): def get_origin(tp): """Get the unsubscripted version of a type. - This supports generic types, Callable, Tuple, Union, Literal, Final, ClassVar - and Annotated. Return None for unsupported types. Examples:: - - get_origin(Literal[42]) is Literal - get_origin(int) is None - get_origin(ClassVar[int]) is ClassVar - get_origin(Generic) is Generic - get_origin(Generic[T]) is Generic - get_origin(Union[T, int]) is Union - get_origin(List[Tuple[T, T]][int]) == list - get_origin(P.args) is P + This supports generic types, Callable, Tuple, Union, Literal, Final, ClassVar, + Annotated, and others. Return None for unsupported types. + + Examples:: + + >>> P = ParamSpec('P') + >>> assert get_origin(Literal[42]) is Literal + >>> assert get_origin(int) is None + >>> assert get_origin(ClassVar[int]) is ClassVar + >>> assert get_origin(Generic) is Generic + >>> assert get_origin(Generic[T]) is Generic + >>> assert get_origin(Union[T, int]) is Union + >>> assert get_origin(List[Tuple[T, T]][int]) is list + >>> assert get_origin(P.args) is P """ if isinstance(tp, _AnnotatedAlias): return Annotated @@ -1928,19 +2560,21 @@ def get_args(tp): """Get type arguments with all substitutions performed. For unions, basic simplifications used by Union constructor are performed. + Examples:: - get_args(Dict[str, int]) == (str, int) - get_args(int) == () - get_args(Union[int, Union[T, int], str][int]) == (int, str) - get_args(Union[int, Tuple[T, int]][str]) == (int, Tuple[str, int]) - get_args(Callable[[], T][int]) == ([], int) + + >>> T = TypeVar('T') + >>> assert get_args(Dict[str, int]) == (str, int) + >>> assert get_args(int) == () + >>> assert get_args(Union[int, Union[T, int], str][int]) == (int, str) + >>> assert get_args(Union[int, Tuple[T, int]][str]) == (int, Tuple[str, int]) + >>> assert get_args(Callable[[], T][int]) == ([], int) """ if isinstance(tp, _AnnotatedAlias): return (tp.__origin__,) + tp.__metadata__ if isinstance(tp, (_GenericAlias, GenericAlias)): res = tp.__args__ - if (tp.__origin__ is collections.abc.Callable - and not (len(res) == 2 and _is_param_expr(res[0]))): + if _should_unflatten_callable_args(tp, res): res = (list(res[:-1]), res[-1]) return res if isinstance(tp, types.UnionType): @@ -1949,19 +2583,51 @@ def get_args(tp): def is_typeddict(tp): - """Check if an annotation is a TypedDict class + """Check if an annotation is a TypedDict class. For example:: - class Film(TypedDict): - title: str - year: int - is_typeddict(Film) # => True - is_typeddict(Union[list, str]) # => False + >>> from typing import TypedDict + >>> class Film(TypedDict): + ... title: str + ... year: int + ... + >>> is_typeddict(Film) + True + >>> is_typeddict(dict) + False """ return isinstance(tp, _TypedDictMeta) +_ASSERT_NEVER_REPR_MAX_LENGTH = 100 + + +def assert_never(arg: Never, /) -> Never: + """Statically assert that a line of code is unreachable. + + Example:: + + def int_or_str(arg: int | str) -> None: + match arg: + case int(): + print("It's an int") + case str(): + print("It's a str") + case _: + assert_never(arg) + + If a type checker finds that a call to assert_never() is + reachable, it will emit an error. + + At runtime, this throws an exception when called. + """ + value = repr(arg) + if len(value) > _ASSERT_NEVER_REPR_MAX_LENGTH: + value = value[:_ASSERT_NEVER_REPR_MAX_LENGTH] + '...' + raise AssertionError(f"Expected code to be unreachable, but got: {value}") + + def no_type_check(arg): """Decorator to indicate that annotations are not type hints. @@ -1972,13 +2638,23 @@ def no_type_check(arg): This mutates the function(s) or class(es) in place. """ if isinstance(arg, type): - arg_attrs = arg.__dict__.copy() - for attr, val in arg.__dict__.items(): - if val in arg.__bases__ + (arg,): - arg_attrs.pop(attr) - for obj in arg_attrs.values(): + for key in dir(arg): + obj = getattr(arg, key) + if ( + not hasattr(obj, '__qualname__') + or obj.__qualname__ != f'{arg.__qualname__}.{obj.__name__}' + or getattr(obj, '__module__', None) != arg.__module__ + ): + # We only modify objects that are defined in this type directly. + # If classes / methods are nested in multiple layers, + # we will modify them when processing their direct holders. + continue + # Instance, class, and static methods: if isinstance(obj, types.FunctionType): obj.__no_type_check__ = True + if isinstance(obj, types.MethodType): + obj.__func__.__no_type_check__ = True + # Nested types: if isinstance(obj, type): no_type_check(obj) try: @@ -1994,7 +2670,8 @@ def no_type_check_decorator(decorator): This wraps the decorator with something that wraps the decorated function in @no_type_check. """ - + import warnings + warnings._deprecated("typing.no_type_check_decorator", remove=(3, 15)) @functools.wraps(decorator) def wrapped_decorator(*args, **kwds): func = decorator(*args, **kwds) @@ -2013,63 +2690,107 @@ def _overload_dummy(*args, **kwds): "by an implementation that is not @overload-ed.") +# {module: {qualname: {firstlineno: func}}} +_overload_registry = defaultdict(functools.partial(defaultdict, dict)) + + def overload(func): """Decorator for overloaded functions/methods. In a stub file, place two or more stub definitions for the same - function in a row, each decorated with @overload. For example: + function in a row, each decorated with @overload. + + For example:: - @overload - def utf8(value: None) -> None: ... - @overload - def utf8(value: bytes) -> bytes: ... - @overload - def utf8(value: str) -> bytes: ... + @overload + def utf8(value: None) -> None: ... + @overload + def utf8(value: bytes) -> bytes: ... + @overload + def utf8(value: str) -> bytes: ... In a non-stub file (i.e. a regular .py file), do the same but follow it with an implementation. The implementation should *not* - be decorated with @overload. For example: - - @overload - def utf8(value: None) -> None: ... - @overload - def utf8(value: bytes) -> bytes: ... - @overload - def utf8(value: str) -> bytes: ... - def utf8(value): - # implementation goes here + be decorated with @overload:: + + @overload + def utf8(value: None) -> None: ... + @overload + def utf8(value: bytes) -> bytes: ... + @overload + def utf8(value: str) -> bytes: ... + def utf8(value): + ... # implementation goes here + + The overloads for a function can be retrieved at runtime using the + get_overloads() function. """ + # classmethod and staticmethod + f = getattr(func, "__func__", func) + try: + _overload_registry[f.__module__][f.__qualname__][f.__code__.co_firstlineno] = func + except AttributeError: + # Not a normal function; ignore. + pass return _overload_dummy +def get_overloads(func): + """Return all defined overloads for *func* as a sequence.""" + # classmethod and staticmethod + f = getattr(func, "__func__", func) + if f.__module__ not in _overload_registry: + return [] + mod_dict = _overload_registry[f.__module__] + if f.__qualname__ not in mod_dict: + return [] + return list(mod_dict[f.__qualname__].values()) + + +def clear_overloads(): + """Clear all overloads in the registry.""" + _overload_registry.clear() + + def final(f): - """A decorator to indicate final methods and final classes. + """Decorator to indicate final methods and final classes. Use this decorator to indicate to type checkers that the decorated method cannot be overridden, and decorated class cannot be subclassed. - For example: - - class Base: - @final - def done(self) -> None: - ... - class Sub(Base): - def done(self) -> None: # Error reported by type checker + + For example:: + + class Base: + @final + def done(self) -> None: + ... + class Sub(Base): + def done(self) -> None: # Error reported by type checker ... - @final - class Leaf: - ... - class Other(Leaf): # Error reported by type checker - ... + @final + class Leaf: + ... + class Other(Leaf): # Error reported by type checker + ... - There is no runtime checking of these properties. + There is no runtime checking of these properties. The decorator + attempts to set the ``__final__`` attribute to ``True`` on the decorated + object to allow runtime introspection. """ + try: + f.__final__ = True + except (AttributeError, TypeError): + # Skip the attribute silently if it is not writable. + # AttributeError happens if the object has __slots__ or a + # read-only property, TypeError if it's a builtin class. + pass return f -# Some unconstrained type variables. These are used by the container types. -# (These are not for export.) +# Some unconstrained type variables. These were initially used by the container types. +# They were never meant for export and are now unused, but we keep them around to +# avoid breaking compatibility with users who import them. T = TypeVar('T') # Any type. KT = TypeVar('KT') # Key type. VT = TypeVar('VT') # Value type. @@ -2080,6 +2801,7 @@ class Other(Leaf): # Error reported by type checker # Internal type variable used for Type[]. CT_co = TypeVar('CT_co', covariant=True, bound=type) + # A useful type variable with constraints. This represents string types. # (This one *is* for export!) AnyStr = TypeVar('AnyStr', bytes, str) @@ -2101,13 +2823,17 @@ class Other(Leaf): # Error reported by type checker Collection = _alias(collections.abc.Collection, 1) Callable = _CallableType(collections.abc.Callable, 2) Callable.__doc__ = \ - """Callable type; Callable[[int], str] is a function of (int) -> str. + """Deprecated alias to collections.abc.Callable. + + Callable[[int], str] signifies a function that takes a single + parameter of type int and returns a str. The subscription syntax must always be used with exactly two - values: the argument list and the return type. The argument list - must be a list of types or ellipsis; the return type must be a single type. + values: the argument list and the return type. + The argument list must be a list of types, a ParamSpec, + Concatenate or ellipsis. The return type must be a single type. - There is no syntax to indicate optional or keyword arguments, + There is no syntax to indicate optional or keyword arguments; such function types are rarely used as callback types. """ AbstractSet = _alias(collections.abc.Set, 1, name='AbstractSet') @@ -2117,11 +2843,15 @@ class Other(Leaf): # Error reported by type checker MutableMapping = _alias(collections.abc.MutableMapping, 2) Sequence = _alias(collections.abc.Sequence, 1) MutableSequence = _alias(collections.abc.MutableSequence, 1) -ByteString = _alias(collections.abc.ByteString, 0) # Not generic +ByteString = _DeprecatedGenericAlias( + collections.abc.ByteString, 0, removal_version=(3, 14) # Not generic. +) # Tuple accepts variable number of parameters. Tuple = _TupleType(tuple, -1, inst=False, name='Tuple') Tuple.__doc__ = \ - """Tuple type; Tuple[X, Y] is the cross-product type of X and Y. + """Deprecated alias to builtins.tuple. + + Tuple[X, Y] is the cross-product type of X and Y. Example: Tuple[T1, T2] is a tuple of two elements corresponding to type variables T1 and T2. Tuple[int, float, str] is a tuple @@ -2137,41 +2867,34 @@ class Other(Leaf): # Error reported by type checker KeysView = _alias(collections.abc.KeysView, 1) ItemsView = _alias(collections.abc.ItemsView, 2) ValuesView = _alias(collections.abc.ValuesView, 1) -try: - # XXX: RUSTPYTHON; contextlib support for wasm - import contextlib - ContextManager = _alias(contextlib.AbstractContextManager, 1, name='ContextManager') - AsyncContextManager = _alias(contextlib.AbstractAsyncContextManager, 1, name='AsyncContextManager') -except ImportError: - pass Dict = _alias(dict, 2, inst=False, name='Dict') DefaultDict = _alias(collections.defaultdict, 2, name='DefaultDict') OrderedDict = _alias(collections.OrderedDict, 2) Counter = _alias(collections.Counter, 1) ChainMap = _alias(collections.ChainMap, 2) -Generator = _alias(collections.abc.Generator, 3) -AsyncGenerator = _alias(collections.abc.AsyncGenerator, 2) +Generator = _alias(collections.abc.Generator, 3, defaults=(types.NoneType, types.NoneType)) +AsyncGenerator = _alias(collections.abc.AsyncGenerator, 2, defaults=(types.NoneType,)) Type = _alias(type, 1, inst=False, name='Type') Type.__doc__ = \ - """A special construct usable to annotate class objects. + """Deprecated alias to builtins.type. + builtins.type or typing.Type can be used to annotate class objects. For example, suppose we have the following classes:: - class User: ... # Abstract base for User classes - class BasicUser(User): ... - class ProUser(User): ... - class TeamUser(User): ... + class User: ... # Abstract base for User classes + class BasicUser(User): ... + class ProUser(User): ... + class TeamUser(User): ... And a function that takes a class argument that's a subclass of User and returns an instance of the corresponding class:: - U = TypeVar('U', bound=User) - def new_user(user_class: Type[U]) -> U: - user = user_class() - # (Here we could write the user object to a database) - return user + def new_user[U](user_class: Type[U]) -> U: + user = user_class() + # (Here we could write the user object to a database) + return user - joe = new_user(BasicUser) + joe = new_user(BasicUser) At this point the type checker knows that joe has type BasicUser. """ @@ -2180,6 +2903,7 @@ def new_user(user_class: Type[U]) -> U: @runtime_checkable class SupportsInt(Protocol): """An ABC with one abstract method __int__.""" + __slots__ = () @abstractmethod @@ -2190,6 +2914,7 @@ def __int__(self) -> int: @runtime_checkable class SupportsFloat(Protocol): """An ABC with one abstract method __float__.""" + __slots__ = () @abstractmethod @@ -2200,6 +2925,7 @@ def __float__(self) -> float: @runtime_checkable class SupportsComplex(Protocol): """An ABC with one abstract method __complex__.""" + __slots__ = () @abstractmethod @@ -2210,6 +2936,7 @@ def __complex__(self) -> complex: @runtime_checkable class SupportsBytes(Protocol): """An ABC with one abstract method __bytes__.""" + __slots__ = () @abstractmethod @@ -2220,6 +2947,7 @@ def __bytes__(self) -> bytes: @runtime_checkable class SupportsIndex(Protocol): """An ABC with one abstract method __index__.""" + __slots__ = () @abstractmethod @@ -2228,22 +2956,24 @@ def __index__(self) -> int: @runtime_checkable -class SupportsAbs(Protocol[T_co]): +class SupportsAbs[T](Protocol): """An ABC with one abstract method __abs__ that is covariant in its return type.""" + __slots__ = () @abstractmethod - def __abs__(self) -> T_co: + def __abs__(self) -> T: pass @runtime_checkable -class SupportsRound(Protocol[T_co]): +class SupportsRound[T](Protocol): """An ABC with one abstract method __round__ that is covariant in its return type.""" + __slots__ = () @abstractmethod - def __round__(self, ndigits: int = 0) -> T_co: + def __round__(self, ndigits: int = 0) -> T: pass @@ -2266,9 +2996,13 @@ def _make_nmtuple(name, types, module, defaults = ()): class NamedTupleMeta(type): - def __new__(cls, typename, bases, ns): - assert bases[0] is _NamedTuple + assert _NamedTuple in bases + for base in bases: + if base is not _NamedTuple and base is not Generic: + raise TypeError( + 'can only inherit from a NamedTuple type and Generic') + bases = tuple(tuple if base is _NamedTuple else base for base in bases) types = ns.get('__annotations__', {}) default_names = [] for field_name in types: @@ -2282,19 +3016,40 @@ def __new__(cls, typename, bases, ns): nm_tpl = _make_nmtuple(typename, types.items(), defaults=[ns[n] for n in default_names], module=ns['__module__']) + nm_tpl.__bases__ = bases + if Generic in bases: + class_getitem = _generic_class_getitem + nm_tpl.__class_getitem__ = classmethod(class_getitem) # update from user namespace without overriding special namedtuple attributes - for key in ns: + for key, val in ns.items(): if key in _prohibited: raise AttributeError("Cannot overwrite NamedTuple attribute " + key) - elif key not in _special and key not in nm_tpl._fields: - setattr(nm_tpl, key, ns[key]) + elif key not in _special: + if key not in nm_tpl._fields: + setattr(nm_tpl, key, val) + try: + set_name = type(val).__set_name__ + except AttributeError: + pass + else: + try: + set_name(val, nm_tpl, key) + except BaseException as e: + e.add_note( + f"Error calling __set_name__ on {type(val).__name__!r} " + f"instance {key!r} in {typename!r}" + ) + raise + + if Generic in bases: + nm_tpl.__init_subclass__() return nm_tpl -def NamedTuple(typename, fields=None, /, **kwargs): +def NamedTuple(typename, fields=_sentinel, /, **kwargs): """Typed version of namedtuple. - Usage in Python versions >= 3.6:: + Usage:: class Employee(NamedTuple): name: str @@ -2307,39 +3062,86 @@ class Employee(NamedTuple): The resulting class has an extra __annotations__ attribute, giving a dict that maps field names to types. (The field names are also in the _fields attribute, which is part of the namedtuple API.) - Alternative equivalent keyword syntax is also accepted:: - - Employee = NamedTuple('Employee', name=str, id=int) - - In Python versions <= 3.5 use:: + An alternative equivalent functional syntax is also accepted:: Employee = NamedTuple('Employee', [('name', str), ('id', int)]) """ - if fields is None: - fields = kwargs.items() + if fields is _sentinel: + if kwargs: + deprecated_thing = "Creating NamedTuple classes using keyword arguments" + deprecation_msg = ( + "{name} is deprecated and will be disallowed in Python {remove}. " + "Use the class-based or functional syntax instead." + ) + else: + deprecated_thing = "Failing to pass a value for the 'fields' parameter" + example = f"`{typename} = NamedTuple({typename!r}, [])`" + deprecation_msg = ( + "{name} is deprecated and will be disallowed in Python {remove}. " + "To create a NamedTuple class with 0 fields " + "using the functional syntax, " + "pass an empty list, e.g. " + ) + example + "." + elif fields is None: + if kwargs: + raise TypeError( + "Cannot pass `None` as the 'fields' parameter " + "and also specify fields using keyword arguments" + ) + else: + deprecated_thing = "Passing `None` as the 'fields' parameter" + example = f"`{typename} = NamedTuple({typename!r}, [])`" + deprecation_msg = ( + "{name} is deprecated and will be disallowed in Python {remove}. " + "To create a NamedTuple class with 0 fields " + "using the functional syntax, " + "pass an empty list, e.g. " + ) + example + "." elif kwargs: raise TypeError("Either list of fields or keywords" " can be provided to NamedTuple, not both") - try: - module = sys._getframe(1).f_globals.get('__name__', '__main__') - except (AttributeError, ValueError): - module = None - return _make_nmtuple(typename, fields, module=module) + if fields is _sentinel or fields is None: + import warnings + warnings._deprecated(deprecated_thing, message=deprecation_msg, remove=(3, 15)) + fields = kwargs.items() + nt = _make_nmtuple(typename, fields, module=_caller()) + nt.__orig_bases__ = (NamedTuple,) + return nt _NamedTuple = type.__new__(NamedTupleMeta, 'NamedTuple', (), {}) def _namedtuple_mro_entries(bases): - if len(bases) > 1: - raise TypeError("Multiple inheritance with NamedTuple is not supported") - assert bases[0] is NamedTuple + assert NamedTuple in bases return (_NamedTuple,) NamedTuple.__mro_entries__ = _namedtuple_mro_entries +def _get_typeddict_qualifiers(annotation_type): + while True: + annotation_origin = get_origin(annotation_type) + if annotation_origin is Annotated: + annotation_args = get_args(annotation_type) + if annotation_args: + annotation_type = annotation_args[0] + else: + break + elif annotation_origin is Required: + yield Required + (annotation_type,) = get_args(annotation_type) + elif annotation_origin is NotRequired: + yield NotRequired + (annotation_type,) = get_args(annotation_type) + elif annotation_origin is ReadOnly: + yield ReadOnly + (annotation_type,) = get_args(annotation_type) + else: + break + + class _TypedDictMeta(type): def __new__(cls, name, bases, ns, total=True): - """Create new typed dict class object. + """Create a new typed dict class object. This method is called when TypedDict is subclassed, or when TypedDict is instantiated. This way @@ -2347,14 +3149,22 @@ def __new__(cls, name, bases, ns, total=True): Subclasses and instances of TypedDict return actual dictionaries. """ for base in bases: - if type(base) is not _TypedDictMeta: + if type(base) is not _TypedDictMeta and base is not Generic: raise TypeError('cannot inherit from both a TypedDict type ' 'and a non-TypedDict base class') - tp_dict = type.__new__(_TypedDictMeta, name, (dict,), ns) + + if any(issubclass(b, Generic) for b in bases): + generic_base = (Generic,) + else: + generic_base = () + + tp_dict = type.__new__(_TypedDictMeta, name, (*generic_base, dict), ns) + + if not hasattr(tp_dict, '__orig_bases__'): + tp_dict.__orig_bases__ = bases annotations = {} own_annotations = ns.get('__annotations__', {}) - own_annotation_keys = set(own_annotations.keys()) msg = "TypedDict('Name', {f0: t0, f1: t1, ...}); each t must be a type" own_annotations = { n: _type_check(tp, msg, module=tp_dict.__module__) @@ -2362,23 +3172,61 @@ def __new__(cls, name, bases, ns, total=True): } required_keys = set() optional_keys = set() + readonly_keys = set() + mutable_keys = set() for base in bases: annotations.update(base.__dict__.get('__annotations__', {})) - required_keys.update(base.__dict__.get('__required_keys__', ())) - optional_keys.update(base.__dict__.get('__optional_keys__', ())) + + base_required = base.__dict__.get('__required_keys__', set()) + required_keys |= base_required + optional_keys -= base_required + + base_optional = base.__dict__.get('__optional_keys__', set()) + required_keys -= base_optional + optional_keys |= base_optional + + readonly_keys.update(base.__dict__.get('__readonly_keys__', ())) + mutable_keys.update(base.__dict__.get('__mutable_keys__', ())) annotations.update(own_annotations) - if total: - required_keys.update(own_annotation_keys) - else: - optional_keys.update(own_annotation_keys) + for annotation_key, annotation_type in own_annotations.items(): + qualifiers = set(_get_typeddict_qualifiers(annotation_type)) + if Required in qualifiers: + is_required = True + elif NotRequired in qualifiers: + is_required = False + else: + is_required = total + if is_required: + required_keys.add(annotation_key) + optional_keys.discard(annotation_key) + else: + optional_keys.add(annotation_key) + required_keys.discard(annotation_key) + + if ReadOnly in qualifiers: + if annotation_key in mutable_keys: + raise TypeError( + f"Cannot override mutable key {annotation_key!r}" + " with read-only key" + ) + readonly_keys.add(annotation_key) + else: + mutable_keys.add(annotation_key) + readonly_keys.discard(annotation_key) + + assert required_keys.isdisjoint(optional_keys), ( + f"Required keys overlap with optional keys in {name}:" + f" {required_keys=}, {optional_keys=}" + ) tp_dict.__annotations__ = annotations tp_dict.__required_keys__ = frozenset(required_keys) tp_dict.__optional_keys__ = frozenset(optional_keys) - if not hasattr(tp_dict, '__total__'): - tp_dict.__total__ = total + tp_dict.__readonly_keys__ = frozenset(readonly_keys) + tp_dict.__mutable_keys__ = frozenset(mutable_keys) + tp_dict.__total__ = total return tp_dict __call__ = dict # static method @@ -2390,72 +3238,164 @@ def __subclasscheck__(cls, other): __instancecheck__ = __subclasscheck__ -def TypedDict(typename, fields=None, /, *, total=True, **kwargs): +def TypedDict(typename, fields=_sentinel, /, *, total=True): """A simple typed namespace. At runtime it is equivalent to a plain dict. - TypedDict creates a dictionary type that expects all of its + TypedDict creates a dictionary type such that a type checker will expect all instances to have a certain set of keys, where each key is associated with a value of a consistent type. This expectation - is not checked at runtime but is only enforced by type checkers. - Usage:: - - class Point2D(TypedDict): - x: int - y: int - label: str + is not checked at runtime. - a: Point2D = {'x': 1, 'y': 2, 'label': 'good'} # OK - b: Point2D = {'z': 3, 'label': 'bad'} # Fails type check + Usage:: - assert Point2D(x=1, y=2, label='first') == dict(x=1, y=2, label='first') + >>> class Point2D(TypedDict): + ... x: int + ... y: int + ... label: str + ... + >>> a: Point2D = {'x': 1, 'y': 2, 'label': 'good'} # OK + >>> b: Point2D = {'z': 3, 'label': 'bad'} # Fails type check + >>> Point2D(x=1, y=2, label='first') == dict(x=1, y=2, label='first') + True The type info can be accessed via the Point2D.__annotations__ dict, and the Point2D.__required_keys__ and Point2D.__optional_keys__ frozensets. - TypedDict supports two additional equivalent forms:: + TypedDict supports an additional equivalent form:: - Point2D = TypedDict('Point2D', x=int, y=int, label=str) Point2D = TypedDict('Point2D', {'x': int, 'y': int, 'label': str}) By default, all keys must be present in a TypedDict. It is possible - to override this by specifying totality. - Usage:: + to override this by specifying totality:: - class point2D(TypedDict, total=False): + class Point2D(TypedDict, total=False): x: int y: int - This means that a point2D TypedDict can have any of the keys omitted.A type + This means that a Point2D TypedDict can have any of the keys omitted. A type checker is only expected to support a literal False or True as the value of the total argument. True is the default, and makes all items defined in the class body be required. - The class syntax is only supported in Python 3.6+, while two other - syntax forms work for Python 2.7 and 3.2+ + The Required and NotRequired special forms can also be used to mark + individual keys as being required or not required:: + + class Point2D(TypedDict): + x: int # the "x" key must always be present (Required is the default) + y: NotRequired[int] # the "y" key can be omitted + + See PEP 655 for more details on Required and NotRequired. + + The ReadOnly special form can be used + to mark individual keys as immutable for type checkers:: + + class DatabaseUser(TypedDict): + id: ReadOnly[int] # the "id" key must not be modified + username: str # the "username" key can be changed + """ - if fields is None: - fields = kwargs - elif kwargs: - raise TypeError("TypedDict takes either a dict or keyword arguments," - " but not both") + if fields is _sentinel or fields is None: + import warnings + + if fields is _sentinel: + deprecated_thing = "Failing to pass a value for the 'fields' parameter" + else: + deprecated_thing = "Passing `None` as the 'fields' parameter" + + example = f"`{typename} = TypedDict({typename!r}, {{{{}}}})`" + deprecation_msg = ( + "{name} is deprecated and will be disallowed in Python {remove}. " + "To create a TypedDict class with 0 fields " + "using the functional syntax, " + "pass an empty dictionary, e.g. " + ) + example + "." + warnings._deprecated(deprecated_thing, message=deprecation_msg, remove=(3, 15)) + fields = {} ns = {'__annotations__': dict(fields)} - try: + module = _caller() + if module is not None: # Setting correct module is necessary to make typed dict classes pickleable. - ns['__module__'] = sys._getframe(1).f_globals.get('__name__', '__main__') - except (AttributeError, ValueError): - pass + ns['__module__'] = module - return _TypedDictMeta(typename, (), ns, total=total) + td = _TypedDictMeta(typename, (), ns, total=total) + td.__orig_bases__ = (TypedDict,) + return td _TypedDict = type.__new__(_TypedDictMeta, 'TypedDict', (), {}) TypedDict.__mro_entries__ = lambda bases: (_TypedDict,) +@_SpecialForm +def Required(self, parameters): + """Special typing construct to mark a TypedDict key as required. + + This is mainly useful for total=False TypedDicts. + + For example:: + + class Movie(TypedDict, total=False): + title: Required[str] + year: int + + m = Movie( + title='The Matrix', # typechecker error if key is omitted + year=1999, + ) + + There is no runtime checking that a required key is actually provided + when instantiating a related TypedDict. + """ + item = _type_check(parameters, f'{self._name} accepts only a single type.') + return _GenericAlias(self, (item,)) + + +@_SpecialForm +def NotRequired(self, parameters): + """Special typing construct to mark a TypedDict key as potentially missing. + + For example:: + + class Movie(TypedDict): + title: str + year: NotRequired[int] + + m = Movie( + title='The Matrix', # typechecker error if key is omitted + year=1999, + ) + """ + item = _type_check(parameters, f'{self._name} accepts only a single type.') + return _GenericAlias(self, (item,)) + + +@_SpecialForm +def ReadOnly(self, parameters): + """A special typing construct to mark an item of a TypedDict as read-only. + + For example:: + + class Movie(TypedDict): + title: ReadOnly[str] + year: int + + def mutate_movie(m: Movie) -> None: + m["year"] = 1992 # allowed + m["title"] = "The Matrix" # typechecker error + + There is no runtime checking for this property. + """ + item = _type_check(parameters, f'{self._name} accepts only a single type.') + return _GenericAlias(self, (item,)) + + class NewType: - """NewType creates simple unique types with almost zero - runtime overhead. NewType(name, tp) is considered a subtype of tp + """NewType creates simple unique types with almost zero runtime overhead. + + NewType(name, tp) is considered a subtype of tp by static type checkers. At runtime, NewType(name, tp) returns - a dummy callable that simply returns its argument. Usage:: + a dummy callable that simply returns its argument. + + Usage:: UserId = NewType('UserId', int) @@ -2470,6 +3410,8 @@ def name_by_id(user_id: UserId) -> str: num = UserId(5) + 1 # type: int """ + __call__ = _idfunc + def __init__(self, name, tp): self.__qualname__ = name if '.' in name: @@ -2480,12 +3422,24 @@ def __init__(self, name, tp): if def_mod != 'typing': self.__module__ = def_mod + def __mro_entries__(self, bases): + # We defined __mro_entries__ to get a better error message + # if a user attempts to subclass a NewType instance. bpo-46170 + superclass_name = self.__name__ + + class Dummy: + def __init_subclass__(cls): + subclass_name = cls.__name__ + raise TypeError( + f"Cannot subclass an instance of NewType. Perhaps you were looking for: " + f"`{subclass_name} = NewType({subclass_name!r}, {superclass_name})`" + ) + + return (Dummy,) + def __repr__(self): return f'{self.__module__}.{self.__qualname__}' - def __call__(self, x): - return x - def __reduce__(self): return self.__qualname__ @@ -2652,28 +3606,215 @@ def __enter__(self) -> 'TextIO': pass -class io: - """Wrapper namespace for IO generic classes.""" +def reveal_type[T](obj: T, /) -> T: + """Ask a static type checker to reveal the inferred type of an expression. + + When a static type checker encounters a call to ``reveal_type()``, + it will emit the inferred type of the argument:: + + x: int = 1 + reveal_type(x) + + Running a static type checker (e.g., mypy) on this example + will produce output similar to 'Revealed type is "builtins.int"'. + + At runtime, the function prints the runtime type of the + argument and returns the argument unchanged. + """ + print(f"Runtime type is {type(obj).__name__!r}", file=sys.stderr) + return obj + + +class _IdentityCallable(Protocol): + def __call__[T](self, arg: T, /) -> T: + ... + + +def dataclass_transform( + *, + eq_default: bool = True, + order_default: bool = False, + kw_only_default: bool = False, + frozen_default: bool = False, + field_specifiers: tuple[type[Any] | Callable[..., Any], ...] = (), + **kwargs: Any, +) -> _IdentityCallable: + """Decorator to mark an object as providing dataclass-like behaviour. + + The decorator can be applied to a function, class, or metaclass. + + Example usage with a decorator function:: + + @dataclass_transform() + def create_model[T](cls: type[T]) -> type[T]: + ... + return cls + + @create_model + class CustomerModel: + id: int + name: str + + On a base class:: + + @dataclass_transform() + class ModelBase: ... + + class CustomerModel(ModelBase): + id: int + name: str + + On a metaclass:: + + @dataclass_transform() + class ModelMeta(type): ... + + class ModelBase(metaclass=ModelMeta): ... + + class CustomerModel(ModelBase): + id: int + name: str + + The ``CustomerModel`` classes defined above will + be treated by type checkers similarly to classes created with + ``@dataclasses.dataclass``. + For example, type checkers will assume these classes have + ``__init__`` methods that accept ``id`` and ``name``. + + The arguments to this decorator can be used to customize this behavior: + - ``eq_default`` indicates whether the ``eq`` parameter is assumed to be + ``True`` or ``False`` if it is omitted by the caller. + - ``order_default`` indicates whether the ``order`` parameter is + assumed to be True or False if it is omitted by the caller. + - ``kw_only_default`` indicates whether the ``kw_only`` parameter is + assumed to be True or False if it is omitted by the caller. + - ``frozen_default`` indicates whether the ``frozen`` parameter is + assumed to be True or False if it is omitted by the caller. + - ``field_specifiers`` specifies a static list of supported classes + or functions that describe fields, similar to ``dataclasses.field()``. + - Arbitrary other keyword arguments are accepted in order to allow for + possible future extensions. + + At runtime, this decorator records its arguments in the + ``__dataclass_transform__`` attribute on the decorated object. + It has no other runtime effect. + + See PEP 681 for more details. + """ + def decorator(cls_or_fn): + cls_or_fn.__dataclass_transform__ = { + "eq_default": eq_default, + "order_default": order_default, + "kw_only_default": kw_only_default, + "frozen_default": frozen_default, + "field_specifiers": field_specifiers, + "kwargs": kwargs, + } + return cls_or_fn + return decorator + + +type _Func = Callable[..., Any] + + +def override[F: _Func](method: F, /) -> F: + """Indicate that a method is intended to override a method in a base class. + + Usage:: + + class Base: + def method(self) -> None: + pass + + class Child(Base): + @override + def method(self) -> None: + super().method() + + When this decorator is applied to a method, the type checker will + validate that it overrides a method or attribute with the same name on a + base class. This helps prevent bugs that may occur when a base class is + changed without an equivalent change to a child class. + + There is no runtime checking of this property. The decorator attempts to + set the ``__override__`` attribute to ``True`` on the decorated object to + allow runtime introspection. + + See PEP 698 for details. + """ + try: + method.__override__ = True + except (AttributeError, TypeError): + # Skip the attribute silently if it is not writable. + # AttributeError happens if the object has __slots__ or a + # read-only property, TypeError if it's a builtin class. + pass + return method + + +def is_protocol(tp: type, /) -> bool: + """Return True if the given type is a Protocol. + + Example:: + + >>> from typing import Protocol, is_protocol + >>> class P(Protocol): + ... def a(self) -> str: ... + ... b: int + >>> is_protocol(P) + True + >>> is_protocol(int) + False + """ + return ( + isinstance(tp, type) + and getattr(tp, '_is_protocol', False) + and tp != Protocol + ) - __all__ = ['IO', 'TextIO', 'BinaryIO'] - IO = IO - TextIO = TextIO - BinaryIO = BinaryIO +def get_protocol_members(tp: type, /) -> frozenset[str]: + """Return the set of members defined in a Protocol. -io.__name__ = __name__ + '.io' -sys.modules[io.__name__] = io + Example:: -Pattern = _alias(stdlib_re.Pattern, 1) -Match = _alias(stdlib_re.Match, 1) + >>> from typing import Protocol, get_protocol_members + >>> class P(Protocol): + ... def a(self) -> str: ... + ... b: int + >>> get_protocol_members(P) == frozenset({'a', 'b'}) + True -class re: - """Wrapper namespace for re type aliases.""" + Raise a TypeError for arguments that are not Protocols. + """ + if not is_protocol(tp): + raise TypeError(f'{tp!r} is not a Protocol') + return frozenset(tp.__protocol_attrs__) - __all__ = ['Pattern', 'Match'] - Pattern = Pattern - Match = Match +def __getattr__(attr): + """Improve the import time of the typing module. -re.__name__ = __name__ + '.re' -sys.modules[re.__name__] = re + Soft-deprecated objects which are costly to create + are only created on-demand here. + """ + if attr in {"Pattern", "Match"}: + import re + obj = _alias(getattr(re, attr), 1) + elif attr in {"ContextManager", "AsyncContextManager"}: + import contextlib + obj = _alias(getattr(contextlib, f"Abstract{attr}"), 2, name=attr, defaults=(bool | None,)) + elif attr == "_collect_parameters": + import warnings + + depr_message = ( + "The private _collect_parameters function is deprecated and will be" + " removed in a future version of Python. Any use of private functions" + " is discouraged and may break in the future." + ) + warnings.warn(depr_message, category=DeprecationWarning, stacklevel=2) + obj = _collect_type_parameters + else: + raise AttributeError(f"module {__name__!r} has no attribute {attr!r}") + globals()[attr] = obj + return obj diff --git a/Lib/wave.py b/Lib/wave.py new file mode 100644 index 0000000000..a34af244c3 --- /dev/null +++ b/Lib/wave.py @@ -0,0 +1,663 @@ +"""Stuff to parse WAVE files. + +Usage. + +Reading WAVE files: + f = wave.open(file, 'r') +where file is either the name of a file or an open file pointer. +The open file pointer must have methods read(), seek(), and close(). +When the setpos() and rewind() methods are not used, the seek() +method is not necessary. + +This returns an instance of a class with the following public methods: + getnchannels() -- returns number of audio channels (1 for + mono, 2 for stereo) + getsampwidth() -- returns sample width in bytes + getframerate() -- returns sampling frequency + getnframes() -- returns number of audio frames + getcomptype() -- returns compression type ('NONE' for linear samples) + getcompname() -- returns human-readable version of + compression type ('not compressed' linear samples) + getparams() -- returns a namedtuple consisting of all of the + above in the above order + getmarkers() -- returns None (for compatibility with the + old aifc module) + getmark(id) -- raises an error since the mark does not + exist (for compatibility with the old aifc module) + readframes(n) -- returns at most n frames of audio + rewind() -- rewind to the beginning of the audio stream + setpos(pos) -- seek to the specified position + tell() -- return the current position + close() -- close the instance (make it unusable) +The position returned by tell() and the position given to setpos() +are compatible and have nothing to do with the actual position in the +file. +The close() method is called automatically when the class instance +is destroyed. + +Writing WAVE files: + f = wave.open(file, 'w') +where file is either the name of a file or an open file pointer. +The open file pointer must have methods write(), tell(), seek(), and +close(). + +This returns an instance of a class with the following public methods: + setnchannels(n) -- set the number of channels + setsampwidth(n) -- set the sample width + setframerate(n) -- set the frame rate + setnframes(n) -- set the number of frames + setcomptype(type, name) + -- set the compression type and the + human-readable compression type + setparams(tuple) + -- set all parameters at once + tell() -- return current position in output file + writeframesraw(data) + -- write audio frames without patching up the + file header + writeframes(data) + -- write audio frames and patch up the file header + close() -- patch up the file header and close the + output file +You should set the parameters before the first writeframesraw or +writeframes. The total number of frames does not need to be set, +but when it is set to the correct value, the header does not have to +be patched up. +It is best to first set all parameters, perhaps possibly the +compression type, and then write audio frames using writeframesraw. +When all frames have been written, either call writeframes(b'') or +close() to patch up the sizes in the header. +The close() method is called automatically when the class instance +is destroyed. +""" + +from collections import namedtuple +import builtins +import struct +import sys + + +__all__ = ["open", "Error", "Wave_read", "Wave_write"] + +class Error(Exception): + pass + +WAVE_FORMAT_PCM = 0x0001 +WAVE_FORMAT_EXTENSIBLE = 0xFFFE +# Derived from uuid.UUID("00000001-0000-0010-8000-00aa00389b71").bytes_le +KSDATAFORMAT_SUBTYPE_PCM = b'\x01\x00\x00\x00\x00\x00\x10\x00\x80\x00\x00\xaa\x008\x9bq' + +_array_fmts = None, 'b', 'h', None, 'i' + +_wave_params = namedtuple('_wave_params', + 'nchannels sampwidth framerate nframes comptype compname') + + +def _byteswap(data, width): + swapped_data = bytearray(len(data)) + + for i in range(0, len(data), width): + for j in range(width): + swapped_data[i + width - 1 - j] = data[i + j] + + return bytes(swapped_data) + + +class _Chunk: + def __init__(self, file, align=True, bigendian=True, inclheader=False): + self.closed = False + self.align = align # whether to align to word (2-byte) boundaries + if bigendian: + strflag = '>' + else: + strflag = '<' + self.file = file + self.chunkname = file.read(4) + if len(self.chunkname) < 4: + raise EOFError + try: + self.chunksize = struct.unpack_from(strflag+'L', file.read(4))[0] + except struct.error: + raise EOFError from None + if inclheader: + self.chunksize = self.chunksize - 8 # subtract header + self.size_read = 0 + try: + self.offset = self.file.tell() + except (AttributeError, OSError): + self.seekable = False + else: + self.seekable = True + + def getname(self): + """Return the name (ID) of the current chunk.""" + return self.chunkname + + def close(self): + if not self.closed: + try: + self.skip() + finally: + self.closed = True + + def seek(self, pos, whence=0): + """Seek to specified position into the chunk. + Default position is 0 (start of chunk). + If the file is not seekable, this will result in an error. + """ + + if self.closed: + raise ValueError("I/O operation on closed file") + if not self.seekable: + raise OSError("cannot seek") + if whence == 1: + pos = pos + self.size_read + elif whence == 2: + pos = pos + self.chunksize + if pos < 0 or pos > self.chunksize: + raise RuntimeError + self.file.seek(self.offset + pos, 0) + self.size_read = pos + + def tell(self): + if self.closed: + raise ValueError("I/O operation on closed file") + return self.size_read + + def read(self, size=-1): + """Read at most size bytes from the chunk. + If size is omitted or negative, read until the end + of the chunk. + """ + + if self.closed: + raise ValueError("I/O operation on closed file") + if self.size_read >= self.chunksize: + return b'' + if size < 0: + size = self.chunksize - self.size_read + if size > self.chunksize - self.size_read: + size = self.chunksize - self.size_read + data = self.file.read(size) + self.size_read = self.size_read + len(data) + if self.size_read == self.chunksize and \ + self.align and \ + (self.chunksize & 1): + dummy = self.file.read(1) + self.size_read = self.size_read + len(dummy) + return data + + def skip(self): + """Skip the rest of the chunk. + If you are not interested in the contents of the chunk, + this method should be called so that the file points to + the start of the next chunk. + """ + + if self.closed: + raise ValueError("I/O operation on closed file") + if self.seekable: + try: + n = self.chunksize - self.size_read + # maybe fix alignment + if self.align and (self.chunksize & 1): + n = n + 1 + self.file.seek(n, 1) + self.size_read = self.size_read + n + return + except OSError: + pass + while self.size_read < self.chunksize: + n = min(8192, self.chunksize - self.size_read) + dummy = self.read(n) + if not dummy: + raise EOFError + + +class Wave_read: + """Variables used in this class: + + These variables are available to the user though appropriate + methods of this class: + _file -- the open file with methods read(), close(), and seek() + set through the __init__() method + _nchannels -- the number of audio channels + available through the getnchannels() method + _nframes -- the number of audio frames + available through the getnframes() method + _sampwidth -- the number of bytes per audio sample + available through the getsampwidth() method + _framerate -- the sampling frequency + available through the getframerate() method + _comptype -- the AIFF-C compression type ('NONE' if AIFF) + available through the getcomptype() method + _compname -- the human-readable AIFF-C compression type + available through the getcomptype() method + _soundpos -- the position in the audio stream + available through the tell() method, set through the + setpos() method + + These variables are used internally only: + _fmt_chunk_read -- 1 iff the FMT chunk has been read + _data_seek_needed -- 1 iff positioned correctly in audio + file for readframes() + _data_chunk -- instantiation of a chunk class for the DATA chunk + _framesize -- size of one frame in the file + """ + + def initfp(self, file): + self._convert = None + self._soundpos = 0 + self._file = _Chunk(file, bigendian = 0) + if self._file.getname() != b'RIFF': + raise Error('file does not start with RIFF id') + if self._file.read(4) != b'WAVE': + raise Error('not a WAVE file') + self._fmt_chunk_read = 0 + self._data_chunk = None + while 1: + self._data_seek_needed = 1 + try: + chunk = _Chunk(self._file, bigendian = 0) + except EOFError: + break + chunkname = chunk.getname() + if chunkname == b'fmt ': + self._read_fmt_chunk(chunk) + self._fmt_chunk_read = 1 + elif chunkname == b'data': + if not self._fmt_chunk_read: + raise Error('data chunk before fmt chunk') + self._data_chunk = chunk + self._nframes = chunk.chunksize // self._framesize + self._data_seek_needed = 0 + break + chunk.skip() + if not self._fmt_chunk_read or not self._data_chunk: + raise Error('fmt chunk and/or data chunk missing') + + def __init__(self, f): + self._i_opened_the_file = None + if isinstance(f, str): + f = builtins.open(f, 'rb') + self._i_opened_the_file = f + # else, assume it is an open file object already + try: + self.initfp(f) + except: + if self._i_opened_the_file: + f.close() + raise + + def __del__(self): + self.close() + + def __enter__(self): + return self + + def __exit__(self, *args): + self.close() + + # + # User visible methods. + # + def getfp(self): + return self._file + + def rewind(self): + self._data_seek_needed = 1 + self._soundpos = 0 + + def close(self): + self._file = None + file = self._i_opened_the_file + if file: + self._i_opened_the_file = None + file.close() + + def tell(self): + return self._soundpos + + def getnchannels(self): + return self._nchannels + + def getnframes(self): + return self._nframes + + def getsampwidth(self): + return self._sampwidth + + def getframerate(self): + return self._framerate + + def getcomptype(self): + return self._comptype + + def getcompname(self): + return self._compname + + def getparams(self): + return _wave_params(self.getnchannels(), self.getsampwidth(), + self.getframerate(), self.getnframes(), + self.getcomptype(), self.getcompname()) + + def getmarkers(self): + import warnings + warnings._deprecated("Wave_read.getmarkers", remove=(3, 15)) + return None + + def getmark(self, id): + import warnings + warnings._deprecated("Wave_read.getmark", remove=(3, 15)) + raise Error('no marks') + + def setpos(self, pos): + if pos < 0 or pos > self._nframes: + raise Error('position not in range') + self._soundpos = pos + self._data_seek_needed = 1 + + def readframes(self, nframes): + if self._data_seek_needed: + self._data_chunk.seek(0, 0) + pos = self._soundpos * self._framesize + if pos: + self._data_chunk.seek(pos, 0) + self._data_seek_needed = 0 + if nframes == 0: + return b'' + data = self._data_chunk.read(nframes * self._framesize) + if self._sampwidth != 1 and sys.byteorder == 'big': + data = _byteswap(data, self._sampwidth) + if self._convert and data: + data = self._convert(data) + self._soundpos = self._soundpos + len(data) // (self._nchannels * self._sampwidth) + return data + + # + # Internal methods. + # + + def _read_fmt_chunk(self, chunk): + try: + wFormatTag, self._nchannels, self._framerate, dwAvgBytesPerSec, wBlockAlign = struct.unpack_from(' 4: + raise Error('bad sample width') + self._sampwidth = sampwidth + + def getsampwidth(self): + if not self._sampwidth: + raise Error('sample width not set') + return self._sampwidth + + def setframerate(self, framerate): + if self._datawritten: + raise Error('cannot change parameters after starting to write') + if framerate <= 0: + raise Error('bad frame rate') + self._framerate = int(round(framerate)) + + def getframerate(self): + if not self._framerate: + raise Error('frame rate not set') + return self._framerate + + def setnframes(self, nframes): + if self._datawritten: + raise Error('cannot change parameters after starting to write') + self._nframes = nframes + + def getnframes(self): + return self._nframeswritten + + def setcomptype(self, comptype, compname): + if self._datawritten: + raise Error('cannot change parameters after starting to write') + if comptype not in ('NONE',): + raise Error('unsupported compression type') + self._comptype = comptype + self._compname = compname + + def getcomptype(self): + return self._comptype + + def getcompname(self): + return self._compname + + def setparams(self, params): + nchannels, sampwidth, framerate, nframes, comptype, compname = params + if self._datawritten: + raise Error('cannot change parameters after starting to write') + self.setnchannels(nchannels) + self.setsampwidth(sampwidth) + self.setframerate(framerate) + self.setnframes(nframes) + self.setcomptype(comptype, compname) + + def getparams(self): + if not self._nchannels or not self._sampwidth or not self._framerate: + raise Error('not all parameters set') + return _wave_params(self._nchannels, self._sampwidth, self._framerate, + self._nframes, self._comptype, self._compname) + + def setmark(self, id, pos, name): + import warnings + warnings._deprecated("Wave_write.setmark", remove=(3, 15)) + raise Error('setmark() not supported') + + def getmark(self, id): + import warnings + warnings._deprecated("Wave_write.getmark", remove=(3, 15)) + raise Error('no marks') + + def getmarkers(self): + import warnings + warnings._deprecated("Wave_write.getmarkers", remove=(3, 15)) + return None + + def tell(self): + return self._nframeswritten + + def writeframesraw(self, data): + if not isinstance(data, (bytes, bytearray)): + data = memoryview(data).cast('B') + self._ensure_header_written(len(data)) + nframes = len(data) // (self._sampwidth * self._nchannels) + if self._convert: + data = self._convert(data) + if self._sampwidth != 1 and sys.byteorder == 'big': + data = _byteswap(data, self._sampwidth) + self._file.write(data) + self._datawritten += len(data) + self._nframeswritten = self._nframeswritten + nframes + + def writeframes(self, data): + self.writeframesraw(data) + if self._datalength != self._datawritten: + self._patchheader() + + def close(self): + try: + if self._file: + self._ensure_header_written(0) + if self._datalength != self._datawritten: + self._patchheader() + self._file.flush() + finally: + self._file = None + file = self._i_opened_the_file + if file: + self._i_opened_the_file = None + file.close() + + # + # Internal methods. + # + + def _ensure_header_written(self, datasize): + if not self._headerwritten: + if not self._nchannels: + raise Error('# channels not specified') + if not self._sampwidth: + raise Error('sample width not specified') + if not self._framerate: + raise Error('sampling rate not specified') + self._write_header(datasize) + + def _write_header(self, initlength): + assert not self._headerwritten + self._file.write(b'RIFF') + if not self._nframes: + self._nframes = initlength // (self._nchannels * self._sampwidth) + self._datalength = self._nframes * self._nchannels * self._sampwidth + try: + self._form_length_pos = self._file.tell() + except (AttributeError, OSError): + self._form_length_pos = None + self._file.write(struct.pack(' + # The Links/elinks browsers if shutil.which("links"): register("links", None, GenericBrowser("links")) if shutil.which("elinks"): register("elinks", None, Elinks("elinks")) - # The Lynx browser , + # The Lynx browser , if shutil.which("lynx"): register("lynx", None, GenericBrowser("lynx")) # The w3m browser @@ -613,105 +586,125 @@ def open(self, url, new=0, autoraise=True): return True # -# Platform support for MacOS +# Platform support for macOS # if sys.platform == 'darwin': - # Adapted from patch submitted to SourceForge by Steven J. Burr - class MacOSX(BaseBrowser): - """Launcher class for Aqua browsers on Mac OS X - - Optionally specify a browser name on instantiation. Note that this - will not work for Aqua browsers if the user has moved the application - package after installation. - - If no browser is specified, the default browser, as specified in the - Internet System Preferences panel, will be used. - """ - def __init__(self, name): - self.name = name + class MacOSXOSAScript(BaseBrowser): + def __init__(self, name='default'): + super().__init__(name) def open(self, url, new=0, autoraise=True): sys.audit("webbrowser.open", url) - assert "'" not in url - # hack for local urls - if not ':' in url: - url = 'file:'+url - - # new must be 0 or 1 - new = int(bool(new)) - if self.name == "default": - # User called open, open_new or get without a browser parameter - script = 'open location "%s"' % url.replace('"', '%22') # opens in default browser + url = url.replace('"', '%22') + if self.name == 'default': + script = f'open location "{url}"' # opens in default browser else: - # User called get and chose a browser - if self.name == "OmniWeb": - toWindow = "" - else: - # Include toWindow parameter of OpenURL command for browsers - # that support it. 0 == new window; -1 == existing - toWindow = "toWindow %d" % (new - 1) - cmd = 'OpenURL "%s"' % url.replace('"', '%22') - script = '''tell application "%s" - activate - %s %s - end tell''' % (self.name, cmd, toWindow) - # Open pipe to AppleScript through osascript command + script = f''' + tell application "{self.name}" + activate + open location "{url}" + end + ''' + osapipe = os.popen("osascript", "w") if osapipe is None: return False - # Write script to osascript's stdin + osapipe.write(script) rc = osapipe.close() return not rc - class MacOSXOSAScript(BaseBrowser): - def __init__(self, name): - self._name = name +# +# Platform support for iOS +# +if sys.platform == "ios": + from _ios_support import objc + if objc: + # If objc exists, we know ctypes is also importable. + from ctypes import c_void_p, c_char_p, c_ulong + class IOSBrowser(BaseBrowser): def open(self, url, new=0, autoraise=True): - if self._name == 'default': - script = 'open location "%s"' % url.replace('"', '%22') # opens in default browser - else: - script = ''' - tell application "%s" - activate - open location "%s" - end - '''%(self._name, url.replace('"', '%22')) - - osapipe = os.popen("osascript", "w") - if osapipe is None: + sys.audit("webbrowser.open", url) + # If ctypes isn't available, we can't open a browser + if objc is None: return False - osapipe.write(script) - rc = osapipe.close() - return not rc + # All the messages in this call return object references. + objc.objc_msgSend.restype = c_void_p + + # This is the equivalent of: + # NSString url_string = + # [NSString stringWithCString:url.encode("utf-8") + # encoding:NSUTF8StringEncoding]; + NSString = objc.objc_getClass(b"NSString") + constructor = objc.sel_registerName(b"stringWithCString:encoding:") + objc.objc_msgSend.argtypes = [c_void_p, c_void_p, c_char_p, c_ulong] + url_string = objc.objc_msgSend( + NSString, + constructor, + url.encode("utf-8"), + 4, # NSUTF8StringEncoding = 4 + ) + + # Create an NSURL object representing the URL + # This is the equivalent of: + # NSURL *nsurl = [NSURL URLWithString:url]; + NSURL = objc.objc_getClass(b"NSURL") + urlWithString_ = objc.sel_registerName(b"URLWithString:") + objc.objc_msgSend.argtypes = [c_void_p, c_void_p, c_void_p] + ns_url = objc.objc_msgSend(NSURL, urlWithString_, url_string) + + # Get the shared UIApplication instance + # This code is the equivalent of: + # UIApplication shared_app = [UIApplication sharedApplication] + UIApplication = objc.objc_getClass(b"UIApplication") + sharedApplication = objc.sel_registerName(b"sharedApplication") + objc.objc_msgSend.argtypes = [c_void_p, c_void_p] + shared_app = objc.objc_msgSend(UIApplication, sharedApplication) + + # Open the URL on the shared application + # This code is the equivalent of: + # [shared_app openURL:ns_url + # options:NIL + # completionHandler:NIL]; + openURL_ = objc.sel_registerName(b"openURL:options:completionHandler:") + objc.objc_msgSend.argtypes = [ + c_void_p, c_void_p, c_void_p, c_void_p, c_void_p + ] + # Method returns void + objc.objc_msgSend.restype = None + objc.objc_msgSend(shared_app, openURL_, ns_url, None, None) + return True -def main(): - import getopt - usage = """Usage: %s [-n | -t] url - -n: open new window - -t: open new tab""" % sys.argv[0] - try: - opts, args = getopt.getopt(sys.argv[1:], 'ntd') - except getopt.error as msg: - print(msg, file=sys.stderr) - print(usage, file=sys.stderr) - sys.exit(1) - new_win = 0 - for o, a in opts: - if o == '-n': new_win = 1 - elif o == '-t': new_win = 2 - if len(args) != 1: - print(usage, file=sys.stderr) - sys.exit(1) - - url = args[0] - open(url, new_win) + +def parse_args(arg_list: list[str] | None): + import argparse + parser = argparse.ArgumentParser(description="Open URL in a web browser.") + parser.add_argument("url", help="URL to open") + + group = parser.add_mutually_exclusive_group() + group.add_argument("-n", "--new-window", action="store_const", + const=1, default=0, dest="new_win", + help="open new window") + group.add_argument("-t", "--new-tab", action="store_const", + const=2, default=0, dest="new_win", + help="open new tab") + + args = parser.parse_args(arg_list) + + return args + + +def main(arg_list: list[str] | None = None): + args = parse_args(arg_list) + + open(args.url, args.new_win) print("\a") + if __name__ == "__main__": main() diff --git a/README.md b/README.md index 38e4d8fa8c..ce5f02bee2 100644 --- a/README.md +++ b/README.md @@ -13,7 +13,6 @@ A Python-3 (CPython >= 3.13.0) Interpreter written in Rust :snake: :scream: [![docs.rs](https://docs.rs/rustpython/badge.svg)](https://docs.rs/rustpython/) [![Crates.io](https://img.shields.io/crates/v/rustpython)](https://crates.io/crates/rustpython) [![dependency status](https://deps.rs/crate/rustpython/0.1.1/status.svg)](https://deps.rs/crate/rustpython/0.1.1) -[![WAPM package](https://wapm.io/package/rustpython/badge.svg?style=flat)](https://wapm.io/package/rustpython) [![Open in Gitpod](https://img.shields.io/static/v1?label=Open%20in&message=Gitpod&color=1aa6e4&logo=gitpod)](https://gitpod.io#https://github.com/RustPython/RustPython) ## Usage @@ -32,6 +31,11 @@ To build RustPython locally, first, clone the source code: git clone https://github.com/RustPython/RustPython ``` +RustPython uses symlinks to manage python libraries in `Lib/`. If on windows, running the following helps: +```bash +git config core.symlinks true +``` + Then you can change into the RustPython directory and run the demo (Note: `--release` is needed to prevent stack overflow on Windows): @@ -222,7 +226,7 @@ To enhance CPython compatibility, try to increase unittest coverage by checking Another approach is to checkout the source code: builtin functions and object methods are often the simplest and easiest way to contribute. -You can also simply run `./whats_left.py` to assist in finding any unimplemented +You can also simply run `python -I whats_left.py` to assist in finding any unimplemented method. ## Compiling to WebAssembly diff --git a/benches/execution.rs b/benches/execution.rs index d38dab0890..7a7ba247e5 100644 --- a/benches/execution.rs +++ b/benches/execution.rs @@ -9,16 +9,18 @@ use std::collections::HashMap; use std::path::Path; fn bench_cpython_code(b: &mut Bencher, source: &str) { + let c_str_source_head = std::ffi::CString::new(source).unwrap(); + let c_str_source = c_str_source_head.as_c_str(); pyo3::Python::with_gil(|py| { b.iter(|| { - let module = pyo3::types::PyModule::from_code_bound(py, source, "", "") + let module = pyo3::types::PyModule::from_code(py, c_str_source, c"", c"") .expect("Error running source"); black_box(module); }) }) } -fn bench_rustpy_code(b: &mut Bencher, name: &str, source: &str) { +fn bench_rustpython_code(b: &mut Bencher, name: &str, source: &str) { // NOTE: Take long time. let mut settings = Settings::default(); settings.path_list.push("Lib/".to_string()); @@ -41,7 +43,7 @@ pub fn benchmark_file_execution(group: &mut BenchmarkGroup, name: &str bench_cpython_code(b, contents) }); group.bench_function(BenchmarkId::new(name, "rustpython"), |b| { - bench_rustpy_code(b, name, contents) + bench_rustpython_code(b, name, contents) }); } @@ -53,8 +55,8 @@ pub fn benchmark_file_parsing(group: &mut BenchmarkGroup, name: &str, group.bench_function(BenchmarkId::new("cpython", name), |b| { use pyo3::types::PyAnyMethods; pyo3::Python::with_gil(|py| { - let builtins = pyo3::types::PyModule::import_bound(py, "builtins") - .expect("Failed to import builtins"); + let builtins = + pyo3::types::PyModule::import(py, "builtins").expect("Failed to import builtins"); let compile = builtins.getattr("compile").expect("no compile in builtins"); b.iter(|| { let x = compile @@ -69,7 +71,7 @@ pub fn benchmark_file_parsing(group: &mut BenchmarkGroup, name: &str, pub fn benchmark_pystone(group: &mut BenchmarkGroup, contents: String) { // Default is 50_000. This takes a while, so reduce it to 30k. for idx in (10_000..=30_000).step_by(10_000) { - let code_with_loops = format!("LOOPS = {}\n{}", idx, contents); + let code_with_loops = format!("LOOPS = {idx}\n{contents}"); let code_str = code_with_loops.as_str(); group.throughput(Throughput::Elements(idx as u64)); @@ -77,7 +79,7 @@ pub fn benchmark_pystone(group: &mut BenchmarkGroup, contents: String) bench_cpython_code(b, code_str) }); group.bench_function(BenchmarkId::new("rustpython", idx), |b| { - bench_rustpy_code(b, "pystone", code_str) + bench_rustpython_code(b, "pystone", code_str) }); } } diff --git a/benches/microbenchmarks.rs b/benches/microbenchmarks.rs index 6f41f00d6c..5f04f4bbf8 100644 --- a/benches/microbenchmarks.rs +++ b/benches/microbenchmarks.rs @@ -45,7 +45,7 @@ fn bench_cpython_code(group: &mut BenchmarkGroup, bench: &MicroBenchma // Grab the exec function in advance so we don't have lookups in the hot code let builtins = - pyo3::types::PyModule::import_bound(py, "builtins").expect("Failed to import builtins"); + pyo3::types::PyModule::import(py, "builtins").expect("Failed to import builtins"); let exec = builtins.getattr("exec").expect("no exec in builtins"); let bench_func = |(globals, locals): &mut ( @@ -60,8 +60,8 @@ fn bench_cpython_code(group: &mut BenchmarkGroup, bench: &MicroBenchma }; let bench_setup = |iterations| { - let globals = pyo3::types::PyDict::new_bound(py); - let locals = pyo3::types::PyDict::new_bound(py); + let globals = pyo3::types::PyDict::new(py); + let locals = pyo3::types::PyDict::new(py); if let Some(idx) = iterations { globals.set_item("ITERATIONS", idx).unwrap(); } @@ -99,12 +99,12 @@ fn cpy_compile_code<'a>( name: &str, ) -> pyo3::PyResult> { let builtins = - pyo3::types::PyModule::import_bound(py, "builtins").expect("Failed to import builtins"); + pyo3::types::PyModule::import(py, "builtins").expect("Failed to import builtins"); let compile = builtins.getattr("compile").expect("no compile in builtins"); compile.call1((code, name, "exec"))?.extract() } -fn bench_rustpy_code(group: &mut BenchmarkGroup, bench: &MicroBenchmark) { +fn bench_rustpython_code(group: &mut BenchmarkGroup, bench: &MicroBenchmark) { let mut settings = Settings::default(); settings.path_list.push("Lib/".to_string()); settings.write_bytecode = false; @@ -169,7 +169,7 @@ pub fn run_micro_benchmark(c: &mut Criterion, benchmark: MicroBenchmark) { let mut group = c.benchmark_group("microbenchmarks"); bench_cpython_code(&mut group, &benchmark); - bench_rustpy_code(&mut group, &benchmark); + bench_rustpython_code(&mut group, &benchmark); group.finish(); } diff --git a/build.rs b/build.rs new file mode 100644 index 0000000000..adebd659ad --- /dev/null +++ b/build.rs @@ -0,0 +1,17 @@ +fn main() { + if std::env::var("CARGO_CFG_TARGET_OS").unwrap() == "windows" { + println!("cargo:rerun-if-changed=logo.ico"); + let mut res = winresource::WindowsResource::new(); + if std::path::Path::new("logo.ico").exists() { + res.set_icon("logo.ico"); + } else { + println!("cargo:warning=logo.ico not found, skipping icon embedding"); + return; + } + res.compile() + .map_err(|e| { + println!("cargo:warning=Failed to compile Windows resources: {e}"); + }) + .ok(); + } +} diff --git a/common/Cargo.toml b/common/Cargo.toml index 299c2875b2..4eab8440df 100644 --- a/common/Cargo.toml +++ b/common/Cargo.toml @@ -30,11 +30,10 @@ num-traits = { workspace = true } once_cell = { workspace = true } parking_lot = { workspace = true, optional = true } unicode_names2 = { workspace = true } +radium = { workspace = true } lock_api = "0.4" -radium = "0.7" siphasher = "1" -volatile = "0.3" [target.'cfg(windows)'.dependencies] widestring = { workspace = true } diff --git a/common/src/borrow.rs b/common/src/borrow.rs index ce86d71e27..610084006e 100644 --- a/common/src/borrow.rs +++ b/common/src/borrow.rs @@ -56,6 +56,7 @@ impl<'a, T: ?Sized> BorrowedValue<'a, T> { impl Deref for BorrowedValue<'_, T> { type Target = T; + fn deref(&self) -> &T { match self { Self::Ref(r) => r, @@ -81,6 +82,7 @@ pub enum BorrowedValueMut<'a, T: ?Sized> { WriteLock(PyRwLockWriteGuard<'a, T>), MappedWriteLock(PyMappedRwLockWriteGuard<'a, T>), } + impl_from!('a, T, BorrowedValueMut<'a, T>, RefMut(&'a mut T), MuLock(PyMutexGuard<'a, T>), @@ -108,6 +110,7 @@ impl<'a, T: ?Sized> BorrowedValueMut<'a, T> { impl Deref for BorrowedValueMut<'_, T> { type Target = T; + fn deref(&self) -> &T { match self { Self::RefMut(r) => r, diff --git a/common/src/boxvec.rs b/common/src/boxvec.rs index 1a1d57c169..2b464509bd 100644 --- a/common/src/boxvec.rs +++ b/common/src/boxvec.rs @@ -1,3 +1,4 @@ +// spell-checker:disable //! An unresizable vector backed by a `Box<[T]>` #![allow(clippy::needless_lifetimes)] @@ -45,25 +46,25 @@ impl BoxVec { } #[inline] - pub fn len(&self) -> usize { + pub const fn len(&self) -> usize { self.len } #[inline] - pub fn is_empty(&self) -> bool { + pub const fn is_empty(&self) -> bool { self.len() == 0 } #[inline] - pub fn capacity(&self) -> usize { + pub const fn capacity(&self) -> usize { self.xs.len() } - pub fn is_full(&self) -> bool { + pub const fn is_full(&self) -> bool { self.len() == self.capacity() } - pub fn remaining_capacity(&self) -> usize { + pub const fn remaining_capacity(&self) -> usize { self.capacity() - self.len() } @@ -335,6 +336,7 @@ impl BoxVec { impl Deref for BoxVec { type Target = [T]; + #[inline] fn deref(&self) -> &[T] { unsafe { slice::from_raw_parts(self.as_ptr(), self.len()) } @@ -353,6 +355,7 @@ impl DerefMut for BoxVec { impl<'a, T> IntoIterator for &'a BoxVec { type Item = &'a T; type IntoIter = slice::Iter<'a, T>; + fn into_iter(self) -> Self::IntoIter { self.iter() } @@ -362,6 +365,7 @@ impl<'a, T> IntoIterator for &'a BoxVec { impl<'a, T> IntoIterator for &'a mut BoxVec { type Item = &'a mut T; type IntoIter = slice::IterMut<'a, T>; + fn into_iter(self) -> Self::IntoIter { self.iter_mut() } @@ -373,6 +377,7 @@ impl<'a, T> IntoIterator for &'a mut BoxVec { impl IntoIterator for BoxVec { type Item = T; type IntoIter = IntoIter; + fn into_iter(self) -> IntoIter { IntoIter { index: 0, v: self } } @@ -671,7 +676,7 @@ pub struct CapacityError { impl CapacityError { /// Create a new `CapacityError` from `element`. - pub fn new(element: T) -> CapacityError { + pub const fn new(element: T) -> CapacityError { CapacityError { element } } diff --git a/common/src/cformat.rs b/common/src/cformat.rs index e62ffca65e..25d9aaca66 100644 --- a/common/src/cformat.rs +++ b/common/src/cformat.rs @@ -76,8 +76,9 @@ pub enum CFloatType { } impl CFloatType { - fn case(self) -> Case { + const fn case(self) -> Case { use CFloatType::*; + match self { ExponentLower | PointDecimalLower | GeneralLower => Case::Lower, ExponentUpper | PointDecimalUpper | GeneralUpper => Case::Upper, @@ -100,7 +101,7 @@ pub enum CFormatType { } impl CFormatType { - pub fn to_char(self) -> char { + pub const fn to_char(self) -> char { match self { CFormatType::Number(x) => x as u8 as char, CFormatType::Float(x) => x as u8 as char, @@ -136,9 +137,9 @@ bitflags! { impl CConversionFlags { #[inline] pub fn sign_string(&self) -> &'static str { - if self.contains(CConversionFlags::SIGN_CHAR) { + if self.contains(Self::SIGN_CHAR) { "+" - } else if self.contains(CConversionFlags::BLANK_SIGN) { + } else if self.contains(Self::BLANK_SIGN) { " " } else { "" @@ -171,12 +172,15 @@ pub trait FormatChar: Copy + Into + From { impl FormatBuf for String { type Char = char; + fn chars(&self) -> impl Iterator { (**self).chars() } + fn len(&self) -> usize { self.len() } + fn concat(mut self, other: Self) -> Self { self.extend([other]); self @@ -187,6 +191,7 @@ impl FormatChar for char { fn to_char_lossy(self) -> char { self } + fn eq_char(self, c: char) -> bool { self == c } @@ -194,12 +199,15 @@ impl FormatChar for char { impl FormatBuf for Wtf8Buf { type Char = CodePoint; + fn chars(&self) -> impl Iterator { self.code_points() } + fn len(&self) -> usize { (**self).len() } + fn concat(mut self, other: Self) -> Self { self.extend([other]); self @@ -210,6 +218,7 @@ impl FormatChar for CodePoint { fn to_char_lossy(self) -> char { self.to_char_lossy() } + fn eq_char(self, c: char) -> bool { self == c } @@ -217,12 +226,15 @@ impl FormatChar for CodePoint { impl FormatBuf for Vec { type Char = u8; + fn chars(&self) -> impl Iterator { self.iter().copied() } + fn len(&self) -> usize { self.len() } + fn concat(mut self, other: Self) -> Self { self.extend(other); self @@ -233,6 +245,7 @@ impl FormatChar for u8 { fn to_char_lossy(self) -> char { self.into() } + fn eq_char(self, c: char) -> bool { char::from(self) == c } @@ -393,6 +406,7 @@ impl CFormatSpec { Some(&(CFormatQuantity::Amount(1).into())), ) } + pub fn format_bytes(&self, bytes: &[u8]) -> Vec { let bytes = if let Some(CFormatPrecision::Quantity(CFormatQuantity::Amount(precision))) = self.precision @@ -706,12 +720,12 @@ pub enum CFormatPart { impl CFormatPart { #[inline] - pub fn is_specifier(&self) -> bool { + pub const fn is_specifier(&self) -> bool { matches!(self, CFormatPart::Spec { .. }) } #[inline] - pub fn has_key(&self) -> bool { + pub const fn has_key(&self) -> bool { match self { CFormatPart::Spec(s) => s.mapping_key.is_some(), _ => false, @@ -803,6 +817,7 @@ impl CFormatStrOrBytes { impl IntoIterator for CFormatStrOrBytes { type Item = (usize, CFormatPart); type IntoIter = std::vec::IntoIter; + fn into_iter(self) -> Self::IntoIter { self.parts.into_iter() } diff --git a/common/src/cmp.rs b/common/src/cmp.rs deleted file mode 100644 index d182340a98..0000000000 --- a/common/src/cmp.rs +++ /dev/null @@ -1,48 +0,0 @@ -use volatile::Volatile; - -/// Compare 2 byte slices in a way that ensures that the timing of the operation can't be used to -/// glean any information about the data. -#[inline(never)] -#[cold] -pub fn timing_safe_cmp(a: &[u8], b: &[u8]) -> bool { - // we use raw pointers here to keep faithful to the C implementation and - // to try to avoid any optimizations rustc might do with slices - let len_a = a.len(); - let a = a.as_ptr(); - let len_b = b.len(); - let b = b.as_ptr(); - /* The volatile type declarations make sure that the compiler has no - * chance to optimize and fold the code in any way that may change - * the timing. - */ - let mut result: u8 = 0; - /* loop count depends on length of b */ - let length: Volatile = Volatile::new(len_b); - let mut left: Volatile<*const u8> = Volatile::new(std::ptr::null()); - let mut right: Volatile<*const u8> = Volatile::new(b); - - /* don't use else here to keep the amount of CPU instructions constant, - * volatile forces re-evaluation - * */ - if len_a == length.read() { - left.write(Volatile::new(a).read()); - result = 0; - } - if len_a != length.read() { - left.write(b); - result = 1; - } - - for _ in 0..length.read() { - let l = left.read(); - left.write(l.wrapping_add(1)); - let r = right.read(); - right.write(r.wrapping_add(1)); - // safety: the 0..length range will always be either: - // * as long as the length of both a and b, if len_a and len_b are equal - // * as long as b, and both `left` and `right` are b - result |= unsafe { l.read_volatile() ^ r.read_volatile() }; - } - - result == 0 -} diff --git a/common/src/encodings.rs b/common/src/encodings.rs index c444e27a5a..9e89f9cdec 100644 --- a/common/src/encodings.rs +++ b/common/src/encodings.rs @@ -121,6 +121,7 @@ impl ops::Add for StrSize { } } } + impl ops::AddAssign for StrSize { fn add_assign(&mut self, rhs: Self) { self.bytes += rhs.bytes; @@ -133,6 +134,7 @@ struct DecodeError<'a> { rest: &'a [u8], err_len: Option, } + /// # Safety /// `v[..valid_up_to]` must be valid utf8 unsafe fn make_decode_err(v: &[u8], valid_up_to: usize, err_len: Option) -> DecodeError<'_> { @@ -152,6 +154,7 @@ enum HandleResult<'a> { reason: &'a str, }, } + fn decode_utf8_compatible( mut ctx: Ctx, errors: &E, diff --git a/common/src/fileutils.rs b/common/src/fileutils.rs index 67713c0148..e9a93947c1 100644 --- a/common/src/fileutils.rs +++ b/common/src/fileutils.rs @@ -78,7 +78,7 @@ pub mod windows { .encode_wide() .collect::>() .split(|&c| c == '.' as u16) - .last() + .next_back() .and_then(|s| String::from_utf16(s).ok()); if let Some(file_extension) = file_extension { @@ -256,7 +256,7 @@ pub mod windows { } } - fn attributes_to_mode(attr: u32) -> u16 { + const fn attributes_to_mode(attr: u32) -> u16 { let mut m = 0; if attr & FILE_ATTRIBUTE_DIRECTORY != 0 { m |= libc::S_IFDIR | 0o111; // IFEXEC for user,group,other @@ -362,6 +362,7 @@ pub mod windows { } } } + pub fn stat_basic_info_to_stat(info: &FILE_STAT_BASIC_INFORMATION) -> StatStruct { use windows_sys::Win32::Storage::FileSystem; use windows_sys::Win32::System::Ioctl; diff --git a/common/src/float_ops.rs b/common/src/float_ops.rs index 46e2d57067..b431e79313 100644 --- a/common/src/float_ops.rs +++ b/common/src/float_ops.rs @@ -2,7 +2,7 @@ use malachite_bigint::{BigInt, ToBigInt}; use num_traits::{Float, Signed, ToPrimitive, Zero}; use std::f64; -pub fn ufrexp(value: f64) -> (f64, i32) { +pub const fn decompose_float(value: f64) -> (f64, i32) { if 0.0 == value { (0.0, 0i32) } else { @@ -63,7 +63,7 @@ pub fn gt_int(value: f64, other_int: &BigInt) -> bool { } } -pub fn div(v1: f64, v2: f64) -> Option { +pub const fn div(v1: f64, v2: f64) -> Option { if v2 != 0.0 { Some(v1 / v2) } else { None } } diff --git a/common/src/format.rs b/common/src/format.rs index 75d0996796..99e2e42d23 100644 --- a/common/src/format.rs +++ b/common/src/format.rs @@ -1,3 +1,4 @@ +// spell-checker:ignore ddfe use itertools::{Itertools, PeekingNext}; use malachite_bigint::{BigInt, Sign}; use num_traits::FromPrimitive; @@ -391,7 +392,7 @@ impl FormatSpec { } } - fn get_separator_interval(&self) -> usize { + const fn get_separator_interval(&self) -> usize { match self.format_type { Some(FormatType::Binary | FormatType::Octal | FormatType::Hex(_)) => 4, Some(FormatType::Decimal | FormatType::Number(_) | FormatType::FixedPoint(_)) => 3, @@ -676,7 +677,7 @@ struct AsciiStr<'a> { } impl<'a> AsciiStr<'a> { - fn new(inner: &'a str) -> Self { + const fn new(inner: &'a str) -> Self { Self { inner } } } @@ -689,6 +690,7 @@ impl CharLen for AsciiStr<'_> { impl Deref for AsciiStr<'_> { type Target = str; + fn deref(&self) -> &Self::Target { self.inner } diff --git a/common/src/hash.rs b/common/src/hash.rs index 8fef70c8b9..a01629efa2 100644 --- a/common/src/hash.rs +++ b/common/src/hash.rs @@ -32,6 +32,7 @@ pub struct HashSecret { impl BuildHasher for HashSecret { type Hasher = SipHasher24; + fn build_hasher(&self) -> Self::Hasher { SipHasher24::new_with_keys(self.k0, self.k1) } @@ -53,14 +54,14 @@ impl HashSecret { fix_sentinel(mod_int(self.hash_one(data) as _)) } - pub fn hash_iter<'a, T: 'a, I, F, E>(&self, iter: I, hashf: F) -> Result + pub fn hash_iter<'a, T: 'a, I, F, E>(&self, iter: I, hash_func: F) -> Result where I: IntoIterator, F: Fn(&'a T) -> Result, { let mut hasher = self.build_hasher(); for element in iter { - let item_hash = hashf(element)?; + let item_hash = hash_func(element)?; item_hash.hash(&mut hasher); } Ok(fix_sentinel(mod_int(hasher.finish() as PyHash))) @@ -80,7 +81,7 @@ impl HashSecret { } #[inline] -pub fn hash_pointer(value: usize) -> PyHash { +pub const fn hash_pointer(value: usize) -> PyHash { // TODO: 32bit? let hash = (value >> 4) | value; hash as _ @@ -97,7 +98,7 @@ pub fn hash_float(value: f64) -> Option { }; } - let frexp = super::float_ops::ufrexp(value); + let frexp = super::float_ops::decompose_float(value); // process 28 bits at a time; this should work well both for binary // and hexadecimal floating point. @@ -139,13 +140,18 @@ pub fn hash_bigint(value: &BigInt) -> PyHash { fix_sentinel(ret) } +#[inline] +pub const fn hash_usize(data: usize) -> PyHash { + fix_sentinel(mod_int(data as i64)) +} + #[inline(always)] -pub fn fix_sentinel(x: PyHash) -> PyHash { +pub const fn fix_sentinel(x: PyHash) -> PyHash { if x == SENTINEL { -2 } else { x } } #[inline] -pub fn mod_int(value: i64) -> PyHash { +pub const fn mod_int(value: i64) -> PyHash { value % MODULUS as i64 } diff --git a/common/src/int.rs b/common/src/int.rs index 00b5231dff..9ec9e01498 100644 --- a/common/src/int.rs +++ b/common/src/int.rs @@ -128,7 +128,7 @@ pub fn bytes_to_int(lit: &[u8], mut base: u32) -> Option { } #[inline] -pub fn detect_base(c: &u8) -> Option { +pub const fn detect_base(c: &u8) -> Option { let base = match c { b'x' | b'X' => 16, b'b' | b'B' => 2, diff --git a/common/src/lib.rs b/common/src/lib.rs index c75451802a..c99ba0286a 100644 --- a/common/src/lib.rs +++ b/common/src/lib.rs @@ -1,6 +1,6 @@ //! A crate to hold types and functions common to all rustpython components. -#![cfg_attr(target_os = "redox", feature(byte_slice_trim_ascii, new_uninit))] +#![cfg_attr(all(target_os = "wasi", target_env = "p2"), feature(wasip2))] #[macro_use] mod macros; @@ -10,7 +10,6 @@ pub mod atomic; pub mod borrow; pub mod boxvec; pub mod cformat; -pub mod cmp; #[cfg(any(unix, windows, target_os = "wasi"))] pub mod crt_fd; pub mod encodings; diff --git a/common/src/linked_list.rs b/common/src/linked_list.rs index 7f55d727fb..d3f2ccaff5 100644 --- a/common/src/linked_list.rs +++ b/common/src/linked_list.rs @@ -1,4 +1,6 @@ -//! This module is modified from tokio::util::linked_list: https://github.com/tokio-rs/tokio/blob/master/tokio/src/util/linked_list.rs +// spell-checker:disable + +//! This module is modified from tokio::util::linked_list: //! Tokio is licensed under the MIT license: //! //! Copyright (c) 2021 Tokio Contributors diff --git a/common/src/lock.rs b/common/src/lock.rs index 811c461112..ca5ffe8de3 100644 --- a/common/src/lock.rs +++ b/common/src/lock.rs @@ -39,4 +39,4 @@ pub type PyMappedRwLockReadGuard<'a, T> = MappedRwLockReadGuard<'a, RawRwLock, T pub type PyRwLockWriteGuard<'a, T> = RwLockWriteGuard<'a, RawRwLock, T>; pub type PyMappedRwLockWriteGuard<'a, T> = MappedRwLockWriteGuard<'a, RawRwLock, T>; -// can add fn const_{mutex,rwlock}() if necessary, but we probably won't need to +// can add fn const_{mutex,rw_lock}() if necessary, but we probably won't need to diff --git a/common/src/lock/cell_lock.rs b/common/src/lock/cell_lock.rs index 1edd622a20..3d2889d0a4 100644 --- a/common/src/lock/cell_lock.rs +++ b/common/src/lock/cell_lock.rs @@ -1,3 +1,4 @@ +// spell-checker:ignore upgradably sharedly use lock_api::{ GetThreadId, RawMutex, RawRwLock, RawRwLockDowngrade, RawRwLockRecursive, RawRwLockUpgrade, RawRwLockUpgradeDowngrade, diff --git a/common/src/os.rs b/common/src/os.rs index 06ea1432e9..e298db462a 100644 --- a/common/src/os.rs +++ b/common/src/os.rs @@ -1,4 +1,4 @@ -// cspell:disable +// spell-checker:disable // TODO: we can move more os-specific bindings/interfaces from stdlib::{os, posix, nt} to here use std::{io, str::Utf8Error}; @@ -62,13 +62,13 @@ pub fn last_posix_errno() -> i32 { } #[cfg(unix)] -pub fn bytes_as_osstr(b: &[u8]) -> Result<&std::ffi::OsStr, Utf8Error> { +pub fn bytes_as_os_str(b: &[u8]) -> Result<&std::ffi::OsStr, Utf8Error> { use std::os::unix::ffi::OsStrExt; Ok(std::ffi::OsStr::from_bytes(b)) } #[cfg(not(unix))] -pub fn bytes_as_osstr(b: &[u8]) -> Result<&std::ffi::OsStr, Utf8Error> { +pub fn bytes_as_os_str(b: &[u8]) -> Result<&std::ffi::OsStr, Utf8Error> { Ok(std::str::from_utf8(b)?.as_ref()) } diff --git a/common/src/rc.rs b/common/src/rc.rs index 81207e840c..40c7cf97a8 100644 --- a/common/src/rc.rs +++ b/common/src/rc.rs @@ -3,7 +3,7 @@ use std::rc::Rc; #[cfg(feature = "threading")] use std::sync::Arc; -// type aliases instead of newtypes because you can't do `fn method(self: PyRc)` with a +// type aliases instead of new-types because you can't do `fn method(self: PyRc)` with a // newtype; requires the arbitrary_self_types unstable feature #[cfg(feature = "threading")] diff --git a/common/src/static_cell.rs b/common/src/static_cell.rs index 7f16dad399..a8beee0820 100644 --- a/common/src/static_cell.rs +++ b/common/src/static_cell.rs @@ -13,7 +13,7 @@ mod non_threading { impl StaticCell { #[doc(hidden)] - pub const fn _from_localkey(inner: &'static LocalKey>) -> Self { + pub const fn _from_local_key(inner: &'static LocalKey>) -> Self { Self { inner } } @@ -56,9 +56,11 @@ mod non_threading { $($(#[$attr])* $vis static $name: $crate::static_cell::StaticCell<$t> = { ::std::thread_local! { - $vis static $name: $crate::lock::OnceCell<&'static $t> = $crate::lock::OnceCell::new(); + $vis static $name: $crate::lock::OnceCell<&'static $t> = const { + $crate::lock::OnceCell::new() + }; } - $crate::static_cell::StaticCell::_from_localkey(&$name) + $crate::static_cell::StaticCell::_from_local_key(&$name) };)+ }; } @@ -76,7 +78,7 @@ mod threading { impl StaticCell { #[doc(hidden)] - pub const fn _from_oncecell(inner: OnceCell) -> Self { + pub const fn _from_once_cell(inner: OnceCell) -> Self { Self { inner } } @@ -108,7 +110,7 @@ mod threading { ($($(#[$attr:meta])* $vis:vis static $name:ident: $t:ty;)+) => { $($(#[$attr])* $vis static $name: $crate::static_cell::StaticCell<$t> = - $crate::static_cell::StaticCell::_from_oncecell($crate::lock::OnceCell::new());)+ + $crate::static_cell::StaticCell::_from_once_cell($crate::lock::OnceCell::new());)+ }; } } diff --git a/common/src/str.rs b/common/src/str.rs index ca1723e7ef..d1222dfb4d 100644 --- a/common/src/str.rs +++ b/common/src/str.rs @@ -1,3 +1,4 @@ +// spell-checker:ignore uncomputed use crate::atomic::{PyAtomic, Radium}; use crate::format::CharLen; use crate::wtf8::{CodePoint, Wtf8, Wtf8Buf}; @@ -23,6 +24,7 @@ pub enum StrKind { impl std::ops::BitOr for StrKind { type Output = Self; + fn bitor(self, other: Self) -> Self { use StrKind::*; match (self, other) { @@ -34,11 +36,11 @@ impl std::ops::BitOr for StrKind { } impl StrKind { - pub fn is_ascii(&self) -> bool { + pub const fn is_ascii(&self) -> bool { matches!(self, Self::Ascii) } - pub fn is_utf8(&self) -> bool { + pub const fn is_utf8(&self) -> bool { matches!(self, Self::Ascii | Self::Utf8) } @@ -141,6 +143,7 @@ impl StrLen { fn zero() -> Self { 0usize.into() } + #[inline(always)] fn uncomputed() -> Self { usize::MAX.into() @@ -251,7 +254,7 @@ impl StrData { } #[inline] - pub fn as_wtf8(&self) -> &Wtf8 { + pub const fn as_wtf8(&self) -> &Wtf8 { &self.data } @@ -268,7 +271,7 @@ impl StrData { .then(|| unsafe { AsciiStr::from_ascii_unchecked(self.data.as_bytes()) }) } - pub fn kind(&self) -> StrKind { + pub const fn kind(&self) -> StrKind { self.kind } @@ -360,8 +363,8 @@ pub fn get_chars(s: &str, range: impl RangeBounds) -> &str { } #[inline] -pub fn char_range_end(s: &str, nchars: usize) -> Option { - let i = match nchars.checked_sub(1) { +pub fn char_range_end(s: &str, n_chars: usize) -> Option { + let i = match n_chars.checked_sub(1) { Some(last_char_index) => { let (index, c) = s.char_indices().nth(last_char_index)?; index + c.len_utf8() @@ -395,8 +398,8 @@ pub fn get_codepoints(w: &Wtf8, range: impl RangeBounds) -> &Wtf8 { } #[inline] -pub fn codepoint_range_end(s: &Wtf8, nchars: usize) -> Option { - let i = match nchars.checked_sub(1) { +pub fn codepoint_range_end(s: &Wtf8, n_chars: usize) -> Option { + let i = match n_chars.checked_sub(1) { Some(last_char_index) => { let (index, c) = s.code_point_indices().nth(last_char_index)?; index + c.len_wtf8() @@ -424,7 +427,7 @@ pub fn zfill(bytes: &[u8], width: usize) -> Vec { } } -/// Convert a string to ascii compatible, escaping unicodes into escape +/// Convert a string to ascii compatible, escaping unicode-s into escape /// sequences. pub fn to_ascii(value: &str) -> AsciiString { let mut ascii = Vec::new(); @@ -468,7 +471,7 @@ pub mod levenshtein { const CASE_COST: usize = 1; const MAX_STRING_SIZE: usize = 40; - fn substitution_cost(mut a: u8, mut b: u8) -> usize { + const fn substitution_cost(mut a: u8, mut b: u8) -> usize { if (a & 31) != (b & 31) { return MOVE_COST; } @@ -486,7 +489,10 @@ pub mod levenshtein { pub fn levenshtein_distance(a: &str, b: &str, max_cost: usize) -> usize { thread_local! { - static BUFFER: RefCell<[usize; MAX_STRING_SIZE]> = const { RefCell::new([0usize; MAX_STRING_SIZE]) }; + #[allow(clippy::declare_interior_mutable_const)] + static BUFFER: RefCell<[usize; MAX_STRING_SIZE]> = const { + RefCell::new([0usize; MAX_STRING_SIZE]) + }; } if a == b { diff --git a/compiler/codegen/Cargo.toml b/compiler/codegen/Cargo.toml index 53469b9f6e..479b0b29f6 100644 --- a/compiler/codegen/Cargo.toml +++ b/compiler/codegen/Cargo.toml @@ -33,8 +33,7 @@ memchr = { workspace = true } unicode_names2 = { workspace = true } [dev-dependencies] -# rustpython-parser = { workspace = true } - +ruff_python_parser = { workspace = true } insta = { workspace = true } [lints] diff --git a/compiler/codegen/src/compile.rs b/compiler/codegen/src/compile.rs index 83e2f5cf44..2bdc5de393 100644 --- a/compiler/codegen/src/compile.rs +++ b/compiler/codegen/src/compile.rs @@ -9,8 +9,8 @@ use crate::{ IndexSet, ToPythonName, - error::{CodegenError, CodegenErrorType}, - ir, + error::{CodegenError, CodegenErrorType, PatternUnreachableReason}, + ir::{self, BlockIdx}, symboltable::{self, SymbolFlags, SymbolScope, SymbolTable}, unparse::unparse_expr, }; @@ -22,10 +22,11 @@ use ruff_python_ast::{ Alias, Arguments, BoolOp, CmpOp, Comprehension, ConversionFlag, DebugText, Decorator, DictItem, ExceptHandler, ExceptHandlerExceptHandler, Expr, ExprAttribute, ExprBoolOp, ExprFString, ExprList, ExprName, ExprStarred, ExprSubscript, ExprTuple, ExprUnaryOp, FString, - FStringElement, FStringElements, FStringFlags, FStringPart, Int, Keyword, MatchCase, - ModExpression, ModModule, Operator, Parameters, Pattern, PatternMatchAs, PatternMatchValue, - Stmt, StmtExpr, TypeParam, TypeParamParamSpec, TypeParamTypeVar, TypeParamTypeVarTuple, - TypeParams, UnaryOp, WithItem, + FStringElement, FStringElements, FStringFlags, FStringPart, Identifier, Int, Keyword, + MatchCase, ModExpression, ModModule, Operator, Parameters, Pattern, PatternMatchAs, + PatternMatchClass, PatternMatchOr, PatternMatchSequence, PatternMatchSingleton, + PatternMatchStar, PatternMatchValue, Singleton, Stmt, StmtExpr, TypeParam, TypeParamParamSpec, + TypeParamTypeVar, TypeParamTypeVarTuple, TypeParams, UnaryOp, WithItem, }; use ruff_source_file::OneIndexed; use ruff_text_size::{Ranged, TextRange}; @@ -33,12 +34,17 @@ use rustpython_wtf8::Wtf8Buf; // use rustpython_ast::located::{self as located_ast, Located}; use rustpython_compiler_core::{ Mode, - bytecode::{self, Arg as OpArgMarker, CodeObject, ConstantData, Instruction, OpArg, OpArgType}, + bytecode::{ + self, Arg as OpArgMarker, BinaryOperator, CodeObject, ComparisonOperator, ConstantData, + Instruction, OpArg, OpArgType, UnpackExArgs, + }, }; use rustpython_compiler_source::SourceCode; // use rustpython_parser_core::source_code::{LineNumber, SourceLocation}; +use crate::error::InternalError; use std::borrow::Cow; +pub(crate) type InternalResult = Result; type CompileResult = Result; #[derive(PartialEq, Eq, Clone, Copy)] @@ -74,6 +80,7 @@ struct Compiler<'src> { ctx: CompileContext, class_name: Option, opts: CompileOpts, + in_annotation: bool, } enum DoneWithFuture { @@ -145,7 +152,7 @@ pub fn compile_program( let mut compiler = Compiler::new(opts, source_code, "".to_owned()); compiler.compile_program(ast, symbol_table)?; let code = compiler.pop_code_object(); - trace!("Compilation completed: {:?}", code); + trace!("Compilation completed: {code:?}"); Ok(code) } @@ -160,7 +167,7 @@ pub fn compile_program_single( let mut compiler = Compiler::new(opts, source_code, "".to_owned()); compiler.compile_program_single(&ast.body, symbol_table)?; let code = compiler.pop_code_object(); - trace!("Compilation completed: {:?}", code); + trace!("Compilation completed: {code:?}"); Ok(code) } @@ -174,7 +181,7 @@ pub fn compile_block_expression( let mut compiler = Compiler::new(opts, source_code, "".to_owned()); compiler.compile_block_expr(&ast.body, symbol_table)?; let code = compiler.pop_code_object(); - trace!("Compilation completed: {:?}", code); + trace!("Compilation completed: {code:?}"); Ok(code) } @@ -206,10 +213,92 @@ macro_rules! emit { }; } -struct PatternContext { - current_block: usize, - blocks: Vec, - allow_irrefutable: bool, +fn eprint_location(zelf: &Compiler<'_>) { + let start = zelf + .source_code + .source_location(zelf.current_source_range.start()); + let end = zelf + .source_code + .source_location(zelf.current_source_range.end()); + eprintln!( + "LOCATION: {} from {}:{} to {}:{}", + zelf.source_code.path.to_owned(), + start.row, + start.column, + end.row, + end.column + ); +} + +/// Better traceback for internal error +#[track_caller] +fn unwrap_internal(zelf: &Compiler<'_>, r: InternalResult) -> T { + if let Err(ref r_err) = r { + eprintln!("=== CODEGEN PANIC INFO ==="); + eprintln!("This IS an internal error: {r_err}"); + eprint_location(zelf); + eprintln!("=== END PANIC INFO ==="); + } + r.unwrap() +} + +fn compiler_unwrap_option(zelf: &Compiler<'_>, o: Option) -> T { + if o.is_none() { + eprintln!("=== CODEGEN PANIC INFO ==="); + eprintln!("This IS an internal error, an option was unwrapped during codegen"); + eprint_location(zelf); + eprintln!("=== END PANIC INFO ==="); + } + o.unwrap() +} + +// fn compiler_result_unwrap(zelf: &Compiler<'_>, result: Result) -> T { +// if result.is_err() { +// eprintln!("=== CODEGEN PANIC INFO ==="); +// eprintln!("This IS an internal error, an result was unwrapped during codegen"); +// eprint_location(zelf); +// eprintln!("=== END PANIC INFO ==="); +// } +// result.unwrap() +// } + +/// The pattern context holds information about captured names and jump targets. +#[derive(Clone)] +pub struct PatternContext { + /// A list of names captured by the pattern. + pub stores: Vec, + /// If false, then any name captures against our subject will raise. + pub allow_irrefutable: bool, + /// A list of jump target labels used on pattern failure. + pub fail_pop: Vec, + /// The number of items on top of the stack that should remain. + pub on_top: usize, +} + +impl Default for PatternContext { + fn default() -> Self { + Self::new() + } +} + +impl PatternContext { + pub fn new() -> Self { + PatternContext { + stores: Vec::new(), + allow_irrefutable: false, + fail_pop: Vec::new(), + on_top: 0, + } + } + + pub fn fail_pop_size(&self) -> usize { + self.fail_pop.len() + } +} + +enum JumpOp { + Jump, + PopJumpIfFalse, } impl<'src> Compiler<'src> { @@ -247,6 +336,7 @@ impl<'src> Compiler<'src> { }, class_name: None, opts, + in_annotation: false, } } } @@ -335,10 +425,9 @@ impl Compiler<'_> { fn pop_code_object(&mut self) -> CodeObject { let table = self.pop_symbol_table(); assert!(table.sub_tables.is_empty()); - self.code_stack - .pop() - .unwrap() - .finalize_code(self.opts.optimize) + let pop = self.code_stack.pop(); + let stack_top = compiler_unwrap_option(self, pop); + unwrap_internal(self, stack_top.finalize_code(self.opts.optimize)) } // could take impl Into>, but everything is borrowed from ast structs; we never @@ -404,6 +493,10 @@ impl Compiler<'_> { ) -> CompileResult<()> { self.symbol_table_stack.push(symbol_table); + if Self::find_ann(body) { + emit!(self, Instruction::SetupAnnotation); + } + if let Some((last, body)) = body.split_last() { for statement in body { if let Stmt::Expr(StmtExpr { value, .. }) = &statement { @@ -445,7 +538,8 @@ impl Compiler<'_> { self.current_block().instructions.pop(); // pop Instruction::Pop } Stmt::FunctionDef(_) | Stmt::ClassDef(_) => { - let store_inst = self.current_block().instructions.pop().unwrap(); // pop Instruction::Store + let pop_instructions = self.current_block().instructions.pop(); + let store_inst = compiler_unwrap_option(self, pop_instructions); // pop Instruction::Store emit!(self, Instruction::Duplicate); self.current_block().instructions.push(store_inst); } @@ -503,8 +597,11 @@ impl Compiler<'_> { self.check_forbidden_name(&name, usage)?; let symbol_table = self.symbol_table_stack.last().unwrap(); - let symbol = symbol_table.lookup(name.as_ref()).unwrap_or_else(|| - unreachable!("the symbol '{name}' should be present in the symbol table, even when it is undefined in python."), + let symbol = unwrap_internal( + self, + symbol_table + .lookup(name.as_ref()) + .ok_or_else(|| InternalError::MissingSymbol(name.to_string())), ); let info = self.code_stack.last_mut().unwrap(); let mut cache = &mut info.name_cache; @@ -581,7 +678,7 @@ impl Compiler<'_> { fn compile_statement(&mut self, statement: &Stmt) -> CompileResult<()> { use ruff_python_ast::*; - trace!("Compiling {:?}", statement); + trace!("Compiling {statement:?}"); self.set_source_range(statement.range()); match &statement { @@ -947,14 +1044,32 @@ impl Compiler<'_> { ))); }; let name_string = name.id.to_string(); - if type_params.is_some() { - self.push_symbol_table(); - } - self.compile_expression(value)?; + + // For PEP 695 syntax, we need to compile type_params first + // so that they're available when compiling the value expression if let Some(type_params) = type_params { + self.push_symbol_table(); + + // Compile type params first to define T1, T2, etc. self.compile_type_params(type_params)?; + // Stack now has type_params tuple at top + + // Compile value expression (can now see T1, T2) + self.compile_expression(value)?; + // Stack: [type_params_tuple, value] + + // We need [value, type_params_tuple] for TypeAlias instruction + emit!(self, Instruction::Rotate2); + self.pop_symbol_table(); + } else { + // No type params - push value first, then None (not empty tuple) + self.compile_expression(value)?; + // Push None for type_params (matching CPython) + self.emit_load_const(ConstantData::None); } + + // Push name last self.emit_load_const(ConstantData::Str { value: name_string.clone().into(), }); @@ -1097,6 +1212,7 @@ impl Compiler<'_> { /// Store each type parameter so it is accessible to the current scope, and leave a tuple of /// all the type parameters on the stack. fn compile_type_params(&mut self, type_params: &TypeParams) -> CompileResult<()> { + // First, compile each type parameter and store it for type_param in &type_params.type_params { match type_param { TypeParam::TypeVar(TypeParamTypeVar { name, bound, .. }) => { @@ -1227,6 +1343,14 @@ impl Compiler<'_> { self.compile_statements(body)?; emit!(self, Instruction::PopException); + // Delete the exception variable if it was bound + if let Some(alias) = name { + // Set the variable to None before deleting (as CPython does) + self.emit_load_const(ConstantData::None); + self.store_name(alias.as_str())?; + self.compile_name(alias.as_str(), NameUsage::Delete)?; + } + if !finalbody.is_empty() { emit!(self, Instruction::PopBlock); // pop excepthandler block // We enter the finally block, without exception. @@ -1439,12 +1563,12 @@ impl Compiler<'_> { } for var in &*code.freevars { let table = self.symbol_table_stack.last().unwrap(); - let symbol = table.lookup(var).unwrap_or_else(|| { - panic!( - "couldn't look up var {} in {} in {}", - var, code.obj_name, self.source_code.path - ) - }); + let symbol = unwrap_internal( + self, + table + .lookup(var) + .ok_or_else(|| InternalError::MissingSymbol(var.to_owned())), + ); let parent_code = self.code_stack.last().unwrap(); let vars = match symbol.scope { SymbolScope::Free => &parent_code.freevar_cache, @@ -1527,8 +1651,11 @@ impl Compiler<'_> { // Check if the class is declared global let symbol_table = self.symbol_table_stack.last().unwrap(); - let symbol = symbol_table.lookup(name.as_ref()).expect( - "The symbol must be present in the symbol table, even when it is undefined in python.", + let symbol = unwrap_internal( + self, + symbol_table + .lookup(name.as_ref()) + .ok_or_else(|| InternalError::MissingSymbol(name.to_owned())), ); let mut global_path_prefix = Vec::new(); if symbol.scope == SymbolScope::GlobalExplicit { @@ -1538,8 +1665,12 @@ impl Compiler<'_> { let qualified_name = self.qualified_path.join("."); // If there are type params, we need to push a special symbol table just for them - if type_params.is_some() { + if let Some(type_params) = type_params { self.push_symbol_table(); + // Compile type parameters and store as .type_params + self.compile_type_params(type_params)?; + let dot_type_params = self.name(".type_params"); + emit!(self, Instruction::StoreLocal(dot_type_params)); } self.push_output(bytecode::CodeFlags::empty(), 0, 0, 0, name.to_owned()); @@ -1562,6 +1693,18 @@ impl Compiler<'_> { if Self::find_ann(body) { emit!(self, Instruction::SetupAnnotation); } + + // Set __type_params__ from .type_params if we have type parameters (PEP 695) + if type_params.is_some() { + // Load .type_params from enclosing scope + let dot_type_params = self.name(".type_params"); + emit!(self, Instruction::LoadNameAny(dot_type_params)); + + // Store as __type_params__ + let dunder_type_params = self.name("__type_params__"); + emit!(self, Instruction::StoreLocal(dunder_type_params)); + } + self.compile_statements(body)?; let classcell_idx = self @@ -1595,8 +1738,10 @@ impl Compiler<'_> { let mut func_flags = bytecode::MakeFunctionFlags::empty(); // Prepare generic type parameters: - if let Some(type_params) = type_params { - self.compile_type_params(type_params)?; + if type_params.is_some() { + // Load .type_params from the type params scope + let dot_type_params = self.name(".type_params"); + emit!(self, Instruction::LoadNameAny(dot_type_params)); func_flags |= bytecode::MakeFunctionFlags::TYPE_PARAMS; } @@ -1604,11 +1749,6 @@ impl Compiler<'_> { func_flags |= bytecode::MakeFunctionFlags::CLOSURE; } - // Pop the special type params symbol table - if type_params.is_some() { - self.pop_symbol_table(); - } - self.emit_load_const(ConstantData::Code { code: Box::new(code), }); @@ -1627,6 +1767,11 @@ impl Compiler<'_> { }; self.compile_normal_call(call); + // Pop the special type params symbol table + if type_params.is_some() { + self.pop_symbol_table(); + } + self.apply_decorators(decorator_list); self.store_name(name) @@ -1800,73 +1945,809 @@ impl Compiler<'_> { Ok(()) } - fn compile_pattern_value( + fn forbidden_name(&mut self, name: &str, ctx: NameUsage) -> CompileResult { + if ctx == NameUsage::Store && name == "__debug__" { + return Err(self.error(CodegenErrorType::Assign("__debug__"))); + // return Ok(true); + } + if ctx == NameUsage::Delete && name == "__debug__" { + return Err(self.error(CodegenErrorType::Delete("__debug__"))); + // return Ok(true); + } + Ok(false) + } + + fn compile_error_forbidden_name(&mut self, name: &str) -> CodegenError { + // TODO: make into error (fine for now since it realistically errors out earlier) + panic!("Failing due to forbidden name {name:?}"); + } + + /// Ensures that `pc.fail_pop` has at least `n + 1` entries. + /// If not, new labels are generated and pushed until the required size is reached. + fn ensure_fail_pop(&mut self, pc: &mut PatternContext, n: usize) -> CompileResult<()> { + let required_size = n + 1; + if required_size <= pc.fail_pop.len() { + return Ok(()); + } + while pc.fail_pop.len() < required_size { + let new_block = self.new_block(); + pc.fail_pop.push(new_block); + } + Ok(()) + } + + fn jump_to_fail_pop(&mut self, pc: &mut PatternContext, op: JumpOp) -> CompileResult<()> { + // Compute the total number of items to pop: + // items on top plus the captured objects. + let pops = pc.on_top + pc.stores.len(); + // Ensure that the fail_pop vector has at least `pops + 1` elements. + self.ensure_fail_pop(pc, pops)?; + // Emit a jump using the jump target stored at index `pops`. + match op { + JumpOp::Jump => { + emit!( + self, + Instruction::Jump { + target: pc.fail_pop[pops] + } + ); + } + JumpOp::PopJumpIfFalse => { + emit!( + self, + Instruction::JumpIfFalse { + target: pc.fail_pop[pops] + } + ); + } + } + Ok(()) + } + + /// Emits the necessary POP instructions for all failure targets in the pattern context, + /// then resets the fail_pop vector. + fn emit_and_reset_fail_pop(&mut self, pc: &mut PatternContext) -> CompileResult<()> { + // If the fail_pop vector is empty, nothing needs to be done. + if pc.fail_pop.is_empty() { + debug_assert!(pc.fail_pop.is_empty()); + return Ok(()); + } + // Iterate over the fail_pop vector in reverse order, skipping the first label. + for &label in pc.fail_pop.iter().skip(1).rev() { + self.switch_to_block(label); + // Emit the POP instruction. + emit!(self, Instruction::Pop); + } + // Finally, use the first label. + self.switch_to_block(pc.fail_pop[0]); + pc.fail_pop.clear(); + // Free the memory used by the vector. + pc.fail_pop.shrink_to_fit(); + Ok(()) + } + + /// Duplicate the effect of Python 3.10's ROT_* instructions using SWAPs. + fn pattern_helper_rotate(&mut self, mut count: usize) -> CompileResult<()> { + while count > 1 { + // Emit a SWAP instruction with the current count. + emit!( + self, + Instruction::Swap { + index: u32::try_from(count).unwrap() + } + ); + count -= 1; + } + Ok(()) + } + + /// Helper to store a captured name for a star pattern. + /// + /// If `n` is `None`, it emits a POP_TOP instruction. Otherwise, it first + /// checks that the name is allowed and not already stored. Then it rotates + /// the object on the stack beneath any preserved items and appends the name + /// to the list of captured names. + fn pattern_helper_store_name( + &mut self, + n: Option<&Identifier>, + pc: &mut PatternContext, + ) -> CompileResult<()> { + match n { + // If no name is provided, simply pop the top of the stack. + None => { + emit!(self, Instruction::Pop); + Ok(()) + } + Some(name) => { + // Check if the name is forbidden for storing. + if self.forbidden_name(name.as_str(), NameUsage::Store)? { + return Err(self.compile_error_forbidden_name(name.as_str())); + } + + // Ensure we don't store the same name twice. + // TODO: maybe pc.stores should be a set? + if pc.stores.contains(&name.to_string()) { + return Err( + self.error(CodegenErrorType::DuplicateStore(name.as_str().to_string())) + ); + } + + // Calculate how many items to rotate: + let rotations = pc.on_top + pc.stores.len() + 1; + self.pattern_helper_rotate(rotations)?; + + // Append the name to the captured stores. + pc.stores.push(name.to_string()); + Ok(()) + } + } + } + + fn pattern_unpack_helper(&mut self, elts: &[Pattern]) -> CompileResult<()> { + let n = elts.len(); + let mut seen_star = false; + for (i, elt) in elts.iter().enumerate() { + if elt.is_match_star() { + if !seen_star { + if i >= (1 << 8) || (n - i - 1) >= ((i32::MAX as usize) >> 8) { + todo!(); + // return self.compiler_error(loc, "too many expressions in star-unpacking sequence pattern"); + } + let args = UnpackExArgs { + before: u8::try_from(i).unwrap(), + after: u8::try_from(n - i - 1).unwrap(), + }; + emit!(self, Instruction::UnpackEx { args }); + seen_star = true; + } else { + // TODO: Fix error msg + return Err(self.error(CodegenErrorType::MultipleStarArgs)); + // return self.compiler_error(loc, "multiple starred expressions in sequence pattern"); + } + } + } + if !seen_star { + emit!( + self, + Instruction::UnpackSequence { + size: u32::try_from(n).unwrap() + } + ); + } + Ok(()) + } + + fn pattern_helper_sequence_unpack( &mut self, - value: &PatternMatchValue, - _pattern_context: &mut PatternContext, + patterns: &[Pattern], + _star: Option, + pc: &mut PatternContext, ) -> CompileResult<()> { - use crate::compile::bytecode::ComparisonOperator::*; + // Unpack the sequence into individual subjects. + self.pattern_unpack_helper(patterns)?; + let size = patterns.len(); + // Increase the on_top counter for the newly unpacked subjects. + pc.on_top += size; + // For each unpacked subject, compile its subpattern. + for pattern in patterns { + // Decrement on_top for each subject as it is consumed. + pc.on_top -= 1; + self.compile_pattern_subpattern(pattern, pc)?; + } + Ok(()) + } - self.compile_expression(&value.value)?; - emit!(self, Instruction::CompareOperation { op: Equal }); + fn pattern_helper_sequence_subscr( + &mut self, + patterns: &[Pattern], + star: usize, + pc: &mut PatternContext, + ) -> CompileResult<()> { + // Keep the subject around for extracting elements. + pc.on_top += 1; + for (i, pattern) in patterns.iter().enumerate() { + // if pattern.is_wildcard() { + // continue; + // } + if i == star { + // This must be a starred wildcard. + // assert!(pattern.is_star_wildcard()); + continue; + } + // Duplicate the subject. + emit!(self, Instruction::CopyItem { index: 1_u32 }); + if i < star { + // For indices before the star, use a nonnegative index equal to i. + self.emit_load_const(ConstantData::Integer { value: i.into() }); + } else { + // For indices after the star, compute a nonnegative index: + // index = len(subject) - (size - i) + emit!(self, Instruction::GetLen); + self.emit_load_const(ConstantData::Integer { + value: (patterns.len() - 1).into(), + }); + // Subtract to compute the correct index. + emit!( + self, + Instruction::BinaryOperation { + op: BinaryOperator::Subtract + } + ); + } + // Use BINARY_OP/NB_SUBSCR to extract the element. + emit!(self, Instruction::BinarySubscript); + // Compile the subpattern in irrefutable mode. + self.compile_pattern_subpattern(pattern, pc)?; + } + // Pop the subject off the stack. + pc.on_top -= 1; + emit!(self, Instruction::Pop); + Ok(()) + } + + fn compile_pattern_subpattern( + &mut self, + p: &Pattern, + pc: &mut PatternContext, + ) -> CompileResult<()> { + // Save the current allow_irrefutable state. + let old_allow_irrefutable = pc.allow_irrefutable; + // Temporarily allow irrefutable patterns. + pc.allow_irrefutable = true; + // Compile the pattern. + self.compile_pattern(p, pc)?; + // Restore the original state. + pc.allow_irrefutable = old_allow_irrefutable; Ok(()) } fn compile_pattern_as( &mut self, - as_pattern: &PatternMatchAs, - pattern_context: &mut PatternContext, + p: &PatternMatchAs, + pc: &mut PatternContext, + ) -> CompileResult<()> { + // If there is no sub-pattern, then it's an irrefutable match. + if p.pattern.is_none() { + if !pc.allow_irrefutable { + if let Some(_name) = p.name.as_ref() { + // TODO: This error message does not match cpython exactly + // A name capture makes subsequent patterns unreachable. + return Err(self.error(CodegenErrorType::UnreachablePattern( + PatternUnreachableReason::NameCapture, + ))); + } else { + // A wildcard makes remaining patterns unreachable. + return Err(self.error(CodegenErrorType::UnreachablePattern( + PatternUnreachableReason::Wildcard, + ))); + } + } + // If irrefutable matches are allowed, store the name (if any). + return self.pattern_helper_store_name(p.name.as_ref(), pc); + } + + // Otherwise, there is a sub-pattern. Duplicate the object on top of the stack. + pc.on_top += 1; + emit!(self, Instruction::CopyItem { index: 1_u32 }); + // Compile the sub-pattern. + self.compile_pattern(p.pattern.as_ref().unwrap(), pc)?; + // After success, decrement the on_top counter. + pc.on_top -= 1; + // Store the captured name (if any). + self.pattern_helper_store_name(p.name.as_ref(), pc)?; + Ok(()) + } + + fn compile_pattern_star( + &mut self, + p: &PatternMatchStar, + pc: &mut PatternContext, + ) -> CompileResult<()> { + self.pattern_helper_store_name(p.name.as_ref(), pc)?; + Ok(()) + } + + /// Validates that keyword attributes in a class pattern are allowed + /// and not duplicated. + fn validate_kwd_attrs( + &mut self, + attrs: &[Identifier], + _patterns: &[Pattern], ) -> CompileResult<()> { - if as_pattern.pattern.is_none() && !pattern_context.allow_irrefutable { - // TODO: better error message - if let Some(_name) = as_pattern.name.as_ref() { - return Err(self.error_ranged(CodegenErrorType::InvalidMatchCase, as_pattern.range)); + let n_attrs = attrs.len(); + for i in 0..n_attrs { + let attr = attrs[i].as_str(); + // Check if the attribute name is forbidden in a Store context. + if self.forbidden_name(attr, NameUsage::Store)? { + // Return an error if the name is forbidden. + return Err(self.compile_error_forbidden_name(attr)); + } + // Check for duplicates: compare with every subsequent attribute. + for ident in attrs.iter().take(n_attrs).skip(i + 1) { + let other = ident.as_str(); + if attr == other { + return Err(self.error(CodegenErrorType::RepeatedAttributePattern)); + } } - return Err(self.error_ranged(CodegenErrorType::InvalidMatchCase, as_pattern.range)); } - // Need to make a copy for (possibly) storing later: - emit!(self, Instruction::Duplicate); - if let Some(pattern) = &as_pattern.pattern { - self.compile_pattern_inner(pattern, pattern_context)?; + Ok(()) + } + + fn compile_pattern_class( + &mut self, + p: &PatternMatchClass, + pc: &mut PatternContext, + ) -> CompileResult<()> { + // Extract components from the MatchClass pattern. + let match_class = p; + let patterns = &match_class.arguments.patterns; + + // Extract keyword attributes and patterns. + // Capacity is pre-allocated based on the number of keyword arguments. + let mut kwd_attrs = Vec::with_capacity(match_class.arguments.keywords.len()); + let mut kwd_patterns = Vec::with_capacity(match_class.arguments.keywords.len()); + for kwd in &match_class.arguments.keywords { + kwd_attrs.push(kwd.attr.clone()); + kwd_patterns.push(kwd.pattern.clone()); + } + + let nargs = patterns.len(); + let n_attrs = kwd_attrs.len(); + + // Check for too many sub-patterns. + if nargs > u32::MAX as usize || (nargs + n_attrs).saturating_sub(1) > i32::MAX as usize { + let msg = format!( + "too many sub-patterns in class pattern {:?}", + match_class.cls + ); + panic!("{}", msg); + // return self.compiler_error(&msg); } - if let Some(name) = as_pattern.name.as_ref() { - self.store_name(name.as_str())?; - } else { - emit!(self, Instruction::Pop); + + // Validate keyword attributes if any. + if n_attrs != 0 { + self.validate_kwd_attrs(&kwd_attrs, &kwd_patterns)?; + } + + // Compile the class expression. + self.compile_expression(&match_class.cls)?; + + // Create a new tuple of attribute names. + let mut attr_names = vec![]; + for name in kwd_attrs.iter() { + // Py_NewRef(name) is emulated by cloning the name into a PyObject. + attr_names.push(ConstantData::Str { + value: name.as_str().to_string().into(), + }); + } + + use bytecode::TestOperator::*; + + // Emit instructions: + // 1. Load the new tuple of attribute names. + self.emit_load_const(ConstantData::Tuple { + elements: attr_names, + }); + // 2. Emit MATCH_CLASS with nargs. + emit!(self, Instruction::MatchClass(u32::try_from(nargs).unwrap())); + // 3. Duplicate the top of the stack. + emit!(self, Instruction::CopyItem { index: 1_u32 }); + // 4. Load None. + self.emit_load_const(ConstantData::None); + // 5. Compare with IS_OP 1. + emit!(self, Instruction::TestOperation { op: IsNot }); + + // At this point the TOS is a tuple of (nargs + n_attrs) attributes (or None). + pc.on_top += 1; + self.jump_to_fail_pop(pc, JumpOp::PopJumpIfFalse)?; + + // Unpack the tuple into (nargs + n_attrs) items. + let total = nargs + n_attrs; + emit!( + self, + Instruction::UnpackSequence { + size: u32::try_from(total).unwrap() + } + ); + pc.on_top += total; + pc.on_top -= 1; + + // Process each sub-pattern. + for subpattern in patterns.iter().chain(kwd_patterns.iter()) { + // Decrement the on_top counter as each sub-pattern is processed + // (on_top should be zero at the end of the algorithm as a sanity check). + pc.on_top -= 1; + if subpattern.is_wildcard() { + emit!(self, Instruction::Pop); + } + // Compile the subpattern without irrefutability checks. + self.compile_pattern_subpattern(subpattern, pc)?; } Ok(()) } - fn compile_pattern_inner( + // fn compile_pattern_mapping(&mut self, p: &PatternMatchMapping, pc: &mut PatternContext) -> CompileResult<()> { + // // Ensure the pattern is a mapping pattern. + // let mapping = p; // Extract MatchMapping-specific data. + // let keys = &mapping.keys; + // let patterns = &mapping.patterns; + // let size = keys.len(); + // let n_patterns = patterns.len(); + + // if size != n_patterns { + // panic!("keys ({}) / patterns ({}) length mismatch in mapping pattern", size, n_patterns); + // // return self.compiler_error( + // // &format!("keys ({}) / patterns ({}) length mismatch in mapping pattern", size, n_patterns) + // // ); + // } + + // // A double-star target is present if `rest` is set. + // let star_target = mapping.rest; + + // // Keep the subject on top during the mapping and length checks. + // pc.on_top += 1; + // emit!(self, Instruction::MatchMapping); + // self.jump_to_fail_pop(pc, JumpOp::PopJumpIfFalse)?; + + // // If the pattern is just "{}" (empty mapping) and there's no star target, + // // we're done—pop the subject. + // if size == 0 && star_target.is_none() { + // pc.on_top -= 1; + // emit!(self, Instruction::Pop); + // return Ok(()); + // } + + // // If there are any keys, perform a length check. + // if size != 0 { + // emit!(self, Instruction::GetLen); + // self.emit_load_const(ConstantData::Integer { value: size.into() }); + // emit!(self, Instruction::CompareOperation { op: ComparisonOperator::GreaterOrEqual }); + // self.jump_to_fail_pop(pc, JumpOp::PopJumpIfFalse)?; + // } + + // // Check that the number of sub-patterns is not absurd. + // if size.saturating_sub(1) > (i32::MAX as usize) { + // panic!("too many sub-patterns in mapping pattern"); + // // return self.compiler_error("too many sub-patterns in mapping pattern"); + // } + + // // Collect all keys into a set for duplicate checking. + // let mut seen = HashSet::new(); + + // // For each key, validate it and check for duplicates. + // for (i, key) in keys.iter().enumerate() { + // if let Some(key_val) = key.as_literal_expr() { + // let in_seen = seen.contains(&key_val); + // if in_seen { + // panic!("mapping pattern checks duplicate key: {:?}", key_val); + // // return self.compiler_error(format!("mapping pattern checks duplicate key: {:?}", key_val)); + // } + // seen.insert(key_val); + // } else if !key.is_attribute_expr() { + // panic!("mapping pattern keys may only match literals and attribute lookups"); + // // return self.compiler_error("mapping pattern keys may only match literals and attribute lookups"); + // } + + // // Visit the key expression. + // self.compile_expression(key)?; + // } + // // Drop the set (its resources will be freed automatically). + + // // Build a tuple of keys and emit MATCH_KEYS. + // emit!(self, Instruction::BuildTuple { size: size as u32 }); + // emit!(self, Instruction::MatchKeys); + // // Now, on top of the subject there are two new tuples: one of keys and one of values. + // pc.on_top += 2; + + // // Prepare for matching the values. + // emit!(self, Instruction::CopyItem { index: 1_u32 }); + // self.emit_load_const(ConstantData::None); + // // TODO: should be is + // emit!(self, Instruction::TestOperation::IsNot); + // self.jump_to_fail_pop(pc, JumpOp::PopJumpIfFalse)?; + + // // Unpack the tuple of values. + // emit!(self, Instruction::UnpackSequence { size: size as u32 }); + // pc.on_top += size.saturating_sub(1); + + // // Compile each subpattern in "subpattern" mode. + // for pattern in patterns { + // pc.on_top = pc.on_top.saturating_sub(1); + // self.compile_pattern_subpattern(pattern, pc)?; + // } + + // // Consume the tuple of keys and the subject. + // pc.on_top = pc.on_top.saturating_sub(2); + // if let Some(star_target) = star_target { + // // If we have a starred name, bind a dict of remaining items to it. + // // This sequence of instructions performs: + // // rest = dict(subject) + // // for key in keys: del rest[key] + // emit!(self, Instruction::BuildMap { size: 0 }); // Build an empty dict. + // emit!(self, Instruction::Swap(3)); // Rearrange stack: [empty, keys, subject] + // emit!(self, Instruction::DictUpdate { size: 2 }); // Update dict with subject. + // emit!(self, Instruction::UnpackSequence { size: size as u32 }); // Unpack keys. + // let mut remaining = size; + // while remaining > 0 { + // emit!(self, Instruction::CopyItem { index: 1 + remaining as u32 }); // Duplicate subject copy. + // emit!(self, Instruction::Swap { index: 2_u32 }); // Bring key to top. + // emit!(self, Instruction::DeleteSubscript); // Delete key from dict. + // remaining -= 1; + // } + // // Bind the dict to the starred target. + // self.pattern_helper_store_name(Some(&star_target), pc)?; + // } else { + // // No starred target: just pop the tuple of keys and the subject. + // emit!(self, Instruction::Pop); + // emit!(self, Instruction::Pop); + // } + // Ok(()) + // } + + fn compile_pattern_or( &mut self, - pattern_type: &Pattern, - pattern_context: &mut PatternContext, + p: &PatternMatchOr, + pc: &mut PatternContext, ) -> CompileResult<()> { - match &pattern_type { - Pattern::MatchValue(value) => self.compile_pattern_value(value, pattern_context), - Pattern::MatchAs(as_pattern) => self.compile_pattern_as(as_pattern, pattern_context), - _ => { - eprintln!("not implemented pattern type: {pattern_type:?}"); - Err(self.error(CodegenErrorType::NotImplementedYet)) + // Ensure the pattern is a MatchOr. + let end = self.new_block(); // Create a new jump target label. + let size = p.patterns.len(); + assert!(size > 1, "MatchOr must have more than one alternative"); + + // Save the current pattern context. + let old_pc = pc.clone(); + // Simulate Py_INCREF on pc.stores by cloning it. + pc.stores = pc.stores.clone(); + let mut control: Option> = None; // Will hold the capture list of the first alternative. + + // Process each alternative. + for (i, alt) in p.patterns.iter().enumerate() { + // Create a fresh empty store for this alternative. + pc.stores = Vec::new(); + // An irrefutable subpattern must be last (if allowed). + pc.allow_irrefutable = (i == size - 1) && old_pc.allow_irrefutable; + // Reset failure targets and the on_top counter. + pc.fail_pop.clear(); + pc.on_top = 0; + // Emit a COPY(1) instruction before compiling the alternative. + emit!(self, Instruction::CopyItem { index: 1_u32 }); + self.compile_pattern(alt, pc)?; + + let n_stores = pc.stores.len(); + if i == 0 { + // Save the captured names from the first alternative. + control = Some(pc.stores.clone()); + } else { + let control_vec = control.as_ref().unwrap(); + if n_stores != control_vec.len() { + return Err(self.error(CodegenErrorType::ConflictingNameBindPattern)); + } else if n_stores > 0 { + // Check that the names occur in the same order. + for i_control in (0..n_stores).rev() { + let name = &control_vec[i_control]; + // Find the index of `name` in the current stores. + let i_stores = + pc.stores.iter().position(|n| n == name).ok_or_else(|| { + self.error(CodegenErrorType::ConflictingNameBindPattern) + })?; + if i_control != i_stores { + // The orders differ; we must reorder. + assert!(i_stores < i_control, "expected i_stores < i_control"); + let rotations = i_stores + 1; + // Rotate pc.stores: take a slice of the first `rotations` items... + let rotated = pc.stores[0..rotations].to_vec(); + // Remove those elements. + for _ in 0..rotations { + pc.stores.remove(0); + } + // Insert the rotated slice at the appropriate index. + let insert_pos = i_control - i_stores; + for (j, elem) in rotated.into_iter().enumerate() { + pc.stores.insert(insert_pos + j, elem); + } + // Also perform the same rotation on the evaluation stack. + for _ in 0..(i_stores + 1) { + self.pattern_helper_rotate(i_control + 1)?; + } + } + } + } } + // Emit a jump to the common end label and reset any failure jump targets. + emit!(self, Instruction::Jump { target: end }); + self.emit_and_reset_fail_pop(pc)?; } + + // Restore the original pattern context. + *pc = old_pc.clone(); + // Simulate Py_INCREF on pc.stores. + pc.stores = pc.stores.clone(); + // In C, old_pc.fail_pop is set to NULL to avoid freeing it later. + // In Rust, old_pc is a local clone, so we need not worry about that. + + // No alternative matched: pop the subject and fail. + emit!(self, Instruction::Pop); + self.jump_to_fail_pop(pc, JumpOp::Jump)?; + + // Use the label "end". + self.switch_to_block(end); + + // Adjust the final captures. + let n_stores = control.as_ref().unwrap().len(); + let n_rots = n_stores + 1 + pc.on_top + pc.stores.len(); + for i in 0..n_stores { + // Rotate the capture to its proper place. + self.pattern_helper_rotate(n_rots)?; + let name = &control.as_ref().unwrap()[i]; + // Check for duplicate binding. + if pc.stores.contains(name) { + return Err(self.error(CodegenErrorType::DuplicateStore(name.to_string()))); + } + pc.stores.push(name.clone()); + } + + // Old context and control will be dropped automatically. + // Finally, pop the copy of the subject. + emit!(self, Instruction::Pop); + Ok(()) } - fn compile_pattern( + fn compile_pattern_sequence( &mut self, - pattern_type: &Pattern, - pattern_context: &mut PatternContext, + p: &PatternMatchSequence, + pc: &mut PatternContext, + ) -> CompileResult<()> { + // Ensure the pattern is a MatchSequence. + let patterns = &p.patterns; // a slice of Pattern + let size = patterns.len(); + let mut star: Option = None; + let mut only_wildcard = true; + let mut star_wildcard = false; + + // Find a starred pattern, if it exists. There may be at most one. + for (i, pattern) in patterns.iter().enumerate() { + if pattern.is_match_star() { + if star.is_some() { + // TODO: Fix error msg + return Err(self.error(CodegenErrorType::MultipleStarArgs)); + } + // star wildcard check + star_wildcard = pattern + .as_match_star() + .map(|m| m.name.is_none()) + .unwrap_or(false); + only_wildcard &= star_wildcard; + star = Some(i); + continue; + } + // wildcard check + only_wildcard &= pattern + .as_match_as() + .map(|m| m.name.is_none()) + .unwrap_or(false); + } + + // Keep the subject on top during the sequence and length checks. + pc.on_top += 1; + emit!(self, Instruction::MatchSequence); + self.jump_to_fail_pop(pc, JumpOp::PopJumpIfFalse)?; + + if star.is_none() { + // No star: len(subject) == size + emit!(self, Instruction::GetLen); + self.emit_load_const(ConstantData::Integer { value: size.into() }); + emit!( + self, + Instruction::CompareOperation { + op: ComparisonOperator::Equal + } + ); + self.jump_to_fail_pop(pc, JumpOp::PopJumpIfFalse)?; + } else if size > 1 { + // Star exists: len(subject) >= size - 1 + emit!(self, Instruction::GetLen); + self.emit_load_const(ConstantData::Integer { + value: (size - 1).into(), + }); + emit!( + self, + Instruction::CompareOperation { + op: ComparisonOperator::GreaterOrEqual + } + ); + self.jump_to_fail_pop(pc, JumpOp::PopJumpIfFalse)?; + } + + // Whatever comes next should consume the subject. + pc.on_top -= 1; + if only_wildcard { + // Patterns like: [] / [_] / [_, _] / [*_] / [_, *_] / [_, _, *_] / etc. + emit!(self, Instruction::Pop); + } else if star_wildcard { + self.pattern_helper_sequence_subscr(patterns, star.unwrap(), pc)?; + } else { + self.pattern_helper_sequence_unpack(patterns, star, pc)?; + } + Ok(()) + } + + fn compile_pattern_value( + &mut self, + p: &PatternMatchValue, + pc: &mut PatternContext, + ) -> CompileResult<()> { + // TODO: ensure literal or attribute lookup + self.compile_expression(&p.value)?; + emit!( + self, + Instruction::CompareOperation { + op: bytecode::ComparisonOperator::Equal + } + ); + // emit!(self, Instruction::ToBool); + self.jump_to_fail_pop(pc, JumpOp::PopJumpIfFalse)?; + Ok(()) + } + + fn compile_pattern_singleton( + &mut self, + p: &PatternMatchSingleton, + pc: &mut PatternContext, ) -> CompileResult<()> { - self.compile_pattern_inner(pattern_type, pattern_context)?; + // Load the singleton constant value. + self.emit_load_const(match p.value { + Singleton::None => ConstantData::None, + Singleton::False => ConstantData::Boolean { value: false }, + Singleton::True => ConstantData::Boolean { value: true }, + }); + // Compare using the "Is" operator. emit!( self, - Instruction::JumpIfFalse { - target: pattern_context.blocks[pattern_context.current_block + 1] + Instruction::CompareOperation { + op: bytecode::ComparisonOperator::Equal } ); + // Jump to the failure label if the comparison is false. + self.jump_to_fail_pop(pc, JumpOp::PopJumpIfFalse)?; Ok(()) } + fn compile_pattern( + &mut self, + pattern_type: &Pattern, + pattern_context: &mut PatternContext, + ) -> CompileResult<()> { + match &pattern_type { + Pattern::MatchValue(pattern_type) => { + self.compile_pattern_value(pattern_type, pattern_context) + } + Pattern::MatchSingleton(pattern_type) => { + self.compile_pattern_singleton(pattern_type, pattern_context) + } + Pattern::MatchSequence(pattern_type) => { + self.compile_pattern_sequence(pattern_type, pattern_context) + } + // Pattern::MatchMapping(pattern_type) => self.compile_pattern_mapping(pattern_type, pattern_context), + Pattern::MatchClass(pattern_type) => { + self.compile_pattern_class(pattern_type, pattern_context) + } + Pattern::MatchStar(pattern_type) => { + self.compile_pattern_star(pattern_type, pattern_context) + } + Pattern::MatchAs(pattern_type) => { + self.compile_pattern_as(pattern_type, pattern_context) + } + Pattern::MatchOr(pattern_type) => { + self.compile_pattern_or(pattern_type, pattern_context) + } + _ => { + // The eprintln gives context as to which pattern type is not implemented. + eprintln!("not implemented pattern type: {pattern_type:?}"); + Err(self.error(CodegenErrorType::NotImplementedYet)) + } + } + } + fn compile_match_inner( &mut self, subject: &Expr, @@ -1874,63 +2755,67 @@ impl Compiler<'_> { pattern_context: &mut PatternContext, ) -> CompileResult<()> { self.compile_expression(subject)?; - pattern_context.blocks = std::iter::repeat_with(|| self.new_block()) - .take(cases.len() + 1) - .collect::>(); - let end_block = *pattern_context.blocks.last().unwrap(); - - let _match_case_type = cases.last().expect("cases is not empty"); - // TODO: get proper check for default case - // let has_default = match_case_type.pattern.is_match_as() && 1 < cases.len(); - let has_default = false; - for i in 0..cases.len() - (has_default as usize) { - self.switch_to_block(pattern_context.blocks[i]); - pattern_context.current_block = i; - pattern_context.allow_irrefutable = cases[i].guard.is_some() || i == cases.len() - 1; - let m = &cases[i]; - // Only copy the subject if we're *not* on the last case: - if i != cases.len() - has_default as usize - 1 { - emit!(self, Instruction::Duplicate); + let end = self.new_block(); + + let num_cases = cases.len(); + assert!(num_cases > 0); + let has_default = cases.iter().last().unwrap().pattern.is_match_star() && num_cases > 1; + + let case_count = num_cases - if has_default { 1 } else { 0 }; + for (i, m) in cases.iter().enumerate().take(case_count) { + // Only copy the subject if not on the last case + if i != case_count - 1 { + emit!(self, Instruction::CopyItem { index: 1_u32 }); } + + pattern_context.stores = Vec::with_capacity(1); + pattern_context.allow_irrefutable = m.guard.is_some() || i == case_count - 1; + pattern_context.fail_pop.clear(); + pattern_context.on_top = 0; + self.compile_pattern(&m.pattern, pattern_context)?; + assert_eq!(pattern_context.on_top, 0); + + for name in &pattern_context.stores { + self.compile_name(name, NameUsage::Store)?; + } + + if let Some(ref _guard) = m.guard { + self.ensure_fail_pop(pattern_context, 0)?; + // TODO: Fix compile jump if call + return Err(self.error(CodegenErrorType::NotImplementedYet)); + // Jump if the guard fails. We assume that patter_context.fail_pop[0] is the jump target. + // self.compile_jump_if(&m.pattern, &guard, pattern_context.fail_pop[0])?; + } + + if i != case_count - 1 { + emit!(self, Instruction::Pop); + } + self.compile_statements(&m.body)?; - emit!(self, Instruction::Jump { target: end_block }); + emit!(self, Instruction::Jump { target: end }); + self.emit_and_reset_fail_pop(pattern_context)?; } - // TODO: below code is not called and does not work + if has_default { - // A trailing "case _" is common, and lets us save a bit of redundant - // pushing and popping in the loop above: - let m = &cases.last().unwrap(); - self.switch_to_block(*pattern_context.blocks.last().unwrap()); - if cases.len() == 1 { - // No matches. Done with the subject: + let m = &cases[num_cases - 1]; + if num_cases == 1 { emit!(self, Instruction::Pop); } else { - // Show line coverage for default case (it doesn't create bytecode) - // emit!(self, Instruction::Nop); + emit!(self, Instruction::Nop); + } + if let Some(ref _guard) = m.guard { + // TODO: Fix compile jump if call + return Err(self.error(CodegenErrorType::NotImplementedYet)); } self.compile_statements(&m.body)?; } - - self.switch_to_block(end_block); - - let code = self.current_code_info(); - pattern_context - .blocks - .iter() - .zip(pattern_context.blocks.iter().skip(1)) - .for_each(|(a, b)| { - code.blocks[a.0 as usize].next = *b; - }); + self.switch_to_block(end); Ok(()) } fn compile_match(&mut self, subject: &Expr, cases: &[MatchCase]) -> CompileResult<()> { - let mut pattern_context = PatternContext { - current_block: usize::MAX, - blocks: Vec::new(), - allow_irrefutable: false, - }; + let mut pattern_context = PatternContext::new(); self.compile_match_inner(subject, cases, &mut pattern_context)?; Ok(()) } @@ -2033,7 +2918,24 @@ impl Compiler<'_> { .into(), }); } else { - self.compile_expression(annotation)?; + let was_in_annotation = self.in_annotation; + self.in_annotation = true; + + // Special handling for starred annotations (*Ts -> Unpack[Ts]) + let result = match annotation { + Expr::Starred(ExprStarred { value, .. }) => { + // Following CPython's approach: + // *args: *Ts (where Ts is a TypeVarTuple). + // Do [annotation_value] = [*Ts]. + self.compile_expression(value)?; + emit!(self, Instruction::UnpackSequence { size: 1 }); + Ok(()) + } + _ => self.compile_expression(annotation), + }; + + self.in_annotation = was_in_annotation; + result?; } Ok(()) } @@ -2376,7 +3278,7 @@ impl Compiler<'_> { fn compile_expression(&mut self, expression: &Expr) -> CompileResult<()> { use ruff_python_ast::*; - trace!("Compiling {:?}", expression); + trace!("Compiling {expression:?}"); let range = expression.range(); self.set_source_range(range); @@ -2640,8 +3542,15 @@ impl Compiler<'_> { Self::contains_await(elt), )?; } - Expr::Starred(_) => { - return Err(self.error(CodegenErrorType::InvalidStarExpr)); + Expr::Starred(ExprStarred { value, .. }) => { + if self.in_annotation { + // In annotation context, starred expressions are allowed (PEP 646) + // For now, just compile the inner value without wrapping with Unpack + // This is a temporary solution until we figure out how to properly import typing + self.compile_expression(value)?; + } else { + return Err(self.error(CodegenErrorType::InvalidStarExpr)); + } } Expr::If(ExprIf { test, body, orelse, .. @@ -3599,7 +4508,7 @@ pub fn ruff_int_to_bigint(int: &Int) -> Result { fn parse_big_integer(int: &Int) -> Result { // TODO: Improve ruff API // Can we avoid this copy? - let s = format!("{}", int); + let s = format!("{int}"); let mut s = s.as_str(); // See: https://peps.python.org/pep-0515/#literal-grammar let radix = match s.get(0..2) { @@ -3637,7 +4546,7 @@ impl ToU32 for usize { } #[cfg(test)] -mod tests { +mod ruff_tests { use super::*; use ruff_python_ast::name::Name; use ruff_python_ast::*; @@ -3740,26 +4649,26 @@ mod tests { } } -/* #[cfg(test)] mod tests { use super::*; - use rustpython_parser::Parse; - use rustpython_parser::ast::Suite; - use rustpython_parser_core::source_code::LinearLocator; fn compile_exec(source: &str) -> CodeObject { - let mut locator: LinearLocator<'_> = LinearLocator::new(source); - use rustpython_parser::ast::fold::Fold; - let mut compiler: Compiler = Compiler::new( - CompileOpts::default(), - "source_path".to_owned(), - "".to_owned(), - ); - let ast = Suite::parse(source, "").unwrap(); - let ast = locator.fold(ast).unwrap(); - let symbol_scope = SymbolTable::scan_program(&ast).unwrap(); - compiler.compile_program(&ast, symbol_scope).unwrap(); + let opts = CompileOpts::default(); + let source_code = SourceCode::new("source_path", source); + let parsed = + ruff_python_parser::parse(source_code.text, ruff_python_parser::Mode::Module.into()) + .unwrap(); + let ast = parsed.into_syntax(); + let ast = match ast { + ruff_python_ast::Mod::Module(stmts) => stmts, + _ => unreachable!(), + }; + let symbol_table = SymbolTable::scan_program(&ast, source_code.clone()) + .map_err(|e| e.into_codegen_error(source_code.path.to_owned())) + .unwrap(); + let mut compiler = Compiler::new(opts, source_code, "".to_owned()); + compiler.compile_program(&ast, symbol_table).unwrap(); compiler.pop_code_object() } @@ -3820,4 +4729,3 @@ for stop_exc in (StopIteration('spam'), StopAsyncIteration('ham')): )); } } -*/ diff --git a/compiler/codegen/src/error.rs b/compiler/codegen/src/error.rs index 8f38680de0..5e0ac12934 100644 --- a/compiler/codegen/src/error.rs +++ b/compiler/codegen/src/error.rs @@ -1,7 +1,22 @@ use ruff_source_file::SourceLocation; -use std::fmt; +use std::fmt::{self, Display}; use thiserror::Error; +#[derive(Debug)] +pub enum PatternUnreachableReason { + NameCapture, + Wildcard, +} + +impl Display for PatternUnreachableReason { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::NameCapture => write!(f, "name capture"), + Self::Wildcard => write!(f, "wildcard"), + } + } +} + // pub type CodegenError = rustpython_parser_core::source_code::LocatedError; #[derive(Error, Debug)] @@ -19,6 +34,27 @@ impl fmt::Display for CodegenError { } } +#[derive(Debug)] +#[non_exhaustive] +pub enum InternalError { + StackOverflow, + StackUnderflow, + MissingSymbol(String), +} + +impl Display for InternalError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::StackOverflow => write!(f, "stack overflow"), + Self::StackUnderflow => write!(f, "stack underflow"), + Self::MissingSymbol(s) => write!( + f, + "The symbol '{s}' must be present in the symbol table, even when it is undefined in python." + ), + } + } +} + #[derive(Debug)] #[non_exhaustive] pub enum CodegenErrorType { @@ -47,8 +83,11 @@ pub enum CodegenErrorType { TooManyStarUnpack, EmptyWithItems, EmptyWithBody, + ForbiddenName, DuplicateStore(String), - InvalidMatchCase, + UnreachablePattern(PatternUnreachableReason), + RepeatedAttributePattern, + ConflictingNameBindPattern, NotImplementedYet, // RustPython marker for unimplemented features } @@ -94,11 +133,20 @@ impl fmt::Display for CodegenErrorType { EmptyWithBody => { write!(f, "empty body on With") } + ForbiddenName => { + write!(f, "forbidden attribute name") + } DuplicateStore(s) => { write!(f, "duplicate store {s}") } - InvalidMatchCase => { - write!(f, "invalid match case") + UnreachablePattern(reason) => { + write!(f, "{reason} makes remaining patterns unreachable") + } + RepeatedAttributePattern => { + write!(f, "attribute name repeated in class pattern") + } + ConflictingNameBindPattern => { + write!(f, "alternative patterns bind different names") } NotImplementedYet => { write!(f, "RustPython does not implement this feature yet") diff --git a/compiler/codegen/src/ir.rs b/compiler/codegen/src/ir.rs index 39857e6fc5..bb1f8b7564 100644 --- a/compiler/codegen/src/ir.rs +++ b/compiler/codegen/src/ir.rs @@ -1,6 +1,7 @@ use std::ops; use crate::IndexSet; +use crate::error::InternalError; use ruff_source_file::{OneIndexed, SourceLocation}; use rustpython_compiler_core::bytecode::{ CodeFlags, CodeObject, CodeUnit, ConstantData, InstrDisplayContext, Instruction, Label, OpArg, @@ -82,12 +83,12 @@ pub struct CodeInfo { pub freevar_cache: IndexSet, } impl CodeInfo { - pub fn finalize_code(mut self, optimize: u8) -> CodeObject { + pub fn finalize_code(mut self, optimize: u8) -> crate::InternalResult { if optimize > 0 { self.dce(); } - let max_stackdepth = self.max_stackdepth(); + let max_stackdepth = self.max_stackdepth()?; let cell2arg = self.cell2arg(); let CodeInfo { @@ -154,7 +155,7 @@ impl CodeInfo { locations.clear() } - CodeObject { + Ok(CodeObject { flags, posonlyarg_count, arg_count, @@ -172,7 +173,7 @@ impl CodeInfo { cellvars: cellvar_cache.into_iter().collect(), freevars: freevar_cache.into_iter().collect(), cell2arg, - } + }) } fn cell2arg(&self) -> Option> { @@ -219,7 +220,7 @@ impl CodeInfo { } } - fn max_stackdepth(&self) -> u32 { + fn max_stackdepth(&self) -> crate::InternalResult { let mut maxdepth = 0u32; let mut stack = Vec::with_capacity(self.blocks.len()); let mut start_depths = vec![u32::MAX; self.blocks.len()]; @@ -244,7 +245,13 @@ impl CodeInfo { let instr_display = instr.display(display_arg, self); eprint!("{instr_display}: {depth} {effect:+} => "); } - let new_depth = depth.checked_add_signed(effect).unwrap(); + let new_depth = depth.checked_add_signed(effect).ok_or({ + if effect < 0 { + InternalError::StackUnderflow + } else { + InternalError::StackOverflow + } + })?; if DEBUG { eprintln!("{new_depth}"); } @@ -261,7 +268,13 @@ impl CodeInfo { ) { let effect = instr.stack_effect(ins.arg, true); - let target_depth = depth.checked_add_signed(effect).unwrap(); + let target_depth = depth.checked_add_signed(effect).ok_or({ + if effect < 0 { + InternalError::StackUnderflow + } else { + InternalError::StackOverflow + } + })?; if target_depth > maxdepth { maxdepth = target_depth } @@ -277,7 +290,7 @@ impl CodeInfo { if DEBUG { eprintln!("DONE: {maxdepth}"); } - maxdepth + Ok(maxdepth) } } diff --git a/compiler/codegen/src/lib.rs b/compiler/codegen/src/lib.rs index 90e11c5b84..3ef6a7456f 100644 --- a/compiler/codegen/src/lib.rs +++ b/compiler/codegen/src/lib.rs @@ -18,6 +18,8 @@ mod unparse; pub use compile::CompileOpts; use ruff_python_ast::Expr; +pub(crate) use compile::InternalResult; + pub trait ToPythonName { /// Returns a short name for the node suitable for use in error messages. fn python_name(&self) -> &'static str; diff --git a/compiler/codegen/src/snapshots/rustpython_codegen__compile__tests__nested_double_async_with.snap b/compiler/codegen/src/snapshots/rustpython_codegen__compile__tests__nested_double_async_with.snap index 91523b2582..36b00c567d 100644 --- a/compiler/codegen/src/snapshots/rustpython_codegen__compile__tests__nested_double_async_with.snap +++ b/compiler/codegen/src/snapshots/rustpython_codegen__compile__tests__nested_double_async_with.snap @@ -11,7 +11,7 @@ expression: "compile_exec(\"\\\nfor stop_exc in (StopIteration('spam'), StopAsyn 6 CallFunctionPositional(1) 7 BuildTuple (2) 8 GetIter - >> 9 ForIter (68) + >> 9 ForIter (71) 10 StoreLocal (2, stop_exc) 2 11 LoadNameAny (3, self) @@ -21,7 +21,7 @@ expression: "compile_exec(\"\\\nfor stop_exc in (StopIteration('spam'), StopAsyn 15 CallFunctionPositional(1) 16 LoadConst (("type")) 17 CallMethodKeyword (1) - 18 SetupWith (65) + 18 SetupWith (68) 19 Pop 3 20 SetupExcept (40) @@ -46,12 +46,12 @@ expression: "compile_exec(\"\\\nfor stop_exc in (StopIteration('spam'), StopAsyn 36 YieldFrom 37 WithCleanupFinish 38 PopBlock - 39 Jump (54) + 39 Jump (57) >> 40 Duplicate 6 41 LoadNameAny (7, Exception) 42 TestOperation (ExceptionMatch) - 43 JumpIfFalse (53) + 43 JumpIfFalse (56) 44 StoreLocal (8, ex) 7 45 LoadNameAny (3, self) @@ -61,23 +61,26 @@ expression: "compile_exec(\"\\\nfor stop_exc in (StopIteration('spam'), StopAsyn 49 CallMethodPositional (2) 50 Pop 51 PopException - 52 Jump (63) - >> 53 Raise (Reraise) + 52 LoadConst (None) + 53 StoreLocal (8, ex) + 54 DeleteLocal (8, ex) + 55 Jump (66) + >> 56 Raise (Reraise) - 9 >> 54 LoadNameAny (3, self) - 55 LoadMethod (10, fail) - 56 LoadConst ("") - 57 LoadNameAny (2, stop_exc) - 58 FormatValue (None) - 59 LoadConst (" was suppressed") - 60 BuildString (2) - 61 CallMethodPositional (1) - 62 Pop + 9 >> 57 LoadNameAny (3, self) + 58 LoadMethod (10, fail) + 59 LoadConst ("") + 60 LoadNameAny (2, stop_exc) + 61 FormatValue (None) + 62 LoadConst (" was suppressed") + 63 BuildString (2) + 64 CallMethodPositional (1) + 65 Pop - 2 >> 63 PopBlock - 64 EnterFinally - >> 65 WithCleanupStart - 66 WithCleanupFinish - 67 Jump (9) - >> 68 PopBlock - 69 ReturnConst (None) + 2 >> 66 PopBlock + 67 EnterFinally + >> 68 WithCleanupStart + 69 WithCleanupFinish + 70 Jump (9) + >> 71 PopBlock + 72 ReturnConst (None) diff --git a/compiler/codegen/src/symboltable.rs b/compiler/codegen/src/symboltable.rs index 4f42b3996f..385abfd72f 100644 --- a/compiler/codegen/src/symboltable.rs +++ b/compiler/codegen/src/symboltable.rs @@ -1267,6 +1267,7 @@ impl SymbolTableBuilder<'_> { } fn scan_type_params(&mut self, type_params: &TypeParams) -> SymbolTableResult { + // First register all type parameters for type_param in &type_params.type_params { match type_param { TypeParam::TypeVar(TypeParamTypeVar { diff --git a/compiler/core/src/bytecode.rs b/compiler/core/src/bytecode.rs index 94d080ace4..8100146cc3 100644 --- a/compiler/core/src/bytecode.rs +++ b/compiler/core/src/bytecode.rs @@ -381,6 +381,7 @@ pub type NameIdx = u32; #[derive(Debug, Copy, Clone, PartialEq, Eq)] #[repr(u8)] pub enum Instruction { + Nop, /// Importing by name ImportName { idx: Arg, @@ -429,6 +430,7 @@ pub enum Instruction { BinaryOperationInplace { op: Arg, }, + BinarySubscript, LoadAttr { idx: Arg, }, @@ -438,12 +440,20 @@ pub enum Instruction { CompareOperation { op: Arg, }, + CopyItem { + index: Arg, + }, Pop, + Swap { + index: Arg, + }, + // ToBool, Rotate2, Rotate3, Duplicate, Duplicate2, GetIter, + GetLen, Continue { target: Arg