diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml new file mode 100644 index 000000000..5ce0dbc26 --- /dev/null +++ b/.github/workflows/main.yaml @@ -0,0 +1,324 @@ +name: CI +on: + push: + branches: + - master + pull_request: + +concurrency: + # Make sure that new pushes cancel running jobs + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: true + +env: + CARGO_TERM_COLOR: always + RUSTDOCFLAGS: -Dwarnings + RUSTFLAGS: -Dwarnings + RUST_BACKTRACE: full + BENCHMARK_RUSTC: nightly-2025-01-16 # Pin the toolchain for reproducable results + +jobs: + test: + name: Build and test + timeout-minutes: 60 + strategy: + fail-fast: false + matrix: + include: + - target: aarch64-apple-darwin + os: macos-15 + # FIXME: pinned due to https://github.com/llvm/llvm-project/issues/127804 + channel: nightly-2025-02-07 + - target: aarch64-unknown-linux-gnu + os: ubuntu-24.04-arm + # FIXME: pinned due to https://github.com/llvm/llvm-project/issues/127804 + channel: nightly-2025-02-07 + - target: aarch64-pc-windows-msvc + os: windows-2025 + build_only: 1 # Can't run on x86 hosts + - target: arm-unknown-linux-gnueabi + os: ubuntu-24.04 + - target: arm-unknown-linux-gnueabihf + os: ubuntu-24.04 + - target: armv7-unknown-linux-gnueabihf + os: ubuntu-24.04 + - target: i586-unknown-linux-gnu + os: ubuntu-24.04 + - target: i686-unknown-linux-gnu + os: ubuntu-24.04 + - target: loongarch64-unknown-linux-gnu + os: ubuntu-24.04 + - target: powerpc-unknown-linux-gnu + os: ubuntu-24.04 + - target: powerpc64-unknown-linux-gnu + os: ubuntu-24.04 + - target: powerpc64le-unknown-linux-gnu + os: ubuntu-24.04 + - target: riscv64gc-unknown-linux-gnu + os: ubuntu-24.04 + - target: thumbv6m-none-eabi + os: ubuntu-24.04 + - target: thumbv7em-none-eabi + os: ubuntu-24.04 + - target: thumbv7em-none-eabihf + os: ubuntu-24.04 + - target: thumbv7m-none-eabi + os: ubuntu-24.04 + - target: x86_64-unknown-linux-gnu + os: ubuntu-24.04 + - target: x86_64-apple-darwin + os: macos-13 + - target: wasm32-unknown-unknown + os: ubuntu-24.04 + build_only: 1 + - target: i686-pc-windows-msvc + os: windows-2025 + - target: x86_64-pc-windows-msvc + os: windows-2025 + - target: i686-pc-windows-gnu + os: windows-2025 + # FIXME: pinned due to https://github.com/rust-lang/rust/issues/136795 + channel: nightly-2025-02-07-i686-gnu + - target: x86_64-pc-windows-gnu + os: windows-2025 + channel: nightly-x86_64-gnu + runs-on: ${{ matrix.os }} + env: + BUILD_ONLY: ${{ matrix.build_only }} + steps: + - name: Print runner information + run: uname -a + - uses: actions/checkout@v4 + with: + submodules: true + - name: Install Rust (rustup) + shell: bash + run: | + channel="nightly" + # Account for channels that have required components (MinGW) + [ -n "${{ matrix.channel }}" ] && channel="${{ matrix.channel }}" + rustup update "$channel" --no-self-update + rustup default "$channel" + rustup target add "${{ matrix.target }}" + rustup component add clippy llvm-tools-preview + - uses: taiki-e/install-action@nextest + - uses: Swatinem/rust-cache@v2 + with: + key: ${{ matrix.target }} + + - name: Verify API list + if: matrix.os == 'ubuntu-24.04' + run: python3 etc/update-api-list.py --check + + # Non-linux tests just use our raw script + - name: Run locally + if: matrix.os != 'ubuntu-24.04' || contains(matrix.target, 'wasm') + shell: bash + run: ./ci/run.sh ${{ matrix.target }} + + # Otherwise we use our docker containers to run builds + - name: Run in Docker + if: matrix.os == 'ubuntu-24.04' && !contains(matrix.target, 'wasm') + run: | + rustup target add x86_64-unknown-linux-musl + cargo generate-lockfile && ./ci/run-docker.sh ${{ matrix.target }} + + - name: Print test logs if available + if: always() + run: if [ -f "target/test-log.txt" ]; then cat target/test-log.txt; fi + shell: bash + + clippy: + name: Clippy + runs-on: ubuntu-24.04 + timeout-minutes: 10 + steps: + - uses: actions/checkout@master + with: + submodules: true + - name: Install Rust + run: | + rustup update nightly --no-self-update + rustup default nightly + rustup component add clippy + - uses: Swatinem/rust-cache@v2 + - run: cargo clippy --all --all-features --all-targets + + builtins: + name: Check use with compiler-builtins + runs-on: ubuntu-24.04 + timeout-minutes: 10 + steps: + - uses: actions/checkout@master + - name: Install Rust + run: rustup update nightly --no-self-update && rustup default nightly + - uses: Swatinem/rust-cache@v2 + - run: cargo check --manifest-path crates/compiler-builtins-smoke-test/Cargo.toml + - run: cargo test --manifest-path crates/compiler-builtins-smoke-test/Cargo.toml + + benchmarks: + name: Benchmarks + runs-on: ubuntu-24.04 + timeout-minutes: 20 + steps: + - uses: actions/checkout@master + with: + submodules: true + - uses: taiki-e/install-action@cargo-binstall + + - name: Set up dependencies + run: | + sudo apt update + sudo apt install -y valgrind gdb libc6-dbg # Needed for iai-callgrind + rustup update "$BENCHMARK_RUSTC" --no-self-update + rustup default "$BENCHMARK_RUSTC" + # Install the version of iai-callgrind-runner that is specified in Cargo.toml + iai_version="$(cargo metadata --format-version=1 --features icount | + jq -r '.packages[] | select(.name == "iai-callgrind").version')" + cargo binstall -y iai-callgrind-runner --version "$iai_version" + sudo apt-get install valgrind + + - uses: Swatinem/rust-cache@v2 + + - name: Run icount benchmarks + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + PR_NUMBER: ${{ github.event.pull_request.number }} + run: ./ci/bench-icount.sh + + - name: Upload the benchmark baseline + uses: actions/upload-artifact@v4 + with: + name: ${{ env.BASELINE_NAME }} + path: ${{ env.BASELINE_NAME }}.tar.xz + + - name: Run wall time benchmarks + run: | + # Always use the same seed for benchmarks. Ideally we should switch to a + # non-random generator. + export LIBM_SEED=benchesbenchesbenchesbencheswoo! + cargo bench --all --features short-benchmarks,build-musl,force-soft-floats + + - name: Print test logs if available + if: always() + run: if [ -f "target/test-log.txt" ]; then cat target/test-log.txt; fi + shell: bash + + msrv: + name: Check MSRV + runs-on: ubuntu-24.04 + timeout-minutes: 10 + env: + RUSTFLAGS: # No need to check warnings on old MSRV, unset `-Dwarnings` + steps: + - uses: actions/checkout@master + - name: Install Rust + run: | + msrv="$(perl -ne 'print if s/rust-version\s*=\s*"(.*)"/\1/g' libm/Cargo.toml)" + echo "MSRV: $msrv" + rustup update "$msrv" --no-self-update && rustup default "$msrv" + - uses: Swatinem/rust-cache@v2 + - run: | + # FIXME(msrv): Remove the workspace Cargo.toml so 1.63 cargo doesn't see + # `edition = "2024"` and get spooked. + rm Cargo.toml + cargo build --manifest-path libm/Cargo.toml + + rustfmt: + name: Rustfmt + runs-on: ubuntu-24.04 + timeout-minutes: 10 + steps: + - uses: actions/checkout@master + - name: Install Rust + run: | + rustup update nightly --no-self-update + rustup default nightly + rustup component add rustfmt + - run: cargo fmt -- --check + + # Determine which extensive tests should be run based on changed files. + calculate_extensive_matrix: + name: Calculate job matrix + runs-on: ubuntu-24.04 + timeout-minutes: 10 + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + PR_NUMBER: ${{ github.event.pull_request.number }} + outputs: + matrix: ${{ steps.script.outputs.matrix }} + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 100 + - name: Fetch pull request ref + run: git fetch origin "$GITHUB_REF:$GITHUB_REF" + if: github.event_name == 'pull_request' + - run: python3 ci/ci-util.py generate-matrix >> "$GITHUB_OUTPUT" + id: script + + extensive: + name: Extensive tests for ${{ matrix.ty }} + needs: + # Wait on `clippy` so we have some confidence that the crate will build + - clippy + - calculate_extensive_matrix + runs-on: ubuntu-24.04 + timeout-minutes: 240 # 4 hours + strategy: + matrix: + # Use the output from `calculate_extensive_matrix` to calculate the matrix + # FIXME: it would be better to run all jobs (i.e. all types) but mark those that + # didn't change as skipped, rather than completely excluding the job. However, + # this is not currently possible https://github.com/actions/runner/issues/1985. + include: ${{ fromJSON(needs.calculate_extensive_matrix.outputs.matrix).matrix }} + env: + TO_TEST: ${{ matrix.to_test }} + steps: + - uses: actions/checkout@v4 + with: + submodules: true + - name: Install Rust + run: | + rustup update nightly --no-self-update + rustup default nightly + - uses: Swatinem/rust-cache@v2 + - name: Run extensive tests + run: | + echo "Tests to run: '$TO_TEST'" + if [ -z "$TO_TEST" ]; then + echo "No tests to run, exiting." + exit + fi + + set -x + + # Run the non-extensive tests first to catch any easy failures + cargo t --profile release-checked -- "$TO_TEST" + + LIBM_EXTENSIVE_TESTS="$TO_TEST" cargo t \ + --features build-mpfr,unstable,force-soft-floats \ + --profile release-checked \ + -- extensive + - name: Print test logs if available + run: if [ -f "target/test-log.txt" ]; then cat target/test-log.txt; fi + shell: bash + + success: + needs: + - test + - builtins + - benchmarks + - msrv + - rustfmt + - extensive + runs-on: ubuntu-24.04 + timeout-minutes: 10 + # GitHub branch protection is exceedingly silly and treats "jobs skipped because a dependency + # failed" as success. So we have to do some contortions to ensure the job fails if any of its + # dependencies fails. + if: always() # make sure this is never "skipped" + steps: + # Manually check the status of all dependencies. `if: failure()` does not work. + - name: check if any dependency failed + run: jq --exit-status 'all(.result == "success")' <<< '${{ toJson(needs) }}' diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml deleted file mode 100644 index 15eba6e89..000000000 --- a/.github/workflows/main.yml +++ /dev/null @@ -1,180 +0,0 @@ -name: CI -on: [push, pull_request] - -env: - CARGO_TERM_VERBOSE: true - RUSTDOCFLAGS: -Dwarnings - RUSTFLAGS: -Dwarnings - RUST_BACKTRACE: full - -jobs: - test: - name: Docker - timeout-minutes: 20 - strategy: - fail-fast: false - matrix: - include: - - target: aarch64-apple-darwin - os: macos-latest - - target: aarch64-unknown-linux-gnu - os: ubuntu-latest - - target: aarch64-pc-windows-msvc - os: windows-latest - build_only: 1 # Can't run on x86 hosts - - target: arm-unknown-linux-gnueabi - os: ubuntu-latest - - target: arm-unknown-linux-gnueabihf - os: ubuntu-latest - - target: armv7-unknown-linux-gnueabihf - os: ubuntu-latest - - target: i586-unknown-linux-gnu - os: ubuntu-latest - - target: i686-unknown-linux-gnu - os: ubuntu-latest - - target: powerpc-unknown-linux-gnu - os: ubuntu-latest - - target: powerpc64-unknown-linux-gnu - os: ubuntu-latest - - target: powerpc64le-unknown-linux-gnu - os: ubuntu-latest - - target: riscv64gc-unknown-linux-gnu - os: ubuntu-latest - - target: thumbv6m-none-eabi - os: ubuntu-latest - - target: thumbv7em-none-eabi - os: ubuntu-latest - - target: thumbv7em-none-eabihf - os: ubuntu-latest - - target: thumbv7m-none-eabi - os: ubuntu-latest - - target: x86_64-unknown-linux-gnu - os: ubuntu-latest - - target: x86_64-apple-darwin - os: macos-13 - - target: i686-pc-windows-msvc - os: windows-latest - - target: x86_64-pc-windows-msvc - os: windows-latest - - target: i686-pc-windows-gnu - os: windows-latest - channel: nightly-i686-gnu - - target: x86_64-pc-windows-gnu - os: windows-latest - channel: nightly-x86_64-gnu - runs-on: ${{ matrix.os }} - env: - BUILD_ONLY: ${{ matrix.build_only }} - steps: - - name: Print runner information - run: uname -a - - uses: actions/checkout@v4 - - name: Install Rust (rustup) - shell: bash - run: | - channel="nightly" - # Account for channels that have required components (MinGW) - [ -n "${{ matrix.channel }}" ] && channel="${{ matrix.channel }}" - rustup update "$channel" --no-self-update - rustup default "$channel" - rustup target add ${{ matrix.target }} - rustup component add llvm-tools-preview - - uses: Swatinem/rust-cache@v2 - with: - key: ${{ matrix.target }} - - - name: Download musl source - run: ./ci/download-musl.sh - shell: bash - - # Non-linux tests just use our raw script - - name: Run locally - if: matrix.os != 'ubuntu-latest' - shell: bash - run: ./ci/run.sh ${{ matrix.target }} - - # Otherwise we use our docker containers to run builds - - name: Run in Docker - if: matrix.os == 'ubuntu-latest' - run: | - rustup target add x86_64-unknown-linux-musl - cargo generate-lockfile && ./ci/run-docker.sh ${{ matrix.target }} - - wasm: - name: WebAssembly - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@master - - name: Install Rust - run: rustup update nightly --no-self-update && rustup default nightly - - run: rustup target add wasm32-unknown-unknown - - uses: Swatinem/rust-cache@v2 - - run: cargo build --target wasm32-unknown-unknown - - builtins: - name: "The compiler-builtins crate works" - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@master - - name: Install Rust - run: rustup update nightly --no-self-update && rustup default nightly - - uses: Swatinem/rust-cache@v2 - - run: cargo build -p cb - - benchmarks: - name: Benchmarks - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@master - - name: Install Rust - run: rustup update nightly --no-self-update && rustup default nightly - - uses: Swatinem/rust-cache@v2 - - name: Download musl source - run: ./ci/download-musl.sh - - run: cargo bench --all - - msrv: - name: Check MSRV - runs-on: ubuntu-latest - env: - RUSTFLAGS: # No need to check warnings on old MSRV, unset `-Dwarnings` - steps: - - uses: actions/checkout@master - - run: | - msrv="$(perl -ne 'print if s/rust-version\s*=\s*"(.*)"/\1/g' Cargo.toml)" - echo "MSRV: $msrv" - echo "MSRV=$msrv" >> "$GITHUB_ENV" - - name: Install Rust - run: rustup update "$MSRV" --no-self-update && rustup default "$MSRV" - - uses: Swatinem/rust-cache@v2 - - run: cargo build -p libm - - rustfmt: - name: Rustfmt - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@master - - name: Install Rust - run: | - rustup update nightly --no-self-update - rustup default nightly - rustup component add rustfmt - - run: cargo fmt -- --check - - success: - needs: - - test - - wasm - - builtins - - benchmarks - - msrv - - rustfmt - runs-on: ubuntu-latest - # GitHub branch protection is exceedingly silly and treats "jobs skipped because a dependency - # failed" as success. So we have to do some contortions to ensure the job fails if any of its - # dependencies fails. - if: always() # make sure this is never "skipped" - steps: - # Manually check the status of all dependencies. `if: failure()` does not work. - - name: check if any dependency failed - run: jq --exit-status 'all(.result == "success")' <<< '${{ toJson(needs) }}' diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yaml similarity index 95% rename from .github/workflows/publish.yml rename to .github/workflows/publish.yaml index e715c6187..15904079d 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yaml @@ -12,7 +12,7 @@ on: jobs: release-plz: name: Release-plz - runs-on: ubuntu-latest + runs-on: ubuntu-24.04 steps: - name: Checkout repository uses: actions/checkout@v4 diff --git a/.gitignore b/.gitignore index b6a532751..d5caba1a0 100644 --- a/.gitignore +++ b/.gitignore @@ -2,8 +2,10 @@ .#* /bin /math/src -/math/target -/target +target Cargo.lock -musl/ **.tar.gz + +# Benchmark cache +iai-home +baseline-* diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 000000000..35b269ead --- /dev/null +++ b/.gitmodules @@ -0,0 +1,4 @@ +[submodule "musl"] + path = crates/musl-math-sys/musl + url = https://git.musl-libc.org/git/musl + shallow = true diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index a39623696..dc4006035 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -6,9 +6,8 @@ `src/math/mod.rs` accordingly. Also, uncomment the corresponding trait method in `src/lib.rs`. - Write some simple tests in your module (using `#[test]`) -- Run `cargo test` to make sure it works -- Run `cargo test --features libm-test/test-musl-serialized` to compare your - implementation against musl's +- Run `cargo test` to make sure it works. Full tests are only run when enabling + features, see [Testing](#testing) below. - Send us a pull request! Make sure to run `cargo fmt` on your code before sending the PR. Also include "closes #42" in the PR description to close the corresponding issue. @@ -44,52 +43,40 @@ Check [PR #65] for an example. `mod.rs`. - You may encounter weird literals like `0x1p127f` in the MUSL code. These are hexadecimal floating - point literals. Rust (the language) doesn't support these kind of literals. The best way I have - found to deal with these literals is to turn them into their integer representation using the - [`hexf!`] macro and then turn them back into floats. See below: + point literals. Rust (the language) doesn't support these kind of literals. This crate provides + two macros, `hf32!` and `hf64!`, which convert string literals to floats at compile time. -[`hexf!`]: https://crates.io/crates/hexf - -``` rust -// Step 1: write a program to convert the float into its integer representation -#[macro_use] -extern crate hexf; - -fn main() { - println!("{:#x}", hexf32!("0x1.0p127").to_bits()); -} -``` - -``` console -$ # Step 2: run the program -$ cargo run -0x7f000000 -``` - -``` rust -// Step 3: copy paste the output into libm -let x1p127 = f32::from_bits(0x7f000000); // 0x1p127f === 2 ^ 12 -``` + ```rust + assert_eq!(hf32!("0x1.ffep+8").to_bits(), 0x43fff000); + assert_eq!(hf64!("0x1.ffep+8").to_bits(), 0x407ffe0000000000); + ``` - Rust code panics on arithmetic overflows when not optimized. You may need to use the [`Wrapping`] - newtype to avoid this problem. + newtype to avoid this problem, or individual methods like [`wrapping_add`]. [`Wrapping`]: https://doc.rust-lang.org/std/num/struct.Wrapping.html +[`wrapping_add`]: https://doc.rust-lang.org/std/primitive.u32.html#method.wrapping_add ## Testing Normal tests can be executed with: ```sh -cargo test +# Tests against musl require that the submodule is up to date. +git submodule init +git submodule update + +# `--release` ables more test cases +cargo test --release ``` -If you'd like to run tests with randomized inputs that get compared against musl -itself, you'll need to be on a Linux system and then you can execute: +If you are on a system that cannot build musl or MPFR, passing +`--no-default-features` will run some limited tests. -```sh -cargo test --features libm-test/test-musl-serialized -``` +The multiprecision tests use the [`rug`] crate for bindings to MPFR. MPFR can +be difficult to build on non-Unix systems, refer to [`gmp_mpfr_sys`] for help. + +`build-musl` does not build with MSVC, Wasm, or Thumb. -Note that you may need to pass `--release` to Cargo if there are errors related -to integer overflow. +[`rug`]: https://docs.rs/rug/latest/rug/ +[`gmp_mpfr_sys`]: https://docs.rs/gmp-mpfr-sys/1.6.4/gmp_mpfr_sys/ diff --git a/Cargo.toml b/Cargo.toml index aa6c08ddb..268b6fb0e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,47 +1,37 @@ -[package] -authors = ["Jorge Aparicio "] -categories = ["no-std"] -description = "libm in pure Rust" -documentation = "https://docs.rs/libm" -keywords = ["libm", "math"] -license = "MIT AND (MIT OR Apache-2.0)" -name = "libm" -readme = "README.md" -repository = "https://github.com/rust-lang/libm" -version = "0.2.11" -edition = "2021" -exclude = ["/ci/", "/.github/workflows/"] -rust-version = "1.63" - -[features] -default = [] - -# This tells the compiler to assume that a Nightly toolchain is being used and -# that it should activate any useful Nightly things accordingly. -unstable = [] - -# Used to prevent using any intrinsics or arch-specific code. -force-soft-floats = [] - [workspace] resolver = "2" members = [ - "crates/compiler-builtins-smoke-test", - "crates/libm-bench", + "libm", "crates/libm-macros", "crates/libm-test", "crates/musl-math-sys", + "crates/util", ] default-members = [ - ".", + "libm", "crates/libm-macros", - "crates/libm-test", + "crates/libm-test" +] +exclude = [ + # Requires `panic = abort` so can't be a member of the workspace + "crates/compiler-builtins-smoke-test", ] -[dev-dependencies] -no-panic = "0.1.30" +# The default release profile is unchanged. +# Release mode with debug assertions +[profile.release-checked] +inherits = "release" +debug-assertions = true +overflow-checks = true -# This is needed for no-panic to correctly detect the lack of panics -[profile.release] +# Release with maximum optimizations, which is very slow to build. This is also +# what is needed to check `no-panic`. +[profile.release-opt] +inherits = "release" +codegen-units = 1 lto = "fat" + +[profile.bench] +# Required for iai-callgrind +debug = true diff --git a/README.md b/README.md index e5d64bd2d..c120a7588 100644 --- a/README.md +++ b/README.md @@ -2,6 +2,10 @@ A port of [MUSL]'s libm to Rust. +> [!NOTE] +> The `libm` crate has been merged into the `compiler-builtins` repository. Future +> development work will take place there: https://github.com/rust-lang/compiler-builtins. + [MUSL]: https://musl.libc.org/ ## Goals @@ -41,14 +45,14 @@ This crate supports rustc 1.63 and newer. ## License Usage is licensed under the MIT license ([LICENSE-MIT](LICENSE-MIT) or -http://opensource.org/licenses/MIT). +https://opensource.org/licenses/MIT). ### Contribution Contributions are licensed under both the MIT license and the Apache License, Version 2.0 ([LICENSE-APACHE](LICENSE-APACHE) or -http://www.apache.org/licenses/LICENSE-2.0). Unless you explicitly state +https://www.apache.org/licenses/LICENSE-2.0). Unless you explicitly state otherwise, any contribution intentionally submitted for inclusion in the work by you, as defined in the Apache-2.0 license, shall be dual licensed as mentioned, without any additional terms or conditions. diff --git a/build.rs b/build.rs deleted file mode 100644 index b683557e4..000000000 --- a/build.rs +++ /dev/null @@ -1,17 +0,0 @@ -use std::env; - -fn main() { - println!("cargo:rerun-if-changed=build.rs"); - println!("cargo:rustc-check-cfg=cfg(assert_no_panic)"); - println!("cargo:rustc-check-cfg=cfg(feature, values(\"unstable\"))"); - - println!("cargo:rustc-check-cfg=cfg(feature, values(\"checked\"))"); - - #[allow(unexpected_cfgs)] - if !cfg!(feature = "checked") { - let lvl = env::var("OPT_LEVEL").unwrap(); - if lvl != "0" { - println!("cargo:rustc-cfg=assert_no_panic"); - } - } -} diff --git a/ci/bench-icount.sh b/ci/bench-icount.sh new file mode 100755 index 000000000..4d93e257a --- /dev/null +++ b/ci/bench-icount.sh @@ -0,0 +1,58 @@ +#!/bin/bash + +set -eux + +iai_home="iai-home" + +# Download the baseline from master +./ci/ci-util.py locate-baseline --download --extract + +# Run benchmarks once +function run_icount_benchmarks() { + cargo_args=( + "--bench" "icount" + "--no-default-features" + "--features" "unstable,unstable-float,icount" + ) + + iai_args=( + "--home" "$(pwd)/$iai_home" + "--regression=ir=5.0" + "--save-summary" + ) + + # Parse `cargo_arg0 cargo_arg1 -- iai_arg0 iai_arg1` syntax + parsing_iai_args=0 + while [ "$#" -gt 0 ]; do + if [ "$parsing_iai_args" == "1" ]; then + iai_args+=("$1") + elif [ "$1" == "--" ]; then + parsing_iai_args=1 + else + cargo_args+=("$1") + fi + + shift + done + + # Run iai-callgrind benchmarks + cargo bench "${cargo_args[@]}" -- "${iai_args[@]}" + + # NB: iai-callgrind should exit on error but does not, so we inspect the sumary + # for errors. See https://github.com/iai-callgrind/iai-callgrind/issues/337 + if [ -n "${PR_NUMBER:-}" ]; then + # If this is for a pull request, ignore regressions if specified. + ./ci/ci-util.py check-regressions --home "$iai_home" --allow-pr-override "$PR_NUMBER" + else + ./ci/ci-util.py check-regressions --home "$iai_home" || true + fi +} + +# Run once with softfloats, once with arch instructions enabled +run_icount_benchmarks --features force-soft-floats -- --save-baseline=softfloat +run_icount_benchmarks -- --save-baseline=hardfloat + +# Name and tar the new baseline +name="baseline-icount-$(date -u +'%Y%m%d%H%M')-${GITHUB_SHA:0:12}" +echo "BASELINE_NAME=$name" >>"$GITHUB_ENV" +tar cJf "$name.tar.xz" "$iai_home" diff --git a/ci/ci-util.py b/ci/ci-util.py new file mode 100755 index 000000000..d9e402d6b --- /dev/null +++ b/ci/ci-util.py @@ -0,0 +1,416 @@ +#!/usr/bin/env python3 +"""Utilities for CI. + +This dynamically prepares a list of routines that had a source file change based on +git history. +""" + +import json +import os +import subprocess as sp +import sys +from dataclasses import dataclass +from glob import glob, iglob +from inspect import cleandoc +from os import getenv +from pathlib import Path +from typing import TypedDict, Self + +USAGE = cleandoc( + """ + usage: + + ./ci/ci-util.py [flags] + + COMMAND: + generate-matrix + Calculate a matrix of which functions had source change, print that as + a JSON object. + + locate-baseline [--download] [--extract] + Locate the most recent benchmark baseline available in CI and, if flags + specify, download and extract it. Never exits with nonzero status if + downloading fails. + + Note that `--extract` will overwrite files in `iai-home`. + + check-regressions [--home iai-home] [--allow-pr-override pr_number] + Check `iai-home` (or `iai-home` if unspecified) for `summary.json` + files and see if there are any regressions. This is used as a workaround + for `iai-callgrind` not exiting with error status; see + . + + If `--allow-pr-override` is specified, the regression check will not exit + with failure if any line in the PR starts with `allow-regressions`. + """ +) + +REPO_ROOT = Path(__file__).parent.parent +GIT = ["git", "-C", REPO_ROOT] +DEFAULT_BRANCH = "master" +WORKFLOW_NAME = "CI" # Workflow that generates the benchmark artifacts +ARTIFACT_GLOB = "baseline-icount*" +# Place this in a PR body to skip regression checks (must be at the start of a line). +REGRESSION_DIRECTIVE = "ci: allow-regressions" +# Place this in a PR body to skip extensive tests +SKIP_EXTENSIVE_DIRECTIVE = "ci: skip-extensive" +# Place this in a PR body to allow running a large number of extensive tests. If not +# set, this script will error out if a threshold is exceeded in order to avoid +# accidentally spending huge amounts of CI time. +ALLOW_MANY_EXTENSIVE_DIRECTIVE = "ci: allow-many-extensive" +MANY_EXTENSIVE_THRESHOLD = 20 + +# Don't run exhaustive tests if these files change, even if they contaiin a function +# definition. +IGNORE_FILES = [ + "libm/src/math/support/", + "libm/src/libm_helper.rs", + "libm/src/math/arch/intrinsics.rs", +] + +TYPES = ["f16", "f32", "f64", "f128"] + + +def eprint(*args, **kwargs): + """Print to stderr.""" + print(*args, file=sys.stderr, **kwargs) + + +@dataclass +class PrInfo: + """GitHub response for PR query""" + + body: str + commits: list[str] + created_at: str + number: int + + @classmethod + def load(cls, pr_number: int | str) -> Self: + """For a given PR number, query the body and commit list""" + pr_info = sp.check_output( + [ + "gh", + "pr", + "view", + str(pr_number), + "--json=number,commits,body,createdAt", + # Flatten the commit list to only hashes, change a key to snake naming + "--jq=.commits |= map(.oid) | .created_at = .createdAt | del(.createdAt)", + ], + text=True, + ) + eprint("PR info:", json.dumps(pr_info, indent=4)) + return cls(**json.loads(pr_info)) + + def contains_directive(self, directive: str) -> bool: + """Return true if the provided directive is on a line in the PR body""" + lines = self.body.splitlines() + return any(line.startswith(directive) for line in lines) + + +class FunctionDef(TypedDict): + """Type for an entry in `function-definitions.json`""" + + sources: list[str] + type: str + + +@dataclass +class Context: + gh_ref: str | None + changed: list[Path] + defs: dict[str, FunctionDef] + + def __init__(self) -> None: + self.gh_ref = getenv("GITHUB_REF") + self.changed = [] + self._init_change_list() + + with open(REPO_ROOT.joinpath("etc/function-definitions.json")) as f: + defs = json.load(f) + + defs.pop("__comment", None) + self.defs = defs + + def _init_change_list(self): + """Create a list of files that have been changed. This uses GITHUB_REF if + available, otherwise a diff between `HEAD` and `master`. + """ + + # For pull requests, GitHub creates a ref `refs/pull/1234/merge` (1234 being + # the PR number), and sets this as `GITHUB_REF`. + ref = self.gh_ref + eprint(f"using ref `{ref}`") + if ref is None or "merge" not in ref: + # If the ref is not for `merge` then we are not in PR CI + eprint("No diff available for ref") + return + + # The ref is for a dummy merge commit. We can extract the merge base by + # inspecting all parents (`^@`). + merge_sha = sp.check_output( + GIT + ["show-ref", "--hash", ref], text=True + ).strip() + merge_log = sp.check_output(GIT + ["log", "-1", merge_sha], text=True) + eprint(f"Merge:\n{merge_log}\n") + + parents = ( + sp.check_output(GIT + ["rev-parse", f"{merge_sha}^@"], text=True) + .strip() + .splitlines() + ) + assert len(parents) == 2, f"expected two-parent merge but got:\n{parents}" + base = parents[0].strip() + incoming = parents[1].strip() + + eprint(f"base: {base}, incoming: {incoming}") + textlist = sp.check_output( + GIT + ["diff", base, incoming, "--name-only"], text=True + ) + self.changed = [Path(p) for p in textlist.splitlines()] + + @staticmethod + def _ignore_file(fname: str) -> bool: + return any(fname.startswith(pfx) for pfx in IGNORE_FILES) + + def changed_routines(self) -> dict[str, list[str]]: + """Create a list of routines for which one or more files have been updated, + separated by type. + """ + routines = set() + for name, meta in self.defs.items(): + # Don't update if changes to the file should be ignored + sources = (f for f in meta["sources"] if not self._ignore_file(f)) + + # Select changed files + changed = [f for f in sources if Path(f) in self.changed] + + if len(changed) > 0: + eprint(f"changed files for {name}: {changed}") + routines.add(name) + + ret: dict[str, list[str]] = {} + for r in sorted(routines): + ret.setdefault(self.defs[r]["type"], []).append(r) + + return ret + + def make_workflow_output(self) -> str: + """Create a JSON object a list items for each type's changed files, if any + did change, and the routines that were affected by the change. + """ + + pr_number = os.environ.get("PR_NUMBER") + skip_tests = False + error_on_many_tests = False + + if pr_number is not None and len(pr_number) > 0: + pr = PrInfo.load(pr_number) + skip_tests = pr.contains_directive(SKIP_EXTENSIVE_DIRECTIVE) + error_on_many_tests = not pr.contains_directive( + ALLOW_MANY_EXTENSIVE_DIRECTIVE + ) + + if skip_tests: + eprint("Skipping all extensive tests") + + changed = self.changed_routines() + ret = [] + total_to_test = 0 + + for ty in TYPES: + ty_changed = changed.get(ty, []) + ty_to_test = [] if skip_tests else ty_changed + total_to_test += len(ty_to_test) + + item = { + "ty": ty, + "changed": ",".join(ty_changed), + "to_test": ",".join(ty_to_test), + } + + ret.append(item) + output = json.dumps({"matrix": ret}, separators=(",", ":")) + eprint(f"output: {output}") + eprint(f"total extensive tests: {total_to_test}") + + if error_on_many_tests and total_to_test > MANY_EXTENSIVE_THRESHOLD: + eprint( + f"More than {MANY_EXTENSIVE_THRESHOLD} tests would be run; add" + f" `{ALLOW_MANY_EXTENSIVE_DIRECTIVE}` to the PR body if this is intentional" + ) + exit(1) + + return output + + +def locate_baseline(flags: list[str]) -> None: + """Find the most recent baseline from CI, download it if specified. + + This returns rather than erroring, even if the `gh` commands fail. This is to avoid + erroring in CI if the baseline is unavailable (artifact time limit exceeded, first + run on the branch, etc). + """ + + download = False + extract = False + + while len(flags) > 0: + match flags[0]: + case "--download": + download = True + case "--extract": + extract = True + case _: + eprint(USAGE) + exit(1) + flags = flags[1:] + + if extract and not download: + eprint("cannot extract without downloading") + exit(1) + + try: + # Locate the most recent job to complete with success on our branch + latest_job = sp.check_output( + [ + "gh", + "run", + "list", + "--status=success", + f"--branch={DEFAULT_BRANCH}", + "--json=databaseId,url,headSha,conclusion,createdAt," + "status,workflowDatabaseId,workflowName", + # Return the first array element matching our workflow name. NB: cannot + # just use `--limit=1`, jq filtering happens after limiting. We also + # cannot just use `--workflow` because GH gets confused from + # different file names in history. + f'--jq=[.[] | select(.workflowName == "{WORKFLOW_NAME}")][0]', + ], + text=True, + ) + except sp.CalledProcessError as e: + eprint(f"failed to run github command: {e}") + return + + try: + latest = json.loads(latest_job) + eprint("latest job: ", json.dumps(latest, indent=4)) + except json.JSONDecodeError as e: + eprint(f"failed to decode json '{latest_job}', {e}") + return + + if not download: + eprint("--download not specified, returning") + return + + job_id = latest.get("databaseId") + if job_id is None: + eprint("skipping download step") + return + + sp.run( + ["gh", "run", "download", str(job_id), f"--pattern={ARTIFACT_GLOB}"], + check=False, + ) + + if not extract: + eprint("skipping extraction step") + return + + # Find the baseline with the most recent timestamp. GH downloads the files to e.g. + # `some-dirname/some-dirname.tar.xz`, so just glob the whole thing together. + candidate_baselines = glob(f"{ARTIFACT_GLOB}/{ARTIFACT_GLOB}") + if len(candidate_baselines) == 0: + eprint("no possible baseline directories found") + return + + candidate_baselines.sort(reverse=True) + baseline_archive = candidate_baselines[0] + eprint(f"extracting {baseline_archive}") + sp.run(["tar", "xJvf", baseline_archive], check=True) + eprint("baseline extracted successfully") + + +def check_iai_regressions(args: list[str]): + """Find regressions in iai summary.json files, exit with failure if any are + found. + """ + + iai_home_str = "iai-home" + pr_number = None + + while len(args) > 0: + match args: + case ["--home", home, *rest]: + iai_home_str = home + args = rest + case ["--allow-pr-override", pr_num, *rest]: + pr_number = pr_num + args = rest + case _: + eprint(USAGE) + exit(1) + + iai_home = Path(iai_home_str) + + found_summaries = False + regressions: list[dict] = [] + for summary_path in iglob("**/summary.json", root_dir=iai_home, recursive=True): + found_summaries = True + with open(iai_home / summary_path, "r") as f: + summary = json.load(f) + + summary_regs = [] + run = summary["callgrind_summary"]["callgrind_run"] + fname = summary["function_name"] + id = summary["id"] + name_entry = {"name": f"{fname}.{id}"} + + for segment in run["segments"]: + summary_regs.extend(segment["regressions"]) + + summary_regs.extend(run["total"]["regressions"]) + + regressions.extend(name_entry | reg for reg in summary_regs) + + if not found_summaries: + eprint(f"did not find any summary.json files within {iai_home}") + exit(1) + + if len(regressions) == 0: + eprint("No regressions found") + return + + eprint("Found regressions:", json.dumps(regressions, indent=4)) + + if pr_number is not None: + pr = PrInfo.load(pr_number) + if pr.contains_directive(REGRESSION_DIRECTIVE): + eprint("PR allows regressions, returning") + return + + exit(1) + + +def main(): + match sys.argv[1:]: + case ["generate-matrix"]: + ctx = Context() + output = ctx.make_workflow_output() + print(f"matrix={output}") + case ["locate-baseline", *flags]: + locate_baseline(flags) + case ["check-regressions", *args]: + check_iai_regressions(args) + case ["--help" | "-h"]: + print(USAGE) + exit() + case _: + eprint(USAGE) + exit(1) + + +if __name__ == "__main__": + main() diff --git a/ci/docker/aarch64-unknown-linux-gnu/Dockerfile b/ci/docker/aarch64-unknown-linux-gnu/Dockerfile index a7b23cb9e..7fa06b286 100644 --- a/ci/docker/aarch64-unknown-linux-gnu/Dockerfile +++ b/ci/docker/aarch64-unknown-linux-gnu/Dockerfile @@ -3,7 +3,7 @@ FROM ubuntu:24.04 RUN apt-get update && \ apt-get install -y --no-install-recommends \ gcc libc6-dev ca-certificates \ - gcc-aarch64-linux-gnu libc6-dev-arm64-cross \ + gcc-aarch64-linux-gnu m4 make libc6-dev-arm64-cross \ qemu-user-static ENV TOOLCHAIN_PREFIX=aarch64-linux-gnu- diff --git a/ci/docker/i586-unknown-linux-gnu/Dockerfile b/ci/docker/i586-unknown-linux-gnu/Dockerfile index 3b0bfc0d3..37e206a84 100644 --- a/ci/docker/i586-unknown-linux-gnu/Dockerfile +++ b/ci/docker/i586-unknown-linux-gnu/Dockerfile @@ -2,4 +2,4 @@ FROM ubuntu:24.04 RUN apt-get update && \ apt-get install -y --no-install-recommends \ - gcc-multilib libc6-dev ca-certificates + gcc-multilib m4 make libc6-dev ca-certificates diff --git a/ci/docker/i686-unknown-linux-gnu/Dockerfile b/ci/docker/i686-unknown-linux-gnu/Dockerfile index 3b0bfc0d3..37e206a84 100644 --- a/ci/docker/i686-unknown-linux-gnu/Dockerfile +++ b/ci/docker/i686-unknown-linux-gnu/Dockerfile @@ -2,4 +2,4 @@ FROM ubuntu:24.04 RUN apt-get update && \ apt-get install -y --no-install-recommends \ - gcc-multilib libc6-dev ca-certificates + gcc-multilib m4 make libc6-dev ca-certificates diff --git a/ci/docker/loongarch64-unknown-linux-gnu/Dockerfile b/ci/docker/loongarch64-unknown-linux-gnu/Dockerfile new file mode 100644 index 000000000..a9ce320e8 --- /dev/null +++ b/ci/docker/loongarch64-unknown-linux-gnu/Dockerfile @@ -0,0 +1,13 @@ +FROM ubuntu:24.04 + +RUN apt-get update && \ + apt-get install -y --no-install-recommends \ + gcc libc6-dev qemu-user-static ca-certificates \ + gcc-14-loongarch64-linux-gnu libc6-dev-loong64-cross + +ENV CARGO_TARGET_LOONGARCH64_UNKNOWN_LINUX_GNU_LINKER=loongarch64-linux-gnu-gcc-14 \ + CARGO_TARGET_LOONGARCH64_UNKNOWN_LINUX_GNU_RUNNER=qemu-loongarch64-static \ + AR_loongarch64_unknown_linux_gnu=loongarch64-linux-gnu-ar \ + CC_loongarch64_unknown_linux_gnu=loongarch64-linux-gnu-gcc-14 \ + QEMU_LD_PREFIX=/usr/loongarch64-linux-gnu \ + RUST_TEST_THREADS=1 diff --git a/ci/docker/x86_64-unknown-linux-gnu/Dockerfile b/ci/docker/x86_64-unknown-linux-gnu/Dockerfile index 15723ab57..c84a31c57 100644 --- a/ci/docker/x86_64-unknown-linux-gnu/Dockerfile +++ b/ci/docker/x86_64-unknown-linux-gnu/Dockerfile @@ -2,4 +2,4 @@ FROM ubuntu:24.04 RUN apt-get update && \ apt-get install -y --no-install-recommends \ - gcc libc6-dev ca-certificates + gcc m4 make libc6-dev ca-certificates diff --git a/ci/download-musl.sh b/ci/download-musl.sh deleted file mode 100755 index d0d8b310e..000000000 --- a/ci/download-musl.sh +++ /dev/null @@ -1,24 +0,0 @@ -#!/bin/sh -# Download the expected version of musl to a directory `musl` - -set -eux - -fname=musl-1.2.5.tar.gz -sha=a9a118bbe84d8764da0ea0d28b3ab3fae8477fc7e4085d90102b8596fc7c75e4 - -mkdir musl -curl "https://musl.libc.org/releases/$fname" -O - -case "$(uname -s)" in - MINGW*) - # Need to extract the second line because certutil does human output - fsha=$(certutil -hashfile "$fname" SHA256 | sed -n '2p') - [ "$sha" = "$fsha" ] || exit 1 - ;; - *) - echo "$sha $fname" | shasum -a 256 --check || exit 1 - ;; -esac - -tar -xzf "$fname" -C musl --strip-components 1 -rm "$fname" diff --git a/ci/run-docker.sh b/ci/run-docker.sh index a040126df..6626e7226 100755 --- a/ci/run-docker.sh +++ b/ci/run-docker.sh @@ -24,11 +24,14 @@ run() { # will be owned by root mkdir -p target - docker build -t "$target" "ci/docker/$target" + set_env="HOME=/tmp PATH=\$PATH:/rust/bin:/cargo/bin" + docker build -t "libm-$target" "ci/docker/$target" docker run \ --rm \ --user "$(id -u):$(id -g)" \ + -e CI \ -e RUSTFLAGS \ + -e CARGO_TERM_COLOR \ -e CARGO_HOME=/cargo \ -e CARGO_TARGET_DIR=/target \ -e "EMULATED=$emulated" \ @@ -38,8 +41,8 @@ run() { -v "$(rustc --print sysroot):/rust:ro" \ --init \ -w /checkout \ - "$target" \ - sh -c "HOME=/tmp PATH=\$PATH:/rust/bin exec ci/run.sh $target" + "libm-$target" \ + sh -c "$set_env exec ci/run.sh $target" } if [ -z "$1" ]; then diff --git a/ci/run.sh b/ci/run.sh index f61fff843..a946d325e 100755 --- a/ci/run.sh +++ b/ci/run.sh @@ -3,10 +3,10 @@ set -eux export RUST_BACKTRACE="${RUST_BACKTRACE:-full}" -# Needed for no-panic to correct detect a lack of panics -export RUSTFLAGS="${RUSTFLAGS:-} -Ccodegen-units=1" +export NEXTEST_STATUS_LEVEL=all target="${1:-}" +flags="" if [ -z "$target" ]; then host_target=$(rustc -vV | awk '/^host/ { print $2 }') @@ -14,15 +14,23 @@ if [ -z "$target" ]; then target="$host_target" fi -extra_flags="" +# We enumerate features manually. +flags="$flags --no-default-features" + +# Enable arch-specific routines when available. +flags="$flags --features arch" + +# Always enable `unstable-float` since it expands available API but does not +# change any implementations. +flags="$flags --features unstable-float" # We need to specifically skip tests for musl-math-sys on systems that can't # build musl since otherwise `--all` will activate it. case "$target" in # Can't build at all on MSVC, WASM, or thumb - *windows-msvc*) extra_flags="$extra_flags --exclude musl-math-sys" ;; - *wasm*) extra_flags="$extra_flags --exclude musl-math-sys" ;; - *thumb*) extra_flags="$extra_flags --exclude musl-math-sys" ;; + *windows-msvc*) flags="$flags --exclude musl-math-sys" ;; + *wasm*) flags="$flags --exclude musl-math-sys" ;; + *thumb*) flags="$flags --exclude musl-math-sys" ;; # We can build musl on MinGW but running tests gets a stack overflow *windows-gnu*) ;; @@ -32,34 +40,91 @@ case "$target" in *powerpc64le*) ;; # Everything else gets musl enabled - *) extra_flags="$extra_flags --features libm-test/build-musl" ;; + *) flags="$flags --features libm-test/build-musl" ;; +esac + +# Configure which targets test against MPFR +case "$target" in + # MSVC cannot link MPFR + *windows-msvc*) ;; + # FIXME: MinGW should be able to build MPFR, but setup in CI is nontrivial. + *windows-gnu*) ;; + # Targets that aren't cross compiled in CI work fine + aarch64*apple*) flags="$flags --features libm-test/build-mpfr" ;; + aarch64*linux*) flags="$flags --features libm-test/build-mpfr" ;; + i586*) flags="$flags --features libm-test/build-mpfr --features gmp-mpfr-sys/force-cross" ;; + i686*) flags="$flags --features libm-test/build-mpfr" ;; + x86_64*) flags="$flags --features libm-test/build-mpfr" ;; esac # FIXME: `STATUS_DLL_NOT_FOUND` testing macros on CI. # case "$target" in - *windows-gnu) extra_flags="$extra_flags --exclude libm-macros" ;; + *windows-gnu) flags="$flags --exclude libm-macros" ;; esac -if [ "$(uname -a)" = "Linux" ]; then - # also run the reference tests when we can. requires a Linux host. - extra_flags="$extra_flags --features libm-test/test-musl-serialized" -fi +# Make sure we can build with overriding features. +cargo check -p libm --no-default-features if [ "${BUILD_ONLY:-}" = "1" ]; then + # If we are on targets that can't run tests, verify that we can build. cmd="cargo build --target $target --package libm" $cmd - $cmd --features 'unstable' + $cmd --features unstable-intrinsics + + echo "can't run tests on $target; skipping" + exit +fi - echo "can't run tests on $target" -else - cmd="cargo test --all --target $target $extra_flags" +flags="$flags --all --target $target" +cmd="cargo test $flags" +profile="--profile" - # stable by default - $cmd - $cmd --release +# If nextest is available, use that +command -v cargo-nextest && nextest=1 || nextest=0 +if [ "$nextest" = "1" ]; then + # Workaround for https://github.com/nextest-rs/nextest/issues/2066 + if [ -f /.dockerenv ]; then + cfg_file="/tmp/nextest-config.toml" + echo "[store]" >> "$cfg_file" + echo "dir = \"$CARGO_TARGET_DIR/nextest\"" >> "$cfg_file" + cfg_flag="--config-file $cfg_file" + fi - # unstable with a feature - $cmd --features 'unstable' - $cmd --release --features 'unstable' + cmd="cargo nextest run ${cfg_flag:-} --max-fail=10 $flags" + profile="--cargo-profile" fi + +# Test once without intrinsics +$cmd + +# Run doctests if they were excluded by nextest +[ "$nextest" = "1" ] && cargo test --doc $flags + +# Exclude the macros and utile crates from the rest of the tests to save CI +# runtime, they shouldn't have anything feature- or opt-level-dependent. +cmd="$cmd --exclude util --exclude libm-macros" + +# Test once with intrinsics enabled +$cmd --features unstable-intrinsics +$cmd --features unstable-intrinsics --benches + +# Test the same in release mode, which also increases coverage. Also ensure +# the soft float routines are checked. +$cmd "$profile" release-checked +$cmd "$profile" release-checked --features force-soft-floats +$cmd "$profile" release-checked --features unstable-intrinsics +$cmd "$profile" release-checked --features unstable-intrinsics --benches + +# Ensure that the routines do not panic. +# +# `--tests` must be passed because no-panic is only enabled as a dev +# dependency. The `release-opt` profile must be used to enable LTO and a +# single CGU. +ENSURE_NO_PANIC=1 cargo build \ + -p libm \ + --target "$target" \ + --no-default-features \ + --features unstable-float \ + --tests \ + --profile release-opt diff --git a/crates/compiler-builtins-smoke-test/Cargo.toml b/crates/compiler-builtins-smoke-test/Cargo.toml index 8d084ee34..38a511669 100644 --- a/crates/compiler-builtins-smoke-test/Cargo.toml +++ b/crates/compiler-builtins-smoke-test/Cargo.toml @@ -6,10 +6,33 @@ edition = "2021" publish = false [lib] +crate-type = ["staticlib"] test = false bench = false [features] -unstable = [] -checked = [] -force-soft-floats = [] +default = ["arch", "compiler-builtins", "unstable-float"] + +# Copied from `libm`'s root `Cargo.toml`' +arch = [] +compiler-builtins = [] +unstable-float = [] + +[lints.rust] +unexpected_cfgs = { level = "warn", check-cfg = [ + "cfg(arch_enabled)", + "cfg(assert_no_panic)", + "cfg(intrinsics_enabled)", + 'cfg(feature, values("force-soft-floats"))', + 'cfg(feature, values("unstable"))', + 'cfg(feature, values("unstable-intrinsics"))', + 'cfg(feature, values("unstable-public-internals"))', +] } + +[profile.dev] +panic = "abort" + +[profile.release] +panic = "abort" +codegen-units = 1 +lto = "fat" diff --git a/crates/compiler-builtins-smoke-test/build.rs b/crates/compiler-builtins-smoke-test/build.rs index 27d4a0e89..ef8d613c9 100644 --- a/crates/compiler-builtins-smoke-test/build.rs +++ b/crates/compiler-builtins-smoke-test/build.rs @@ -1,3 +1,8 @@ +#[path = "../../libm/configure.rs"] +mod configure; + fn main() { - println!("cargo::rustc-check-cfg=cfg(assert_no_panic)"); + println!("cargo:rerun-if-changed=../../libm/configure.rs"); + let cfg = configure::Config::from_env(); + configure::emit_libm_config(&cfg); } diff --git a/crates/compiler-builtins-smoke-test/src/lib.rs b/crates/compiler-builtins-smoke-test/src/lib.rs index e65cb8da3..e70f6d9e0 100644 --- a/crates/compiler-builtins-smoke-test/src/lib.rs +++ b/crates/compiler-builtins-smoke-test/src/lib.rs @@ -1,11 +1,17 @@ //! Fake compiler-builtins crate //! //! This is used to test that we can source import `libm` into the compiler-builtins crate. +//! Additionally, it provides a `#[no_mangle]` C API that can be easier to inspect than the +//! default `.rlib`. +#![compiler_builtins] #![feature(core_intrinsics)] +#![feature(compiler_builtins)] +#![feature(f16)] +#![feature(f128)] #![allow(internal_features)] -#![allow(dead_code)] #![no_std] -#[path = "../../../src/math/mod.rs"] -pub mod libm; +mod math; +// Required for macro paths. +use math::libm::support; diff --git a/crates/compiler-builtins-smoke-test/src/math.rs b/crates/compiler-builtins-smoke-test/src/math.rs new file mode 100644 index 000000000..58a5bfbb9 --- /dev/null +++ b/crates/compiler-builtins-smoke-test/src/math.rs @@ -0,0 +1,182 @@ +use core::ffi::c_int; + +#[allow(dead_code)] +#[allow(clippy::all)] // We don't get `libm`'s list of `allow`s, so just ignore Clippy. +#[allow(unused_imports)] +#[path = "../../../libm/src/math/mod.rs"] +pub mod libm; + +/// Mark functions `#[no_mangle]` and with the C ABI. +macro_rules! no_mangle { + ($( $name:ident( $($tt:tt)+ ) -> $ret:ty; )+) => { + $( no_mangle!(@inner $name( $($tt)+ ) -> $ret); )+ + }; + + // Handle simple functions with single return types + (@inner $name:ident( $($arg:ident: $aty:ty),+ ) -> $ret:ty) => { + #[unsafe(no_mangle)] + extern "C" fn $name($($arg: $aty),+) -> $ret { + libm::$name($($arg),+) + } + }; + + + // Functions with `&mut` return values need to be handled differently, use `|` to + // separate inputs vs. outputs. + ( + @inner $name:ident( $($arg:ident: $aty:ty),+ | $($rarg:ident: $rty:ty),+) -> $ret:ty + ) => { + #[unsafe(no_mangle)] + extern "C" fn $name($($arg: $aty,)+ $($rarg: $rty),+) -> $ret { + let ret; + (ret, $(*$rarg),+) = libm::$name($($arg),+); + ret + } + }; +} + +no_mangle! { + frexp(x: f64 | y: &mut c_int) -> f64; + frexpf(x: f32 | y: &mut c_int) -> f32; + acos(x: f64) -> f64; + acosf(x: f32) -> f32; + acosh(x: f64) -> f64; + acoshf(x: f32) -> f32; + asin(x: f64) -> f64; + asinf(x: f32) -> f32; + asinh(x: f64) -> f64; + asinhf(x: f32) -> f32; + atan(x: f64) -> f64; + atan2(x: f64, y: f64) -> f64; + atan2f(x: f32, y: f32) -> f32; + atanf(x: f32) -> f32; + atanh(x: f64) -> f64; + atanhf(x: f32) -> f32; + cbrt(x: f64) -> f64; + cbrtf(x: f32) -> f32; + ceil(x: f64) -> f64; + ceilf(x: f32) -> f32; + ceilf128(x: f128) -> f128; + ceilf16(x: f16) -> f16; + copysign(x: f64, y: f64) -> f64; + copysignf(x: f32, y: f32) -> f32; + copysignf128(x: f128, y: f128) -> f128; + copysignf16(x: f16, y: f16) -> f16; + cos(x: f64) -> f64; + cosf(x: f32) -> f32; + cosh(x: f64) -> f64; + coshf(x: f32) -> f32; + erf(x: f64) -> f64; + erfc(x: f64) -> f64; + erfcf(x: f32) -> f32; + erff(x: f32) -> f32; + exp(x: f64) -> f64; + exp10(x: f64) -> f64; + exp10f(x: f32) -> f32; + exp2(x: f64) -> f64; + exp2f(x: f32) -> f32; + expf(x: f32) -> f32; + expm1(x: f64) -> f64; + expm1f(x: f32) -> f32; + fabs(x: f64) -> f64; + fabsf(x: f32) -> f32; + fabsf128(x: f128) -> f128; + fabsf16(x: f16) -> f16; + fdim(x: f64, y: f64) -> f64; + fdimf(x: f32, y: f32) -> f32; + fdimf128(x: f128, y: f128) -> f128; + fdimf16(x: f16, y: f16) -> f16; + floor(x: f64) -> f64; + floorf(x: f32) -> f32; + floorf128(x: f128) -> f128; + floorf16(x: f16) -> f16; + fma(x: f64, y: f64, z: f64) -> f64; + fmaf(x: f32, y: f32, z: f32) -> f32; + fmax(x: f64, y: f64) -> f64; + fmaxf(x: f32, y: f32) -> f32; + fmin(x: f64, y: f64) -> f64; + fminf(x: f32, y: f32) -> f32; + fmod(x: f64, y: f64) -> f64; + fmodf(x: f32, y: f32) -> f32; + hypot(x: f64, y: f64) -> f64; + hypotf(x: f32, y: f32) -> f32; + ilogb(x: f64) -> c_int; + ilogbf(x: f32) -> c_int; + j0(x: f64) -> f64; + j0f(x: f32) -> f32; + j1(x: f64) -> f64; + j1f(x: f32) -> f32; + jn(x: c_int, y: f64) -> f64; + jnf(x: c_int, y: f32) -> f32; + ldexp(x: f64, y: c_int) -> f64; + ldexpf(x: f32, y: c_int) -> f32; + lgamma(x: f64) -> f64; + lgamma_r(x: f64 | r: &mut c_int) -> f64; + lgammaf(x: f32) -> f32; + lgammaf_r(x: f32 | r: &mut c_int) -> f32; + log(x: f64) -> f64; + log10(x: f64) -> f64; + log10f(x: f32) -> f32; + log1p(x: f64) -> f64; + log1pf(x: f32) -> f32; + log2(x: f64) -> f64; + log2f(x: f32) -> f32; + logf(x: f32) -> f32; + modf(x: f64 | r: &mut f64) -> f64; + modff(x: f32 | r: &mut f32) -> f32; + nextafter(x: f64, y: f64) -> f64; + nextafterf(x: f32, y: f32) -> f32; + pow(x: f64, y: f64) -> f64; + powf(x: f32, y: f32) -> f32; + remainder(x: f64, y: f64) -> f64; + remainderf(x: f32, y: f32) -> f32; + remquo(x: f64, y: f64 | q: &mut c_int) -> f64; + remquof(x: f32, y: f32 | q: &mut c_int) -> f32; + rint(x: f64) -> f64; + rintf(x: f32) -> f32; + rintf128(x: f128) -> f128; + rintf16(x: f16) -> f16; + round(x: f64) -> f64; + roundf(x: f32) -> f32; + scalbn(x: f64, y: c_int) -> f64; + scalbnf(x: f32, y: c_int) -> f32; + sin(x: f64) -> f64; + sinf(x: f32) -> f32; + sinh(x: f64) -> f64; + sinhf(x: f32) -> f32; + sqrt(x: f64) -> f64; + sqrtf(x: f32) -> f32; + tan(x: f64) -> f64; + tanf(x: f32) -> f32; + tanh(x: f64) -> f64; + tanhf(x: f32) -> f32; + tgamma(x: f64) -> f64; + tgammaf(x: f32) -> f32; + trunc(x: f64) -> f64; + truncf(x: f32) -> f32; + truncf128(x: f128) -> f128; + truncf16(x: f16) -> f16; + y0(x: f64) -> f64; + y0f(x: f32) -> f32; + y1(x: f64) -> f64; + y1f(x: f32) -> f32; + yn(x: c_int, y: f64) -> f64; + ynf(x: c_int, y: f32) -> f32; +} + +/* sincos has no direct return type, not worth handling in the macro */ + +#[unsafe(no_mangle)] +extern "C" fn sincos(x: f64, s: &mut f64, c: &mut f64) { + (*s, *c) = libm::sincos(x); +} + +#[unsafe(no_mangle)] +extern "C" fn sincosf(x: f32, s: &mut f32, c: &mut f32) { + (*s, *c) = libm::sincosf(x); +} + +#[panic_handler] +fn panic(_info: &core::panic::PanicInfo) -> ! { + loop {} +} diff --git a/crates/libm-bench/Cargo.toml b/crates/libm-bench/Cargo.toml deleted file mode 100644 index ee8c58200..000000000 --- a/crates/libm-bench/Cargo.toml +++ /dev/null @@ -1,16 +0,0 @@ -[package] -name = "libm-bench" -version = "0.1.0" -authors = ["Gonzalo Brito Gadeschi "] -edition = "2021" -license = "MIT OR Apache-2.0" -publish = false - -[dependencies] -libm = { path = "../..", default-features = false } -rand = "0.8.5" -paste = "1.0.15" - -[features] -default = [] -unstable = [ "libm/unstable" ] diff --git a/crates/libm-bench/benches/bench.rs b/crates/libm-bench/benches/bench.rs deleted file mode 100644 index ca999b90f..000000000 --- a/crates/libm-bench/benches/bench.rs +++ /dev/null @@ -1,116 +0,0 @@ -#![feature(test)] -extern crate test; - -use rand::Rng; -use test::Bencher; - -macro_rules! unary { - ($($func:ident),*) => ($( - paste::item! { - #[bench] - pub fn [<$func>](bh: &mut Bencher) { - let mut rng = rand::thread_rng(); - let x = rng.gen::(); - bh.iter(|| test::black_box(libm::[<$func>](x))) - } - #[bench] - pub fn [<$func f>](bh: &mut Bencher) { - let mut rng = rand::thread_rng(); - let x = rng.gen::(); - bh.iter(|| test::black_box(libm::[<$func f>](x))) - } - } - )*); -} -macro_rules! binary { - ($($func:ident),*) => ($( - paste::item! { - #[bench] - pub fn [<$func>](bh: &mut Bencher) { - let mut rng = rand::thread_rng(); - let x = rng.gen::(); - let y = rng.gen::(); - bh.iter(|| test::black_box(libm::[<$func>](x, y))) - } - #[bench] - pub fn [<$func f>](bh: &mut Bencher) { - let mut rng = rand::thread_rng(); - let x = rng.gen::(); - let y = rng.gen::(); - bh.iter(|| test::black_box(libm::[<$func f>](x, y))) - } - } - )*); - ($($func:ident);*) => ($( - paste::item! { - #[bench] - pub fn [<$func>](bh: &mut Bencher) { - let mut rng = rand::thread_rng(); - let x = rng.gen::(); - let n = rng.gen::(); - bh.iter(|| test::black_box(libm::[<$func>](x, n))) - } - #[bench] - pub fn [<$func f>](bh: &mut Bencher) { - let mut rng = rand::thread_rng(); - let x = rng.gen::(); - let n = rng.gen::(); - bh.iter(|| test::black_box(libm::[<$func f>](x, n))) - } - } - )*); -} -macro_rules! trinary { - ($($func:ident),*) => ($( - paste::item! { - #[bench] - pub fn [<$func>](bh: &mut Bencher) { - let mut rng = rand::thread_rng(); - let x = rng.gen::(); - let y = rng.gen::(); - let z = rng.gen::(); - bh.iter(|| test::black_box(libm::[<$func>](x, y, z))) - } - #[bench] - pub fn [<$func f>](bh: &mut Bencher) { - let mut rng = rand::thread_rng(); - let x = rng.gen::(); - let y = rng.gen::(); - let z = rng.gen::(); - bh.iter(|| test::black_box(libm::[<$func f>](x, y, z))) - } - } - )*); -} -macro_rules! bessel { - ($($func:ident),*) => ($( - paste::item! { - #[bench] - pub fn [<$func>](bh: &mut Bencher) { - let mut rng = rand::thread_rng(); - let mut n = rng.gen::(); - n &= 0xffff; - let x = rng.gen::(); - bh.iter(|| test::black_box(libm::[<$func>](n, x))) - } - #[bench] - pub fn [<$func f>](bh: &mut Bencher) { - let mut rng = rand::thread_rng(); - let mut n = rng.gen::(); - n &= 0xffff; - let x = rng.gen::(); - bh.iter(|| test::black_box(libm::[<$func f>](n, x))) - } - } - )*); -} - -unary!( - acos, acosh, asin, atan, cbrt, ceil, cos, cosh, erf, exp, exp2, exp10, expm1, fabs, floor, j0, - j1, lgamma, log, log1p, log2, log10, rint, round, sin, sinh, sqrt, tan, tanh, tgamma, trunc, - y0, y1 -); -binary!(atan2, copysign, fdim, fmax, fmin, fmod, hypot, pow); -trinary!(fma); -bessel!(jn, yn); -binary!(ldexp; scalbn); diff --git a/crates/libm-macros/Cargo.toml b/crates/libm-macros/Cargo.toml index 3da9d45a2..50c869db7 100644 --- a/crates/libm-macros/Cargo.toml +++ b/crates/libm-macros/Cargo.toml @@ -1,13 +1,21 @@ [package] name = "libm-macros" version = "0.1.0" -edition = "2021" +edition = "2024" publish = false [lib] proc-macro = true [dependencies] -proc-macro2 = "1.0.88" -quote = "1.0.37" -syn = { version = "2.0.79", features = ["full", "extra-traits", "visit-mut"] } +heck = "0.5.0" +proc-macro2 = "1.0.94" +quote = "1.0.40" +syn = { version = "2.0.100", features = ["full", "extra-traits", "visit-mut"] } + +[lints.rust] +# Values used during testing +unexpected_cfgs = { level = "warn", check-cfg = [ + 'cfg(f16_enabled)', + 'cfg(f128_enabled)', +] } diff --git a/crates/libm-macros/src/enums.rs b/crates/libm-macros/src/enums.rs new file mode 100644 index 000000000..864b625ea --- /dev/null +++ b/crates/libm-macros/src/enums.rs @@ -0,0 +1,154 @@ +use heck::ToUpperCamelCase; +use proc_macro2 as pm2; +use proc_macro2::{Ident, Span}; +use quote::quote; +use syn::spanned::Spanned; +use syn::{Fields, ItemEnum, Variant}; + +use crate::{ALL_OPERATIONS, base_name}; + +/// Implement `#[function_enum]`, see documentation in `lib.rs`. +pub fn function_enum( + mut item: ItemEnum, + attributes: pm2::TokenStream, +) -> syn::Result { + expect_empty_enum(&item)?; + let attr_span = attributes.span(); + let mut attr = attributes.into_iter(); + + // Attribute should be the identifier of the `BaseName` enum. + let Some(tt) = attr.next() else { + return Err(syn::Error::new(attr_span, "expected one attribute")); + }; + + let pm2::TokenTree::Ident(base_enum) = tt else { + return Err(syn::Error::new(tt.span(), "expected an identifier")); + }; + + if let Some(tt) = attr.next() { + return Err(syn::Error::new(tt.span(), "unexpected token after identifier")); + } + + let enum_name = &item.ident; + let mut as_str_arms = Vec::new(); + let mut from_str_arms = Vec::new(); + let mut base_arms = Vec::new(); + + for func in ALL_OPERATIONS.iter() { + let fn_name = func.name; + let ident = Ident::new(&fn_name.to_upper_camel_case(), Span::call_site()); + let bname_ident = Ident::new(&base_name(fn_name).to_upper_camel_case(), Span::call_site()); + + // Match arm for `fn as_str(self)` matcher + as_str_arms.push(quote! { Self::#ident => #fn_name }); + from_str_arms.push(quote! { #fn_name => Self::#ident }); + + // Match arm for `fn base_name(self)` matcher + base_arms.push(quote! { Self::#ident => #base_enum::#bname_ident }); + + let variant = + Variant { attrs: Vec::new(), ident, fields: Fields::Unit, discriminant: None }; + + item.variants.push(variant); + } + + let variants = item.variants.iter(); + + let res = quote! { + // Instantiate the enum + #item + + impl #enum_name { + /// All variants of this enum. + pub const ALL: &[Self] = &[ + #( Self::#variants, )* + ]; + + /// The stringified version of this function name. + pub const fn as_str(self) -> &'static str { + match self { + #( #as_str_arms , )* + } + } + + /// If `s` is the name of a function, return it. + pub fn from_str(s: &str) -> Option { + let ret = match s { + #( #from_str_arms , )* + _ => return None, + }; + Some(ret) + } + + /// The base name enum for this function. + pub const fn base_name(self) -> #base_enum { + match self { + #( #base_arms, )* + } + } + + /// Return information about this operation. + pub fn math_op(self) -> &'static crate::op::MathOpInfo { + crate::op::ALL_OPERATIONS.iter().find(|op| op.name == self.as_str()).unwrap() + } + } + }; + + Ok(res) +} + +/// Implement `#[base_name_enum]`, see documentation in `lib.rs`. +pub fn base_name_enum( + mut item: ItemEnum, + attributes: pm2::TokenStream, +) -> syn::Result { + expect_empty_enum(&item)?; + if !attributes.is_empty() { + let sp = attributes.span(); + return Err(syn::Error::new(sp.span(), "no attributes expected")); + } + + let mut base_names: Vec<_> = ALL_OPERATIONS.iter().map(|func| base_name(func.name)).collect(); + base_names.sort_unstable(); + base_names.dedup(); + + let item_name = &item.ident; + let mut as_str_arms = Vec::new(); + + for base_name in base_names { + let ident = Ident::new(&base_name.to_upper_camel_case(), Span::call_site()); + + // Match arm for `fn as_str(self)` matcher + as_str_arms.push(quote! { Self::#ident => #base_name }); + + let variant = + Variant { attrs: Vec::new(), ident, fields: Fields::Unit, discriminant: None }; + + item.variants.push(variant); + } + + let res = quote! { + // Instantiate the enum + #item + + impl #item_name { + /// The stringified version of this base name. + pub const fn as_str(self) -> &'static str { + match self { + #( #as_str_arms ),* + } + } + } + }; + + Ok(res) +} + +/// Verify that an enum is empty, otherwise return an error +fn expect_empty_enum(item: &ItemEnum) -> syn::Result<()> { + if !item.variants.is_empty() { + Err(syn::Error::new(item.variants.span(), "expected an empty enum")) + } else { + Ok(()) + } +} diff --git a/crates/libm-macros/src/lib.rs b/crates/libm-macros/src/lib.rs index dc78598ca..3cee5385b 100644 --- a/crates/libm-macros/src/lib.rs +++ b/crates/libm-macros/src/lib.rs @@ -1,242 +1,48 @@ +mod enums; mod parse; -use std::sync::LazyLock; +mod shared; use parse::{Invocation, StructuredInput}; use proc_macro as pm; use proc_macro2::{self as pm2, Span}; use quote::{ToTokens, quote}; -use syn::Ident; +pub(crate) use shared::{ALL_OPERATIONS, FloatTy, MathOpInfo, Ty}; +use syn::spanned::Spanned; use syn::visit_mut::VisitMut; +use syn::{Ident, ItemEnum}; -const ALL_FUNCTIONS: &[(Signature, Option, &[&str])] = &[ - ( - // `fn(f32) -> f32` - Signature { args: &[Ty::F32], returns: &[Ty::F32] }, - None, - &[ - "acosf", "acoshf", "asinf", "asinhf", "atanf", "atanhf", "cbrtf", "ceilf", "cosf", - "coshf", "erff", "exp10f", "exp2f", "expf", "expm1f", "fabsf", "floorf", "j0f", "j1f", - "lgammaf", "log10f", "log1pf", "log2f", "logf", "rintf", "roundf", "sinf", "sinhf", - "sqrtf", "tanf", "tanhf", "tgammaf", "truncf", - ], - ), - ( - // `(f64) -> f64` - Signature { args: &[Ty::F64], returns: &[Ty::F64] }, - None, - &[ - "acos", "acosh", "asin", "asinh", "atan", "atanh", "cbrt", "ceil", "cos", "cosh", - "erf", "exp10", "exp2", "exp", "expm1", "fabs", "floor", "j0", "j1", "lgamma", "log10", - "log1p", "log2", "log", "rint", "round", "sin", "sinh", "sqrt", "tan", "tanh", - "tgamma", "trunc", - ], - ), - ( - // `(f32, f32) -> f32` - Signature { args: &[Ty::F32, Ty::F32], returns: &[Ty::F32] }, - None, - &[ - "atan2f", - "copysignf", - "fdimf", - "fmaxf", - "fminf", - "fmodf", - "hypotf", - "nextafterf", - "powf", - "remainderf", - ], - ), - ( - // `(f64, f64) -> f64` - Signature { args: &[Ty::F64, Ty::F64], returns: &[Ty::F64] }, - None, - &[ - "atan2", - "copysign", - "fdim", - "fmax", - "fmin", - "fmod", - "hypot", - "nextafter", - "pow", - "remainder", - ], - ), - ( - // `(f32, f32, f32) -> f32` - Signature { args: &[Ty::F32, Ty::F32, Ty::F32], returns: &[Ty::F32] }, - None, - &["fmaf"], - ), - ( - // `(f64, f64, f64) -> f64` - Signature { args: &[Ty::F64, Ty::F64, Ty::F64], returns: &[Ty::F64] }, - None, - &["fma"], - ), - ( - // `(f32) -> i32` - Signature { args: &[Ty::F32], returns: &[Ty::I32] }, - None, - &["ilogbf"], - ), - ( - // `(f64) -> i32` - Signature { args: &[Ty::F64], returns: &[Ty::I32] }, - None, - &["ilogb"], - ), - ( - // `(i32, f32) -> f32` - Signature { args: &[Ty::I32, Ty::F32], returns: &[Ty::F32] }, - None, - &["jnf"], - ), - ( - // `(i32, f64) -> f64` - Signature { args: &[Ty::I32, Ty::F64], returns: &[Ty::F64] }, - None, - &["jn"], - ), - ( - // `(f32, i32) -> f32` - Signature { args: &[Ty::F32, Ty::I32], returns: &[Ty::F32] }, - None, - &["scalbnf", "ldexpf"], - ), - ( - // `(f64, i64) -> f64` - Signature { args: &[Ty::F64, Ty::I32], returns: &[Ty::F64] }, - None, - &["scalbn", "ldexp"], - ), - ( - // `(f32, &mut f32) -> f32` as `(f32) -> (f32, f32)` - Signature { args: &[Ty::F32], returns: &[Ty::F32, Ty::F32] }, - Some(Signature { args: &[Ty::F32, Ty::MutF32], returns: &[Ty::F32] }), - &["modff"], - ), - ( - // `(f64, &mut f64) -> f64` as `(f64) -> (f64, f64)` - Signature { args: &[Ty::F64], returns: &[Ty::F64, Ty::F64] }, - Some(Signature { args: &[Ty::F64, Ty::MutF64], returns: &[Ty::F64] }), - &["modf"], - ), - ( - // `(f32, &mut c_int) -> f32` as `(f32) -> (f32, i32)` - Signature { args: &[Ty::F32], returns: &[Ty::F32, Ty::I32] }, - Some(Signature { args: &[Ty::F32, Ty::MutCInt], returns: &[Ty::F32] }), - &["frexpf", "lgammaf_r"], - ), - ( - // `(f64, &mut c_int) -> f64` as `(f64) -> (f64, i32)` - Signature { args: &[Ty::F64], returns: &[Ty::F64, Ty::I32] }, - Some(Signature { args: &[Ty::F64, Ty::MutCInt], returns: &[Ty::F64] }), - &["frexp", "lgamma_r"], - ), - ( - // `(f32, f32, &mut c_int) -> f32` as `(f32, f32) -> (f32, i32)` - Signature { args: &[Ty::F32, Ty::F32], returns: &[Ty::F32, Ty::I32] }, - Some(Signature { args: &[Ty::F32, Ty::F32, Ty::MutCInt], returns: &[Ty::F32] }), - &["remquof"], - ), - ( - // `(f64, f64, &mut c_int) -> f64` as `(f64, f64) -> (f64, i32)` - Signature { args: &[Ty::F64, Ty::F64], returns: &[Ty::F64, Ty::I32] }, - Some(Signature { args: &[Ty::F64, Ty::F64, Ty::MutCInt], returns: &[Ty::F64] }), - &["remquo"], - ), - ( - // `(f32, &mut f32, &mut f32)` as `(f32) -> (f32, f32)` - Signature { args: &[Ty::F32], returns: &[Ty::F32, Ty::F32] }, - Some(Signature { args: &[Ty::F32, Ty::MutF32, Ty::MutF32], returns: &[] }), - &["sincosf"], - ), - ( - // `(f64, &mut f64, &mut f64)` as `(f64) -> (f64, f64)` - Signature { args: &[Ty::F64], returns: &[Ty::F64, Ty::F64] }, - Some(Signature { args: &[Ty::F64, Ty::MutF64, Ty::MutF64], returns: &[] }), - &["sincos"], - ), -]; - -/// A type used in a function signature. -#[allow(dead_code)] -#[derive(Debug, Clone, Copy)] -enum Ty { - F16, - F32, - F64, - F128, - I32, - CInt, - MutF16, - MutF32, - MutF64, - MutF128, - MutI32, - MutCInt, -} +const KNOWN_TYPES: &[&str] = &["FTy", "CFn", "CArgs", "CRet", "RustFn", "RustArgs", "RustRet"]; -impl ToTokens for Ty { - fn to_tokens(&self, tokens: &mut pm2::TokenStream) { - let ts = match self { - Ty::F16 => quote! { f16 }, - Ty::F32 => quote! { f32 }, - Ty::F64 => quote! { f64 }, - Ty::F128 => quote! { f128 }, - Ty::I32 => quote! { i32 }, - Ty::CInt => quote! { ::core::ffi::c_int }, - Ty::MutF16 => quote! { &mut f16 }, - Ty::MutF32 => quote! { &mut f32 }, - Ty::MutF64 => quote! { &mut f64 }, - Ty::MutF128 => quote! { &mut f128 }, - Ty::MutI32 => quote! { &mut i32 }, - Ty::MutCInt => quote! { &mut core::ffi::c_int }, - }; +/// Populate an enum with a variant representing function. Names are in upper camel case. +/// +/// Applied to an empty enum. Expects one attribute `#[function_enum(BaseName)]` that provides +/// the name of the `BaseName` enum. +#[proc_macro_attribute] +pub fn function_enum(attributes: pm::TokenStream, tokens: pm::TokenStream) -> pm::TokenStream { + let item = syn::parse_macro_input!(tokens as ItemEnum); + let res = enums::function_enum(item, attributes.into()); - tokens.extend(ts); + match res { + Ok(ts) => ts, + Err(e) => e.into_compile_error(), } + .into() } -/// Representation of e.g. `(f32, f32) -> f32` -#[derive(Debug, Clone)] -struct Signature { - args: &'static [Ty], - returns: &'static [Ty], -} - -/// Combined information about a function implementation. -#[derive(Debug, Clone)] -struct FunctionInfo { - name: &'static str, - /// Function signature for C implementations - c_sig: Signature, - /// Function signature for Rust implementations - rust_sig: Signature, -} - -/// A flat representation of `ALL_FUNCTIONS`. -static ALL_FUNCTIONS_FLAT: LazyLock> = LazyLock::new(|| { - let mut ret = Vec::new(); +/// Create an enum representing all possible base names, with names in upper camel case. +/// +/// Applied to an empty enum. +#[proc_macro_attribute] +pub fn base_name_enum(attributes: pm::TokenStream, tokens: pm::TokenStream) -> pm::TokenStream { + let item = syn::parse_macro_input!(tokens as ItemEnum); + let res = enums::base_name_enum(item, attributes.into()); - for (rust_sig, c_sig, names) in ALL_FUNCTIONS { - for name in *names { - let api = FunctionInfo { - name, - rust_sig: rust_sig.clone(), - c_sig: c_sig.clone().unwrap_or_else(|| rust_sig.clone()), - }; - ret.push(api); - } + match res { + Ok(ts) => ts, + Err(e) => e.into_compile_error(), } - - ret.sort_by_key(|item| item.name); - ret -}); + .into() +} /// Do something for each function present in this crate. /// @@ -258,6 +64,8 @@ static ALL_FUNCTIONS_FLAT: LazyLock> = LazyLock::new(|| { /// ( /// // Name of that function /// fn_name: $fn_name:ident, +/// // The basic float type for this function (e.g. `f32`, `f64`) +/// FTy: $FTy:ty, /// // Function signature of the C version (e.g. `fn(f32, &mut f32) -> f32`) /// CFn: $CFn:ty, /// // A tuple representing the C version's arguments (e.g. `(f32, &mut f32)`) @@ -271,7 +79,7 @@ static ALL_FUNCTIONS_FLAT: LazyLock> = LazyLock::new(|| { /// // The Rust version's return type (e.g. `(f32, f32)`) /// RustRet: $RustRet:ty, /// // Attributes for the current function, if any -/// attrs: [$($meta:meta)*] +/// attrs: [$($attr:meta),*], /// // Extra tokens passed directly (if any) /// extra: [$extra:ident], /// // Extra function-tokens passed directly (if any) @@ -279,17 +87,19 @@ static ALL_FUNCTIONS_FLAT: LazyLock> = LazyLock::new(|| { /// ) => { }; /// } /// +/// // All fields except for `callback` are optional. /// libm_macros::for_each_function! { /// // The macro to invoke as a callback /// callback: callback_macro, +/// // Which types to include either as a list (`[CFn, RustFn, RustArgs]`) or "all" +/// emit_types: all, /// // Functions to skip, i.e. `callback` shouldn't be called at all for these. -/// // -/// // This is an optional field. /// skip: [sin, cos], /// // Attributes passed as `attrs` for specific functions. For example, here the invocation /// // with `sinf` and that with `cosf` will both get `meta1` and `meta2`, but no others will. /// // -/// // This is an optional field. +/// // Note that `f16_enabled` and `f128_enabled` will always get emitted regardless of whether +/// // or not this is specified. /// attributes: [ /// #[meta1] /// #[meta2] @@ -297,8 +107,6 @@ static ALL_FUNCTIONS_FLAT: LazyLock> = LazyLock::new(|| { /// ], /// // Any tokens that should be passed directly to all invocations of the callback. This can /// // be used to pass local variables or other things the macro needs access to. -/// // -/// // This is an optional field. /// extra: [foo], /// // Similar to `extra`, but allow providing a pattern for only specific functions. Uses /// // a simplified match-like syntax. @@ -313,7 +121,7 @@ pub fn for_each_function(tokens: pm::TokenStream) -> pm::TokenStream { let input = syn::parse_macro_input!(tokens as Invocation); let res = StructuredInput::from_fields(input) - .and_then(|s_in| validate(&s_in).map(|fn_list| (s_in, fn_list))) + .and_then(|mut s_in| validate(&mut s_in).map(|fn_list| (s_in, fn_list))) .and_then(|(s_in, fn_list)| expand(s_in, &fn_list)); match res { @@ -325,7 +133,7 @@ pub fn for_each_function(tokens: pm::TokenStream) -> pm::TokenStream { /// Check for any input that is structurally correct but has other problems. /// /// Returns the list of function names that we should expand for. -fn validate(input: &StructuredInput) -> syn::Result> { +fn validate(input: &mut StructuredInput) -> syn::Result> { // Collect lists of all functions that are provied as macro inputs in various fields (only, // skip, attributes). let attr_mentions = input @@ -341,7 +149,7 @@ fn validate(input: &StructuredInput) -> syn::Result> // Make sure that every function mentioned is a real function for mentioned in all_mentioned_fns { - if !ALL_FUNCTIONS_FLAT.iter().any(|func| mentioned == func.name) { + if !ALL_OPERATIONS.iter().any(|func| mentioned == func.name) { let e = syn::Error::new( mentioned.span(), format!("unrecognized function name `{mentioned}`"), @@ -353,14 +161,14 @@ fn validate(input: &StructuredInput) -> syn::Result> if !input.skip.is_empty() && input.only.is_some() { let e = syn::Error::new( input.only_span.unwrap(), - format!("only one of `skip` or `only` may be specified"), + "only one of `skip` or `only` may be specified", ); return Err(e); } // Construct a list of what we intend to expand let mut fn_list = Vec::new(); - for func in ALL_FUNCTIONS_FLAT.iter() { + for func in ALL_OPERATIONS.iter() { let fn_name = func.name; // If we have an `only` list and it does _not_ contain this function name, skip it if input.only.as_ref().is_some_and(|only| !only.iter().any(|o| o == fn_name)) { @@ -376,6 +184,43 @@ fn validate(input: &StructuredInput) -> syn::Result> fn_list.push(func); } + // Types that the user would like us to provide in the macro + let mut add_all_types = false; + for ty in &input.emit_types { + let ty_name = ty.to_string(); + if ty_name == "all" { + add_all_types = true; + continue; + } + + // Check that all requested types are valid + if !KNOWN_TYPES.contains(&ty_name.as_str()) { + let e = syn::Error::new( + ty_name.span(), + format!("unrecognized type identifier `{ty_name}`"), + ); + return Err(e); + } + } + + if add_all_types { + // Ensure that if `all` was specified that nothing else was + if input.emit_types.len() > 1 { + let e = syn::Error::new( + input.emit_types_span.unwrap(), + "if `all` is specified, no other type identifiers may be given", + ); + return Err(e); + } + + // ...and then add all types + input.emit_types.clear(); + for ty in KNOWN_TYPES { + let ident = Ident::new(ty, Span::call_site()); + input.emit_types.push(ident); + } + } + if let Some(map) = &input.fn_extra { if !map.keys().any(|key| key == "_") { // No default provided; make sure every expected function is covered @@ -404,7 +249,7 @@ fn validate(input: &StructuredInput) -> syn::Result> } /// Expand our structured macro input into invocations of the callback macro. -fn expand(input: StructuredInput, fn_list: &[&FunctionInfo]) -> syn::Result { +fn expand(input: StructuredInput, fn_list: &[&MathOpInfo]) -> syn::Result { let mut out = pm2::TokenStream::new(); let default_ident = Ident::new("_", Span::call_site()); let callback = input.callback; @@ -413,16 +258,28 @@ fn expand(input: StructuredInput, fn_list: &[&FunctionInfo]) -> syn::Result { - let meta = attrs - .iter() - .filter(|map| map.names.contains(&fn_name)) - .flat_map(|map| &map.meta); - quote! { attrs: [ #( #meta )* ] } - } - None => pm2::TokenStream::new(), - }; + let mut meta_fields = Vec::new(); + if let Some(attrs) = &input.attributes { + let meta_iter = attrs + .iter() + .filter(|map| map.names.contains(&fn_name)) + .flat_map(|map| &map.meta) + .map(|v| v.into_token_stream()); + + meta_fields.extend(meta_iter); + } + + // Always emit f16 and f128 meta so this doesn't need to be repeated everywhere + if func.rust_sig.args.contains(&Ty::F16) || func.rust_sig.returns.contains(&Ty::F16) { + let ts = quote! { cfg(f16_enabled) }; + meta_fields.push(ts); + } + if func.rust_sig.args.contains(&Ty::F128) || func.rust_sig.returns.contains(&Ty::F128) { + let ts = quote! { cfg(f128_enabled) }; + meta_fields.push(ts); + } + + let meta_field = quote! { attrs: [ #( #meta_fields ),* ], }; // Prepare extra in an `extra: ...` field, running the replacer let extra_field = match input.extra.clone() { @@ -451,20 +308,31 @@ fn expand(input: StructuredInput, fn_list: &[&FunctionInfo]) -> syn::Result pm2::TokenStream::new(), }; + let base_fty = func.float_ty; let c_args = &func.c_sig.args; let c_ret = &func.c_sig.returns; let rust_args = &func.rust_sig.args; let rust_ret = &func.rust_sig.returns; + let mut ty_fields = Vec::new(); + for ty in &input.emit_types { + let field = match ty.to_string().as_str() { + "FTy" => quote! { FTy: #base_fty, }, + "CFn" => quote! { CFn: fn( #(#c_args),* ,) -> ( #(#c_ret),* ), }, + "CArgs" => quote! { CArgs: ( #(#c_args),* ,), }, + "CRet" => quote! { CRet: ( #(#c_ret),* ), }, + "RustFn" => quote! { RustFn: fn( #(#rust_args),* ,) -> ( #(#rust_ret),* ), }, + "RustArgs" => quote! { RustArgs: ( #(#rust_args),* ,), }, + "RustRet" => quote! { RustRet: ( #(#rust_ret),* ), }, + _ => unreachable!("checked in validation"), + }; + ty_fields.push(field); + } + let new = quote! { #callback! { fn_name: #fn_name, - CFn: fn( #(#c_args),* ,) -> ( #(#c_ret),* ), - CArgs: ( #(#c_args),* ,), - CRet: ( #(#c_ret),* ), - RustFn: fn( #(#rust_args),* ,) -> ( #(#rust_ret),* ), - RustArgs: ( #(#rust_args),* ,), - RustRet: ( #(#rust_ret),* ), + #( #ty_fields )* #meta_field #extra_field #fn_extra_field @@ -488,24 +356,7 @@ struct MacroReplace { impl MacroReplace { fn new(name: &'static str) -> Self { - // Keep this in sync with `libm_test::canonical_name` - let known_mappings = &[ - ("erff", "erf"), - ("erf", "erf"), - ("lgammaf_r", "lgamma_r"), - ("modff", "modf"), - ("modf", "modf"), - ]; - - let norm_name = match known_mappings.iter().find(|known| known.0 == name) { - Some(found) => found.1, - None => name - .strip_suffix("f") - .or_else(|| name.strip_suffix("f16")) - .or_else(|| name.strip_suffix("f128")) - .unwrap_or(name), - }; - + let norm_name = base_name(name); Self { fn_name: name, norm_name: norm_name.to_owned(), error: None } } @@ -539,3 +390,57 @@ impl VisitMut for MacroReplace { syn::visit_mut::visit_ident_mut(self, i); } } + +/// Return the unsuffixed version of a function name; e.g. `abs` and `absf` both return `abs`, +/// `lgamma_r` and `lgammaf_r` both return `lgamma_r`. +fn base_name(name: &str) -> &str { + let known_mappings = &[ + ("erff", "erf"), + ("erf", "erf"), + ("lgammaf_r", "lgamma_r"), + ("modff", "modf"), + ("modf", "modf"), + ]; + + match known_mappings.iter().find(|known| known.0 == name) { + Some(found) => found.1, + None => name + .strip_suffix("f") + .or_else(|| name.strip_suffix("f16")) + .or_else(|| name.strip_suffix("f128")) + .unwrap_or(name), + } +} + +impl ToTokens for Ty { + fn to_tokens(&self, tokens: &mut pm2::TokenStream) { + let ts = match self { + Ty::F16 => quote! { f16 }, + Ty::F32 => quote! { f32 }, + Ty::F64 => quote! { f64 }, + Ty::F128 => quote! { f128 }, + Ty::I32 => quote! { i32 }, + Ty::CInt => quote! { ::core::ffi::c_int }, + Ty::MutF16 => quote! { &'a mut f16 }, + Ty::MutF32 => quote! { &'a mut f32 }, + Ty::MutF64 => quote! { &'a mut f64 }, + Ty::MutF128 => quote! { &'a mut f128 }, + Ty::MutI32 => quote! { &'a mut i32 }, + Ty::MutCInt => quote! { &'a mut core::ffi::c_int }, + }; + + tokens.extend(ts); + } +} +impl ToTokens for FloatTy { + fn to_tokens(&self, tokens: &mut pm2::TokenStream) { + let ts = match self { + FloatTy::F16 => quote! { f16 }, + FloatTy::F32 => quote! { f32 }, + FloatTy::F64 => quote! { f64 }, + FloatTy::F128 => quote! { f128 }, + }; + + tokens.extend(ts); + } +} diff --git a/crates/libm-macros/src/parse.rs b/crates/libm-macros/src/parse.rs index ee9bd524b..369bbae2f 100644 --- a/crates/libm-macros/src/parse.rs +++ b/crates/libm-macros/src/parse.rs @@ -5,7 +5,7 @@ use quote::ToTokens; use syn::parse::{Parse, ParseStream, Parser}; use syn::punctuated::Punctuated; use syn::spanned::Spanned; -use syn::token::Comma; +use syn::token::{self, Comma}; use syn::{Arm, Attribute, Expr, ExprMatch, Ident, Meta, Token, bracketed}; /// The input to our macro; just a list of `field: value` items. @@ -39,6 +39,9 @@ impl Parse for Mapping { pub struct StructuredInput { /// Macro to invoke once per function pub callback: Ident, + /// Whether or not to provide `CFn` `CArgs` `RustFn` etc. This is really only needed + /// once for crate to set up the main trait. + pub emit_types: Vec, /// Skip these functions pub skip: Vec, /// Invoke only for these functions @@ -50,6 +53,7 @@ pub struct StructuredInput { /// Per-function extra expressions to pass to the macro pub fn_extra: Option>, // For diagnostics + pub emit_types_span: Option, pub only_span: Option, pub fn_extra_span: Option, } @@ -58,6 +62,7 @@ impl StructuredInput { pub fn from_fields(input: Invocation) -> syn::Result { let mut map: Vec<_> = input.fields.into_iter().collect(); let cb_expr = expect_field(&mut map, "callback")?; + let emit_types_expr = expect_field(&mut map, "emit_types").ok(); let skip_expr = expect_field(&mut map, "skip").ok(); let only_expr = expect_field(&mut map, "only").ok(); let attr_expr = expect_field(&mut map, "attributes").ok(); @@ -71,6 +76,12 @@ impl StructuredInput { ))?; } + let emit_types_span = emit_types_expr.as_ref().map(|expr| expr.span()); + let emit_types = match emit_types_expr { + Some(expr) => Parser::parse2(parse_ident_or_array, expr.into_token_stream())?, + None => Vec::new(), + }; + let skip = match skip_expr { Some(expr) => Parser::parse2(parse_ident_array, expr.into_token_stream())?, None => Vec::new(), @@ -103,6 +114,7 @@ impl StructuredInput { Ok(Self { callback: expect_ident(cb_expr)?, + emit_types, skip, only, only_span, @@ -110,6 +122,7 @@ impl StructuredInput { extra, fn_extra, fn_extra_span, + emit_types_span, }) } } @@ -183,6 +196,15 @@ fn expect_ident(expr: Expr) -> syn::Result { syn::parse2(expr.into_token_stream()) } +/// Parse either a single identifier (`foo`) or an array of identifiers (`[foo, bar, baz]`). +fn parse_ident_or_array(input: ParseStream) -> syn::Result> { + if !input.peek(token::Bracket) { + return Ok(vec![input.parse()?]); + } + + parse_ident_array(input) +} + /// Parse an array of expressions. fn parse_expr_array(input: ParseStream) -> syn::Result> { let content; diff --git a/crates/libm-macros/src/shared.rs b/crates/libm-macros/src/shared.rs new file mode 100644 index 000000000..5e58220eb --- /dev/null +++ b/crates/libm-macros/src/shared.rs @@ -0,0 +1,444 @@ +/* List of all functions that is shared between `libm-macros` and `libm-test`. */ + +use std::fmt; +use std::sync::LazyLock; + +const ALL_OPERATIONS_NESTED: &[(FloatTy, Signature, Option, &[&str])] = &[ + ( + // `fn(f16) -> f16` + FloatTy::F16, + Signature { args: &[Ty::F16], returns: &[Ty::F16] }, + None, + &[ + "ceilf16", + "fabsf16", + "floorf16", + "rintf16", + "roundevenf16", + "roundf16", + "sqrtf16", + "truncf16", + ], + ), + ( + // `fn(f32) -> f32` + FloatTy::F32, + Signature { args: &[Ty::F32], returns: &[Ty::F32] }, + None, + &[ + "acosf", + "acoshf", + "asinf", + "asinhf", + "atanf", + "atanhf", + "cbrtf", + "ceilf", + "cosf", + "coshf", + "erfcf", + "erff", + "exp10f", + "exp2f", + "expf", + "expm1f", + "fabsf", + "floorf", + "j0f", + "j1f", + "lgammaf", + "log10f", + "log1pf", + "log2f", + "logf", + "rintf", + "roundevenf", + "roundf", + "sinf", + "sinhf", + "sqrtf", + "tanf", + "tanhf", + "tgammaf", + "truncf", + "y0f", + "y1f", + ], + ), + ( + // `(f64) -> f64` + FloatTy::F64, + Signature { args: &[Ty::F64], returns: &[Ty::F64] }, + None, + &[ + "acos", + "acosh", + "asin", + "asinh", + "atan", + "atanh", + "cbrt", + "ceil", + "cos", + "cosh", + "erf", + "erfc", + "exp", + "exp10", + "exp2", + "expm1", + "fabs", + "floor", + "j0", + "j1", + "lgamma", + "log", + "log10", + "log1p", + "log2", + "rint", + "round", + "roundeven", + "sin", + "sinh", + "sqrt", + "tan", + "tanh", + "tgamma", + "trunc", + "y0", + "y1", + ], + ), + ( + // `fn(f128) -> f128` + FloatTy::F128, + Signature { args: &[Ty::F128], returns: &[Ty::F128] }, + None, + &[ + "ceilf128", + "fabsf128", + "floorf128", + "rintf128", + "roundevenf128", + "roundf128", + "sqrtf128", + "truncf128", + ], + ), + ( + // `(f16, f16) -> f16` + FloatTy::F16, + Signature { args: &[Ty::F16, Ty::F16], returns: &[Ty::F16] }, + None, + &[ + "copysignf16", + "fdimf16", + "fmaxf16", + "fmaximum_numf16", + "fmaximumf16", + "fminf16", + "fminimum_numf16", + "fminimumf16", + "fmodf16", + ], + ), + ( + // `(f32, f32) -> f32` + FloatTy::F32, + Signature { args: &[Ty::F32, Ty::F32], returns: &[Ty::F32] }, + None, + &[ + "atan2f", + "copysignf", + "fdimf", + "fmaxf", + "fmaximum_numf", + "fmaximumf", + "fminf", + "fminimum_numf", + "fminimumf", + "fmodf", + "hypotf", + "nextafterf", + "powf", + "remainderf", + ], + ), + ( + // `(f64, f64) -> f64` + FloatTy::F64, + Signature { args: &[Ty::F64, Ty::F64], returns: &[Ty::F64] }, + None, + &[ + "atan2", + "copysign", + "fdim", + "fmax", + "fmaximum", + "fmaximum_num", + "fmin", + "fminimum", + "fminimum_num", + "fmod", + "hypot", + "nextafter", + "pow", + "remainder", + ], + ), + ( + // `(f128, f128) -> f128` + FloatTy::F128, + Signature { args: &[Ty::F128, Ty::F128], returns: &[Ty::F128] }, + None, + &[ + "copysignf128", + "fdimf128", + "fmaxf128", + "fmaximum_numf128", + "fmaximumf128", + "fminf128", + "fminimum_numf128", + "fminimumf128", + "fmodf128", + ], + ), + ( + // `(f32, f32, f32) -> f32` + FloatTy::F32, + Signature { args: &[Ty::F32, Ty::F32, Ty::F32], returns: &[Ty::F32] }, + None, + &["fmaf"], + ), + ( + // `(f64, f64, f64) -> f64` + FloatTy::F64, + Signature { args: &[Ty::F64, Ty::F64, Ty::F64], returns: &[Ty::F64] }, + None, + &["fma"], + ), + ( + // `(f128, f128, f128) -> f128` + FloatTy::F128, + Signature { args: &[Ty::F128, Ty::F128, Ty::F128], returns: &[Ty::F128] }, + None, + &["fmaf128"], + ), + ( + // `(f32) -> i32` + FloatTy::F32, + Signature { args: &[Ty::F32], returns: &[Ty::I32] }, + None, + &["ilogbf"], + ), + ( + // `(f64) -> i32` + FloatTy::F64, + Signature { args: &[Ty::F64], returns: &[Ty::I32] }, + None, + &["ilogb"], + ), + ( + // `(i32, f32) -> f32` + FloatTy::F32, + Signature { args: &[Ty::I32, Ty::F32], returns: &[Ty::F32] }, + None, + &["jnf", "ynf"], + ), + ( + // `(i32, f64) -> f64` + FloatTy::F64, + Signature { args: &[Ty::I32, Ty::F64], returns: &[Ty::F64] }, + None, + &["jn", "yn"], + ), + ( + // `(f16, i32) -> f16` + FloatTy::F16, + Signature { args: &[Ty::F16, Ty::I32], returns: &[Ty::F16] }, + None, + &["ldexpf16", "scalbnf16"], + ), + ( + // `(f32, i32) -> f32` + FloatTy::F32, + Signature { args: &[Ty::F32, Ty::I32], returns: &[Ty::F32] }, + None, + &["ldexpf", "scalbnf"], + ), + ( + // `(f64, i64) -> f64` + FloatTy::F64, + Signature { args: &[Ty::F64, Ty::I32], returns: &[Ty::F64] }, + None, + &["ldexp", "scalbn"], + ), + ( + // `(f128, i32) -> f128` + FloatTy::F128, + Signature { args: &[Ty::F128, Ty::I32], returns: &[Ty::F128] }, + None, + &["ldexpf128", "scalbnf128"], + ), + ( + // `(f32, &mut f32) -> f32` as `(f32) -> (f32, f32)` + FloatTy::F32, + Signature { args: &[Ty::F32], returns: &[Ty::F32, Ty::F32] }, + Some(Signature { args: &[Ty::F32, Ty::MutF32], returns: &[Ty::F32] }), + &["modff"], + ), + ( + // `(f64, &mut f64) -> f64` as `(f64) -> (f64, f64)` + FloatTy::F64, + Signature { args: &[Ty::F64], returns: &[Ty::F64, Ty::F64] }, + Some(Signature { args: &[Ty::F64, Ty::MutF64], returns: &[Ty::F64] }), + &["modf"], + ), + ( + // `(f32, &mut c_int) -> f32` as `(f32) -> (f32, i32)` + FloatTy::F32, + Signature { args: &[Ty::F32], returns: &[Ty::F32, Ty::I32] }, + Some(Signature { args: &[Ty::F32, Ty::MutCInt], returns: &[Ty::F32] }), + &["frexpf", "lgammaf_r"], + ), + ( + // `(f64, &mut c_int) -> f64` as `(f64) -> (f64, i32)` + FloatTy::F64, + Signature { args: &[Ty::F64], returns: &[Ty::F64, Ty::I32] }, + Some(Signature { args: &[Ty::F64, Ty::MutCInt], returns: &[Ty::F64] }), + &["frexp", "lgamma_r"], + ), + ( + // `(f32, f32, &mut c_int) -> f32` as `(f32, f32) -> (f32, i32)` + FloatTy::F32, + Signature { args: &[Ty::F32, Ty::F32], returns: &[Ty::F32, Ty::I32] }, + Some(Signature { args: &[Ty::F32, Ty::F32, Ty::MutCInt], returns: &[Ty::F32] }), + &["remquof"], + ), + ( + // `(f64, f64, &mut c_int) -> f64` as `(f64, f64) -> (f64, i32)` + FloatTy::F64, + Signature { args: &[Ty::F64, Ty::F64], returns: &[Ty::F64, Ty::I32] }, + Some(Signature { args: &[Ty::F64, Ty::F64, Ty::MutCInt], returns: &[Ty::F64] }), + &["remquo"], + ), + ( + // `(f32, &mut f32, &mut f32)` as `(f32) -> (f32, f32)` + FloatTy::F32, + Signature { args: &[Ty::F32], returns: &[Ty::F32, Ty::F32] }, + Some(Signature { args: &[Ty::F32, Ty::MutF32, Ty::MutF32], returns: &[] }), + &["sincosf"], + ), + ( + // `(f64, &mut f64, &mut f64)` as `(f64) -> (f64, f64)` + FloatTy::F64, + Signature { args: &[Ty::F64], returns: &[Ty::F64, Ty::F64] }, + Some(Signature { args: &[Ty::F64, Ty::MutF64, Ty::MutF64], returns: &[] }), + &["sincos"], + ), +]; + +/// A type used in a function signature. +#[allow(dead_code)] +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] +pub enum Ty { + F16, + F32, + F64, + F128, + I32, + CInt, + MutF16, + MutF32, + MutF64, + MutF128, + MutI32, + MutCInt, +} + +/// A subset of [`Ty`] representing only floats. +#[allow(dead_code)] +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] +pub enum FloatTy { + F16, + F32, + F64, + F128, +} + +impl fmt::Display for Ty { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let s = match self { + Ty::F16 => "f16", + Ty::F32 => "f32", + Ty::F64 => "f64", + Ty::F128 => "f128", + Ty::I32 => "i32", + Ty::CInt => "::core::ffi::c_int", + Ty::MutF16 => "&mut f16", + Ty::MutF32 => "&mut f32", + Ty::MutF64 => "&mut f64", + Ty::MutF128 => "&mut f128", + Ty::MutI32 => "&mut i32", + Ty::MutCInt => "&mut ::core::ffi::c_int", + }; + f.write_str(s) + } +} + +impl fmt::Display for FloatTy { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let s = match self { + FloatTy::F16 => "f16", + FloatTy::F32 => "f32", + FloatTy::F64 => "f64", + FloatTy::F128 => "f128", + }; + f.write_str(s) + } +} + +/// Representation of e.g. `(f32, f32) -> f32` +#[derive(Debug, Clone)] +pub struct Signature { + pub args: &'static [Ty], + pub returns: &'static [Ty], +} + +/// Combined information about a function implementation. +#[derive(Debug, Clone)] +pub struct MathOpInfo { + pub name: &'static str, + pub float_ty: FloatTy, + /// Function signature for C implementations + pub c_sig: Signature, + /// Function signature for Rust implementations + pub rust_sig: Signature, +} + +/// A flat representation of `ALL_FUNCTIONS`. +pub static ALL_OPERATIONS: LazyLock> = LazyLock::new(|| { + let mut ret = Vec::new(); + + for (base_fty, rust_sig, c_sig, names) in ALL_OPERATIONS_NESTED { + for name in *names { + let api = MathOpInfo { + name, + float_ty: *base_fty, + rust_sig: rust_sig.clone(), + c_sig: c_sig.clone().unwrap_or_else(|| rust_sig.clone()), + }; + ret.push(api); + } + + if !names.is_sorted() { + let mut sorted = (*names).to_owned(); + sorted.sort_unstable(); + panic!("names list is not sorted: {names:?}\nExpected: {sorted:?}"); + } + } + + ret.sort_by_key(|item| item.name); + ret +}); diff --git a/crates/libm-macros/tests/basic.rs b/crates/libm-macros/tests/basic.rs index 8f8c09f1b..5314e84bb 100644 --- a/crates/libm-macros/tests/basic.rs +++ b/crates/libm-macros/tests/basic.rs @@ -1,34 +1,31 @@ +#![feature(f16)] +#![feature(f128)] // `STATUS_DLL_NOT_FOUND` on i686 MinGW, not worth looking into. #![cfg(not(all(target_arch = "x86", target_os = "windows", target_env = "gnu")))] macro_rules! basic { ( fn_name: $fn_name:ident, + FTy: $FTy:ty, CFn: $CFn:ty, CArgs: $CArgs:ty, CRet: $CRet:ty, RustFn: $RustFn:ty, RustArgs: $RustArgs:ty, RustRet: $RustRet:ty, - attrs: [$($meta:meta)*] + attrs: [$($attr:meta),*], extra: [$($extra_tt:tt)*], fn_extra: $fn_extra:expr, ) => { - $(#[$meta])* - mod $fn_name { - #[allow(unused)] - type CFnTy = $CFn; - // type CArgsTy<'_> = $CArgs; - // type CRetTy<'_> = $CRet; - #[allow(unused)] + $(#[$attr])* + #[allow(dead_code)] + pub mod $fn_name { + type FTy= $FTy; + type CFnTy<'a> = $CFn; type RustFnTy = $RustFn; - #[allow(unused)] type RustArgsTy = $RustArgs; - #[allow(unused)] type RustRetTy = $RustRet; - #[allow(unused)] const A: &[&str] = &[$($extra_tt)*]; - #[allow(unused)] fn foo(a: f32) -> f32 { $fn_extra(a) } @@ -39,6 +36,7 @@ macro_rules! basic { mod test_basic { libm_macros::for_each_function! { callback: basic, + emit_types: all, skip: [sin, cos], attributes: [ // just some random attributes @@ -58,25 +56,10 @@ mod test_basic { macro_rules! basic_no_extra { ( fn_name: $fn_name:ident, - CFn: $CFn:ty, - CArgs: $CArgs:ty, - CRet: $CRet:ty, - RustFn: $RustFn:ty, - RustArgs: $RustArgs:ty, - RustRet: $RustRet:ty, + attrs: [$($attr:meta),*], ) => { - mod $fn_name { - #[allow(unused)] - type CFnTy = $CFn; - // type CArgsTy<'_> = $CArgs; - // type CRetTy<'_> = $CRet; - #[allow(unused)] - type RustFnTy = $RustFn; - #[allow(unused)] - type RustArgsTy = $RustArgs; - #[allow(unused)] - type RustRetTy = $RustRet; - } + $(#[$attr])* + mod $fn_name {} }; } @@ -94,3 +77,27 @@ mod test_only { only: [sin, sinf], } } + +macro_rules! specified_types { + ( + fn_name: $fn_name:ident, + RustFn: $RustFn:ty, + RustArgs: $RustArgs:ty, + attrs: [$($attr:meta),*], + ) => { + $(#[$attr])* + #[allow(dead_code)] + mod $fn_name { + type RustFnTy = $RustFn; + type RustArgsTy = $RustArgs; + } + }; +} + +mod test_emit_types { + // Test that we can specify a couple types to emit + libm_macros::for_each_function! { + callback: specified_types, + emit_types: [RustFn, RustArgs], + } +} diff --git a/crates/libm-macros/tests/enum.rs b/crates/libm-macros/tests/enum.rs new file mode 100644 index 000000000..93e209a0d --- /dev/null +++ b/crates/libm-macros/tests/enum.rs @@ -0,0 +1,38 @@ +#[libm_macros::function_enum(BaseName)] +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] +pub enum Identifier {} + +#[libm_macros::base_name_enum] +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] +pub enum BaseName {} + +#[test] +fn as_str() { + assert_eq!(Identifier::Sin.as_str(), "sin"); + assert_eq!(Identifier::Sinf.as_str(), "sinf"); +} + +#[test] +fn from_str() { + assert_eq!(Identifier::from_str("sin").unwrap(), Identifier::Sin); + assert_eq!(Identifier::from_str("sinf").unwrap(), Identifier::Sinf); +} + +#[test] +fn basename() { + assert_eq!(Identifier::Sin.base_name(), BaseName::Sin); + assert_eq!(Identifier::Sinf.base_name(), BaseName::Sin); +} + +#[test] +fn math_op() { + assert_eq!(Identifier::Sin.math_op().float_ty, FloatTy::F64); + assert_eq!(Identifier::Sinf.math_op().float_ty, FloatTy::F32); +} + +// Replicate the structure that we have in `libm-test` +mod op { + include!("../../libm-macros/src/shared.rs"); +} + +use op::FloatTy; diff --git a/crates/libm-test/Cargo.toml b/crates/libm-test/Cargo.toml index 703524bcd..5d150b4ae 100644 --- a/crates/libm-test/Cargo.toml +++ b/crates/libm-test/Cargo.toml @@ -1,31 +1,70 @@ [package] name = "libm-test" version = "0.1.0" -edition = "2021" +edition = "2024" publish = false [features] -default = [] +default = ["build-mpfr", "build-musl", "unstable-float"] + +# Propagated from libm because this affects which functions we test. +unstable-float = ["libm/unstable-float", "rug?/nightly-float"] # Generate tests which are random inputs and the outputs are calculated with # musl libc. -test-musl-serialized = ["rand"] +build-mpfr = ["dep:rug", "dep:gmp-mpfr-sys"] # Build our own musl for testing and benchmarks build-musl = ["dep:musl-math-sys"] +# Enable report generation without bringing in more dependencies by default +benchmarking-reports = ["criterion/plotters", "criterion/html_reports"] + +# Enable icount benchmarks (requires iai-callgrind and valgrind) +icount = ["dep:iai-callgrind"] + +# Run with a reduced set of benchmarks, such as for CI +short-benchmarks = [] + [dependencies] -anyhow = "1.0.90" -libm = { path = "../.." } +anyhow = "1.0.97" +# This is not directly used but is required so we can enable `gmp-mpfr-sys/force-cross`. +gmp-mpfr-sys = { version = "1.6.4", optional = true, default-features = false } +iai-callgrind = { version = "0.14.0", optional = true } +indicatif = { version = "0.17.11", default-features = false } +libm = { path = "../../libm", features = ["unstable-public-internals"] } libm-macros = { path = "../libm-macros" } musl-math-sys = { path = "../musl-math-sys", optional = true } paste = "1.0.15" -rand = "0.8.5" -rand_chacha = "0.3.1" - -[target.'cfg(target_family = "wasm")'.dependencies] -# Enable randomness on WASM -getrandom = { version = "0.2", features = ["js"] } +rand = "0.9.0" +rand_chacha = "0.9.0" +rayon = "1.10.0" +rug = { version = "1.27.0", optional = true, default-features = false, features = ["float", "integer", "std"] } [build-dependencies] -rand = { version = "0.8.5", optional = true } +rand = { version = "0.9.0", optional = true } + +[dev-dependencies] +criterion = { version = "0.5.1", default-features = false, features = ["cargo_bench_support"] } +libtest-mimic = "0.8.1" + +[[bench]] +name = "icount" +harness = false +required-features = ["icount"] + +[[bench]] +name = "random" +harness = false + +[[test]] +# No harness so that we can skip tests at runtime based on env. Prefixed with +# `z` so these tests get run last. +name = "z_extensive" +harness = false + +[lints.rust] +# Values from the chared config.rs used by `libm` but not the test crate +unexpected_cfgs = { level = "warn", check-cfg = [ + 'cfg(feature, values("arch", "force-soft-floats", "unstable-intrinsics"))', +] } diff --git a/crates/libm-test/benches/icount.rs b/crates/libm-test/benches/icount.rs new file mode 100644 index 000000000..da8c6bfd1 --- /dev/null +++ b/crates/libm-test/benches/icount.rs @@ -0,0 +1,316 @@ +//! Benchmarks that use `iai-cachegrind` to be reasonably CI-stable. + +use std::hint::black_box; + +use iai_callgrind::{library_benchmark, library_benchmark_group, main}; +use libm::support::{HInt, u256}; +use libm_test::generate::spaced; +use libm_test::{CheckBasis, CheckCtx, GeneratorKind, MathOp, OpRustArgs, TupleCall, op}; + +const BENCH_ITER_ITEMS: u64 = 500; + +macro_rules! icount_benches { + ( + fn_name: $fn_name:ident, + attrs: [$($_attr:meta),*], + ) => { + paste::paste! { + // Construct benchmark inputs from the logspace generator. + fn [< setup_ $fn_name >]() -> Vec> { + type Op = op::$fn_name::Routine; + let mut ctx = CheckCtx::new( + Op::IDENTIFIER, + CheckBasis::None, + GeneratorKind::QuickSpaced + ); + ctx.override_iterations(BENCH_ITER_ITEMS); + let ret = spaced::get_test_cases::(&ctx).0.collect::>(); + println!("operation {}, {} steps", Op::NAME, ret.len()); + ret + } + + // Run benchmarks with the above inputs. + #[library_benchmark] + #[bench::logspace([< setup_ $fn_name >]())] + fn [< icount_bench_ $fn_name >](cases: Vec>) { + type Op = op::$fn_name::Routine; + let f = black_box(Op::ROUTINE); + for input in cases.iter().copied() { + input.call(f); + } + } + + library_benchmark_group!( + name = [< icount_bench_ $fn_name _group >]; + benchmarks = [< icount_bench_ $fn_name >] + ); + } + }; +} + +libm_macros::for_each_function! { + callback: icount_benches, +} + +fn setup_u128_mul() -> Vec<(u128, u128)> { + let step = u128::MAX / 300; + let mut x = 0u128; + let mut y = 0u128; + let mut v = Vec::new(); + + loop { + 'inner: loop { + match y.checked_add(step) { + Some(new) => y = new, + None => break 'inner, + } + + v.push((x, y)) + } + + match x.checked_add(step) { + Some(new) => x = new, + None => break, + } + } + + v +} + +fn setup_u256_add() -> Vec<(u256, u256)> { + let mut v = Vec::new(); + for (x, y) in setup_u128_mul() { + // square the u128 inputs to cover most of the u256 range + v.push((x.widen_mul(x), y.widen_mul(y))); + } + // Doesn't get covered by `u128:MAX^2` + v.push((u256::MAX, u256::MAX)); + v +} + +fn setup_u256_shift() -> Vec<(u256, u32)> { + let mut v = Vec::new(); + + for (x, _) in setup_u128_mul() { + let x2 = x.widen_mul(x); + for y in 0u32..256 { + v.push((x2, y)); + } + } + + v +} + +#[library_benchmark] +#[bench::linspace(setup_u128_mul())] +fn icount_bench_u128_widen_mul(cases: Vec<(u128, u128)>) { + for (x, y) in cases.iter().copied() { + black_box(black_box(x).zero_widen_mul(black_box(y))); + } +} + +library_benchmark_group!( + name = icount_bench_u128_widen_mul_group; + benchmarks = icount_bench_u128_widen_mul +); + +#[library_benchmark] +#[bench::linspace(setup_u256_add())] +fn icount_bench_u256_add(cases: Vec<(u256, u256)>) { + for (x, y) in cases.iter().copied() { + black_box(black_box(x) + black_box(y)); + } +} + +library_benchmark_group!( + name = icount_bench_u256_add_group; + benchmarks = icount_bench_u256_add +); + +#[library_benchmark] +#[bench::linspace(setup_u256_shift())] +fn icount_bench_u256_shr(cases: Vec<(u256, u32)>) { + for (x, y) in cases.iter().copied() { + black_box(black_box(x) >> black_box(y)); + } +} + +library_benchmark_group!( + name = icount_bench_u256_shr_group; + benchmarks = icount_bench_u256_shr +); + +main!( + library_benchmark_groups = + // u256-related benchmarks + icount_bench_u128_widen_mul_group, + icount_bench_u256_add_group, + icount_bench_u256_shr_group, + // verify-apilist-start + // verify-sorted-start + icount_bench_acos_group, + icount_bench_acosf_group, + icount_bench_acosh_group, + icount_bench_acoshf_group, + icount_bench_asin_group, + icount_bench_asinf_group, + icount_bench_asinh_group, + icount_bench_asinhf_group, + icount_bench_atan2_group, + icount_bench_atan2f_group, + icount_bench_atan_group, + icount_bench_atanf_group, + icount_bench_atanh_group, + icount_bench_atanhf_group, + icount_bench_cbrt_group, + icount_bench_cbrtf_group, + icount_bench_ceil_group, + icount_bench_ceilf128_group, + icount_bench_ceilf16_group, + icount_bench_ceilf_group, + icount_bench_copysign_group, + icount_bench_copysignf128_group, + icount_bench_copysignf16_group, + icount_bench_copysignf_group, + icount_bench_cos_group, + icount_bench_cosf_group, + icount_bench_cosh_group, + icount_bench_coshf_group, + icount_bench_erf_group, + icount_bench_erfc_group, + icount_bench_erfcf_group, + icount_bench_erff_group, + icount_bench_exp10_group, + icount_bench_exp10f_group, + icount_bench_exp2_group, + icount_bench_exp2f_group, + icount_bench_exp_group, + icount_bench_expf_group, + icount_bench_expm1_group, + icount_bench_expm1f_group, + icount_bench_fabs_group, + icount_bench_fabsf128_group, + icount_bench_fabsf16_group, + icount_bench_fabsf_group, + icount_bench_fdim_group, + icount_bench_fdimf128_group, + icount_bench_fdimf16_group, + icount_bench_fdimf_group, + icount_bench_floor_group, + icount_bench_floorf128_group, + icount_bench_floorf16_group, + icount_bench_floorf_group, + icount_bench_fma_group, + icount_bench_fmaf128_group, + icount_bench_fmaf_group, + icount_bench_fmax_group, + icount_bench_fmaxf128_group, + icount_bench_fmaxf16_group, + icount_bench_fmaxf_group, + icount_bench_fmaximum_group, + icount_bench_fmaximum_num_group, + icount_bench_fmaximum_numf128_group, + icount_bench_fmaximum_numf16_group, + icount_bench_fmaximum_numf_group, + icount_bench_fmaximumf128_group, + icount_bench_fmaximumf16_group, + icount_bench_fmaximumf_group, + icount_bench_fmin_group, + icount_bench_fminf128_group, + icount_bench_fminf16_group, + icount_bench_fminf_group, + icount_bench_fminimum_group, + icount_bench_fminimum_num_group, + icount_bench_fminimum_numf128_group, + icount_bench_fminimum_numf16_group, + icount_bench_fminimum_numf_group, + icount_bench_fminimumf128_group, + icount_bench_fminimumf16_group, + icount_bench_fminimumf_group, + icount_bench_fmod_group, + icount_bench_fmodf128_group, + icount_bench_fmodf16_group, + icount_bench_fmodf_group, + icount_bench_frexp_group, + icount_bench_frexpf_group, + icount_bench_hypot_group, + icount_bench_hypotf_group, + icount_bench_ilogb_group, + icount_bench_ilogbf_group, + icount_bench_j0_group, + icount_bench_j0f_group, + icount_bench_j1_group, + icount_bench_j1f_group, + icount_bench_jn_group, + icount_bench_jnf_group, + icount_bench_ldexp_group, + icount_bench_ldexpf128_group, + icount_bench_ldexpf16_group, + icount_bench_ldexpf_group, + icount_bench_lgamma_group, + icount_bench_lgamma_r_group, + icount_bench_lgammaf_group, + icount_bench_lgammaf_r_group, + icount_bench_log10_group, + icount_bench_log10f_group, + icount_bench_log1p_group, + icount_bench_log1pf_group, + icount_bench_log2_group, + icount_bench_log2f_group, + icount_bench_log_group, + icount_bench_logf_group, + icount_bench_modf_group, + icount_bench_modff_group, + icount_bench_nextafter_group, + icount_bench_nextafterf_group, + icount_bench_pow_group, + icount_bench_powf_group, + icount_bench_remainder_group, + icount_bench_remainderf_group, + icount_bench_remquo_group, + icount_bench_remquof_group, + icount_bench_rint_group, + icount_bench_rintf128_group, + icount_bench_rintf16_group, + icount_bench_rintf_group, + icount_bench_round_group, + icount_bench_roundeven_group, + icount_bench_roundevenf128_group, + icount_bench_roundevenf16_group, + icount_bench_roundevenf_group, + icount_bench_roundf128_group, + icount_bench_roundf16_group, + icount_bench_roundf_group, + icount_bench_scalbn_group, + icount_bench_scalbnf128_group, + icount_bench_scalbnf16_group, + icount_bench_scalbnf_group, + icount_bench_sin_group, + icount_bench_sincos_group, + icount_bench_sincosf_group, + icount_bench_sinf_group, + icount_bench_sinh_group, + icount_bench_sinhf_group, + icount_bench_sqrt_group, + icount_bench_sqrtf128_group, + icount_bench_sqrtf16_group, + icount_bench_sqrtf_group, + icount_bench_tan_group, + icount_bench_tanf_group, + icount_bench_tanh_group, + icount_bench_tanhf_group, + icount_bench_tgamma_group, + icount_bench_tgammaf_group, + icount_bench_trunc_group, + icount_bench_truncf128_group, + icount_bench_truncf16_group, + icount_bench_truncf_group, + icount_bench_y0_group, + icount_bench_y0f_group, + icount_bench_y1_group, + icount_bench_y1f_group, + icount_bench_yn_group, + icount_bench_ynf_group, + // verify-sorted-end + // verify-apilist-end +); diff --git a/crates/libm-test/benches/random.rs b/crates/libm-test/benches/random.rs new file mode 100644 index 000000000..63d7e5c6d --- /dev/null +++ b/crates/libm-test/benches/random.rs @@ -0,0 +1,207 @@ +use std::hint::black_box; +use std::time::Duration; + +use criterion::{Criterion, criterion_main}; +use libm_test::generate::random; +use libm_test::generate::random::RandomInput; +use libm_test::{CheckBasis, CheckCtx, GeneratorKind, MathOp, TupleCall}; + +/// Benchmark with this many items to get a variety +const BENCH_ITER_ITEMS: usize = if cfg!(feature = "short-benchmarks") { 50 } else { 500 }; + +/// Extra parameters we only care about if we are benchmarking against musl. +#[allow(dead_code)] +struct MuslExtra { + musl_fn: Option, + skip_on_i586: bool, +} + +macro_rules! musl_rand_benches { + ( + fn_name: $fn_name:ident, + attrs: [$($attr:meta),*], + fn_extra: ($skip_on_i586:expr, $musl_fn:expr), + ) => { + paste::paste! { + $(#[$attr])* + fn [< musl_bench_ $fn_name >](c: &mut Criterion) { + type Op = libm_test::op::$fn_name::Routine; + + #[cfg(feature = "build-musl")] + let musl_extra = MuslExtra::> { + musl_fn: $musl_fn, + skip_on_i586: $skip_on_i586, + }; + + #[cfg(not(feature = "build-musl"))] + let musl_extra = MuslExtra { + musl_fn: None, + skip_on_i586: $skip_on_i586, + }; + + bench_one::(c, musl_extra); + } + } + }; +} + +fn bench_one(c: &mut Criterion, musl_extra: MuslExtra) +where + Op: MathOp, + Op::RustArgs: RandomInput, +{ + let name = Op::NAME; + + let ctx = CheckCtx::new(Op::IDENTIFIER, CheckBasis::Musl, GeneratorKind::Random); + let benchvec: Vec<_> = + random::get_test_cases::(&ctx).0.take(BENCH_ITER_ITEMS).collect(); + + // Perform a sanity check that we are benchmarking the same thing + // Don't test against musl if it is not available + #[cfg(feature = "build-musl")] + for input in benchvec.iter().copied() { + use anyhow::Context; + use libm_test::CheckOutput; + + if cfg!(x86_no_sse) && musl_extra.skip_on_i586 { + break; + } + + let Some(musl_fn) = musl_extra.musl_fn else { + continue; + }; + let musl_res = input.call(musl_fn); + let crate_res = input.call(Op::ROUTINE); + + crate_res.validate(musl_res, input, &ctx).context(name).unwrap(); + } + + #[cfg(not(feature = "build-musl"))] + let _ = musl_extra; // silence unused warnings + + /* Option pointers are black boxed to avoid inlining in the benchmark loop */ + + let mut group = c.benchmark_group(name); + group.bench_function("crate", |b| { + b.iter(|| { + let f = black_box(Op::ROUTINE); + for input in benchvec.iter().copied() { + input.call(f); + } + }) + }); + + // Don't test against musl if it is not available + #[cfg(feature = "build-musl")] + { + if let Some(musl_fn) = musl_extra.musl_fn { + group.bench_function("musl", |b| { + b.iter(|| { + let f = black_box(musl_fn); + for input in benchvec.iter().copied() { + input.call(f); + } + }) + }); + } + } +} + +libm_macros::for_each_function! { + callback: musl_rand_benches, + skip: [], + fn_extra: match MACRO_FN_NAME { + // We pass a tuple of `(skip_on_i586, musl_fn)` + + // FIXME(correctness): exp functions have the wrong result on i586 + exp10 | exp10f | exp2 | exp2f => (true, Some(musl_math_sys::MACRO_FN_NAME)), + + // Musl does not provide `f16` and `f128` functions + ceilf128 + | ceilf16 + | copysignf128 + | copysignf16 + | fabsf128 + | fabsf16 + | fdimf128 + | fdimf16 + | floorf128 + | floorf16 + | fmaf128 + | fmaxf128 + | fmaxf16 + | fmaximum + | fmaximum_num + | fmaximum_numf + | fmaximum_numf128 + | fmaximum_numf16 + | fmaximumf + | fmaximumf128 + | fmaximumf16 + | fminf128 + | fminf16 + | fminimum + | fminimum_num + | fminimum_numf + | fminimum_numf128 + | fminimum_numf16 + | fminimumf + | fminimumf128 + | fminimumf16 + | fmodf128 + | fmodf16 + | ldexpf128 + | ldexpf16 + | rintf128 + | rintf16 + | roundeven + | roundevenf + | roundevenf128 + | roundevenf16 + | roundf128 + | roundf16 + | scalbnf128 + | scalbnf16 + | sqrtf128 + | sqrtf16 + | truncf128 + | truncf16 => (false, None), + + // By default we never skip (false) and always have a musl function available + _ => (false, Some(musl_math_sys::MACRO_FN_NAME)) + } +} + +macro_rules! run_callback { + ( + fn_name: $fn_name:ident, + attrs: [$($attr:meta),*], + extra: [$criterion:ident], + ) => { + paste::paste! { + $(#[$attr])* + [< musl_bench_ $fn_name >](&mut $criterion) + } + }; +} + +pub fn musl_random() { + let mut criterion = Criterion::default(); + + // For CI, run a short 0.5s warmup and 1.0s tests. This makes benchmarks complete in + // about the same time as other tests. + if cfg!(feature = "short-benchmarks") { + criterion = criterion + .warm_up_time(Duration::from_millis(200)) + .measurement_time(Duration::from_millis(600)); + } + + criterion = criterion.configure_from_args(); + + libm_macros::for_each_function! { + callback: run_callback, + extra: [criterion], + }; +} + +criterion_main!(musl_random); diff --git a/crates/libm-test/build.rs b/crates/libm-test/build.rs index 472dec9d3..f75e3dda5 100644 --- a/crates/libm-test/build.rs +++ b/crates/libm-test/build.rs @@ -1,531 +1,9 @@ -use std::fmt::Write; -use std::path::PathBuf; -use std::{env, fs}; +#[path = "../../libm/configure.rs"] +mod configure; +use configure::Config; fn main() { + println!("cargo:rerun-if-changed=../../libm/configure.rs"); let cfg = Config::from_env(); - - emit_optimization_cfg(&cfg); - emit_cfg_shorthands(&cfg); - list_all_tests(&cfg); - - #[cfg(feature = "test-musl-serialized")] - musl_serialized_tests::generate(); -} - -#[allow(dead_code)] -struct Config { - manifest_dir: PathBuf, - out_dir: PathBuf, - opt_level: u8, - target_arch: String, - target_env: String, - target_family: Option, - target_os: String, - target_string: String, - target_vendor: String, - target_features: Vec, -} - -impl Config { - fn from_env() -> Self { - let target_features = env::var("CARGO_CFG_TARGET_FEATURE") - .map(|feats| feats.split(',').map(ToOwned::to_owned).collect()) - .unwrap_or_default(); - - Self { - manifest_dir: PathBuf::from(env::var("CARGO_MANIFEST_DIR").unwrap()), - out_dir: PathBuf::from(env::var("OUT_DIR").unwrap()), - opt_level: env::var("OPT_LEVEL").unwrap().parse().unwrap(), - target_arch: env::var("CARGO_CFG_TARGET_ARCH").unwrap(), - target_env: env::var("CARGO_CFG_TARGET_ENV").unwrap(), - target_family: env::var("CARGO_CFG_TARGET_FAMILY").ok(), - target_os: env::var("CARGO_CFG_TARGET_OS").unwrap(), - target_string: env::var("TARGET").unwrap(), - target_vendor: env::var("CARGO_CFG_TARGET_VENDOR").unwrap(), - target_features, - } - } -} - -/// Some tests are extremely slow. Emit a config option based on optimization level. -fn emit_optimization_cfg(cfg: &Config) { - println!("cargo::rustc-check-cfg=cfg(optimizations_enabled)"); - - if cfg.opt_level >= 2 { - println!("cargo::rustc-cfg=optimizations_enabled"); - } -} - -/// Provide an alias for common longer config combinations. -fn emit_cfg_shorthands(cfg: &Config) { - println!("cargo::rustc-check-cfg=cfg(x86_no_sse)"); - if cfg.target_arch == "x86" && !cfg.target_features.iter().any(|f| f == "sse") { - // Shorthand to detect i586 targets - println!("cargo::rustc-cfg=x86_no_sse"); - } -} - -/// Create a list of all source files in an array. This can be used for making sure that -/// all functions are tested or otherwise covered in some way. -// FIXME: it would probably be better to use rustdoc JSON output to get public functions. -fn list_all_tests(cfg: &Config) { - let math_src = cfg.manifest_dir.join("../../src/math"); - - let mut files = fs::read_dir(math_src) - .unwrap() - .map(|f| f.unwrap().path()) - .filter(|entry| entry.is_file()) - .map(|f| f.file_stem().unwrap().to_str().unwrap().to_owned()) - .collect::>(); - files.sort(); - - let mut s = "pub const ALL_FUNCTIONS: &[&str] = &[".to_owned(); - for f in files { - if f == "mod" { - // skip mod.rs - continue; - } - write!(s, "\"{f}\",").unwrap(); - } - write!(s, "];").unwrap(); - - let outfile = cfg.out_dir.join("all_files.rs"); - fs::write(outfile, s).unwrap(); -} - -/// At build time, generate the output of what the corresponding `*musl` target does with a range -/// of inputs. -/// -/// Serialize that target's output, run the same thing with our symbols, then load and compare -/// the resulting values. -#[cfg(feature = "test-musl-serialized")] -mod musl_serialized_tests { - use std::path::PathBuf; - use std::process::Command; - use std::{env, fs}; - - use rand::Rng; - use rand::seq::SliceRandom; - - // Number of tests to generate for each function - const NTESTS: usize = 500; - - // These files are all internal functions or otherwise miscellaneous, not - // defining a function we want to test. - const IGNORED_FILES: &[&str] = &[ - "fenv.rs", - // These are giving slightly different results compared to musl - "lgamma.rs", - "lgammaf.rs", - "tgamma.rs", - "j0.rs", - "j0f.rs", - "jn.rs", - "jnf.rs", - "j1.rs", - "j1f.rs", - ]; - - struct Function { - name: String, - args: Vec, - ret: Vec, - tests: Vec, - } - - enum Ty { - F32, - F64, - I32, - Bool, - } - - struct Test { - inputs: Vec, - outputs: Vec, - } - - pub fn generate() { - // PowerPC tests are failing on LLVM 13: https://github.com/rust-lang/rust/issues/88520 - let target_arch = env::var("CARGO_CFG_TARGET_ARCH").unwrap(); - let libm_test = PathBuf::from(env::var("CARGO_MANIFEST_DIR").unwrap()); - let math_src = libm_test.join("../../src/math"); - - if target_arch == "powerpc64" { - return; - } - - let files = fs::read_dir(math_src).unwrap().map(|f| f.unwrap().path()).collect::>(); - - let mut math = Vec::new(); - for file in files { - if IGNORED_FILES.iter().any(|f| file.ends_with(f)) { - continue; - } - - println!("generating musl reference tests in {:?}", file); - - let contents = fs::read_to_string(file).unwrap(); - let mut functions = contents.lines().filter(|f| f.starts_with("pub fn")); - while let Some(function_to_test) = functions.next() { - math.push(parse(function_to_test)); - } - } - - // Generate a bunch of random inputs for each function. This will - // attempt to generate a good set of uniform test cases for exercising - // all the various functionality. - generate_random_tests(&mut math, &mut rand::thread_rng()); - - // After we have all our inputs, use the x86_64-unknown-linux-musl - // target to generate the expected output. - generate_test_outputs(&mut math); - //panic!("Boo"); - // ... and now that we have both inputs and expected outputs, do a bunch - // of codegen to create the unit tests which we'll actually execute. - generate_unit_tests(&math); - } - - /// A "poor man's" parser for the signature of a function - fn parse(s: &str) -> Function { - let s = eat(s, "pub fn "); - let pos = s.find('(').unwrap(); - let name = &s[..pos]; - let s = &s[pos + 1..]; - let end = s.find(')').unwrap(); - let args = s[..end] - .split(',') - .map(|arg| { - let colon = arg.find(':').unwrap(); - parse_ty(arg[colon + 1..].trim()) - }) - .collect::>(); - let tail = &s[end + 1..]; - let tail = eat(tail, " -> "); - let ret = parse_retty(tail.replace("{", "").trim()); - - return Function { name: name.to_string(), args, ret, tests: Vec::new() }; - - fn parse_ty(s: &str) -> Ty { - match s { - "f32" => Ty::F32, - "f64" => Ty::F64, - "i32" => Ty::I32, - "bool" => Ty::Bool, - other => panic!("unknown type `{}`", other), - } - } - - fn parse_retty(s: &str) -> Vec { - match s { - "(f32, f32)" => vec![Ty::F32, Ty::F32], - "(f32, i32)" => vec![Ty::F32, Ty::I32], - "(f64, f64)" => vec![Ty::F64, Ty::F64], - "(f64, i32)" => vec![Ty::F64, Ty::I32], - other => vec![parse_ty(other)], - } - } - - fn eat<'a>(s: &'a str, prefix: &str) -> &'a str { - if s.starts_with(prefix) { - &s[prefix.len()..] - } else { - panic!("{:?} didn't start with {:?}", s, prefix) - } - } - } - - fn generate_random_tests(functions: &mut [Function], rng: &mut R) { - for function in functions { - for _ in 0..NTESTS { - function.tests.push(generate_test(function, rng)); - } - } - - fn generate_test(function: &Function, rng: &mut R) -> Test { - let mut inputs = function.args.iter().map(|ty| ty.gen_i64(rng)).collect::>(); - - // First argument to this function appears to be a number of - // iterations, so passing in massive random numbers causes it to - // take forever to execute, so make sure we're not running random - // math code until the heat death of the universe. - if function.name == "jn" || function.name == "jnf" { - inputs[0] &= 0xffff; - } - - Test { - inputs, - // zero output for now since we'll generate it later - outputs: vec![], - } - } - } - - impl Ty { - fn gen_i64(&self, r: &mut R) -> i64 { - use std::{f32, f64}; - - return match self { - Ty::F32 => { - if r.gen_range(0..20) < 1 { - let i = *[f32::NAN, f32::INFINITY, f32::NEG_INFINITY].choose(r).unwrap(); - i.to_bits().into() - } else { - r.gen::().to_bits().into() - } - } - Ty::F64 => { - if r.gen_range(0..20) < 1 { - let i = *[f64::NAN, f64::INFINITY, f64::NEG_INFINITY].choose(r).unwrap(); - i.to_bits() as i64 - } else { - r.gen::().to_bits() as i64 - } - } - Ty::I32 => { - if r.gen_range(0..10) < 1 { - let i = *[i32::max_value(), 0, i32::min_value()].choose(r).unwrap(); - i.into() - } else { - r.gen::().into() - } - } - Ty::Bool => r.gen::() as i64, - }; - } - - fn libc_ty(&self) -> &'static str { - match self { - Ty::F32 => "f32", - Ty::F64 => "f64", - Ty::I32 => "i32", - Ty::Bool => "i32", - } - } - - fn libc_pty(&self) -> &'static str { - match self { - Ty::F32 => "*mut f32", - Ty::F64 => "*mut f64", - Ty::I32 => "*mut i32", - Ty::Bool => "*mut i32", - } - } - - fn default(&self) -> &'static str { - match self { - Ty::F32 => "0_f32", - Ty::F64 => "0_f64", - Ty::I32 => "0_i32", - Ty::Bool => "false", - } - } - - fn to_i64(&self) -> &'static str { - match self { - Ty::F32 => ".to_bits() as i64", - Ty::F64 => ".to_bits() as i64", - Ty::I32 => " as i64", - Ty::Bool => " as i64", - } - } - } - - fn generate_test_outputs(functions: &mut [Function]) { - let mut src = String::new(); - let dst = std::env::var("OUT_DIR").unwrap(); - - // Generate a program which will run all tests with all inputs in - // `functions`. This program will write all outputs to stdout (in a - // binary format). - src.push_str("use std::io::Write;"); - src.push_str("fn main() {"); - src.push_str("let mut result = Vec::new();"); - for function in functions.iter_mut() { - src.push_str("unsafe {"); - src.push_str("extern { fn "); - src.push_str(&function.name); - src.push_str("("); - - let (ret, retptr) = match function.name.as_str() { - "sincos" | "sincosf" => (None, &function.ret[..]), - _ => (Some(&function.ret[0]), &function.ret[1..]), - }; - for (i, arg) in function.args.iter().enumerate() { - src.push_str(&format!("arg{}: {},", i, arg.libc_ty())); - } - for (i, ret) in retptr.iter().enumerate() { - src.push_str(&format!("argret{}: {},", i, ret.libc_pty())); - } - src.push_str(")"); - if let Some(ty) = ret { - src.push_str(" -> "); - src.push_str(ty.libc_ty()); - } - src.push_str("; }"); - - src.push_str(&format!("static TESTS: &[[i64; {}]]", function.args.len())); - src.push_str(" = &["); - for test in function.tests.iter() { - src.push_str("["); - for val in test.inputs.iter() { - src.push_str(&val.to_string()); - src.push_str(","); - } - src.push_str("],"); - } - src.push_str("];"); - - src.push_str("for test in TESTS {"); - for (i, arg) in retptr.iter().enumerate() { - src.push_str(&format!("let mut argret{} = {};", i, arg.default())); - } - src.push_str("let output = "); - src.push_str(&function.name); - src.push_str("("); - for (i, arg) in function.args.iter().enumerate() { - src.push_str(&match arg { - Ty::F32 => format!("f32::from_bits(test[{}] as u32)", i), - Ty::F64 => format!("f64::from_bits(test[{}] as u64)", i), - Ty::I32 => format!("test[{}] as i32", i), - Ty::Bool => format!("test[{}] as i32", i), - }); - src.push_str(","); - } - for (i, _) in retptr.iter().enumerate() { - src.push_str(&format!("&mut argret{},", i)); - } - src.push_str(");"); - if let Some(ty) = &ret { - src.push_str(&format!("let output = output{};", ty.to_i64())); - src.push_str("result.extend_from_slice(&output.to_le_bytes());"); - } - - for (i, ret) in retptr.iter().enumerate() { - src.push_str(&format!( - "result.extend_from_slice(&(argret{}{}).to_le_bytes());", - i, - ret.to_i64(), - )); - } - src.push_str("}"); - - src.push_str("}"); - } - - src.push_str("std::io::stdout().write_all(&result).unwrap();"); - - src.push_str("}"); - - let path = format!("{}/gen.rs", dst); - fs::write(&path, src).unwrap(); - - // Make it somewhat pretty if something goes wrong - drop(Command::new("rustfmt").arg(&path).status()); - - // Compile and execute this tests for the musl target, assuming we're an - // x86_64 host effectively. - let status = Command::new("rustc") - .current_dir(&dst) - .arg(&path) - .arg("--target=x86_64-unknown-linux-musl") - .status() - .unwrap(); - assert!(status.success()); - let output = Command::new("./gen").current_dir(&dst).output().unwrap(); - assert!(output.status.success()); - assert!(output.stderr.is_empty()); - - // Map all the output bytes back to an `i64` and then shove it all into - // the expected results. - let mut results = output.stdout.chunks_exact(8).map(|buf| { - let mut exact = [0; 8]; - exact.copy_from_slice(buf); - i64::from_le_bytes(exact) - }); - - for f in functions.iter_mut() { - for test in f.tests.iter_mut() { - test.outputs = (0..f.ret.len()).map(|_| results.next().unwrap()).collect(); - } - } - assert!(results.next().is_none()); - } - - /// Codegens a file which has a ton of `#[test]` annotations for all the - /// tests that we generated above. - fn generate_unit_tests(functions: &[Function]) { - let mut src = String::new(); - let dst = std::env::var("OUT_DIR").unwrap(); - - for function in functions { - src.push_str("#[test]"); - src.push_str("fn "); - src.push_str(&function.name); - src.push_str("_matches_musl() {"); - src.push_str(&format!( - "static TESTS: &[([i64; {}], [i64; {}])]", - function.args.len(), - function.ret.len(), - )); - src.push_str(" = &["); - for test in function.tests.iter() { - src.push_str("(["); - for val in test.inputs.iter() { - src.push_str(&val.to_string()); - src.push_str(","); - } - src.push_str("],"); - src.push_str("["); - for val in test.outputs.iter() { - src.push_str(&val.to_string()); - src.push_str(","); - } - src.push_str("],"); - src.push_str("),"); - } - src.push_str("];"); - - src.push_str("for (test, expected) in TESTS {"); - src.push_str("let output = libm::"); - src.push_str(&function.name); - src.push_str("("); - for (i, arg) in function.args.iter().enumerate() { - src.push_str(&match arg { - Ty::F32 => format!("f32::from_bits(test[{}] as u32)", i), - Ty::F64 => format!("f64::from_bits(test[{}] as u64)", i), - Ty::I32 => format!("test[{}] as i32", i), - Ty::Bool => format!("test[{}] as i32", i), - }); - src.push_str(","); - } - src.push_str(");"); - - for (i, ret) in function.ret.iter().enumerate() { - let get = if function.ret.len() == 1 { String::new() } else { format!(".{}", i) }; - src.push_str(&(match ret { - Ty::F32 => format!("if libm::_eqf(output{}, f32::from_bits(expected[{}] as u32)).is_ok() {{ continue }}", get, i), - Ty::F64 => format!("if libm::_eq(output{}, f64::from_bits(expected[{}] as u64)).is_ok() {{ continue }}", get, i), - Ty::I32 => format!("if output{} as i64 == expected[{}] {{ continue }}", get, i), - Ty::Bool => unreachable!(), - })); - } - - src.push_str( - r#" - panic!("INPUT: {:?} EXPECTED: {:?} ACTUAL {:?}", test, expected, output); - "#, - ); - src.push_str("}"); - - src.push_str("}"); - } - - let path = format!("{}/musl-tests.rs", dst); - fs::write(&path, src).unwrap(); - - // Try to make it somewhat pretty - drop(Command::new("rustfmt").arg(&path).status()); - } + configure::emit_test_config(&cfg); } diff --git a/crates/libm-test/examples/plot_domains.rs b/crates/libm-test/examples/plot_domains.rs new file mode 100644 index 000000000..78524761e --- /dev/null +++ b/crates/libm-test/examples/plot_domains.rs @@ -0,0 +1,103 @@ +//! Program to write all inputs from a generator to a file, then invoke a Julia script to plot +//! them. Output is in `target/plots`. +//! +//! Requires Julia with the `CairoMakie` dependency. +//! +//! Note that running in release mode by default generates a _lot_ more datapoints, which +//! causes plotting to be extremely slow (some simplification to be done in the script). + +use std::fmt::Write as _; +use std::io::{BufWriter, Write}; +use std::path::Path; +use std::process::Command; +use std::{env, fs}; + +use libm_test::generate::spaced::SpacedInput; +use libm_test::generate::{edge_cases, spaced}; +use libm_test::{CheckBasis, CheckCtx, GeneratorKind, MathOp, op}; + +const JL_PLOT: &str = "examples/plot_file.jl"; + +fn main() { + let manifest_env = env::var("CARGO_MANIFEST_DIR").unwrap(); + let manifest_dir = Path::new(&manifest_env); + let out_dir = manifest_dir.join("../../target/plots"); + if !out_dir.exists() { + fs::create_dir(&out_dir).unwrap(); + } + + let jl_script = manifest_dir.join(JL_PLOT); + let mut config = format!(r#"out_dir = "{}""#, out_dir.display()); + config.write_str("\n\n").unwrap(); + + // Plot a few domains with some functions that use them. + plot_one_operator::(&out_dir, &mut config); + plot_one_operator::(&out_dir, &mut config); + plot_one_operator::(&out_dir, &mut config); + + let config_path = out_dir.join("config.toml"); + fs::write(&config_path, config).unwrap(); + + // The script expects a path to `config.toml` to be passed as its only argument + let mut cmd = Command::new("julia"); + if cfg!(optimizations_enabled) { + cmd.arg("-O3"); + } + cmd.arg(jl_script).arg(config_path); + + println!("launching script... {cmd:?}"); + cmd.status().unwrap(); +} + +/// Run multiple generators for a single operator. +fn plot_one_operator(out_dir: &Path, config: &mut String) +where + Op: MathOp, + Op::RustArgs: SpacedInput, +{ + let mut ctx = CheckCtx::new(Op::IDENTIFIER, CheckBasis::Mpfr, GeneratorKind::QuickSpaced); + plot_one_generator(out_dir, &ctx, "logspace", config, spaced::get_test_cases::(&ctx).0); + ctx.gen_kind = GeneratorKind::EdgeCases; + plot_one_generator( + out_dir, + &ctx, + "edge_cases", + config, + edge_cases::get_test_cases::(&ctx).0, + ); +} + +/// Plot the output of a single generator. +fn plot_one_generator( + out_dir: &Path, + ctx: &CheckCtx, + gen_name: &str, + config: &mut String, + generator: impl Iterator, +) { + let fn_name = ctx.base_name_str; + let text_file = out_dir.join(format!("input-{fn_name}-{gen_name}.txt")); + + let f = fs::File::create(&text_file).unwrap(); + let mut w = BufWriter::new(f); + let mut count = 0u64; + + for input in generator { + writeln!(w, "{:e}", input.0).unwrap(); + count += 1; + } + + w.flush().unwrap(); + println!("generated {count} inputs for {fn_name}-{gen_name}"); + + writeln!( + config, + r#"[[input]] +function = "{fn_name}" +generator = "{gen_name}" +input_file = "{}" +"#, + text_file.to_str().unwrap() + ) + .unwrap() +} diff --git a/crates/libm-test/examples/plot_file.jl b/crates/libm-test/examples/plot_file.jl new file mode 100644 index 000000000..acffd9756 --- /dev/null +++ b/crates/libm-test/examples/plot_file.jl @@ -0,0 +1,171 @@ +"A quick script for plotting a list of floats. + +Takes a path to a TOML file (Julia has builtin TOML support but not JSON) which +specifies a list of source files to plot. Plots are done with both a linear and +a log scale. + +Requires [Makie] (specifically CairoMakie) for plotting. + +[Makie]: https://docs.makie.org/stable/ +" + +using CairoMakie +using TOML + +function main()::Nothing + CairoMakie.activate!(px_per_unit = 10) + config_path = ARGS[1] + + cfg = Dict() + open(config_path, "r") do f + cfg = TOML.parse(f) + end + + out_dir = cfg["out_dir"] + for input in cfg["input"] + fn_name = input["function"] + gen_name = input["generator"] + input_file = input["input_file"] + + plot_one(input_file, out_dir, fn_name, gen_name) + end +end + +"Read inputs from a file, create both linear and log plots for one function" +function plot_one( + input_file::String, + out_dir::String, + fn_name::String, + gen_name::String, +)::Nothing + fig = Figure() + + lin_out_file = joinpath(out_dir, "plot-$fn_name-$gen_name.png") + log_out_file = joinpath(out_dir, "plot-$fn_name-$gen_name-log.png") + + # Map string function names to callable functions + if fn_name == "cos" + orig_func = cos + xlims = (-6.0, 6.0) + xlims_log = (-pi * 10, pi * 10) + elseif fn_name == "cbrt" + orig_func = cbrt + xlims = (-2.0, 2.0) + xlims_log = (-1000.0, 1000.0) + elseif fn_name == "sqrt" + orig_func = sqrt + xlims = (-1.1, 6.0) + xlims_log = (-1.1, 5000.0) + else + println("unrecognized function name `$fn_name`; update plot_file.jl") + exit(1) + end + + # Edge cases don't do much beyond +/-1, except for infinity. + if gen_name == "edge_cases" + xlims = (-1.1, 1.1) + xlims_log = (-1.1, 1.1) + end + + # Turn domain errors into NaN + func(x) = map_or(x, orig_func, NaN) + + # Parse a series of X values produced by the generator + inputs = readlines(input_file) + gen_x = map((v) -> parse(Float32, v), inputs) + + do_plot( + fig, + gen_x, + func, + xlims[1], + xlims[2], + "$fn_name $gen_name (linear scale)", + lin_out_file, + false, + ) + + do_plot( + fig, + gen_x, + func, + xlims_log[1], + xlims_log[2], + "$fn_name $gen_name (log scale)", + log_out_file, + true, + ) +end + +"Create a single plot" +function do_plot( + fig::Figure, + gen_x::Vector{F}, + func::Function, + xmin::AbstractFloat, + xmax::AbstractFloat, + title::String, + out_file::String, + logscale::Bool, +)::Nothing where {F<:AbstractFloat} + println("plotting $title") + + # `gen_x` is the values the generator produces. `actual_x` is for plotting a + # continuous function. + input_min = xmin - 1.0 + input_max = xmax + 1.0 + gen_x = filter((v) -> v >= input_min && v <= input_max, gen_x) + markersize = length(gen_x) < 10_000 ? 6.0 : 4.0 + + steps = 10_000 + if logscale + r = LinRange(symlog10(input_min), symlog10(input_max), steps) + actual_x = sympow10.(r) + xscale = Makie.pseudolog10 + else + actual_x = LinRange(input_min, input_max, steps) + xscale = identity + end + + gen_y = @. func(gen_x) + actual_y = @. func(actual_x) + + ax = Axis(fig[1, 1], xscale = xscale, title = title) + + lines!( + ax, + actual_x, + actual_y, + color = (:lightblue, 0.6), + linewidth = 6.0, + label = "true function", + ) + scatter!( + ax, + gen_x, + gen_y, + color = (:darkblue, 0.9), + markersize = markersize, + label = "checked inputs", + ) + axislegend(ax, position = :rb, framevisible = false) + + save(out_file, fig) + delete!(ax) +end + +"Apply a function, returning the default if there is a domain error" +function map_or(input::AbstractFloat, f::Function, default::Any)::Union{AbstractFloat,Any} + try + return f(input) + catch + return default + end +end + +# Operations for logarithms that are symmetric about 0 +C = 10 +symlog10(x::Number) = sign(x) * (log10(1 + abs(x) / (10^C))) +sympow10(x::Number) = (10^C) * (10^x - 1) + +main() diff --git a/crates/libm-test/src/domain.rs b/crates/libm-test/src/domain.rs new file mode 100644 index 000000000..41e948461 --- /dev/null +++ b/crates/libm-test/src/domain.rs @@ -0,0 +1,265 @@ +//! Traits and operations related to bounds of a function. + +use std::fmt; +use std::ops::Bound; + +use libm::support::Int; + +use crate::{BaseName, Float, FloatExt, Identifier}; + +/// Representation of a single dimension of a function's domain. +#[derive(Clone, Debug)] +pub struct Domain { + /// Start of the region for which a function is defined (ignoring poles). + pub start: Bound, + /// Endof the region for which a function is defined (ignoring poles). + pub end: Bound, + /// Additional points to check closer around. These can be e.g. undefined asymptotes or + /// inflection points. + pub check_points: Option BoxIter>, +} + +type BoxIter = Box>; + +impl Domain { + /// The start of this domain, saturating at negative infinity. + pub fn range_start(&self) -> F { + match self.start { + Bound::Included(v) => v, + Bound::Excluded(v) => v.next_up(), + Bound::Unbounded => F::NEG_INFINITY, + } + } + + /// The end of this domain, saturating at infinity. + pub fn range_end(&self) -> F { + match self.end { + Bound::Included(v) => v, + Bound::Excluded(v) => v.next_down(), + Bound::Unbounded => F::INFINITY, + } + } +} + +/// A value that may be any float type or any integer type. +#[derive(Clone, Debug)] +pub enum EitherPrim { + Float(F), + Int(I), +} + +impl EitherPrim { + pub fn unwrap_float(self) -> F { + match self { + EitherPrim::Float(f) => f, + EitherPrim::Int(_) => panic!("expected float; got {self:?}"), + } + } + + pub fn unwrap_int(self) -> I { + match self { + EitherPrim::Float(_) => panic!("expected int; got {self:?}"), + EitherPrim::Int(i) => i, + } + } +} + +/// Convenience 1-dimensional float domains. +impl Domain { + /// x ∈ ℝ + const UNBOUNDED: Self = + Self { start: Bound::Unbounded, end: Bound::Unbounded, check_points: None }; + + /// x ∈ ℝ >= 0 + const POSITIVE: Self = + Self { start: Bound::Included(F::ZERO), end: Bound::Unbounded, check_points: None }; + + /// x ∈ ℝ > 0 + const STRICTLY_POSITIVE: Self = + Self { start: Bound::Excluded(F::ZERO), end: Bound::Unbounded, check_points: None }; + + /// Wrap in the float variant of [`EitherPrim`]. + const fn into_prim_float(self) -> EitherPrim> { + EitherPrim::Float(self) + } +} + +/// Convenience 1-dimensional integer domains. +impl Domain { + /// x ∈ ℝ + const UNBOUNDED_INT: Self = + Self { start: Bound::Unbounded, end: Bound::Unbounded, check_points: None }; + + /// Wrap in the int variant of [`EitherPrim`]. + const fn into_prim_int(self) -> EitherPrim, Self> { + EitherPrim::Int(self) + } +} + +/// Multidimensional domains, represented as an array of 1-D domains. +impl EitherPrim, Domain> { + /// x ∈ ℝ + const UNBOUNDED1: [Self; 1] = + [Domain { start: Bound::Unbounded, end: Bound::Unbounded, check_points: None } + .into_prim_float()]; + + /// {x1, x2} ∈ ℝ + const UNBOUNDED2: [Self; 2] = + [Domain::UNBOUNDED.into_prim_float(), Domain::UNBOUNDED.into_prim_float()]; + + /// {x1, x2, x3} ∈ ℝ + const UNBOUNDED3: [Self; 3] = [ + Domain::UNBOUNDED.into_prim_float(), + Domain::UNBOUNDED.into_prim_float(), + Domain::UNBOUNDED.into_prim_float(), + ]; + + /// {x1, x2} ∈ ℝ, one float and one int + const UNBOUNDED_F_I: [Self; 2] = + [Domain::UNBOUNDED.into_prim_float(), Domain::UNBOUNDED_INT.into_prim_int()]; + + /// x ∈ ℝ >= 0 + const POSITIVE: [Self; 1] = [Domain::POSITIVE.into_prim_float()]; + + /// x ∈ ℝ > 0 + const STRICTLY_POSITIVE: [Self; 1] = [Domain::STRICTLY_POSITIVE.into_prim_float()]; + + /// Used for versions of `asin` and `acos`. + const INVERSE_TRIG_PERIODIC: [Self; 1] = [Domain { + start: Bound::Included(F::NEG_ONE), + end: Bound::Included(F::ONE), + check_points: None, + } + .into_prim_float()]; + + /// Domain for `acosh` + const ACOSH: [Self; 1] = + [Domain { start: Bound::Included(F::ONE), end: Bound::Unbounded, check_points: None } + .into_prim_float()]; + + /// Domain for `atanh` + const ATANH: [Self; 1] = [Domain { + start: Bound::Excluded(F::NEG_ONE), + end: Bound::Excluded(F::ONE), + check_points: None, + } + .into_prim_float()]; + + /// Domain for `sin`, `cos`, and `tan` + const TRIG: [Self; 1] = [Domain { + // Trig functions have special behavior at fractions of π. + check_points: Some(|| Box::new([-F::PI, -F::FRAC_PI_2, F::FRAC_PI_2, F::PI].into_iter())), + ..Domain::UNBOUNDED + } + .into_prim_float()]; + + /// Domain for `log` in various bases + const LOG: [Self; 1] = Self::STRICTLY_POSITIVE; + + /// Domain for `log1p` i.e. `log(1 + x)` + const LOG1P: [Self; 1] = + [Domain { start: Bound::Excluded(F::NEG_ONE), end: Bound::Unbounded, check_points: None } + .into_prim_float()]; + + /// Domain for `sqrt` + const SQRT: [Self; 1] = Self::POSITIVE; + + /// Domain for `gamma` + const GAMMA: [Self; 1] = [Domain { + check_points: Some(|| { + // Negative integers are asymptotes + Box::new((0..u8::MAX).map(|scale| { + let mut base = F::ZERO; + for _ in 0..scale { + base = base - F::ONE; + } + base + })) + }), + // Whether or not gamma is defined for negative numbers is implementation dependent + ..Domain::UNBOUNDED + } + .into_prim_float()]; + + /// Domain for `loggamma` + const LGAMMA: [Self; 1] = Self::STRICTLY_POSITIVE; + + /// Domain for `jn` and `yn`. + // FIXME: the domain should provide some sort of "reasonable range" so we don't actually test + // the entire system unbounded. + const BESSEL_N: [Self; 2] = + [Domain::UNBOUNDED_INT.into_prim_int(), Domain::UNBOUNDED.into_prim_float()]; +} + +/// Get the domain for a given function. +pub fn get_domain( + id: Identifier, + argnum: usize, +) -> EitherPrim, Domain> { + let x = match id.base_name() { + BaseName::Acos => &EitherPrim::INVERSE_TRIG_PERIODIC[..], + BaseName::Acosh => &EitherPrim::ACOSH[..], + BaseName::Asin => &EitherPrim::INVERSE_TRIG_PERIODIC[..], + BaseName::Asinh => &EitherPrim::UNBOUNDED1[..], + BaseName::Atan => &EitherPrim::UNBOUNDED1[..], + BaseName::Atan2 => &EitherPrim::UNBOUNDED2[..], + BaseName::Cbrt => &EitherPrim::UNBOUNDED1[..], + BaseName::Atanh => &EitherPrim::ATANH[..], + BaseName::Ceil => &EitherPrim::UNBOUNDED1[..], + BaseName::Cosh => &EitherPrim::UNBOUNDED1[..], + BaseName::Copysign => &EitherPrim::UNBOUNDED2[..], + BaseName::Cos => &EitherPrim::TRIG[..], + BaseName::Exp => &EitherPrim::UNBOUNDED1[..], + BaseName::Erf => &EitherPrim::UNBOUNDED1[..], + BaseName::Erfc => &EitherPrim::UNBOUNDED1[..], + BaseName::Expm1 => &EitherPrim::UNBOUNDED1[..], + BaseName::Exp10 => &EitherPrim::UNBOUNDED1[..], + BaseName::Exp2 => &EitherPrim::UNBOUNDED1[..], + BaseName::Frexp => &EitherPrim::UNBOUNDED1[..], + BaseName::Fabs => &EitherPrim::UNBOUNDED1[..], + BaseName::Fdim => &EitherPrim::UNBOUNDED2[..], + BaseName::Floor => &EitherPrim::UNBOUNDED1[..], + BaseName::Fma => &EitherPrim::UNBOUNDED3[..], + BaseName::Fmax => &EitherPrim::UNBOUNDED2[..], + BaseName::Fmaximum => &EitherPrim::UNBOUNDED2[..], + BaseName::FmaximumNum => &EitherPrim::UNBOUNDED2[..], + BaseName::Fmin => &EitherPrim::UNBOUNDED2[..], + BaseName::Fminimum => &EitherPrim::UNBOUNDED2[..], + BaseName::FminimumNum => &EitherPrim::UNBOUNDED2[..], + BaseName::Fmod => &EitherPrim::UNBOUNDED2[..], + BaseName::Hypot => &EitherPrim::UNBOUNDED2[..], + BaseName::Ilogb => &EitherPrim::UNBOUNDED1[..], + BaseName::J0 => &EitherPrim::UNBOUNDED1[..], + BaseName::J1 => &EitherPrim::UNBOUNDED1[..], + BaseName::Jn => &EitherPrim::BESSEL_N[..], + BaseName::Ldexp => &EitherPrim::UNBOUNDED_F_I[..], + BaseName::Lgamma => &EitherPrim::LGAMMA[..], + BaseName::LgammaR => &EitherPrim::LGAMMA[..], + BaseName::Log => &EitherPrim::LOG[..], + BaseName::Log10 => &EitherPrim::LOG[..], + BaseName::Log1p => &EitherPrim::LOG1P[..], + BaseName::Log2 => &EitherPrim::LOG[..], + BaseName::Modf => &EitherPrim::UNBOUNDED1[..], + BaseName::Nextafter => &EitherPrim::UNBOUNDED2[..], + BaseName::Pow => &EitherPrim::UNBOUNDED2[..], + BaseName::Remainder => &EitherPrim::UNBOUNDED2[..], + BaseName::Remquo => &EitherPrim::UNBOUNDED2[..], + BaseName::Rint => &EitherPrim::UNBOUNDED1[..], + BaseName::Round => &EitherPrim::UNBOUNDED1[..], + BaseName::Roundeven => &EitherPrim::UNBOUNDED1[..], + BaseName::Scalbn => &EitherPrim::UNBOUNDED_F_I[..], + BaseName::Sin => &EitherPrim::TRIG[..], + BaseName::Sincos => &EitherPrim::TRIG[..], + BaseName::Sinh => &EitherPrim::UNBOUNDED1[..], + BaseName::Sqrt => &EitherPrim::SQRT[..], + BaseName::Tan => &EitherPrim::TRIG[..], + BaseName::Tanh => &EitherPrim::UNBOUNDED1[..], + BaseName::Tgamma => &EitherPrim::GAMMA[..], + BaseName::Trunc => &EitherPrim::UNBOUNDED1[..], + BaseName::Y0 => &EitherPrim::UNBOUNDED1[..], + BaseName::Y1 => &EitherPrim::UNBOUNDED1[..], + BaseName::Yn => &EitherPrim::BESSEL_N[..], + }; + + x[argnum].clone() +} diff --git a/crates/libm-test/src/f8_impl.rs b/crates/libm-test/src/f8_impl.rs new file mode 100644 index 000000000..ddb7bf90e --- /dev/null +++ b/crates/libm-test/src/f8_impl.rs @@ -0,0 +1,503 @@ +//! An IEEE-compliant 8-bit float type for testing purposes. + +use std::cmp::{self, Ordering}; +use std::{fmt, ops}; + +use crate::Float; + +/// Sometimes verifying float logic is easiest when all values can quickly be checked exhaustively +/// or by hand. +/// +/// IEEE-754 compliant type that includes a 1 bit sign, 4 bit exponent, and 3 bit significand. +/// Bias is -7. +/// +/// Based on . +#[derive(Clone, Copy)] +#[repr(transparent)] +#[allow(non_camel_case_types)] +pub struct f8(u8); + +impl Float for f8 { + type Int = u8; + type SignedInt = i8; + + const ZERO: Self = Self(0b0_0000_000); + const NEG_ZERO: Self = Self(0b1_0000_000); + const ONE: Self = Self(0b0_0111_000); + const NEG_ONE: Self = Self(0b1_0111_000); + const MAX: Self = Self(0b0_1110_111); + const MIN: Self = Self(0b1_1110_111); + const INFINITY: Self = Self(0b0_1111_000); + const NEG_INFINITY: Self = Self(0b1_1111_000); + const NAN: Self = Self(0b0_1111_100); + const NEG_NAN: Self = Self(0b1_1111_100); + const MIN_POSITIVE_NORMAL: Self = Self(1 << Self::SIG_BITS); + // FIXME: incorrect values + const EPSILON: Self = Self::ZERO; + const PI: Self = Self::ZERO; + const NEG_PI: Self = Self::ZERO; + const FRAC_PI_2: Self = Self::ZERO; + + const BITS: u32 = 8; + const SIG_BITS: u32 = 3; + const SIGN_MASK: Self::Int = 0b1_0000_000; + const SIG_MASK: Self::Int = 0b0_0000_111; + const EXP_MASK: Self::Int = 0b0_1111_000; + const IMPLICIT_BIT: Self::Int = 0b0_0001_000; + + fn to_bits(self) -> Self::Int { + self.0 + } + + fn to_bits_signed(self) -> Self::SignedInt { + self.0 as i8 + } + + fn is_nan(self) -> bool { + self.0 & Self::EXP_MASK == Self::EXP_MASK && self.0 & Self::SIG_MASK != 0 + } + + fn is_infinite(self) -> bool { + self.0 & Self::EXP_MASK == Self::EXP_MASK && self.0 & Self::SIG_MASK == 0 + } + + fn is_sign_negative(self) -> bool { + self.0 & Self::SIGN_MASK != 0 + } + + fn from_bits(a: Self::Int) -> Self { + Self(a) + } + + fn abs(self) -> Self { + libm::generic::fabs(self) + } + + fn copysign(self, other: Self) -> Self { + libm::generic::copysign(self, other) + } + + fn fma(self, _y: Self, _z: Self) -> Self { + unimplemented!() + } + + fn normalize(_significand: Self::Int) -> (i32, Self::Int) { + unimplemented!() + } +} + +impl f8 { + pub const ALL_LEN: usize = 240; + + /// All non-infinite non-NaN values of `f8` + pub const ALL: [Self; Self::ALL_LEN] = [ + // -m*2^7 + Self(0b1_1110_111), // -240 + Self(0b1_1110_110), + Self(0b1_1110_101), + Self(0b1_1110_100), + Self(0b1_1110_011), + Self(0b1_1110_010), + Self(0b1_1110_001), + Self(0b1_1110_000), // -128 + // -m*2^6 + Self(0b1_1101_111), // -120 + Self(0b1_1101_110), + Self(0b1_1101_101), + Self(0b1_1101_100), + Self(0b1_1101_011), + Self(0b1_1101_010), + Self(0b1_1101_001), + Self(0b1_1101_000), // -64 + // -m*2^5 + Self(0b1_1100_111), // -60 + Self(0b1_1100_110), + Self(0b1_1100_101), + Self(0b1_1100_100), + Self(0b1_1100_011), + Self(0b1_1100_010), + Self(0b1_1100_001), + Self(0b1_1100_000), // -32 + // -m*2^4 + Self(0b1_1011_111), // -30 + Self(0b1_1011_110), + Self(0b1_1011_101), + Self(0b1_1011_100), + Self(0b1_1011_011), + Self(0b1_1011_010), + Self(0b1_1011_001), + Self(0b1_1011_000), // -16 + // -m*2^3 + Self(0b1_1010_111), // -15 + Self(0b1_1010_110), + Self(0b1_1010_101), + Self(0b1_1010_100), + Self(0b1_1010_011), + Self(0b1_1010_010), + Self(0b1_1010_001), + Self(0b1_1010_000), // -8 + // -m*2^2 + Self(0b1_1001_111), // -7.5 + Self(0b1_1001_110), + Self(0b1_1001_101), + Self(0b1_1001_100), + Self(0b1_1001_011), + Self(0b1_1001_010), + Self(0b1_1001_001), + Self(0b1_1001_000), // -4 + // -m*2^1 + Self(0b1_1000_111), // -3.75 + Self(0b1_1000_110), + Self(0b1_1000_101), + Self(0b1_1000_100), + Self(0b1_1000_011), + Self(0b1_1000_010), + Self(0b1_1000_001), + Self(0b1_1000_000), // -2 + // -m*2^0 + Self(0b1_0111_111), // -1.875 + Self(0b1_0111_110), + Self(0b1_0111_101), + Self(0b1_0111_100), + Self(0b1_0111_011), + Self(0b1_0111_010), + Self(0b1_0111_001), + Self(0b1_0111_000), // -1 + // -m*2^-1 + Self(0b1_0110_111), // −0.9375 + Self(0b1_0110_110), + Self(0b1_0110_101), + Self(0b1_0110_100), + Self(0b1_0110_011), + Self(0b1_0110_010), + Self(0b1_0110_001), + Self(0b1_0110_000), // -0.5 + // -m*2^-2 + Self(0b1_0101_111), // −0.46875 + Self(0b1_0101_110), + Self(0b1_0101_101), + Self(0b1_0101_100), + Self(0b1_0101_011), + Self(0b1_0101_010), + Self(0b1_0101_001), + Self(0b1_0101_000), // -0.25 + // -m*2^-3 + Self(0b1_0100_111), // −0.234375 + Self(0b1_0100_110), + Self(0b1_0100_101), + Self(0b1_0100_100), + Self(0b1_0100_011), + Self(0b1_0100_010), + Self(0b1_0100_001), + Self(0b1_0100_000), // -0.125 + // -m*2^-4 + Self(0b1_0011_111), // −0.1171875 + Self(0b1_0011_110), + Self(0b1_0011_101), + Self(0b1_0011_100), + Self(0b1_0011_011), + Self(0b1_0011_010), + Self(0b1_0011_001), + Self(0b1_0011_000), // −0.0625 + // -m*2^-5 + Self(0b1_0010_111), // −0.05859375 + Self(0b1_0010_110), + Self(0b1_0010_101), + Self(0b1_0010_100), + Self(0b1_0010_011), + Self(0b1_0010_010), + Self(0b1_0010_001), + Self(0b1_0010_000), // −0.03125 + // -m*2^-6 + Self(0b1_0001_111), // −0.029296875 + Self(0b1_0001_110), + Self(0b1_0001_101), + Self(0b1_0001_100), + Self(0b1_0001_011), + Self(0b1_0001_010), + Self(0b1_0001_001), + Self(0b1_0001_000), // −0.015625 + // -m*2^-7 subnormal numbers + Self(0b1_0000_111), // −0.013671875 + Self(0b1_0000_110), + Self(0b1_0000_101), + Self(0b1_0000_100), + Self(0b1_0000_011), + Self(0b1_0000_010), + Self(0b1_0000_001), // −0.001953125 + // Zeroes + Self(0b1_0000_000), // -0.0 + Self(0b0_0000_000), // 0.0 + // m*2^-7 // subnormal numbers + Self(0b0_0000_001), + Self(0b0_0000_010), + Self(0b0_0000_011), + Self(0b0_0000_100), + Self(0b0_0000_101), + Self(0b0_0000_110), + Self(0b0_0000_111), // 0.013671875 + // m*2^-6 + Self(0b0_0001_000), // 0.015625 + Self(0b0_0001_001), + Self(0b0_0001_010), + Self(0b0_0001_011), + Self(0b0_0001_100), + Self(0b0_0001_101), + Self(0b0_0001_110), + Self(0b0_0001_111), // 0.029296875 + // m*2^-5 + Self(0b0_0010_000), // 0.03125 + Self(0b0_0010_001), + Self(0b0_0010_010), + Self(0b0_0010_011), + Self(0b0_0010_100), + Self(0b0_0010_101), + Self(0b0_0010_110), + Self(0b0_0010_111), // 0.05859375 + // m*2^-4 + Self(0b0_0011_000), // 0.0625 + Self(0b0_0011_001), + Self(0b0_0011_010), + Self(0b0_0011_011), + Self(0b0_0011_100), + Self(0b0_0011_101), + Self(0b0_0011_110), + Self(0b0_0011_111), // 0.1171875 + // m*2^-3 + Self(0b0_0100_000), // 0.125 + Self(0b0_0100_001), + Self(0b0_0100_010), + Self(0b0_0100_011), + Self(0b0_0100_100), + Self(0b0_0100_101), + Self(0b0_0100_110), + Self(0b0_0100_111), // 0.234375 + // m*2^-2 + Self(0b0_0101_000), // 0.25 + Self(0b0_0101_001), + Self(0b0_0101_010), + Self(0b0_0101_011), + Self(0b0_0101_100), + Self(0b0_0101_101), + Self(0b0_0101_110), + Self(0b0_0101_111), // 0.46875 + // m*2^-1 + Self(0b0_0110_000), // 0.5 + Self(0b0_0110_001), + Self(0b0_0110_010), + Self(0b0_0110_011), + Self(0b0_0110_100), + Self(0b0_0110_101), + Self(0b0_0110_110), + Self(0b0_0110_111), // 0.9375 + // m*2^0 + Self(0b0_0111_000), // 1 + Self(0b0_0111_001), + Self(0b0_0111_010), + Self(0b0_0111_011), + Self(0b0_0111_100), + Self(0b0_0111_101), + Self(0b0_0111_110), + Self(0b0_0111_111), // 1.875 + // m*2^1 + Self(0b0_1000_000), // 2 + Self(0b0_1000_001), + Self(0b0_1000_010), + Self(0b0_1000_011), + Self(0b0_1000_100), + Self(0b0_1000_101), + Self(0b0_1000_110), + Self(0b0_1000_111), // 3.75 + // m*2^2 + Self(0b0_1001_000), // 4 + Self(0b0_1001_001), + Self(0b0_1001_010), + Self(0b0_1001_011), + Self(0b0_1001_100), + Self(0b0_1001_101), + Self(0b0_1001_110), + Self(0b0_1001_111), // 7.5 + // m*2^3 + Self(0b0_1010_000), // 8 + Self(0b0_1010_001), + Self(0b0_1010_010), + Self(0b0_1010_011), + Self(0b0_1010_100), + Self(0b0_1010_101), + Self(0b0_1010_110), + Self(0b0_1010_111), // 15 + // m*2^4 + Self(0b0_1011_000), // 16 + Self(0b0_1011_001), + Self(0b0_1011_010), + Self(0b0_1011_011), + Self(0b0_1011_100), + Self(0b0_1011_101), + Self(0b0_1011_110), + Self(0b0_1011_111), // 30 + // m*2^5 + Self(0b0_1100_000), // 32 + Self(0b0_1100_001), + Self(0b0_1100_010), + Self(0b0_1100_011), + Self(0b0_1100_100), + Self(0b0_1100_101), + Self(0b0_1100_110), + Self(0b0_1100_111), // 60 + // m*2^6 + Self(0b0_1101_000), // 64 + Self(0b0_1101_001), + Self(0b0_1101_010), + Self(0b0_1101_011), + Self(0b0_1101_100), + Self(0b0_1101_101), + Self(0b0_1101_110), + Self(0b0_1101_111), // 120 + // m*2^7 + Self(0b0_1110_000), // 128 + Self(0b0_1110_001), + Self(0b0_1110_010), + Self(0b0_1110_011), + Self(0b0_1110_100), + Self(0b0_1110_101), + Self(0b0_1110_110), + Self(0b0_1110_111), // 240 + ]; +} + +impl ops::Add for f8 { + type Output = Self; + fn add(self, _rhs: Self) -> Self::Output { + unimplemented!() + } +} + +impl ops::Sub for f8 { + type Output = Self; + fn sub(self, _rhs: Self) -> Self::Output { + unimplemented!() + } +} +impl ops::Mul for f8 { + type Output = Self; + fn mul(self, _rhs: Self) -> Self::Output { + unimplemented!() + } +} +impl ops::Div for f8 { + type Output = Self; + fn div(self, _rhs: Self) -> Self::Output { + unimplemented!() + } +} + +impl ops::Neg for f8 { + type Output = Self; + fn neg(self) -> Self::Output { + Self(self.0 ^ Self::SIGN_MASK) + } +} + +impl ops::Rem for f8 { + type Output = Self; + fn rem(self, _rhs: Self) -> Self::Output { + unimplemented!() + } +} + +impl ops::AddAssign for f8 { + fn add_assign(&mut self, _rhs: Self) { + unimplemented!() + } +} + +impl ops::SubAssign for f8 { + fn sub_assign(&mut self, _rhs: Self) { + unimplemented!() + } +} + +impl ops::MulAssign for f8 { + fn mul_assign(&mut self, _rhs: Self) { + unimplemented!() + } +} + +impl cmp::PartialEq for f8 { + fn eq(&self, other: &Self) -> bool { + if self.is_nan() || other.is_nan() { + false + } else if self.abs().to_bits() | other.abs().to_bits() == 0 { + true + } else { + self.0 == other.0 + } + } +} +impl cmp::PartialOrd for f8 { + fn partial_cmp(&self, other: &Self) -> Option { + let inf_rep = f8::EXP_MASK; + + let a_abs = self.abs().to_bits(); + let b_abs = other.abs().to_bits(); + + // If either a or b is NaN, they are unordered. + if a_abs > inf_rep || b_abs > inf_rep { + return None; + } + + // If a and b are both zeros, they are equal. + if a_abs | b_abs == 0 { + return Some(Ordering::Equal); + } + + let a_srep = self.to_bits_signed(); + let b_srep = other.to_bits_signed(); + let res = a_srep.cmp(&b_srep); + + if a_srep & b_srep >= 0 { + // If at least one of a and b is positive, we get the same result comparing + // a and b as signed integers as we would with a fp_ting-point compare. + Some(res) + } else { + // Otherwise, both are negative, so we need to flip the sense of the + // comparison to get the correct result. + Some(res.reverse()) + } + } +} +impl fmt::Display for f8 { + fn fmt(&self, _f: &mut fmt::Formatter<'_>) -> fmt::Result { + unimplemented!() + } +} + +impl fmt::Debug for f8 { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt::Binary::fmt(self, f) + } +} + +impl fmt::Binary for f8 { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let v = self.0; + write!( + f, + "0b{:b}_{:04b}_{:03b}", + v >> 7, + (v & Self::EXP_MASK) >> Self::SIG_BITS, + v & Self::SIG_MASK + ) + } +} + +impl fmt::LowerHex for f8 { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + self.0.fmt(f) + } +} + +pub const fn hf8(s: &str) -> f8 { + let Ok(bits) = libm::support::hex_float::parse_hex_exact(s, 8, 3) else { panic!() }; + f8(bits as u8) +} diff --git a/crates/libm-test/src/gen.rs b/crates/libm-test/src/gen.rs deleted file mode 100644 index 3e9eca37a..000000000 --- a/crates/libm-test/src/gen.rs +++ /dev/null @@ -1,72 +0,0 @@ -//! Different generators that can create random or systematic bit patterns. - -use crate::GenerateInput; -pub mod random; - -/// Helper type to turn any reusable input into a generator. -#[derive(Clone, Debug, Default)] -pub struct CachedInput { - pub inputs_f32: Vec<(f32, f32, f32)>, - pub inputs_f64: Vec<(f64, f64, f64)>, - pub inputs_i32: Vec<(i32, i32, i32)>, -} - -impl GenerateInput<(f32,)> for CachedInput { - fn get_cases(&self) -> impl Iterator { - self.inputs_f32.iter().map(|f| (f.0,)) - } -} - -impl GenerateInput<(f32, f32)> for CachedInput { - fn get_cases(&self) -> impl Iterator { - self.inputs_f32.iter().map(|f| (f.0, f.1)) - } -} - -impl GenerateInput<(i32, f32)> for CachedInput { - fn get_cases(&self) -> impl Iterator { - self.inputs_i32.iter().zip(self.inputs_f32.iter()).map(|(i, f)| (i.0, f.0)) - } -} - -impl GenerateInput<(f32, i32)> for CachedInput { - fn get_cases(&self) -> impl Iterator { - GenerateInput::<(i32, f32)>::get_cases(self).map(|(i, f)| (f, i)) - } -} - -impl GenerateInput<(f32, f32, f32)> for CachedInput { - fn get_cases(&self) -> impl Iterator { - self.inputs_f32.iter().copied() - } -} - -impl GenerateInput<(f64,)> for CachedInput { - fn get_cases(&self) -> impl Iterator { - self.inputs_f64.iter().map(|f| (f.0,)) - } -} - -impl GenerateInput<(f64, f64)> for CachedInput { - fn get_cases(&self) -> impl Iterator { - self.inputs_f64.iter().map(|f| (f.0, f.1)) - } -} - -impl GenerateInput<(i32, f64)> for CachedInput { - fn get_cases(&self) -> impl Iterator { - self.inputs_i32.iter().zip(self.inputs_f64.iter()).map(|(i, f)| (i.0, f.0)) - } -} - -impl GenerateInput<(f64, i32)> for CachedInput { - fn get_cases(&self) -> impl Iterator { - GenerateInput::<(i32, f64)>::get_cases(self).map(|(i, f)| (f, i)) - } -} - -impl GenerateInput<(f64, f64, f64)> for CachedInput { - fn get_cases(&self) -> impl Iterator { - self.inputs_f64.iter().copied() - } -} diff --git a/crates/libm-test/src/gen/random.rs b/crates/libm-test/src/gen/random.rs deleted file mode 100644 index 601ef4f1d..000000000 --- a/crates/libm-test/src/gen/random.rs +++ /dev/null @@ -1,115 +0,0 @@ -//! A simple generator that produces deterministic random input, caching to use the same -//! inputs for all functions. - -use std::sync::LazyLock; - -use rand::{Rng, SeedableRng}; -use rand_chacha::ChaCha8Rng; - -use super::CachedInput; -use crate::GenerateInput; - -const SEED: [u8; 32] = *b"3.141592653589793238462643383279"; - -/// Number of tests to run. -const NTESTS: usize = { - if cfg!(optimizations_enabled) { - if crate::emulated() - || !cfg!(target_pointer_width = "64") - || cfg!(all(target_arch = "x86_64", target_vendor = "apple")) - { - // Tests are pretty slow on non-64-bit targets, x86 MacOS, and targets that run - // in QEMU. - 100_000 - } else { - 5_000_000 - } - } else { - // Without optimizations just run a quick check - 800 - } -}; - -/// Tested inputs. -static TEST_CASES: LazyLock = LazyLock::new(|| make_test_cases(NTESTS)); - -/// The first argument to `jn` and `jnf` is the number of iterations. Make this a reasonable -/// value so tests don't run forever. -static TEST_CASES_JN: LazyLock = LazyLock::new(|| { - // Start with regular test cases - let mut cases = (&*TEST_CASES).clone(); - - // These functions are extremely slow, limit them - cases.inputs_i32.truncate((NTESTS / 1000).max(80)); - cases.inputs_f32.truncate((NTESTS / 1000).max(80)); - cases.inputs_f64.truncate((NTESTS / 1000).max(80)); - - // It is easy to overflow the stack with these in debug mode - let max_iterations = if cfg!(optimizations_enabled) && cfg!(target_pointer_width = "64") { - 0xffff - } else if cfg!(windows) { - 0x00ff - } else { - 0x0fff - }; - - let mut rng = ChaCha8Rng::from_seed(SEED); - - for case in cases.inputs_i32.iter_mut() { - case.0 = rng.gen_range(3..=max_iterations); - } - - cases -}); - -fn make_test_cases(ntests: usize) -> CachedInput { - let mut rng = ChaCha8Rng::from_seed(SEED); - - // make sure we include some basic cases - let mut inputs_i32 = vec![(0, 0, 0), (1, 1, 1), (-1, -1, -1)]; - let mut inputs_f32 = vec![ - (0.0, 0.0, 0.0), - (f32::EPSILON, f32::EPSILON, f32::EPSILON), - (f32::INFINITY, f32::INFINITY, f32::INFINITY), - (f32::NEG_INFINITY, f32::NEG_INFINITY, f32::NEG_INFINITY), - (f32::MAX, f32::MAX, f32::MAX), - (f32::MIN, f32::MIN, f32::MIN), - (f32::MIN_POSITIVE, f32::MIN_POSITIVE, f32::MIN_POSITIVE), - (f32::NAN, f32::NAN, f32::NAN), - ]; - let mut inputs_f64 = vec![ - (0.0, 0.0, 0.0), - (f64::EPSILON, f64::EPSILON, f64::EPSILON), - (f64::INFINITY, f64::INFINITY, f64::INFINITY), - (f64::NEG_INFINITY, f64::NEG_INFINITY, f64::NEG_INFINITY), - (f64::MAX, f64::MAX, f64::MAX), - (f64::MIN, f64::MIN, f64::MIN), - (f64::MIN_POSITIVE, f64::MIN_POSITIVE, f64::MIN_POSITIVE), - (f64::NAN, f64::NAN, f64::NAN), - ]; - - inputs_i32.extend((0..(ntests - inputs_i32.len())).map(|_| rng.gen::<(i32, i32, i32)>())); - - // Generate integers to get a full range of bitpatterns, then convert back to - // floats. - inputs_f32.extend((0..(ntests - inputs_f32.len())).map(|_| { - let ints = rng.gen::<(u32, u32, u32)>(); - (f32::from_bits(ints.0), f32::from_bits(ints.1), f32::from_bits(ints.2)) - })); - inputs_f64.extend((0..(ntests - inputs_f64.len())).map(|_| { - let ints = rng.gen::<(u64, u64, u64)>(); - (f64::from_bits(ints.0), f64::from_bits(ints.1), f64::from_bits(ints.2)) - })); - - CachedInput { inputs_f32, inputs_f64, inputs_i32 } -} - -/// Create a test case iterator. -pub fn get_test_cases(fname: &str) -> impl Iterator -where - CachedInput: GenerateInput, -{ - let inputs = if fname == "jn" || fname == "jnf" { &TEST_CASES_JN } else { &TEST_CASES }; - - CachedInput::get_cases(inputs) -} diff --git a/crates/libm-test/src/generate.rs b/crates/libm-test/src/generate.rs new file mode 100644 index 000000000..89ca09a7a --- /dev/null +++ b/crates/libm-test/src/generate.rs @@ -0,0 +1,43 @@ +//! Different generators that can create random or systematic bit patterns. + +pub mod case_list; +pub mod edge_cases; +pub mod random; +pub mod spaced; + +/// A wrapper to turn any iterator into an `ExactSizeIterator`. Asserts the final result to ensure +/// the provided size was correct. +#[derive(Debug)] +pub struct KnownSize { + total: u64, + current: u64, + iter: I, +} + +impl KnownSize { + pub fn new(iter: I, total: u64) -> Self { + Self { total, current: 0, iter } + } +} + +impl Iterator for KnownSize { + type Item = I::Item; + + fn next(&mut self) -> Option { + let next = self.iter.next(); + if next.is_some() { + self.current += 1; + return next; + } + + assert_eq!(self.current, self.total, "total items did not match expected"); + None + } + + fn size_hint(&self) -> (usize, Option) { + let remaining = usize::try_from(self.total - self.current).unwrap(); + (remaining, Some(remaining)) + } +} + +impl ExactSizeIterator for KnownSize {} diff --git a/crates/libm-test/src/generate/case_list.rs b/crates/libm-test/src/generate/case_list.rs new file mode 100644 index 000000000..e3628d51c --- /dev/null +++ b/crates/libm-test/src/generate/case_list.rs @@ -0,0 +1,853 @@ +//! Test cases to verify specific values. +//! +//! Each routine can have a set of inputs and, optinoally, outputs. If an output is provided, it +//! will be used to check against. If only inputs are provided, the case will be checked against +//! a basis. +//! +//! This is useful for adding regression tests or expected failures. + +use libm::hf64; +#[cfg(f128_enabled)] +use libm::hf128; + +use crate::{CheckBasis, CheckCtx, GeneratorKind, MathOp, op}; + +pub struct TestCase { + pub input: Op::RustArgs, + pub output: Option, +} + +impl TestCase { + #[expect(dead_code)] + fn append_inputs(v: &mut Vec, l: &[Op::RustArgs]) { + v.extend(l.iter().copied().map(|input| Self { input, output: None })); + } + + fn append_pairs(v: &mut Vec, l: &[(Op::RustArgs, Option)]) + where + Op::RustRet: Copy, + { + v.extend(l.iter().copied().map(|(input, output)| Self { input, output })); + } +} + +fn acos_cases() -> Vec> { + vec![] +} + +fn acosf_cases() -> Vec> { + vec![] +} + +fn acosh_cases() -> Vec> { + vec![] +} + +fn acoshf_cases() -> Vec> { + vec![] +} + +fn asin_cases() -> Vec> { + vec![] +} + +fn asinf_cases() -> Vec> { + vec![] +} + +fn asinh_cases() -> Vec> { + vec![] +} + +fn asinhf_cases() -> Vec> { + vec![] +} + +fn atan_cases() -> Vec> { + vec![] +} + +fn atan2_cases() -> Vec> { + vec![] +} + +fn atan2f_cases() -> Vec> { + vec![] +} + +fn atanf_cases() -> Vec> { + vec![] +} + +fn atanh_cases() -> Vec> { + vec![] +} + +fn atanhf_cases() -> Vec> { + vec![] +} + +fn cbrt_cases() -> Vec> { + vec![] +} + +fn cbrtf_cases() -> Vec> { + vec![] +} + +fn ceil_cases() -> Vec> { + vec![] +} + +fn ceilf_cases() -> Vec> { + vec![] +} + +#[cfg(f128_enabled)] +fn ceilf128_cases() -> Vec> { + vec![] +} + +#[cfg(f16_enabled)] +fn ceilf16_cases() -> Vec> { + vec![] +} + +fn copysign_cases() -> Vec> { + vec![] +} + +fn copysignf_cases() -> Vec> { + vec![] +} + +#[cfg(f128_enabled)] +fn copysignf128_cases() -> Vec> { + vec![] +} + +#[cfg(f16_enabled)] +fn copysignf16_cases() -> Vec> { + vec![] +} + +fn cos_cases() -> Vec> { + vec![] +} + +fn cosf_cases() -> Vec> { + vec![] +} + +fn cosh_cases() -> Vec> { + vec![] +} + +fn coshf_cases() -> Vec> { + vec![] +} + +fn erf_cases() -> Vec> { + vec![] +} + +fn erfc_cases() -> Vec> { + vec![] +} + +fn erfcf_cases() -> Vec> { + vec![] +} + +fn erff_cases() -> Vec> { + vec![] +} + +fn exp_cases() -> Vec> { + vec![] +} + +fn exp10_cases() -> Vec> { + vec![] +} + +fn exp10f_cases() -> Vec> { + vec![] +} + +fn exp2_cases() -> Vec> { + vec![] +} + +fn exp2f_cases() -> Vec> { + vec![] +} + +fn expf_cases() -> Vec> { + vec![] +} + +fn expm1_cases() -> Vec> { + vec![] +} + +fn expm1f_cases() -> Vec> { + vec![] +} + +fn fabs_cases() -> Vec> { + vec![] +} + +fn fabsf_cases() -> Vec> { + vec![] +} + +#[cfg(f128_enabled)] +fn fabsf128_cases() -> Vec> { + vec![] +} + +#[cfg(f16_enabled)] +fn fabsf16_cases() -> Vec> { + vec![] +} + +fn fdim_cases() -> Vec> { + vec![] +} + +fn fdimf_cases() -> Vec> { + vec![] +} + +#[cfg(f128_enabled)] +fn fdimf128_cases() -> Vec> { + vec![] +} + +#[cfg(f16_enabled)] +fn fdimf16_cases() -> Vec> { + vec![] +} + +fn floor_cases() -> Vec> { + vec![] +} + +fn floorf_cases() -> Vec> { + vec![] +} + +#[cfg(f128_enabled)] +fn floorf128_cases() -> Vec> { + vec![] +} + +#[cfg(f16_enabled)] +fn floorf16_cases() -> Vec> { + vec![] +} + +fn fma_cases() -> Vec> { + let mut v = vec![]; + TestCase::append_pairs( + &mut v, + &[ + // Previous failure with incorrect sign + ((5e-324, -5e-324, 0.0), Some(-0.0)), + ], + ); + v +} + +fn fmaf_cases() -> Vec> { + vec![] +} + +#[cfg(f128_enabled)] +fn fmaf128_cases() -> Vec> { + let mut v = vec![]; + TestCase::append_pairs( + &mut v, + &[ + ( + // Tricky rounding case that previously failed in extensive tests + ( + hf128!("-0x1.1966cc01966cc01966cc01966f06p-25"), + hf128!("-0x1.669933fe69933fe69933fe6997c9p-16358"), + hf128!("-0x0.000000000000000000000000048ap-16382"), + ), + Some(hf128!("0x0.c5171470a3ff5e0f68d751491b18p-16382")), + ), + ( + // Subnormal edge case that caused a failure + ( + hf128!("0x0.7ffffffffffffffffffffffffff7p-16382"), + hf128!("0x1.ffffffffffffffffffffffffffffp-1"), + hf128!("0x0.8000000000000000000000000009p-16382"), + ), + Some(hf128!("0x1.0000000000000000000000000000p-16382")), + ), + ], + ); + v +} + +#[cfg(f16_enabled)] +fn fmaxf16_cases() -> Vec> { + vec![] +} + +fn fmaxf_cases() -> Vec> { + vec![] +} + +fn fmax_cases() -> Vec> { + vec![] +} + +#[cfg(f128_enabled)] +fn fmaxf128_cases() -> Vec> { + vec![] +} + +#[cfg(f16_enabled)] +fn fmaximumf16_cases() -> Vec> { + vec![] +} + +fn fmaximumf_cases() -> Vec> { + vec![] +} + +fn fmaximum_cases() -> Vec> { + vec![] +} + +#[cfg(f128_enabled)] +fn fmaximumf128_cases() -> Vec> { + vec![] +} + +#[cfg(f16_enabled)] +fn fmaximum_numf16_cases() -> Vec> { + vec![] +} + +fn fmaximum_numf_cases() -> Vec> { + vec![] +} + +fn fmaximum_num_cases() -> Vec> { + vec![] +} + +#[cfg(f128_enabled)] +fn fmaximum_numf128_cases() -> Vec> { + vec![] +} + +#[cfg(f16_enabled)] +fn fminf16_cases() -> Vec> { + vec![] +} + +fn fminf_cases() -> Vec> { + vec![] +} + +fn fmin_cases() -> Vec> { + vec![] +} + +#[cfg(f128_enabled)] +fn fminf128_cases() -> Vec> { + vec![] +} + +#[cfg(f16_enabled)] +fn fminimumf16_cases() -> Vec> { + vec![] +} + +fn fminimumf_cases() -> Vec> { + vec![] +} + +fn fminimum_cases() -> Vec> { + vec![] +} + +#[cfg(f128_enabled)] +fn fminimumf128_cases() -> Vec> { + vec![] +} + +#[cfg(f16_enabled)] +fn fminimum_numf16_cases() -> Vec> { + vec![] +} + +fn fminimum_numf_cases() -> Vec> { + vec![] +} + +fn fminimum_num_cases() -> Vec> { + vec![] +} + +#[cfg(f128_enabled)] +fn fminimum_numf128_cases() -> Vec> { + vec![] +} + +fn fmod_cases() -> Vec> { + let mut v = vec![]; + TestCase::append_pairs( + &mut v, + &[ + // Previous failure with incorrect loop iteration + // + ((2.1, 3.123e-320), Some(2.0696e-320)), + ((2.1, 2.253547e-318), Some(1.772535e-318)), + ], + ); + v +} + +fn fmodf_cases() -> Vec> { + let mut v = vec![]; + TestCase::append_pairs( + &mut v, + &[ + // Previous failure with incorrect loop iteration + // + ((2.1, 8.858e-42), Some(8.085e-42)), + ((2.1, 6.39164e-40), Some(6.1636e-40)), + ((5.5, 6.39164e-40), Some(4.77036e-40)), + ((-151.189, 6.39164e-40), Some(-5.64734e-40)), + ], + ); + v +} + +#[cfg(f128_enabled)] +fn fmodf128_cases() -> Vec> { + vec![] +} + +#[cfg(f16_enabled)] +fn fmodf16_cases() -> Vec> { + vec![] +} + +fn frexp_cases() -> Vec> { + vec![] +} + +fn frexpf_cases() -> Vec> { + vec![] +} + +fn hypot_cases() -> Vec> { + vec![] +} + +fn hypotf_cases() -> Vec> { + vec![] +} + +fn ilogb_cases() -> Vec> { + vec![] +} + +fn ilogbf_cases() -> Vec> { + vec![] +} + +fn j0_cases() -> Vec> { + vec![] +} + +fn j0f_cases() -> Vec> { + vec![] +} + +fn j1_cases() -> Vec> { + vec![] +} + +fn j1f_cases() -> Vec> { + vec![] +} + +fn jn_cases() -> Vec> { + vec![] +} + +fn jnf_cases() -> Vec> { + vec![] +} + +fn ldexp_cases() -> Vec> { + vec![] +} + +fn ldexpf_cases() -> Vec> { + vec![] +} + +#[cfg(f128_enabled)] +fn ldexpf128_cases() -> Vec> { + vec![] +} + +#[cfg(f16_enabled)] +fn ldexpf16_cases() -> Vec> { + vec![] +} + +fn lgamma_cases() -> Vec> { + vec![] +} + +fn lgamma_r_cases() -> Vec> { + vec![] +} + +fn lgammaf_cases() -> Vec> { + vec![] +} + +fn lgammaf_r_cases() -> Vec> { + vec![] +} + +fn log_cases() -> Vec> { + vec![] +} + +fn log10_cases() -> Vec> { + vec![] +} + +fn log10f_cases() -> Vec> { + vec![] +} + +fn log1p_cases() -> Vec> { + vec![] +} + +fn log1pf_cases() -> Vec> { + vec![] +} + +fn log2_cases() -> Vec> { + vec![] +} + +fn log2f_cases() -> Vec> { + vec![] +} + +fn logf_cases() -> Vec> { + vec![] +} + +fn modf_cases() -> Vec> { + vec![] +} + +fn modff_cases() -> Vec> { + vec![] +} + +fn nextafter_cases() -> Vec> { + vec![] +} + +fn nextafterf_cases() -> Vec> { + vec![] +} + +fn pow_cases() -> Vec> { + vec![] +} + +fn powf_cases() -> Vec> { + vec![] +} + +fn remainder_cases() -> Vec> { + vec![] +} + +fn remainderf_cases() -> Vec> { + vec![] +} + +fn remquo_cases() -> Vec> { + vec![] +} + +fn remquof_cases() -> Vec> { + vec![] +} + +fn rint_cases() -> Vec> { + let mut v = vec![]; + TestCase::append_pairs( + &mut v, + &[ + // Known failure on i586 + #[cfg(not(x86_no_sse))] + ((hf64!("-0x1.e3f13ff995ffcp+38"),), Some(hf64!("-0x1.e3f13ff994000p+38"))), + #[cfg(x86_no_sse)] + ((hf64!("-0x1.e3f13ff995ffcp+38"),), Some(hf64!("-0x1.e3f13ff998000p+38"))), + ], + ); + v +} + +fn rintf_cases() -> Vec> { + vec![] +} + +#[cfg(f128_enabled)] +fn rintf128_cases() -> Vec> { + vec![] +} + +#[cfg(f16_enabled)] +fn rintf16_cases() -> Vec> { + vec![] +} + +#[cfg(f16_enabled)] +fn roundf16_cases() -> Vec> { + vec![] +} + +fn round_cases() -> Vec> { + vec![] +} + +fn roundf_cases() -> Vec> { + vec![] +} + +#[cfg(f128_enabled)] +fn roundf128_cases() -> Vec> { + vec![] +} + +#[cfg(f16_enabled)] +fn roundevenf16_cases() -> Vec> { + vec![] +} + +fn roundeven_cases() -> Vec> { + let mut v = vec![]; + TestCase::append_pairs( + &mut v, + &[ + // Known failure on i586 + #[cfg(not(x86_no_sse))] + ((hf64!("-0x1.e3f13ff995ffcp+38"),), Some(hf64!("-0x1.e3f13ff994000p+38"))), + #[cfg(x86_no_sse)] + ((hf64!("-0x1.e3f13ff995ffcp+38"),), Some(hf64!("-0x1.e3f13ff998000p+38"))), + ], + ); + v +} + +fn roundevenf_cases() -> Vec> { + vec![] +} + +#[cfg(f128_enabled)] +fn roundevenf128_cases() -> Vec> { + vec![] +} + +fn scalbn_cases() -> Vec> { + vec![] +} + +fn scalbnf_cases() -> Vec> { + vec![] +} + +#[cfg(f128_enabled)] +fn scalbnf128_cases() -> Vec> { + vec![] +} + +#[cfg(f16_enabled)] +fn scalbnf16_cases() -> Vec> { + vec![] +} + +fn sin_cases() -> Vec> { + vec![] +} + +fn sincos_cases() -> Vec> { + vec![] +} + +fn sincosf_cases() -> Vec> { + vec![] +} + +fn sinf_cases() -> Vec> { + vec![] +} + +fn sinh_cases() -> Vec> { + vec![] +} + +fn sinhf_cases() -> Vec> { + vec![] +} + +fn sqrt_cases() -> Vec> { + vec![] +} + +fn sqrtf_cases() -> Vec> { + vec![] +} + +#[cfg(f128_enabled)] +fn sqrtf128_cases() -> Vec> { + vec![] +} + +#[cfg(f16_enabled)] +fn sqrtf16_cases() -> Vec> { + vec![] +} + +fn tan_cases() -> Vec> { + vec![] +} + +fn tanf_cases() -> Vec> { + vec![] +} + +fn tanh_cases() -> Vec> { + vec![] +} + +fn tanhf_cases() -> Vec> { + vec![] +} + +fn tgamma_cases() -> Vec> { + vec![] +} + +fn tgammaf_cases() -> Vec> { + vec![] +} + +fn trunc_cases() -> Vec> { + vec![] +} + +fn truncf_cases() -> Vec> { + vec![] +} + +#[cfg(f128_enabled)] +fn truncf128_cases() -> Vec> { + vec![] +} + +#[cfg(f16_enabled)] +fn truncf16_cases() -> Vec> { + vec![] +} + +fn y0_cases() -> Vec> { + vec![] +} + +fn y0f_cases() -> Vec> { + vec![] +} + +fn y1_cases() -> Vec> { + vec![] +} + +fn y1f_cases() -> Vec> { + vec![] +} + +fn yn_cases() -> Vec> { + vec![] +} + +fn ynf_cases() -> Vec> { + vec![] +} + +pub trait CaseListInput: MathOp + Sized { + fn get_cases() -> Vec>; +} + +macro_rules! impl_case_list { + ( + fn_name: $fn_name:ident, + attrs: [$($attr:meta),*], + ) => { + paste::paste! { + $(#[$attr])* + impl CaseListInput for crate::op::$fn_name::Routine { + fn get_cases() -> Vec> { + [< $fn_name _cases >]() + } + } + } + }; +} + +libm_macros::for_each_function! { + callback: impl_case_list, +} + +/// This is the test generator for standalone tests, i.e. those with no basis. For this, it +/// only extracts tests with a known output. +pub fn get_test_cases_standalone( + ctx: &CheckCtx, +) -> impl Iterator + use<'_, Op> +where + Op: MathOp + CaseListInput, +{ + assert_eq!(ctx.basis, CheckBasis::None); + assert_eq!(ctx.gen_kind, GeneratorKind::List); + Op::get_cases().into_iter().filter_map(|x| x.output.map(|o| (x.input, o))) +} + +/// Opposite of the above; extract only test cases that don't have a known output, to be run +/// against a basis. +pub fn get_test_cases_basis( + ctx: &CheckCtx, +) -> (impl Iterator + use<'_, Op>, u64) +where + Op: MathOp + CaseListInput, +{ + assert_ne!(ctx.basis, CheckBasis::None); + assert_eq!(ctx.gen_kind, GeneratorKind::List); + + let cases = Op::get_cases(); + let count: u64 = cases.iter().filter(|case| case.output.is_none()).count().try_into().unwrap(); + + (cases.into_iter().filter(|x| x.output.is_none()).map(|x| x.input), count) +} diff --git a/crates/libm-test/src/generate/edge_cases.rs b/crates/libm-test/src/generate/edge_cases.rs new file mode 100644 index 000000000..56cc9fa9a --- /dev/null +++ b/crates/libm-test/src/generate/edge_cases.rs @@ -0,0 +1,310 @@ +//! A generator that checks a handful of cases near infinities, zeros, asymptotes, and NaNs. + +use libm::support::{CastInto, Float, Int, MinInt}; + +use crate::domain::get_domain; +use crate::generate::KnownSize; +use crate::op::OpITy; +use crate::run_cfg::{check_near_count, check_point_count}; +use crate::{BaseName, CheckCtx, FloatExt, FloatTy, MathOp, test_log}; + +/// Generate a sequence of edge cases, e.g. numbers near zeroes and infiniteis. +pub trait EdgeCaseInput { + fn get_cases(ctx: &CheckCtx) -> (impl Iterator + Send, u64); +} + +/// Create a list of values around interesting points (infinities, zeroes, NaNs). +fn float_edge_cases( + ctx: &CheckCtx, + argnum: usize, +) -> (impl Iterator + Clone, u64) +where + Op: MathOp, +{ + let mut ret = Vec::new(); + let one = OpITy::::ONE; + let values = &mut ret; + let domain = get_domain::<_, i8>(ctx.fn_ident, argnum).unwrap_float(); + let domain_start = domain.range_start(); + let domain_end = domain.range_end(); + + let check_points = check_point_count(ctx); + let near_points = check_near_count(ctx); + + // Check near some notable constants + count_up(Op::FTy::ONE, near_points, values); + count_up(Op::FTy::ZERO, near_points, values); + count_up(Op::FTy::NEG_ONE, near_points, values); + count_down(Op::FTy::ONE, near_points, values); + count_down(Op::FTy::ZERO, near_points, values); + count_down(Op::FTy::NEG_ONE, near_points, values); + values.push(Op::FTy::NEG_ZERO); + + // Check values near the extremes + count_up(Op::FTy::NEG_INFINITY, near_points, values); + count_down(Op::FTy::INFINITY, near_points, values); + count_down(domain_end, near_points, values); + count_up(domain_start, near_points, values); + count_down(domain_start, near_points, values); + count_up(domain_end, near_points, values); + count_down(domain_end, near_points, values); + + // Check some special values that aren't included in the above ranges + values.push(Op::FTy::NAN); + values.extend(Op::FTy::consts().iter()); + + // Check around the maximum subnormal value + let sub_max = Op::FTy::from_bits(Op::FTy::SIG_MASK); + count_up(sub_max, near_points, values); + count_down(sub_max, near_points, values); + count_up(-sub_max, near_points, values); + count_down(-sub_max, near_points, values); + + // Check a few values around the subnormal range + for shift in (0..Op::FTy::SIG_BITS).step_by(Op::FTy::SIG_BITS as usize / 5) { + let v = Op::FTy::from_bits(one << shift); + count_up(v, 2, values); + count_down(v, 2, values); + count_up(-v, 2, values); + count_down(-v, 2, values); + } + + // Check around asymptotes + if let Some(f) = domain.check_points { + let iter = f(); + for x in iter.take(check_points) { + count_up(x, near_points, values); + count_down(x, near_points, values); + } + } + + // Some results may overlap so deduplicate the vector to save test cycles. + values.sort_by_key(|x| x.to_bits()); + values.dedup_by_key(|x| x.to_bits()); + + let count = ret.len().try_into().unwrap(); + + test_log(&format!( + "{gen_kind:?} {basis:?} {fn_ident} arg {arg}/{args}: {count} edge cases", + gen_kind = ctx.gen_kind, + basis = ctx.basis, + fn_ident = ctx.fn_ident, + arg = argnum + 1, + args = ctx.input_count(), + )); + + (ret.into_iter(), count) +} + +/// Add `points` values starting at and including `x` and counting up. Uses the smallest possible +/// increments (1 ULP). +fn count_up(mut x: F, points: u64, values: &mut Vec) { + assert!(!x.is_nan()); + + let mut count = 0; + while x < F::INFINITY && count < points { + values.push(x); + x = x.next_up(); + count += 1; + } +} + +/// Add `points` values starting at and including `x` and counting down. Uses the smallest possible +/// increments (1 ULP). +fn count_down(mut x: F, points: u64, values: &mut Vec) { + assert!(!x.is_nan()); + + let mut count = 0; + while x > F::NEG_INFINITY && count < points { + values.push(x); + x = x.next_down(); + count += 1; + } +} + +/// Create a list of values around interesting integer points (min, zero, max). +pub fn int_edge_cases( + ctx: &CheckCtx, + argnum: usize, +) -> (impl Iterator + Clone, u64) +where + i32: CastInto, +{ + let mut values = Vec::new(); + let near_points = check_near_count(ctx); + + // Check around max/min and zero + int_count_around(I::MIN, near_points, &mut values); + int_count_around(I::MAX, near_points, &mut values); + int_count_around(I::ZERO, near_points, &mut values); + int_count_around(I::ZERO, near_points, &mut values); + + if matches!(ctx.base_name, BaseName::Scalbn | BaseName::Ldexp) { + assert_eq!(argnum, 1, "scalbn integer argument should be arg1"); + let (emax, emin, emin_sn) = match ctx.fn_ident.math_op().float_ty { + FloatTy::F16 => { + #[cfg(not(f16_enabled))] + unreachable!(); + #[cfg(f16_enabled)] + (f16::EXP_MAX, f16::EXP_MIN, f16::EXP_MIN_SUBNORM) + } + FloatTy::F32 => (f32::EXP_MAX, f32::EXP_MIN, f32::EXP_MIN_SUBNORM), + FloatTy::F64 => (f64::EXP_MAX, f64::EXP_MIN, f64::EXP_MIN_SUBNORM), + FloatTy::F128 => { + #[cfg(not(f128_enabled))] + unreachable!(); + #[cfg(f128_enabled)] + (f128::EXP_MAX, f128::EXP_MIN, f128::EXP_MIN_SUBNORM) + } + }; + + // `scalbn`/`ldexp` have their trickiest behavior around exponent limits + int_count_around(emax.cast(), near_points, &mut values); + int_count_around(emin.cast(), near_points, &mut values); + int_count_around(emin_sn.cast(), near_points, &mut values); + int_count_around((-emin_sn).cast(), near_points, &mut values); + + // Also check values that cause the maximum possible difference in exponents + int_count_around((emax - emin).cast(), near_points, &mut values); + int_count_around((emin - emax).cast(), near_points, &mut values); + int_count_around((emax - emin_sn).cast(), near_points, &mut values); + int_count_around((emin_sn - emax).cast(), near_points, &mut values); + } + + values.sort(); + values.dedup(); + let count = values.len().try_into().unwrap(); + + test_log(&format!( + "{gen_kind:?} {basis:?} {fn_ident} arg {arg}/{args}: {count} edge cases", + gen_kind = ctx.gen_kind, + basis = ctx.basis, + fn_ident = ctx.fn_ident, + arg = argnum + 1, + args = ctx.input_count(), + )); + + (values.into_iter(), count) +} + +/// Add `points` values both up and down, starting at and including `x`. +fn int_count_around(x: I, points: u64, values: &mut Vec) { + let mut current = x; + for _ in 0..points { + values.push(current); + current = match current.checked_add(I::ONE) { + Some(v) => v, + None => break, + }; + } + + current = x; + for _ in 0..points { + values.push(current); + current = match current.checked_sub(I::ONE) { + Some(v) => v, + None => break, + }; + } +} + +macro_rules! impl_edge_case_input { + ($fty:ty) => { + impl EdgeCaseInput for ($fty,) + where + Op: MathOp, + { + fn get_cases(ctx: &CheckCtx) -> (impl Iterator, u64) { + let (iter0, steps0) = float_edge_cases::(ctx, 0); + let iter0 = iter0.map(|v| (v,)); + (iter0, steps0) + } + } + + impl EdgeCaseInput for ($fty, $fty) + where + Op: MathOp, + { + fn get_cases(ctx: &CheckCtx) -> (impl Iterator, u64) { + let (iter0, steps0) = float_edge_cases::(ctx, 0); + let (iter1, steps1) = float_edge_cases::(ctx, 1); + let iter = + iter0.flat_map(move |first| iter1.clone().map(move |second| (first, second))); + let count = steps0.checked_mul(steps1).unwrap(); + (iter, count) + } + } + + impl EdgeCaseInput for ($fty, $fty, $fty) + where + Op: MathOp, + { + fn get_cases(ctx: &CheckCtx) -> (impl Iterator, u64) { + let (iter0, steps0) = float_edge_cases::(ctx, 0); + let (iter1, steps1) = float_edge_cases::(ctx, 1); + let (iter2, steps2) = float_edge_cases::(ctx, 2); + + let iter = iter0 + .flat_map(move |first| iter1.clone().map(move |second| (first, second))) + .flat_map(move |(first, second)| { + iter2.clone().map(move |third| (first, second, third)) + }); + let count = steps0.checked_mul(steps1).unwrap().checked_mul(steps2).unwrap(); + + (iter, count) + } + } + + impl EdgeCaseInput for (i32, $fty) + where + Op: MathOp, + { + fn get_cases(ctx: &CheckCtx) -> (impl Iterator, u64) { + let (iter0, steps0) = int_edge_cases(ctx, 0); + let (iter1, steps1) = float_edge_cases::(ctx, 1); + + let iter = + iter0.flat_map(move |first| iter1.clone().map(move |second| (first, second))); + let count = steps0.checked_mul(steps1).unwrap(); + + (iter, count) + } + } + + impl EdgeCaseInput for ($fty, i32) + where + Op: MathOp, + { + fn get_cases(ctx: &CheckCtx) -> (impl Iterator, u64) { + let (iter0, steps0) = float_edge_cases::(ctx, 0); + let (iter1, steps1) = int_edge_cases(ctx, 1); + + let iter = + iter0.flat_map(move |first| iter1.clone().map(move |second| (first, second))); + let count = steps0.checked_mul(steps1).unwrap(); + + (iter, count) + } + } + }; +} + +#[cfg(f16_enabled)] +impl_edge_case_input!(f16); +impl_edge_case_input!(f32); +impl_edge_case_input!(f64); +#[cfg(f128_enabled)] +impl_edge_case_input!(f128); + +pub fn get_test_cases( + ctx: &CheckCtx, +) -> (impl Iterator + Send + use<'_, Op>, u64) +where + Op: MathOp, + Op::RustArgs: EdgeCaseInput, +{ + let (iter, count) = Op::RustArgs::get_cases(ctx); + + // Wrap in `KnownSize` so we get an assertion if the cuunt is wrong. + (KnownSize::new(iter, count), count) +} diff --git a/crates/libm-test/src/generate/random.rs b/crates/libm-test/src/generate/random.rs new file mode 100644 index 000000000..e8a7ee905 --- /dev/null +++ b/crates/libm-test/src/generate/random.rs @@ -0,0 +1,125 @@ +use std::env; +use std::ops::RangeInclusive; +use std::sync::LazyLock; + +use libm::support::Float; +use rand::distr::{Alphanumeric, StandardUniform}; +use rand::prelude::Distribution; +use rand::{Rng, SeedableRng}; +use rand_chacha::ChaCha8Rng; + +use super::KnownSize; +use crate::CheckCtx; +use crate::run_cfg::{int_range, iteration_count}; + +pub(crate) const SEED_ENV: &str = "LIBM_SEED"; + +pub static SEED: LazyLock<[u8; 32]> = LazyLock::new(|| { + let s = env::var(SEED_ENV).unwrap_or_else(|_| { + let mut rng = rand::rng(); + (0..32).map(|_| rng.sample(Alphanumeric) as char).collect() + }); + + s.as_bytes().try_into().unwrap_or_else(|_| { + panic!("Seed must be 32 characters, got `{s}`"); + }) +}); + +/// Generate a sequence of random values of this type. +pub trait RandomInput: Sized { + fn get_cases(ctx: &CheckCtx) -> (impl Iterator + Send, u64); +} + +/// Generate a sequence of deterministically random floats. +fn random_floats(count: u64) -> impl Iterator +where + StandardUniform: Distribution, +{ + let mut rng = ChaCha8Rng::from_seed(*SEED); + + // Generate integers to get a full range of bitpatterns (including NaNs), then convert back + // to the float type. + (0..count).map(move |_| F::from_bits(rng.random::())) +} + +/// Generate a sequence of deterministically random `i32`s within a specified range. +fn random_ints(count: u64, range: RangeInclusive) -> impl Iterator { + let mut rng = ChaCha8Rng::from_seed(*SEED); + (0..count).map(move |_| rng.random_range::(range.clone())) +} + +macro_rules! impl_random_input { + ($fty:ty) => { + impl RandomInput for ($fty,) { + fn get_cases(ctx: &CheckCtx) -> (impl Iterator, u64) { + let count = iteration_count(ctx, 0); + let iter = random_floats(count).map(|f: $fty| (f,)); + (iter, count) + } + } + + impl RandomInput for ($fty, $fty) { + fn get_cases(ctx: &CheckCtx) -> (impl Iterator, u64) { + let count0 = iteration_count(ctx, 0); + let count1 = iteration_count(ctx, 1); + let iter = random_floats(count0) + .flat_map(move |f1: $fty| random_floats(count1).map(move |f2: $fty| (f1, f2))); + (iter, count0 * count1) + } + } + + impl RandomInput for ($fty, $fty, $fty) { + fn get_cases(ctx: &CheckCtx) -> (impl Iterator, u64) { + let count0 = iteration_count(ctx, 0); + let count1 = iteration_count(ctx, 1); + let count2 = iteration_count(ctx, 2); + let iter = random_floats(count0).flat_map(move |f1: $fty| { + random_floats(count1).flat_map(move |f2: $fty| { + random_floats(count2).map(move |f3: $fty| (f1, f2, f3)) + }) + }); + (iter, count0 * count1 * count2) + } + } + + impl RandomInput for (i32, $fty) { + fn get_cases(ctx: &CheckCtx) -> (impl Iterator, u64) { + let count0 = iteration_count(ctx, 0); + let count1 = iteration_count(ctx, 1); + let range0 = int_range(ctx, 0); + let iter = random_ints(count0, range0) + .flat_map(move |f1: i32| random_floats(count1).map(move |f2: $fty| (f1, f2))); + (iter, count0 * count1) + } + } + + impl RandomInput for ($fty, i32) { + fn get_cases(ctx: &CheckCtx) -> (impl Iterator, u64) { + let count0 = iteration_count(ctx, 0); + let count1 = iteration_count(ctx, 1); + let range1 = int_range(ctx, 1); + let iter = random_floats(count0).flat_map(move |f1: $fty| { + random_ints(count1, range1.clone()).map(move |f2: i32| (f1, f2)) + }); + (iter, count0 * count1) + } + } + }; +} + +#[cfg(f16_enabled)] +impl_random_input!(f16); +impl_random_input!(f32); +impl_random_input!(f64); +#[cfg(f128_enabled)] +impl_random_input!(f128); + +/// Create a test case iterator. +pub fn get_test_cases( + ctx: &CheckCtx, +) -> (impl Iterator + Send + use<'_, RustArgs>, u64) { + let (iter, count) = RustArgs::get_cases(ctx); + + // Wrap in `KnownSize` so we get an assertion if the cuunt is wrong. + (KnownSize::new(iter, count), count) +} diff --git a/crates/libm-test/src/generate/spaced.rs b/crates/libm-test/src/generate/spaced.rs new file mode 100644 index 000000000..bea3f4c7e --- /dev/null +++ b/crates/libm-test/src/generate/spaced.rs @@ -0,0 +1,253 @@ +use std::fmt; +use std::ops::RangeInclusive; + +use libm::support::{Float, MinInt}; + +use crate::domain::get_domain; +use crate::op::OpITy; +use crate::run_cfg::{int_range, iteration_count}; +use crate::{CheckCtx, MathOp, linear_ints, logspace}; + +/// Generate a sequence of inputs that eiher cover the domain in completeness (for smaller float +/// types and single argument functions) or provide evenly spaced inputs across the domain with +/// approximately `u32::MAX` total iterations. +pub trait SpacedInput { + fn get_cases(ctx: &CheckCtx) -> (impl Iterator + Send, u64); +} + +/// Construct an iterator from `logspace` and also calculate the total number of steps expected +/// for that iterator. +fn logspace_steps( + ctx: &CheckCtx, + argnum: usize, + max_steps: u64, +) -> (impl Iterator + Clone, u64) +where + Op: MathOp, + OpITy: TryFrom, + u64: TryFrom, Error: fmt::Debug>, + RangeInclusive>: Iterator, +{ + // i8 is a dummy type here, it can be any integer. + let domain = get_domain::(ctx.fn_ident, argnum).unwrap_float(); + let start = domain.range_start(); + let end = domain.range_end(); + + let max_steps = OpITy::::try_from(max_steps).unwrap_or(OpITy::::MAX); + let (iter, steps) = logspace(start, end, max_steps); + + // `steps` will be <= the original `max_steps`, which is a `u64`. + (iter, steps.try_into().unwrap()) +} + +/// Represents the iterator in either `Left` or `Right`. +enum EitherIter { + A(A), + B(B), +} + +impl, B: Iterator> Iterator for EitherIter { + type Item = T; + + fn next(&mut self) -> Option { + match self { + Self::A(iter) => iter.next(), + Self::B(iter) => iter.next(), + } + } + + fn size_hint(&self) -> (usize, Option) { + match self { + Self::A(iter) => iter.size_hint(), + Self::B(iter) => iter.size_hint(), + } + } +} + +/// Gets the total number of possible values, returning `None` if that number doesn't fit in a +/// `u64`. +fn value_count() -> Option +where + u64: TryFrom, +{ + u64::try_from(F::Int::MAX).ok().and_then(|max| max.checked_add(1)) +} + +/// Returns an iterator of every possible value of type `F`. +fn all_values() -> impl Iterator +where + RangeInclusive: Iterator, +{ + (F::Int::MIN..=F::Int::MAX).map(|bits| F::from_bits(bits)) +} + +macro_rules! impl_spaced_input { + ($fty:ty) => { + impl SpacedInput for ($fty,) + where + Op: MathOp, + { + fn get_cases(ctx: &CheckCtx) -> (impl Iterator, u64) { + let max_steps0 = iteration_count(ctx, 0); + // `f16` and `f32` can have exhaustive tests. + match value_count::() { + Some(steps0) if steps0 <= max_steps0 => { + let iter0 = all_values(); + let iter0 = iter0.map(|v| (v,)); + (EitherIter::A(iter0), steps0) + } + _ => { + let (iter0, steps0) = logspace_steps::(ctx, 0, max_steps0); + let iter0 = iter0.map(|v| (v,)); + (EitherIter::B(iter0), steps0) + } + } + } + } + + impl SpacedInput for ($fty, $fty) + where + Op: MathOp, + { + fn get_cases(ctx: &CheckCtx) -> (impl Iterator, u64) { + let max_steps0 = iteration_count(ctx, 0); + let max_steps1 = iteration_count(ctx, 1); + // `f16` can have exhaustive tests. + match value_count::() { + Some(count) if count <= max_steps0 && count <= max_steps1 => { + let iter = all_values() + .flat_map(|first| all_values().map(move |second| (first, second))); + (EitherIter::A(iter), count.checked_mul(count).unwrap()) + } + _ => { + let (iter0, steps0) = logspace_steps::(ctx, 0, max_steps0); + let (iter1, steps1) = logspace_steps::(ctx, 1, max_steps1); + let iter = iter0.flat_map(move |first| { + iter1.clone().map(move |second| (first, second)) + }); + let count = steps0.checked_mul(steps1).unwrap(); + (EitherIter::B(iter), count) + } + } + } + } + + impl SpacedInput for ($fty, $fty, $fty) + where + Op: MathOp, + { + fn get_cases(ctx: &CheckCtx) -> (impl Iterator, u64) { + let max_steps0 = iteration_count(ctx, 0); + let max_steps1 = iteration_count(ctx, 1); + let max_steps2 = iteration_count(ctx, 2); + // `f16` can be exhaustive tested if `LIBM_EXTENSIVE_TESTS` is incresed. + match value_count::() { + Some(count) + if count <= max_steps0 && count <= max_steps1 && count <= max_steps2 => + { + let iter = all_values().flat_map(|first| { + all_values().flat_map(move |second| { + all_values().map(move |third| (first, second, third)) + }) + }); + (EitherIter::A(iter), count.checked_pow(3).unwrap()) + } + _ => { + let (iter0, steps0) = logspace_steps::(ctx, 0, max_steps0); + let (iter1, steps1) = logspace_steps::(ctx, 1, max_steps1); + let (iter2, steps2) = logspace_steps::(ctx, 2, max_steps2); + + let iter = iter0 + .flat_map(move |first| iter1.clone().map(move |second| (first, second))) + .flat_map(move |(first, second)| { + iter2.clone().map(move |third| (first, second, third)) + }); + let count = + steps0.checked_mul(steps1).unwrap().checked_mul(steps2).unwrap(); + + (EitherIter::B(iter), count) + } + } + } + } + + impl SpacedInput for (i32, $fty) + where + Op: MathOp, + { + fn get_cases(ctx: &CheckCtx) -> (impl Iterator, u64) { + let range0 = int_range(ctx, 0); + let max_steps0 = iteration_count(ctx, 0); + let max_steps1 = iteration_count(ctx, 1); + match value_count::() { + Some(count1) if count1 <= max_steps1 => { + let (iter0, steps0) = linear_ints(range0, max_steps0); + let iter = iter0 + .flat_map(move |first| all_values().map(move |second| (first, second))); + (EitherIter::A(iter), steps0.checked_mul(count1).unwrap()) + } + _ => { + let (iter0, steps0) = linear_ints(range0, max_steps0); + let (iter1, steps1) = logspace_steps::(ctx, 1, max_steps1); + + let iter = iter0.flat_map(move |first| { + iter1.clone().map(move |second| (first, second)) + }); + let count = steps0.checked_mul(steps1).unwrap(); + + (EitherIter::B(iter), count) + } + } + } + } + + impl SpacedInput for ($fty, i32) + where + Op: MathOp, + { + fn get_cases(ctx: &CheckCtx) -> (impl Iterator, u64) { + let max_steps0 = iteration_count(ctx, 0); + let range1 = int_range(ctx, 1); + let max_steps1 = iteration_count(ctx, 1); + match value_count::() { + Some(count0) if count0 <= max_steps0 => { + let (iter1, steps1) = linear_ints(range1, max_steps1); + let iter = all_values().flat_map(move |first| { + iter1.clone().map(move |second| (first, second)) + }); + (EitherIter::A(iter), count0.checked_mul(steps1).unwrap()) + } + _ => { + let (iter0, steps0) = logspace_steps::(ctx, 0, max_steps0); + let (iter1, steps1) = linear_ints(range1, max_steps1); + + let iter = iter0.flat_map(move |first| { + iter1.clone().map(move |second| (first, second)) + }); + let count = steps0.checked_mul(steps1).unwrap(); + + (EitherIter::B(iter), count) + } + } + } + } + }; +} + +#[cfg(f16_enabled)] +impl_spaced_input!(f16); +impl_spaced_input!(f32); +impl_spaced_input!(f64); +#[cfg(f128_enabled)] +impl_spaced_input!(f128); + +/// Create a test case iterator for extensive inputs. Also returns the total test case count. +pub fn get_test_cases( + ctx: &CheckCtx, +) -> (impl Iterator + Send + use<'_, Op>, u64) +where + Op: MathOp, + Op::RustArgs: SpacedInput, +{ + Op::RustArgs::get_cases(ctx) +} diff --git a/crates/libm-test/src/lib.rs b/crates/libm-test/src/lib.rs index 2abe7f605..485c01a47 100644 --- a/crates/libm-test/src/lib.rs +++ b/crates/libm-test/src/lib.rs @@ -1,68 +1,44 @@ -pub mod gen; -mod num_traits; -mod special_case; +#![cfg_attr(f16_enabled, feature(f16))] +#![cfg_attr(f128_enabled, feature(f128))] +#![allow(clippy::unusual_byte_groupings)] // sometimes we group by sign_exp_sig + +pub mod domain; +mod f8_impl; +pub mod generate; +#[cfg(feature = "build-mpfr")] +pub mod mpfloat; +mod num; +pub mod op; +mod precision; +mod run_cfg; mod test_traits; -pub use num_traits::{Float, Hex, Int}; -pub use special_case::{MaybeOverride, SpecialCase}; -pub use test_traits::{CheckBasis, CheckCtx, CheckOutput, GenerateInput, TupleCall}; +use std::env; +use std::fs::File; +use std::io::Write; +use std::path::PathBuf; +use std::sync::LazyLock; +use std::time::SystemTime; + +pub use f8_impl::{f8, hf8}; +pub use libm::support::{Float, Int, IntTy, MinInt}; +pub use num::{FloatExt, linear_ints, logspace}; +pub use op::{ + BaseName, FloatTy, Identifier, MathOp, OpCFn, OpCRet, OpFTy, OpRustArgs, OpRustFn, OpRustRet, + Ty, +}; +pub use precision::{MaybeOverride, SpecialCase, default_ulp}; +use run_cfg::extensive_max_iterations; +pub use run_cfg::{ + CheckBasis, CheckCtx, EXTENSIVE_ENV, GeneratorKind, bigint_fuzz_iteration_count, + skip_extensive_test, +}; +pub use test_traits::{CheckOutput, Hex, TupleCall}; /// Result type for tests is usually from `anyhow`. Most times there is no success value to /// propagate. pub type TestResult = Result; -// List of all files present in libm's source -include!(concat!(env!("OUT_DIR"), "/all_files.rs")); - -/// ULP allowed to differ from musl (note that musl itself may not be accurate). -const MUSL_DEFAULT_ULP: u32 = 2; - -/// Certain functions have different allowed ULP (consider these xfail). -/// -/// Note that these results were obtained using 400,000,000 rounds of random inputs, which -/// is not a value used by default. -pub fn musl_allowed_ulp(name: &str) -> u32 { - match name { - #[cfg(x86_no_sse)] - "asinh" | "asinhf" => 6, - "lgamma" | "lgamma_r" | "lgammaf" | "lgammaf_r" => 400, - "tanh" | "tanhf" => 4, - "tgamma" => 20, - "j0" | "j0f" | "j1" | "j1f" => { - // Results seem very target-dependent - if cfg!(target_arch = "x86_64") { 4000 } else { 800_000 } - } - "jn" | "jnf" => 1000, - "sincosf" => 500, - #[cfg(not(target_pointer_width = "64"))] - "exp10" => 4, - #[cfg(not(target_pointer_width = "64"))] - "exp10f" => 4, - _ => MUSL_DEFAULT_ULP, - } -} - -/// Return the unsuffixed version of a function name; e.g. `abs` and `absf` both return `abs`, -/// `lgamma_r` and `lgammaf_r` both return `lgamma_r`. -pub fn canonical_name(name: &str) -> &str { - let known_mappings = &[ - ("erff", "erf"), - ("erf", "erf"), - ("lgammaf_r", "lgamma_r"), - ("modff", "modf"), - ("modf", "modf"), - ]; - - match known_mappings.iter().find(|known| known.0 == name) { - Some(found) => found.1, - None => name - .strip_suffix("f") - .or_else(|| name.strip_suffix("f16")) - .or_else(|| name.strip_suffix("f128")) - .unwrap_or(name), - } -} - /// True if `EMULATED` is set and nonempty. Used to determine how many iterations to run. pub const fn emulated() -> bool { match option_env!("EMULATED") { @@ -71,3 +47,59 @@ pub const fn emulated() -> bool { Some(_) => true, } } + +/// True if `CI` is set and nonempty. +pub const fn ci() -> bool { + match option_env!("CI") { + Some(s) if s.is_empty() => false, + None => false, + Some(_) => true, + } +} + +/// Print to stderr and additionally log it to `target/test-log.txt`. This is useful for saving +/// output that would otherwise be consumed by the test harness. +pub fn test_log(s: &str) { + // Handle to a file opened in append mode, unless a suitable path can't be determined. + static OUTFILE: LazyLock> = LazyLock::new(|| { + // If the target directory is overridden, use that environment variable. Otherwise, save + // at the default path `{workspace_root}/target`. + let target_dir = match env::var("CARGO_TARGET_DIR") { + Ok(s) => PathBuf::from(s), + Err(_) => { + let Ok(x) = env::var("CARGO_MANIFEST_DIR") else { + return None; + }; + + PathBuf::from(x).parent().unwrap().parent().unwrap().join("target") + } + }; + let outfile = target_dir.join("test-log.txt"); + + let mut f = File::options() + .create(true) + .append(true) + .open(outfile) + .expect("failed to open logfile"); + let now = SystemTime::now().duration_since(SystemTime::UNIX_EPOCH).unwrap(); + + writeln!(f, "\n\nTest run at {}", now.as_secs()).unwrap(); + writeln!(f, "arch: {}", env::consts::ARCH).unwrap(); + writeln!(f, "os: {}", env::consts::OS).unwrap(); + writeln!(f, "bits: {}", usize::BITS).unwrap(); + writeln!(f, "emulated: {}", emulated()).unwrap(); + writeln!(f, "ci: {}", ci()).unwrap(); + writeln!(f, "cargo features: {}", env!("CFG_CARGO_FEATURES")).unwrap(); + writeln!(f, "opt level: {}", env!("CFG_OPT_LEVEL")).unwrap(); + writeln!(f, "target features: {}", env!("CFG_TARGET_FEATURES")).unwrap(); + writeln!(f, "extensive iterations {}", extensive_max_iterations()).unwrap(); + + Some(f) + }); + + eprintln!("{s}"); + + if let Some(mut f) = OUTFILE.as_ref() { + writeln!(f, "{s}").unwrap(); + } +} diff --git a/crates/libm-test/src/mpfloat.rs b/crates/libm-test/src/mpfloat.rs new file mode 100644 index 000000000..9b51dc605 --- /dev/null +++ b/crates/libm-test/src/mpfloat.rs @@ -0,0 +1,603 @@ +//! Interfaces needed to support testing with multi-precision floating point numbers. +//! +//! Within this module, the macros create a submodule for each `libm` function. These contain +//! a struct named `Operation` that implements [`MpOp`]. + +use std::cmp::Ordering; + +use rug::Assign; +pub use rug::Float as MpFloat; +use rug::az::{self, Az}; +use rug::float::Round::Nearest; +use rug::ops::{PowAssignRound, RemAssignRound}; + +use crate::{Float, MathOp}; + +/// Create a multiple-precision float with the correct number of bits for a concrete float type. +fn new_mpfloat() -> MpFloat { + MpFloat::new(F::SIG_BITS + 1) +} + +/// Set subnormal emulation and convert to a concrete float type. +fn prep_retval(mp: &mut MpFloat, ord: Ordering) -> F +where + for<'a> &'a MpFloat: az::Cast, +{ + mp.subnormalize_ieee_round(ord, Nearest); + (&*mp).az::() +} + +/// Structures that represent a float operation. +/// +pub trait MpOp: MathOp { + /// The struct itself should hold any context that can be reused among calls to `run` (allocated + /// `MpFloat`s). + type MpTy; + + /// Create a new instance. + fn new_mp() -> Self::MpTy; + + /// Perform the operation. + /// + /// Usually this means assigning inputs to cached floats, performing the operation, applying + /// subnormal approximation, and converting the result back to concrete values. + fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet; +} + +/// Implement `MpOp` for functions with a single return value. +macro_rules! impl_mp_op { + // Matcher for unary functions + ( + fn_name: $fn_name:ident, + RustFn: fn($_fty:ty,) -> $_ret:ty, + attrs: [$($attr:meta),*], + fn_extra: $fn_name_normalized:expr, + ) => { + paste::paste! { + $(#[$attr])* + impl MpOp for crate::op::$fn_name::Routine { + type MpTy = MpFloat; + + fn new_mp() -> Self::MpTy { + new_mpfloat::() + } + + fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet { + this.assign(input.0); + let ord = this.[< $fn_name_normalized _round >](Nearest); + prep_retval::(this, ord) + } + } + } + }; + // Matcher for binary functions + ( + fn_name: $fn_name:ident, + RustFn: fn($_fty:ty, $_fty2:ty,) -> $_ret:ty, + attrs: [$($attr:meta),*], + fn_extra: $fn_name_normalized:expr, + ) => { + paste::paste! { + $(#[$attr])* + impl MpOp for crate::op::$fn_name::Routine { + type MpTy = (MpFloat, MpFloat); + + fn new_mp() -> Self::MpTy { + (new_mpfloat::(), new_mpfloat::()) + } + + fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet { + this.0.assign(input.0); + this.1.assign(input.1); + let ord = this.0.[< $fn_name_normalized _round >](&this.1, Nearest); + prep_retval::(&mut this.0, ord) + } + } + } + }; + // Matcher for ternary functions + ( + fn_name: $fn_name:ident, + RustFn: fn($_fty:ty, $_fty2:ty, $_fty3:ty,) -> $_ret:ty, + attrs: [$($attr:meta),*], + fn_extra: $fn_name_normalized:expr, + ) => { + paste::paste! { + $(#[$attr])* + impl MpOp for crate::op::$fn_name::Routine { + type MpTy = (MpFloat, MpFloat, MpFloat); + + fn new_mp() -> Self::MpTy { + ( + new_mpfloat::(), + new_mpfloat::(), + new_mpfloat::(), + ) + } + + fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet { + this.0.assign(input.0); + this.1.assign(input.1); + this.2.assign(input.2); + let ord = this.0.[< $fn_name_normalized _round >](&this.1, &this.2, Nearest); + prep_retval::(&mut this.0, ord) + } + } + } + }; +} + +libm_macros::for_each_function! { + callback: impl_mp_op, + emit_types: [RustFn], + skip: [ + // Most of these need a manual implementation + // verify-sorted-start + ceil, + ceilf, + ceilf128, + ceilf16, + copysign, + copysignf, + copysignf128, + copysignf16, + fabs, + fabsf, + fabsf128, + fabsf16,floor, + floorf, + floorf128, + floorf16, + fmaximum, + fmaximumf, + fmaximumf128, + fmaximumf16, + fminimum, + fminimumf, + fminimumf128, + fminimumf16, + fmod, + fmodf, + fmodf128, + fmodf16, + frexp, + frexpf, + ilogb, + ilogbf, + jn, + jnf, + ldexp, + ldexpf, + ldexpf128, + ldexpf16, + lgamma_r, + lgammaf_r, + modf, + modff, + nextafter, + nextafterf, + pow, + powf,remquo, + remquof, + rint, + rintf, + rintf128, + rintf16, + round, + roundeven, + roundevenf, + roundevenf128, + roundevenf16, + roundf, + roundf128, + roundf16, + scalbn, + scalbnf, + scalbnf128, + scalbnf16, + sincos,sincosf, + trunc, + truncf, + truncf128, + truncf16,yn, + ynf, + // verify-sorted-end + ], + fn_extra: match MACRO_FN_NAME { + // Remap function names that are different between mpfr and libm + expm1 | expm1f => exp_m1, + fabs | fabsf => abs, + fdim | fdimf | fdimf16 | fdimf128 => positive_diff, + fma | fmaf | fmaf128 => mul_add, + fmax | fmaxf | fmaxf16 | fmaxf128 | + fmaximum_num | fmaximum_numf | fmaximum_numf16 | fmaximum_numf128 => max, + fmin | fminf | fminf16 | fminf128 | + fminimum_num | fminimum_numf | fminimum_numf16 | fminimum_numf128 => min, + lgamma | lgammaf => ln_gamma, + log | logf => ln, + log1p | log1pf => ln_1p, + tgamma | tgammaf => gamma, + _ => MACRO_FN_NAME_NORMALIZED + } +} + +/// Implement unary functions that don't have a `_round` version +macro_rules! impl_no_round { + // Unary matcher + ($($fn_name:ident => $rug_name:ident;)*) => { + paste::paste! { + $( impl_no_round!{ @inner_unary $fn_name, $rug_name } )* + } + }; + + (@inner_unary $fn_name:ident, $rug_name:ident) => { + impl MpOp for crate::op::$fn_name::Routine { + type MpTy = MpFloat; + + fn new_mp() -> Self::MpTy { + new_mpfloat::() + } + + fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet { + this.assign(input.0); + this.$rug_name(); + prep_retval::(this, Ordering::Equal) + } + } + }; +} + +impl_no_round! { + ceil => ceil_mut; + ceilf => ceil_mut; + fabs => abs_mut; + fabsf => abs_mut; + floor => floor_mut; + floorf => floor_mut; + rint => round_even_mut; // FIXME: respect rounding mode + rintf => round_even_mut; // FIXME: respect rounding mode + round => round_mut; + roundeven => round_even_mut; + roundevenf => round_even_mut; + roundf => round_mut; + trunc => trunc_mut; + truncf => trunc_mut; +} + +#[cfg(f16_enabled)] +impl_no_round! { + ceilf16 => ceil_mut; + fabsf16 => abs_mut; + floorf16 => floor_mut; + rintf16 => round_even_mut; // FIXME: respect rounding mode + roundf16 => round_mut; + roundevenf16 => round_even_mut; + truncf16 => trunc_mut; +} + +#[cfg(f128_enabled)] +impl_no_round! { + ceilf128 => ceil_mut; + fabsf128 => abs_mut; + floorf128 => floor_mut; + rintf128 => round_even_mut; // FIXME: respect rounding mode + roundf128 => round_mut; + roundevenf128 => round_even_mut; + truncf128 => trunc_mut; +} + +/// Some functions are difficult to do in a generic way. Implement them here. +macro_rules! impl_op_for_ty { + ($fty:ty, $suffix:literal) => { + paste::paste! { + impl MpOp for crate::op::[]::Routine { + type MpTy = (MpFloat, MpFloat); + + fn new_mp() -> Self::MpTy { + (new_mpfloat::(), new_mpfloat::()) + } + + fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet { + this.0.assign(input.0); + this.1.assign(&this.0); + let (ord0, ord1) = this.0.trunc_fract_round(&mut this.1, Nearest); + ( + prep_retval::(&mut this.1, ord0), + prep_retval::(&mut this.0, ord1), + ) + } + } + + impl MpOp for crate::op::[]::Routine { + type MpTy = (MpFloat, MpFloat); + + fn new_mp() -> Self::MpTy { + (new_mpfloat::(), new_mpfloat::()) + } + + fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet { + this.0.assign(input.0); + this.1.assign(input.1); + let ord = this.0.pow_assign_round(&this.1, Nearest); + prep_retval::(&mut this.0, ord) + } + } + + impl MpOp for crate::op::[]::Routine { + type MpTy = MpFloat; + + fn new_mp() -> Self::MpTy { + new_mpfloat::() + } + + fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet { + this.assign(input.0); + let exp = this.frexp_mut(); + (prep_retval::(this, Ordering::Equal), exp) + } + } + + impl MpOp for crate::op::[]::Routine { + type MpTy = MpFloat; + + fn new_mp() -> Self::MpTy { + new_mpfloat::() + } + + fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet { + this.assign(input.0); + + // `get_exp` follows `frexp` for `0.5 <= |m| < 1.0`. Adjust the exponent by + // one to scale the significand to `1.0 <= |m| < 2.0`. + this.get_exp().map(|v| v - 1).unwrap_or_else(|| { + if this.is_infinite() { + i32::MAX + } else { + // Zero or NaN + i32::MIN + } + }) + } + } + + impl MpOp for crate::op::[]::Routine { + type MpTy = MpFloat; + + fn new_mp() -> Self::MpTy { + new_mpfloat::() + } + + fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet { + let (n, x) = input; + this.assign(x); + let ord = this.jn_round(n, Nearest); + prep_retval::(this, ord) + } + } + + impl MpOp for crate::op::[]::Routine { + type MpTy = (MpFloat, MpFloat); + + fn new_mp() -> Self::MpTy { + (new_mpfloat::(), new_mpfloat::()) + } + + fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet { + this.0.assign(input.0); + this.1.assign(0.0); + let (sord, cord) = this.0.sin_cos_round(&mut this.1, Nearest); + ( + prep_retval::(&mut this.0, sord), + prep_retval::(&mut this.1, cord) + ) + } + } + + impl MpOp for crate::op::[]::Routine { + type MpTy = (MpFloat, MpFloat); + + fn new_mp() -> Self::MpTy { + ( + new_mpfloat::(), + new_mpfloat::(), + ) + } + + fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet { + this.0.assign(input.0); + this.1.assign(input.1); + let (ord, q) = this.0.remainder_quo31_round(&this.1, Nearest); + (prep_retval::(&mut this.0, ord), q) + } + } + + impl MpOp for crate::op::[]::Routine { + type MpTy = MpFloat; + + fn new_mp() -> Self::MpTy { + new_mpfloat::() + } + + fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet { + let (n, x) = input; + this.assign(x); + let ord = this.yn_round(n, Nearest); + prep_retval::(this, ord) + } + } + } + }; +} + +/// Version of `impl_op_for_ty` with only functions that have `f16` and `f128` implementations. +macro_rules! impl_op_for_ty_all { + ($fty:ty, $suffix:literal) => { + paste::paste! { + impl MpOp for crate::op::[]::Routine { + type MpTy = (MpFloat, MpFloat); + + fn new_mp() -> Self::MpTy { + (new_mpfloat::(), new_mpfloat::()) + } + + fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet { + this.0.assign(input.0); + this.1.assign(input.1); + this.0.copysign_mut(&this.1); + prep_retval::(&mut this.0, Ordering::Equal) + } + } + + impl MpOp for crate::op::[]::Routine { + type MpTy = (MpFloat, MpFloat); + + fn new_mp() -> Self::MpTy { + (new_mpfloat::(), new_mpfloat::()) + } + + fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet { + this.0.assign(input.0); + this.1.assign(input.1); + let ord = this.0.rem_assign_round(&this.1, Nearest); + prep_retval::(&mut this.0, ord) + + } + } + + impl MpOp for crate::op::[< fmaximum $suffix >]::Routine { + type MpTy = (MpFloat, MpFloat); + + fn new_mp() -> Self::MpTy { + (new_mpfloat::(), new_mpfloat::()) + } + + fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet { + this.0.assign(input.0); + this.1.assign(input.1); + let ord = if this.0.is_nan() || this.1.is_nan() { + this.0.assign($fty::NAN); + Ordering::Equal + } else { + this.0.max_round(&this.1, Nearest) + }; + prep_retval::(&mut this.0, ord) + } + } + + impl MpOp for crate::op::[< fminimum $suffix >]::Routine { + type MpTy = (MpFloat, MpFloat); + + fn new_mp() -> Self::MpTy { + (new_mpfloat::(), new_mpfloat::()) + } + + fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet { + this.0.assign(input.0); + this.1.assign(input.1); + let ord = if this.0.is_nan() || this.1.is_nan() { + this.0.assign($fty::NAN); + Ordering::Equal + } else { + this.0.min_round(&this.1, Nearest) + }; + prep_retval::(&mut this.0, ord) + } + } + + // `ldexp` and `scalbn` are the same for binary floating point, so just forward all + // methods. + impl MpOp for crate::op::[]::Routine { + type MpTy = ]::Routine as MpOp>::MpTy; + + fn new_mp() -> Self::MpTy { + ]::Routine as MpOp>::new_mp() + } + + fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet { + ]::Routine as MpOp>::run(this, input) + } + } + + impl MpOp for crate::op::[]::Routine { + type MpTy = MpFloat; + + fn new_mp() -> Self::MpTy { + new_mpfloat::() + } + + fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet { + this.assign(input.0); + *this <<= input.1; + prep_retval::(this, Ordering::Equal) + } + } + } + }; +} + +impl_op_for_ty!(f32, "f"); +impl_op_for_ty!(f64, ""); + +#[cfg(f16_enabled)] +impl_op_for_ty_all!(f16, "f16"); +impl_op_for_ty_all!(f32, "f"); +impl_op_for_ty_all!(f64, ""); +#[cfg(f128_enabled)] +impl_op_for_ty_all!(f128, "f128"); + +// `lgamma_r` is not a simple suffix so we can't use the above macro. +impl MpOp for crate::op::lgamma_r::Routine { + type MpTy = MpFloat; + + fn new_mp() -> Self::MpTy { + new_mpfloat::() + } + + fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet { + this.assign(input.0); + let (sign, ord) = this.ln_abs_gamma_round(Nearest); + let ret = prep_retval::(this, ord); + (ret, sign as i32) + } +} + +impl MpOp for crate::op::lgammaf_r::Routine { + type MpTy = MpFloat; + + fn new_mp() -> Self::MpTy { + new_mpfloat::() + } + + fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet { + this.assign(input.0); + let (sign, ord) = this.ln_abs_gamma_round(Nearest); + let ret = prep_retval::(this, ord); + (ret, sign as i32) + } +} + +/* stub implementations so we don't need to special case them */ + +impl MpOp for crate::op::nextafter::Routine { + type MpTy = MpFloat; + + fn new_mp() -> Self::MpTy { + unimplemented!("nextafter does not yet have a MPFR operation"); + } + + fn run(_this: &mut Self::MpTy, _input: Self::RustArgs) -> Self::RustRet { + unimplemented!("nextafter does not yet have a MPFR operation"); + } +} + +impl MpOp for crate::op::nextafterf::Routine { + type MpTy = MpFloat; + + fn new_mp() -> Self::MpTy { + unimplemented!("nextafter does not yet have a MPFR operation"); + } + + fn run(_this: &mut Self::MpTy, _input: Self::RustArgs) -> Self::RustRet { + unimplemented!("nextafter does not yet have a MPFR operation"); + } +} diff --git a/crates/libm-test/src/num.rs b/crates/libm-test/src/num.rs new file mode 100644 index 000000000..eed941423 --- /dev/null +++ b/crates/libm-test/src/num.rs @@ -0,0 +1,529 @@ +//! Helpful numeric operations. + +use std::cmp::min; +use std::ops::RangeInclusive; + +use libm::support::Float; + +use crate::{Int, MinInt}; + +/// Extension to `libm`'s `Float` trait with methods that are useful for tests but not +/// needed in `libm` itself. +pub trait FloatExt: Float { + /// The minimum subnormal number. + const TINY_BITS: Self::Int = Self::Int::ONE; + + /// Retrieve additional constants for this float type. + fn consts() -> Consts { + Consts::new() + } + + /// Increment by one ULP, saturating at infinity. + fn next_up(self) -> Self { + let bits = self.to_bits(); + if self.is_nan() || bits == Self::INFINITY.to_bits() { + return self; + } + + let abs = self.abs().to_bits(); + let next_bits = if abs == Self::Int::ZERO { + // Next up from 0 is the smallest subnormal + Self::TINY_BITS + } else if bits == abs { + // Positive: counting up is more positive + bits + Self::Int::ONE + } else { + // Negative: counting down is more positive + bits - Self::Int::ONE + }; + Self::from_bits(next_bits) + } + + /// A faster way to effectively call `next_up` `n` times. + fn n_up(self, n: Self::Int) -> Self { + let bits = self.to_bits(); + if self.is_nan() || bits == Self::INFINITY.to_bits() || n == Self::Int::ZERO { + return self; + } + + let abs = self.abs().to_bits(); + let is_positive = bits == abs; + let crosses_zero = !is_positive && n > abs; + let inf_bits = Self::INFINITY.to_bits(); + + let next_bits = if abs == Self::Int::ZERO { + min(n, inf_bits) + } else if crosses_zero { + min(n - abs, inf_bits) + } else if is_positive { + // Positive, counting up is more positive but this may overflow + match bits.checked_add(n) { + Some(v) if v >= inf_bits => inf_bits, + Some(v) => v, + None => inf_bits, + } + } else { + // Negative, counting down is more positive + bits - n + }; + Self::from_bits(next_bits) + } + + /// Decrement by one ULP, saturating at negative infinity. + fn next_down(self) -> Self { + let bits = self.to_bits(); + if self.is_nan() || bits == Self::NEG_INFINITY.to_bits() { + return self; + } + + let abs = self.abs().to_bits(); + let next_bits = if abs == Self::Int::ZERO { + // Next up from 0 is the smallest negative subnormal + Self::TINY_BITS | Self::SIGN_MASK + } else if bits == abs { + // Positive: counting down is more negative + bits - Self::Int::ONE + } else { + // Negative: counting up is more negative + bits + Self::Int::ONE + }; + Self::from_bits(next_bits) + } + + /// A faster way to effectively call `next_down` `n` times. + fn n_down(self, n: Self::Int) -> Self { + let bits = self.to_bits(); + if self.is_nan() || bits == Self::NEG_INFINITY.to_bits() || n == Self::Int::ZERO { + return self; + } + + let abs = self.abs().to_bits(); + let is_positive = bits == abs; + let crosses_zero = is_positive && n > abs; + let inf_bits = Self::INFINITY.to_bits(); + let ninf_bits = Self::NEG_INFINITY.to_bits(); + + let next_bits = if abs == Self::Int::ZERO { + min(n, inf_bits) | Self::SIGN_MASK + } else if crosses_zero { + min(n - abs, inf_bits) | Self::SIGN_MASK + } else if is_positive { + // Positive, counting down is more negative + bits - n + } else { + // Negative, counting up is more negative but this may overflow + match bits.checked_add(n) { + Some(v) if v > ninf_bits => ninf_bits, + Some(v) => v, + None => ninf_bits, + } + }; + Self::from_bits(next_bits) + } +} + +impl FloatExt for F where F: Float {} + +/// Extra constants that are useful for tests. +#[derive(Debug, Clone, Copy)] +pub struct Consts { + /// The default quiet NaN, which is also the minimum quiet NaN. + pub pos_nan: F, + /// The default quiet NaN with negative sign. + pub neg_nan: F, + /// NaN with maximum (unsigned) significand to be a quiet NaN. The significand is saturated. + pub max_qnan: F, + /// NaN with minimum (unsigned) significand to be a signaling NaN. + pub min_snan: F, + /// NaN with maximum (unsigned) significand to be a signaling NaN. + pub max_snan: F, + pub neg_max_qnan: F, + pub neg_min_snan: F, + pub neg_max_snan: F, +} + +impl Consts { + fn new() -> Self { + let top_sigbit_mask = F::Int::ONE << (F::SIG_BITS - 1); + let pos_nan = F::EXP_MASK | top_sigbit_mask; + let max_qnan = F::EXP_MASK | F::SIG_MASK; + let min_snan = F::EXP_MASK | F::Int::ONE; + let max_snan = (F::EXP_MASK | F::SIG_MASK) ^ top_sigbit_mask; + + let neg_nan = pos_nan | F::SIGN_MASK; + let neg_max_qnan = max_qnan | F::SIGN_MASK; + let neg_min_snan = min_snan | F::SIGN_MASK; + let neg_max_snan = max_snan | F::SIGN_MASK; + + Self { + pos_nan: F::from_bits(pos_nan), + neg_nan: F::from_bits(neg_nan), + max_qnan: F::from_bits(max_qnan), + min_snan: F::from_bits(min_snan), + max_snan: F::from_bits(max_snan), + neg_max_qnan: F::from_bits(neg_max_qnan), + neg_min_snan: F::from_bits(neg_min_snan), + neg_max_snan: F::from_bits(neg_max_snan), + } + } + + pub fn iter(self) -> impl Iterator { + // Destructure so we get unused warnings if we forget a list entry. + let Self { + pos_nan, + neg_nan, + max_qnan, + min_snan, + max_snan, + neg_max_qnan, + neg_min_snan, + neg_max_snan, + } = self; + + [pos_nan, neg_nan, max_qnan, min_snan, max_snan, neg_max_qnan, neg_min_snan, neg_max_snan] + .into_iter() + } +} + +/// Return the number of steps between two floats, returning `None` if either input is NaN. +/// +/// This is the number of steps needed for `n_up` or `n_down` to go between values. Infinities +/// are treated the same as those functions (will return the nearest finite value), and only one +/// of `-0` or `+0` is counted. It does not matter which value is greater. +pub fn ulp_between(x: F, y: F) -> Option { + let a = as_ulp_steps(x)?; + let b = as_ulp_steps(y)?; + Some(a.abs_diff(b)) +} + +/// Return the (signed) number of steps from zero to `x`. +fn as_ulp_steps(x: F) -> Option { + let s = x.to_bits_signed(); + let val = if s >= F::SignedInt::ZERO { + // each increment from `s = 0` is one step up from `x = 0.0` + s + } else { + // each increment from `s = F::SignedInt::MIN` is one step down from `x = -0.0` + F::SignedInt::MIN - s + }; + + // If `x` is NaN, return `None` + (!x.is_nan()).then_some(val) +} + +/// An iterator that returns floats with linearly spaced integer representations, which translates +/// to logarithmic spacing of their values. +/// +/// Note that this tends to skip negative zero, so that needs to be checked explicitly. +/// +/// Returns `(iterator, iterator_length)`. +pub fn logspace( + start: F, + end: F, + steps: F::Int, +) -> (impl Iterator + Clone, F::Int) +where + RangeInclusive: Iterator, +{ + assert!(!start.is_nan()); + assert!(!end.is_nan()); + assert!(end >= start); + + let steps = steps.checked_sub(F::Int::ONE).expect("`steps` must be at least 2"); + let between = ulp_between(start, end).expect("`start` or `end` is NaN"); + let spacing = (between / steps).max(F::Int::ONE); + let steps = steps.min(between); // At maximum, one step per ULP + + let mut x = start; + ( + (F::Int::ZERO..=steps).map(move |_| { + let ret = x; + x = x.n_up(spacing); + ret + }), + steps + F::Int::ONE, + ) +} + +/// Returns an iterator of up to `steps` integers evenly distributed. +pub fn linear_ints( + range: RangeInclusive, + steps: u64, +) -> (impl Iterator + Clone, u64) { + let steps = steps.checked_sub(1).unwrap(); + let between = u64::from(range.start().abs_diff(*range.end())); + let spacing = i32::try_from((between / steps).max(1)).unwrap(); + let steps = steps.min(between); + let mut x: i32 = *range.start(); + ( + (0..=steps).map(move |_| { + let res = x; + // Wrapping add to avoid panic on last item (where `x` could overflow past i32::MAX as + // there is no next item). + x = x.wrapping_add(spacing); + res + }), + steps + 1, + ) +} + +#[cfg(test)] +mod tests { + use std::cmp::max; + + use super::*; + use crate::f8; + + #[test] + fn test_next_up_down() { + for (i, v) in f8::ALL.into_iter().enumerate() { + let down = v.next_down().to_bits(); + let up = v.next_up().to_bits(); + + if i == 0 { + assert_eq!(down, f8::NEG_INFINITY.to_bits(), "{i} next_down({v:#010b})"); + } else { + let expected = + if v == f8::ZERO { 1 | f8::SIGN_MASK } else { f8::ALL[i - 1].to_bits() }; + assert_eq!(down, expected, "{i} next_down({v:#010b})"); + } + + if i == f8::ALL_LEN - 1 { + assert_eq!(up, f8::INFINITY.to_bits(), "{i} next_up({v:#010b})"); + } else { + let expected = if v == f8::NEG_ZERO { 1 } else { f8::ALL[i + 1].to_bits() }; + assert_eq!(up, expected, "{i} next_up({v:#010b})"); + } + } + } + + #[test] + fn test_next_up_down_inf_nan() { + assert_eq!(f8::NEG_INFINITY.next_up().to_bits(), f8::ALL[0].to_bits(),); + assert_eq!(f8::NEG_INFINITY.next_down().to_bits(), f8::NEG_INFINITY.to_bits(),); + assert_eq!(f8::INFINITY.next_down().to_bits(), f8::ALL[f8::ALL_LEN - 1].to_bits(),); + assert_eq!(f8::INFINITY.next_up().to_bits(), f8::INFINITY.to_bits(),); + assert_eq!(f8::NAN.next_up().to_bits(), f8::NAN.to_bits(),); + assert_eq!(f8::NAN.next_down().to_bits(), f8::NAN.to_bits(),); + } + + #[test] + fn test_n_up_down_quick() { + assert_eq!(f8::ALL[0].n_up(4).to_bits(), f8::ALL[4].to_bits(),); + assert_eq!( + f8::ALL[f8::ALL_LEN - 1].n_down(4).to_bits(), + f8::ALL[f8::ALL_LEN - 5].to_bits(), + ); + + // Check around zero + assert_eq!(f8::from_bits(0b0).n_up(7).to_bits(), 0b0_0000_111); + assert_eq!(f8::from_bits(0b0).n_down(7).to_bits(), 0b1_0000_111); + + // Check across zero + assert_eq!(f8::from_bits(0b1_0000_111).n_up(8).to_bits(), 0b0_0000_001); + assert_eq!(f8::from_bits(0b0_0000_111).n_down(8).to_bits(), 0b1_0000_001); + } + + #[test] + fn test_n_up_down_one() { + // Verify that `n_up(1)` and `n_down(1)` are the same as `next_up()` and next_down()`.` + for i in 0..u8::MAX { + let v = f8::from_bits(i); + assert_eq!(v.next_up().to_bits(), v.n_up(1).to_bits()); + assert_eq!(v.next_down().to_bits(), v.n_down(1).to_bits()); + } + } + + #[test] + fn test_n_up_down_inf_nan_zero() { + assert_eq!(f8::NEG_INFINITY.n_up(1).to_bits(), f8::ALL[0].to_bits()); + assert_eq!(f8::NEG_INFINITY.n_up(239).to_bits(), f8::ALL[f8::ALL_LEN - 1].to_bits()); + assert_eq!(f8::NEG_INFINITY.n_up(240).to_bits(), f8::INFINITY.to_bits()); + assert_eq!(f8::NEG_INFINITY.n_down(u8::MAX).to_bits(), f8::NEG_INFINITY.to_bits()); + + assert_eq!(f8::INFINITY.n_down(1).to_bits(), f8::ALL[f8::ALL_LEN - 1].to_bits()); + assert_eq!(f8::INFINITY.n_down(239).to_bits(), f8::ALL[0].to_bits()); + assert_eq!(f8::INFINITY.n_down(240).to_bits(), f8::NEG_INFINITY.to_bits()); + assert_eq!(f8::INFINITY.n_up(u8::MAX).to_bits(), f8::INFINITY.to_bits()); + + assert_eq!(f8::NAN.n_up(u8::MAX).to_bits(), f8::NAN.to_bits()); + assert_eq!(f8::NAN.n_down(u8::MAX).to_bits(), f8::NAN.to_bits()); + + assert_eq!(f8::ZERO.n_down(1).to_bits(), f8::TINY_BITS | f8::SIGN_MASK); + assert_eq!(f8::NEG_ZERO.n_up(1).to_bits(), f8::TINY_BITS); + } + + /// True if the specified range of `f8::ALL` includes both +0 and -0 + fn crossed_zero(start: usize, end: usize) -> bool { + let crossed = &f8::ALL[start..=end]; + crossed.iter().any(|f| f8::eq_repr(*f, f8::ZERO)) + && crossed.iter().any(|f| f8::eq_repr(*f, f8::NEG_ZERO)) + } + + #[test] + fn test_n_up_down() { + for (i, v) in f8::ALL.into_iter().enumerate() { + for n in 0..f8::ALL_LEN { + let down = v.n_down(n as u8).to_bits(); + let up = v.n_up(n as u8).to_bits(); + + if let Some(down_exp_idx) = i.checked_sub(n) { + // No overflow + let mut expected = f8::ALL[down_exp_idx].to_bits(); + if n >= 1 && crossed_zero(down_exp_idx, i) { + // If both -0 and +0 are included, we need to adjust our expected value + match down_exp_idx.checked_sub(1) { + Some(v) => expected = f8::ALL[v].to_bits(), + // Saturate to -inf if we are out of values + None => expected = f8::NEG_INFINITY.to_bits(), + } + } + assert_eq!(down, expected, "{i} {n} n_down({v:#010b})"); + } else { + // Overflow to -inf + assert_eq!(down, f8::NEG_INFINITY.to_bits(), "{i} {n} n_down({v:#010b})"); + } + + let mut up_exp_idx = i + n; + if up_exp_idx < f8::ALL_LEN { + // No overflow + if n >= 1 && up_exp_idx < f8::ALL_LEN && crossed_zero(i, up_exp_idx) { + // If both -0 and +0 are included, we need to adjust our expected value + up_exp_idx += 1; + } + + let expected = if up_exp_idx >= f8::ALL_LEN { + f8::INFINITY.to_bits() + } else { + f8::ALL[up_exp_idx].to_bits() + }; + + assert_eq!(up, expected, "{i} {n} n_up({v:#010b})"); + } else { + // Overflow to +inf + assert_eq!(up, f8::INFINITY.to_bits(), "{i} {n} n_up({v:#010b})"); + } + } + } + } + + #[test] + fn test_ulp_between() { + for (i, x) in f8::ALL.into_iter().enumerate() { + for (j, y) in f8::ALL.into_iter().enumerate() { + let ulp = ulp_between(x, y).unwrap(); + let make_msg = || format!("i: {i} j: {j} x: {x:b} y: {y:b} ulp {ulp}"); + + let i_low = min(i, j); + let i_hi = max(i, j); + let mut expected = u8::try_from(i_hi - i_low).unwrap(); + if crossed_zero(i_low, i_hi) { + expected -= 1; + } + + assert_eq!(ulp, expected, "{}", make_msg()); + + // Skip if either are zero since `next_{up,down}` will count over it + let either_zero = x == f8::ZERO || y == f8::ZERO; + if x < y && !either_zero { + assert_eq!(x.n_up(ulp).to_bits(), y.to_bits(), "{}", make_msg()); + assert_eq!(y.n_down(ulp).to_bits(), x.to_bits(), "{}", make_msg()); + } else if !either_zero { + assert_eq!(y.n_up(ulp).to_bits(), x.to_bits(), "{}", make_msg()); + assert_eq!(x.n_down(ulp).to_bits(), y.to_bits(), "{}", make_msg()); + } + } + } + } + + #[test] + fn test_ulp_between_inf_nan_zero() { + assert_eq!(ulp_between(f8::NEG_INFINITY, f8::INFINITY).unwrap(), f8::ALL_LEN as u8); + assert_eq!(ulp_between(f8::INFINITY, f8::NEG_INFINITY).unwrap(), f8::ALL_LEN as u8); + assert_eq!( + ulp_between(f8::NEG_INFINITY, f8::ALL[f8::ALL_LEN - 1]).unwrap(), + f8::ALL_LEN as u8 - 1 + ); + assert_eq!(ulp_between(f8::INFINITY, f8::ALL[0]).unwrap(), f8::ALL_LEN as u8 - 1); + + assert_eq!(ulp_between(f8::ZERO, f8::NEG_ZERO).unwrap(), 0); + assert_eq!(ulp_between(f8::NAN, f8::ZERO), None); + assert_eq!(ulp_between(f8::ZERO, f8::NAN), None); + } + + #[test] + fn test_logspace() { + let (ls, count) = logspace(f8::from_bits(0x0), f8::from_bits(0x4), 2); + let ls: Vec<_> = ls.collect(); + let exp = [f8::from_bits(0x0), f8::from_bits(0x4)]; + assert_eq!(ls, exp); + assert_eq!(ls.len(), usize::from(count)); + + let (ls, count) = logspace(f8::from_bits(0x0), f8::from_bits(0x4), 3); + let ls: Vec<_> = ls.collect(); + let exp = [f8::from_bits(0x0), f8::from_bits(0x2), f8::from_bits(0x4)]; + assert_eq!(ls, exp); + assert_eq!(ls.len(), usize::from(count)); + + // Check that we include all values with no repeats if `steps` exceeds the maximum number + // of steps. + let (ls, count) = logspace(f8::from_bits(0x0), f8::from_bits(0x3), 10); + let ls: Vec<_> = ls.collect(); + let exp = [f8::from_bits(0x0), f8::from_bits(0x1), f8::from_bits(0x2), f8::from_bits(0x3)]; + assert_eq!(ls, exp); + assert_eq!(ls.len(), usize::from(count)); + } + + #[test] + fn test_linear_ints() { + let (ints, count) = linear_ints(0..=4, 2); + let ints: Vec<_> = ints.collect(); + let exp = [0, 4]; + assert_eq!(ints, exp); + assert_eq!(ints.len(), usize::try_from(count).unwrap()); + + let (ints, count) = linear_ints(0..=4, 3); + let ints: Vec<_> = ints.collect(); + let exp = [0, 2, 4]; + assert_eq!(ints, exp); + assert_eq!(ints.len(), usize::try_from(count).unwrap()); + + // Check that we include all values with no repeats if `steps` exceeds the maximum number + // of steps. + let (ints, count) = linear_ints(0x0..=0x3, 10); + let ints: Vec<_> = ints.collect(); + let exp = [0, 1, 2, 3]; + assert_eq!(ints, exp); + assert_eq!(ints.len(), usize::try_from(count).unwrap()); + + // Check that there are no panics around `i32::MAX`. + let (ints, count) = linear_ints(i32::MAX - 1..=i32::MAX, 5); + let ints: Vec<_> = ints.collect(); + let exp = [i32::MAX - 1, i32::MAX]; + assert_eq!(ints, exp); + assert_eq!(ints.len(), usize::try_from(count).unwrap()); + } + + #[test] + fn test_consts() { + let Consts { + pos_nan, + neg_nan, + max_qnan, + min_snan, + max_snan, + neg_max_qnan, + neg_min_snan, + neg_max_snan, + } = f8::consts(); + + assert_eq!(pos_nan.to_bits(), 0b0_1111_100); + assert_eq!(neg_nan.to_bits(), 0b1_1111_100); + assert_eq!(max_qnan.to_bits(), 0b0_1111_111); + assert_eq!(min_snan.to_bits(), 0b0_1111_001); + assert_eq!(max_snan.to_bits(), 0b0_1111_011); + assert_eq!(neg_max_qnan.to_bits(), 0b1_1111_111); + assert_eq!(neg_min_snan.to_bits(), 0b1_1111_001); + assert_eq!(neg_max_snan.to_bits(), 0b1_1111_011); + } +} diff --git a/crates/libm-test/src/num_traits.rs b/crates/libm-test/src/num_traits.rs deleted file mode 100644 index e16f4e4dc..000000000 --- a/crates/libm-test/src/num_traits.rs +++ /dev/null @@ -1,214 +0,0 @@ -use std::fmt; - -use crate::{MaybeOverride, SpecialCase, TestResult}; - -/// Common types and methods for floating point numbers. -pub trait Float: Copy + fmt::Display + fmt::Debug + PartialEq { - type Int: Int; - type SignedInt: Int + Int; - - const ZERO: Self; - const ONE: Self; - - /// The bitwidth of the float type - const BITS: u32; - - /// The bitwidth of the significand - const SIGNIFICAND_BITS: u32; - - /// The bitwidth of the exponent - const EXPONENT_BITS: u32 = Self::BITS - Self::SIGNIFICAND_BITS - 1; - - fn is_nan(self) -> bool; - fn is_infinite(self) -> bool; - fn to_bits(self) -> Self::Int; - fn from_bits(bits: Self::Int) -> Self; - fn signum(self) -> Self; -} - -macro_rules! impl_float { - ($($fty:ty, $ui:ty, $si:ty, $significand_bits:expr;)+) => { - $( - impl Float for $fty { - type Int = $ui; - type SignedInt = $si; - - const ZERO: Self = 0.0; - const ONE: Self = 1.0; - - const BITS: u32 = <$ui>::BITS; - const SIGNIFICAND_BITS: u32 = $significand_bits; - - fn is_nan(self) -> bool { - self.is_nan() - } - fn is_infinite(self) -> bool { - self.is_infinite() - } - fn to_bits(self) -> Self::Int { - self.to_bits() - } - fn from_bits(bits: Self::Int) -> Self { - Self::from_bits(bits) - } - fn signum(self) -> Self { - self.signum() - } - } - - impl Hex for $fty { - fn hex(self) -> String { - self.to_bits().hex() - } - } - )+ - } -} - -impl_float!( - f32, u32, i32, 23; - f64, u64, i64, 52; -); - -/// Common types and methods for integers. -pub trait Int: Copy + fmt::Display + fmt::Debug + PartialEq { - type OtherSign: Int; - type Unsigned: Int; - const BITS: u32; - const SIGNED: bool; - - fn signed(self) -> ::OtherSign; - fn unsigned(self) -> Self::Unsigned; - fn checked_sub(self, other: Self) -> Option; - fn abs(self) -> Self; -} - -macro_rules! impl_int { - ($($ui:ty, $si:ty ;)+) => { - $( - impl Int for $ui { - type OtherSign = $si; - type Unsigned = Self; - const BITS: u32 = <$ui>::BITS; - const SIGNED: bool = false; - fn signed(self) -> Self::OtherSign { - self as $si - } - fn unsigned(self) -> Self { - self - } - fn checked_sub(self, other: Self) -> Option { - self.checked_sub(other) - } - fn abs(self) -> Self { - unimplemented!() - } - } - - impl Int for $si { - type OtherSign = $ui; - type Unsigned = $ui; - const BITS: u32 = <$ui>::BITS; - const SIGNED: bool = true; - fn signed(self) -> Self { - self - } - fn unsigned(self) -> $ui { - self as $ui - } - fn checked_sub(self, other: Self) -> Option { - self.checked_sub(other) - } - fn abs(self) -> Self { - self.abs() - } - } - - impl_int!(@for_both $si); - impl_int!(@for_both $ui); - - )+ - }; - - (@for_both $ty:ty) => { - impl Hex for $ty { - fn hex(self) -> String { - format!("{self:#0width$x}", width = ((Self::BITS / 4) + 2) as usize) - } - } - - impl $crate::CheckOutput for $ty - where - Input: Hex + fmt::Debug, - SpecialCase: MaybeOverride, - { - fn validate<'a>( - self, - expected: Self, - input: Input, - ctx: &$crate::CheckCtx, - ) -> TestResult { - if let Some(res) = SpecialCase::check_int(input, self, expected, ctx) { - return res; - } - - anyhow::ensure!( - self == expected, - "\ - \n input: {input:?} {ibits}\ - \n expected: {expected:<22?} {expbits}\ - \n actual: {self:<22?} {actbits}\ - ", - actbits = self.hex(), - expbits = expected.hex(), - ibits = input.hex(), - ); - - Ok(()) - } - } - } -} - -impl_int!( - u32, i32; - u64, i64; -); - -/// A helper trait to print something as hex with the correct number of nibbles, e.g. a `u32` -/// will always print with `0x` followed by 8 digits. -/// -/// This is only used for printing errors so allocating is okay. -pub trait Hex: Copy { - fn hex(self) -> String; -} - -impl Hex for (T1,) -where - T1: Hex, -{ - fn hex(self) -> String { - format!("({},)", self.0.hex()) - } -} - -impl Hex for (T1, T2) -where - T1: Hex, - T2: Hex, -{ - fn hex(self) -> String { - format!("({}, {})", self.0.hex(), self.1.hex()) - } -} - -impl Hex for (T1, T2, T3) -where - T1: Hex, - T2: Hex, - T3: Hex, -{ - fn hex(self) -> String { - format!("({}, {}, {})", self.0.hex(), self.1.hex(), self.2.hex()) - } -} diff --git a/crates/libm-test/src/op.rs b/crates/libm-test/src/op.rs new file mode 100644 index 000000000..47d72ae58 --- /dev/null +++ b/crates/libm-test/src/op.rs @@ -0,0 +1,151 @@ +//! Types representing individual functions. +//! +//! Each routine gets a module with its name, e.g. `mod sinf { /* ... */ }`. The module +//! contains a unit struct `Routine` which implements `MathOp`. +//! +//! Basically everything could be called a "function" here, so we loosely use the following +//! terminology: +//! +//! - "Function": the math operation that does not have an associated precision. E.g. `f(x) = e^x`, +//! `f(x) = log(x)`. +//! - "Routine": A code implementation of a math operation with a specific precision. E.g. `exp`, +//! `expf`, `expl`, `log`, `logf`. +//! - "Operation" / "Op": Something that relates a routine to a function or is otherwise higher +//! level. `Op` is also used as the name for generic parameters since it is terse. + +use std::fmt; +use std::panic::{RefUnwindSafe, UnwindSafe}; + +pub use shared::{ALL_OPERATIONS, FloatTy, MathOpInfo, Ty}; + +use crate::{CheckOutput, Float, TupleCall}; + +mod shared { + include!("../../libm-macros/src/shared.rs"); +} + +/// An enum representing each possible symbol name (`sin`, `sinf`, `sinl`, etc). +#[libm_macros::function_enum(BaseName)] +#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub enum Identifier {} + +impl fmt::Display for Identifier { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.write_str(self.as_str()) + } +} + +/// The name without any type specifier, e.g. `sin` and `sinf` both become `sin`. +#[libm_macros::base_name_enum] +#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub enum BaseName {} + +impl fmt::Display for BaseName { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.write_str(self.as_str()) + } +} + +/// Attributes ascribed to a `libm` routine including signature, type information, +/// and naming. +pub trait MathOp { + /// The float type used for this operation. + type FTy: Float; + + /// The function type representing the signature in a C library. + type CFn: Copy; + + /// Arguments passed to the C library function as a tuple. These may include `&mut` return + /// values. + type CArgs<'a> + where + Self: 'a; + + /// The type returned by C implementations. + type CRet; + + /// The signature of the Rust function as a `fn(...) -> ...` type. + type RustFn: Copy + UnwindSafe; + + /// Arguments passed to the Rust library function as a tuple. + /// + /// The required `TupleCall` bounds ensure this type can be passed either to the C function or + /// to the Rust function. + type RustArgs: Copy + + TupleCall + + TupleCall + + RefUnwindSafe; + + /// Type returned from the Rust function. + type RustRet: CheckOutput; + + /// The name of this function, including suffix (e.g. `sin`, `sinf`). + const IDENTIFIER: Identifier; + + /// The name as a string. + const NAME: &'static str = Self::IDENTIFIER.as_str(); + + /// The name of the function excluding the type suffix, e.g. `sin` and `sinf` are both `sin`. + const BASE_NAME: BaseName = Self::IDENTIFIER.base_name(); + + /// The function in `libm` which can be called. + const ROUTINE: Self::RustFn; +} + +/// Access the associated `FTy` type from an op (helper to avoid ambiguous associated types). +pub type OpFTy = ::FTy; +/// Access the associated `FTy::Int` type from an op (helper to avoid ambiguous associated types). +pub type OpITy = <::FTy as Float>::Int; +/// Access the associated `CFn` type from an op (helper to avoid ambiguous associated types). +pub type OpCFn = ::CFn; +/// Access the associated `CRet` type from an op (helper to avoid ambiguous associated types). +pub type OpCRet = ::CRet; +/// Access the associated `RustFn` type from an op (helper to avoid ambiguous associated types). +pub type OpRustFn = ::RustFn; +/// Access the associated `RustArgs` type from an op (helper to avoid ambiguous associated types). +pub type OpRustArgs = ::RustArgs; +/// Access the associated `RustRet` type from an op (helper to avoid ambiguous associated types). +pub type OpRustRet = ::RustRet; + +macro_rules! do_thing { + // Matcher for unary functions + ( + fn_name: $fn_name:ident, + FTy: $FTy:ty, + CFn: $CFn:ty, + CArgs: $CArgs:ty, + CRet: $CRet:ty, + RustFn: $RustFn:ty, + RustArgs: $RustArgs:ty, + RustRet: $RustRet:ty, + attrs: [$($attr:meta),*], + + ) => { + paste::paste! { + $(#[$attr])* + pub mod $fn_name { + use super::*; + pub struct Routine; + + impl MathOp for Routine { + type FTy = $FTy; + type CFn = for<'a> $CFn; + type CArgs<'a> = $CArgs where Self: 'a; + type CRet = $CRet; + type RustFn = $RustFn; + type RustArgs = $RustArgs; + type RustRet = $RustRet; + + const IDENTIFIER: Identifier = Identifier::[< $fn_name:camel >]; + const ROUTINE: Self::RustFn = libm::$fn_name; + } + } + + } + }; +} + +libm_macros::for_each_function! { + callback: do_thing, + emit_types: all, +} diff --git a/crates/libm-test/src/precision.rs b/crates/libm-test/src/precision.rs new file mode 100644 index 000000000..f5fb5f670 --- /dev/null +++ b/crates/libm-test/src/precision.rs @@ -0,0 +1,573 @@ +//! Configuration for skipping or changing the result for individual test cases (inputs) rather +//! than ignoring entire tests. + +use core::f32; + +use CheckBasis::{Mpfr, Musl}; +use libm::support::CastFrom; +use {BaseName as Bn, Identifier as Id}; + +use crate::{BaseName, CheckBasis, CheckCtx, Float, Identifier, Int, TestResult}; + +/// Type implementing [`IgnoreCase`]. +pub struct SpecialCase; + +/// ULP allowed to differ from the results returned by a test basis. +#[allow(clippy::single_match)] +pub fn default_ulp(ctx: &CheckCtx) -> u32 { + // ULP compared to the infinite (MPFR) result. + let mut ulp = match ctx.base_name { + // Operations that require exact results. This list should correlate with what we + // have documented at . + Bn::Ceil + | Bn::Copysign + | Bn::Fabs + | Bn::Fdim + | Bn::Floor + | Bn::Fma + | Bn::Fmax + | Bn::Fmaximum + | Bn::FmaximumNum + | Bn::Fmin + | Bn::Fminimum + | Bn::FminimumNum + | Bn::Fmod + | Bn::Frexp + | Bn::Ilogb + | Bn::Ldexp + | Bn::Modf + | Bn::Nextafter + | Bn::Remainder + | Bn::Remquo + | Bn::Rint + | Bn::Round + | Bn::Roundeven + | Bn::Scalbn + | Bn::Sqrt + | Bn::Trunc => 0, + + // Operations that aren't required to be exact, but our implementations are. + Bn::Cbrt => 0, + + // Bessel functions have large inaccuracies. + Bn::J0 | Bn::J1 | Bn::Y0 | Bn::Y1 | Bn::Jn | Bn::Yn => 8_000_000, + + // For all other operations, specify our implementation's worst case precision. + Bn::Acos => 1, + Bn::Acosh => 4, + Bn::Asin => 1, + Bn::Asinh => 2, + Bn::Atan => 1, + Bn::Atan2 => 2, + Bn::Atanh => 2, + Bn::Cos => 1, + Bn::Cosh => 1, + Bn::Erf => 1, + Bn::Erfc => 4, + Bn::Exp => 1, + Bn::Exp10 => 6, + Bn::Exp2 => 1, + Bn::Expm1 => 1, + Bn::Hypot => 1, + Bn::Lgamma | Bn::LgammaR => 16, + Bn::Log => 1, + Bn::Log10 => 1, + Bn::Log1p => 1, + Bn::Log2 => 1, + Bn::Pow => 1, + Bn::Sin => 1, + Bn::Sincos => 1, + Bn::Sinh => 2, + Bn::Tan => 1, + Bn::Tanh => 2, + // tgammaf has higher accuracy than tgamma. + Bn::Tgamma if ctx.fn_ident != Id::Tgamma => 1, + Bn::Tgamma => 20, + }; + + // There are some cases where musl's approximation is less accurate than ours. For these + // cases, increase the ULP. + if ctx.basis == Musl { + match ctx.base_name { + Bn::Cosh => ulp = 2, + Bn::Exp10 if usize::BITS < 64 => ulp = 4, + Bn::Lgamma | Bn::LgammaR => ulp = 400, + Bn::Tanh => ulp = 4, + _ => (), + } + + match ctx.fn_ident { + Id::Cbrt => ulp = 2, + // FIXME(#401): musl has an incorrect result here. + Id::Fdim => ulp = 2, + Id::Sincosf => ulp = 500, + Id::Tgamma => ulp = 20, + _ => (), + } + } + + if cfg!(target_arch = "x86") { + match ctx.fn_ident { + // Input `fma(0.999999999999999, 1.0000000000000013, 0.0) = 1.0000000000000002` is + // incorrect on i586 and i686. + Id::Fma => ulp = 1, + _ => (), + } + } + + // In some cases, our implementation is less accurate than musl on i586. + if cfg!(x86_no_sse) { + match ctx.fn_ident { + // FIXME(#401): these need to be correctly rounded but are not. + Id::Fmaf => ulp = 1, + Id::Fdim => ulp = 1, + Id::Round => ulp = 1, + + Id::Asinh => ulp = 3, + Id::Asinhf => ulp = 3, + Id::Cbrt => ulp = 1, + Id::Exp10 | Id::Exp10f => ulp = 1_000_000, + Id::Exp2 | Id::Exp2f => ulp = 10_000_000, + Id::Log1p | Id::Log1pf => ulp = 2, + Id::Tan => ulp = 2, + _ => (), + } + } + + ulp +} + +/// Result of checking for possible overrides. +#[derive(Debug, Default)] +pub enum CheckAction { + /// The check should pass. Default case. + #[default] + AssertSuccess, + + /// Override the ULP for this check. + AssertWithUlp(u32), + + /// Failure is expected, ensure this is the case (xfail). Takes a contxt string to help trace + /// back exactly why we expect this to fail. + AssertFailure(&'static str), + + /// The override somehow validated the result, here it is. + Custom(TestResult), + + /// Disregard the output. + Skip, +} + +/// Don't run further validation on this test case. +const SKIP: CheckAction = CheckAction::Skip; + +/// Return this to skip checks on a test that currently fails but shouldn't. Takes a description +/// of context. +const XFAIL: fn(&'static str) -> CheckAction = CheckAction::AssertFailure; + +/// Indicates that we expect a test to fail but we aren't asserting that it does (e.g. some results +/// within a range do actually pass). +/// +/// Same as `SKIP`, just indicates we have something to eventually fix. +const XFAIL_NOCHECK: CheckAction = CheckAction::Skip; + +/// By default, all tests should pass. +const DEFAULT: CheckAction = CheckAction::AssertSuccess; + +/// Allow overriding the outputs of specific test cases. +/// +/// There are some cases where we want to xfail specific cases or handle certain inputs +/// differently than the rest of calls to `validate`. This provides a hook to do that. +/// +/// If `None` is returned, checks will proceed as usual. If `Some(result)` is returned, checks +/// are skipped and the provided result is returned instead. +/// +/// This gets implemented once per input type, then the functions provide further filtering +/// based on function name and values. +/// +/// `ulp` can also be set to adjust the ULP for that specific test, even if `None` is still +/// returned. +pub trait MaybeOverride { + fn check_float( + _input: Input, + _actual: F, + _expected: F, + _ctx: &CheckCtx, + ) -> CheckAction { + DEFAULT + } + + fn check_int(_input: Input, _actual: I, _expected: I, _ctx: &CheckCtx) -> CheckAction { + DEFAULT + } +} + +#[cfg(f16_enabled)] +impl MaybeOverride<(f16,)> for SpecialCase {} + +impl MaybeOverride<(f32,)> for SpecialCase { + fn check_float(input: (f32,), actual: F, expected: F, ctx: &CheckCtx) -> CheckAction { + if ctx.base_name == BaseName::Expm1 + && !input.0.is_infinite() + && input.0 > 80.0 + && actual.is_infinite() + && !expected.is_infinite() + { + // we return infinity but the number is representable + if ctx.basis == CheckBasis::Musl { + return XFAIL_NOCHECK; + } + return XFAIL("expm1 representable numbers"); + } + + if cfg!(x86_no_sse) + && ctx.base_name == BaseName::Exp2 + && !expected.is_infinite() + && actual.is_infinite() + { + // We return infinity when there is a representable value. Test input: 127.97238 + return XFAIL("586 exp2 representable numbers"); + } + + if ctx.base_name == BaseName::Sinh && input.0.abs() > 80.0 && actual.is_nan() { + // we return some NaN that should be real values or infinite + if ctx.basis == CheckBasis::Musl { + return XFAIL_NOCHECK; + } + return XFAIL("sinh unexpected NaN"); + } + + if (ctx.base_name == BaseName::Lgamma || ctx.base_name == BaseName::LgammaR) + && input.0 > 4e36 + && expected.is_infinite() + && !actual.is_infinite() + { + // This result should saturate but we return a finite value. + return XFAIL_NOCHECK; + } + + if ctx.base_name == BaseName::J0 && input.0 < -1e34 { + // Errors get huge close to -inf + return XFAIL_NOCHECK; + } + + unop_common(input, actual, expected, ctx) + } + + fn check_int(input: (f32,), actual: I, expected: I, ctx: &CheckCtx) -> CheckAction { + // On MPFR for lgammaf_r, we set -1 as the integer result for negative infinity but MPFR + // sets +1 + if ctx.basis == CheckBasis::Mpfr + && ctx.base_name == BaseName::LgammaR + && input.0 == f32::NEG_INFINITY + && actual.abs() == expected.abs() + { + return XFAIL("lgammar integer result"); + } + + DEFAULT + } +} + +impl MaybeOverride<(f64,)> for SpecialCase { + fn check_float(input: (f64,), actual: F, expected: F, ctx: &CheckCtx) -> CheckAction { + if cfg!(x86_no_sse) + && ctx.base_name == BaseName::Ceil + && ctx.basis == CheckBasis::Musl + && input.0 < 0.0 + && input.0 > -1.0 + && expected == F::ZERO + && actual == F::ZERO + { + // musl returns -0.0, we return +0.0 + return XFAIL("i586 ceil signed zero"); + } + + if cfg!(x86_no_sse) + && (ctx.base_name == BaseName::Rint || ctx.base_name == BaseName::Roundeven) + && (expected - actual).abs() <= F::ONE + && (expected - actual).abs() > F::ZERO + { + // Our rounding mode is incorrect. + return XFAIL("i586 rint rounding mode"); + } + + if cfg!(x86_no_sse) + && (ctx.fn_ident == Identifier::Ceil || ctx.fn_ident == Identifier::Floor) + && expected.eq_repr(F::NEG_ZERO) + && actual.eq_repr(F::ZERO) + { + // FIXME: the x87 implementations do not keep the distinction between -0.0 and 0.0. + // See https://github.com/rust-lang/libm/pull/404#issuecomment-2572399955 + return XFAIL("i586 ceil/floor signed zero"); + } + + if cfg!(x86_no_sse) + && (ctx.fn_ident == Identifier::Exp10 || ctx.fn_ident == Identifier::Exp2) + { + // FIXME: i586 has very imprecise results with ULP > u32::MAX for these + // operations so we can't reasonably provide a limit. + return XFAIL_NOCHECK; + } + + if ctx.base_name == BaseName::J0 && input.0 < -1e300 { + // Errors get huge close to -inf + return XFAIL_NOCHECK; + } + + // maybe_check_nan_bits(actual, expected, ctx) + unop_common(input, actual, expected, ctx) + } + + fn check_int(input: (f64,), actual: I, expected: I, ctx: &CheckCtx) -> CheckAction { + // On MPFR for lgamma_r, we set -1 as the integer result for negative infinity but MPFR + // sets +1 + if ctx.basis == CheckBasis::Mpfr + && ctx.base_name == BaseName::LgammaR + && input.0 == f64::NEG_INFINITY + && actual.abs() == expected.abs() + { + return XFAIL("lgammar integer result"); + } + + DEFAULT + } +} + +#[cfg(f128_enabled)] +impl MaybeOverride<(f128,)> for SpecialCase {} + +// F1 and F2 are always the same type, this is just to please generics +fn unop_common( + input: (F1,), + actual: F2, + expected: F2, + ctx: &CheckCtx, +) -> CheckAction { + if ctx.base_name == BaseName::Acosh + && input.0 < F1::NEG_ONE + && !(expected.is_nan() && actual.is_nan()) + { + // acoshf is undefined for x <= 1.0, but we return a random result at lower values. + + if ctx.basis == CheckBasis::Musl { + return XFAIL_NOCHECK; + } + + return XFAIL("acoshf undefined"); + } + + if (ctx.base_name == BaseName::Lgamma || ctx.base_name == BaseName::LgammaR) + && input.0 < F1::ZERO + && !input.0.is_infinite() + { + // loggamma should not be defined for x < 0, yet we both return results + return XFAIL_NOCHECK; + } + + // fabs and copysign must leave NaNs untouched. + if ctx.base_name == BaseName::Fabs && input.0.is_nan() { + // LLVM currently uses x87 instructions which quieten signalling NaNs to handle the i686 + // `extern "C"` `f32`/`f64` return ABI. + // LLVM issue + // Rust issue + if cfg!(target_arch = "x86") && ctx.basis == CheckBasis::Musl && actual.is_nan() { + return XFAIL_NOCHECK; + } + + // MPFR only has one NaN bitpattern; allow the default `.is_nan()` checks to validate. + if ctx.basis == CheckBasis::Mpfr { + return DEFAULT; + } + + // abs and copysign require signaling NaNs to be propagated, so verify bit equality. + if actual.to_bits() == expected.to_bits() { + return CheckAction::Custom(Ok(())); + } else { + return CheckAction::Custom(Err(anyhow::anyhow!("NaNs have different bitpatterns"))); + } + } + + DEFAULT +} + +#[cfg(f16_enabled)] +impl MaybeOverride<(f16, f16)> for SpecialCase { + fn check_float( + input: (f16, f16), + actual: F, + expected: F, + ctx: &CheckCtx, + ) -> CheckAction { + binop_common(input, actual, expected, ctx) + } +} + +impl MaybeOverride<(f32, f32)> for SpecialCase { + fn check_float( + input: (f32, f32), + actual: F, + expected: F, + ctx: &CheckCtx, + ) -> CheckAction { + binop_common(input, actual, expected, ctx) + } +} + +impl MaybeOverride<(f64, f64)> for SpecialCase { + fn check_float( + input: (f64, f64), + actual: F, + expected: F, + ctx: &CheckCtx, + ) -> CheckAction { + binop_common(input, actual, expected, ctx) + } +} + +#[cfg(f128_enabled)] +impl MaybeOverride<(f128, f128)> for SpecialCase { + fn check_float( + input: (f128, f128), + actual: F, + expected: F, + ctx: &CheckCtx, + ) -> CheckAction { + binop_common(input, actual, expected, ctx) + } +} + +// F1 and F2 are always the same type, this is just to please generics +fn binop_common( + input: (F1, F1), + actual: F2, + expected: F2, + ctx: &CheckCtx, +) -> CheckAction { + // MPFR only has one NaN bitpattern; allow the default `.is_nan()` checks to validate. Skip if + // the first input (magnitude source) is NaN and the output is also a NaN, or if the second + // input (sign source) is NaN. + if ctx.basis == CheckBasis::Mpfr + && ((input.0.is_nan() && actual.is_nan() && expected.is_nan()) || input.1.is_nan()) + { + return SKIP; + } + + /* FIXME(#439): our fmin and fmax do not compare signed zeros */ + + if ctx.base_name == BaseName::Fmin + && input.0.biteq(F1::NEG_ZERO) + && input.1.biteq(F1::ZERO) + && expected.biteq(F2::NEG_ZERO) + && actual.biteq(F2::ZERO) + { + return XFAIL("fmin signed zeroes"); + } + + if ctx.base_name == BaseName::Fmax + && input.0.biteq(F1::NEG_ZERO) + && input.1.biteq(F1::ZERO) + && expected.biteq(F2::ZERO) + && actual.biteq(F2::NEG_ZERO) + { + return XFAIL("fmax signed zeroes"); + } + + // Musl propagates NaNs if one is provided as the input, but we return the other input. + if (ctx.base_name == BaseName::Fmax || ctx.base_name == BaseName::Fmin) + && ctx.basis == Musl + && (input.0.is_nan() ^ input.1.is_nan()) + && expected.is_nan() + { + return XFAIL("fmax/fmin musl NaN"); + } + + DEFAULT +} + +impl MaybeOverride<(i32, f32)> for SpecialCase { + fn check_float( + input: (i32, f32), + actual: F, + expected: F, + ctx: &CheckCtx, + ) -> CheckAction { + // `ynf(213, 109.15641) = -inf` with our library, should be finite. + if ctx.basis == Mpfr + && ctx.base_name == BaseName::Yn + && input.0 > 200 + && !expected.is_infinite() + && actual.is_infinite() + { + return XFAIL("ynf infinity mismatch"); + } + + int_float_common(input, actual, expected, ctx) + } +} + +impl MaybeOverride<(i32, f64)> for SpecialCase { + fn check_float( + input: (i32, f64), + actual: F, + expected: F, + ctx: &CheckCtx, + ) -> CheckAction { + int_float_common(input, actual, expected, ctx) + } +} + +fn int_float_common( + input: (i32, F1), + actual: F2, + expected: F2, + ctx: &CheckCtx, +) -> CheckAction { + if ctx.basis == Mpfr + && (ctx.base_name == BaseName::Jn || ctx.base_name == BaseName::Yn) + && input.1 == F1::NEG_INFINITY + && actual == F2::ZERO + && expected == F2::ZERO + { + return XFAIL("we disagree with MPFR on the sign of zero"); + } + + // Values near infinity sometimes get cut off for us. `ynf(681, 509.90924) = -inf` but should + // be -3.2161271e38. + if ctx.basis == Musl + && ctx.fn_ident == Identifier::Ynf + && !expected.is_infinite() + && actual.is_infinite() + && (expected.abs().to_bits().abs_diff(actual.abs().to_bits()) + < F2::Int::cast_from(10_000_000u32)) + { + return XFAIL_NOCHECK; + } + + // Our bessel functions blow up with large N values + if ctx.basis == Musl && (ctx.base_name == BaseName::Jn || ctx.base_name == BaseName::Yn) { + if cfg!(x86_no_sse) { + // Precision is especially bad on i586, not worth checking. + return XFAIL_NOCHECK; + } + + if input.0 > 4000 { + return XFAIL_NOCHECK; + } else if input.0 > 100 { + return CheckAction::AssertWithUlp(1_000_000); + } + } + DEFAULT +} + +#[cfg(f16_enabled)] +impl MaybeOverride<(f16, i32)> for SpecialCase {} +impl MaybeOverride<(f32, i32)> for SpecialCase {} +impl MaybeOverride<(f64, i32)> for SpecialCase {} +#[cfg(f128_enabled)] +impl MaybeOverride<(f128, i32)> for SpecialCase {} + +impl MaybeOverride<(f32, f32, f32)> for SpecialCase {} +impl MaybeOverride<(f64, f64, f64)> for SpecialCase {} +#[cfg(f128_enabled)] +impl MaybeOverride<(f128, f128, f128)> for SpecialCase {} diff --git a/crates/libm-test/src/run_cfg.rs b/crates/libm-test/src/run_cfg.rs new file mode 100644 index 000000000..b36164b00 --- /dev/null +++ b/crates/libm-test/src/run_cfg.rs @@ -0,0 +1,370 @@ +//! Configuration for how tests get run. + +use std::ops::RangeInclusive; +use std::sync::LazyLock; +use std::{env, str}; + +use crate::generate::random::{SEED, SEED_ENV}; +use crate::{BaseName, FloatTy, Identifier, test_log}; + +/// The environment variable indicating which extensive tests should be run. +pub const EXTENSIVE_ENV: &str = "LIBM_EXTENSIVE_TESTS"; + +/// Specify the number of iterations via this environment variable, rather than using the default. +pub const EXTENSIVE_ITER_ENV: &str = "LIBM_EXTENSIVE_ITERATIONS"; + +/// The override value, if set by the above environment. +static EXTENSIVE_ITER_OVERRIDE: LazyLock> = LazyLock::new(|| { + env::var(EXTENSIVE_ITER_ENV).map(|v| v.parse().expect("failed to parse iteration count")).ok() +}); + +/// Specific tests that need to have a reduced amount of iterations to complete in a reasonable +/// amount of time. +/// +/// Contains the itentifier+generator combo to match on, plus the factor to reduce by. +const EXTEMELY_SLOW_TESTS: &[(Identifier, GeneratorKind, u64)] = &[ + (Identifier::Fmodf128, GeneratorKind::QuickSpaced, 50), + (Identifier::Fmodf128, GeneratorKind::Extensive, 50), +]; + +/// Maximum number of iterations to run for a single routine. +/// +/// The default value of one greater than `u32::MAX` allows testing single-argument `f32` routines +/// and single- or double-argument `f16` routines exhaustively. `f64` and `f128` can't feasibly +/// be tested exhaustively; however, [`EXTENSIVE_ITER_ENV`] can be set to run tests for multiple +/// hours. +pub fn extensive_max_iterations() -> u64 { + let default = 1 << 32; // default value + EXTENSIVE_ITER_OVERRIDE.unwrap_or(default) +} + +/// Context passed to [`CheckOutput`]. +#[derive(Clone, Debug, PartialEq, Eq)] +pub struct CheckCtx { + /// Allowed ULP deviation + pub ulp: u32, + pub fn_ident: Identifier, + pub base_name: BaseName, + /// Function name. + pub fn_name: &'static str, + /// Return the unsuffixed version of the function name. + pub base_name_str: &'static str, + /// Source of truth for tests. + pub basis: CheckBasis, + pub gen_kind: GeneratorKind, + /// If specified, this value will override the value returned by [`iteration_count`]. + pub override_iterations: Option, +} + +impl CheckCtx { + /// Create a new check context, using the default ULP for the function. + pub fn new(fn_ident: Identifier, basis: CheckBasis, gen_kind: GeneratorKind) -> Self { + let mut ret = Self { + ulp: 0, + fn_ident, + fn_name: fn_ident.as_str(), + base_name: fn_ident.base_name(), + base_name_str: fn_ident.base_name().as_str(), + basis, + gen_kind, + override_iterations: None, + }; + ret.ulp = crate::default_ulp(&ret); + ret + } + + /// The number of input arguments for this function. + pub fn input_count(&self) -> usize { + self.fn_ident.math_op().rust_sig.args.len() + } + + pub fn override_iterations(&mut self, count: u64) { + self.override_iterations = Some(count) + } +} + +/// Possible items to test against +#[derive(Clone, Debug, PartialEq, Eq)] +pub enum CheckBasis { + /// Check against Musl's math sources. + Musl, + /// Check against infinite precision (MPFR). + Mpfr, + /// Benchmarks or other times when this is not relevant. + None, +} + +/// The different kinds of generators that provide test input, which account for input pattern +/// and quantity. +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum GeneratorKind { + EdgeCases, + Extensive, + QuickSpaced, + Random, + List, +} + +/// A list of all functions that should get extensive tests. +/// +/// This also supports the special test name `all` to run all tests, as well as `all_f16`, +/// `all_f32`, `all_f64`, and `all_f128` to run all tests for a specific float type. +static EXTENSIVE: LazyLock> = LazyLock::new(|| { + let var = env::var(EXTENSIVE_ENV).unwrap_or_default(); + let list = var.split(",").filter(|s| !s.is_empty()).collect::>(); + let mut ret = Vec::new(); + + let append_ty_ops = |ret: &mut Vec<_>, fty: FloatTy| { + let iter = Identifier::ALL.iter().filter(move |id| id.math_op().float_ty == fty).copied(); + ret.extend(iter); + }; + + for item in list { + match item { + "all" => ret = Identifier::ALL.to_owned(), + "all_f16" => append_ty_ops(&mut ret, FloatTy::F16), + "all_f32" => append_ty_ops(&mut ret, FloatTy::F32), + "all_f64" => append_ty_ops(&mut ret, FloatTy::F64), + "all_f128" => append_ty_ops(&mut ret, FloatTy::F128), + s => { + let id = Identifier::from_str(s) + .unwrap_or_else(|| panic!("unrecognized test name `{s}`")); + ret.push(id); + } + } + } + + ret +}); + +/// Information about the function to be tested. +#[derive(Debug)] +struct TestEnv { + /// Tests should be reduced because the platform is slow. E.g. 32-bit or emulated. + slow_platform: bool, + /// The float cannot be tested exhaustively, `f64` or `f128`. + large_float_ty: bool, + /// Env indicates that an extensive test should be run. + should_run_extensive: bool, + /// Multiprecision tests will be run. + mp_tests_enabled: bool, + /// The number of inputs to the function. + input_count: usize, +} + +impl TestEnv { + fn from_env(ctx: &CheckCtx) -> Self { + let id = ctx.fn_ident; + let op = id.math_op(); + + let will_run_mp = cfg!(feature = "build-mpfr"); + let large_float_ty = match op.float_ty { + FloatTy::F16 | FloatTy::F32 => false, + FloatTy::F64 | FloatTy::F128 => true, + }; + + let will_run_extensive = EXTENSIVE.contains(&id); + + let input_count = op.rust_sig.args.len(); + + Self { + slow_platform: slow_platform(), + large_float_ty, + should_run_extensive: will_run_extensive, + mp_tests_enabled: will_run_mp, + input_count, + } + } +} + +/// Tests are pretty slow on non-64-bit targets, x86 MacOS, and targets that run in QEMU. Start +/// with a reduced number on these platforms. +fn slow_platform() -> bool { + let slow_on_ci = crate::emulated() + || usize::BITS < 64 + || cfg!(all(target_arch = "x86_64", target_vendor = "apple")); + + // If not running in CI, there is no need to reduce iteration count. + slow_on_ci && crate::ci() +} + +/// The number of iterations to run for a given test. +pub fn iteration_count(ctx: &CheckCtx, argnum: usize) -> u64 { + let t_env = TestEnv::from_env(ctx); + + // Ideally run 5M tests + let mut domain_iter_count: u64 = 4_000_000; + + // Start with a reduced number of tests on slow platforms. + if t_env.slow_platform { + domain_iter_count = 100_000; + } + + // If we will be running tests against MPFR, we don't need to test as much against musl. + // However, there are some platforms where we have to test against musl since MPFR can't be + // built. + if t_env.mp_tests_enabled && ctx.basis == CheckBasis::Musl { + domain_iter_count /= 100; + } + + // Run fewer random tests than domain tests. + let random_iter_count = domain_iter_count / 100; + + let mut total_iterations = match ctx.gen_kind { + GeneratorKind::QuickSpaced => domain_iter_count, + GeneratorKind::Random => random_iter_count, + GeneratorKind::Extensive => extensive_max_iterations(), + GeneratorKind::EdgeCases | GeneratorKind::List => { + unimplemented!("shoudn't need `iteration_count` for {:?}", ctx.gen_kind) + } + }; + + // Larger float types get more iterations. + if t_env.large_float_ty && ctx.gen_kind != GeneratorKind::Extensive { + if ctx.gen_kind == GeneratorKind::Extensive { + // Extensive already has a pretty high test count. + total_iterations *= 2; + } else { + total_iterations *= 4; + } + } + + // Functions with more arguments get more iterations. + let arg_multiplier = 1 << (t_env.input_count - 1); + total_iterations *= arg_multiplier; + + // FMA has a huge domain but is reasonably fast to run, so increase another 1.5x. + if ctx.base_name == BaseName::Fma { + total_iterations = 3 * total_iterations / 2; + } + + // Some tests are significantly slower than others and need to be further reduced. + if let Some((_id, _gen, scale)) = EXTEMELY_SLOW_TESTS + .iter() + .find(|(id, generator, _scale)| *id == ctx.fn_ident && *generator == ctx.gen_kind) + { + // However, do not override if the extensive iteration count has been manually set. + if !(ctx.gen_kind == GeneratorKind::Extensive && EXTENSIVE_ITER_OVERRIDE.is_some()) { + total_iterations /= scale; + } + } + + if cfg!(optimizations_enabled) { + // Always run at least 10,000 tests. + total_iterations = total_iterations.max(10_000); + } else { + // Without optimizations, just run a quick check regardless of other parameters. + total_iterations = 800; + } + + let mut overridden = false; + if let Some(count) = ctx.override_iterations { + total_iterations = count; + overridden = true; + } + + // Adjust for the number of inputs + let ntests = match t_env.input_count { + 1 => total_iterations, + 2 => (total_iterations as f64).sqrt().ceil() as u64, + 3 => (total_iterations as f64).cbrt().ceil() as u64, + _ => panic!("test has more than three arguments"), + }; + + let total = ntests.pow(t_env.input_count.try_into().unwrap()); + + let seed_msg = match ctx.gen_kind { + GeneratorKind::QuickSpaced | GeneratorKind::Extensive => String::new(), + GeneratorKind::Random => { + format!(" using `{SEED_ENV}={}`", str::from_utf8(SEED.as_slice()).unwrap()) + } + GeneratorKind::EdgeCases | GeneratorKind::List => unimplemented!(), + }; + + test_log(&format!( + "{gen_kind:?} {basis:?} {fn_ident} arg {arg}/{args}: {ntests} iterations \ + ({total} total){seed_msg}{omsg}", + gen_kind = ctx.gen_kind, + basis = ctx.basis, + fn_ident = ctx.fn_ident, + arg = argnum + 1, + args = t_env.input_count, + omsg = if overridden { " (overridden)" } else { "" } + )); + + ntests +} + +/// Some tests require that an integer be kept within reasonable limits; generate that here. +pub fn int_range(ctx: &CheckCtx, argnum: usize) -> RangeInclusive { + let t_env = TestEnv::from_env(ctx); + + if !matches!(ctx.base_name, BaseName::Jn | BaseName::Yn) { + return i32::MIN..=i32::MAX; + } + + assert_eq!(argnum, 0, "For `jn`/`yn`, only the first argument takes an integer"); + + // The integer argument to `jn` is an iteration count. Limit this to ensure tests can be + // completed in a reasonable amount of time. + let non_extensive_range = if t_env.slow_platform || !cfg!(optimizations_enabled) { + (-0xf)..=0xff + } else { + (-0xff)..=0xffff + }; + + let extensive_range = (-0xfff)..=0xfffff; + + match ctx.gen_kind { + GeneratorKind::Extensive => extensive_range, + GeneratorKind::QuickSpaced | GeneratorKind::Random => non_extensive_range, + GeneratorKind::EdgeCases => extensive_range, + GeneratorKind::List => unimplemented!("shoudn't need range for {:?}", ctx.gen_kind), + } +} + +/// For domain tests, limit how many asymptotes or specified check points we test. +pub fn check_point_count(ctx: &CheckCtx) -> usize { + assert_eq!( + ctx.gen_kind, + GeneratorKind::EdgeCases, + "check_point_count is intended for edge case tests" + ); + let t_env = TestEnv::from_env(ctx); + if t_env.slow_platform || !cfg!(optimizations_enabled) { 4 } else { 10 } +} + +/// When validating points of interest (e.g. asymptotes, inflection points, extremes), also check +/// this many surrounding values. +pub fn check_near_count(ctx: &CheckCtx) -> u64 { + assert_eq!( + ctx.gen_kind, + GeneratorKind::EdgeCases, + "check_near_count is intended for edge case tests" + ); + if cfg!(optimizations_enabled) { + // Taper based on the number of inputs. + match ctx.input_count() { + 1 | 2 => 100, + 3 => 50, + x => panic!("unexpected argument count {x}"), + } + } else { + 8 + } +} + +/// Check whether extensive actions should be run or skipped. +pub fn skip_extensive_test(ctx: &CheckCtx) -> bool { + let t_env = TestEnv::from_env(ctx); + !t_env.should_run_extensive +} + +/// The number of iterations to run for `u256` fuzz tests. +pub fn bigint_fuzz_iteration_count() -> u64 { + if !cfg!(optimizations_enabled) { + return 1000; + } + + if slow_platform() { 100_000 } else { 5_000_000 } +} diff --git a/crates/libm-test/src/special_case.rs b/crates/libm-test/src/special_case.rs deleted file mode 100644 index df263d742..000000000 --- a/crates/libm-test/src/special_case.rs +++ /dev/null @@ -1,239 +0,0 @@ -//! Configuration for skipping or changing the result for individual test cases (inputs) rather -//! than ignoring entire tests. - -use core::f32; - -use crate::{CheckBasis, CheckCtx, Float, Int, TestResult}; - -/// Type implementing [`IgnoreCase`]. -pub struct SpecialCase; - -/// Don't run further validation on this test case. -const SKIP: Option = Some(Ok(())); - -/// Return this to skip checks on a test that currently fails but shouldn't. Looks -/// the same as skip, but we keep them separate to better indicate purpose. -const XFAIL: Option = Some(Ok(())); - -/// Allow overriding the outputs of specific test cases. -/// -/// There are some cases where we want to xfail specific cases or handle certain inputs -/// differently than the rest of calls to `validate`. This provides a hook to do that. -/// -/// If `None` is returned, checks will proceed as usual. If `Some(result)` is returned, checks -/// are skipped and the provided result is returned instead. -/// -/// This gets implemented once per input type, then the functions provide further filtering -/// based on function name and values. -/// -/// `ulp` can also be set to adjust the ULP for that specific test, even if `None` is still -/// returned. -pub trait MaybeOverride { - fn check_float( - _input: Input, - _actual: F, - _expected: F, - _ulp: &mut u32, - _ctx: &CheckCtx, - ) -> Option { - None - } - - fn check_int( - _input: Input, - _actual: I, - _expected: I, - _ctx: &CheckCtx, - ) -> Option { - None - } -} - -impl MaybeOverride<(f32,)> for SpecialCase { - fn check_float( - input: (f32,), - actual: F, - expected: F, - _ulp: &mut u32, - ctx: &CheckCtx, - ) -> Option { - if ctx.basis == CheckBasis::Musl { - if ctx.fname == "acoshf" && input.0 < -1.0 { - // acoshf is undefined for x <= 1.0, but we return a random result at lower - // values. - return XFAIL; - } - - if ctx.fname == "sincosf" { - let factor_frac_pi_2 = input.0.abs() / f32::consts::FRAC_PI_2; - if (factor_frac_pi_2 - factor_frac_pi_2.round()).abs() < 1e-2 { - // we have a bad approximation near multiples of pi/2 - return XFAIL; - } - } - - if ctx.fname == "expm1f" && input.0 > 80.0 && actual.is_infinite() { - // we return infinity but the number is representable - return XFAIL; - } - - if ctx.fname == "sinhf" && input.0.abs() > 80.0 && actual.is_nan() { - // we return some NaN that should be real values or infinite - // doesn't seem to happen on x86 - return XFAIL; - } - - if ctx.fname == "lgammaf" || ctx.fname == "lgammaf_r" && input.0 < 0.0 { - // loggamma should not be defined for x < 0, yet we both return results - return XFAIL; - } - } - - maybe_check_nan_bits(actual, expected, ctx) - } -} - -impl MaybeOverride<(f64,)> for SpecialCase { - fn check_float( - input: (f64,), - actual: F, - expected: F, - _ulp: &mut u32, - ctx: &CheckCtx, - ) -> Option { - if ctx.basis == CheckBasis::Musl { - if cfg!(target_arch = "x86") && ctx.fname == "acosh" && input.0 < 1.0 { - // The function is undefined, both implementations return random results - return SKIP; - } - - if cfg!(x86_no_sse) - && ctx.fname == "ceil" - && input.0 < 0.0 - && input.0 > -1.0 - && expected == F::ZERO - && actual == F::ZERO - { - // musl returns -0.0, we return +0.0 - return XFAIL; - } - - if ctx.fname == "lgamma" || ctx.fname == "lgamma_r" && input.0 < 0.0 { - // loggamma should not be defined for x < 0, yet we both return results - return XFAIL; - } - } - - maybe_check_nan_bits(actual, expected, ctx) - } -} - -/// Check NaN bits if the function requires it -fn maybe_check_nan_bits(actual: F, expected: F, ctx: &CheckCtx) -> Option { - if !(ctx.canonical_name == "fabs" || ctx.canonical_name == "copysign") { - return None; - } - - // LLVM currently uses x87 instructions which quieten signalling NaNs to handle the i686 - // `extern "C"` `f32`/`f64` return ABI. - // LLVM issue - // Rust issue - if cfg!(target_arch = "x86") && ctx.basis == CheckBasis::Musl { - return SKIP; - } - - // abs and copysign require signaling NaNs to be propagated, so verify bit equality. - if actual.to_bits() == expected.to_bits() { - return SKIP; - } else { - Some(Err(anyhow::anyhow!("NaNs have different bitpatterns"))) - } -} - -impl MaybeOverride<(f32, f32)> for SpecialCase { - fn check_float( - input: (f32, f32), - _actual: F, - expected: F, - _ulp: &mut u32, - ctx: &CheckCtx, - ) -> Option { - maybe_skip_min_max_nan(input, expected, ctx) - } -} -impl MaybeOverride<(f64, f64)> for SpecialCase { - fn check_float( - input: (f64, f64), - _actual: F, - expected: F, - _ulp: &mut u32, - ctx: &CheckCtx, - ) -> Option { - maybe_skip_min_max_nan(input, expected, ctx) - } -} - -/// Musl propagates NaNs if one is provided as the input, but we return the other input. -// F1 and F2 are always the same type, this is just to please generics -fn maybe_skip_min_max_nan( - input: (F1, F1), - expected: F2, - ctx: &CheckCtx, -) -> Option { - if (ctx.canonical_name == "fmax" || ctx.canonical_name == "fmin") - && (input.0.is_nan() || input.1.is_nan()) - && expected.is_nan() - { - return XFAIL; - } else { - None - } -} - -impl MaybeOverride<(i32, f32)> for SpecialCase { - fn check_float( - input: (i32, f32), - _actual: F, - _expected: F, - ulp: &mut u32, - ctx: &CheckCtx, - ) -> Option { - bessel_prec_dropoff(input, ulp, ctx) - } -} -impl MaybeOverride<(i32, f64)> for SpecialCase { - fn check_float( - input: (i32, f64), - _actual: F, - _expected: F, - ulp: &mut u32, - ctx: &CheckCtx, - ) -> Option { - bessel_prec_dropoff(input, ulp, ctx) - } -} - -/// Our bessel functions blow up with large N values -fn bessel_prec_dropoff( - input: (i32, F), - ulp: &mut u32, - ctx: &CheckCtx, -) -> Option { - if ctx.canonical_name == "jn" { - if input.0 > 4000 { - return XFAIL; - } else if input.0 > 2000 { - // *ulp = 20_000; - *ulp = 20000; - } else if input.0 > 1000 { - *ulp = 4000; - } - } - - None -} - -impl MaybeOverride<(f32, f32, f32)> for SpecialCase {} -impl MaybeOverride<(f64, f64, f64)> for SpecialCase {} -impl MaybeOverride<(f32, i32)> for SpecialCase {} -impl MaybeOverride<(f64, i32)> for SpecialCase {} diff --git a/crates/libm-test/src/test_traits.rs b/crates/libm-test/src/test_traits.rs index c24ac6e43..c560dade8 100644 --- a/crates/libm-test/src/test_traits.rs +++ b/crates/libm-test/src/test_traits.rs @@ -1,22 +1,21 @@ //! Traits related to testing. //! -//! There are three main traits in this module: +//! There are two main traits in this module: //! -//! - `GenerateInput`: implemented on any types that create test cases. //! - `TupleCall`: implemented on tuples to allow calling them as function arguments. //! - `CheckOutput`: implemented on anything that is an output type for validation against an //! expected value. -use std::fmt; +use std::panic::{RefUnwindSafe, UnwindSafe}; +use std::{fmt, panic}; -use anyhow::{Context, bail, ensure}; +use anyhow::{Context, anyhow, bail, ensure}; +use libm::support::Hexf; -use crate::{Float, Hex, Int, MaybeOverride, SpecialCase, TestResult}; - -/// Implement this on types that can generate a sequence of tuples for test input. -pub trait GenerateInput { - fn get_cases(&self) -> impl Iterator; -} +use crate::precision::CheckAction; +use crate::{ + CheckBasis, CheckCtx, Float, GeneratorKind, Int, MaybeOverride, SpecialCase, TestResult, +}; /// Trait for calling a function with a tuple as arguments. /// @@ -25,43 +24,45 @@ pub trait GenerateInput { pub trait TupleCall: fmt::Debug { type Output; fn call(self, f: Func) -> Self::Output; -} -/// Context passed to [`CheckOutput`]. -#[derive(Clone, Debug, PartialEq, Eq)] -pub struct CheckCtx { - /// Allowed ULP deviation - pub ulp: u32, - /// Function name. - pub fname: &'static str, - /// Return the unsuffixed version of the function name. - pub canonical_name: &'static str, - /// Source of truth for tests. - pub basis: CheckBasis, -} - -impl CheckCtx { - pub fn new(ulp: u32, fname: &'static str, basis: CheckBasis) -> Self { - let canonical_fname = crate::canonical_name(fname); - Self { ulp, fname, canonical_name: canonical_fname, basis } + /// Intercept panics and print the input to stderr before continuing. + fn call_intercept_panics(self, f: Func) -> Self::Output + where + Self: RefUnwindSafe + Copy, + Func: UnwindSafe, + { + let res = panic::catch_unwind(|| self.call(f)); + match res { + Ok(v) => v, + Err(e) => { + eprintln!("panic with the following input: {self:?}"); + panic::resume_unwind(e) + } + } } } -/// Possible items to test against -#[derive(Clone, Debug, PartialEq, Eq)] -pub enum CheckBasis { - /// Check against Musl's math sources. - Musl, -} - /// A trait to implement on any output type so we can verify it in a generic way. pub trait CheckOutput: Sized { /// Validate `self` (actual) and `expected` are the same. /// /// `input` is only used here for error messages. - fn validate<'a>(self, expected: Self, input: Input, ctx: &CheckCtx) -> TestResult; + fn validate(self, expected: Self, input: Input, ctx: &CheckCtx) -> TestResult; +} + +/// A helper trait to print something as hex with the correct number of nibbles, e.g. a `u32` +/// will always print with `0x` followed by 8 digits. +/// +/// This is only used for printing errors so allocating is okay. +pub trait Hex: Copy { + /// Hex integer syntax. + fn hex(self) -> String; + /// Hex float syntax. + fn hexf(self) -> String; } +/* implement `TupleCall` */ + impl TupleCall R> for (T1,) where T1: fmt::Debug, @@ -125,7 +126,7 @@ where } } -impl TupleCall for (T1,) +impl TupleCall fn(T1, &'a mut T2, &'a mut T3)> for (T1,) where T1: fmt::Debug, T2: fmt::Debug + Default, @@ -133,7 +134,7 @@ where { type Output = (T2, T3); - fn call(self, f: fn(T1, &mut T2, &mut T3)) -> Self::Output { + fn call(self, f: for<'a> fn(T1, &'a mut T2, &'a mut T3)) -> Self::Output { let mut t2 = T2::default(); let mut t3 = T3::default(); f(self.0, &mut t2, &mut t3); @@ -141,72 +142,267 @@ where } } -// Implement for floats -impl CheckOutput for F +/* implement `Hex` */ + +impl Hex for (T1,) +where + T1: Hex, +{ + fn hex(self) -> String { + format!("({},)", self.0.hex()) + } + + fn hexf(self) -> String { + format!("({},)", self.0.hexf()) + } +} + +impl Hex for (T1, T2) where - F: Float + Hex, + T1: Hex, + T2: Hex, +{ + fn hex(self) -> String { + format!("({}, {})", self.0.hex(), self.1.hex()) + } + + fn hexf(self) -> String { + format!("({}, {})", self.0.hexf(), self.1.hexf()) + } +} + +impl Hex for (T1, T2, T3) +where + T1: Hex, + T2: Hex, + T3: Hex, +{ + fn hex(self) -> String { + format!("({}, {}, {})", self.0.hex(), self.1.hex(), self.2.hex()) + } + + fn hexf(self) -> String { + format!("({}, {}, {})", self.0.hexf(), self.1.hexf(), self.2.hexf()) + } +} + +/* trait implementations for ints */ + +macro_rules! impl_int { + ($($ty:ty),*) => { + $( + impl Hex for $ty { + fn hex(self) -> String { + format!("{self:#0width$x}", width = ((Self::BITS / 4) + 2) as usize) + } + + fn hexf(self) -> String { + String::new() + } + } + + impl $crate::CheckOutput for $ty + where + Input: Hex + fmt::Debug, + SpecialCase: MaybeOverride, + { + fn validate<'a>( + self, + expected: Self, + input: Input, + ctx: &$crate::CheckCtx, + ) -> TestResult { + validate_int(self, expected, input, ctx) + } + } + )* + }; +} + +fn validate_int(actual: I, expected: I, input: Input, ctx: &CheckCtx) -> TestResult +where + I: Int + Hex, Input: Hex + fmt::Debug, - u32: TryFrom, SpecialCase: MaybeOverride, { - fn validate<'a>(self, expected: Self, input: Input, ctx: &CheckCtx) -> TestResult { - // Create a wrapper function so we only need to `.with_context` once. - let inner = || -> TestResult { - let mut allowed_ulp = ctx.ulp; - - // If the tested function requires a nonstandard test, run it here. - if let Some(res) = - SpecialCase::check_float(input, self, expected, &mut allowed_ulp, ctx) - { - return res; + let (result, xfail_msg) = match SpecialCase::check_int(input, actual, expected, ctx) { + // `require_biteq` forbids overrides. + _ if ctx.gen_kind == GeneratorKind::List => (actual == expected, None), + CheckAction::AssertSuccess => (actual == expected, None), + CheckAction::AssertFailure(msg) => (actual != expected, Some(msg)), + CheckAction::Custom(res) => return res, + CheckAction::Skip => return Ok(()), + CheckAction::AssertWithUlp(_) => panic!("ulp has no meaning for integer checks"), + }; + + let make_xfail_msg = || match xfail_msg { + Some(m) => format!( + "expected failure but test passed. Does an XFAIL need to be updated?\n\ + failed at: {m}", + ), + None => String::new(), + }; + + anyhow::ensure!( + result, + "\ + \n input: {input:?} {ibits}\ + \n expected: {expected:<22?} {expbits}\ + \n actual: {actual:<22?} {actbits}\ + \n {msg}\ + ", + actbits = actual.hex(), + expbits = expected.hex(), + ibits = input.hex(), + msg = make_xfail_msg() + ); + + Ok(()) +} + +impl_int!(u32, i32, u64, i64); + +/* trait implementations for floats */ + +macro_rules! impl_float { + ($($ty:ty),*) => { + $( + impl Hex for $ty { + fn hex(self) -> String { + format!( + "{:#0width$x}", + self.to_bits(), + width = ((Self::BITS / 4) + 2) as usize + ) + } + + fn hexf(self) -> String { + format!("{}", Hexf(self)) + } } - // Check when both are NaNs - if self.is_nan() && expected.is_nan() { - // By default, NaNs have nothing special to check. - return Ok(()); - } else if self.is_nan() || expected.is_nan() { - // Check when only one is a NaN - bail!("real value != NaN") + impl $crate::CheckOutput for $ty + where + Input: Hex + fmt::Debug, + SpecialCase: MaybeOverride, + { + fn validate<'a>( + self, + expected: Self, + input: Input, + ctx: &$crate::CheckCtx, + ) -> TestResult { + validate_float(self, expected, input, ctx) + } } + )* + }; +} - // Make sure that the signs are the same before checing ULP to avoid wraparound - let act_sig = self.signum(); - let exp_sig = expected.signum(); - ensure!(act_sig == exp_sig, "mismatched signs {act_sig} {exp_sig}"); +fn validate_float(actual: F, expected: F, input: Input, ctx: &CheckCtx) -> TestResult +where + F: Float + Hex, + Input: Hex + fmt::Debug, + u32: TryFrom, + SpecialCase: MaybeOverride, +{ + let mut assert_failure_msg = None; + + // Create a wrapper function so we only need to `.with_context` once. + let mut inner = || -> TestResult { + let mut allowed_ulp = ctx.ulp; + + // Forbid overrides if the items came from an explicit list, as long as we are checking + // against either MPFR or the result itself. + let require_biteq = ctx.gen_kind == GeneratorKind::List && ctx.basis != CheckBasis::Musl; + + match SpecialCase::check_float(input, actual, expected, ctx) { + _ if require_biteq => (), + CheckAction::AssertSuccess => (), + CheckAction::AssertFailure(msg) => assert_failure_msg = Some(msg), + CheckAction::Custom(res) => return res, + CheckAction::Skip => return Ok(()), + CheckAction::AssertWithUlp(ulp_override) => allowed_ulp = ulp_override, + }; - if self.is_infinite() ^ expected.is_infinite() { - bail!("mismatched infinities"); + // Check when both are NaNs + if actual.is_nan() && expected.is_nan() { + if require_biteq && ctx.basis == CheckBasis::None { + ensure!(actual.to_bits() == expected.to_bits(), "mismatched NaN bitpatterns"); } + // By default, NaNs have nothing special to check. + return Ok(()); + } else if actual.is_nan() || expected.is_nan() { + // Check when only one is a NaN + bail!("real value != NaN") + } - let act_bits = self.to_bits().signed(); - let exp_bits = expected.to_bits().signed(); + // Make sure that the signs are the same before checing ULP to avoid wraparound + let act_sig = actual.signum(); + let exp_sig = expected.signum(); + ensure!(act_sig == exp_sig, "mismatched signs {act_sig:?} {exp_sig:?}"); - let ulp_diff = act_bits.checked_sub(exp_bits).unwrap().abs(); + if actual.is_infinite() ^ expected.is_infinite() { + bail!("mismatched infinities"); + } - let ulp_u32 = u32::try_from(ulp_diff) - .map_err(|e| anyhow::anyhow!("{e:?}: ulp of {ulp_diff} exceeds u32::MAX"))?; + let act_bits = actual.to_bits().signed(); + let exp_bits = expected.to_bits().signed(); - ensure!(ulp_u32 <= allowed_ulp, "ulp {ulp_diff} > {allowed_ulp}",); + let ulp_diff = act_bits.checked_sub(exp_bits).unwrap().abs(); - Ok(()) - }; + let ulp_u32 = u32::try_from(ulp_diff) + .map_err(|e| anyhow!("{e:?}: ulp of {ulp_diff} exceeds u32::MAX"))?; + + ensure!(ulp_u32 <= allowed_ulp, "ulp {ulp_diff} > {allowed_ulp}",); + + Ok(()) + }; - inner().with_context(|| { - format!( - "\ - \n input: {input:?} {ibits}\ - \n expected: {expected:<22?} {expbits}\ - \n actual: {self:<22?} {actbits}\ - ", - actbits = self.hex(), - expbits = expected.hex(), - ibits = input.hex(), - ) - }) + let mut res = inner(); + + if let Some(msg) = assert_failure_msg { + // Invert `Ok` and `Err` if the test is an xfail. + if res.is_ok() { + let e = anyhow!( + "expected failure but test passed. Does an XFAIL need to be updated?\n\ + failed at: {msg}", + ); + res = Err(e) + } else { + res = Ok(()) + } } + + res.with_context(|| { + format!( + "\ + \n input: {input:?}\ + \n as hex: {ihex}\ + \n as bits: {ibits}\ + \n expected: {expected:<22?} {exphex} {expbits}\ + \n actual: {actual:<22?} {acthex} {actbits}\ + ", + ihex = input.hexf(), + ibits = input.hex(), + exphex = expected.hexf(), + expbits = expected.hex(), + actbits = actual.hex(), + acthex = actual.hexf(), + ) + }) } +impl_float!(f32, f64); + +#[cfg(f16_enabled)] +impl_float!(f16); + +#[cfg(f128_enabled)] +impl_float!(f128); + +/* trait implementations for compound types */ + /// Implement `CheckOutput` for combinations of types. macro_rules! impl_tuples { ($(($a:ty, $b:ty);)*) => { @@ -227,12 +423,15 @@ macro_rules! impl_tuples { .with_context(|| format!( "full context:\ \n input: {input:?} {ibits}\ + \n as hex: {ihex}\ + \n as bits: {ibits}\ \n expected: {expected:?} {expbits}\ \n actual: {self:?} {actbits}\ ", - actbits = self.hex(), - expbits = expected.hex(), + ihex = input.hexf(), ibits = input.hex(), + expbits = expected.hex(), + actbits = self.hex(), )) } } diff --git a/crates/libm-test/tests/check_coverage.rs b/crates/libm-test/tests/check_coverage.rs index ef6d21fdb..c23298686 100644 --- a/crates/libm-test/tests/check_coverage.rs +++ b/crates/libm-test/tests/check_coverage.rs @@ -1,60 +1,61 @@ //! Ensure that `for_each_function!` isn't missing any symbols. -/// Files in `src/` that do not export a testable symbol. -const ALLOWED_SKIPS: &[&str] = &[ - // Not a generic test function - "fenv", - // Nonpublic functions - "expo2", - "k_cos", - "k_cosf", - "k_expo2", - "k_expo2f", - "k_sin", - "k_sinf", - "k_tan", - "k_tanf", - "rem_pio2", - "rem_pio2_large", - "rem_pio2f", -]; +use std::collections::HashSet; +use std::env; +use std::path::Path; +use std::process::Command; macro_rules! callback { ( fn_name: $name:ident, - CFn: $_CFn:ty, - CArgs: $_CArgs:ty, - CRet: $_CRet:ty, - RustFn: $_RustFn:ty, - RustArgs: $_RustArgs:ty, - RustRet: $_RustRet:ty, - extra: [$push_to:ident], + attrs: [$($attr:meta),*], + extra: [$set:ident], ) => { - $push_to.push(stringify!($name)); + let name = stringify!($name); + let new = $set.insert(name); + assert!(new, "duplicate function `{name}` in `ALL_OPERATIONS`"); }; } #[test] fn test_for_each_function_all_included() { - let mut included = Vec::new(); - let mut missing = Vec::new(); + let all_functions: HashSet<_> = include_str!("../../../etc/function-list.txt") + .lines() + .filter(|line| !line.starts_with("#")) + .collect(); + + let mut tested = HashSet::new(); libm_macros::for_each_function! { callback: callback, - extra: [included], + extra: [tested], }; - for f in libm_test::ALL_FUNCTIONS { - if !included.contains(f) && !ALLOWED_SKIPS.contains(f) { - missing.push(f) - } - } - - if !missing.is_empty() { + let untested = all_functions.difference(&tested); + if untested.clone().next().is_some() { panic!( - "missing tests for the following: {missing:#?} \ + "missing tests for the following: {untested:#?} \ \nmake sure any new functions are entered in \ - `ALL_FUNCTIONS` (in `libm-macros`)." + `ALL_OPERATIONS` (in `libm-macros`)." ); } + assert_eq!(all_functions, tested); +} + +#[test] +fn ensure_list_updated() { + if libm_test::ci() { + // Most CI tests run in Docker where we don't have Python or Rustdoc, so it's easiest + // to just run the python file directly when it is available. + eprintln!("skipping test; CI runs the python file directly"); + return; + } + + let res = Command::new("python3") + .arg(Path::new(env!("CARGO_MANIFEST_DIR")).join("../../etc/update-api-list.py")) + .arg("--check") + .status() + .unwrap(); + + assert!(res.success(), "May need to run `./etc/update-api-list.py`"); } diff --git a/crates/libm-test/tests/compare_built_musl.rs b/crates/libm-test/tests/compare_built_musl.rs index 208b8e286..cbb4bd49b 100644 --- a/crates/libm-test/tests/compare_built_musl.rs +++ b/crates/libm-test/tests/compare_built_musl.rs @@ -9,44 +9,135 @@ // There are some targets we can't build musl for #![cfg(feature = "build-musl")] -use libm_test::gen::random; -use libm_test::{CheckBasis, CheckCtx, CheckOutput, TupleCall, musl_allowed_ulp}; -use musl_math_sys as musl; +use libm_test::generate::{case_list, edge_cases, random, spaced}; +use libm_test::{CheckBasis, CheckCtx, CheckOutput, GeneratorKind, MathOp, TupleCall}; -macro_rules! musl_rand_tests { +const BASIS: CheckBasis = CheckBasis::Musl; + +fn musl_runner( + ctx: &CheckCtx, + cases: impl Iterator, + musl_fn: Op::CFn, +) { + for input in cases { + let musl_res = input.call(musl_fn); + let crate_res = input.call_intercept_panics(Op::ROUTINE); + + crate_res.validate(musl_res, input, ctx).unwrap(); + } +} + +/// Test against musl with generators from a domain. +macro_rules! musl_tests { ( fn_name: $fn_name:ident, - CFn: $CFn:ty, - CArgs: $CArgs:ty, - CRet: $CRet:ty, - RustFn: $RustFn:ty, - RustArgs: $RustArgs:ty, - RustRet: $RustRet:ty, - attrs: [$($meta:meta)*] - ) => { paste::paste! { - #[test] - $(#[$meta])* - fn [< musl_random_ $fn_name >]() { - let fname = stringify!($fn_name); - let ulp = musl_allowed_ulp(fname); - let cases = random::get_test_cases::<$RustArgs>(fname); - let ctx = CheckCtx::new(ulp, fname, CheckBasis::Musl); - - for input in cases { - let musl_res = input.call(musl::$fn_name as $CFn); - let crate_res = input.call(libm::$fn_name as $RustFn); - - crate_res.validate(musl_res, input, &ctx).unwrap(); + attrs: [$($attr:meta),*], + ) => { + paste::paste! { + #[test] + $(#[$attr])* + fn [< musl_case_list_ $fn_name >]() { + type Op = libm_test::op::$fn_name::Routine; + let ctx = CheckCtx::new(Op::IDENTIFIER, BASIS, GeneratorKind::List); + let cases = case_list::get_test_cases_basis::(&ctx).0; + musl_runner::(&ctx, cases, musl_math_sys::$fn_name); + } + + #[test] + $(#[$attr])* + fn [< musl_random_ $fn_name >]() { + type Op = libm_test::op::$fn_name::Routine; + let ctx = CheckCtx::new(Op::IDENTIFIER, BASIS, GeneratorKind::Random); + let cases = random::get_test_cases::<::RustArgs>(&ctx).0; + musl_runner::(&ctx, cases, musl_math_sys::$fn_name); + } + + #[test] + $(#[$attr])* + fn [< musl_edge_case_ $fn_name >]() { + type Op = libm_test::op::$fn_name::Routine; + let ctx = CheckCtx::new(Op::IDENTIFIER, BASIS, GeneratorKind::EdgeCases); + let cases = edge_cases::get_test_cases::(&ctx).0; + musl_runner::(&ctx, cases, musl_math_sys::$fn_name); + } + + #[test] + $(#[$attr])* + fn [< musl_quickspace_ $fn_name >]() { + type Op = libm_test::op::$fn_name::Routine; + let ctx = CheckCtx::new(Op::IDENTIFIER, BASIS, GeneratorKind::QuickSpaced); + let cases = spaced::get_test_cases::(&ctx).0; + musl_runner::(&ctx, cases, musl_math_sys::$fn_name); } } - } }; + }; } libm_macros::for_each_function! { - callback: musl_rand_tests, - skip: [], - attributes: [ - #[cfg_attr(x86_no_sse, ignore)] // FIXME(correctness): wrong result on i586 - [exp10, exp10f, exp2, exp2f, rint] + callback: musl_tests, + attributes: [], + skip: [ + // TODO integer inputs + jn, + jnf, + ldexp, + ldexpf, + scalbn, + scalbnf, + yn, + ynf, + + // Not provided by musl + // verify-sorted-start + ceilf128, + ceilf16, + copysignf128, + copysignf16, + fabsf128, + fabsf16, + fdimf128, + fdimf16, + floorf128, + floorf16, + fmaf128, + fmaxf128, + fmaxf16, + fmaximum, + fmaximum_num, + fmaximum_numf, + fmaximum_numf128, + fmaximum_numf16, + fmaximumf, + fmaximumf128, + fmaximumf16, + fminf128, + fminf16, + fminimum, + fminimum_num, + fminimum_numf, + fminimum_numf128, + fminimum_numf16, + fminimumf, + fminimumf128, + fminimumf16, + fmodf128, + fmodf16, + ldexpf128, + ldexpf16, + rintf128, + rintf16, + roundeven, + roundevenf, + roundevenf128, + roundevenf16, + roundf128, + roundf16, + scalbnf128, + scalbnf16, + sqrtf128, + sqrtf16, + truncf128, + truncf16, + // verify-sorted-end ], } diff --git a/crates/libm-test/tests/multiprecision.rs b/crates/libm-test/tests/multiprecision.rs new file mode 100644 index 000000000..80b2c7868 --- /dev/null +++ b/crates/libm-test/tests/multiprecision.rs @@ -0,0 +1,79 @@ +//! Test with "infinite precision" + +#![cfg(feature = "build-mpfr")] + +use libm_test::generate::{case_list, edge_cases, random, spaced}; +use libm_test::mpfloat::MpOp; +use libm_test::{CheckBasis, CheckCtx, CheckOutput, GeneratorKind, MathOp, TupleCall}; + +const BASIS: CheckBasis = CheckBasis::Mpfr; + +fn mp_runner(ctx: &CheckCtx, cases: impl Iterator) { + let mut mp_vals = Op::new_mp(); + for input in cases { + let mp_res = Op::run(&mut mp_vals, input); + let crate_res = input.call_intercept_panics(Op::ROUTINE); + + crate_res.validate(mp_res, input, ctx).unwrap(); + } +} + +macro_rules! mp_tests { + ( + fn_name: $fn_name:ident, + attrs: [$($attr:meta),*], + ) => { + paste::paste! { + #[test] + $(#[$attr])* + fn [< mp_case_list_ $fn_name >]() { + type Op = libm_test::op::$fn_name::Routine; + let ctx = CheckCtx::new(Op::IDENTIFIER, BASIS, GeneratorKind::List); + let cases = case_list::get_test_cases_basis::(&ctx).0; + mp_runner::(&ctx, cases); + } + + #[test] + $(#[$attr])* + fn [< mp_random_ $fn_name >]() { + type Op = libm_test::op::$fn_name::Routine; + let ctx = CheckCtx::new(Op::IDENTIFIER, BASIS, GeneratorKind::Random); + let cases = random::get_test_cases::<::RustArgs>(&ctx).0; + mp_runner::(&ctx, cases); + } + + #[test] + $(#[$attr])* + fn [< mp_edge_case_ $fn_name >]() { + type Op = libm_test::op::$fn_name::Routine; + let ctx = CheckCtx::new(Op::IDENTIFIER, BASIS, GeneratorKind::EdgeCases); + let cases = edge_cases::get_test_cases::(&ctx).0; + mp_runner::(&ctx, cases); + } + + #[test] + $(#[$attr])* + fn [< mp_quickspace_ $fn_name >]() { + type Op = libm_test::op::$fn_name::Routine; + let ctx = CheckCtx::new(Op::IDENTIFIER, BASIS, GeneratorKind::QuickSpaced); + let cases = spaced::get_test_cases::(&ctx).0; + mp_runner::(&ctx, cases); + } + } + }; +} + +libm_macros::for_each_function! { + callback: mp_tests, + attributes: [ + // Also an assertion failure on i686: at `MPFR_ASSERTN (! mpfr_erangeflag_p ())` + #[ignore = "large values are infeasible in MPFR"] + [jn, jnf, yn, ynf], + ], + skip: [ + // FIXME: test needed, see + // https://github.com/rust-lang/libm/pull/311#discussion_r1818273392 + nextafter, + nextafterf, + ], +} diff --git a/crates/libm-test/tests/musl_biteq.rs b/crates/libm-test/tests/musl_biteq.rs deleted file mode 100644 index f586fd03d..000000000 --- a/crates/libm-test/tests/musl_biteq.rs +++ /dev/null @@ -1,6 +0,0 @@ -//! compare - -// PowerPC tests are failing on LLVM 13: https://github.com/rust-lang/rust/issues/88520 -#[cfg(not(target_arch = "powerpc64"))] -#[cfg(all(test, feature = "test-musl-serialized"))] -include!(concat!(env!("OUT_DIR"), "/musl-tests.rs")); diff --git a/crates/libm-test/tests/standalone.rs b/crates/libm-test/tests/standalone.rs new file mode 100644 index 000000000..7b30a3b48 --- /dev/null +++ b/crates/libm-test/tests/standalone.rs @@ -0,0 +1,38 @@ +//! Test cases that have both an input and an output, so do not require a basis. + +use libm_test::generate::case_list; +use libm_test::{CheckBasis, CheckCtx, CheckOutput, GeneratorKind, MathOp, TupleCall}; + +const BASIS: CheckBasis = CheckBasis::None; + +fn standalone_runner( + ctx: &CheckCtx, + cases: impl Iterator, +) { + for (input, expected) in cases { + let crate_res = input.call_intercept_panics(Op::ROUTINE); + crate_res.validate(expected, input, ctx).unwrap(); + } +} + +macro_rules! mp_tests { + ( + fn_name: $fn_name:ident, + attrs: [$($attr:meta),*], + ) => { + paste::paste! { + #[test] + $(#[$attr])* + fn [< standalone_ $fn_name >]() { + type Op = libm_test::op::$fn_name::Routine; + let ctx = CheckCtx::new(Op::IDENTIFIER, BASIS, GeneratorKind::List); + let cases = case_list::get_test_cases_standalone::(&ctx); + standalone_runner::(&ctx, cases); + } + } + }; +} + +libm_macros::for_each_function! { + callback: mp_tests, +} diff --git a/crates/libm-test/tests/u256.rs b/crates/libm-test/tests/u256.rs new file mode 100644 index 000000000..4444036d0 --- /dev/null +++ b/crates/libm-test/tests/u256.rs @@ -0,0 +1,147 @@ +//! Test the u256 implementation. the ops already get exercised reasonably well through the `f128` +//! routines, so this only does a few million fuzz iterations against GMP. + +#![cfg(feature = "build-mpfr")] + +use std::sync::LazyLock; + +use libm::support::{HInt, u256}; +type BigInt = rug::Integer; + +use libm_test::bigint_fuzz_iteration_count; +use libm_test::generate::random::SEED; +use rand::{Rng, SeedableRng}; +use rand_chacha::ChaCha8Rng; +use rug::Assign; +use rug::integer::Order; +use rug::ops::NotAssign; + +static BIGINT_U256_MAX: LazyLock = + LazyLock::new(|| BigInt::from_digits(&[u128::MAX, u128::MAX], Order::Lsf)); + +/// Copied from the test module. +fn hexu(v: u256) -> String { + format!("0x{:032x}{:032x}", v.hi, v.lo) +} + +fn random_u256(rng: &mut ChaCha8Rng) -> u256 { + let lo: u128 = rng.random(); + let hi: u128 = rng.random(); + u256 { lo, hi } +} + +fn assign_bigint(bx: &mut BigInt, x: u256) { + bx.assign_digits(&[x.lo, x.hi], Order::Lsf); +} + +fn from_bigint(bx: &mut BigInt) -> u256 { + // Truncate so the result fits into `[u128; 2]`. This makes all ops overflowing. + *bx &= &*BIGINT_U256_MAX; + let mut bres = [0u128, 0]; + bx.write_digits(&mut bres, Order::Lsf); + bx.assign(0); + u256 { lo: bres[0], hi: bres[1] } +} + +fn check_one( + x: impl FnOnce() -> String, + y: impl FnOnce() -> Option, + actual: u256, + expected: &mut BigInt, +) { + let expected = from_bigint(expected); + if actual != expected { + let xmsg = x(); + let ymsg = y().map(|y| format!("y: {y}\n")).unwrap_or_default(); + panic!( + "Results do not match\n\ + input: {xmsg}\n\ + {ymsg}\ + actual: {}\n\ + expected: {}\ + ", + hexu(actual), + hexu(expected), + ) + } +} + +#[test] +fn mp_u256_bitor() { + let mut rng = ChaCha8Rng::from_seed(*SEED); + let mut bx = BigInt::new(); + let mut by = BigInt::new(); + + for _ in 0..bigint_fuzz_iteration_count() { + let x = random_u256(&mut rng); + let y = random_u256(&mut rng); + assign_bigint(&mut bx, x); + assign_bigint(&mut by, y); + let actual = x | y; + bx |= &by; + check_one(|| hexu(x), || Some(hexu(y)), actual, &mut bx); + } +} + +#[test] +fn mp_u256_not() { + let mut rng = ChaCha8Rng::from_seed(*SEED); + let mut bx = BigInt::new(); + + for _ in 0..bigint_fuzz_iteration_count() { + let x = random_u256(&mut rng); + assign_bigint(&mut bx, x); + let actual = !x; + bx.not_assign(); + check_one(|| hexu(x), || None, actual, &mut bx); + } +} + +#[test] +fn mp_u256_add() { + let mut rng = ChaCha8Rng::from_seed(*SEED); + let mut bx = BigInt::new(); + let mut by = BigInt::new(); + + for _ in 0..bigint_fuzz_iteration_count() { + let x = random_u256(&mut rng); + let y = random_u256(&mut rng); + assign_bigint(&mut bx, x); + assign_bigint(&mut by, y); + let actual = x + y; + bx += &by; + check_one(|| hexu(x), || Some(hexu(y)), actual, &mut bx); + } +} + +#[test] +fn mp_u256_shr() { + let mut rng = ChaCha8Rng::from_seed(*SEED); + let mut bx = BigInt::new(); + + for _ in 0..bigint_fuzz_iteration_count() { + let x = random_u256(&mut rng); + let shift: u32 = rng.random_range(0..255); + assign_bigint(&mut bx, x); + let actual = x >> shift; + bx >>= shift; + check_one(|| hexu(x), || Some(shift.to_string()), actual, &mut bx); + } +} + +#[test] +fn mp_u256_widen_mul() { + let mut rng = ChaCha8Rng::from_seed(*SEED); + let mut bx = BigInt::new(); + let mut by = BigInt::new(); + + for _ in 0..bigint_fuzz_iteration_count() { + let x: u128 = rng.random(); + let y: u128 = rng.random(); + bx.assign(x); + by.assign(y); + let actual = x.widen_mul(y); + bx *= &by; + check_one(|| format!("{x:#034x}"), || Some(format!("{y:#034x}")), actual, &mut bx); + } +} diff --git a/crates/libm-test/tests/z_extensive/main.rs b/crates/libm-test/tests/z_extensive/main.rs new file mode 100644 index 000000000..5448cb6ea --- /dev/null +++ b/crates/libm-test/tests/z_extensive/main.rs @@ -0,0 +1,14 @@ +//! `main` is just a wrapper to handle configuration. + +#[cfg(not(feature = "build-mpfr"))] +fn main() { + eprintln!("multiprecision not enabled; skipping extensive tests"); +} + +#[cfg(feature = "build-mpfr")] +mod run; + +#[cfg(feature = "build-mpfr")] +fn main() { + run::run(); +} diff --git a/crates/libm-test/tests/z_extensive/run.rs b/crates/libm-test/tests/z_extensive/run.rs new file mode 100644 index 000000000..b10c231d1 --- /dev/null +++ b/crates/libm-test/tests/z_extensive/run.rs @@ -0,0 +1,233 @@ +//! Exhaustive tests for `f16` and `f32`, high-iteration for `f64` and `f128`. + +use std::fmt; +use std::io::{self, IsTerminal}; +use std::sync::atomic::{AtomicU64, Ordering}; +use std::time::Duration; + +use indicatif::{ProgressBar, ProgressStyle}; +use libm_test::generate::spaced; +use libm_test::mpfloat::MpOp; +use libm_test::{ + CheckBasis, CheckCtx, CheckOutput, GeneratorKind, MathOp, TestResult, TupleCall, + skip_extensive_test, +}; +use libtest_mimic::{Arguments, Trial}; +use rayon::prelude::*; +use spaced::SpacedInput; + +const BASIS: CheckBasis = CheckBasis::Mpfr; +const GEN_KIND: GeneratorKind = GeneratorKind::Extensive; + +/// Run the extensive test suite. +pub fn run() { + let mut args = Arguments::from_args(); + // Prevent multiple tests from running in parallel, each test gets parallized internally. + args.test_threads = Some(1); + let tests = register_all_tests(); + + // With default parallelism, the CPU doesn't saturate. We don't need to be nice to + // other processes, so do 1.5x to make sure we use all available resources. + let threads = std::thread::available_parallelism().map(Into::into).unwrap_or(0) * 3 / 2; + rayon::ThreadPoolBuilder::new().num_threads(threads).build_global().unwrap(); + + libtest_mimic::run(&args, tests).exit(); +} + +macro_rules! mp_extensive_tests { + ( + fn_name: $fn_name:ident, + attrs: [$($attr:meta),*], + extra: [$push_to:ident], + ) => { + $(#[$attr])* + register_single_test::(&mut $push_to); + }; +} + +/// Create a list of tests for consumption by `libtest_mimic`. +fn register_all_tests() -> Vec { + let mut all_tests = Vec::new(); + + libm_macros::for_each_function! { + callback: mp_extensive_tests, + extra: [all_tests], + skip: [ + // FIXME: test needed, see + // https://github.com/rust-lang/libm/pull/311#discussion_r1818273392 + nextafter, + nextafterf, + ], + } + + all_tests +} + +/// Add a single test to the list. +fn register_single_test(all: &mut Vec) +where + Op: MathOp + MpOp, + Op::RustArgs: SpacedInput + Send, +{ + let test_name = format!("mp_extensive_{}", Op::NAME); + let ctx = CheckCtx::new(Op::IDENTIFIER, BASIS, GEN_KIND); + let skip = skip_extensive_test(&ctx); + + let runner = move || { + if !cfg!(optimizations_enabled) { + panic!("extensive tests should be run with --release"); + } + + let res = run_single_test::(&ctx); + let e = match res { + Ok(()) => return Ok(()), + Err(e) => e, + }; + + // Format with the `Debug` implementation so we get the error cause chain, and print it + // here so we see the result immediately (rather than waiting for all tests to conclude). + let e = format!("{e:?}"); + eprintln!("failure testing {}:{e}\n", Op::IDENTIFIER); + + Err(e.into()) + }; + + all.push(Trial::test(test_name, runner).with_ignored_flag(skip)); +} + +/// Test runner for a signle routine. +fn run_single_test(ctx: &CheckCtx) -> TestResult +where + Op: MathOp + MpOp, + Op::RustArgs: SpacedInput + Send, +{ + // Small delay before printing anything so other output from the runner has a chance to flush. + std::thread::sleep(Duration::from_millis(500)); + eprintln!(); + + let completed = AtomicU64::new(0); + let (ref mut cases, total) = spaced::get_test_cases::(ctx); + let pb = Progress::new(Op::NAME, total); + + let test_single_chunk = |mp_vals: &mut Op::MpTy, input_vec: Vec| -> TestResult { + for input in input_vec { + // Test the input. + let mp_res = Op::run(mp_vals, input); + let crate_res = input.call_intercept_panics(Op::ROUTINE); + crate_res.validate(mp_res, input, ctx)?; + + let completed = completed.fetch_add(1, Ordering::Relaxed) + 1; + pb.update(completed, input); + } + + Ok(()) + }; + + // Chunk the cases so Rayon doesn't switch threads between each iterator item. 50k seems near + // a performance sweet spot. Ideally we would reuse these allocations rather than discarding, + // but that is difficult with Rayon's API. + let chunk_size = 50_000; + let chunks = std::iter::from_fn(move || { + let mut v = Vec::with_capacity(chunk_size); + v.extend(cases.take(chunk_size)); + (!v.is_empty()).then_some(v) + }); + + // Run the actual tests + let res = chunks.par_bridge().try_for_each_init(Op::new_mp, test_single_chunk); + + let real_total = completed.load(Ordering::Relaxed); + pb.complete(real_total); + + if res.is_ok() && real_total != total { + // Provide a warning if our estimate needs to be updated. + panic!("total run {real_total} does not match expected {total}"); + } + + res +} + +/// Wrapper around a `ProgressBar` that handles styles and non-TTY messages. +struct Progress { + pb: ProgressBar, + name_padded: String, + final_style: ProgressStyle, + is_tty: bool, +} + +impl Progress { + const PB_TEMPLATE: &str = "[{elapsed:3} {percent:3}%] {bar:20.cyan/blue} NAME \ + {human_pos:>13}/{human_len:13} {per_sec:18} eta {eta:8} {msg}"; + const PB_TEMPLATE_FINAL: &str = "[{elapsed:3} {percent:3}%] {bar:20.cyan/blue} NAME \ + {human_pos:>13}/{human_len:13} {per_sec:18} done in {elapsed_precise}"; + + fn new(name: &str, total: u64) -> Self { + eprintln!("starting extensive tests for `{name}`"); + let name_padded = format!("{name:9}"); + let is_tty = io::stderr().is_terminal(); + + let initial_style = + ProgressStyle::with_template(&Self::PB_TEMPLATE.replace("NAME", &name_padded)) + .unwrap() + .progress_chars("##-"); + + let final_style = + ProgressStyle::with_template(&Self::PB_TEMPLATE_FINAL.replace("NAME", &name_padded)) + .unwrap() + .progress_chars("##-"); + + let pb = ProgressBar::new(total); + pb.set_style(initial_style); + + Self { pb, final_style, name_padded, is_tty } + } + + fn update(&self, completed: u64, input: impl fmt::Debug) { + // Infrequently update the progress bar. + if completed % 20_000 == 0 { + self.pb.set_position(completed); + } + + if completed % 500_000 == 0 { + self.pb.set_message(format!("input: {input:<24?}")); + } + + if !self.is_tty && completed % 5_000_000 == 0 { + let len = self.pb.length().unwrap_or_default(); + eprintln!( + "[{elapsed:3?}s {percent:3.0}%] {name} \ + {human_pos:>10}/{human_len:<10} {per_sec:14.2}/s eta {eta:4}s {input:<24?}", + elapsed = self.pb.elapsed().as_secs(), + percent = completed as f32 * 100.0 / len as f32, + name = self.name_padded, + human_pos = completed, + human_len = len, + per_sec = self.pb.per_sec(), + eta = self.pb.eta().as_secs() + ); + } + } + + fn complete(self, real_total: u64) { + self.pb.set_style(self.final_style); + self.pb.set_position(real_total); + self.pb.abandon(); + + if !self.is_tty { + let len = self.pb.length().unwrap_or_default(); + eprintln!( + "[{elapsed:3}s {percent:3.0}%] {name} \ + {human_pos:>10}/{human_len:<10} {per_sec:14.2}/s done in {elapsed_precise}", + elapsed = self.pb.elapsed().as_secs(), + percent = real_total as f32 * 100.0 / len as f32, + name = self.name_padded, + human_pos = real_total, + human_len = len, + per_sec = self.pb.per_sec(), + elapsed_precise = self.pb.elapsed().as_secs(), + ); + } + + eprintln!(); + } +} diff --git a/crates/musl-math-sys/Cargo.toml b/crates/musl-math-sys/Cargo.toml index 7f6272d79..9e866a970 100644 --- a/crates/musl-math-sys/Cargo.toml +++ b/crates/musl-math-sys/Cargo.toml @@ -1,13 +1,13 @@ [package] name = "musl-math-sys" version = "0.1.0" -edition = "2021" +edition = "2024" publish = false [dependencies] [dev-dependencies] -libm = { path = "../../" } +libm = { path = "../../libm" } [build-dependencies] -cc = "1.1.24" +cc = "1.2.16" diff --git a/crates/musl-math-sys/build.rs b/crates/musl-math-sys/build.rs index 03df06c79..f06d84ee2 100644 --- a/crates/musl-math-sys/build.rs +++ b/crates/musl-math-sys/build.rs @@ -79,17 +79,12 @@ impl Config { let target_features = env::var("CARGO_CFG_TARGET_FEATURE") .map(|feats| feats.split(',').map(ToOwned::to_owned).collect()) .unwrap_or_default(); - - // Default to the `{workspace_root}/musl` if not specified - let musl_dir = env::var("MUSL_SOURCE_DIR") - .map(PathBuf::from) - .unwrap_or_else(|_| manifest_dir.parent().unwrap().parent().unwrap().join("musl")); + let musl_dir = manifest_dir.join("musl"); let target_arch = env::var("CARGO_CFG_TARGET_ARCH").unwrap(); let musl_arch = if target_arch == "x86" { "i386".to_owned() } else { target_arch.clone() }; println!("cargo::rerun-if-changed={}/c_patches", manifest_dir.display()); - println!("cargo::rerun-if-env-changed=MUSL_SOURCE_DIR"); println!("cargo::rerun-if-changed={}", musl_dir.display()); Self { @@ -111,20 +106,17 @@ impl Config { /// Build musl math symbols to a static library fn build_musl_math(cfg: &Config) { let musl_dir = &cfg.musl_dir; - assert!( - musl_dir.exists(), - "musl source is missing. it can be downloaded with ./ci/download-musl.sh" - ); - let math = musl_dir.join("src/math"); let arch_dir = musl_dir.join("arch").join(&cfg.musl_arch); + assert!(math.exists(), "musl source not found. Is the submodule up to date?"); + let source_map = find_math_source(&math, cfg); let out_path = cfg.out_dir.join(format!("lib{LIB_NAME}.a")); // Run configuration steps. Usually done as part of the musl `Makefile`. let obj_include = cfg.out_dir.join("musl_obj/include"); fs::create_dir_all(&obj_include).unwrap(); - fs::create_dir_all(&obj_include.join("bits")).unwrap(); + fs::create_dir_all(obj_include.join("bits")).unwrap(); let sed_stat = Command::new("sed") .arg("-f") .arg(musl_dir.join("tools/mkalltypes.sed")) @@ -151,7 +143,6 @@ fn build_musl_math(cfg: &Config) { .flag_if_supported("-ffreestanding") .flag_if_supported("-nostdinc") .define("_ALL_SOURCE", "1") - .opt_level(3) .define( "ROOT_INCLUDE_FEATURES", Some(musl_dir.join("include/features.h").to_str().unwrap()), diff --git a/crates/musl-math-sys/musl b/crates/musl-math-sys/musl new file mode 160000 index 000000000..61399d4bd --- /dev/null +++ b/crates/musl-math-sys/musl @@ -0,0 +1 @@ +Subproject commit 61399d4bd02ae1ec03068445aa7ffe9174466bfd diff --git a/crates/musl-math-sys/src/lib.rs b/crates/musl-math-sys/src/lib.rs index fe3c89229..6a4bf4859 100644 --- a/crates/musl-math-sys/src/lib.rs +++ b/crates/musl-math-sys/src/lib.rs @@ -7,14 +7,16 @@ use std::ffi::{c_char, c_int, c_long}; /// unsound. macro_rules! functions { ( $( + $( #[$meta:meta] )* $pfx_name:ident: $name:ident( $($arg:ident: $aty:ty),+ ) -> $rty:ty; )* ) => { - extern "C" { + unsafe extern "C" { $( fn $pfx_name( $($arg: $aty),+ ) -> $rty; )* } $( // Expose a safe version + $( #[$meta] )* pub fn $name( $($arg: $aty),+ ) -> $rty { // SAFETY: FFI calls with no preconditions unsafe { $pfx_name( $($arg),+ ) } @@ -231,8 +233,13 @@ functions! { musl_logf: logf(a: f32) -> f32; musl_modf: modf(a: f64, b: &mut f64) -> f64; musl_modff: modff(a: f32, b: &mut f32) -> f32; + + // FIXME: these need to be unsafe + #[allow(clippy::not_unsafe_ptr_arg_deref)] musl_nan: nan(a: *const c_char) -> f64; + #[allow(clippy::not_unsafe_ptr_arg_deref)] musl_nanf: nanf(a: *const c_char) -> f32; + musl_nearbyint: nearbyint(a: f64) -> f64; musl_nearbyintf: nearbyintf(a: f32) -> f32; musl_nextafter: nextafter(a: f64, b: f64) -> f64; @@ -275,5 +282,6 @@ functions! { musl_y0f: y0f(a: f32) -> f32; musl_y1: y1(a: f64) -> f64; musl_y1f: y1f(a: f32) -> f32; + musl_yn: yn(a: c_int, b: f64) -> f64; musl_ynf: ynf(a: c_int, b: f32) -> f32; } diff --git a/crates/util/Cargo.toml b/crates/util/Cargo.toml new file mode 100644 index 000000000..4bcb97472 --- /dev/null +++ b/crates/util/Cargo.toml @@ -0,0 +1,18 @@ +[package] +name = "util" +version = "0.1.0" +edition = "2024" +publish = false + +[features] +default = ["build-musl", "build-mpfr", "unstable-float"] +build-musl = ["libm-test/build-musl", "dep:musl-math-sys"] +build-mpfr = ["libm-test/build-mpfr", "dep:rug"] +unstable-float = ["libm/unstable-float", "libm-test/unstable-float", "rug?/nightly-float"] + +[dependencies] +libm = { path = "../../libm", default-features = false } +libm-macros = { path = "../libm-macros" } +libm-test = { path = "../libm-test", default-features = false } +musl-math-sys = { path = "../musl-math-sys", optional = true } +rug = { version = "1.27.0", optional = true, default-features = false, features = ["float", "std"] } diff --git a/crates/util/build.rs b/crates/util/build.rs new file mode 100644 index 000000000..a1be41275 --- /dev/null +++ b/crates/util/build.rs @@ -0,0 +1,10 @@ +#![allow(unexpected_cfgs)] + +#[path = "../../libm/configure.rs"] +mod configure; + +fn main() { + println!("cargo:rerun-if-changed=../../libm/configure.rs"); + let cfg = configure::Config::from_env(); + configure::emit_libm_config(&cfg); +} diff --git a/crates/util/src/main.rs b/crates/util/src/main.rs new file mode 100644 index 000000000..ef70ec903 --- /dev/null +++ b/crates/util/src/main.rs @@ -0,0 +1,382 @@ +//! Helper CLI utility for common tasks. + +#![cfg_attr(f16_enabled, feature(f16))] +#![cfg_attr(f128_enabled, feature(f128))] + +use std::any::type_name; +use std::env; +use std::num::ParseIntError; +use std::str::FromStr; + +use libm::support::{Hexf, hf32, hf64}; +#[cfg(feature = "build-mpfr")] +use libm_test::mpfloat::MpOp; +use libm_test::{MathOp, TupleCall}; +#[cfg(feature = "build-mpfr")] +use rug::az::{self, Az}; + +const USAGE: &str = "\ +usage: + +cargo run -p util -- + +SUBCOMMAND: + eval inputs... + Evaulate the expression with a given basis. This can be useful for + running routines with a debugger, or quickly checking input. Examples: + * eval musl sinf 1.234 # print the results of musl sinf(1.234f32) + * eval mpfr pow 1.234 2.432 # print the results of mpfr pow(1.234, 2.432) +"; + +fn main() { + let args = env::args().collect::>(); + let str_args = args.iter().map(|s| s.as_str()).collect::>(); + + match &str_args.as_slice()[1..] { + ["eval", basis, op, inputs @ ..] => do_eval(basis, op, inputs), + _ => { + println!("{USAGE}\nunrecognized input `{str_args:?}`"); + std::process::exit(1); + } + } +} + +macro_rules! handle_call { + ( + fn_name: $fn_name:ident, + CFn: $CFn:ty, + RustFn: $RustFn:ty, + RustArgs: $RustArgs:ty, + attrs: [$($attr:meta),*], + extra: ($basis:ident, $op:ident, $inputs:ident), + fn_extra: $musl_fn:expr, + ) => { + $(#[$attr])* + if $op == stringify!($fn_name) { + type Op = libm_test::op::$fn_name::Routine; + + let input = <$RustArgs>::parse($inputs); + let libm_fn: ::RustFn = libm::$fn_name; + + let output = match $basis { + "libm" => input.call_intercept_panics(libm_fn), + #[cfg(feature = "build-musl")] + "musl" => { + let musl_fn: ::CFn = + $musl_fn.unwrap_or_else(|| panic!("no musl function for {}", $op)); + input.call(musl_fn) + } + #[cfg(feature = "build-mpfr")] + "mpfr" => { + let mut mp = ::new_mp(); + Op::run(&mut mp, input) + } + _ => panic!("unrecognized or disabled basis '{}'", $basis), + }; + println!("{output:?} {:x}", Hexf(output)); + return; + } + }; +} + +/// Evaluate the specified operation with a given basis. +fn do_eval(basis: &str, op: &str, inputs: &[&str]) { + libm_macros::for_each_function! { + callback: handle_call, + emit_types: [CFn, RustFn, RustArgs], + extra: (basis, op, inputs), + fn_extra: match MACRO_FN_NAME { + ceilf128 + | ceilf16 + | copysignf128 + | copysignf16 + | fabsf128 + | fabsf16 + | fdimf128 + | fdimf16 + | floorf128 + | floorf16 + | fmaf128 + | fmaxf128 + | fmaxf16 + | fmaximum + | fmaximum_num + | fmaximum_numf + | fmaximum_numf128 + | fmaximum_numf16 + | fmaximumf + | fmaximumf128 + | fmaximumf16 + | fminf128 + | fminf16 + | fminimum + | fminimum_num + | fminimum_numf + | fminimum_numf128 + | fminimum_numf16 + | fminimumf + | fminimumf128 + | fminimumf16 + | fmodf128 + | fmodf16 + | ldexpf128 + | ldexpf16 + | rintf128 + | rintf16 + | roundeven + | roundevenf + | roundevenf128 + | roundevenf16 + | roundf128 + | roundf16 + | scalbnf128 + | scalbnf16 + | sqrtf128 + | sqrtf16 + | truncf128 + | truncf16 => None, + _ => Some(musl_math_sys::MACRO_FN_NAME) + } + } + + panic!("no operation matching {op}"); +} + +/// Parse a tuple from a space-delimited string. +trait ParseTuple { + fn parse(input: &[&str]) -> Self; +} + +macro_rules! impl_parse_tuple { + ($ty:ty) => { + impl ParseTuple for ($ty,) { + fn parse(input: &[&str]) -> Self { + assert_eq!(input.len(), 1, "expected a single argument, got {input:?}"); + (parse(input, 0),) + } + } + + impl ParseTuple for ($ty, $ty) { + fn parse(input: &[&str]) -> Self { + assert_eq!(input.len(), 2, "expected two arguments, got {input:?}"); + (parse(input, 0), parse(input, 1)) + } + } + + impl ParseTuple for ($ty, i32) { + fn parse(input: &[&str]) -> Self { + assert_eq!(input.len(), 2, "expected two arguments, got {input:?}"); + (parse(input, 0), parse(input, 1)) + } + } + + impl ParseTuple for (i32, $ty) { + fn parse(input: &[&str]) -> Self { + assert_eq!(input.len(), 2, "expected two arguments, got {input:?}"); + (parse(input, 0), parse(input, 1)) + } + } + + impl ParseTuple for ($ty, $ty, $ty) { + fn parse(input: &[&str]) -> Self { + assert_eq!(input.len(), 3, "expected three arguments, got {input:?}"); + (parse(input, 0), parse(input, 1), parse(input, 2)) + } + } + }; +} + +#[allow(unused_macros)] +#[cfg(feature = "build-mpfr")] +macro_rules! impl_parse_tuple_via_rug { + ($ty:ty) => { + impl ParseTuple for ($ty,) { + fn parse(input: &[&str]) -> Self { + assert_eq!(input.len(), 1, "expected a single argument, got {input:?}"); + (parse_rug(input, 0),) + } + } + + impl ParseTuple for ($ty, $ty) { + fn parse(input: &[&str]) -> Self { + assert_eq!(input.len(), 2, "expected two arguments, got {input:?}"); + (parse_rug(input, 0), parse_rug(input, 1)) + } + } + + impl ParseTuple for ($ty, i32) { + fn parse(input: &[&str]) -> Self { + assert_eq!(input.len(), 2, "expected two arguments, got {input:?}"); + (parse_rug(input, 0), parse(input, 1)) + } + } + + impl ParseTuple for (i32, $ty) { + fn parse(input: &[&str]) -> Self { + assert_eq!(input.len(), 2, "expected two arguments, got {input:?}"); + (parse(input, 0), parse_rug(input, 1)) + } + } + + impl ParseTuple for ($ty, $ty, $ty) { + fn parse(input: &[&str]) -> Self { + assert_eq!(input.len(), 3, "expected three arguments, got {input:?}"); + (parse_rug(input, 0), parse_rug(input, 1), parse_rug(input, 2)) + } + } + }; +} + +// Fallback for when Rug is not built. +#[allow(unused_macros)] +#[cfg(not(feature = "build-mpfr"))] +macro_rules! impl_parse_tuple_via_rug { + ($ty:ty) => { + impl ParseTuple for ($ty,) { + fn parse(_input: &[&str]) -> Self { + panic!("parsing this type requires the `build-mpfr` feature") + } + } + + impl ParseTuple for ($ty, $ty) { + fn parse(_input: &[&str]) -> Self { + panic!("parsing this type requires the `build-mpfr` feature") + } + } + + impl ParseTuple for ($ty, i32) { + fn parse(_input: &[&str]) -> Self { + panic!("parsing this type requires the `build-mpfr` feature") + } + } + + impl ParseTuple for (i32, $ty) { + fn parse(_input: &[&str]) -> Self { + panic!("parsing this type requires the `build-mpfr` feature") + } + } + + impl ParseTuple for ($ty, $ty, $ty) { + fn parse(_input: &[&str]) -> Self { + panic!("parsing this type requires the `build-mpfr` feature") + } + } + }; +} + +impl_parse_tuple!(f32); +impl_parse_tuple!(f64); + +#[cfg(f16_enabled)] +impl_parse_tuple_via_rug!(f16); +#[cfg(f128_enabled)] +impl_parse_tuple_via_rug!(f128); + +/// Try to parse the number, printing a nice message on failure. +fn parse(input: &[&str], idx: usize) -> T { + let s = input[idx]; + + let msg = || format!("invalid {} input '{s}'", type_name::()); + + if s.starts_with("0x") || s.starts_with("-0x") { + return T::from_str_radix(s, 16).unwrap_or_else(|_| panic!("{}", msg())); + } + + if s.starts_with("0b") { + return T::from_str_radix(s, 2).unwrap_or_else(|_| panic!("{}", msg())); + } + + s.parse().unwrap_or_else(|_| panic!("{}", msg())) +} + +/// Try to parse the float type going via `rug`, for `f16` and `f128` which don't yet implement +/// `FromStr`. +#[cfg(feature = "build-mpfr")] +fn parse_rug(input: &[&str], idx: usize) -> F +where + F: libm_test::Float + FromStrRadix, + rug::Float: az::Cast, +{ + let s = input[idx]; + + let msg = || format!("invalid {} input '{s}'", type_name::()); + + if s.starts_with("0x") { + return F::from_str_radix(s, 16).unwrap_or_else(|_| panic!("{}", msg())); + } + + if s.starts_with("0b") { + return F::from_str_radix(s, 2).unwrap_or_else(|_| panic!("{}", msg())); + } + + let x = rug::Float::parse(s).unwrap_or_else(|_| panic!("{}", msg())); + let x = rug::Float::with_val(F::BITS, x); + x.az() +} + +trait FromStrRadix: Sized { + fn from_str_radix(s: &str, radix: u32) -> Result; +} + +impl FromStrRadix for i32 { + fn from_str_radix(s: &str, radix: u32) -> Result { + let s = strip_radix_prefix(s, radix); + i32::from_str_radix(s, radix) + } +} + +#[cfg(f16_enabled)] +impl FromStrRadix for f16 { + fn from_str_radix(s: &str, radix: u32) -> Result { + if radix == 16 && s.contains("p") { + return Ok(libm::support::hf16(s)); + } + + let s = strip_radix_prefix(s, radix); + u16::from_str_radix(s, radix).map(Self::from_bits) + } +} + +impl FromStrRadix for f32 { + fn from_str_radix(s: &str, radix: u32) -> Result { + if radix == 16 && s.contains("p") { + // Parse as hex float + return Ok(hf32(s)); + } + + let s = strip_radix_prefix(s, radix); + u32::from_str_radix(s, radix).map(Self::from_bits) + } +} + +impl FromStrRadix for f64 { + fn from_str_radix(s: &str, radix: u32) -> Result { + if s.contains("p") { + return Ok(hf64(s)); + } + + let s = strip_radix_prefix(s, radix); + u64::from_str_radix(s, radix).map(Self::from_bits) + } +} + +#[cfg(f128_enabled)] +impl FromStrRadix for f128 { + fn from_str_radix(s: &str, radix: u32) -> Result { + if radix == 16 && s.contains("p") { + return Ok(libm::support::hf128(s)); + } + let s = strip_radix_prefix(s, radix); + u128::from_str_radix(s, radix).map(Self::from_bits) + } +} + +fn strip_radix_prefix(s: &str, radix: u32) -> &str { + if radix == 16 { + s.strip_prefix("0x").unwrap() + } else if radix == 2 { + s.strip_prefix("0b").unwrap() + } else { + s + } +} diff --git a/etc/function-definitions.json b/etc/function-definitions.json new file mode 100644 index 000000000..3e33343c4 --- /dev/null +++ b/etc/function-definitions.json @@ -0,0 +1,1069 @@ +{ + "__comment": "Autogenerated by update-api-list.py. List of files that define a function with a given name. This file is checked in to make it obvious if refactoring breaks things", + "acos": { + "sources": [ + "libm/src/math/acos.rs" + ], + "type": "f64" + }, + "acosf": { + "sources": [ + "libm/src/math/acosf.rs" + ], + "type": "f32" + }, + "acosh": { + "sources": [ + "libm/src/math/acosh.rs" + ], + "type": "f64" + }, + "acoshf": { + "sources": [ + "libm/src/math/acoshf.rs" + ], + "type": "f32" + }, + "asin": { + "sources": [ + "libm/src/math/asin.rs" + ], + "type": "f64" + }, + "asinf": { + "sources": [ + "libm/src/math/asinf.rs" + ], + "type": "f32" + }, + "asinh": { + "sources": [ + "libm/src/math/asinh.rs" + ], + "type": "f64" + }, + "asinhf": { + "sources": [ + "libm/src/math/asinhf.rs" + ], + "type": "f32" + }, + "atan": { + "sources": [ + "libm/src/math/atan.rs" + ], + "type": "f64" + }, + "atan2": { + "sources": [ + "libm/src/math/atan2.rs" + ], + "type": "f64" + }, + "atan2f": { + "sources": [ + "libm/src/math/atan2f.rs" + ], + "type": "f32" + }, + "atanf": { + "sources": [ + "libm/src/math/atanf.rs" + ], + "type": "f32" + }, + "atanh": { + "sources": [ + "libm/src/math/atanh.rs" + ], + "type": "f64" + }, + "atanhf": { + "sources": [ + "libm/src/math/atanhf.rs" + ], + "type": "f32" + }, + "cbrt": { + "sources": [ + "libm/src/math/cbrt.rs" + ], + "type": "f64" + }, + "cbrtf": { + "sources": [ + "libm/src/math/cbrtf.rs" + ], + "type": "f32" + }, + "ceil": { + "sources": [ + "libm/src/math/arch/i586.rs", + "libm/src/math/arch/wasm32.rs", + "libm/src/math/ceil.rs", + "libm/src/math/generic/ceil.rs" + ], + "type": "f64" + }, + "ceilf": { + "sources": [ + "libm/src/math/arch/wasm32.rs", + "libm/src/math/ceil.rs", + "libm/src/math/generic/ceil.rs" + ], + "type": "f32" + }, + "ceilf128": { + "sources": [ + "libm/src/math/ceil.rs", + "libm/src/math/generic/ceil.rs" + ], + "type": "f128" + }, + "ceilf16": { + "sources": [ + "libm/src/math/ceil.rs", + "libm/src/math/generic/ceil.rs" + ], + "type": "f16" + }, + "copysign": { + "sources": [ + "libm/src/math/copysign.rs", + "libm/src/math/generic/copysign.rs" + ], + "type": "f64" + }, + "copysignf": { + "sources": [ + "libm/src/math/copysign.rs", + "libm/src/math/generic/copysign.rs" + ], + "type": "f32" + }, + "copysignf128": { + "sources": [ + "libm/src/math/copysign.rs", + "libm/src/math/generic/copysign.rs" + ], + "type": "f128" + }, + "copysignf16": { + "sources": [ + "libm/src/math/copysign.rs", + "libm/src/math/generic/copysign.rs" + ], + "type": "f16" + }, + "cos": { + "sources": [ + "libm/src/math/cos.rs" + ], + "type": "f64" + }, + "cosf": { + "sources": [ + "libm/src/math/cosf.rs" + ], + "type": "f32" + }, + "cosh": { + "sources": [ + "libm/src/math/cosh.rs" + ], + "type": "f64" + }, + "coshf": { + "sources": [ + "libm/src/math/coshf.rs" + ], + "type": "f32" + }, + "erf": { + "sources": [ + "libm/src/math/erf.rs" + ], + "type": "f64" + }, + "erfc": { + "sources": [ + "libm/src/math/erf.rs" + ], + "type": "f64" + }, + "erfcf": { + "sources": [ + "libm/src/math/erff.rs" + ], + "type": "f32" + }, + "erff": { + "sources": [ + "libm/src/math/erff.rs" + ], + "type": "f32" + }, + "exp": { + "sources": [ + "libm/src/math/exp.rs" + ], + "type": "f64" + }, + "exp10": { + "sources": [ + "libm/src/math/exp10.rs" + ], + "type": "f64" + }, + "exp10f": { + "sources": [ + "libm/src/math/exp10f.rs" + ], + "type": "f32" + }, + "exp2": { + "sources": [ + "libm/src/math/exp2.rs" + ], + "type": "f64" + }, + "exp2f": { + "sources": [ + "libm/src/math/exp2f.rs" + ], + "type": "f32" + }, + "expf": { + "sources": [ + "libm/src/math/expf.rs" + ], + "type": "f32" + }, + "expm1": { + "sources": [ + "libm/src/math/expm1.rs" + ], + "type": "f64" + }, + "expm1f": { + "sources": [ + "libm/src/math/expm1f.rs" + ], + "type": "f32" + }, + "fabs": { + "sources": [ + "libm/src/math/arch/wasm32.rs", + "libm/src/math/fabs.rs", + "libm/src/math/generic/fabs.rs" + ], + "type": "f64" + }, + "fabsf": { + "sources": [ + "libm/src/math/arch/wasm32.rs", + "libm/src/math/fabs.rs", + "libm/src/math/generic/fabs.rs" + ], + "type": "f32" + }, + "fabsf128": { + "sources": [ + "libm/src/math/fabs.rs", + "libm/src/math/generic/fabs.rs" + ], + "type": "f128" + }, + "fabsf16": { + "sources": [ + "libm/src/math/fabs.rs", + "libm/src/math/generic/fabs.rs" + ], + "type": "f16" + }, + "fdim": { + "sources": [ + "libm/src/math/fdim.rs", + "libm/src/math/generic/fdim.rs" + ], + "type": "f64" + }, + "fdimf": { + "sources": [ + "libm/src/math/fdim.rs", + "libm/src/math/generic/fdim.rs" + ], + "type": "f32" + }, + "fdimf128": { + "sources": [ + "libm/src/math/fdim.rs", + "libm/src/math/generic/fdim.rs" + ], + "type": "f128" + }, + "fdimf16": { + "sources": [ + "libm/src/math/fdim.rs", + "libm/src/math/generic/fdim.rs" + ], + "type": "f16" + }, + "floor": { + "sources": [ + "libm/src/math/arch/i586.rs", + "libm/src/math/arch/wasm32.rs", + "libm/src/math/floor.rs", + "libm/src/math/generic/floor.rs" + ], + "type": "f64" + }, + "floorf": { + "sources": [ + "libm/src/math/arch/wasm32.rs", + "libm/src/math/floor.rs", + "libm/src/math/generic/floor.rs" + ], + "type": "f32" + }, + "floorf128": { + "sources": [ + "libm/src/math/floor.rs", + "libm/src/math/generic/floor.rs" + ], + "type": "f128" + }, + "floorf16": { + "sources": [ + "libm/src/math/floor.rs", + "libm/src/math/generic/floor.rs" + ], + "type": "f16" + }, + "fma": { + "sources": [ + "libm/src/math/arch/aarch64.rs", + "libm/src/math/fma.rs" + ], + "type": "f64" + }, + "fmaf": { + "sources": [ + "libm/src/math/arch/aarch64.rs", + "libm/src/math/fma_wide.rs" + ], + "type": "f32" + }, + "fmaf128": { + "sources": [ + "libm/src/math/fma.rs" + ], + "type": "f128" + }, + "fmax": { + "sources": [ + "libm/src/math/fmin_fmax.rs", + "libm/src/math/generic/fmax.rs" + ], + "type": "f64" + }, + "fmaxf": { + "sources": [ + "libm/src/math/fmin_fmax.rs", + "libm/src/math/generic/fmax.rs" + ], + "type": "f32" + }, + "fmaxf128": { + "sources": [ + "libm/src/math/fmin_fmax.rs", + "libm/src/math/generic/fmax.rs" + ], + "type": "f128" + }, + "fmaxf16": { + "sources": [ + "libm/src/math/fmin_fmax.rs", + "libm/src/math/generic/fmax.rs" + ], + "type": "f16" + }, + "fmaximum": { + "sources": [ + "libm/src/math/fminimum_fmaximum.rs", + "libm/src/math/generic/fmaximum.rs" + ], + "type": "f64" + }, + "fmaximum_num": { + "sources": [ + "libm/src/math/fminimum_fmaximum_num.rs", + "libm/src/math/generic/fmaximum_num.rs" + ], + "type": "f64" + }, + "fmaximum_numf": { + "sources": [ + "libm/src/math/fminimum_fmaximum_num.rs", + "libm/src/math/generic/fmaximum_num.rs" + ], + "type": "f32" + }, + "fmaximum_numf128": { + "sources": [ + "libm/src/math/fminimum_fmaximum_num.rs", + "libm/src/math/generic/fmaximum_num.rs" + ], + "type": "f128" + }, + "fmaximum_numf16": { + "sources": [ + "libm/src/math/fminimum_fmaximum_num.rs", + "libm/src/math/generic/fmaximum_num.rs" + ], + "type": "f16" + }, + "fmaximumf": { + "sources": [ + "libm/src/math/fminimum_fmaximum.rs", + "libm/src/math/generic/fmaximum.rs" + ], + "type": "f32" + }, + "fmaximumf128": { + "sources": [ + "libm/src/math/fminimum_fmaximum.rs", + "libm/src/math/generic/fmaximum.rs" + ], + "type": "f128" + }, + "fmaximumf16": { + "sources": [ + "libm/src/math/fminimum_fmaximum.rs", + "libm/src/math/generic/fmaximum.rs" + ], + "type": "f16" + }, + "fmin": { + "sources": [ + "libm/src/math/fmin_fmax.rs", + "libm/src/math/generic/fmin.rs" + ], + "type": "f64" + }, + "fminf": { + "sources": [ + "libm/src/math/fmin_fmax.rs", + "libm/src/math/generic/fmin.rs" + ], + "type": "f32" + }, + "fminf128": { + "sources": [ + "libm/src/math/fmin_fmax.rs", + "libm/src/math/generic/fmin.rs" + ], + "type": "f128" + }, + "fminf16": { + "sources": [ + "libm/src/math/fmin_fmax.rs", + "libm/src/math/generic/fmin.rs" + ], + "type": "f16" + }, + "fminimum": { + "sources": [ + "libm/src/math/fminimum_fmaximum.rs", + "libm/src/math/generic/fminimum.rs" + ], + "type": "f64" + }, + "fminimum_num": { + "sources": [ + "libm/src/math/fminimum_fmaximum_num.rs", + "libm/src/math/generic/fminimum_num.rs" + ], + "type": "f64" + }, + "fminimum_numf": { + "sources": [ + "libm/src/math/fminimum_fmaximum_num.rs", + "libm/src/math/generic/fminimum_num.rs" + ], + "type": "f32" + }, + "fminimum_numf128": { + "sources": [ + "libm/src/math/fminimum_fmaximum_num.rs", + "libm/src/math/generic/fminimum_num.rs" + ], + "type": "f128" + }, + "fminimum_numf16": { + "sources": [ + "libm/src/math/fminimum_fmaximum_num.rs", + "libm/src/math/generic/fminimum_num.rs" + ], + "type": "f16" + }, + "fminimumf": { + "sources": [ + "libm/src/math/fminimum_fmaximum.rs", + "libm/src/math/generic/fminimum.rs" + ], + "type": "f32" + }, + "fminimumf128": { + "sources": [ + "libm/src/math/fminimum_fmaximum.rs", + "libm/src/math/generic/fminimum.rs" + ], + "type": "f128" + }, + "fminimumf16": { + "sources": [ + "libm/src/math/fminimum_fmaximum.rs", + "libm/src/math/generic/fminimum.rs" + ], + "type": "f16" + }, + "fmod": { + "sources": [ + "libm/src/math/fmod.rs", + "libm/src/math/generic/fmod.rs" + ], + "type": "f64" + }, + "fmodf": { + "sources": [ + "libm/src/math/fmod.rs", + "libm/src/math/generic/fmod.rs" + ], + "type": "f32" + }, + "fmodf128": { + "sources": [ + "libm/src/math/fmod.rs", + "libm/src/math/generic/fmod.rs" + ], + "type": "f128" + }, + "fmodf16": { + "sources": [ + "libm/src/math/fmod.rs", + "libm/src/math/generic/fmod.rs" + ], + "type": "f16" + }, + "frexp": { + "sources": [ + "libm/src/math/frexp.rs" + ], + "type": "f64" + }, + "frexpf": { + "sources": [ + "libm/src/math/frexpf.rs" + ], + "type": "f32" + }, + "hypot": { + "sources": [ + "libm/src/math/hypot.rs" + ], + "type": "f64" + }, + "hypotf": { + "sources": [ + "libm/src/math/hypotf.rs" + ], + "type": "f32" + }, + "ilogb": { + "sources": [ + "libm/src/math/ilogb.rs" + ], + "type": "f64" + }, + "ilogbf": { + "sources": [ + "libm/src/math/ilogbf.rs" + ], + "type": "f32" + }, + "j0": { + "sources": [ + "libm/src/math/j0.rs" + ], + "type": "f64" + }, + "j0f": { + "sources": [ + "libm/src/math/j0f.rs" + ], + "type": "f32" + }, + "j1": { + "sources": [ + "libm/src/math/j1.rs" + ], + "type": "f64" + }, + "j1f": { + "sources": [ + "libm/src/math/j1f.rs" + ], + "type": "f32" + }, + "jn": { + "sources": [ + "libm/src/math/jn.rs" + ], + "type": "f64" + }, + "jnf": { + "sources": [ + "libm/src/math/jnf.rs" + ], + "type": "f32" + }, + "ldexp": { + "sources": [ + "libm/src/math/ldexp.rs" + ], + "type": "f64" + }, + "ldexpf": { + "sources": [ + "libm/src/math/ldexp.rs" + ], + "type": "f32" + }, + "ldexpf128": { + "sources": [ + "libm/src/math/ldexp.rs" + ], + "type": "f128" + }, + "ldexpf16": { + "sources": [ + "libm/src/math/ldexp.rs" + ], + "type": "f16" + }, + "lgamma": { + "sources": [ + "libm/src/math/lgamma.rs" + ], + "type": "f64" + }, + "lgamma_r": { + "sources": [ + "libm/src/math/lgamma_r.rs" + ], + "type": "f64" + }, + "lgammaf": { + "sources": [ + "libm/src/math/lgammaf.rs" + ], + "type": "f32" + }, + "lgammaf_r": { + "sources": [ + "libm/src/math/lgammaf_r.rs" + ], + "type": "f32" + }, + "log": { + "sources": [ + "libm/src/math/log.rs" + ], + "type": "f64" + }, + "log10": { + "sources": [ + "libm/src/math/log10.rs" + ], + "type": "f64" + }, + "log10f": { + "sources": [ + "libm/src/math/log10f.rs" + ], + "type": "f32" + }, + "log1p": { + "sources": [ + "libm/src/math/log1p.rs" + ], + "type": "f64" + }, + "log1pf": { + "sources": [ + "libm/src/math/log1pf.rs" + ], + "type": "f32" + }, + "log2": { + "sources": [ + "libm/src/math/log2.rs" + ], + "type": "f64" + }, + "log2f": { + "sources": [ + "libm/src/math/log2f.rs" + ], + "type": "f32" + }, + "logf": { + "sources": [ + "libm/src/math/logf.rs" + ], + "type": "f32" + }, + "modf": { + "sources": [ + "libm/src/math/modf.rs" + ], + "type": "f64" + }, + "modff": { + "sources": [ + "libm/src/math/modff.rs" + ], + "type": "f32" + }, + "nextafter": { + "sources": [ + "libm/src/math/nextafter.rs" + ], + "type": "f64" + }, + "nextafterf": { + "sources": [ + "libm/src/math/nextafterf.rs" + ], + "type": "f32" + }, + "pow": { + "sources": [ + "libm/src/math/pow.rs" + ], + "type": "f64" + }, + "powf": { + "sources": [ + "libm/src/math/powf.rs" + ], + "type": "f32" + }, + "remainder": { + "sources": [ + "libm/src/math/remainder.rs" + ], + "type": "f64" + }, + "remainderf": { + "sources": [ + "libm/src/math/remainderf.rs" + ], + "type": "f32" + }, + "remquo": { + "sources": [ + "libm/src/math/remquo.rs" + ], + "type": "f64" + }, + "remquof": { + "sources": [ + "libm/src/math/remquof.rs" + ], + "type": "f32" + }, + "rint": { + "sources": [ + "libm/src/math/arch/aarch64.rs", + "libm/src/math/arch/wasm32.rs", + "libm/src/math/rint.rs" + ], + "type": "f64" + }, + "rintf": { + "sources": [ + "libm/src/math/arch/aarch64.rs", + "libm/src/math/arch/wasm32.rs", + "libm/src/math/rint.rs" + ], + "type": "f32" + }, + "rintf128": { + "sources": [ + "libm/src/math/rint.rs" + ], + "type": "f128" + }, + "rintf16": { + "sources": [ + "libm/src/math/arch/aarch64.rs", + "libm/src/math/rint.rs" + ], + "type": "f16" + }, + "round": { + "sources": [ + "libm/src/math/generic/round.rs", + "libm/src/math/round.rs" + ], + "type": "f64" + }, + "roundeven": { + "sources": [ + "libm/src/math/roundeven.rs" + ], + "type": "f64" + }, + "roundevenf": { + "sources": [ + "libm/src/math/roundeven.rs" + ], + "type": "f32" + }, + "roundevenf128": { + "sources": [ + "libm/src/math/roundeven.rs" + ], + "type": "f128" + }, + "roundevenf16": { + "sources": [ + "libm/src/math/roundeven.rs" + ], + "type": "f16" + }, + "roundf": { + "sources": [ + "libm/src/math/generic/round.rs", + "libm/src/math/round.rs" + ], + "type": "f32" + }, + "roundf128": { + "sources": [ + "libm/src/math/generic/round.rs", + "libm/src/math/round.rs" + ], + "type": "f128" + }, + "roundf16": { + "sources": [ + "libm/src/math/generic/round.rs", + "libm/src/math/round.rs" + ], + "type": "f16" + }, + "scalbn": { + "sources": [ + "libm/src/math/generic/scalbn.rs", + "libm/src/math/scalbn.rs" + ], + "type": "f64" + }, + "scalbnf": { + "sources": [ + "libm/src/math/generic/scalbn.rs", + "libm/src/math/scalbn.rs" + ], + "type": "f32" + }, + "scalbnf128": { + "sources": [ + "libm/src/math/generic/scalbn.rs", + "libm/src/math/scalbn.rs" + ], + "type": "f128" + }, + "scalbnf16": { + "sources": [ + "libm/src/math/generic/scalbn.rs", + "libm/src/math/scalbn.rs" + ], + "type": "f16" + }, + "sin": { + "sources": [ + "libm/src/math/sin.rs" + ], + "type": "f64" + }, + "sincos": { + "sources": [ + "libm/src/math/sincos.rs" + ], + "type": "f64" + }, + "sincosf": { + "sources": [ + "libm/src/math/sincosf.rs" + ], + "type": "f32" + }, + "sinf": { + "sources": [ + "libm/src/math/sinf.rs" + ], + "type": "f32" + }, + "sinh": { + "sources": [ + "libm/src/math/sinh.rs" + ], + "type": "f64" + }, + "sinhf": { + "sources": [ + "libm/src/math/sinhf.rs" + ], + "type": "f32" + }, + "sqrt": { + "sources": [ + "libm/src/math/arch/aarch64.rs", + "libm/src/math/arch/i686.rs", + "libm/src/math/arch/wasm32.rs", + "libm/src/math/generic/sqrt.rs", + "libm/src/math/sqrt.rs" + ], + "type": "f64" + }, + "sqrtf": { + "sources": [ + "libm/src/math/arch/aarch64.rs", + "libm/src/math/arch/i686.rs", + "libm/src/math/arch/wasm32.rs", + "libm/src/math/generic/sqrt.rs", + "libm/src/math/sqrt.rs" + ], + "type": "f32" + }, + "sqrtf128": { + "sources": [ + "libm/src/math/generic/sqrt.rs", + "libm/src/math/sqrt.rs" + ], + "type": "f128" + }, + "sqrtf16": { + "sources": [ + "libm/src/math/arch/aarch64.rs", + "libm/src/math/generic/sqrt.rs", + "libm/src/math/sqrt.rs" + ], + "type": "f16" + }, + "tan": { + "sources": [ + "libm/src/math/tan.rs" + ], + "type": "f64" + }, + "tanf": { + "sources": [ + "libm/src/math/tanf.rs" + ], + "type": "f32" + }, + "tanh": { + "sources": [ + "libm/src/math/tanh.rs" + ], + "type": "f64" + }, + "tanhf": { + "sources": [ + "libm/src/math/tanhf.rs" + ], + "type": "f32" + }, + "tgamma": { + "sources": [ + "libm/src/math/tgamma.rs" + ], + "type": "f64" + }, + "tgammaf": { + "sources": [ + "libm/src/math/tgammaf.rs" + ], + "type": "f32" + }, + "trunc": { + "sources": [ + "libm/src/math/arch/wasm32.rs", + "libm/src/math/generic/trunc.rs", + "libm/src/math/trunc.rs" + ], + "type": "f64" + }, + "truncf": { + "sources": [ + "libm/src/math/arch/wasm32.rs", + "libm/src/math/generic/trunc.rs", + "libm/src/math/trunc.rs" + ], + "type": "f32" + }, + "truncf128": { + "sources": [ + "libm/src/math/generic/trunc.rs", + "libm/src/math/trunc.rs" + ], + "type": "f128" + }, + "truncf16": { + "sources": [ + "libm/src/math/generic/trunc.rs", + "libm/src/math/trunc.rs" + ], + "type": "f16" + }, + "y0": { + "sources": [ + "libm/src/math/j0.rs" + ], + "type": "f64" + }, + "y0f": { + "sources": [ + "libm/src/math/j0f.rs" + ], + "type": "f32" + }, + "y1": { + "sources": [ + "libm/src/math/j1.rs" + ], + "type": "f64" + }, + "y1f": { + "sources": [ + "libm/src/math/j1f.rs" + ], + "type": "f32" + }, + "yn": { + "sources": [ + "libm/src/math/jn.rs" + ], + "type": "f64" + }, + "ynf": { + "sources": [ + "libm/src/math/jnf.rs" + ], + "type": "f32" + } +} diff --git a/etc/function-list.txt b/etc/function-list.txt new file mode 100644 index 000000000..1f226c8c0 --- /dev/null +++ b/etc/function-list.txt @@ -0,0 +1,164 @@ +# autogenerated by update-api-list.py +acos +acosf +acosh +acoshf +asin +asinf +asinh +asinhf +atan +atan2 +atan2f +atanf +atanh +atanhf +cbrt +cbrtf +ceil +ceilf +ceilf128 +ceilf16 +copysign +copysignf +copysignf128 +copysignf16 +cos +cosf +cosh +coshf +erf +erfc +erfcf +erff +exp +exp10 +exp10f +exp2 +exp2f +expf +expm1 +expm1f +fabs +fabsf +fabsf128 +fabsf16 +fdim +fdimf +fdimf128 +fdimf16 +floor +floorf +floorf128 +floorf16 +fma +fmaf +fmaf128 +fmax +fmaxf +fmaxf128 +fmaxf16 +fmaximum +fmaximum_num +fmaximum_numf +fmaximum_numf128 +fmaximum_numf16 +fmaximumf +fmaximumf128 +fmaximumf16 +fmin +fminf +fminf128 +fminf16 +fminimum +fminimum_num +fminimum_numf +fminimum_numf128 +fminimum_numf16 +fminimumf +fminimumf128 +fminimumf16 +fmod +fmodf +fmodf128 +fmodf16 +frexp +frexpf +hypot +hypotf +ilogb +ilogbf +j0 +j0f +j1 +j1f +jn +jnf +ldexp +ldexpf +ldexpf128 +ldexpf16 +lgamma +lgamma_r +lgammaf +lgammaf_r +log +log10 +log10f +log1p +log1pf +log2 +log2f +logf +modf +modff +nextafter +nextafterf +pow +powf +remainder +remainderf +remquo +remquof +rint +rintf +rintf128 +rintf16 +round +roundeven +roundevenf +roundevenf128 +roundevenf16 +roundf +roundf128 +roundf16 +scalbn +scalbnf +scalbnf128 +scalbnf16 +sin +sincos +sincosf +sinf +sinh +sinhf +sqrt +sqrtf +sqrtf128 +sqrtf16 +tan +tanf +tanh +tanhf +tgamma +tgammaf +trunc +truncf +truncf128 +truncf16 +y0 +y0f +y1 +y1f +yn +ynf diff --git a/etc/update-api-list.py b/etc/update-api-list.py new file mode 100755 index 000000000..0770a8b20 --- /dev/null +++ b/etc/update-api-list.py @@ -0,0 +1,359 @@ +#!/usr/bin/env python3 +"""Create a text file listing all public API. This can be used to ensure that all +functions are covered by our macros. + +This file additionally does tidy-esque checks that all functions are listed where +needed, or that lists are sorted. +""" + +import difflib +import json +import re +import subprocess as sp +import sys +from dataclasses import dataclass +from glob import glob +from pathlib import Path +from typing import Any, Callable, TypeAlias + +SELF_PATH = Path(__file__) +ETC_DIR = SELF_PATH.parent +ROOT_DIR = ETC_DIR.parent + +# These files do not trigger a retest. +IGNORED_SOURCES = ["libm/src/libm_helper.rs", "libm/src/math/support/float_traits.rs"] + +IndexTy: TypeAlias = dict[str, dict[str, Any]] +"""Type of the `index` item in rustdoc's JSON output""" + + +def eprint(*args, **kwargs): + """Print to stderr.""" + print(*args, file=sys.stderr, **kwargs) + + +@dataclass +class Crate: + """Representation of public interfaces and function defintion locations in + `libm`. + """ + + public_functions: list[str] + """List of all public functions.""" + defs: dict[str, list[str]] + """Map from `name->[source files]` to find all places that define a public + function. We track this to know which tests need to be rerun when specific files + get updated. + """ + types: dict[str, str] + """Map from `name->type`.""" + + def __init__(self) -> None: + self.public_functions = [] + self.defs = {} + self.types = {} + + j = self.get_rustdoc_json() + index: IndexTy = j["index"] + self._init_function_list(index) + self._init_defs(index) + self._init_types() + + @staticmethod + def get_rustdoc_json() -> dict[Any, Any]: + """Get rustdoc's JSON output for the `libm` crate.""" + + j = sp.check_output( + [ + "rustdoc", + "libm/src/lib.rs", + "--edition=2021", + "--document-private-items", + "--output-format=json", + "--cfg=f16_enabled", + "--cfg=f128_enabled", + "-Zunstable-options", + "-o-", + ], + cwd=ROOT_DIR, + text=True, + ) + j = json.loads(j) + return j + + def _init_function_list(self, index: IndexTy) -> None: + """Get a list of public functions from rustdoc JSON output. + + Note that this only finds functions that are reexported in `lib.rs`, this will + need to be adjusted if we need to account for functions that are defined there, or + glob reexports in other locations. + """ + # Filter out items that are not public + public = [i for i in index.values() if i["visibility"] == "public"] + + # Collect a list of source IDs for reexported items in `lib.rs` or `mod math`. + use = (i for i in public if "use" in i["inner"]) + use = ( + i + for i in use + if i["span"]["filename"] in ["libm/src/math/mod.rs", "libm/src/lib.rs"] + ) + reexported_ids = [item["inner"]["use"]["id"] for item in use] + + # Collect a list of reexported items that are functions + for id in reexported_ids: + srcitem = index.get(str(id)) + # External crate + if srcitem is None: + continue + + # Skip if not a function + if "function" not in srcitem["inner"]: + continue + + self.public_functions.append(srcitem["name"]) + self.public_functions.sort() + + def _init_defs(self, index: IndexTy) -> None: + defs = {name: set() for name in self.public_functions} + funcs = (i for i in index.values() if "function" in i["inner"]) + funcs = (f for f in funcs if f["name"] in self.public_functions) + for func in funcs: + defs[func["name"]].add(func["span"]["filename"]) + + # A lot of the `arch` module is often configured out so doesn't show up in docs. Use + # string matching as a fallback. + for fname in glob("libm/src/math/arch/**.rs", root_dir=ROOT_DIR): + contents = (ROOT_DIR.joinpath(fname)).read_text() + + for name in self.public_functions: + if f"fn {name}" in contents: + defs[name].add(fname) + + for name, sources in defs.items(): + base_sources = defs[base_name(name)[0]] + for src in (s for s in base_sources if "generic" in s): + sources.add(src) + + for src in IGNORED_SOURCES: + sources.discard(src) + + # Sort the set + self.defs = {k: sorted(v) for (k, v) in defs.items()} + + def _init_types(self) -> None: + self.types = {name: base_name(name)[1] for name in self.public_functions} + + def write_function_list(self, check: bool) -> None: + """Collect the list of public functions to a simple text file.""" + output = "# autogenerated by update-api-list.py\n" + for name in self.public_functions: + output += f"{name}\n" + + out_file = ETC_DIR.joinpath("function-list.txt") + + if check: + with open(out_file, "r") as f: + current = f.read() + diff_and_exit(current, output, "function list") + else: + with open(out_file, "w") as f: + f.write(output) + + def write_function_defs(self, check: bool) -> None: + """Collect the list of information about public functions to a JSON file .""" + comment = ( + "Autogenerated by update-api-list.py. " + "List of files that define a function with a given name. " + "This file is checked in to make it obvious if refactoring breaks things" + ) + + d = {"__comment": comment} + d |= { + name: {"sources": self.defs[name], "type": self.types[name]} + for name in self.public_functions + } + + out_file = ETC_DIR.joinpath("function-definitions.json") + output = json.dumps(d, indent=4) + "\n" + + if check: + with open(out_file, "r") as f: + current = f.read() + diff_and_exit(current, output, "source list") + else: + with open(out_file, "w") as f: + f.write(output) + + def tidy_lists(self) -> None: + """In each file, check annotations indicating blocks of code should be sorted or should + include all public API. + """ + + flist = sp.check_output(["git", "ls-files"], cwd=ROOT_DIR, text=True) + + for path in flist.splitlines(): + fpath = ROOT_DIR.joinpath(path) + if fpath.is_dir() or fpath == SELF_PATH: + continue + + lines = fpath.read_text().splitlines() + + validate_delimited_block( + fpath, + lines, + "verify-sorted-start", + "verify-sorted-end", + ensure_sorted, + ) + + validate_delimited_block( + fpath, + lines, + "verify-apilist-start", + "verify-apilist-end", + lambda p, n, lines: self.ensure_contains_api(p, n, lines), + ) + + def ensure_contains_api(self, fpath: Path, line_num: int, lines: list[str]): + """Given a list of strings, ensure that each public function we have is named + somewhere. + """ + not_found = [] + for func in self.public_functions: + # The function name may be on its own or somewhere in a snake case string. + pat = re.compile(rf"(\b|_){func}(\b|_)") + found = next((line for line in lines if pat.search(line)), None) + + if found is None: + not_found.append(func) + + if len(not_found) == 0: + return + + relpath = fpath.relative_to(ROOT_DIR) + eprint(f"functions not found at {relpath}:{line_num}: {not_found}") + exit(1) + + +def validate_delimited_block( + fpath: Path, + lines: list[str], + start: str, + end: str, + validate: Callable[[Path, int, list[str]], None], +) -> None: + """Identify blocks of code wrapped within `start` and `end`, collect their contents + to a list of strings, and call `validate` for each of those lists. + """ + relpath = fpath.relative_to(ROOT_DIR) + block_lines = [] + block_start_line: None | int = None + for line_num, line in enumerate(lines): + line_num += 1 + + if start in line: + block_start_line = line_num + continue + + if end in line: + if block_start_line is None: + eprint(f"`{end}` without `{start}` at {relpath}:{line_num}") + exit(1) + + validate(fpath, block_start_line, block_lines) + block_lines = [] + block_start_line = None + continue + + if block_start_line is not None: + block_lines.append(line) + + if block_start_line is not None: + eprint(f"`{start}` without `{end}` at {relpath}:{block_start_line}") + exit(1) + + +def ensure_sorted(fpath: Path, block_start_line: int, lines: list[str]) -> None: + """Ensure that a list of lines is sorted, otherwise print a diff and exit.""" + relpath = fpath.relative_to(ROOT_DIR) + diff_and_exit( + "\n".join(lines), + "\n".join(sorted(lines)), + f"sorted block at {relpath}:{block_start_line}", + ) + + +def diff_and_exit(actual: str, expected: str, name: str): + """If the two strings are different, print a diff between them and then exit + with an error. + """ + if actual == expected: + print(f"{name} output matches expected; success") + return + + a = [f"{line}\n" for line in actual.splitlines()] + b = [f"{line}\n" for line in expected.splitlines()] + + diff = difflib.unified_diff(a, b, "actual", "expected") + sys.stdout.writelines(diff) + print(f"mismatched {name}") + exit(1) + + +def base_name(name: str) -> tuple[str, str]: + """Return the basename and type from a full function name. Keep in sync with Rust's + `fn base_name`. + """ + known_mappings = [ + ("erff", ("erf", "f32")), + ("erf", ("erf", "f64")), + ("modff", ("modf", "f32")), + ("modf", ("modf", "f64")), + ("lgammaf_r", ("lgamma_r", "f32")), + ("lgamma_r", ("lgamma_r", "f64")), + ] + + found = next((base for (full, base) in known_mappings if full == name), None) + if found is not None: + return found + + if name.endswith("f"): + return (name.rstrip("f"), "f32") + + if name.endswith("f16"): + return (name.rstrip("f16"), "f16") + + if name.endswith("f128"): + return (name.rstrip("f128"), "f128") + + return (name, "f64") + + +def ensure_updated_list(check: bool) -> None: + """Runner to update the function list and JSON, or check that it is already up + to date. + """ + crate = Crate() + crate.write_function_list(check) + crate.write_function_defs(check) + + crate.tidy_lists() + + +def main(): + """By default overwrite the file. If `--check` is passed, print a diff instead and + error if the files are different. + """ + match sys.argv: + case [_]: + ensure_updated_list(False) + case [_, "--check"]: + ensure_updated_list(True) + case _: + print("unrecognized arguments") + exit(1) + + +if __name__ == "__main__": + main() diff --git a/libm/Cargo.toml b/libm/Cargo.toml new file mode 100644 index 000000000..44154c1a8 --- /dev/null +++ b/libm/Cargo.toml @@ -0,0 +1,49 @@ +[package] +authors = ["Jorge Aparicio "] +categories = ["no-std"] +description = "libm in pure Rust" +documentation = "https://docs.rs/libm" +keywords = ["libm", "math"] +license = "MIT" +name = "libm" +readme = "README.md" +repository = "https://github.com/rust-lang/libm" +version = "0.2.11" +edition = "2021" +rust-version = "1.63" + +[features] +default = ["arch"] + +# Enable architecture-specific features such as SIMD or assembly routines. +arch = [] + +# This tells the compiler to assume that a Nightly toolchain is being used and +# that it should activate any useful Nightly things accordingly. +unstable = ["unstable-intrinsics", "unstable-float"] + +# Enable calls to functions in `core::intrinsics` +unstable-intrinsics = [] + +# Make some internal things public for testing. +unstable-public-internals = [] + +# Enable the nightly-only `f16` and `f128`. +unstable-float = [] + +# Used to prevent using any intrinsics or arch-specific code. +# +# HACK: this is a negative feature which is generally a bad idea in Cargo, but +# we need it to be able to forbid other features when this crate is used in +# Rust dependencies. Setting this overrides all features that may enable +# hard float operations. +force-soft-floats = [] + +[dev-dependencies] +no-panic = "0.1.35" + +[lints.rust] +unexpected_cfgs = { level = "warn", check-cfg = [ + # compiler-builtins sets this feature, but we use it in `libm` + 'cfg(feature, values("compiler-builtins"))', +] } diff --git a/libm/LICENSE.txt b/libm/LICENSE.txt new file mode 120000 index 000000000..4ab43736a --- /dev/null +++ b/libm/LICENSE.txt @@ -0,0 +1 @@ +../LICENSE.txt \ No newline at end of file diff --git a/libm/README.md b/libm/README.md new file mode 120000 index 000000000..32d46ee88 --- /dev/null +++ b/libm/README.md @@ -0,0 +1 @@ +../README.md \ No newline at end of file diff --git a/libm/build.rs b/libm/build.rs new file mode 100644 index 000000000..07d08ed43 --- /dev/null +++ b/libm/build.rs @@ -0,0 +1,18 @@ +use std::env; + +mod configure; + +fn main() { + let cfg = configure::Config::from_env(); + + println!("cargo:rerun-if-changed=build.rs"); + println!("cargo:rerun-if-changed=configure.rs"); + println!("cargo:rustc-check-cfg=cfg(assert_no_panic)"); + + // If set, enable `no-panic`. Requires LTO (`release-opt` profile). + if env::var("ENSURE_NO_PANIC").is_ok() { + println!("cargo:rustc-cfg=assert_no_panic"); + } + + configure::emit_libm_config(&cfg); +} diff --git a/libm/configure.rs b/libm/configure.rs new file mode 100644 index 000000000..8b8ba9815 --- /dev/null +++ b/libm/configure.rs @@ -0,0 +1,183 @@ +// Configuration shared with both libm and libm-test + +use std::env; +use std::path::PathBuf; + +#[allow(dead_code)] +pub struct Config { + pub manifest_dir: PathBuf, + pub out_dir: PathBuf, + pub opt_level: String, + pub cargo_features: Vec, + pub target_arch: String, + pub target_env: String, + pub target_family: Option, + pub target_os: String, + pub target_string: String, + pub target_vendor: String, + pub target_features: Vec, +} + +impl Config { + pub fn from_env() -> Self { + let target_features = env::var("CARGO_CFG_TARGET_FEATURE") + .map(|feats| feats.split(',').map(ToOwned::to_owned).collect()) + .unwrap_or_default(); + let cargo_features = env::vars() + .filter_map(|(name, _value)| name.strip_prefix("CARGO_FEATURE_").map(ToOwned::to_owned)) + .map(|s| s.to_lowercase().replace("_", "-")) + .collect(); + + Self { + manifest_dir: PathBuf::from(env::var("CARGO_MANIFEST_DIR").unwrap()), + out_dir: PathBuf::from(env::var("OUT_DIR").unwrap()), + opt_level: env::var("OPT_LEVEL").unwrap(), + cargo_features, + target_arch: env::var("CARGO_CFG_TARGET_ARCH").unwrap(), + target_env: env::var("CARGO_CFG_TARGET_ENV").unwrap(), + target_family: env::var("CARGO_CFG_TARGET_FAMILY").ok(), + target_os: env::var("CARGO_CFG_TARGET_OS").unwrap(), + target_string: env::var("TARGET").unwrap(), + target_vendor: env::var("CARGO_CFG_TARGET_VENDOR").unwrap(), + target_features, + } + } +} + +/// Libm gets most config options made available. +#[allow(dead_code)] +pub fn emit_libm_config(cfg: &Config) { + emit_intrinsics_cfg(); + emit_arch_cfg(); + emit_optimization_cfg(cfg); + emit_cfg_shorthands(cfg); + emit_cfg_env(cfg); + emit_f16_f128_cfg(cfg); +} + +/// Tests don't need most feature-related config. +#[allow(dead_code)] +pub fn emit_test_config(cfg: &Config) { + emit_optimization_cfg(cfg); + emit_cfg_shorthands(cfg); + emit_cfg_env(cfg); + emit_f16_f128_cfg(cfg); +} + +/// Simplify the feature logic for enabling intrinsics so code only needs to use +/// `cfg(intrinsics_enabled)`. +fn emit_intrinsics_cfg() { + println!("cargo:rustc-check-cfg=cfg(intrinsics_enabled)"); + + // Disabled by default; `unstable-intrinsics` enables again; `force-soft-floats` overrides + // to disable. + if cfg!(feature = "unstable-intrinsics") && !cfg!(feature = "force-soft-floats") { + println!("cargo:rustc-cfg=intrinsics_enabled"); + } +} + +/// Simplify the feature logic for enabling arch-specific features so code only needs to use +/// `cfg(arch_enabled)`. +fn emit_arch_cfg() { + println!("cargo:rustc-check-cfg=cfg(arch_enabled)"); + + // Enabled by default via the "arch" feature, `force-soft-floats` overrides to disable. + if cfg!(feature = "arch") && !cfg!(feature = "force-soft-floats") { + println!("cargo:rustc-cfg=arch_enabled"); + } +} + +/// Some tests are extremely slow. Emit a config option based on optimization level. +fn emit_optimization_cfg(cfg: &Config) { + println!("cargo:rustc-check-cfg=cfg(optimizations_enabled)"); + + if !matches!(cfg.opt_level.as_str(), "0" | "1") { + println!("cargo:rustc-cfg=optimizations_enabled"); + } +} + +/// Provide an alias for common longer config combinations. +fn emit_cfg_shorthands(cfg: &Config) { + println!("cargo:rustc-check-cfg=cfg(x86_no_sse)"); + if cfg.target_arch == "x86" && !cfg.target_features.iter().any(|f| f == "sse") { + // Shorthand to detect i586 targets + println!("cargo:rustc-cfg=x86_no_sse"); + } +} + +/// Reemit config that we make use of for test logging. +fn emit_cfg_env(cfg: &Config) { + println!("cargo:rustc-env=CFG_CARGO_FEATURES={:?}", cfg.cargo_features); + println!("cargo:rustc-env=CFG_OPT_LEVEL={}", cfg.opt_level); + println!("cargo:rustc-env=CFG_TARGET_FEATURES={:?}", cfg.target_features); +} + +/// Configure whether or not `f16` and `f128` support should be enabled. +fn emit_f16_f128_cfg(cfg: &Config) { + println!("cargo:rustc-check-cfg=cfg(f16_enabled)"); + println!("cargo:rustc-check-cfg=cfg(f128_enabled)"); + + // `unstable-float` enables these features. + if !cfg!(feature = "unstable-float") { + return; + } + + // Set whether or not `f16` and `f128` are supported at a basic level by LLVM. This only means + // that the backend will not crash when using these types and generates code that can be called + // without crashing (no infinite recursion). This does not mean that the platform doesn't have + // ABI or other bugs. + // + // We do this here rather than in `rust-lang/rust` because configuring via cargo features is + // not straightforward. + // + // Original source of this list: + // + let f16_enabled = match cfg.target_arch.as_str() { + // Unsupported + "arm64ec" => false, + // Selection failure + "s390x" => false, + // Infinite recursion + // FIXME(llvm): loongarch fixed by + "csky" => false, + "hexagon" => false, + "loongarch64" => false, + "mips" | "mips64" | "mips32r6" | "mips64r6" => false, + "powerpc" | "powerpc64" => false, + "sparc" | "sparc64" => false, + "wasm32" | "wasm64" => false, + // Most everything else works as of LLVM 19 + _ => true, + }; + + let f128_enabled = match cfg.target_arch.as_str() { + // Unsupported (libcall is not supported) + "amdgpu" => false, + // Unsupported + "arm64ec" => false, + // Selection failure + "mips64" | "mips64r6" => false, + // Selection failure + "nvptx64" => false, + // Selection failure + "powerpc64" if &cfg.target_os == "aix" => false, + // Selection failure + "sparc" => false, + // Most everything else works as of LLVM 19 + _ => true, + }; + + // If the feature is set, disable these types. + let disable_both = env::var_os("CARGO_FEATURE_NO_F16_F128").is_some(); + + println!("cargo:rustc-check-cfg=cfg(f16_enabled)"); + println!("cargo:rustc-check-cfg=cfg(f128_enabled)"); + + if f16_enabled && !disable_both { + println!("cargo:rustc-cfg=f16_enabled"); + } + + if f128_enabled && !disable_both { + println!("cargo:rustc-cfg=f128_enabled"); + } +} diff --git a/libm/src/lib.rs b/libm/src/lib.rs new file mode 100644 index 000000000..7e56bd079 --- /dev/null +++ b/libm/src/lib.rs @@ -0,0 +1,29 @@ +//! libm in pure Rust +#![no_std] +#![cfg_attr(intrinsics_enabled, allow(internal_features))] +#![cfg_attr(intrinsics_enabled, feature(core_intrinsics))] +#![cfg_attr(all(intrinsics_enabled, target_family = "wasm"), feature(wasm_numeric_instr))] +#![cfg_attr(f128_enabled, feature(f128))] +#![cfg_attr(f16_enabled, feature(f16))] +#![allow(clippy::assign_op_pattern)] +#![allow(clippy::deprecated_cfg_attr)] +#![allow(clippy::eq_op)] +#![allow(clippy::excessive_precision)] +#![allow(clippy::float_cmp)] +#![allow(clippy::int_plus_one)] +#![allow(clippy::many_single_char_names)] +#![allow(clippy::mixed_case_hex_literals)] +#![allow(clippy::needless_late_init)] +#![allow(clippy::needless_return)] +#![allow(clippy::unreadable_literal)] +#![allow(clippy::zero_divided_by_zero)] +#![forbid(unsafe_op_in_unsafe_fn)] + +mod libm_helper; +mod math; + +use core::{f32, f64}; + +pub use libm_helper::*; + +pub use self::math::*; diff --git a/src/libm_helper.rs b/libm/src/libm_helper.rs similarity index 67% rename from src/libm_helper.rs rename to libm/src/libm_helper.rs index 52d0c4c2a..dfa1ff77b 100644 --- a/src/libm_helper.rs +++ b/libm/src/libm_helper.rs @@ -30,7 +30,7 @@ macro_rules! libm_helper { } }; - ({$($func:tt);*}) => { + ({$($func:tt;)*}) => { $( libm_helper! { $func } )* @@ -44,9 +44,11 @@ macro_rules! libm_helper { }; } +// verify-apilist-start libm_helper! { f32, funcs: { + // verify-sorted-start (fn acos(x: f32) -> (f32); => acosf); (fn acosh(x: f32) -> (f32); => acoshf); (fn asin(x: f32) -> (f32); => asinf); @@ -62,8 +64,8 @@ libm_helper! { (fn erf(x: f32) -> (f32); => erff); (fn erfc(x: f32) -> (f32); => erfcf); (fn exp(x: f32) -> (f32); => expf); - (fn exp2(x: f32) -> (f32); => exp2f); (fn exp10(x: f32) -> (f32); => exp10f); + (fn exp2(x: f32) -> (f32); => exp2f); (fn expm1(x: f32) -> (f32); => expm1f); (fn fabs(x: f32) -> (f32); => fabsf); (fn fdim(x: f32, y: f32) -> (f32); => fdimf); @@ -79,12 +81,12 @@ libm_helper! { (fn j1(x: f32) -> (f32); => j1f); (fn jn(n: i32, x: f32) -> (f32); => jnf); (fn ldexp(x: f32, n: i32) -> (f32); => ldexpf); - (fn lgamma_r(x: f32) -> (f32, i32); => lgammaf_r); (fn lgamma(x: f32) -> (f32); => lgammaf); + (fn lgamma_r(x: f32) -> (f32, i32); => lgammaf_r); (fn log(x: f32) -> (f32); => logf); + (fn log10(x: f32) -> (f32); => log10f); (fn log1p(x: f32) -> (f32); => log1pf); (fn log2(x: f32) -> (f32); => log2f); - (fn log10(x: f32) -> (f32); => log10f); (fn modf(x: f32) -> (f32, f32); => modff); (fn nextafter(x: f32, y: f32) -> (f32); => nextafterf); (fn pow(x: f32, y: f32) -> (f32); => powf); @@ -92,6 +94,7 @@ libm_helper! { (fn remquo(x: f32, y: f32) -> (f32, i32); => remquof); (fn rint(x: f32) -> (f32); => rintf); (fn round(x: f32) -> (f32); => roundf); + (fn roundeven(x: f32) -> (f32); => roundevenf); (fn scalbn(x: f32, n: i32) -> (f32); => scalbnf); (fn sin(x: f32) -> (f32); => sinf); (fn sincos(x: f32) -> (f32, f32); => sincosf); @@ -103,13 +106,15 @@ libm_helper! { (fn trunc(x: f32) -> (f32); => truncf); (fn y0(x: f32) -> (f32); => y0f); (fn y1(x: f32) -> (f32); => y1f); - (fn yn(n: i32, x: f32) -> (f32); => ynf) + (fn yn(n: i32, x: f32) -> (f32); => ynf); + // verify-sorted-end } } libm_helper! { f64, funcs: { + // verify-sorted-start (fn acos(x: f64) -> (f64); => acos); (fn acosh(x: f64) -> (f64); => acosh); (fn asin(x: f64) -> (f64); => asin); @@ -125,15 +130,23 @@ libm_helper! { (fn erf(x: f64) -> (f64); => erf); (fn erfc(x: f64) -> (f64); => erfc); (fn exp(x: f64) -> (f64); => exp); - (fn exp2(x: f64) -> (f64); => exp2); (fn exp10(x: f64) -> (f64); => exp10); + (fn exp2(x: f64) -> (f64); => exp2); (fn expm1(x: f64) -> (f64); => expm1); (fn fabs(x: f64) -> (f64); => fabs); (fn fdim(x: f64, y: f64) -> (f64); => fdim); (fn floor(x: f64) -> (f64); => floor); (fn fma(x: f64, y: f64, z: f64) -> (f64); => fma); (fn fmax(x: f64, y: f64) -> (f64); => fmax); + (fn fmaximum(x: f64, y: f64) -> (f64); => fmaximum); + (fn fmaximum_num(x: f64, y: f64) -> (f64); => fmaximum_num); + (fn fmaximum_numf(x: f32, y: f32) -> (f32); => fmaximum_numf); + (fn fmaximumf(x: f32, y: f32) -> (f32); => fmaximumf); (fn fmin(x: f64, y: f64) -> (f64); => fmin); + (fn fminimum(x: f64, y: f64) -> (f64); => fminimum); + (fn fminimum_num(x: f64, y: f64) -> (f64); => fminimum_num); + (fn fminimum_numf(x: f32, y: f32) -> (f32); => fminimum_numf); + (fn fminimumf(x: f32, y: f32) -> (f32); => fminimumf); (fn fmod(x: f64, y: f64) -> (f64); => fmod); (fn frexp(x: f64) -> (f64, i32); => frexp); (fn hypot(x: f64, y: f64) -> (f64); => hypot); @@ -142,12 +155,12 @@ libm_helper! { (fn j1(x: f64) -> (f64); => j1); (fn jn(n: i32, x: f64) -> (f64); => jn); (fn ldexp(x: f64, n: i32) -> (f64); => ldexp); - (fn lgamma_r(x: f64) -> (f64, i32); => lgamma_r); (fn lgamma(x: f64) -> (f64); => lgamma); + (fn lgamma_r(x: f64) -> (f64, i32); => lgamma_r); (fn log(x: f64) -> (f64); => log); + (fn log10(x: f64) -> (f64); => log10); (fn log1p(x: f64) -> (f64); => log1p); (fn log2(x: f64) -> (f64); => log2); - (fn log10(x: f64) -> (f64); => log10); (fn modf(x: f64) -> (f64, f64); => modf); (fn nextafter(x: f64, y: f64) -> (f64); => nextafter); (fn pow(x: f64, y: f64) -> (f64); => pow); @@ -155,6 +168,7 @@ libm_helper! { (fn remquo(x: f64, y: f64) -> (f64, i32); => remquo); (fn rint(x: f64) -> (f64); => rint); (fn round(x: f64) -> (f64); => round); + (fn roundevem(x: f64) -> (f64); => roundeven); (fn scalbn(x: f64, n: i32) -> (f64); => scalbn); (fn sin(x: f64) -> (f64); => sin); (fn sincos(x: f64) -> (f64, f64); => sincos); @@ -166,6 +180,65 @@ libm_helper! { (fn trunc(x: f64) -> (f64); => trunc); (fn y0(x: f64) -> (f64); => y0); (fn y1(x: f64) -> (f64); => y1); - (fn yn(n: i32, x: f64) -> (f64); => yn) + (fn yn(n: i32, x: f64) -> (f64); => yn); + // verify-sorted-end + } +} + +#[cfg(f16_enabled)] +libm_helper! { + f16, + funcs: { + // verify-sorted-start + (fn ceil(x: f16) -> (f16); => ceilf16); + (fn copysign(x: f16, y: f16) -> (f16); => copysignf16); + (fn fabs(x: f16) -> (f16); => fabsf16); + (fn fdim(x: f16, y: f16) -> (f16); => fdimf16); + (fn floor(x: f16) -> (f16); => floorf16); + (fn fmax(x: f16, y: f16) -> (f16); => fmaxf16); + (fn fmaximum_num(x: f16, y: f16) -> (f16); => fmaximum_numf16); + (fn fmaximumf16(x: f16, y: f16) -> (f16); => fmaximumf16); + (fn fmin(x: f16, y: f16) -> (f16); => fminf16); + (fn fminimum(x: f16, y: f16) -> (f16); => fminimumf16); + (fn fminimum_num(x: f16, y: f16) -> (f16); => fminimum_numf16); + (fn fmod(x: f16, y: f16) -> (f16); => fmodf16); + (fn ldexp(x: f16, n: i32) -> (f16); => ldexpf16); + (fn rint(x: f16) -> (f16); => rintf16); + (fn round(x: f16) -> (f16); => roundf16); + (fn roundeven(x: f16) -> (f16); => roundevenf16); + (fn scalbn(x: f16, n: i32) -> (f16); => scalbnf16); + (fn sqrtf(x: f16) -> (f16); => sqrtf16); + (fn truncf(x: f16) -> (f16); => truncf16); + // verify-sorted-end + } +} + +#[cfg(f128_enabled)] +libm_helper! { + f128, + funcs: { + // verify-sorted-start + (fn ceil(x: f128) -> (f128); => ceilf128); + (fn copysign(x: f128, y: f128) -> (f128); => copysignf128); + (fn fabs(x: f128) -> (f128); => fabsf128); + (fn fdim(x: f128, y: f128) -> (f128); => fdimf128); + (fn floor(x: f128) -> (f128); => floorf128); + (fn fma(x: f128, y: f128, z: f128) -> (f128); => fmaf128); + (fn fmax(x: f128, y: f128) -> (f128); => fmaxf128); + (fn fmaximum(x: f128, y: f128) -> (f128); => fmaximumf128); + (fn fmaximum_num(x: f128, y: f128) -> (f128); => fmaximum_numf128); + (fn fmin(x: f128, y: f128) -> (f128); => fminf128); + (fn fminimum(x: f128, y: f128) -> (f128); => fminimumf128); + (fn fminimum_num(x: f128, y: f128) -> (f128); => fminimum_numf128); + (fn fmod(x: f128, y: f128) -> (f128); => fmodf128); + (fn ldexp(x: f128, n: i32) -> (f128); => ldexpf128); + (fn rint(x: f128) -> (f128); => rintf128); + (fn round(x: f128) -> (f128); => roundf128); + (fn roundeven(x: f128) -> (f128); => roundevenf128); + (fn scalbn(x: f128, n: i32) -> (f128); => scalbnf128); + (fn sqrt(x: f128) -> (f128); => sqrtf128); + (fn trunc(x: f128) -> (f128); => truncf128); + // verify-sorted-end } } +// verify-apilist-end diff --git a/src/math/acos.rs b/libm/src/math/acos.rs similarity index 100% rename from src/math/acos.rs rename to libm/src/math/acos.rs diff --git a/src/math/acosf.rs b/libm/src/math/acosf.rs similarity index 98% rename from src/math/acosf.rs rename to libm/src/math/acosf.rs index 1a60479e3..dd88eea5b 100644 --- a/src/math/acosf.rs +++ b/libm/src/math/acosf.rs @@ -13,7 +13,7 @@ * ==================================================== */ -use super::sqrtf::sqrtf; +use super::sqrt::sqrtf; const PIO2_HI: f32 = 1.5707962513e+00; /* 0x3fc90fda */ const PIO2_LO: f32 = 7.5497894159e-08; /* 0x33a22168 */ diff --git a/src/math/acosh.rs b/libm/src/math/acosh.rs similarity index 100% rename from src/math/acosh.rs rename to libm/src/math/acosh.rs diff --git a/src/math/acoshf.rs b/libm/src/math/acoshf.rs similarity index 100% rename from src/math/acoshf.rs rename to libm/src/math/acoshf.rs diff --git a/libm/src/math/arch/aarch64.rs b/libm/src/math/arch/aarch64.rs new file mode 100644 index 000000000..020bb731c --- /dev/null +++ b/libm/src/math/arch/aarch64.rs @@ -0,0 +1,115 @@ +//! Architecture-specific support for aarch64 with neon. + +use core::arch::asm; + +pub fn fma(mut x: f64, y: f64, z: f64) -> f64 { + // SAFETY: `fmadd` is available with neon and has no side effects. + unsafe { + asm!( + "fmadd {x:d}, {x:d}, {y:d}, {z:d}", + x = inout(vreg) x, + y = in(vreg) y, + z = in(vreg) z, + options(nomem, nostack, pure) + ); + } + x +} + +pub fn fmaf(mut x: f32, y: f32, z: f32) -> f32 { + // SAFETY: `fmadd` is available with neon and has no side effects. + unsafe { + asm!( + "fmadd {x:s}, {x:s}, {y:s}, {z:s}", + x = inout(vreg) x, + y = in(vreg) y, + z = in(vreg) z, + options(nomem, nostack, pure) + ); + } + x +} + +pub fn rint(mut x: f64) -> f64 { + // SAFETY: `frintn` is available with neon and has no side effects. + // + // `frintn` is always round-to-nearest which does not match the C specification, but Rust does + // not support rounding modes. + unsafe { + asm!( + "frintn {x:d}, {x:d}", + x = inout(vreg) x, + options(nomem, nostack, pure) + ); + } + x +} + +pub fn rintf(mut x: f32) -> f32 { + // SAFETY: `frintn` is available with neon and has no side effects. + // + // `frintn` is always round-to-nearest which does not match the C specification, but Rust does + // not support rounding modes. + unsafe { + asm!( + "frintn {x:s}, {x:s}", + x = inout(vreg) x, + options(nomem, nostack, pure) + ); + } + x +} + +#[cfg(all(f16_enabled, target_feature = "fp16"))] +pub fn rintf16(mut x: f16) -> f16 { + // SAFETY: `frintn` is available for `f16` with `fp16` (implies `neon`) and has no side effects. + // + // `frintn` is always round-to-nearest which does not match the C specification, but Rust does + // not support rounding modes. + unsafe { + asm!( + "frintn {x:h}, {x:h}", + x = inout(vreg) x, + options(nomem, nostack, pure) + ); + } + x +} + +pub fn sqrt(mut x: f64) -> f64 { + // SAFETY: `fsqrt` is available with neon and has no side effects. + unsafe { + asm!( + "fsqrt {x:d}, {x:d}", + x = inout(vreg) x, + options(nomem, nostack, pure) + ); + } + x +} + +pub fn sqrtf(mut x: f32) -> f32 { + // SAFETY: `fsqrt` is available with neon and has no side effects. + unsafe { + asm!( + "fsqrt {x:s}, {x:s}", + x = inout(vreg) x, + options(nomem, nostack, pure) + ); + } + x +} + +#[cfg(all(f16_enabled, target_feature = "fp16"))] +pub fn sqrtf16(mut x: f16) -> f16 { + // SAFETY: `fsqrt` is available for `f16` with `fp16` (implies `neon`) and has no + // side effects. + unsafe { + asm!( + "fsqrt {x:h}, {x:h}", + x = inout(vreg) x, + options(nomem, nostack, pure) + ); + } + x +} diff --git a/libm/src/math/arch/i586.rs b/libm/src/math/arch/i586.rs new file mode 100644 index 000000000..f92b9a2af --- /dev/null +++ b/libm/src/math/arch/i586.rs @@ -0,0 +1,37 @@ +//! Architecture-specific support for x86-32 without SSE2 + +use super::super::fabs; + +/// Use an alternative implementation on x86, because the +/// main implementation fails with the x87 FPU used by +/// debian i386, probably due to excess precision issues. +/// Basic implementation taken from https://github.com/rust-lang/libm/issues/219. +pub fn ceil(x: f64) -> f64 { + if fabs(x).to_bits() < 4503599627370496.0_f64.to_bits() { + let truncated = x as i64 as f64; + if truncated < x { + return truncated + 1.0; + } else { + return truncated; + } + } else { + return x; + } +} + +/// Use an alternative implementation on x86, because the +/// main implementation fails with the x87 FPU used by +/// debian i386, probably due to excess precision issues. +/// Basic implementation taken from https://github.com/rust-lang/libm/issues/219. +pub fn floor(x: f64) -> f64 { + if fabs(x).to_bits() < 4503599627370496.0_f64.to_bits() { + let truncated = x as i64 as f64; + if truncated > x { + return truncated - 1.0; + } else { + return truncated; + } + } else { + return x; + } +} diff --git a/libm/src/math/arch/i686.rs b/libm/src/math/arch/i686.rs new file mode 100644 index 000000000..3e1d19bfa --- /dev/null +++ b/libm/src/math/arch/i686.rs @@ -0,0 +1,27 @@ +//! Architecture-specific support for x86-32 and x86-64 with SSE2 + +pub fn sqrtf(mut x: f32) -> f32 { + // SAFETY: `sqrtss` is part of `sse2`, which this module is gated behind. It has no memory + // access or side effects. + unsafe { + core::arch::asm!( + "sqrtss {x}, {x}", + x = inout(xmm_reg) x, + options(nostack, nomem, pure), + ) + }; + x +} + +pub fn sqrt(mut x: f64) -> f64 { + // SAFETY: `sqrtsd` is part of `sse2`, which this module is gated behind. It has no memory + // access or side effects. + unsafe { + core::arch::asm!( + "sqrtsd {x}, {x}", + x = inout(xmm_reg) x, + options(nostack, nomem, pure), + ) + }; + x +} diff --git a/libm/src/math/arch/mod.rs b/libm/src/math/arch/mod.rs new file mode 100644 index 000000000..d9f2aad66 --- /dev/null +++ b/libm/src/math/arch/mod.rs @@ -0,0 +1,50 @@ +//! Architecture-specific routines and operations. +//! +//! LLVM will already optimize calls to some of these in cases that there are hardware +//! instructions. Providing an implementation here just ensures that the faster implementation +//! is used when calling the function directly. This helps anyone who uses `libm` directly, as +//! well as improving things when these routines are called as part of other implementations. + +// Most implementations should be defined here, to ensure they are not made available when +// soft floats are required. +#[cfg(arch_enabled)] +cfg_if! { + if #[cfg(all(target_arch = "wasm32", intrinsics_enabled))] { + mod wasm32; + pub use wasm32::{ + ceil, ceilf, fabs, fabsf, floor, floorf, rint, rintf, sqrt, sqrtf, trunc, truncf, + }; + } else if #[cfg(target_feature = "sse2")] { + mod i686; + pub use i686::{sqrt, sqrtf}; + } else if #[cfg(all( + any(target_arch = "aarch64", target_arch = "arm64ec"), + target_feature = "neon" + ))] { + mod aarch64; + + pub use aarch64::{ + fma, + fmaf, + rint, + rintf, + sqrt, + sqrtf, + }; + + #[cfg(all(f16_enabled, target_feature = "fp16"))] + pub use aarch64::{ + rintf16, + sqrtf16, + }; + } +} + +// There are certain architecture-specific implementations that are needed for correctness +// even with `force-soft-float`. These are configured here. +cfg_if! { + if #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))] { + mod i586; + pub use i586::{ceil, floor}; + } +} diff --git a/libm/src/math/arch/wasm32.rs b/libm/src/math/arch/wasm32.rs new file mode 100644 index 000000000..de80c8a58 --- /dev/null +++ b/libm/src/math/arch/wasm32.rs @@ -0,0 +1,50 @@ +//! Wasm has builtins for simple float operations. Use the unstable `core::arch` intrinsics which +//! are significantly faster than soft float operations. + +pub fn ceil(x: f64) -> f64 { + core::arch::wasm32::f64_ceil(x) +} + +pub fn ceilf(x: f32) -> f32 { + core::arch::wasm32::f32_ceil(x) +} + +pub fn fabs(x: f64) -> f64 { + x.abs() +} + +pub fn fabsf(x: f32) -> f32 { + x.abs() +} + +pub fn floor(x: f64) -> f64 { + core::arch::wasm32::f64_floor(x) +} + +pub fn floorf(x: f32) -> f32 { + core::arch::wasm32::f32_floor(x) +} + +pub fn rint(x: f64) -> f64 { + core::arch::wasm32::f64_nearest(x) +} + +pub fn rintf(x: f32) -> f32 { + core::arch::wasm32::f32_nearest(x) +} + +pub fn sqrt(x: f64) -> f64 { + core::arch::wasm32::f64_sqrt(x) +} + +pub fn sqrtf(x: f32) -> f32 { + core::arch::wasm32::f32_sqrt(x) +} + +pub fn trunc(x: f64) -> f64 { + core::arch::wasm32::f64_trunc(x) +} + +pub fn truncf(x: f32) -> f32 { + core::arch::wasm32::f32_trunc(x) +} diff --git a/src/math/asin.rs b/libm/src/math/asin.rs similarity index 98% rename from src/math/asin.rs rename to libm/src/math/asin.rs index 12fe08fc7..12d0cd35f 100644 --- a/src/math/asin.rs +++ b/libm/src/math/asin.rs @@ -90,7 +90,7 @@ pub fn asin(mut x: f64) -> f64 { /* |x| < 0.5 */ if ix < 0x3fe00000 { /* if 0x1p-1022 <= |x| < 0x1p-26, avoid raising underflow */ - if ix < 0x3e500000 && ix >= 0x00100000 { + if (0x00100000..0x3e500000).contains(&ix) { return x; } else { return x + x * comp_r(x * x); diff --git a/src/math/asinf.rs b/libm/src/math/asinf.rs similarity index 94% rename from src/math/asinf.rs rename to libm/src/math/asinf.rs index 2c785abe2..ed6855567 100644 --- a/src/math/asinf.rs +++ b/libm/src/math/asinf.rs @@ -13,8 +13,8 @@ * ==================================================== */ -use super::fabsf::fabsf; use super::sqrt::sqrt; +use super::support::Float; const PIO2: f64 = 1.570796326794896558e+00; @@ -54,14 +54,14 @@ pub fn asinf(mut x: f32) -> f32 { if ix < 0x3f000000 { /* |x| < 0.5 */ /* if 0x1p-126 <= |x| < 0x1p-12, avoid raising underflow */ - if (ix < 0x39800000) && (ix >= 0x00800000) { + if (0x00800000..0x39800000).contains(&ix) { return x; } return x + x * r(x * x); } /* 1 > |x| >= 0.5 */ - let z = (1. - fabsf(x)) * 0.5; + let z = (1. - Float::abs(x)) * 0.5; let s = sqrt(z as f64); x = (PIO2 - 2. * (s + s * (r(z) as f64))) as f32; if (hx >> 31) != 0 { -x } else { x } diff --git a/src/math/asinh.rs b/libm/src/math/asinh.rs similarity index 100% rename from src/math/asinh.rs rename to libm/src/math/asinh.rs diff --git a/src/math/asinhf.rs b/libm/src/math/asinhf.rs similarity index 100% rename from src/math/asinhf.rs rename to libm/src/math/asinhf.rs diff --git a/src/math/atan.rs b/libm/src/math/atan.rs similarity index 100% rename from src/math/atan.rs rename to libm/src/math/atan.rs diff --git a/src/math/atan2.rs b/libm/src/math/atan2.rs similarity index 89% rename from src/math/atan2.rs rename to libm/src/math/atan2.rs index b9bf0da93..c668731cf 100644 --- a/src/math/atan2.rs +++ b/libm/src/math/atan2.rs @@ -114,12 +114,18 @@ pub fn atan2(y: f64, x: f64) -> f64 { } } -#[test] -fn sanity_check() { - assert_eq!(atan2(0.0, 1.0), 0.0); - assert_eq!(atan2(0.0, -1.0), PI); - assert_eq!(atan2(-0.0, -1.0), -PI); - assert_eq!(atan2(3.0, 2.0), atan(3.0 / 2.0)); - assert_eq!(atan2(2.0, -1.0), atan(2.0 / -1.0) + PI); - assert_eq!(atan2(-2.0, -1.0), atan(-2.0 / -1.0) - PI); +#[cfg(test)] +mod tests { + use super::*; + + #[test] + #[cfg_attr(x86_no_sse, ignore = "FIXME(i586): possible incorrect rounding")] + fn sanity_check() { + assert_eq!(atan2(0.0, 1.0), 0.0); + assert_eq!(atan2(0.0, -1.0), PI); + assert_eq!(atan2(-0.0, -1.0), -PI); + assert_eq!(atan2(3.0, 2.0), atan(3.0 / 2.0)); + assert_eq!(atan2(2.0, -1.0), atan(2.0 / -1.0) + PI); + assert_eq!(atan2(-2.0, -1.0), atan(-2.0 / -1.0) - PI); + } } diff --git a/src/math/atan2f.rs b/libm/src/math/atan2f.rs similarity index 78% rename from src/math/atan2f.rs rename to libm/src/math/atan2f.rs index fa33f54f6..95b466fff 100644 --- a/src/math/atan2f.rs +++ b/libm/src/math/atan2f.rs @@ -42,9 +42,9 @@ pub fn atan2f(y: f32, x: f32) -> f32 { /* when y = 0 */ if iy == 0 { return match m { - 0 | 1 => y, /* atan(+-0,+anything)=+-0 */ - 2 => PI, /* atan(+0,-anything) = pi */ - 3 | _ => -PI, /* atan(-0,-anything) =-pi */ + 0 | 1 => y, /* atan(+-0,+anything)=+-0 */ + 2 => PI, /* atan(+0,-anything) = pi */ + _ => -PI, /* atan(-0,-anything) =-pi */ }; } /* when x = 0 */ @@ -55,17 +55,17 @@ pub fn atan2f(y: f32, x: f32) -> f32 { if ix == 0x7f800000 { return if iy == 0x7f800000 { match m { - 0 => PI / 4., /* atan(+INF,+INF) */ - 1 => -PI / 4., /* atan(-INF,+INF) */ - 2 => 3. * PI / 4., /* atan(+INF,-INF)*/ - 3 | _ => -3. * PI / 4., /* atan(-INF,-INF)*/ + 0 => PI / 4., /* atan(+INF,+INF) */ + 1 => -PI / 4., /* atan(-INF,+INF) */ + 2 => 3. * PI / 4., /* atan(+INF,-INF)*/ + _ => -3. * PI / 4., /* atan(-INF,-INF)*/ } } else { match m { - 0 => 0., /* atan(+...,+INF) */ - 1 => -0., /* atan(-...,+INF) */ - 2 => PI, /* atan(+...,-INF) */ - 3 | _ => -PI, /* atan(-...,-INF) */ + 0 => 0., /* atan(+...,+INF) */ + 1 => -0., /* atan(-...,+INF) */ + 2 => PI, /* atan(+...,-INF) */ + _ => -PI, /* atan(-...,-INF) */ } }; } diff --git a/src/math/atanf.rs b/libm/src/math/atanf.rs similarity index 100% rename from src/math/atanf.rs rename to libm/src/math/atanf.rs diff --git a/src/math/atanh.rs b/libm/src/math/atanh.rs similarity index 100% rename from src/math/atanh.rs rename to libm/src/math/atanh.rs diff --git a/src/math/atanhf.rs b/libm/src/math/atanhf.rs similarity index 95% rename from src/math/atanhf.rs rename to libm/src/math/atanhf.rs index 3545411bb..80ccec1f6 100644 --- a/src/math/atanhf.rs +++ b/libm/src/math/atanhf.rs @@ -18,7 +18,7 @@ pub fn atanhf(mut x: f32) -> f32 { if u < 0x3f800000 - (32 << 23) { /* handle underflow */ if u < (1 << 23) { - force_eval!((x * x) as f32); + force_eval!(x * x); } } else { /* |x| < 0.5, up to 1.7ulp error */ diff --git a/libm/src/math/cbrt.rs b/libm/src/math/cbrt.rs new file mode 100644 index 000000000..9d3311cd6 --- /dev/null +++ b/libm/src/math/cbrt.rs @@ -0,0 +1,215 @@ +/* SPDX-License-Identifier: MIT */ +/* origin: core-math/src/binary64/cbrt/cbrt.c + * Copyright (c) 2021-2022 Alexei Sibidanov. + * Ported to Rust in 2025 by Trevor Gross. + */ + +use super::Float; +use super::support::{FpResult, Round, cold_path}; + +/// Compute the cube root of the argument. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn cbrt(x: f64) -> f64 { + cbrt_round(x, Round::Nearest).val +} + +pub fn cbrt_round(x: f64, round: Round) -> FpResult { + const ESCALE: [f64; 3] = [ + 1.0, + hf64!("0x1.428a2f98d728bp+0"), /* 2^(1/3) */ + hf64!("0x1.965fea53d6e3dp+0"), /* 2^(2/3) */ + ]; + + /* the polynomial c0+c1*x+c2*x^2+c3*x^3 approximates x^(1/3) on [1,2] + with maximal error < 9.2e-5 (attained at x=2) */ + const C: [f64; 4] = [ + hf64!("0x1.1b0babccfef9cp-1"), + hf64!("0x1.2c9a3e94d1da5p-1"), + hf64!("-0x1.4dc30b1a1ddbap-3"), + hf64!("0x1.7a8d3e4ec9b07p-6"), + ]; + + let u0: f64 = hf64!("0x1.5555555555555p-2"); + let u1: f64 = hf64!("0x1.c71c71c71c71cp-3"); + + let rsc = [1.0, -1.0, 0.5, -0.5, 0.25, -0.25]; + + let off = [hf64!("0x1p-53"), 0.0, 0.0, 0.0]; + + /* rm=0 for rounding to nearest, and other values for directed roundings */ + let hx: u64 = x.to_bits(); + let mut mant: u64 = hx & f64::SIG_MASK; + let sign: u64 = hx >> 63; + + let mut e: u32 = (hx >> f64::SIG_BITS) as u32 & f64::EXP_SAT; + + if ((e + 1) & f64::EXP_SAT) < 2 { + cold_path(); + + let ix: u64 = hx & !f64::SIGN_MASK; + + /* 0, inf, nan: we return x + x instead of simply x, + to that for x a signaling NaN, it correctly triggers + the invalid exception. */ + if e == f64::EXP_SAT || ix == 0 { + return FpResult::ok(x + x); + } + + let nz = ix.leading_zeros() - 11; /* subnormal */ + mant <<= nz; + mant &= f64::SIG_MASK; + e = e.wrapping_sub(nz - 1); + } + + e = e.wrapping_add(3072); + let cvt1: u64 = mant | (0x3ffu64 << 52); + let mut cvt5: u64 = cvt1; + + let et: u32 = e / 3; + let it: u32 = e % 3; + + /* 2^(3k+it) <= x < 2^(3k+it+1), with 0 <= it <= 3 */ + cvt5 += u64::from(it) << f64::SIG_BITS; + cvt5 |= sign << 63; + let zz: f64 = f64::from_bits(cvt5); + + /* cbrt(x) = cbrt(zz)*2^(et-1365) where 1 <= zz < 8 */ + let mut isc: u64 = ESCALE[it as usize].to_bits(); // todo: index + isc |= sign << 63; + let cvt2: u64 = isc; + let z: f64 = f64::from_bits(cvt1); + + /* cbrt(zz) = cbrt(z)*isc, where isc encodes 1, 2^(1/3) or 2^(2/3), + and 1 <= z < 2 */ + let r: f64 = 1.0 / z; + let rr: f64 = r * rsc[((it as usize) << 1) | sign as usize]; + let z2: f64 = z * z; + let c0: f64 = C[0] + z * C[1]; + let c2: f64 = C[2] + z * C[3]; + let mut y: f64 = c0 + z2 * c2; + let mut y2: f64 = y * y; + + /* y is an approximation of z^(1/3) */ + let mut h: f64 = y2 * (y * r) - 1.0; + + /* h determines the error between y and z^(1/3) */ + y -= (h * y) * (u0 - u1 * h); + + /* The correction y -= (h*y)*(u0 - u1*h) corresponds to a cubic variant + of Newton's method, with the function f(y) = 1-z/y^3. */ + y *= f64::from_bits(cvt2); + + /* Now y is an approximation of zz^(1/3), + * and rr an approximation of 1/zz. We now perform another iteration of + * Newton-Raphson, this time with a linear approximation only. */ + y2 = y * y; + let mut y2l: f64 = y.fma(y, -y2); + + /* y2 + y2l = y^2 exactly */ + let mut y3: f64 = y2 * y; + let mut y3l: f64 = y.fma(y2, -y3) + y * y2l; + + /* y3 + y3l approximates y^3 with about 106 bits of accuracy */ + h = ((y3 - zz) + y3l) * rr; + let mut dy: f64 = h * (y * u0); + + /* the approximation of zz^(1/3) is y - dy */ + let mut y1: f64 = y - dy; + dy = (y - y1) - dy; + + /* the approximation of zz^(1/3) is now y1 + dy, where |dy| < 1/2 ulp(y) + * (for rounding to nearest) */ + let mut ady: f64 = dy.abs(); + + /* For directed roundings, ady0 is tiny when dy is tiny, or ady0 is near + * from ulp(1); + * for rounding to nearest, ady0 is tiny when dy is near from 1/2 ulp(1), + * or from 3/2 ulp(1). */ + let mut ady0: f64 = (ady - off[round as usize]).abs(); + let mut ady1: f64 = (ady - (hf64!("0x1p-52") + off[round as usize])).abs(); + + if ady0 < hf64!("0x1p-75") || ady1 < hf64!("0x1p-75") { + cold_path(); + + y2 = y1 * y1; + y2l = y1.fma(y1, -y2); + y3 = y2 * y1; + y3l = y1.fma(y2, -y3) + y1 * y2l; + h = ((y3 - zz) + y3l) * rr; + dy = h * (y1 * u0); + y = y1 - dy; + dy = (y1 - y) - dy; + y1 = y; + ady = dy.abs(); + ady0 = (ady - off[round as usize]).abs(); + ady1 = (ady - (hf64!("0x1p-52") + off[round as usize])).abs(); + + if ady0 < hf64!("0x1p-98") || ady1 < hf64!("0x1p-98") { + cold_path(); + let azz: f64 = zz.abs(); + + // ~ 0x1.79d15d0e8d59b80000000000000ffc3dp+0 + if azz == hf64!("0x1.9b78223aa307cp+1") { + y1 = hf64!("0x1.79d15d0e8d59cp+0").copysign(zz); + } + + // ~ 0x1.de87aa837820e80000000000001c0f08p+0 + if azz == hf64!("0x1.a202bfc89ddffp+2") { + y1 = hf64!("0x1.de87aa837820fp+0").copysign(zz); + } + + if round != Round::Nearest { + let wlist = [ + (hf64!("0x1.3a9ccd7f022dbp+0"), hf64!("0x1.1236160ba9b93p+0")), // ~ 0x1.1236160ba9b930000000000001e7e8fap+0 + (hf64!("0x1.7845d2faac6fep+0"), hf64!("0x1.23115e657e49cp+0")), // ~ 0x1.23115e657e49c0000000000001d7a799p+0 + (hf64!("0x1.d1ef81cbbbe71p+0"), hf64!("0x1.388fb44cdcf5ap+0")), // ~ 0x1.388fb44cdcf5a0000000000002202c55p+0 + (hf64!("0x1.0a2014f62987cp+1"), hf64!("0x1.46bcbf47dc1e8p+0")), // ~ 0x1.46bcbf47dc1e8000000000000303aa2dp+0 + (hf64!("0x1.fe18a044a5501p+1"), hf64!("0x1.95decfec9c904p+0")), // ~ 0x1.95decfec9c9040000000000000159e8ep+0 + (hf64!("0x1.a6bb8c803147bp+2"), hf64!("0x1.e05335a6401dep+0")), // ~ 0x1.e05335a6401de00000000000027ca017p+0 + (hf64!("0x1.ac8538a031cbdp+2"), hf64!("0x1.e281d87098de8p+0")), // ~ 0x1.e281d87098de80000000000000ee9314p+0 + ]; + + for (a, b) in wlist { + if azz == a { + let tmp = if round as u64 + sign == 2 { hf64!("0x1p-52") } else { 0.0 }; + y1 = (b + tmp).copysign(zz); + } + } + } + } + } + + let mut cvt3: u64 = y1.to_bits(); + cvt3 = cvt3.wrapping_add(((et.wrapping_sub(342).wrapping_sub(1023)) as u64) << 52); + let m0: u64 = cvt3 << 30; + let m1 = m0 >> 63; + + if (m0 ^ m1) <= (1u64 << 30) { + cold_path(); + + let mut cvt4: u64 = y1.to_bits(); + cvt4 = (cvt4 + (164 << 15)) & 0xffffffffffff0000u64; + + if ((f64::from_bits(cvt4) - y1) - dy).abs() < hf64!("0x1p-60") || (zz).abs() == 1.0 { + cvt3 = (cvt3 + (1u64 << 15)) & 0xffffffffffff0000u64; + } + } + + FpResult::ok(f64::from_bits(cvt3)) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn spot_checks() { + if !cfg!(x86_no_sse) { + // Exposes a rounding mode problem. Ignored on i586 because of inaccurate FMA. + assert_biteq!( + cbrt(f64::from_bits(0xf7f792b28f600000)), + f64::from_bits(0xd29ce68655d962f3) + ); + } + } +} diff --git a/src/math/cbrtf.rs b/libm/src/math/cbrtf.rs similarity index 100% rename from src/math/cbrtf.rs rename to libm/src/math/cbrtf.rs diff --git a/libm/src/math/ceil.rs b/libm/src/math/ceil.rs new file mode 100644 index 000000000..4e1035457 --- /dev/null +++ b/libm/src/math/ceil.rs @@ -0,0 +1,46 @@ +/// Ceil (f16) +/// +/// Finds the nearest integer greater than or equal to `x`. +#[cfg(f16_enabled)] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn ceilf16(x: f16) -> f16 { + super::generic::ceil(x) +} + +/// Ceil (f32) +/// +/// Finds the nearest integer greater than or equal to `x`. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn ceilf(x: f32) -> f32 { + select_implementation! { + name: ceilf, + use_arch: all(target_arch = "wasm32", intrinsics_enabled), + args: x, + } + + super::generic::ceil(x) +} + +/// Ceil (f64) +/// +/// Finds the nearest integer greater than or equal to `x`. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn ceil(x: f64) -> f64 { + select_implementation! { + name: ceil, + use_arch: all(target_arch = "wasm32", intrinsics_enabled), + use_arch_required: all(target_arch = "x86", not(target_feature = "sse2")), + args: x, + } + + super::generic::ceil(x) +} + +/// Ceil (f128) +/// +/// Finds the nearest integer greater than or equal to `x`. +#[cfg(f128_enabled)] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn ceilf128(x: f128) -> f128 { + super::generic::ceil(x) +} diff --git a/libm/src/math/copysign.rs b/libm/src/math/copysign.rs new file mode 100644 index 000000000..d2a86e7fd --- /dev/null +++ b/libm/src/math/copysign.rs @@ -0,0 +1,88 @@ +/// Sign of Y, magnitude of X (f16) +/// +/// Constructs a number with the magnitude (absolute value) of its +/// first argument, `x`, and the sign of its second argument, `y`. +#[cfg(f16_enabled)] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn copysignf16(x: f16, y: f16) -> f16 { + super::generic::copysign(x, y) +} + +/// Sign of Y, magnitude of X (f32) +/// +/// Constructs a number with the magnitude (absolute value) of its +/// first argument, `x`, and the sign of its second argument, `y`. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn copysignf(x: f32, y: f32) -> f32 { + super::generic::copysign(x, y) +} + +/// Sign of Y, magnitude of X (f64) +/// +/// Constructs a number with the magnitude (absolute value) of its +/// first argument, `x`, and the sign of its second argument, `y`. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn copysign(x: f64, y: f64) -> f64 { + super::generic::copysign(x, y) +} + +/// Sign of Y, magnitude of X (f128) +/// +/// Constructs a number with the magnitude (absolute value) of its +/// first argument, `x`, and the sign of its second argument, `y`. +#[cfg(f128_enabled)] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn copysignf128(x: f128, y: f128) -> f128 { + super::generic::copysign(x, y) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::support::Float; + + fn spec_test(f: impl Fn(F, F) -> F) { + assert_biteq!(f(F::ZERO, F::ZERO), F::ZERO); + assert_biteq!(f(F::NEG_ZERO, F::ZERO), F::ZERO); + assert_biteq!(f(F::ZERO, F::NEG_ZERO), F::NEG_ZERO); + assert_biteq!(f(F::NEG_ZERO, F::NEG_ZERO), F::NEG_ZERO); + + assert_biteq!(f(F::ONE, F::ONE), F::ONE); + assert_biteq!(f(F::NEG_ONE, F::ONE), F::ONE); + assert_biteq!(f(F::ONE, F::NEG_ONE), F::NEG_ONE); + assert_biteq!(f(F::NEG_ONE, F::NEG_ONE), F::NEG_ONE); + + assert_biteq!(f(F::INFINITY, F::INFINITY), F::INFINITY); + assert_biteq!(f(F::NEG_INFINITY, F::INFINITY), F::INFINITY); + assert_biteq!(f(F::INFINITY, F::NEG_INFINITY), F::NEG_INFINITY); + assert_biteq!(f(F::NEG_INFINITY, F::NEG_INFINITY), F::NEG_INFINITY); + + // Not required but we expect it + assert_biteq!(f(F::NAN, F::NAN), F::NAN); + assert_biteq!(f(F::NEG_NAN, F::NAN), F::NAN); + assert_biteq!(f(F::NAN, F::NEG_NAN), F::NEG_NAN); + assert_biteq!(f(F::NEG_NAN, F::NEG_NAN), F::NEG_NAN); + } + + #[test] + #[cfg(f16_enabled)] + fn spec_tests_f16() { + spec_test::(copysignf16); + } + + #[test] + fn spec_tests_f32() { + spec_test::(copysignf); + } + + #[test] + fn spec_tests_f64() { + spec_test::(copysign); + } + + #[test] + #[cfg(f128_enabled)] + fn spec_tests_f128() { + spec_test::(copysignf128); + } +} diff --git a/src/math/copysignf.rs b/libm/src/math/copysignf.rs similarity index 68% rename from src/math/copysignf.rs rename to libm/src/math/copysignf.rs index 6c346e3a5..8b9bed4c0 100644 --- a/src/math/copysignf.rs +++ b/libm/src/math/copysignf.rs @@ -4,9 +4,5 @@ /// first argument, `x`, and the sign of its second argument, `y`. #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn copysignf(x: f32, y: f32) -> f32 { - let mut ux = x.to_bits(); - let uy = y.to_bits(); - ux &= 0x7fffffff; - ux |= uy & 0x80000000; - f32::from_bits(ux) + super::generic::copysign(x, y) } diff --git a/libm/src/math/copysignf128.rs b/libm/src/math/copysignf128.rs new file mode 100644 index 000000000..7bd81d42b --- /dev/null +++ b/libm/src/math/copysignf128.rs @@ -0,0 +1,8 @@ +/// Sign of Y, magnitude of X (f128) +/// +/// Constructs a number with the magnitude (absolute value) of its +/// first argument, `x`, and the sign of its second argument, `y`. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn copysignf128(x: f128, y: f128) -> f128 { + super::generic::copysign(x, y) +} diff --git a/libm/src/math/copysignf16.rs b/libm/src/math/copysignf16.rs new file mode 100644 index 000000000..820658686 --- /dev/null +++ b/libm/src/math/copysignf16.rs @@ -0,0 +1,8 @@ +/// Sign of Y, magnitude of X (f16) +/// +/// Constructs a number with the magnitude (absolute value) of its +/// first argument, `x`, and the sign of its second argument, `y`. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn copysignf16(x: f16, y: f16) -> f16 { + super::generic::copysign(x, y) +} diff --git a/src/math/cos.rs b/libm/src/math/cos.rs similarity index 100% rename from src/math/cos.rs rename to libm/src/math/cos.rs diff --git a/src/math/cosf.rs b/libm/src/math/cosf.rs similarity index 100% rename from src/math/cosf.rs rename to libm/src/math/cosf.rs diff --git a/src/math/cosh.rs b/libm/src/math/cosh.rs similarity index 100% rename from src/math/cosh.rs rename to libm/src/math/cosh.rs diff --git a/src/math/coshf.rs b/libm/src/math/coshf.rs similarity index 100% rename from src/math/coshf.rs rename to libm/src/math/coshf.rs diff --git a/src/math/erf.rs b/libm/src/math/erf.rs similarity index 100% rename from src/math/erf.rs rename to libm/src/math/erf.rs diff --git a/src/math/erff.rs b/libm/src/math/erff.rs similarity index 100% rename from src/math/erff.rs rename to libm/src/math/erff.rs diff --git a/src/math/exp.rs b/libm/src/math/exp.rs similarity index 100% rename from src/math/exp.rs rename to libm/src/math/exp.rs diff --git a/src/math/exp10.rs b/libm/src/math/exp10.rs similarity index 88% rename from src/math/exp10.rs rename to libm/src/math/exp10.rs index 559930e10..7c33c92b6 100644 --- a/src/math/exp10.rs +++ b/libm/src/math/exp10.rs @@ -6,12 +6,13 @@ const P10: &[f64] = &[ 1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9, 1e10, 1e11, 1e12, 1e13, 1e14, 1e15, ]; +/// Calculates 10 raised to the power of `x` (f64). #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn exp10(x: f64) -> f64 { let (mut y, n) = modf(x); let u: u64 = n.to_bits(); /* fabs(n) < 16 without raising invalid on nan */ - if (u >> 52 & 0x7ff) < 0x3ff + 4 { + if ((u >> 52) & 0x7ff) < 0x3ff + 4 { if y == 0.0 { return i!(P10, ((n as isize) + 15) as usize); } diff --git a/src/math/exp10f.rs b/libm/src/math/exp10f.rs similarity index 88% rename from src/math/exp10f.rs rename to libm/src/math/exp10f.rs index 786305481..0520a41f2 100644 --- a/src/math/exp10f.rs +++ b/libm/src/math/exp10f.rs @@ -5,12 +5,13 @@ const LN10_F64: f64 = 3.32192809488736234787031942948939; const P10: &[f32] = &[1e-7, 1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 1e-1, 1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7]; +/// Calculates 10 raised to the power of `x` (f32). #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn exp10f(x: f32) -> f32 { let (mut y, n) = modff(x); let u = n.to_bits(); /* fabsf(n) < 8 without raising invalid on nan */ - if (u >> 23 & 0xff) < 0x7f + 3 { + if ((u >> 23) & 0xff) < 0x7f + 3 { if y == 0.0 { return i!(P10, ((n as isize) + 7) as usize); } diff --git a/src/math/exp2.rs b/libm/src/math/exp2.rs similarity index 99% rename from src/math/exp2.rs rename to libm/src/math/exp2.rs index dce2ab4df..6e98d066c 100644 --- a/src/math/exp2.rs +++ b/libm/src/math/exp2.rs @@ -341,7 +341,7 @@ pub fn exp2(mut x: f64) -> f64 { /* Filter out exceptional cases. */ let ui = f64::to_bits(x); - let ix = ui >> 32 & 0x7fffffff; + let ix = (ui >> 32) & 0x7fffffff; if ix >= 0x408ff000 { /* |x| >= 1022 or nan */ if ix >= 0x40900000 && ui >> 63 == 0 { diff --git a/src/math/exp2f.rs b/libm/src/math/exp2f.rs similarity index 98% rename from src/math/exp2f.rs rename to libm/src/math/exp2f.rs index f4867b80e..f452b6a20 100644 --- a/src/math/exp2f.rs +++ b/libm/src/math/exp2f.rs @@ -95,7 +95,7 @@ pub fn exp2f(mut x: f32) -> f32 { /* NaN */ return x; } - if ui >= 0x43000000 && ui < 0x80000000 { + if (0x43000000..0x80000000).contains(&ui) { /* x >= 128 */ x *= x1p127; return x; @@ -127,7 +127,7 @@ pub fn exp2f(mut x: f32) -> f32 { let z: f64 = (x - uf) as f64; /* Compute r = exp2(y) = exp2ft[i0] * p(z). */ let r: f64 = f64::from_bits(i!(EXP2FT, i0 as usize)); - let t: f64 = r as f64 * z; + let t: f64 = r * z; let r: f64 = r + t * (p1 as f64 + z * p2 as f64) + t * (z * z) * (p3 as f64 + z * p4 as f64); /* Scale by 2**k */ diff --git a/src/math/expf.rs b/libm/src/math/expf.rs similarity index 100% rename from src/math/expf.rs rename to libm/src/math/expf.rs diff --git a/src/math/expm1.rs b/libm/src/math/expm1.rs similarity index 99% rename from src/math/expm1.rs rename to libm/src/math/expm1.rs index 42608509a..f25153f32 100644 --- a/src/math/expm1.rs +++ b/libm/src/math/expm1.rs @@ -115,7 +115,7 @@ pub fn expm1(mut x: f64) -> f64 { } ui = ((0x3ff + k) as u64) << 52; /* 2^k */ let twopk = f64::from_bits(ui); - if k < 0 || k > 56 { + if !(0..=56).contains(&k) { /* suffice to return exp(x)-1 */ y = x - e + 1.0; if k == 1024 { diff --git a/src/math/expm1f.rs b/libm/src/math/expm1f.rs similarity index 99% rename from src/math/expm1f.rs rename to libm/src/math/expm1f.rs index a862fe255..12c6f532b 100644 --- a/src/math/expm1f.rs +++ b/libm/src/math/expm1f.rs @@ -115,7 +115,7 @@ pub fn expm1f(mut x: f32) -> f32 { return 1. + 2. * (x - e); } let twopk = f32::from_bits(((0x7f + k) << 23) as u32); /* 2^k */ - if (k < 0) || (k > 56) { + if !(0..=56).contains(&k) { /* suffice to return exp(x)-1 */ let mut y = x - e + 1.; if k == 128 { diff --git a/src/math/expo2.rs b/libm/src/math/expo2.rs similarity index 100% rename from src/math/expo2.rs rename to libm/src/math/expo2.rs diff --git a/libm/src/math/fabs.rs b/libm/src/math/fabs.rs new file mode 100644 index 000000000..0050a309f --- /dev/null +++ b/libm/src/math/fabs.rs @@ -0,0 +1,116 @@ +/// Absolute value (magnitude) (f16) +/// +/// Calculates the absolute value (magnitude) of the argument `x`, +/// by direct manipulation of the bit representation of `x`. +#[cfg(f16_enabled)] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fabsf16(x: f16) -> f16 { + super::generic::fabs(x) +} + +/// Absolute value (magnitude) (f32) +/// +/// Calculates the absolute value (magnitude) of the argument `x`, +/// by direct manipulation of the bit representation of `x`. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fabsf(x: f32) -> f32 { + select_implementation! { + name: fabsf, + use_arch: all(target_arch = "wasm32", intrinsics_enabled), + args: x, + } + + super::generic::fabs(x) +} + +/// Absolute value (magnitude) (f64) +/// +/// Calculates the absolute value (magnitude) of the argument `x`, +/// by direct manipulation of the bit representation of `x`. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fabs(x: f64) -> f64 { + select_implementation! { + name: fabs, + use_arch: all(target_arch = "wasm32", intrinsics_enabled), + args: x, + } + + super::generic::fabs(x) +} + +/// Absolute value (magnitude) (f128) +/// +/// Calculates the absolute value (magnitude) of the argument `x`, +/// by direct manipulation of the bit representation of `x`. +#[cfg(f128_enabled)] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fabsf128(x: f128) -> f128 { + super::generic::fabs(x) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::support::Float; + + /// Based on https://en.cppreference.com/w/cpp/numeric/math/fabs + fn spec_test(f: impl Fn(F) -> F) { + assert_biteq!(f(F::ZERO), F::ZERO); + assert_biteq!(f(F::NEG_ZERO), F::ZERO); + assert_biteq!(f(F::INFINITY), F::INFINITY); + assert_biteq!(f(F::NEG_INFINITY), F::INFINITY); + assert!(f(F::NAN).is_nan()); + + // Not spec rewquired but we expect it + assert!(f(F::NAN).is_sign_positive()); + assert!(f(F::from_bits(F::NAN.to_bits() | F::SIGN_MASK)).is_sign_positive()); + } + + #[test] + #[cfg(f16_enabled)] + fn sanity_check_f16() { + assert_eq!(fabsf16(-1.0f16), 1.0); + assert_eq!(fabsf16(2.8f16), 2.8); + } + + #[test] + #[cfg(f16_enabled)] + fn spec_tests_f16() { + spec_test::(fabsf16); + } + + #[test] + fn sanity_check_f32() { + assert_eq!(fabsf(-1.0f32), 1.0); + assert_eq!(fabsf(2.8f32), 2.8); + } + + #[test] + fn spec_tests_f32() { + spec_test::(fabsf); + } + + #[test] + fn sanity_check_f64() { + assert_eq!(fabs(-1.0f64), 1.0); + assert_eq!(fabs(2.8f64), 2.8); + } + + #[test] + fn spec_tests_f64() { + spec_test::(fabs); + } + + #[test] + #[cfg(f128_enabled)] + fn sanity_check_f128() { + assert_eq!(fabsf128(-1.0f128), 1.0); + assert_eq!(fabsf128(2.8f128), 2.8); + } + + #[test] + #[cfg(f128_enabled)] + fn spec_tests_f128() { + spec_test::(fabsf128); + } +} diff --git a/src/math/fabsf.rs b/libm/src/math/fabsf.rs similarity index 59% rename from src/math/fabsf.rs rename to libm/src/math/fabsf.rs index f81c8ca44..e5820a26c 100644 --- a/src/math/fabsf.rs +++ b/libm/src/math/fabsf.rs @@ -1,25 +1,22 @@ /// Absolute value (magnitude) (f32) +/// /// Calculates the absolute value (magnitude) of the argument `x`, /// by direct manipulation of the bit representation of `x`. #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn fabsf(x: f32) -> f32 { - // On wasm32 we know that LLVM's intrinsic will compile to an optimized - // `f32.abs` native instruction, so we can leverage this for both code size - // and speed. - llvm_intrinsically_optimized! { - #[cfg(target_arch = "wasm32")] { - return unsafe { ::core::intrinsics::fabsf32(x) } - } + select_implementation! { + name: fabsf, + use_arch: all(target_arch = "wasm32", intrinsics_enabled), + args: x, } - f32::from_bits(x.to_bits() & 0x7fffffff) + + super::generic::fabs(x) } // PowerPC tests are failing on LLVM 13: https://github.com/rust-lang/rust/issues/88520 #[cfg(not(target_arch = "powerpc64"))] #[cfg(test)] mod tests { - use core::f32::*; - use super::*; #[test] @@ -31,12 +28,12 @@ mod tests { /// The spec: https://en.cppreference.com/w/cpp/numeric/math/fabs #[test] fn spec_tests() { - assert!(fabsf(NAN).is_nan()); + assert!(fabsf(f32::NAN).is_nan()); for f in [0.0, -0.0].iter().copied() { assert_eq!(fabsf(f), 0.0); } - for f in [INFINITY, NEG_INFINITY].iter().copied() { - assert_eq!(fabsf(f), INFINITY); + for f in [f32::INFINITY, f32::NEG_INFINITY].iter().copied() { + assert_eq!(fabsf(f), f32::INFINITY); } } } diff --git a/libm/src/math/fabsf128.rs b/libm/src/math/fabsf128.rs new file mode 100644 index 000000000..46429ca49 --- /dev/null +++ b/libm/src/math/fabsf128.rs @@ -0,0 +1,31 @@ +/// Absolute value (magnitude) (f128) +/// +/// Calculates the absolute value (magnitude) of the argument `x`, +/// by direct manipulation of the bit representation of `x`. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fabsf128(x: f128) -> f128 { + super::generic::fabs(x) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn sanity_check() { + assert_eq!(fabsf128(-1.0), 1.0); + assert_eq!(fabsf128(2.8), 2.8); + } + + /// The spec: https://en.cppreference.com/w/cpp/numeric/math/fabs + #[test] + fn spec_tests() { + assert!(fabsf128(f128::NAN).is_nan()); + for f in [0.0, -0.0].iter().copied() { + assert_eq!(fabsf128(f), 0.0); + } + for f in [f128::INFINITY, f128::NEG_INFINITY].iter().copied() { + assert_eq!(fabsf128(f), f128::INFINITY); + } + } +} diff --git a/libm/src/math/fabsf16.rs b/libm/src/math/fabsf16.rs new file mode 100644 index 000000000..eee42ac6a --- /dev/null +++ b/libm/src/math/fabsf16.rs @@ -0,0 +1,31 @@ +/// Absolute value (magnitude) (f16) +/// +/// Calculates the absolute value (magnitude) of the argument `x`, +/// by direct manipulation of the bit representation of `x`. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fabsf16(x: f16) -> f16 { + super::generic::fabs(x) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn sanity_check() { + assert_eq!(fabsf16(-1.0), 1.0); + assert_eq!(fabsf16(2.8), 2.8); + } + + /// The spec: https://en.cppreference.com/w/cpp/numeric/math/fabs + #[test] + fn spec_tests() { + assert!(fabsf16(f16::NAN).is_nan()); + for f in [0.0, -0.0].iter().copied() { + assert_eq!(fabsf16(f), 0.0); + } + for f in [f16::INFINITY, f16::NEG_INFINITY].iter().copied() { + assert_eq!(fabsf16(f), f16::INFINITY); + } + } +} diff --git a/libm/src/math/fdim.rs b/libm/src/math/fdim.rs new file mode 100644 index 000000000..082c5478b --- /dev/null +++ b/libm/src/math/fdim.rs @@ -0,0 +1,53 @@ +/// Positive difference (f16) +/// +/// Determines the positive difference between arguments, returning: +/// * x - y if x > y, or +/// * +0 if x <= y, or +/// * NAN if either argument is NAN. +/// +/// A range error may occur. +#[cfg(f16_enabled)] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fdimf16(x: f16, y: f16) -> f16 { + super::generic::fdim(x, y) +} + +/// Positive difference (f32) +/// +/// Determines the positive difference between arguments, returning: +/// * x - y if x > y, or +/// * +0 if x <= y, or +/// * NAN if either argument is NAN. +/// +/// A range error may occur. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fdimf(x: f32, y: f32) -> f32 { + super::generic::fdim(x, y) +} + +/// Positive difference (f64) +/// +/// Determines the positive difference between arguments, returning: +/// * x - y if x > y, or +/// * +0 if x <= y, or +/// * NAN if either argument is NAN. +/// +/// A range error may occur. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fdim(x: f64, y: f64) -> f64 { + super::generic::fdim(x, y) +} + +/// Positive difference (f128) +/// +/// Determines the positive difference between arguments, returning: +/// * x - y if x > y, or +/// * +0 if x <= y, or +/// * NAN if either argument is NAN. +/// +/// A range error may occur. +#[cfg(f128_enabled)] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fdimf128(x: f128, y: f128) -> f128 { + super::generic::fdim(x, y) +} diff --git a/src/math/fdimf.rs b/libm/src/math/fdimf.rs similarity index 50% rename from src/math/fdimf.rs rename to libm/src/math/fdimf.rs index ea0b592d7..367ef517c 100644 --- a/src/math/fdimf.rs +++ b/libm/src/math/fdimf.rs @@ -1,22 +1,12 @@ -use core::f32; - /// Positive difference (f32) /// /// Determines the positive difference between arguments, returning: -/// * x - y if x > y, or -/// * +0 if x <= y, or -/// * NAN if either argument is NAN. +/// * x - y if x > y, or +/// * +0 if x <= y, or +/// * NAN if either argument is NAN. /// /// A range error may occur. #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn fdimf(x: f32, y: f32) -> f32 { - if x.is_nan() { - x - } else if y.is_nan() { - y - } else if x > y { - x - y - } else { - 0.0 - } + super::generic::fdim(x, y) } diff --git a/libm/src/math/fdimf128.rs b/libm/src/math/fdimf128.rs new file mode 100644 index 000000000..6f3d1d0ff --- /dev/null +++ b/libm/src/math/fdimf128.rs @@ -0,0 +1,12 @@ +/// Positive difference (f128) +/// +/// Determines the positive difference between arguments, returning: +/// * x - y if x > y, or +/// * +0 if x <= y, or +/// * NAN if either argument is NAN. +/// +/// A range error may occur. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fdimf128(x: f128, y: f128) -> f128 { + super::generic::fdim(x, y) +} diff --git a/libm/src/math/fdimf16.rs b/libm/src/math/fdimf16.rs new file mode 100644 index 000000000..37bd68858 --- /dev/null +++ b/libm/src/math/fdimf16.rs @@ -0,0 +1,12 @@ +/// Positive difference (f16) +/// +/// Determines the positive difference between arguments, returning: +/// * x - y if x > y, or +/// * +0 if x <= y, or +/// * NAN if either argument is NAN. +/// +/// A range error may occur. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fdimf16(x: f16, y: f16) -> f16 { + super::generic::fdim(x, y) +} diff --git a/libm/src/math/floor.rs b/libm/src/math/floor.rs new file mode 100644 index 000000000..3c5eab101 --- /dev/null +++ b/libm/src/math/floor.rs @@ -0,0 +1,46 @@ +/// Floor (f16) +/// +/// Finds the nearest integer less than or equal to `x`. +#[cfg(f16_enabled)] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn floorf16(x: f16) -> f16 { + return super::generic::floor(x); +} + +/// Floor (f64) +/// +/// Finds the nearest integer less than or equal to `x`. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn floor(x: f64) -> f64 { + select_implementation! { + name: floor, + use_arch: all(target_arch = "wasm32", intrinsics_enabled), + use_arch_required: all(target_arch = "x86", not(target_feature = "sse2")), + args: x, + } + + return super::generic::floor(x); +} + +/// Floor (f32) +/// +/// Finds the nearest integer less than or equal to `x`. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn floorf(x: f32) -> f32 { + select_implementation! { + name: floorf, + use_arch: all(target_arch = "wasm32", intrinsics_enabled), + args: x, + } + + return super::generic::floor(x); +} + +/// Floor (f128) +/// +/// Finds the nearest integer less than or equal to `x`. +#[cfg(f128_enabled)] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn floorf128(x: f128) -> f128 { + return super::generic::floor(x); +} diff --git a/libm/src/math/floorf.rs b/libm/src/math/floorf.rs new file mode 100644 index 000000000..16957b7f3 --- /dev/null +++ b/libm/src/math/floorf.rs @@ -0,0 +1,13 @@ +/// Floor (f32) +/// +/// Finds the nearest integer less than or equal to `x`. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn floorf(x: f32) -> f32 { + select_implementation! { + name: floorf, + use_arch: all(target_arch = "wasm32", intrinsics_enabled), + args: x, + } + + return super::generic::floor(x); +} diff --git a/libm/src/math/floorf128.rs b/libm/src/math/floorf128.rs new file mode 100644 index 000000000..9a9fe4151 --- /dev/null +++ b/libm/src/math/floorf128.rs @@ -0,0 +1,7 @@ +/// Floor (f128) +/// +/// Finds the nearest integer less than or equal to `x`. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn floorf128(x: f128) -> f128 { + return super::generic::floor(x); +} diff --git a/libm/src/math/floorf16.rs b/libm/src/math/floorf16.rs new file mode 100644 index 000000000..f9b868e04 --- /dev/null +++ b/libm/src/math/floorf16.rs @@ -0,0 +1,7 @@ +/// Floor (f16) +/// +/// Finds the nearest integer less than or equal to `x`. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn floorf16(x: f16) -> f16 { + return super::generic::floor(x); +} diff --git a/libm/src/math/fma.rs b/libm/src/math/fma.rs new file mode 100644 index 000000000..e0b3347ac --- /dev/null +++ b/libm/src/math/fma.rs @@ -0,0 +1,397 @@ +/* SPDX-License-Identifier: MIT */ +/* origin: musl src/math/fma.c. Ported to generic Rust algorithm in 2025, TG. */ + +use super::support::{DInt, FpResult, HInt, IntTy, Round, Status}; +use super::{CastFrom, CastInto, Float, Int, MinInt}; + +/// Fused multiply add (f64) +/// +/// Computes `(x*y)+z`, rounded as one ternary operation (i.e. calculated with infinite precision). +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fma(x: f64, y: f64, z: f64) -> f64 { + select_implementation! { + name: fma, + use_arch: all(target_arch = "aarch64", target_feature = "neon"), + args: x, y, z, + } + + fma_round(x, y, z, Round::Nearest).val +} + +/// Fused multiply add (f128) +/// +/// Computes `(x*y)+z`, rounded as one ternary operation (i.e. calculated with infinite precision). +#[cfg(f128_enabled)] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fmaf128(x: f128, y: f128, z: f128) -> f128 { + fma_round(x, y, z, Round::Nearest).val +} + +/// Fused multiply-add that works when there is not a larger float size available. Computes +/// `(x * y) + z`. +#[inline] +pub fn fma_round(x: F, y: F, z: F, _round: Round) -> FpResult +where + F: Float, + F: CastFrom, + F: CastFrom, + F::Int: HInt, + u32: CastInto, +{ + let one = IntTy::::ONE; + let zero = IntTy::::ZERO; + + // Normalize such that the top of the mantissa is zero and we have a guard bit. + let nx = Norm::from_float(x); + let ny = Norm::from_float(y); + let nz = Norm::from_float(z); + + if nx.is_zero_nan_inf() || ny.is_zero_nan_inf() { + // Value will overflow, defer to non-fused operations. + return FpResult::ok(x * y + z); + } + + if nz.is_zero_nan_inf() { + if nz.is_zero() { + // Empty add component means we only need to multiply. + return FpResult::ok(x * y); + } + // `z` is NaN or infinity, which sets the result. + return FpResult::ok(z); + } + + // multiply: r = x * y + let zhi: F::Int; + let zlo: F::Int; + let (mut rlo, mut rhi) = nx.m.widen_mul(ny.m).lo_hi(); + + // Exponent result of multiplication + let mut e: i32 = nx.e + ny.e; + // Needed shift to align `z` to the multiplication result + let mut d: i32 = nz.e - e; + let sbits = F::BITS as i32; + + // Scale `z`. Shift `z <<= kz`, `r >>= kr`, so `kz+kr == d`, set `e = e+kr` (== ez-kz) + if d > 0 { + // The magnitude of `z` is larger than `x * y` + if d < sbits { + // Maximum shift of one `F::BITS` means shifted `z` will fit into `2 * F::BITS`. Shift + // it into `(zhi, zlo)`. No exponent adjustment necessary. + zlo = nz.m << d; + zhi = nz.m >> (sbits - d); + } else { + // Shift larger than `sbits`, `z` only needs the top half `zhi`. Place it there (acts + // as a shift by `sbits`). + zlo = zero; + zhi = nz.m; + d -= sbits; + + // `z`'s exponent is large enough that it now needs to be taken into account. + e = nz.e - sbits; + + if d == 0 { + // Exactly `sbits`, nothing to do + } else if d < sbits { + // Remaining shift fits within `sbits`. Leave `z` in place, shift `x * y` + rlo = (rhi << (sbits - d)) | (rlo >> d); + // Set the sticky bit + rlo |= IntTy::::from((rlo << (sbits - d)) != zero); + rhi = rhi >> d; + } else { + // `z`'s magnitude is enough that `x * y` is irrelevant. It was nonzero, so set + // the sticky bit. + rlo = one; + rhi = zero; + } + } + } else { + // `z`'s magnitude once shifted fits entirely within `zlo` + zhi = zero; + d = -d; + if d == 0 { + // No shift needed + zlo = nz.m; + } else if d < sbits { + // Shift s.t. `nz.m` fits into `zlo` + let sticky = IntTy::::from((nz.m << (sbits - d)) != zero); + zlo = (nz.m >> d) | sticky; + } else { + // Would be entirely shifted out, only set the sticky bit + zlo = one; + } + } + + /* addition */ + + let mut neg = nx.neg ^ ny.neg; + let samesign: bool = !neg ^ nz.neg; + let mut rhi_nonzero = true; + + if samesign { + // r += z + rlo = rlo.wrapping_add(zlo); + rhi += zhi + IntTy::::from(rlo < zlo); + } else { + // r -= z + let (res, borrow) = rlo.overflowing_sub(zlo); + rlo = res; + rhi = rhi.wrapping_sub(zhi.wrapping_add(IntTy::::from(borrow))); + if (rhi >> (F::BITS - 1)) != zero { + rlo = rlo.signed().wrapping_neg().unsigned(); + rhi = rhi.signed().wrapping_neg().unsigned() - IntTy::::from(rlo != zero); + neg = !neg; + } + rhi_nonzero = rhi != zero; + } + + /* Construct result */ + + // Shift result into `rhi`, left-aligned. Last bit is sticky + if rhi_nonzero { + // `d` > 0, need to shift both `rhi` and `rlo` into result + e += sbits; + d = rhi.leading_zeros() as i32 - 1; + rhi = (rhi << d) | (rlo >> (sbits - d)); + // Update sticky + rhi |= IntTy::::from((rlo << d) != zero); + } else if rlo != zero { + // `rhi` is zero, `rlo` is the entire result and needs to be shifted + d = rlo.leading_zeros() as i32 - 1; + if d < 0 { + // Shift and set sticky + rhi = (rlo >> 1) | (rlo & one); + } else { + rhi = rlo << d; + } + } else { + // exact +/- 0.0 + return FpResult::ok(x * y + z); + } + + e -= d; + + // Use int->float conversion to populate the significand. + // i is in [1 << (BITS - 2), (1 << (BITS - 1)) - 1] + let mut i: F::SignedInt = rhi.signed(); + + if neg { + i = -i; + } + + // `|r|` is in `[0x1p62,0x1p63]` for `f64` + let mut r: F = F::cast_from_lossy(i); + + /* Account for subnormal and rounding */ + + // Unbiased exponent for the maximum value of `r` + let max_pow = F::BITS - 1 + F::EXP_BIAS; + + let mut status = Status::OK; + + if e < -(max_pow as i32 - 2) { + // Result is subnormal before rounding + if e == -(max_pow as i32 - 1) { + let mut c = F::from_parts(false, max_pow, zero); + if neg { + c = -c; + } + + if r == c { + // Min normal after rounding, + status.set_underflow(true); + r = F::MIN_POSITIVE_NORMAL.copysign(r); + return FpResult::new(r, status); + } + + if (rhi << (F::SIG_BITS + 1)) != zero { + // Account for truncated bits. One bit will be lost in the `scalbn` call, add + // another top bit to avoid double rounding if inexact. + let iu: F::Int = (rhi >> 1) | (rhi & one) | (one << (F::BITS - 2)); + i = iu.signed(); + + if neg { + i = -i; + } + + r = F::cast_from_lossy(i); + + // Remove the top bit + r = F::cast_from(2i8) * r - c; + status.set_underflow(true); + } + } else { + // Only round once when scaled + d = F::EXP_BITS as i32 - 1; + let sticky = IntTy::::from(rhi << (F::BITS as i32 - d) != zero); + i = (((rhi >> d) | sticky) << d).signed(); + + if neg { + i = -i; + } + + r = F::cast_from_lossy(i); + } + } + + // Use our exponent to scale the final value. + FpResult::new(super::generic::scalbn(r, e), status) +} + +/// Representation of `F` that has handled subnormals. +#[derive(Clone, Copy, Debug)] +struct Norm { + /// Normalized significand with one guard bit, unsigned. + m: F::Int, + /// Exponent of the mantissa such that `m * 2^e = x`. Accounts for the shift in the mantissa + /// and the guard bit; that is, 1.0 will normalize as `m = 1 << 53` and `e = -53`. + e: i32, + neg: bool, +} + +impl Norm { + /// Unbias the exponent and account for the mantissa's precision, including the guard bit. + const EXP_UNBIAS: u32 = F::EXP_BIAS + F::SIG_BITS + 1; + + /// Values greater than this had a saturated exponent (infinity or NaN), OR were zero and we + /// adjusted the exponent such that it exceeds this threashold. + const ZERO_INF_NAN: u32 = F::EXP_SAT - Self::EXP_UNBIAS; + + fn from_float(x: F) -> Self { + let mut ix = x.to_bits(); + let mut e = x.ex() as i32; + let neg = x.is_sign_negative(); + if e == 0 { + // Normalize subnormals by multiplication + let scale_i = F::BITS - 1; + let scale_f = F::from_parts(false, scale_i + F::EXP_BIAS, F::Int::ZERO); + let scaled = x * scale_f; + ix = scaled.to_bits(); + e = scaled.ex() as i32; + e = if e == 0 { + // If the exponent is still zero, the input was zero. Artifically set this value + // such that the final `e` will exceed `ZERO_INF_NAN`. + 1 << F::EXP_BITS + } else { + // Otherwise, account for the scaling we just did. + e - scale_i as i32 + }; + } + + e -= Self::EXP_UNBIAS as i32; + + // Absolute value, set the implicit bit, and shift to create a guard bit + ix &= F::SIG_MASK; + ix |= F::IMPLICIT_BIT; + ix <<= 1; + + Self { m: ix, e, neg } + } + + /// True if the value was zero, infinity, or NaN. + fn is_zero_nan_inf(self) -> bool { + self.e >= Self::ZERO_INF_NAN as i32 + } + + /// The only value we have + fn is_zero(self) -> bool { + // The only exponent that strictly exceeds this value is our sentinel value for zero. + self.e > Self::ZERO_INF_NAN as i32 + } +} + +#[cfg(test)] +mod tests { + use super::*; + + /// Test the generic `fma_round` algorithm for a given float. + fn spec_test() + where + F: Float, + F: CastFrom, + F: CastFrom, + F::Int: HInt, + u32: CastInto, + { + let x = F::from_bits(F::Int::ONE); + let y = F::from_bits(F::Int::ONE); + let z = F::ZERO; + + let fma = |x, y, z| fma_round(x, y, z, Round::Nearest).val; + + // 754-2020 says "When the exact result of (a × b) + c is non-zero yet the result of + // fusedMultiplyAdd is zero because of rounding, the zero result takes the sign of the + // exact result" + assert_biteq!(fma(x, y, z), F::ZERO); + assert_biteq!(fma(x, -y, z), F::NEG_ZERO); + assert_biteq!(fma(-x, y, z), F::NEG_ZERO); + assert_biteq!(fma(-x, -y, z), F::ZERO); + } + + #[test] + fn spec_test_f32() { + spec_test::(); + } + + #[test] + fn spec_test_f64() { + spec_test::(); + + let expect_underflow = [ + ( + hf64!("0x1.0p-1070"), + hf64!("0x1.0p-1070"), + hf64!("0x1.ffffffffffffp-1023"), + hf64!("0x0.ffffffffffff8p-1022"), + ), + ( + // FIXME: we raise underflow but this should only be inexact (based on C and + // `rustc_apfloat`). + hf64!("0x1.0p-1070"), + hf64!("0x1.0p-1070"), + hf64!("-0x1.0p-1022"), + hf64!("-0x1.0p-1022"), + ), + ]; + + for (x, y, z, res) in expect_underflow { + let FpResult { val, status } = fma_round(x, y, z, Round::Nearest); + assert_biteq!(val, res); + assert_eq!(status, Status::UNDERFLOW); + } + } + + #[test] + #[cfg(f128_enabled)] + fn spec_test_f128() { + spec_test::(); + } + + #[test] + fn fma_segfault() { + // These two inputs cause fma to segfault on release due to overflow: + assert_eq!( + fma( + -0.0000000000000002220446049250313, + -0.0000000000000002220446049250313, + -0.0000000000000002220446049250313 + ), + -0.00000000000000022204460492503126, + ); + + let result = fma(-0.992, -0.992, -0.992); + //force rounding to storage format on x87 to prevent superious errors. + #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))] + let result = force_eval!(result); + assert_eq!(result, -0.007936000000000007,); + } + + #[test] + fn fma_sbb() { + assert_eq!(fma(-(1.0 - f64::EPSILON), f64::MIN, f64::MIN), -3991680619069439e277); + } + + #[test] + fn fma_underflow() { + assert_eq!(fma(1.1102230246251565e-16, -9.812526705433188e-305, 1.0894e-320), 0.0,); + } +} diff --git a/libm/src/math/fma_wide.rs b/libm/src/math/fma_wide.rs new file mode 100644 index 000000000..08b78b022 --- /dev/null +++ b/libm/src/math/fma_wide.rs @@ -0,0 +1,104 @@ +/* SPDX-License-Identifier: MIT */ +/* origin: musl src/math/fmaf.c. Ported to generic Rust algorithm in 2025, TG. */ + +use super::support::{FpResult, IntTy, Round, Status}; +use super::{CastFrom, CastInto, DFloat, Float, HFloat, MinInt}; + +// Placeholder so we can have `fmaf16` in the `Float` trait. +#[allow(unused)] +#[cfg(f16_enabled)] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub(crate) fn fmaf16(_x: f16, _y: f16, _z: f16) -> f16 { + unimplemented!() +} + +/// Floating multiply add (f32) +/// +/// Computes `(x*y)+z`, rounded as one ternary operation (i.e. calculated with infinite precision). +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fmaf(x: f32, y: f32, z: f32) -> f32 { + select_implementation! { + name: fmaf, + use_arch: all(target_arch = "aarch64", target_feature = "neon"), + args: x, y, z, + } + + fma_wide_round(x, y, z, Round::Nearest).val +} + +/// Fma implementation when a hardware-backed larger float type is available. For `f32` and `f64`, +/// `f64` has enough precision to represent the `f32` in its entirety, except for double rounding. +#[inline] +pub fn fma_wide_round(x: F, y: F, z: F, round: Round) -> FpResult +where + F: Float + HFloat, + B: Float + DFloat, + B::Int: CastInto, + i32: CastFrom, +{ + let one = IntTy::::ONE; + + let xy: B = x.widen() * y.widen(); + let mut result: B = xy + z.widen(); + let mut ui: B::Int = result.to_bits(); + let re = result.ex(); + let zb: B = z.widen(); + + let prec_diff = B::SIG_BITS - F::SIG_BITS; + let excess_prec = ui & ((one << prec_diff) - one); + let halfway = one << (prec_diff - 1); + + // Common case: the larger precision is fine if... + // This is not a halfway case + if excess_prec != halfway + // Or the result is NaN + || re == B::EXP_SAT + // Or the result is exact + || (result - xy == zb && result - zb == xy) + // Or the mode is something other than round to nearest + || round != Round::Nearest + { + let min_inexact_exp = (B::EXP_BIAS as i32 + F::EXP_MIN_SUBNORM) as u32; + let max_inexact_exp = (B::EXP_BIAS as i32 + F::EXP_MIN) as u32; + + let mut status = Status::OK; + + if (min_inexact_exp..max_inexact_exp).contains(&re) && status.inexact() { + // This branch is never hit; requires previous operations to set a status + status.set_inexact(false); + + result = xy + z.widen(); + if status.inexact() { + status.set_underflow(true); + } else { + status.set_inexact(true); + } + } + + return FpResult { val: result.narrow(), status }; + } + + let neg = ui >> (B::BITS - 1) != IntTy::::ZERO; + let err = if neg == (zb > xy) { xy - result + zb } else { zb - result + xy }; + if neg == (err < B::ZERO) { + ui += one; + } else { + ui -= one; + } + + FpResult::ok(B::from_bits(ui).narrow()) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn issue_263() { + let a = f32::from_bits(1266679807); + let b = f32::from_bits(1300234242); + let c = f32::from_bits(1115553792); + let expected = f32::from_bits(1501560833); + assert_eq!(fmaf(a, b, c), expected); + } +} diff --git a/libm/src/math/fmin_fmax.rs b/libm/src/math/fmin_fmax.rs new file mode 100644 index 000000000..2947b783e --- /dev/null +++ b/libm/src/math/fmin_fmax.rs @@ -0,0 +1,167 @@ +/// Return the lesser of two arguments or, if either argument is NaN, the other argument. +/// +/// This coincides with IEEE 754-2011 `minNum`. The result disregards signed zero (meaning if +/// the inputs are -0.0 and +0.0, either may be returned). +#[cfg(f16_enabled)] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fminf16(x: f16, y: f16) -> f16 { + super::generic::fmin(x, y) +} + +/// Return the lesser of two arguments or, if either argument is NaN, the other argument. +/// +/// This coincides with IEEE 754-2011 `minNum`. The result disregards signed zero (meaning if +/// the inputs are -0.0 and +0.0, either may be returned). +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fminf(x: f32, y: f32) -> f32 { + super::generic::fmin(x, y) +} + +/// Return the lesser of two arguments or, if either argument is NaN, the other argument. +/// +/// This coincides with IEEE 754-2011 `minNum`. The result disregards signed zero (meaning if +/// the inputs are -0.0 and +0.0, either may be returned). +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fmin(x: f64, y: f64) -> f64 { + super::generic::fmin(x, y) +} + +/// Return the lesser of two arguments or, if either argument is NaN, the other argument. +/// +/// This coincides with IEEE 754-2011 `minNum`. The result disregards signed zero (meaning if +/// the inputs are -0.0 and +0.0, either may be returned). +#[cfg(f128_enabled)] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fminf128(x: f128, y: f128) -> f128 { + super::generic::fmin(x, y) +} + +/// Return the greater of two arguments or, if either argument is NaN, the other argument. +/// +/// This coincides with IEEE 754-2011 `maxNum`. The result disregards signed zero (meaning if +/// the inputs are -0.0 and +0.0, either may be returned). +#[cfg(f16_enabled)] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fmaxf16(x: f16, y: f16) -> f16 { + super::generic::fmax(x, y) +} + +/// Return the greater of two arguments or, if either argument is NaN, the other argument. +/// +/// This coincides with IEEE 754-2011 `maxNum`. The result disregards signed zero (meaning if +/// the inputs are -0.0 and +0.0, either may be returned). +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fmaxf(x: f32, y: f32) -> f32 { + super::generic::fmax(x, y) +} + +/// Return the greater of two arguments or, if either argument is NaN, the other argument. +/// +/// This coincides with IEEE 754-2011 `maxNum`. The result disregards signed zero (meaning if +/// the inputs are -0.0 and +0.0, either may be returned). +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fmax(x: f64, y: f64) -> f64 { + super::generic::fmax(x, y) +} + +/// Return the greater of two arguments or, if either argument is NaN, the other argument. +/// +/// This coincides with IEEE 754-2011 `maxNum`. The result disregards signed zero (meaning if +/// the inputs are -0.0 and +0.0, either may be returned). +#[cfg(f128_enabled)] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fmaxf128(x: f128, y: f128) -> f128 { + super::generic::fmax(x, y) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::support::{Float, Hexf}; + + fn fmin_spec_test(f: impl Fn(F, F) -> F) { + let cases = [ + (F::ZERO, F::ZERO, F::ZERO), + (F::ONE, F::ONE, F::ONE), + (F::ZERO, F::ONE, F::ZERO), + (F::ONE, F::ZERO, F::ZERO), + (F::ZERO, F::NEG_ONE, F::NEG_ONE), + (F::NEG_ONE, F::ZERO, F::NEG_ONE), + (F::INFINITY, F::ZERO, F::ZERO), + (F::NEG_INFINITY, F::ZERO, F::NEG_INFINITY), + (F::NAN, F::ZERO, F::ZERO), + (F::ZERO, F::NAN, F::ZERO), + (F::NAN, F::NAN, F::NAN), + ]; + + for (x, y, res) in cases { + let val = f(x, y); + assert_biteq!(val, res, "fmin({}, {})", Hexf(x), Hexf(y)); + } + } + + #[test] + #[cfg(f16_enabled)] + fn fmin_spec_tests_f16() { + fmin_spec_test::(fminf16); + } + + #[test] + fn fmin_spec_tests_f32() { + fmin_spec_test::(fminf); + } + + #[test] + fn fmin_spec_tests_f64() { + fmin_spec_test::(fmin); + } + + #[test] + #[cfg(f128_enabled)] + fn fmin_spec_tests_f128() { + fmin_spec_test::(fminf128); + } + + fn fmax_spec_test(f: impl Fn(F, F) -> F) { + let cases = [ + (F::ZERO, F::ZERO, F::ZERO), + (F::ONE, F::ONE, F::ONE), + (F::ZERO, F::ONE, F::ONE), + (F::ONE, F::ZERO, F::ONE), + (F::ZERO, F::NEG_ONE, F::ZERO), + (F::NEG_ONE, F::ZERO, F::ZERO), + (F::INFINITY, F::ZERO, F::INFINITY), + (F::NEG_INFINITY, F::ZERO, F::ZERO), + (F::NAN, F::ZERO, F::ZERO), + (F::ZERO, F::NAN, F::ZERO), + (F::NAN, F::NAN, F::NAN), + ]; + + for (x, y, res) in cases { + let val = f(x, y); + assert_biteq!(val, res, "fmax({}, {})", Hexf(x), Hexf(y)); + } + } + + #[test] + #[cfg(f16_enabled)] + fn fmax_spec_tests_f16() { + fmax_spec_test::(fmaxf16); + } + + #[test] + fn fmax_spec_tests_f32() { + fmax_spec_test::(fmaxf); + } + + #[test] + fn fmax_spec_tests_f64() { + fmax_spec_test::(fmax); + } + + #[test] + #[cfg(f128_enabled)] + fn fmax_spec_tests_f128() { + fmax_spec_test::(fmaxf128); + } +} diff --git a/libm/src/math/fminimum_fmaximum.rs b/libm/src/math/fminimum_fmaximum.rs new file mode 100644 index 000000000..b7999e273 --- /dev/null +++ b/libm/src/math/fminimum_fmaximum.rs @@ -0,0 +1,163 @@ +/// Return the lesser of two arguments or, if either argument is NaN, the other argument. +/// +/// This coincides with IEEE 754-2019 `minimum`. The result orders -0.0 < 0.0. +#[cfg(f16_enabled)] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fminimumf16(x: f16, y: f16) -> f16 { + super::generic::fminimum(x, y) +} + +/// Return the lesser of two arguments or, if either argument is NaN, the other argument. +/// +/// This coincides with IEEE 754-2019 `minimum`. The result orders -0.0 < 0.0. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fminimum(x: f64, y: f64) -> f64 { + super::generic::fminimum(x, y) +} + +/// Return the lesser of two arguments or, if either argument is NaN, the other argument. +/// +/// This coincides with IEEE 754-2019 `minimum`. The result orders -0.0 < 0.0. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fminimumf(x: f32, y: f32) -> f32 { + super::generic::fminimum(x, y) +} + +/// Return the lesser of two arguments or, if either argument is NaN, the other argument. +/// +/// This coincides with IEEE 754-2019 `minimum`. The result orders -0.0 < 0.0. +#[cfg(f128_enabled)] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fminimumf128(x: f128, y: f128) -> f128 { + super::generic::fminimum(x, y) +} + +/// Return the greater of two arguments or, if either argument is NaN, the other argument. +/// +/// This coincides with IEEE 754-2019 `maximum`. The result orders -0.0 < 0.0. +#[cfg(f16_enabled)] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fmaximumf16(x: f16, y: f16) -> f16 { + super::generic::fmaximum(x, y) +} + +/// Return the greater of two arguments or, if either argument is NaN, the other argument. +/// +/// This coincides with IEEE 754-2019 `maximum`. The result orders -0.0 < 0.0. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fmaximumf(x: f32, y: f32) -> f32 { + super::generic::fmaximum(x, y) +} + +/// Return the greater of two arguments or, if either argument is NaN, the other argument. +/// +/// This coincides with IEEE 754-2019 `maximum`. The result orders -0.0 < 0.0. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fmaximum(x: f64, y: f64) -> f64 { + super::generic::fmaximum(x, y) +} + +/// Return the greater of two arguments or, if either argument is NaN, the other argument. +/// +/// This coincides with IEEE 754-2019 `maximum`. The result orders -0.0 < 0.0. +#[cfg(f128_enabled)] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fmaximumf128(x: f128, y: f128) -> f128 { + super::generic::fmaximum(x, y) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::support::{Float, Hexf}; + + fn fminimum_spec_test(f: impl Fn(F, F) -> F) { + let cases = [ + (F::ZERO, F::ZERO, F::ZERO), + (F::ONE, F::ONE, F::ONE), + (F::ZERO, F::ONE, F::ZERO), + (F::ONE, F::ZERO, F::ZERO), + (F::ZERO, F::NEG_ONE, F::NEG_ONE), + (F::NEG_ONE, F::ZERO, F::NEG_ONE), + (F::INFINITY, F::ZERO, F::ZERO), + (F::NEG_INFINITY, F::ZERO, F::NEG_INFINITY), + (F::NAN, F::ZERO, F::NAN), + (F::ZERO, F::NAN, F::NAN), + (F::NAN, F::NAN, F::NAN), + (F::ZERO, F::NEG_ZERO, F::NEG_ZERO), + (F::NEG_ZERO, F::ZERO, F::NEG_ZERO), + ]; + + for (x, y, res) in cases { + let val = f(x, y); + assert_biteq!(val, res, "fminimum({}, {})", Hexf(x), Hexf(y)); + } + } + + #[test] + #[cfg(f16_enabled)] + fn fminimum_spec_tests_f16() { + fminimum_spec_test::(fminimumf16); + } + + #[test] + fn fminimum_spec_tests_f32() { + fminimum_spec_test::(fminimumf); + } + + #[test] + fn fminimum_spec_tests_f64() { + fminimum_spec_test::(fminimum); + } + + #[test] + #[cfg(f128_enabled)] + fn fminimum_spec_tests_f128() { + fminimum_spec_test::(fminimumf128); + } + + fn fmaximum_spec_test(f: impl Fn(F, F) -> F) { + let cases = [ + (F::ZERO, F::ZERO, F::ZERO), + (F::ONE, F::ONE, F::ONE), + (F::ZERO, F::ONE, F::ONE), + (F::ONE, F::ZERO, F::ONE), + (F::ZERO, F::NEG_ONE, F::ZERO), + (F::NEG_ONE, F::ZERO, F::ZERO), + (F::INFINITY, F::ZERO, F::INFINITY), + (F::NEG_INFINITY, F::ZERO, F::ZERO), + (F::NAN, F::ZERO, F::NAN), + (F::ZERO, F::NAN, F::NAN), + (F::NAN, F::NAN, F::NAN), + (F::ZERO, F::NEG_ZERO, F::ZERO), + (F::NEG_ZERO, F::ZERO, F::ZERO), + ]; + + for (x, y, res) in cases { + let val = f(x, y); + assert_biteq!(val, res, "fmaximum({}, {})", Hexf(x), Hexf(y)); + } + } + + #[test] + #[cfg(f16_enabled)] + fn fmaximum_spec_tests_f16() { + fmaximum_spec_test::(fmaximumf16); + } + + #[test] + fn fmaximum_spec_tests_f32() { + fmaximum_spec_test::(fmaximumf); + } + + #[test] + fn fmaximum_spec_tests_f64() { + fmaximum_spec_test::(fmaximum); + } + + #[test] + #[cfg(f128_enabled)] + fn fmaximum_spec_tests_f128() { + fmaximum_spec_test::(fmaximumf128); + } +} diff --git a/libm/src/math/fminimum_fmaximum_num.rs b/libm/src/math/fminimum_fmaximum_num.rs new file mode 100644 index 000000000..180d21f72 --- /dev/null +++ b/libm/src/math/fminimum_fmaximum_num.rs @@ -0,0 +1,163 @@ +/// Return the lesser of two arguments or, if either argument is NaN, NaN. +/// +/// This coincides with IEEE 754-2019 `minimumNumber`. The result orders -0.0 < 0.0. +#[cfg(f16_enabled)] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fminimum_numf16(x: f16, y: f16) -> f16 { + super::generic::fminimum_num(x, y) +} + +/// Return the lesser of two arguments or, if either argument is NaN, NaN. +/// +/// This coincides with IEEE 754-2019 `minimumNumber`. The result orders -0.0 < 0.0. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fminimum_numf(x: f32, y: f32) -> f32 { + super::generic::fminimum_num(x, y) +} + +/// Return the lesser of two arguments or, if either argument is NaN, NaN. +/// +/// This coincides with IEEE 754-2019 `minimumNumber`. The result orders -0.0 < 0.0. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fminimum_num(x: f64, y: f64) -> f64 { + super::generic::fminimum_num(x, y) +} + +/// Return the lesser of two arguments or, if either argument is NaN, NaN. +/// +/// This coincides with IEEE 754-2019 `minimumNumber`. The result orders -0.0 < 0.0. +#[cfg(f128_enabled)] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fminimum_numf128(x: f128, y: f128) -> f128 { + super::generic::fminimum_num(x, y) +} + +/// Return the greater of two arguments or, if either argument is NaN, NaN. +/// +/// This coincides with IEEE 754-2019 `maximumNumber`. The result orders -0.0 < 0.0. +#[cfg(f16_enabled)] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fmaximum_numf16(x: f16, y: f16) -> f16 { + super::generic::fmaximum_num(x, y) +} + +/// Return the greater of two arguments or, if either argument is NaN, NaN. +/// +/// This coincides with IEEE 754-2019 `maximumNumber`. The result orders -0.0 < 0.0. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fmaximum_numf(x: f32, y: f32) -> f32 { + super::generic::fmaximum_num(x, y) +} + +/// Return the greater of two arguments or, if either argument is NaN, NaN. +/// +/// This coincides with IEEE 754-2019 `maximumNumber`. The result orders -0.0 < 0.0. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fmaximum_num(x: f64, y: f64) -> f64 { + super::generic::fmaximum_num(x, y) +} + +/// Return the greater of two arguments or, if either argument is NaN, NaN. +/// +/// This coincides with IEEE 754-2019 `maximumNumber`. The result orders -0.0 < 0.0. +#[cfg(f128_enabled)] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fmaximum_numf128(x: f128, y: f128) -> f128 { + super::generic::fmaximum_num(x, y) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::support::{Float, Hexf}; + + fn fminimum_num_spec_test(f: impl Fn(F, F) -> F) { + let cases = [ + (F::ZERO, F::ZERO, F::ZERO), + (F::ONE, F::ONE, F::ONE), + (F::ZERO, F::ONE, F::ZERO), + (F::ONE, F::ZERO, F::ZERO), + (F::ZERO, F::NEG_ONE, F::NEG_ONE), + (F::NEG_ONE, F::ZERO, F::NEG_ONE), + (F::INFINITY, F::ZERO, F::ZERO), + (F::NEG_INFINITY, F::ZERO, F::NEG_INFINITY), + (F::NAN, F::ZERO, F::ZERO), + (F::ZERO, F::NAN, F::ZERO), + (F::NAN, F::NAN, F::NAN), + (F::ZERO, F::NEG_ZERO, F::NEG_ZERO), + (F::NEG_ZERO, F::ZERO, F::NEG_ZERO), + ]; + + for (x, y, res) in cases { + let val = f(x, y); + assert_biteq!(val, res, "fminimum_num({}, {})", Hexf(x), Hexf(y)); + } + } + + #[test] + #[cfg(f16_enabled)] + fn fminimum_num_spec_tests_f16() { + fminimum_num_spec_test::(fminimum_numf16); + } + + #[test] + fn fminimum_num_spec_tests_f32() { + fminimum_num_spec_test::(fminimum_numf); + } + + #[test] + fn fminimum_num_spec_tests_f64() { + fminimum_num_spec_test::(fminimum_num); + } + + #[test] + #[cfg(f128_enabled)] + fn fminimum_num_spec_tests_f128() { + fminimum_num_spec_test::(fminimum_numf128); + } + + fn fmaximum_num_spec_test(f: impl Fn(F, F) -> F) { + let cases = [ + (F::ZERO, F::ZERO, F::ZERO), + (F::ONE, F::ONE, F::ONE), + (F::ZERO, F::ONE, F::ONE), + (F::ONE, F::ZERO, F::ONE), + (F::ZERO, F::NEG_ONE, F::ZERO), + (F::NEG_ONE, F::ZERO, F::ZERO), + (F::INFINITY, F::ZERO, F::INFINITY), + (F::NEG_INFINITY, F::ZERO, F::ZERO), + (F::NAN, F::ZERO, F::ZERO), + (F::ZERO, F::NAN, F::ZERO), + (F::NAN, F::NAN, F::NAN), + (F::ZERO, F::NEG_ZERO, F::ZERO), + (F::NEG_ZERO, F::ZERO, F::ZERO), + ]; + + for (x, y, res) in cases { + let val = f(x, y); + assert_biteq!(val, res, "fmaximum_num({}, {})", Hexf(x), Hexf(y)); + } + } + + #[test] + #[cfg(f16_enabled)] + fn fmaximum_num_spec_tests_f16() { + fmaximum_num_spec_test::(fmaximum_numf16); + } + + #[test] + fn fmaximum_num_spec_tests_f32() { + fmaximum_num_spec_test::(fmaximum_numf); + } + + #[test] + fn fmaximum_num_spec_tests_f64() { + fmaximum_num_spec_test::(fmaximum_num); + } + + #[test] + #[cfg(f128_enabled)] + fn fmaximum_num_spec_tests_f128() { + fmaximum_num_spec_test::(fmaximum_numf128); + } +} diff --git a/libm/src/math/fmod.rs b/libm/src/math/fmod.rs new file mode 100644 index 000000000..c4752b925 --- /dev/null +++ b/libm/src/math/fmod.rs @@ -0,0 +1,25 @@ +/// Calculate the remainder of `x / y`, the precise result of `x - trunc(x / y) * y`. +#[cfg(f16_enabled)] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fmodf16(x: f16, y: f16) -> f16 { + super::generic::fmod(x, y) +} + +/// Calculate the remainder of `x / y`, the precise result of `x - trunc(x / y) * y`. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fmodf(x: f32, y: f32) -> f32 { + super::generic::fmod(x, y) +} + +/// Calculate the remainder of `x / y`, the precise result of `x - trunc(x / y) * y`. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fmod(x: f64, y: f64) -> f64 { + super::generic::fmod(x, y) +} + +/// Calculate the remainder of `x / y`, the precise result of `x - trunc(x / y) * y`. +#[cfg(f128_enabled)] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fmodf128(x: f128, y: f128) -> f128 { + super::generic::fmod(x, y) +} diff --git a/libm/src/math/fmodf.rs b/libm/src/math/fmodf.rs new file mode 100644 index 000000000..4e95696e2 --- /dev/null +++ b/libm/src/math/fmodf.rs @@ -0,0 +1,5 @@ +/// Calculate the remainder of `x / y`, the precise result of `x - trunc(x / y) * y`. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fmodf(x: f32, y: f32) -> f32 { + super::generic::fmod(x, y) +} diff --git a/libm/src/math/fmodf128.rs b/libm/src/math/fmodf128.rs new file mode 100644 index 000000000..ff0e0493e --- /dev/null +++ b/libm/src/math/fmodf128.rs @@ -0,0 +1,5 @@ +/// Calculate the remainder of `x / y`, the precise result of `x - trunc(x / y) * y`. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fmodf128(x: f128, y: f128) -> f128 { + super::generic::fmod(x, y) +} diff --git a/libm/src/math/fmodf16.rs b/libm/src/math/fmodf16.rs new file mode 100644 index 000000000..11972a7de --- /dev/null +++ b/libm/src/math/fmodf16.rs @@ -0,0 +1,5 @@ +/// Calculate the remainder of `x / y`, the precise result of `x - trunc(x / y) * y`. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fmodf16(x: f16, y: f16) -> f16 { + super::generic::fmod(x, y) +} diff --git a/src/math/frexp.rs b/libm/src/math/frexp.rs similarity index 89% rename from src/math/frexp.rs rename to libm/src/math/frexp.rs index badad786a..de7a64fda 100644 --- a/src/math/frexp.rs +++ b/libm/src/math/frexp.rs @@ -1,3 +1,4 @@ +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn frexp(x: f64) -> (f64, i32) { let mut y = x.to_bits(); let ee = ((y >> 52) & 0x7ff) as i32; diff --git a/src/math/frexpf.rs b/libm/src/math/frexpf.rs similarity index 88% rename from src/math/frexpf.rs rename to libm/src/math/frexpf.rs index 2919c0ab0..0ec91c2d3 100644 --- a/src/math/frexpf.rs +++ b/libm/src/math/frexpf.rs @@ -1,3 +1,4 @@ +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn frexpf(x: f32) -> (f32, i32) { let mut y = x.to_bits(); let ee: i32 = ((y >> 23) & 0xff) as i32; diff --git a/libm/src/math/generic/ceil.rs b/libm/src/math/generic/ceil.rs new file mode 100644 index 000000000..5c5bb4763 --- /dev/null +++ b/libm/src/math/generic/ceil.rs @@ -0,0 +1,168 @@ +/* SPDX-License-Identifier: MIT */ +/* origin: musl src/math/ceilf.c */ + +//! Generic `ceil` algorithm. +//! +//! Note that this uses the algorithm from musl's `ceilf` rather than `ceil` or `ceill` because +//! performance seems to be better (based on icount) and it does not seem to experience rounding +//! errors on i386. + +use super::super::support::{FpResult, Status}; +use super::super::{Float, Int, IntTy, MinInt}; + +#[inline] +pub fn ceil(x: F) -> F { + ceil_status(x).val +} + +#[inline] +pub fn ceil_status(x: F) -> FpResult { + let zero = IntTy::::ZERO; + + let mut ix = x.to_bits(); + let e = x.exp_unbiased(); + + // If the represented value has no fractional part, no truncation is needed. + if e >= F::SIG_BITS as i32 { + return FpResult::ok(x); + } + + let status; + let res = if e >= 0 { + // |x| >= 1.0 + let m = F::SIG_MASK >> e.unsigned(); + if (ix & m) == zero { + // Portion to be masked is already zero; no adjustment needed. + return FpResult::ok(x); + } + + // Otherwise, raise an inexact exception. + status = Status::INEXACT; + + if x.is_sign_positive() { + ix += m; + } + + ix &= !m; + F::from_bits(ix) + } else { + // |x| < 1.0, raise an inexact exception since truncation will happen (unless x == 0). + if ix & F::SIG_MASK == F::Int::ZERO { + status = Status::OK; + } else { + status = Status::INEXACT; + } + + if x.is_sign_negative() { + // -1.0 < x <= -0.0; rounding up goes toward -0.0. + F::NEG_ZERO + } else if ix << 1 != zero { + // 0.0 < x < 1.0; rounding up goes toward +1.0. + F::ONE + } else { + // +0.0 remains unchanged + x + } + }; + + FpResult::new(res, status) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::support::Hexf; + + /// Test against https://en.cppreference.com/w/cpp/numeric/math/ceil + fn spec_test(cases: &[(F, F, Status)]) { + let roundtrip = [F::ZERO, F::ONE, F::NEG_ONE, F::NEG_ZERO, F::INFINITY, F::NEG_INFINITY]; + + for x in roundtrip { + let FpResult { val, status } = ceil_status(x); + assert_biteq!(val, x, "{}", Hexf(x)); + assert_eq!(status, Status::OK, "{}", Hexf(x)); + } + + for &(x, res, res_stat) in cases { + let FpResult { val, status } = ceil_status(x); + assert_biteq!(val, res, "{}", Hexf(x)); + assert_eq!(status, res_stat, "{}", Hexf(x)); + } + } + + /* Skipping f16 / f128 "sanity_check"s due to rejected literal lexing at MSRV */ + + #[test] + #[cfg(f16_enabled)] + fn spec_tests_f16() { + let cases = [ + (0.1, 1.0, Status::INEXACT), + (-0.1, -0.0, Status::INEXACT), + (0.9, 1.0, Status::INEXACT), + (-0.9, -0.0, Status::INEXACT), + (1.1, 2.0, Status::INEXACT), + (-1.1, -1.0, Status::INEXACT), + (1.9, 2.0, Status::INEXACT), + (-1.9, -1.0, Status::INEXACT), + ]; + spec_test::(&cases); + } + + #[test] + fn sanity_check_f32() { + assert_eq!(ceil(1.1f32), 2.0); + assert_eq!(ceil(2.9f32), 3.0); + } + + #[test] + fn spec_tests_f32() { + let cases = [ + (0.1, 1.0, Status::INEXACT), + (-0.1, -0.0, Status::INEXACT), + (0.9, 1.0, Status::INEXACT), + (-0.9, -0.0, Status::INEXACT), + (1.1, 2.0, Status::INEXACT), + (-1.1, -1.0, Status::INEXACT), + (1.9, 2.0, Status::INEXACT), + (-1.9, -1.0, Status::INEXACT), + ]; + spec_test::(&cases); + } + + #[test] + fn sanity_check_f64() { + assert_eq!(ceil(1.1f64), 2.0); + assert_eq!(ceil(2.9f64), 3.0); + } + + #[test] + fn spec_tests_f64() { + let cases = [ + (0.1, 1.0, Status::INEXACT), + (-0.1, -0.0, Status::INEXACT), + (0.9, 1.0, Status::INEXACT), + (-0.9, -0.0, Status::INEXACT), + (1.1, 2.0, Status::INEXACT), + (-1.1, -1.0, Status::INEXACT), + (1.9, 2.0, Status::INEXACT), + (-1.9, -1.0, Status::INEXACT), + ]; + spec_test::(&cases); + } + + #[test] + #[cfg(f128_enabled)] + fn spec_tests_f128() { + let cases = [ + (0.1, 1.0, Status::INEXACT), + (-0.1, -0.0, Status::INEXACT), + (0.9, 1.0, Status::INEXACT), + (-0.9, -0.0, Status::INEXACT), + (1.1, 2.0, Status::INEXACT), + (-1.1, -1.0, Status::INEXACT), + (1.9, 2.0, Status::INEXACT), + (-1.9, -1.0, Status::INEXACT), + ]; + spec_test::(&cases); + } +} diff --git a/libm/src/math/generic/copysign.rs b/libm/src/math/generic/copysign.rs new file mode 100644 index 000000000..a61af22f0 --- /dev/null +++ b/libm/src/math/generic/copysign.rs @@ -0,0 +1,11 @@ +use super::super::Float; + +/// Copy the sign of `y` to `x`. +#[inline] +pub fn copysign(x: F, y: F) -> F { + let mut ux = x.to_bits(); + let uy = y.to_bits(); + ux &= !F::SIGN_MASK; + ux |= uy & F::SIGN_MASK; + F::from_bits(ux) +} diff --git a/libm/src/math/generic/fabs.rs b/libm/src/math/generic/fabs.rs new file mode 100644 index 000000000..0fa0edf9b --- /dev/null +++ b/libm/src/math/generic/fabs.rs @@ -0,0 +1,8 @@ +use super::super::Float; + +/// Absolute value. +#[inline] +pub fn fabs(x: F) -> F { + let abs_mask = !F::SIGN_MASK; + F::from_bits(x.to_bits() & abs_mask) +} diff --git a/libm/src/math/generic/fdim.rs b/libm/src/math/generic/fdim.rs new file mode 100644 index 000000000..a63007b19 --- /dev/null +++ b/libm/src/math/generic/fdim.rs @@ -0,0 +1,6 @@ +use super::super::Float; + +#[inline] +pub fn fdim(x: F, y: F) -> F { + if x <= y { F::ZERO } else { x - y } +} diff --git a/libm/src/math/generic/floor.rs b/libm/src/math/generic/floor.rs new file mode 100644 index 000000000..243804625 --- /dev/null +++ b/libm/src/math/generic/floor.rs @@ -0,0 +1,151 @@ +/* SPDX-License-Identifier: MIT + * origin: musl src/math/floor.c */ + +//! Generic `floor` algorithm. +//! +//! Note that this uses the algorithm from musl's `floorf` rather than `floor` or `floorl` because +//! performance seems to be better (based on icount) and it does not seem to experience rounding +//! errors on i386. + +use super::super::support::{FpResult, Status}; +use super::super::{Float, Int, IntTy, MinInt}; + +#[inline] +pub fn floor(x: F) -> F { + floor_status(x).val +} + +#[inline] +pub fn floor_status(x: F) -> FpResult { + let zero = IntTy::::ZERO; + + let mut ix = x.to_bits(); + let e = x.exp_unbiased(); + + // If the represented value has no fractional part, no truncation is needed. + if e >= F::SIG_BITS as i32 { + return FpResult::ok(x); + } + + let status; + let res = if e >= 0 { + // |x| >= 1.0 + let m = F::SIG_MASK >> e.unsigned(); + if ix & m == zero { + // Portion to be masked is already zero; no adjustment needed. + return FpResult::ok(x); + } + + // Otherwise, raise an inexact exception. + status = Status::INEXACT; + + if x.is_sign_negative() { + ix += m; + } + + ix &= !m; + F::from_bits(ix) + } else { + // |x| < 1.0, raise an inexact exception since truncation will happen. + if ix & F::SIG_MASK == F::Int::ZERO { + status = Status::OK; + } else { + status = Status::INEXACT; + } + + if x.is_sign_positive() { + // 0.0 <= x < 1.0; rounding down goes toward +0.0. + F::ZERO + } else if ix << 1 != zero { + // -1.0 < x < 0.0; rounding down goes toward -1.0. + F::NEG_ONE + } else { + // -0.0 remains unchanged + x + } + }; + + FpResult::new(res, status) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::support::Hexf; + + /// Test against https://en.cppreference.com/w/cpp/numeric/math/floor + fn spec_test(cases: &[(F, F, Status)]) { + let roundtrip = [F::ZERO, F::ONE, F::NEG_ONE, F::NEG_ZERO, F::INFINITY, F::NEG_INFINITY]; + + for x in roundtrip { + let FpResult { val, status } = floor_status(x); + assert_biteq!(val, x, "{}", Hexf(x)); + assert_eq!(status, Status::OK, "{}", Hexf(x)); + } + + for &(x, res, res_stat) in cases { + let FpResult { val, status } = floor_status(x); + assert_biteq!(val, res, "{}", Hexf(x)); + assert_eq!(status, res_stat, "{}", Hexf(x)); + } + } + + /* Skipping f16 / f128 "sanity_check"s and spec cases due to rejected literal lexing at MSRV */ + + #[test] + #[cfg(f16_enabled)] + fn spec_tests_f16() { + let cases = []; + spec_test::(&cases); + } + + #[test] + fn sanity_check_f32() { + assert_eq!(floor(0.5f32), 0.0); + assert_eq!(floor(1.1f32), 1.0); + assert_eq!(floor(2.9f32), 2.0); + } + + #[test] + fn spec_tests_f32() { + let cases = [ + (0.1, 0.0, Status::INEXACT), + (-0.1, -1.0, Status::INEXACT), + (0.9, 0.0, Status::INEXACT), + (-0.9, -1.0, Status::INEXACT), + (1.1, 1.0, Status::INEXACT), + (-1.1, -2.0, Status::INEXACT), + (1.9, 1.0, Status::INEXACT), + (-1.9, -2.0, Status::INEXACT), + ]; + spec_test::(&cases); + } + + #[test] + fn sanity_check_f64() { + assert_eq!(floor(1.1f64), 1.0); + assert_eq!(floor(2.9f64), 2.0); + } + + #[test] + fn spec_tests_f64() { + let cases = [ + (0.1, 0.0, Status::INEXACT), + (-0.1, -1.0, Status::INEXACT), + (0.9, 0.0, Status::INEXACT), + (-0.9, -1.0, Status::INEXACT), + (1.1, 1.0, Status::INEXACT), + (-1.1, -2.0, Status::INEXACT), + (1.9, 1.0, Status::INEXACT), + (-1.9, -2.0, Status::INEXACT), + ]; + spec_test::(&cases); + } + + #[test] + #[cfg(f128_enabled)] + fn spec_tests_f128() { + let cases = []; + spec_test::(&cases); + } +} diff --git a/libm/src/math/generic/fmax.rs b/libm/src/math/generic/fmax.rs new file mode 100644 index 000000000..bf3f847e8 --- /dev/null +++ b/libm/src/math/generic/fmax.rs @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: MIT OR Apache-2.0 */ +//! IEEE 754-2011 `maxNum`. This has been superseded by IEEE 754-2019 `maximumNumber`. +//! +//! Per the spec, returns the canonicalized result of: +//! - `x` if `x > y` +//! - `y` if `y > x` +//! - The other number if one is NaN +//! - Otherwise, either `x` or `y`, canonicalized +//! - -0.0 and +0.0 may be disregarded (unlike newer operations) +//! +//! Excluded from our implementation is sNaN handling. +//! +//! More on the differences: [link]. +//! +//! [link]: https://grouper.ieee.org/groups/msc/ANSI_IEEE-Std-754-2019/background/minNum_maxNum_Removal_Demotion_v3.pdf + +use super::super::Float; + +#[inline] +pub fn fmax(x: F, y: F) -> F { + let res = if x.is_nan() || x < y { y } else { x }; + // Canonicalize + res * F::ONE +} diff --git a/libm/src/math/generic/fmaximum.rs b/libm/src/math/generic/fmaximum.rs new file mode 100644 index 000000000..387055af2 --- /dev/null +++ b/libm/src/math/generic/fmaximum.rs @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: MIT OR Apache-2.0 */ +//! IEEE 754-2019 `maximum`. +//! +//! Per the spec, returns the canonicalized result of: +//! - `x` if `x > y` +//! - `y` if `y > x` +//! - qNaN if either operation is NaN +//! - Logic following +0.0 > -0.0 +//! +//! Excluded from our implementation is sNaN handling. + +use super::super::Float; + +#[inline] +pub fn fmaximum(x: F, y: F) -> F { + let res = if x.is_nan() { + x + } else if y.is_nan() { + y + } else if x > y || (y.to_bits() == F::NEG_ZERO.to_bits() && x.is_sign_positive()) { + x + } else { + y + }; + + // Canonicalize + res * F::ONE +} diff --git a/libm/src/math/generic/fmaximum_num.rs b/libm/src/math/generic/fmaximum_num.rs new file mode 100644 index 000000000..f7efdde80 --- /dev/null +++ b/libm/src/math/generic/fmaximum_num.rs @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: MIT OR Apache-2.0 */ +//! IEEE 754-2019 `maximumNumber`. +//! +//! Per the spec, returns: +//! - `x` if `x > y` +//! - `y` if `y > x` +//! - Non-NaN if one operand is NaN +//! - Logic following +0.0 > -0.0 +//! - Either `x` or `y` if `x == y` and the signs are the same +//! - qNaN if either operand is a NaN +//! +//! Excluded from our implementation is sNaN handling. + +use super::super::Float; + +#[inline] +pub fn fmaximum_num(x: F, y: F) -> F { + let res = + if x.is_nan() || x < y || (x.to_bits() == F::NEG_ZERO.to_bits() && y.is_sign_positive()) { + y + } else { + x + }; + + // Canonicalize + res * F::ONE +} diff --git a/libm/src/math/generic/fmin.rs b/libm/src/math/generic/fmin.rs new file mode 100644 index 000000000..cd3caeee4 --- /dev/null +++ b/libm/src/math/generic/fmin.rs @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: MIT OR Apache-2.0 */ +//! IEEE 754-2008 `minNum`. This has been superseded by IEEE 754-2019 `minimumNumber`. +//! +//! Per the spec, returns the canonicalized result of: +//! - `x` if `x < y` +//! - `y` if `y < x` +//! - The other number if one is NaN +//! - Otherwise, either `x` or `y`, canonicalized +//! - -0.0 and +0.0 may be disregarded (unlike newer operations) +//! +//! Excluded from our implementation is sNaN handling. +//! +//! More on the differences: [link]. +//! +//! [link]: https://grouper.ieee.org/groups/msc/ANSI_IEEE-Std-754-2019/background/minNum_maxNum_Removal_Demotion_v3.pdf + +use super::super::Float; + +#[inline] +pub fn fmin(x: F, y: F) -> F { + let res = if y.is_nan() || x < y { x } else { y }; + // Canonicalize + res * F::ONE +} diff --git a/libm/src/math/generic/fminimum.rs b/libm/src/math/generic/fminimum.rs new file mode 100644 index 000000000..4ddb36455 --- /dev/null +++ b/libm/src/math/generic/fminimum.rs @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: MIT OR Apache-2.0 */ +//! IEEE 754-2019 `minimum`. +//! +//! Per the spec, returns the canonicalized result of: +//! - `x` if `x < y` +//! - `y` if `y < x` +//! - qNaN if either operation is NaN +//! - Logic following +0.0 > -0.0 +//! +//! Excluded from our implementation is sNaN handling. + +use super::super::Float; + +#[inline] +pub fn fminimum(x: F, y: F) -> F { + let res = if x.is_nan() { + x + } else if y.is_nan() { + y + } else if x < y || (x.to_bits() == F::NEG_ZERO.to_bits() && y.is_sign_positive()) { + x + } else { + y + }; + + // Canonicalize + res * F::ONE +} diff --git a/libm/src/math/generic/fminimum_num.rs b/libm/src/math/generic/fminimum_num.rs new file mode 100644 index 000000000..441c204a9 --- /dev/null +++ b/libm/src/math/generic/fminimum_num.rs @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: MIT OR Apache-2.0 */ +//! IEEE 754-2019 `minimum`. +//! +//! Per the spec, returns: +//! - `x` if `x < y` +//! - `y` if `y < x` +//! - Non-NaN if one operand is NaN +//! - Logic following +0.0 > -0.0 +//! - Either `x` or `y` if `x == y` and the signs are the same +//! - qNaN if either operand is a NaN +//! +//! Excluded from our implementation is sNaN handling. + +use super::super::Float; + +#[inline] +pub fn fminimum_num(x: F, y: F) -> F { + let res = + if y.is_nan() || x < y || (x.to_bits() == F::NEG_ZERO.to_bits() && y.is_sign_positive()) { + x + } else { + y + }; + + // Canonicalize + res * F::ONE +} diff --git a/libm/src/math/generic/fmod.rs b/libm/src/math/generic/fmod.rs new file mode 100644 index 000000000..6414bbd25 --- /dev/null +++ b/libm/src/math/generic/fmod.rs @@ -0,0 +1,84 @@ +/* SPDX-License-Identifier: MIT */ +/* origin: musl src/math/fmod.c. Ported to generic Rust algorithm in 2025, TG. */ + +use super::super::{CastFrom, Float, Int, MinInt}; + +#[inline] +pub fn fmod(x: F, y: F) -> F { + let zero = F::Int::ZERO; + let one = F::Int::ONE; + let mut ix = x.to_bits(); + let mut iy = y.to_bits(); + let mut ex = x.ex().signed(); + let mut ey = y.ex().signed(); + let sx = ix & F::SIGN_MASK; + + if iy << 1 == zero || y.is_nan() || ex == F::EXP_SAT as i32 { + return (x * y) / (x * y); + } + + if ix << 1 <= iy << 1 { + if ix << 1 == iy << 1 { + return F::ZERO * x; + } + return x; + } + + /* normalize x and y */ + if ex == 0 { + let i = ix << (F::EXP_BITS + 1); + ex -= i.leading_zeros() as i32; + ix <<= -ex + 1; + } else { + ix &= F::Int::MAX >> F::EXP_BITS; + ix |= one << F::SIG_BITS; + } + + if ey == 0 { + let i = iy << (F::EXP_BITS + 1); + ey -= i.leading_zeros() as i32; + iy <<= -ey + 1; + } else { + iy &= F::Int::MAX >> F::EXP_BITS; + iy |= one << F::SIG_BITS; + } + + /* x mod y */ + while ex > ey { + let i = ix.wrapping_sub(iy); + if i >> (F::BITS - 1) == zero { + if i == zero { + return F::ZERO * x; + } + ix = i; + } + + ix <<= 1; + ex -= 1; + } + + let i = ix.wrapping_sub(iy); + if i >> (F::BITS - 1) == zero { + if i == zero { + return F::ZERO * x; + } + + ix = i; + } + + let shift = ix.leading_zeros().saturating_sub(F::EXP_BITS); + ix <<= shift; + ex -= shift as i32; + + /* scale result */ + if ex > 0 { + ix -= one << F::SIG_BITS; + ix |= F::Int::cast_from(ex) << F::SIG_BITS; + } else { + ix >>= -ex + 1; + } + + ix |= sx; + + F::from_bits(ix) +} diff --git a/libm/src/math/generic/mod.rs b/libm/src/math/generic/mod.rs new file mode 100644 index 000000000..35846351a --- /dev/null +++ b/libm/src/math/generic/mod.rs @@ -0,0 +1,38 @@ +// Note: generic functions are marked `#[inline]` because, even though generic functions are +// typically inlined, this does not seem to always be the case. + +mod ceil; +mod copysign; +mod fabs; +mod fdim; +mod floor; +mod fmax; +mod fmaximum; +mod fmaximum_num; +mod fmin; +mod fminimum; +mod fminimum_num; +mod fmod; +mod rint; +mod round; +mod scalbn; +mod sqrt; +mod trunc; + +pub use ceil::ceil; +pub use copysign::copysign; +pub use fabs::fabs; +pub use fdim::fdim; +pub use floor::floor; +pub use fmax::fmax; +pub use fmaximum::fmaximum; +pub use fmaximum_num::fmaximum_num; +pub use fmin::fmin; +pub use fminimum::fminimum; +pub use fminimum_num::fminimum_num; +pub use fmod::fmod; +pub use rint::rint_round; +pub use round::round; +pub use scalbn::scalbn; +pub use sqrt::sqrt; +pub use trunc::trunc; diff --git a/libm/src/math/generic/rint.rs b/libm/src/math/generic/rint.rs new file mode 100644 index 000000000..9cdeb1185 --- /dev/null +++ b/libm/src/math/generic/rint.rs @@ -0,0 +1,120 @@ +/* SPDX-License-Identifier: MIT */ +/* origin: musl src/math/rint.c */ + +use super::super::Float; +use super::super::support::{FpResult, Round}; + +/// IEEE 754-2019 `roundToIntegralExact`, which respects rounding mode and raises inexact if +/// applicable. +#[inline] +pub fn rint_round(x: F, _round: Round) -> FpResult { + let toint = F::ONE / F::EPSILON; + let e = x.ex(); + let positive = x.is_sign_positive(); + + // On i386 `force_eval!` must be used to force rounding via storage to memory. Otherwise, + // the excess precission from x87 would cause an incorrect final result. + let force = |x| { + if cfg!(x86_no_sse) && (F::BITS == 32 || F::BITS == 64) { force_eval!(x) } else { x } + }; + + let res = if e >= F::EXP_BIAS + F::SIG_BITS { + // No fractional part; exact result can be returned. + x + } else { + // Apply a net-zero adjustment that nudges `y` in the direction of the rounding mode. For + // Rust this is always nearest, but ideally it would take `round` into account. + let y = if positive { + force(force(x) + toint) - toint + } else { + force(force(x) - toint) + toint + }; + + if y == F::ZERO { + // A zero result takes the sign of the input. + if positive { F::ZERO } else { F::NEG_ZERO } + } else { + y + } + }; + + FpResult::ok(res) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::support::{Hexf, Status}; + + fn spec_test(cases: &[(F, F, Status)]) { + let roundtrip = [F::ZERO, F::ONE, F::NEG_ONE, F::NEG_ZERO, F::INFINITY, F::NEG_INFINITY]; + + for x in roundtrip { + let FpResult { val, status } = rint_round(x, Round::Nearest); + assert_biteq!(val, x, "rint_round({})", Hexf(x)); + assert_eq!(status, Status::OK, "{}", Hexf(x)); + } + + for &(x, res, res_stat) in cases { + let FpResult { val, status } = rint_round(x, Round::Nearest); + assert_biteq!(val, res, "rint_round({})", Hexf(x)); + assert_eq!(status, res_stat, "{}", Hexf(x)); + } + } + + #[test] + #[cfg(f16_enabled)] + fn spec_tests_f16() { + let cases = []; + spec_test::(&cases); + } + + #[test] + fn spec_tests_f32() { + let cases = [ + (0.1, 0.0, Status::OK), + (-0.1, -0.0, Status::OK), + (0.5, 0.0, Status::OK), + (-0.5, -0.0, Status::OK), + (0.9, 1.0, Status::OK), + (-0.9, -1.0, Status::OK), + (1.1, 1.0, Status::OK), + (-1.1, -1.0, Status::OK), + (1.5, 2.0, Status::OK), + (-1.5, -2.0, Status::OK), + (1.9, 2.0, Status::OK), + (-1.9, -2.0, Status::OK), + (2.8, 3.0, Status::OK), + (-2.8, -3.0, Status::OK), + ]; + spec_test::(&cases); + } + + #[test] + fn spec_tests_f64() { + let cases = [ + (0.1, 0.0, Status::OK), + (-0.1, -0.0, Status::OK), + (0.5, 0.0, Status::OK), + (-0.5, -0.0, Status::OK), + (0.9, 1.0, Status::OK), + (-0.9, -1.0, Status::OK), + (1.1, 1.0, Status::OK), + (-1.1, -1.0, Status::OK), + (1.5, 2.0, Status::OK), + (-1.5, -2.0, Status::OK), + (1.9, 2.0, Status::OK), + (-1.9, -2.0, Status::OK), + (2.8, 3.0, Status::OK), + (-2.8, -3.0, Status::OK), + ]; + spec_test::(&cases); + } + + #[test] + #[cfg(f128_enabled)] + fn spec_tests_f128() { + let cases = []; + spec_test::(&cases); + } +} diff --git a/libm/src/math/generic/round.rs b/libm/src/math/generic/round.rs new file mode 100644 index 000000000..01314ac70 --- /dev/null +++ b/libm/src/math/generic/round.rs @@ -0,0 +1,83 @@ +use super::super::{Float, MinInt}; +use super::{copysign, trunc}; + +#[inline] +pub fn round(x: F) -> F { + let f0p5 = F::from_parts(false, F::EXP_BIAS - 1, F::Int::ZERO); // 0.5 + let f0p25 = F::from_parts(false, F::EXP_BIAS - 2, F::Int::ZERO); // 0.25 + + trunc(x + copysign(f0p5 - f0p25 * F::EPSILON, x)) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + #[cfg(f16_enabled)] + fn zeroes_f16() { + assert_biteq!(round(0.0_f16), 0.0_f16); + assert_biteq!(round(-0.0_f16), -0.0_f16); + } + + #[test] + #[cfg(f16_enabled)] + fn sanity_check_f16() { + assert_eq!(round(-1.0_f16), -1.0); + assert_eq!(round(2.8_f16), 3.0); + assert_eq!(round(-0.5_f16), -1.0); + assert_eq!(round(0.5_f16), 1.0); + assert_eq!(round(-1.5_f16), -2.0); + assert_eq!(round(1.5_f16), 2.0); + } + + #[test] + fn zeroes_f32() { + assert_biteq!(round(0.0_f32), 0.0_f32); + assert_biteq!(round(-0.0_f32), -0.0_f32); + } + + #[test] + fn sanity_check_f32() { + assert_eq!(round(-1.0_f32), -1.0); + assert_eq!(round(2.8_f32), 3.0); + assert_eq!(round(-0.5_f32), -1.0); + assert_eq!(round(0.5_f32), 1.0); + assert_eq!(round(-1.5_f32), -2.0); + assert_eq!(round(1.5_f32), 2.0); + } + + #[test] + fn zeroes_f64() { + assert_biteq!(round(0.0_f64), 0.0_f64); + assert_biteq!(round(-0.0_f64), -0.0_f64); + } + + #[test] + fn sanity_check_f64() { + assert_eq!(round(-1.0_f64), -1.0); + assert_eq!(round(2.8_f64), 3.0); + assert_eq!(round(-0.5_f64), -1.0); + assert_eq!(round(0.5_f64), 1.0); + assert_eq!(round(-1.5_f64), -2.0); + assert_eq!(round(1.5_f64), 2.0); + } + + #[test] + #[cfg(f128_enabled)] + fn zeroes_f128() { + assert_biteq!(round(0.0_f128), 0.0_f128); + assert_biteq!(round(-0.0_f128), -0.0_f128); + } + + #[test] + #[cfg(f128_enabled)] + fn sanity_check_f128() { + assert_eq!(round(-1.0_f128), -1.0); + assert_eq!(round(2.8_f128), 3.0); + assert_eq!(round(-0.5_f128), -1.0); + assert_eq!(round(0.5_f128), 1.0); + assert_eq!(round(-1.5_f128), -2.0); + assert_eq!(round(1.5_f128), 2.0); + } +} diff --git a/libm/src/math/generic/scalbn.rs b/libm/src/math/generic/scalbn.rs new file mode 100644 index 000000000..a45db1b4a --- /dev/null +++ b/libm/src/math/generic/scalbn.rs @@ -0,0 +1,121 @@ +use super::super::{CastFrom, CastInto, Float, IntTy, MinInt}; + +/// Scale the exponent. +/// +/// From N3220: +/// +/// > The scalbn and scalbln functions compute `x * b^n`, where `b = FLT_RADIX` if the return type +/// > of the function is a standard floating type, or `b = 10` if the return type of the function +/// > is a decimal floating type. A range error occurs for some finite x, depending on n. +/// > +/// > [...] +/// > +/// > * `scalbn(±0, n)` returns `±0`. +/// > * `scalbn(x, 0)` returns `x`. +/// > * `scalbn(±∞, n)` returns `±∞`. +/// > +/// > If the calculation does not overflow or underflow, the returned value is exact and +/// > independent of the current rounding direction mode. +#[inline] +pub fn scalbn(mut x: F, mut n: i32) -> F +where + u32: CastInto, + F::Int: CastFrom, + F::Int: CastFrom, +{ + let zero = IntTy::::ZERO; + + // Bits including the implicit bit + let sig_total_bits = F::SIG_BITS + 1; + + // Maximum and minimum values when biased + let exp_max = F::EXP_MAX; + let exp_min = F::EXP_MIN; + + // 2 ^ Emax, maximum positive with null significand (0x1p1023 for f64) + let f_exp_max = F::from_parts(false, F::EXP_BIAS << 1, zero); + + // 2 ^ Emin, minimum positive normal with null significand (0x1p-1022 for f64) + let f_exp_min = F::from_parts(false, 1, zero); + + // 2 ^ sig_total_bits, moltiplier to normalize subnormals (0x1p53 for f64) + let f_pow_subnorm = F::from_parts(false, sig_total_bits + F::EXP_BIAS, zero); + + /* + * The goal is to multiply `x` by a scale factor that applies `n`. However, there are cases + * where `2^n` is not representable by `F` but the result should be, e.g. `x = 2^Emin` with + * `n = -EMin + 2` (one out of range of 2^Emax). To get around this, reduce the magnitude of + * the final scale operation by prescaling by the max/min power representable by `F`. + */ + + if n > exp_max { + // Worse case positive `n`: `x` is the minimum subnormal value, the result is `F::MAX`. + // This can be reached by three scaling multiplications (two here and one final). + debug_assert!(-exp_min + F::SIG_BITS as i32 + exp_max <= exp_max * 3); + + x *= f_exp_max; + n -= exp_max; + if n > exp_max { + x *= f_exp_max; + n -= exp_max; + if n > exp_max { + n = exp_max; + } + } + } else if n < exp_min { + // When scaling toward 0, the prescaling is limited to a value that does not allow `x` to + // go subnormal. This avoids double rounding. + if F::BITS > 16 { + // `mul` s.t. `!(x * mul).is_subnormal() ∀ x` + let mul = f_exp_min * f_pow_subnorm; + let add = -exp_min - sig_total_bits as i32; + + // Worse case negative `n`: `x` is the maximum positive value, the result is `F::MIN`. + // This must be reachable by three scaling multiplications (two here and one final). + debug_assert!(-exp_min + F::SIG_BITS as i32 + exp_max <= add * 2 + -exp_min); + + x *= mul; + n += add; + + if n < exp_min { + x *= mul; + n += add; + + if n < exp_min { + n = exp_min; + } + } + } else { + // `f16` is unique compared to other float types in that the difference between the + // minimum exponent and the significand bits (`add = -exp_min - sig_total_bits`) is + // small, only three. The above method depend on decrementing `n` by `add` two times; + // for other float types this works out because `add` is a substantial fraction of + // the exponent range. For `f16`, however, 3 is relatively small compared to the + // exponent range (which is 39), so that requires ~10 prescale rounds rather than two. + // + // Work aroudn this by using a different algorithm that calculates the prescale + // dynamically based on the maximum possible value. This adds more operations per round + // since it needs to construct the scale, but works better in the general case. + let add = -(n + sig_total_bits as i32).clamp(exp_min, sig_total_bits as i32); + let mul = F::from_parts(false, (F::EXP_BIAS as i32 - add) as u32, zero); + + x *= mul; + n += add; + + if n < exp_min { + let add = -(n + sig_total_bits as i32).clamp(exp_min, sig_total_bits as i32); + let mul = F::from_parts(false, (F::EXP_BIAS as i32 - add) as u32, zero); + + x *= mul; + n += add; + + if n < exp_min { + n = exp_min; + } + } + } + } + + let scale = F::from_parts(false, (F::EXP_BIAS as i32 + n) as u32, zero); + x * scale +} diff --git a/libm/src/math/generic/sqrt.rs b/libm/src/math/generic/sqrt.rs new file mode 100644 index 000000000..ec9ff22df --- /dev/null +++ b/libm/src/math/generic/sqrt.rs @@ -0,0 +1,537 @@ +/* SPDX-License-Identifier: MIT */ +/* origin: musl src/math/sqrt.c. Ported to generic Rust algorithm in 2025, TG. */ + +//! Generic square root algorithm. +//! +//! This routine operates around `m_u2`, a U.2 (fixed point with two integral bits) mantissa +//! within the range [1, 4). A table lookup provides an initial estimate, then goldschmidt +//! iterations at various widths are used to approach the real values. +//! +//! For the iterations, `r` is a U0 number that approaches `1/sqrt(m_u2)`, and `s` is a U2 number +//! that approaches `sqrt(m_u2)`. Recall that m_u2 ∈ [1, 4). +//! +//! With Newton-Raphson iterations, this would be: +//! +//! - `w = r * r w ~ 1 / m` +//! - `u = 3 - m * w u ~ 3 - m * w = 3 - m / m = 2` +//! - `r = r * u / 2 r ~ r` +//! +//! (Note that the righthand column does not show anything analytically meaningful (i.e. r ~ r), +//! since the value of performing one iteration is in reducing the error representable by `~`). +//! +//! Instead of Newton-Raphson iterations, Goldschmidt iterations are used to calculate +//! `s = m * r`: +//! +//! - `s = m * r s ~ m / sqrt(m)` +//! - `u = 3 - s * r u ~ 3 - (m / sqrt(m)) * (1 / sqrt(m)) = 3 - m / m = 2` +//! - `r = r * u / 2 r ~ r` +//! - `s = s * u / 2 s ~ s` +//! +//! The above is precise because it uses the original value `m`. There is also a faster version +//! that performs fewer steps but does not use `m`: +//! +//! - `u = 3 - s * r u ~ 3 - 1` +//! - `r = r * u / 2 r ~ r` +//! - `s = s * u / 2 s ~ s` +//! +//! Rounding errors accumulate faster with the second version, so it is only used for subsequent +//! iterations within the same width integer. The first version is always used for the first +//! iteration at a new width in order to avoid this accumulation. +//! +//! Goldschmidt has the advantage over Newton-Raphson that `sqrt(x)` and `1/sqrt(x)` are +//! computed at the same time, i.e. there is no need to calculate `1/sqrt(x)` and invert it. + +use super::super::support::{FpResult, IntTy, Round, Status, cold_path}; +use super::super::{CastFrom, CastInto, DInt, Float, HInt, Int, MinInt}; + +#[inline] +pub fn sqrt(x: F) -> F +where + F: Float + SqrtHelper, + F::Int: HInt, + F::Int: From, + F::Int: From, + F::Int: CastInto, + F::Int: CastInto, + u32: CastInto, +{ + sqrt_round(x, Round::Nearest).val +} + +#[inline] +pub fn sqrt_round(x: F, _round: Round) -> FpResult +where + F: Float + SqrtHelper, + F::Int: HInt, + F::Int: From, + F::Int: From, + F::Int: CastInto, + F::Int: CastInto, + u32: CastInto, +{ + let zero = IntTy::::ZERO; + let one = IntTy::::ONE; + + let mut ix = x.to_bits(); + + // Top is the exponent and sign, which may or may not be shifted. If the float fits into a + // `u32`, we can get by without paying shifting costs. + let noshift = F::BITS <= u32::BITS; + let (mut top, special_case) = if noshift { + let exp_lsb = one << F::SIG_BITS; + let special_case = ix.wrapping_sub(exp_lsb) >= F::EXP_MASK - exp_lsb; + (Exp::NoShift(()), special_case) + } else { + let top = u32::cast_from(ix >> F::SIG_BITS); + let special_case = top.wrapping_sub(1) >= F::EXP_SAT - 1; + (Exp::Shifted(top), special_case) + }; + + // Handle NaN, zero, and out of domain (<= 0) + if special_case { + cold_path(); + + // +/-0 + if ix << 1 == zero { + return FpResult::ok(x); + } + + // Positive infinity + if ix == F::EXP_MASK { + return FpResult::ok(x); + } + + // NaN or negative + if ix > F::EXP_MASK { + return FpResult::new(F::NAN, Status::INVALID); + } + + // Normalize subnormals by multiplying by 1.0 << SIG_BITS (e.g. 0x1p52 for doubles). + let scaled = x * F::from_parts(false, F::SIG_BITS + F::EXP_BIAS, zero); + ix = scaled.to_bits(); + match top { + Exp::Shifted(ref mut v) => { + *v = scaled.ex(); + *v = (*v).wrapping_sub(F::SIG_BITS); + } + Exp::NoShift(()) => { + ix = ix.wrapping_sub((F::SIG_BITS << F::SIG_BITS).cast()); + } + } + } + + // Reduce arguments such that `x = 4^e * m`: + // + // - m_u2 ∈ [1, 4), a fixed point U2.BITS number + // - 2^e is the exponent part of the result + let (m_u2, exp) = match top { + Exp::Shifted(top) => { + // We now know `x` is positive, so `top` is just its (biased) exponent + let mut e = top; + // Construct a fixed point representation of the mantissa. + let mut m_u2 = (ix | F::IMPLICIT_BIT) << F::EXP_BITS; + let even = (e & 1) != 0; + if even { + m_u2 >>= 1; + } + e = (e.wrapping_add(F::EXP_SAT >> 1)) >> 1; + (m_u2, Exp::Shifted(e)) + } + Exp::NoShift(()) => { + let even = ix & (one << F::SIG_BITS) != zero; + + // Exponent part of the return value + let mut e_noshift = ix >> 1; + // ey &= (F::EXP_MASK << 2) >> 2; // clear the top exponent bit (result = 1.0) + e_noshift += (F::EXP_MASK ^ (F::SIGN_MASK >> 1)) >> 1; + e_noshift &= F::EXP_MASK; + + let m1 = (ix << F::EXP_BITS) | F::SIGN_MASK; + let m0 = (ix << (F::EXP_BITS - 1)) & !F::SIGN_MASK; + let m_u2 = if even { m0 } else { m1 }; + + (m_u2, Exp::NoShift(e_noshift)) + } + }; + + // Extract the top 6 bits of the significand with the lowest bit of the exponent. + let i = usize::cast_from(ix >> (F::SIG_BITS - 6)) & 0b1111111; + + // Start with an initial guess for `r = 1 / sqrt(m)` from the table, and shift `m` as an + // initial value for `s = sqrt(m)`. See the module documentation for details. + let r1_u0: F::ISet1 = F::ISet1::cast_from(RSQRT_TAB[i]) << (F::ISet1::BITS - 16); + let s1_u2: F::ISet1 = ((m_u2) >> (F::BITS - F::ISet1::BITS)).cast(); + + // Perform iterations, if any, at quarter width (used for `f128`). + let (r1_u0, _s1_u2) = goldschmidt::(r1_u0, s1_u2, F::SET1_ROUNDS, false); + + // Widen values and perform iterations at half width (used for `f64` and `f128`). + let r2_u0: F::ISet2 = F::ISet2::from(r1_u0) << (F::ISet2::BITS - F::ISet1::BITS); + let s2_u2: F::ISet2 = ((m_u2) >> (F::BITS - F::ISet2::BITS)).cast(); + let (r2_u0, _s2_u2) = goldschmidt::(r2_u0, s2_u2, F::SET2_ROUNDS, false); + + // Perform final iterations at full width (used for all float types). + let r_u0: F::Int = F::Int::from(r2_u0) << (F::BITS - F::ISet2::BITS); + let s_u2: F::Int = m_u2; + let (_r_u0, s_u2) = goldschmidt::(r_u0, s_u2, F::FINAL_ROUNDS, true); + + // Shift back to mantissa position. + let mut m = s_u2 >> (F::EXP_BITS - 2); + + // The musl source includes the following comment (with literals replaced): + // + // > s < sqrt(m) < s + 0x1.09p-SIG_BITS + // > compute nearest rounded result: the nearest result to SIG_BITS bits is either s or + // > s+0x1p-SIG_BITS, we can decide by comparing (2^SIG_BITS s + 0.5)^2 to 2^(2*SIG_BITS) m. + // + // Expanding this with , with `SIG_BITS = p` and adjusting based on the operations done to + // `d0` and `d1`: + // + // - `2^(2p)m ≟ ((2^p)m + 0.5)^2` + // - `2^(2p)m ≟ 2^(2p)m^2 + (2^p)m + 0.25` + // - `2^(2p)m - m^2 ≟ (2^(2p) - 1)m^2 + (2^p)m + 0.25` + // - `(1 - 2^(2p))m + m^2 ≟ (1 - 2^(2p))m^2 + (1 - 2^p)m + 0.25` (?) + // + // I do not follow how the rounding bit is extracted from this comparison with the below + // operations. In any case, the algorithm is well tested. + + // The value needed to shift `m_u2` by to create `m*2^(2p)`. `2p = 2 * F::SIG_BITS`, + // `F::BITS - 2` accounts for the offset that `m_u2` already has. + let shift = 2 * F::SIG_BITS - (F::BITS - 2); + + // `2^(2p)m - m^2` + let d0 = (m_u2 << shift).wrapping_sub(m.wrapping_mul(m)); + // `m - 2^(2p)m + m^2` + let d1 = m.wrapping_sub(d0); + m += d1 >> (F::BITS - 1); + m &= F::SIG_MASK; + + match exp { + Exp::Shifted(e) => m |= IntTy::::cast_from(e) << F::SIG_BITS, + Exp::NoShift(e) => m |= e, + }; + + let mut y = F::from_bits(m); + + // FIXME(f16): the fenv math does not work for `f16` + if F::BITS > 16 { + // Handle rounding and inexact. `(m + 1)^2 == 2^shift m` is exact; for all other cases, add + // a tiny value to cause fenv effects. + let d2 = d1.wrapping_add(m).wrapping_add(one); + let mut tiny = if d2 == zero { + cold_path(); + zero + } else { + F::IMPLICIT_BIT + }; + + tiny |= (d1 ^ d2) & F::SIGN_MASK; + let t = F::from_bits(tiny); + y = y + t; + } + + FpResult::ok(y) +} + +/// Multiply at the wider integer size, returning the high half. +fn wmulh(a: I, b: I) -> I { + a.widen_mul(b).hi() +} + +/// Perform `count` goldschmidt iterations, returning `(r_u0, s_u?)`. +/// +/// - `r_u0` is the reciprocal `r ~ 1 / sqrt(m)`, as U0. +/// - `s_u2` is the square root, `s ~ sqrt(m)`, as U2. +/// - `count` is the number of iterations to perform. +/// - `final_set` should be true if this is the last round (same-sized integer). If so, the +/// returned `s` will be U3, for later shifting. Otherwise, the returned `s` is U2. +/// +/// Note that performance relies on the optimizer being able to unroll these loops (reasonably +/// trivial, `count` is a constant when called). +#[inline] +fn goldschmidt(mut r_u0: I, mut s_u2: I, count: u32, final_set: bool) -> (I, I) +where + F: SqrtHelper, + I: HInt + From, +{ + let three_u2 = I::from(0b11u8) << (I::BITS - 2); + let mut u_u0 = r_u0; + + for i in 0..count { + // First iteration: `s = m*r` (`u_u0 = r_u0` set above) + // Subsequent iterations: `s=s*u/2` + s_u2 = wmulh(s_u2, u_u0); + + // Perform `s /= 2` if: + // + // 1. This is not the first iteration (the first iteration is `s = m*r`)... + // 2. ... and this is not the last set of iterations + // 3. ... or, if this is the last set, it is not the last iteration + // + // This step is not performed for the final iteration because the shift is combined with + // a later shift (moving `s` into the mantissa). + if i > 0 && (!final_set || i + 1 < count) { + s_u2 <<= 1; + } + + // u = 3 - s*r + let d_u2 = wmulh(s_u2, r_u0); + u_u0 = three_u2.wrapping_sub(d_u2); + + // r = r*u/2 + r_u0 = wmulh(r_u0, u_u0) << 1; + } + + (r_u0, s_u2) +} + +/// Representation of whether we shift the exponent into a `u32`, or modify it in place to save +/// the shift operations. +enum Exp { + /// The exponent has been shifted to a `u32` and is LSB-aligned. + Shifted(u32), + /// The exponent is in its natural position in integer repr. + NoShift(T), +} + +/// Size-specific constants related to the square root routine. +pub trait SqrtHelper: Float { + /// Integer for the first set of rounds. If unused, set to the same type as the next set. + type ISet1: HInt + Into + CastFrom + From; + /// Integer for the second set of rounds. If unused, set to the same type as the next set. + type ISet2: HInt + From + From; + + /// Number of rounds at `ISet1`. + const SET1_ROUNDS: u32 = 0; + /// Number of rounds at `ISet2`. + const SET2_ROUNDS: u32 = 0; + /// Number of rounds at `Self::Int`. + const FINAL_ROUNDS: u32; +} + +#[cfg(f16_enabled)] +impl SqrtHelper for f16 { + type ISet1 = u16; // unused + type ISet2 = u16; // unused + + const FINAL_ROUNDS: u32 = 2; +} + +impl SqrtHelper for f32 { + type ISet1 = u32; // unused + type ISet2 = u32; // unused + + const FINAL_ROUNDS: u32 = 3; +} + +impl SqrtHelper for f64 { + type ISet1 = u32; // unused + type ISet2 = u32; + + const SET2_ROUNDS: u32 = 2; + const FINAL_ROUNDS: u32 = 2; +} + +#[cfg(f128_enabled)] +impl SqrtHelper for f128 { + type ISet1 = u32; + type ISet2 = u64; + + const SET1_ROUNDS: u32 = 1; + const SET2_ROUNDS: u32 = 2; + const FINAL_ROUNDS: u32 = 2; +} + +/// A U0.16 representation of `1/sqrt(x)`. +/// +/// The index is a 7-bit number consisting of a single exponent bit and 6 bits of significand. +#[rustfmt::skip] +static RSQRT_TAB: [u16; 128] = [ + 0xb451, 0xb2f0, 0xb196, 0xb044, 0xaef9, 0xadb6, 0xac79, 0xab43, + 0xaa14, 0xa8eb, 0xa7c8, 0xa6aa, 0xa592, 0xa480, 0xa373, 0xa26b, + 0xa168, 0xa06a, 0x9f70, 0x9e7b, 0x9d8a, 0x9c9d, 0x9bb5, 0x9ad1, + 0x99f0, 0x9913, 0x983a, 0x9765, 0x9693, 0x95c4, 0x94f8, 0x9430, + 0x936b, 0x92a9, 0x91ea, 0x912e, 0x9075, 0x8fbe, 0x8f0a, 0x8e59, + 0x8daa, 0x8cfe, 0x8c54, 0x8bac, 0x8b07, 0x8a64, 0x89c4, 0x8925, + 0x8889, 0x87ee, 0x8756, 0x86c0, 0x862b, 0x8599, 0x8508, 0x8479, + 0x83ec, 0x8361, 0x82d8, 0x8250, 0x81c9, 0x8145, 0x80c2, 0x8040, + 0xff02, 0xfd0e, 0xfb25, 0xf947, 0xf773, 0xf5aa, 0xf3ea, 0xf234, + 0xf087, 0xeee3, 0xed47, 0xebb3, 0xea27, 0xe8a3, 0xe727, 0xe5b2, + 0xe443, 0xe2dc, 0xe17a, 0xe020, 0xdecb, 0xdd7d, 0xdc34, 0xdaf1, + 0xd9b3, 0xd87b, 0xd748, 0xd61a, 0xd4f1, 0xd3cd, 0xd2ad, 0xd192, + 0xd07b, 0xcf69, 0xce5b, 0xcd51, 0xcc4a, 0xcb48, 0xca4a, 0xc94f, + 0xc858, 0xc764, 0xc674, 0xc587, 0xc49d, 0xc3b7, 0xc2d4, 0xc1f4, + 0xc116, 0xc03c, 0xbf65, 0xbe90, 0xbdbe, 0xbcef, 0xbc23, 0xbb59, + 0xba91, 0xb9cc, 0xb90a, 0xb84a, 0xb78c, 0xb6d0, 0xb617, 0xb560, +]; + +#[cfg(test)] +mod tests { + use super::*; + + /// Test behavior specified in IEEE 754 `squareRoot`. + fn spec_test() + where + F: Float + SqrtHelper, + F::Int: HInt, + F::Int: From, + F::Int: From, + F::Int: CastInto, + F::Int: CastInto, + u32: CastInto, + { + // Values that should return a NaN and raise invalid + let nan = [F::NEG_INFINITY, F::NEG_ONE, F::NAN, F::MIN]; + + // Values that return unaltered + let roundtrip = [F::ZERO, F::NEG_ZERO, F::INFINITY]; + + for x in nan { + let FpResult { val, status } = sqrt_round(x, Round::Nearest); + assert!(val.is_nan()); + assert!(status == Status::INVALID); + } + + for x in roundtrip { + let FpResult { val, status } = sqrt_round(x, Round::Nearest); + assert_biteq!(val, x); + assert!(status == Status::OK); + } + } + + #[test] + #[cfg(f16_enabled)] + fn sanity_check_f16() { + assert_biteq!(sqrt(100.0f16), 10.0); + assert_biteq!(sqrt(4.0f16), 2.0); + } + + #[test] + #[cfg(f16_enabled)] + fn spec_tests_f16() { + spec_test::(); + } + + #[test] + #[cfg(f16_enabled)] + #[allow(clippy::approx_constant)] + fn conformance_tests_f16() { + let cases = [ + (f16::PI, 0x3f17_u16), + // 10_000.0, using a hex literal for MSRV hack (Rust < 1.67 checks literal widths as + // part of the AST, so the `cfg` is irrelevant here). + (f16::from_bits(0x70e2), 0x5640_u16), + (f16::from_bits(0x0000000f), 0x13bf_u16), + (f16::INFINITY, f16::INFINITY.to_bits()), + ]; + + for (input, output) in cases { + assert_biteq!( + sqrt(input), + f16::from_bits(output), + "input: {input:?} ({:#018x})", + input.to_bits() + ); + } + } + + #[test] + fn sanity_check_f32() { + assert_biteq!(sqrt(100.0f32), 10.0); + assert_biteq!(sqrt(4.0f32), 2.0); + } + + #[test] + fn spec_tests_f32() { + spec_test::(); + } + + #[test] + #[allow(clippy::approx_constant)] + fn conformance_tests_f32() { + let cases = [ + (f32::PI, 0x3fe2dfc5_u32), + (10000.0f32, 0x42c80000_u32), + (f32::from_bits(0x0000000f), 0x1b2f456f_u32), + (f32::INFINITY, f32::INFINITY.to_bits()), + ]; + + for (input, output) in cases { + assert_biteq!( + sqrt(input), + f32::from_bits(output), + "input: {input:?} ({:#018x})", + input.to_bits() + ); + } + } + + #[test] + fn sanity_check_f64() { + assert_biteq!(sqrt(100.0f64), 10.0); + assert_biteq!(sqrt(4.0f64), 2.0); + } + + #[test] + fn spec_tests_f64() { + spec_test::(); + } + + #[test] + #[allow(clippy::approx_constant)] + fn conformance_tests_f64() { + let cases = [ + (f64::PI, 0x3ffc5bf891b4ef6a_u64), + (10000.0, 0x4059000000000000_u64), + (f64::from_bits(0x0000000f), 0x1e7efbdeb14f4eda_u64), + (f64::INFINITY, f64::INFINITY.to_bits()), + ]; + + for (input, output) in cases { + assert_biteq!( + sqrt(input), + f64::from_bits(output), + "input: {input:?} ({:#018x})", + input.to_bits() + ); + } + } + + #[test] + #[cfg(f128_enabled)] + fn sanity_check_f128() { + assert_biteq!(sqrt(100.0f128), 10.0); + assert_biteq!(sqrt(4.0f128), 2.0); + } + + #[test] + #[cfg(f128_enabled)] + fn spec_tests_f128() { + spec_test::(); + } + + #[test] + #[cfg(f128_enabled)] + #[allow(clippy::approx_constant)] + fn conformance_tests_f128() { + let cases = [ + (f128::PI, 0x3fffc5bf891b4ef6aa79c3b0520d5db9_u128), + // 10_000.0, see `f16` for reasoning. + ( + f128::from_bits(0x400c3880000000000000000000000000), + 0x40059000000000000000000000000000_u128, + ), + (f128::from_bits(0x0000000f), 0x1fc9efbdeb14f4ed9b17ae807907e1e9_u128), + (f128::INFINITY, f128::INFINITY.to_bits()), + ]; + + for (input, output) in cases { + assert_biteq!( + sqrt(input), + f128::from_bits(output), + "input: {input:?} ({:#018x})", + input.to_bits() + ); + } + } +} diff --git a/libm/src/math/generic/trunc.rs b/libm/src/math/generic/trunc.rs new file mode 100644 index 000000000..25414ecf4 --- /dev/null +++ b/libm/src/math/generic/trunc.rs @@ -0,0 +1,138 @@ +/* SPDX-License-Identifier: MIT + * origin: musl src/math/trunc.c */ + +use super::super::support::{FpResult, Status}; +use super::super::{Float, Int, IntTy, MinInt}; + +#[inline] +pub fn trunc(x: F) -> F { + trunc_status(x).val +} + +#[inline] +pub fn trunc_status(x: F) -> FpResult { + let mut xi: F::Int = x.to_bits(); + let e: i32 = x.exp_unbiased(); + + // C1: The represented value has no fractional part, so no truncation is needed + if e >= F::SIG_BITS as i32 { + return FpResult::ok(x); + } + + let mask = if e < 0 { + // C2: If the exponent is negative, the result will be zero so we mask out everything + // except the sign. + F::SIGN_MASK + } else { + // C3: Otherwise, we mask out the last `e` bits of the significand. + !(F::SIG_MASK >> e.unsigned()) + }; + + // C4: If the to-be-masked-out portion is already zero, we have an exact result + if (xi & !mask) == IntTy::::ZERO { + return FpResult::ok(x); + } + + // C5: Otherwise the result is inexact and we will truncate. Raise `FE_INEXACT`, mask the + // result, and return. + + let status = if xi & F::SIG_MASK == F::Int::ZERO { Status::OK } else { Status::INEXACT }; + xi &= mask; + FpResult::new(F::from_bits(xi), status) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::support::Hexf; + + fn spec_test(cases: &[(F, F, Status)]) { + let roundtrip = [F::ZERO, F::ONE, F::NEG_ONE, F::NEG_ZERO, F::INFINITY, F::NEG_INFINITY]; + + for x in roundtrip { + let FpResult { val, status } = trunc_status(x); + assert_biteq!(val, x, "{}", Hexf(x)); + assert_eq!(status, Status::OK, "{}", Hexf(x)); + } + + for &(x, res, res_stat) in cases { + let FpResult { val, status } = trunc_status(x); + assert_biteq!(val, res, "{}", Hexf(x)); + assert_eq!(status, res_stat, "{}", Hexf(x)); + } + } + + /* Skipping f16 / f128 "sanity_check"s and spec cases due to rejected literal lexing at MSRV */ + + #[test] + #[cfg(f16_enabled)] + fn spec_tests_f16() { + let cases = []; + spec_test::(&cases); + } + + #[test] + fn sanity_check_f32() { + assert_eq!(trunc(0.5f32), 0.0); + assert_eq!(trunc(1.1f32), 1.0); + assert_eq!(trunc(2.9f32), 2.0); + } + + #[test] + fn spec_tests_f32() { + let cases = [ + (0.1, 0.0, Status::INEXACT), + (-0.1, -0.0, Status::INEXACT), + (0.9, 0.0, Status::INEXACT), + (-0.9, -0.0, Status::INEXACT), + (1.1, 1.0, Status::INEXACT), + (-1.1, -1.0, Status::INEXACT), + (1.9, 1.0, Status::INEXACT), + (-1.9, -1.0, Status::INEXACT), + ]; + spec_test::(&cases); + + assert_biteq!(trunc(1.1f32), 1.0); + assert_biteq!(trunc(1.1f64), 1.0); + + // C1 + assert_biteq!(trunc(hf32!("0x1p23")), hf32!("0x1p23")); + assert_biteq!(trunc(hf64!("0x1p52")), hf64!("0x1p52")); + assert_biteq!(trunc(hf32!("-0x1p23")), hf32!("-0x1p23")); + assert_biteq!(trunc(hf64!("-0x1p52")), hf64!("-0x1p52")); + + // C2 + assert_biteq!(trunc(hf32!("0x1p-1")), 0.0); + assert_biteq!(trunc(hf64!("0x1p-1")), 0.0); + assert_biteq!(trunc(hf32!("-0x1p-1")), -0.0); + assert_biteq!(trunc(hf64!("-0x1p-1")), -0.0); + } + + #[test] + fn sanity_check_f64() { + assert_eq!(trunc(1.1f64), 1.0); + assert_eq!(trunc(2.9f64), 2.0); + } + + #[test] + fn spec_tests_f64() { + let cases = [ + (0.1, 0.0, Status::INEXACT), + (-0.1, -0.0, Status::INEXACT), + (0.9, 0.0, Status::INEXACT), + (-0.9, -0.0, Status::INEXACT), + (1.1, 1.0, Status::INEXACT), + (-1.1, -1.0, Status::INEXACT), + (1.9, 1.0, Status::INEXACT), + (-1.9, -1.0, Status::INEXACT), + ]; + spec_test::(&cases); + } + + #[test] + #[cfg(f128_enabled)] + fn spec_tests_f128() { + let cases = []; + spec_test::(&cases); + } +} diff --git a/src/math/hypot.rs b/libm/src/math/hypot.rs similarity index 100% rename from src/math/hypot.rs rename to libm/src/math/hypot.rs diff --git a/src/math/hypotf.rs b/libm/src/math/hypotf.rs similarity index 100% rename from src/math/hypotf.rs rename to libm/src/math/hypotf.rs diff --git a/src/math/ilogb.rs b/libm/src/math/ilogb.rs similarity index 90% rename from src/math/ilogb.rs rename to libm/src/math/ilogb.rs index 9d58d0608..ccc4914be 100644 --- a/src/math/ilogb.rs +++ b/libm/src/math/ilogb.rs @@ -21,7 +21,7 @@ pub fn ilogb(x: f64) -> i32 { e } else if e == 0x7ff { force_eval!(0.0 / 0.0); - if (i << 12) != 0 { FP_ILOGBNAN } else { i32::max_value() } + if (i << 12) != 0 { FP_ILOGBNAN } else { i32::MAX } } else { e - 0x3ff } diff --git a/src/math/ilogbf.rs b/libm/src/math/ilogbf.rs similarity index 90% rename from src/math/ilogbf.rs rename to libm/src/math/ilogbf.rs index 85deb43c8..3585d6d36 100644 --- a/src/math/ilogbf.rs +++ b/libm/src/math/ilogbf.rs @@ -21,7 +21,7 @@ pub fn ilogbf(x: f32) -> i32 { e } else if e == 0xff { force_eval!(0.0 / 0.0); - if (i << 9) != 0 { FP_ILOGBNAN } else { i32::max_value() } + if (i << 9) != 0 { FP_ILOGBNAN } else { i32::MAX } } else { e - 0x7f } diff --git a/src/math/j0.rs b/libm/src/math/j0.rs similarity index 99% rename from src/math/j0.rs rename to libm/src/math/j0.rs index 5e5e839f8..99d656f0d 100644 --- a/src/math/j0.rs +++ b/libm/src/math/j0.rs @@ -110,6 +110,7 @@ const S03: f64 = 5.13546550207318111446e-07; /* 0x3EA13B54, 0xCE84D5A9 */ const S04: f64 = 1.16614003333790000205e-09; /* 0x3E1408BC, 0xF4745D8F */ /// Zeroth order of the [Bessel function](https://en.wikipedia.org/wiki/Bessel_function) of the first kind (f64). +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn j0(mut x: f64) -> f64 { let z: f64; let r: f64; @@ -164,6 +165,7 @@ const V03: f64 = 2.59150851840457805467e-07; /* 0x3E91642D, 0x7FF202FD */ const V04: f64 = 4.41110311332675467403e-10; /* 0x3DFE5018, 0x3BD6D9EF */ /// Zeroth order of the [Bessel function](https://en.wikipedia.org/wiki/Bessel_function) of the second kind (f64). +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn y0(x: f64) -> f64 { let z: f64; let u: f64; diff --git a/src/math/j0f.rs b/libm/src/math/j0f.rs similarity index 98% rename from src/math/j0f.rs rename to libm/src/math/j0f.rs index afb6ee9ba..25e5b325c 100644 --- a/src/math/j0f.rs +++ b/libm/src/math/j0f.rs @@ -63,6 +63,7 @@ const S03: f32 = 5.1354652442e-07; /* 0x3509daa6 */ const S04: f32 = 1.1661400734e-09; /* 0x30a045e8 */ /// Zeroth order of the [Bessel function](https://en.wikipedia.org/wiki/Bessel_function) of the first kind (f32). +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn j0f(mut x: f32) -> f32 { let z: f32; let r: f32; @@ -109,6 +110,7 @@ const V03: f32 = 2.5915085189e-07; /* 0x348b216c */ const V04: f32 = 4.4111031494e-10; /* 0x2ff280c2 */ /// Zeroth order of the [Bessel function](https://en.wikipedia.org/wiki/Bessel_function) of the second kind (f32). +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn y0f(x: f32) -> f32 { let z: f32; let u: f32; diff --git a/src/math/j1.rs b/libm/src/math/j1.rs similarity index 98% rename from src/math/j1.rs rename to libm/src/math/j1.rs index cef17a63e..9b604d9e4 100644 --- a/src/math/j1.rs +++ b/libm/src/math/j1.rs @@ -114,6 +114,7 @@ const S04: f64 = 5.04636257076217042715e-09; /* 0x3E35AC88, 0xC97DFF2C */ const S05: f64 = 1.23542274426137913908e-11; /* 0x3DAB2ACF, 0xCFB97ED8 */ /// First order of the [Bessel function](https://en.wikipedia.org/wiki/Bessel_function) of the first kind (f64). +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn j1(x: f64) -> f64 { let mut z: f64; let r: f64; @@ -160,6 +161,7 @@ const V0: [f64; 5] = [ ]; /// First order of the [Bessel function](https://en.wikipedia.org/wiki/Bessel_function) of the second kind (f64). +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn y1(x: f64) -> f64 { let z: f64; let u: f64; @@ -171,10 +173,10 @@ pub fn y1(x: f64) -> f64 { lx = get_low_word(x); /* y1(nan)=nan, y1(<0)=nan, y1(0)=-inf, y1(inf)=0 */ - if (ix << 1 | lx) == 0 { + if (ix << 1) | lx == 0 { return -1.0 / 0.0; } - if (ix >> 31) != 0 { + if ix >> 31 != 0 { return 0.0 / 0.0; } if ix >= 0x7ff00000 { diff --git a/src/math/j1f.rs b/libm/src/math/j1f.rs similarity index 98% rename from src/math/j1f.rs rename to libm/src/math/j1f.rs index 02a3efd24..a47472401 100644 --- a/src/math/j1f.rs +++ b/libm/src/math/j1f.rs @@ -64,6 +64,7 @@ const S04: f32 = 5.0463624390e-09; /* 0x31ad6446 */ const S05: f32 = 1.2354227016e-11; /* 0x2d59567e */ /// First order of the [Bessel function](https://en.wikipedia.org/wiki/Bessel_function) of the first kind (f32). +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn j1f(x: f32) -> f32 { let mut z: f32; let r: f32; @@ -109,6 +110,7 @@ const V0: [f32; 5] = [ ]; /// First order of the [Bessel function](https://en.wikipedia.org/wiki/Bessel_function) of the second kind (f32). +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn y1f(x: f32) -> f32 { let z: f32; let u: f32; diff --git a/src/math/jn.rs b/libm/src/math/jn.rs similarity index 58% rename from src/math/jn.rs rename to libm/src/math/jn.rs index aff051f24..31f8d9c53 100644 --- a/src/math/jn.rs +++ b/libm/src/math/jn.rs @@ -39,6 +39,7 @@ use super::{cos, fabs, get_high_word, get_low_word, j0, j1, log, sin, sqrt, y0, const INVSQRTPI: f64 = 5.64189583547756279280e-01; /* 0x3FE20DD7, 0x50429B6D */ /// Integer order of the [Bessel function](https://en.wikipedia.org/wiki/Bessel_function) of the first kind (f64). +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn jn(n: i32, mut x: f64) -> f64 { let mut ix: u32; let lx: u32; @@ -55,7 +56,7 @@ pub fn jn(n: i32, mut x: f64) -> f64 { ix &= 0x7fffffff; // -lx == !lx + 1 - if (ix | (lx | ((!lx).wrapping_add(1))) >> 31) > 0x7ff00000 { + if ix | ((lx | (!lx).wrapping_add(1)) >> 31) > 0x7ff00000 { /* nan */ return x; } @@ -104,7 +105,8 @@ pub fn jn(n: i32, mut x: f64) -> f64 { 0 => -cos(x) + sin(x), 1 => -cos(x) - sin(x), 2 => cos(x) - sin(x), - 3 | _ => cos(x) + sin(x), + // 3 + _ => cos(x) + sin(x), }; b = INVSQRTPI * temp / sqrt(x); } else { @@ -118,130 +120,128 @@ pub fn jn(n: i32, mut x: f64) -> f64 { a = temp; } } - } else { - if ix < 0x3e100000 { - /* x < 2**-29 */ - /* x is tiny, return the first Taylor expansion of J(n,x) - * J(n,x) = 1/n!*(x/2)^n - ... - */ - if nm1 > 32 { - /* underflow */ - b = 0.0; - } else { - temp = x * 0.5; - b = temp; - a = 1.0; - i = 2; - while i <= nm1 + 1 { - a *= i as f64; /* a = n! */ - b *= temp; /* b = (x/2)^n */ - i += 1; - } - b = b / a; - } + } else if ix < 0x3e100000 { + /* x < 2**-29 */ + /* x is tiny, return the first Taylor expansion of J(n,x) + * J(n,x) = 1/n!*(x/2)^n - ... + */ + if nm1 > 32 { + /* underflow */ + b = 0.0; } else { - /* use backward recurrence */ - /* x x^2 x^2 - * J(n,x)/J(n-1,x) = ---- ------ ------ ..... - * 2n - 2(n+1) - 2(n+2) - * - * 1 1 1 - * (for large x) = ---- ------ ------ ..... - * 2n 2(n+1) 2(n+2) - * -- - ------ - ------ - - * x x x - * - * Let w = 2n/x and h=2/x, then the above quotient - * is equal to the continued fraction: - * 1 - * = ----------------------- - * 1 - * w - ----------------- - * 1 - * w+h - --------- - * w+2h - ... - * - * To determine how many terms needed, let - * Q(0) = w, Q(1) = w(w+h) - 1, - * Q(k) = (w+k*h)*Q(k-1) - Q(k-2), - * When Q(k) > 1e4 good for single - * When Q(k) > 1e9 good for double - * When Q(k) > 1e17 good for quadruple - */ - /* determine k */ - let mut t: f64; - let mut q0: f64; - let mut q1: f64; - let mut w: f64; - let h: f64; - let mut z: f64; - let mut tmp: f64; - let nf: f64; + temp = x * 0.5; + b = temp; + a = 1.0; + i = 2; + while i <= nm1 + 1 { + a *= i as f64; /* a = n! */ + b *= temp; /* b = (x/2)^n */ + i += 1; + } + b = b / a; + } + } else { + /* use backward recurrence */ + /* x x^2 x^2 + * J(n,x)/J(n-1,x) = ---- ------ ------ ..... + * 2n - 2(n+1) - 2(n+2) + * + * 1 1 1 + * (for large x) = ---- ------ ------ ..... + * 2n 2(n+1) 2(n+2) + * -- - ------ - ------ - + * x x x + * + * Let w = 2n/x and h=2/x, then the above quotient + * is equal to the continued fraction: + * 1 + * = ----------------------- + * 1 + * w - ----------------- + * 1 + * w+h - --------- + * w+2h - ... + * + * To determine how many terms needed, let + * Q(0) = w, Q(1) = w(w+h) - 1, + * Q(k) = (w+k*h)*Q(k-1) - Q(k-2), + * When Q(k) > 1e4 good for single + * When Q(k) > 1e9 good for double + * When Q(k) > 1e17 good for quadruple + */ + /* determine k */ + let mut t: f64; + let mut q0: f64; + let mut q1: f64; + let mut w: f64; + let h: f64; + let mut z: f64; + let mut tmp: f64; + let nf: f64; - let mut k: i32; + let mut k: i32; - nf = (nm1 as f64) + 1.0; - w = 2.0 * nf / x; - h = 2.0 / x; - z = w + h; - q0 = w; - q1 = w * z - 1.0; - k = 1; - while q1 < 1.0e9 { - k += 1; - z += h; - tmp = z * q1 - q0; - q0 = q1; - q1 = tmp; - } - t = 0.0; - i = k; - while i >= 0 { - t = 1.0 / (2.0 * ((i as f64) + nf) / x - t); + nf = (nm1 as f64) + 1.0; + w = 2.0 * nf / x; + h = 2.0 / x; + z = w + h; + q0 = w; + q1 = w * z - 1.0; + k = 1; + while q1 < 1.0e9 { + k += 1; + z += h; + tmp = z * q1 - q0; + q0 = q1; + q1 = tmp; + } + t = 0.0; + i = k; + while i >= 0 { + t = 1.0 / (2.0 * ((i as f64) + nf) / x - t); + i -= 1; + } + a = t; + b = 1.0; + /* estimate log((2/x)^n*n!) = n*log(2/x)+n*ln(n) + * Hence, if n*(log(2n/x)) > ... + * single 8.8722839355e+01 + * double 7.09782712893383973096e+02 + * long double 1.1356523406294143949491931077970765006170e+04 + * then recurrent value may overflow and the result is + * likely underflow to zero + */ + tmp = nf * log(fabs(w)); + if tmp < 7.09782712893383973096e+02 { + i = nm1; + while i > 0 { + temp = b; + b = b * (2.0 * (i as f64)) / x - a; + a = temp; i -= 1; } - a = t; - b = 1.0; - /* estimate log((2/x)^n*n!) = n*log(2/x)+n*ln(n) - * Hence, if n*(log(2n/x)) > ... - * single 8.8722839355e+01 - * double 7.09782712893383973096e+02 - * long double 1.1356523406294143949491931077970765006170e+04 - * then recurrent value may overflow and the result is - * likely underflow to zero - */ - tmp = nf * log(fabs(w)); - if tmp < 7.09782712893383973096e+02 { - i = nm1; - while i > 0 { - temp = b; - b = b * (2.0 * (i as f64)) / x - a; - a = temp; - i -= 1; - } - } else { - i = nm1; - while i > 0 { - temp = b; - b = b * (2.0 * (i as f64)) / x - a; - a = temp; - /* scale b to avoid spurious overflow */ - let x1p500 = f64::from_bits(0x5f30000000000000); // 0x1p500 == 2^500 - if b > x1p500 { - a /= b; - t /= b; - b = 1.0; - } - i -= 1; + } else { + i = nm1; + while i > 0 { + temp = b; + b = b * (2.0 * (i as f64)) / x - a; + a = temp; + /* scale b to avoid spurious overflow */ + let x1p500 = f64::from_bits(0x5f30000000000000); // 0x1p500 == 2^500 + if b > x1p500 { + a /= b; + t /= b; + b = 1.0; } + i -= 1; } - z = j0(x); - w = j1(x); - if fabs(z) >= fabs(w) { - b = t * z / b; - } else { - b = t * w / a; - } + } + z = j0(x); + w = j1(x); + if fabs(z) >= fabs(w) { + b = t * z / b; + } else { + b = t * w / a; } } @@ -249,6 +249,7 @@ pub fn jn(n: i32, mut x: f64) -> f64 { } /// Integer order of the [Bessel function](https://en.wikipedia.org/wiki/Bessel_function) of the second kind (f64). +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn yn(n: i32, x: f64) -> f64 { let mut ix: u32; let lx: u32; @@ -266,7 +267,7 @@ pub fn yn(n: i32, x: f64) -> f64 { ix &= 0x7fffffff; // -lx == !lx + 1 - if (ix | (lx | ((!lx).wrapping_add(1))) >> 31) > 0x7ff00000 { + if ix | ((lx | (!lx).wrapping_add(1)) >> 31) > 0x7ff00000 { /* nan */ return x; } @@ -315,7 +316,8 @@ pub fn yn(n: i32, x: f64) -> f64 { 0 => -sin(x) - cos(x), 1 => -sin(x) + cos(x), 2 => sin(x) + cos(x), - 3 | _ => sin(x) - cos(x), + // 3 + _ => sin(x) - cos(x), }; b = INVSQRTPI * temp / sqrt(x); } else { diff --git a/libm/src/math/jnf.rs b/libm/src/math/jnf.rs new file mode 100644 index 000000000..52cf7d8a8 --- /dev/null +++ b/libm/src/math/jnf.rs @@ -0,0 +1,253 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/e_jnf.c */ +/* + * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com. + */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunPro, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ + +use super::{fabsf, j0f, j1f, logf, y0f, y1f}; + +/// Integer order of the [Bessel function](https://en.wikipedia.org/wiki/Bessel_function) of the first kind (f32). +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn jnf(n: i32, mut x: f32) -> f32 { + let mut ix: u32; + let mut nm1: i32; + let mut sign: bool; + let mut i: i32; + let mut a: f32; + let mut b: f32; + let mut temp: f32; + + ix = x.to_bits(); + sign = (ix >> 31) != 0; + ix &= 0x7fffffff; + if ix > 0x7f800000 { + /* nan */ + return x; + } + + /* J(-n,x) = J(n,-x), use |n|-1 to avoid overflow in -n */ + if n == 0 { + return j0f(x); + } + if n < 0 { + nm1 = -(n + 1); + x = -x; + sign = !sign; + } else { + nm1 = n - 1; + } + if nm1 == 0 { + return j1f(x); + } + + sign &= (n & 1) != 0; /* even n: 0, odd n: signbit(x) */ + x = fabsf(x); + if ix == 0 || ix == 0x7f800000 { + /* if x is 0 or inf */ + b = 0.0; + } else if (nm1 as f32) < x { + /* Safe to use J(n+1,x)=2n/x *J(n,x)-J(n-1,x) */ + a = j0f(x); + b = j1f(x); + i = 0; + while i < nm1 { + i += 1; + temp = b; + b = b * (2.0 * (i as f32) / x) - a; + a = temp; + } + } else if ix < 0x35800000 { + /* x < 2**-20 */ + /* x is tiny, return the first Taylor expansion of J(n,x) + * J(n,x) = 1/n!*(x/2)^n - ... + */ + if nm1 > 8 { + /* underflow */ + nm1 = 8; + } + temp = 0.5 * x; + b = temp; + a = 1.0; + i = 2; + while i <= nm1 + 1 { + a *= i as f32; /* a = n! */ + b *= temp; /* b = (x/2)^n */ + i += 1; + } + b = b / a; + } else { + /* use backward recurrence */ + /* x x^2 x^2 + * J(n,x)/J(n-1,x) = ---- ------ ------ ..... + * 2n - 2(n+1) - 2(n+2) + * + * 1 1 1 + * (for large x) = ---- ------ ------ ..... + * 2n 2(n+1) 2(n+2) + * -- - ------ - ------ - + * x x x + * + * Let w = 2n/x and h=2/x, then the above quotient + * is equal to the continued fraction: + * 1 + * = ----------------------- + * 1 + * w - ----------------- + * 1 + * w+h - --------- + * w+2h - ... + * + * To determine how many terms needed, let + * Q(0) = w, Q(1) = w(w+h) - 1, + * Q(k) = (w+k*h)*Q(k-1) - Q(k-2), + * When Q(k) > 1e4 good for single + * When Q(k) > 1e9 good for double + * When Q(k) > 1e17 good for quadruple + */ + /* determine k */ + let mut t: f32; + let mut q0: f32; + let mut q1: f32; + let mut w: f32; + let h: f32; + let mut z: f32; + let mut tmp: f32; + let nf: f32; + let mut k: i32; + + nf = (nm1 as f32) + 1.0; + w = 2.0 * nf / x; + h = 2.0 / x; + z = w + h; + q0 = w; + q1 = w * z - 1.0; + k = 1; + while q1 < 1.0e4 { + k += 1; + z += h; + tmp = z * q1 - q0; + q0 = q1; + q1 = tmp; + } + t = 0.0; + i = k; + while i >= 0 { + t = 1.0 / (2.0 * ((i as f32) + nf) / x - t); + i -= 1; + } + a = t; + b = 1.0; + /* estimate log((2/x)^n*n!) = n*log(2/x)+n*ln(n) + * Hence, if n*(log(2n/x)) > ... + * single 8.8722839355e+01 + * double 7.09782712893383973096e+02 + * long double 1.1356523406294143949491931077970765006170e+04 + * then recurrent value may overflow and the result is + * likely underflow to zero + */ + tmp = nf * logf(fabsf(w)); + if tmp < 88.721679688 { + i = nm1; + while i > 0 { + temp = b; + b = 2.0 * (i as f32) * b / x - a; + a = temp; + i -= 1; + } + } else { + i = nm1; + while i > 0 { + temp = b; + b = 2.0 * (i as f32) * b / x - a; + a = temp; + /* scale b to avoid spurious overflow */ + let x1p60 = f32::from_bits(0x5d800000); // 0x1p60 == 2^60 + if b > x1p60 { + a /= b; + t /= b; + b = 1.0; + } + i -= 1; + } + } + z = j0f(x); + w = j1f(x); + if fabsf(z) >= fabsf(w) { + b = t * z / b; + } else { + b = t * w / a; + } + } + + if sign { -b } else { b } +} + +/// Integer order of the [Bessel function](https://en.wikipedia.org/wiki/Bessel_function) of the second kind (f32). +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn ynf(n: i32, x: f32) -> f32 { + let mut ix: u32; + let mut ib: u32; + let nm1: i32; + let mut sign: bool; + let mut i: i32; + let mut a: f32; + let mut b: f32; + let mut temp: f32; + + ix = x.to_bits(); + sign = (ix >> 31) != 0; + ix &= 0x7fffffff; + if ix > 0x7f800000 { + /* nan */ + return x; + } + if sign && ix != 0 { + /* x < 0 */ + return 0.0 / 0.0; + } + if ix == 0x7f800000 { + return 0.0; + } + + if n == 0 { + return y0f(x); + } + if n < 0 { + nm1 = -(n + 1); + sign = (n & 1) != 0; + } else { + nm1 = n - 1; + sign = false; + } + if nm1 == 0 { + if sign { + return -y1f(x); + } else { + return y1f(x); + } + } + + a = y0f(x); + b = y1f(x); + /* quit if b is -inf */ + ib = b.to_bits(); + i = 0; + while i < nm1 && ib != 0xff800000 { + i += 1; + temp = b; + b = (2.0 * (i as f32) / x) * b - a; + ib = b.to_bits(); + a = temp; + } + + if sign { -b } else { b } +} diff --git a/src/math/k_cos.rs b/libm/src/math/k_cos.rs similarity index 100% rename from src/math/k_cos.rs rename to libm/src/math/k_cos.rs diff --git a/src/math/k_cosf.rs b/libm/src/math/k_cosf.rs similarity index 100% rename from src/math/k_cosf.rs rename to libm/src/math/k_cosf.rs diff --git a/src/math/k_expo2.rs b/libm/src/math/k_expo2.rs similarity index 100% rename from src/math/k_expo2.rs rename to libm/src/math/k_expo2.rs diff --git a/src/math/k_expo2f.rs b/libm/src/math/k_expo2f.rs similarity index 100% rename from src/math/k_expo2f.rs rename to libm/src/math/k_expo2f.rs diff --git a/src/math/k_sin.rs b/libm/src/math/k_sin.rs similarity index 100% rename from src/math/k_sin.rs rename to libm/src/math/k_sin.rs diff --git a/src/math/k_sinf.rs b/libm/src/math/k_sinf.rs similarity index 100% rename from src/math/k_sinf.rs rename to libm/src/math/k_sinf.rs diff --git a/src/math/k_tan.rs b/libm/src/math/k_tan.rs similarity index 100% rename from src/math/k_tan.rs rename to libm/src/math/k_tan.rs diff --git a/src/math/k_tanf.rs b/libm/src/math/k_tanf.rs similarity index 100% rename from src/math/k_tanf.rs rename to libm/src/math/k_tanf.rs diff --git a/libm/src/math/ldexp.rs b/libm/src/math/ldexp.rs new file mode 100644 index 000000000..24899ba30 --- /dev/null +++ b/libm/src/math/ldexp.rs @@ -0,0 +1,21 @@ +#[cfg(f16_enabled)] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn ldexpf16(x: f16, n: i32) -> f16 { + super::scalbnf16(x, n) +} + +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn ldexpf(x: f32, n: i32) -> f32 { + super::scalbnf(x, n) +} + +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn ldexp(x: f64, n: i32) -> f64 { + super::scalbn(x, n) +} + +#[cfg(f128_enabled)] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn ldexpf128(x: f128, n: i32) -> f128 { + super::scalbnf128(x, n) +} diff --git a/src/math/ldexpf.rs b/libm/src/math/ldexpf.rs similarity index 100% rename from src/math/ldexpf.rs rename to libm/src/math/ldexpf.rs diff --git a/libm/src/math/ldexpf128.rs b/libm/src/math/ldexpf128.rs new file mode 100644 index 000000000..b35277d15 --- /dev/null +++ b/libm/src/math/ldexpf128.rs @@ -0,0 +1,4 @@ +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn ldexpf128(x: f128, n: i32) -> f128 { + super::scalbnf128(x, n) +} diff --git a/libm/src/math/ldexpf16.rs b/libm/src/math/ldexpf16.rs new file mode 100644 index 000000000..8de6cffd6 --- /dev/null +++ b/libm/src/math/ldexpf16.rs @@ -0,0 +1,4 @@ +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn ldexpf16(x: f16, n: i32) -> f16 { + super::scalbnf16(x, n) +} diff --git a/src/math/lgamma.rs b/libm/src/math/lgamma.rs similarity index 55% rename from src/math/lgamma.rs rename to libm/src/math/lgamma.rs index a08bc5b64..8312dc186 100644 --- a/src/math/lgamma.rs +++ b/libm/src/math/lgamma.rs @@ -1,5 +1,7 @@ use super::lgamma_r; +/// The natural logarithm of the +/// [Gamma function](https://en.wikipedia.org/wiki/Gamma_function) (f64). #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn lgamma(x: f64) -> f64 { lgamma_r(x).0 diff --git a/src/math/lgamma_r.rs b/libm/src/math/lgamma_r.rs similarity index 99% rename from src/math/lgamma_r.rs rename to libm/src/math/lgamma_r.rs index b26177e6e..6becaad2c 100644 --- a/src/math/lgamma_r.rs +++ b/libm/src/math/lgamma_r.rs @@ -160,7 +160,8 @@ fn sin_pi(mut x: f64) -> f64 { 1 => k_cos(x, 0.0), 2 => k_sin(-x, 0.0, 0), 3 => -k_cos(x, 0.0), - 0 | _ => k_sin(x, 0.0, 0), + // 0 + _ => k_sin(x, 0.0, 0), } } diff --git a/src/math/lgammaf.rs b/libm/src/math/lgammaf.rs similarity index 55% rename from src/math/lgammaf.rs rename to libm/src/math/lgammaf.rs index a9c2da75b..d37512397 100644 --- a/src/math/lgammaf.rs +++ b/libm/src/math/lgammaf.rs @@ -1,5 +1,7 @@ use super::lgammaf_r; +/// The natural logarithm of the +/// [Gamma function](https://en.wikipedia.org/wiki/Gamma_function) (f32). #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn lgammaf(x: f32) -> f32 { lgammaf_r(x).0 diff --git a/src/math/lgammaf_r.rs b/libm/src/math/lgammaf_r.rs similarity index 99% rename from src/math/lgammaf_r.rs rename to libm/src/math/lgammaf_r.rs index 723c90daf..10cecee54 100644 --- a/src/math/lgammaf_r.rs +++ b/libm/src/math/lgammaf_r.rs @@ -95,7 +95,8 @@ fn sin_pi(mut x: f32) -> f32 { 1 => k_cosf(y), 2 => k_sinf(-y), 3 => -k_cosf(y), - 0 | _ => k_sinf(y), + // 0 + _ => k_sinf(y), } } diff --git a/src/math/log.rs b/libm/src/math/log.rs similarity index 100% rename from src/math/log.rs rename to libm/src/math/log.rs diff --git a/src/math/log10.rs b/libm/src/math/log10.rs similarity index 98% rename from src/math/log10.rs rename to libm/src/math/log10.rs index f9d118f12..8c9d68c49 100644 --- a/src/math/log10.rs +++ b/libm/src/math/log10.rs @@ -78,7 +78,7 @@ pub fn log10(mut x: f64) -> f64 { hx += 0x3ff00000 - 0x3fe6a09e; k += (hx >> 20) as i32 - 0x3ff; hx = (hx & 0x000fffff) + 0x3fe6a09e; - ui = (hx as u64) << 32 | (ui & 0xffffffff); + ui = ((hx as u64) << 32) | (ui & 0xffffffff); x = f64::from_bits(ui); f = x - 1.0; diff --git a/src/math/log10f.rs b/libm/src/math/log10f.rs similarity index 100% rename from src/math/log10f.rs rename to libm/src/math/log10f.rs diff --git a/src/math/log1p.rs b/libm/src/math/log1p.rs similarity index 98% rename from src/math/log1p.rs rename to libm/src/math/log1p.rs index 80561ec74..b7f3fb09e 100644 --- a/src/math/log1p.rs +++ b/libm/src/math/log1p.rs @@ -125,7 +125,7 @@ pub fn log1p(x: f64) -> f64 { } /* reduce u into [sqrt(2)/2, sqrt(2)] */ hu = (hu & 0x000fffff) + 0x3fe6a09e; - ui = (hu as u64) << 32 | (ui & 0xffffffff); + ui = ((hu as u64) << 32) | (ui & 0xffffffff); f = f64::from_bits(ui) - 1.; } hfsq = 0.5 * f * f; diff --git a/src/math/log1pf.rs b/libm/src/math/log1pf.rs similarity index 100% rename from src/math/log1pf.rs rename to libm/src/math/log1pf.rs diff --git a/src/math/log2.rs b/libm/src/math/log2.rs similarity index 98% rename from src/math/log2.rs rename to libm/src/math/log2.rs index 59533340b..701f63c25 100644 --- a/src/math/log2.rs +++ b/libm/src/math/log2.rs @@ -75,7 +75,7 @@ pub fn log2(mut x: f64) -> f64 { hx += 0x3ff00000 - 0x3fe6a09e; k += (hx >> 20) as i32 - 0x3ff; hx = (hx & 0x000fffff) + 0x3fe6a09e; - ui = (hx as u64) << 32 | (ui & 0xffffffff); + ui = ((hx as u64) << 32) | (ui & 0xffffffff); x = f64::from_bits(ui); f = x - 1.0; diff --git a/src/math/log2f.rs b/libm/src/math/log2f.rs similarity index 100% rename from src/math/log2f.rs rename to libm/src/math/log2f.rs diff --git a/src/math/logf.rs b/libm/src/math/logf.rs similarity index 100% rename from src/math/logf.rs rename to libm/src/math/logf.rs diff --git a/src/math/mod.rs b/libm/src/math/mod.rs similarity index 66% rename from src/math/mod.rs rename to libm/src/math/mod.rs index 85c9fc5bf..949c18b40 100644 --- a/src/math/mod.rs +++ b/libm/src/math/mod.rs @@ -60,31 +60,68 @@ macro_rules! i { // the time of this writing this is only used in a few places, and once // rust-lang/rust#72751 is fixed then this macro will no longer be necessary and // the native `/` operator can be used and panics won't be codegen'd. -#[cfg(any(debug_assertions, not(feature = "unstable")))] +#[cfg(any(debug_assertions, not(intrinsics_enabled)))] macro_rules! div { ($a:expr, $b:expr) => { $a / $b }; } -#[cfg(all(not(debug_assertions), feature = "unstable"))] +#[cfg(all(not(debug_assertions), intrinsics_enabled))] macro_rules! div { ($a:expr, $b:expr) => { unsafe { core::intrinsics::unchecked_div($a, $b) } }; } -macro_rules! llvm_intrinsically_optimized { - (#[cfg($($clause:tt)*)] $e:expr) => { - #[cfg(all(feature = "unstable", not(feature = "force-soft-floats"), $($clause)*))] - { - if true { // thwart the dead code lint - $e - } - } - }; +// `support` may be public for testing +#[macro_use] +#[cfg(feature = "unstable-public-internals")] +pub mod support; + +#[macro_use] +#[cfg(not(feature = "unstable-public-internals"))] +pub(crate) mod support; + +cfg_if! { + if #[cfg(feature = "unstable-public-internals")] { + pub mod generic; + } else { + mod generic; + } } +// Private modules +mod arch; +mod expo2; +mod k_cos; +mod k_cosf; +mod k_expo2; +mod k_expo2f; +mod k_sin; +mod k_sinf; +mod k_tan; +mod k_tanf; +mod rem_pio2; +mod rem_pio2_large; +mod rem_pio2f; + +// Private re-imports +use self::expo2::expo2; +use self::k_cos::k_cos; +use self::k_cosf::k_cosf; +use self::k_expo2::k_expo2; +use self::k_expo2f::k_expo2f; +use self::k_sin::k_sin; +use self::k_sinf::k_sinf; +use self::k_tan::k_tan; +use self::k_tanf::k_tanf; +use self::rem_pio2::rem_pio2; +use self::rem_pio2_large::rem_pio2_large; +use self::rem_pio2f::rem_pio2f; +#[allow(unused_imports)] +use self::support::{CastFrom, CastInto, DFloat, DInt, Float, HFloat, HInt, Int, IntTy, MinInt}; + // Public modules mod acos; mod acosf; @@ -103,9 +140,7 @@ mod atanhf; mod cbrt; mod cbrtf; mod ceil; -mod ceilf; mod copysign; -mod copysignf; mod cos; mod cosf; mod cosh; @@ -121,19 +156,14 @@ mod expf; mod expm1; mod expm1f; mod fabs; -mod fabsf; mod fdim; -mod fdimf; mod floor; -mod floorf; mod fma; -mod fmaf; -mod fmax; -mod fmaxf; -mod fmin; -mod fminf; +mod fma_wide; +mod fmin_fmax; +mod fminimum_fmaximum; +mod fminimum_fmaximum_num; mod fmod; -mod fmodf; mod frexp; mod frexpf; mod hypot; @@ -147,7 +177,6 @@ mod j1f; mod jn; mod jnf; mod ldexp; -mod ldexpf; mod lgamma; mod lgamma_r; mod lgammaf; @@ -171,11 +200,9 @@ mod remainderf; mod remquo; mod remquof; mod rint; -mod rintf; mod round; -mod roundf; +mod roundeven; mod scalbn; -mod scalbnf; mod sin; mod sincos; mod sincosf; @@ -183,7 +210,6 @@ mod sinf; mod sinh; mod sinhf; mod sqrt; -mod sqrtf; mod tan; mod tanf; mod tanh; @@ -191,7 +217,6 @@ mod tanhf; mod tgamma; mod tgammaf; mod trunc; -mod truncf; // Use separated imports instead of {}-grouped imports for easier merging. pub use self::acos::acos; @@ -210,10 +235,8 @@ pub use self::atanh::atanh; pub use self::atanhf::atanhf; pub use self::cbrt::cbrt; pub use self::cbrtf::cbrtf; -pub use self::ceil::ceil; -pub use self::ceilf::ceilf; -pub use self::copysign::copysign; -pub use self::copysignf::copysignf; +pub use self::ceil::{ceil, ceilf}; +pub use self::copysign::{copysign, copysignf}; pub use self::cos::cos; pub use self::cosf::cosf; pub use self::cosh::cosh; @@ -228,20 +251,15 @@ pub use self::exp10f::exp10f; pub use self::expf::expf; pub use self::expm1::expm1; pub use self::expm1f::expm1f; -pub use self::fabs::fabs; -pub use self::fabsf::fabsf; -pub use self::fdim::fdim; -pub use self::fdimf::fdimf; -pub use self::floor::floor; -pub use self::floorf::floorf; +pub use self::fabs::{fabs, fabsf}; +pub use self::fdim::{fdim, fdimf}; +pub use self::floor::{floor, floorf}; pub use self::fma::fma; -pub use self::fmaf::fmaf; -pub use self::fmax::fmax; -pub use self::fmaxf::fmaxf; -pub use self::fmin::fmin; -pub use self::fminf::fminf; -pub use self::fmod::fmod; -pub use self::fmodf::fmodf; +pub use self::fma_wide::fmaf; +pub use self::fmin_fmax::{fmax, fmaxf, fmin, fminf}; +pub use self::fminimum_fmaximum::{fmaximum, fmaximumf, fminimum, fminimumf}; +pub use self::fminimum_fmaximum_num::{fmaximum_num, fmaximum_numf, fminimum_num, fminimum_numf}; +pub use self::fmod::{fmod, fmodf}; pub use self::frexp::frexp; pub use self::frexpf::frexpf; pub use self::hypot::hypot; @@ -254,8 +272,7 @@ pub use self::j1::{j1, y1}; pub use self::j1f::{j1f, y1f}; pub use self::jn::{jn, yn}; pub use self::jnf::{jnf, ynf}; -pub use self::ldexp::ldexp; -pub use self::ldexpf::ldexpf; +pub use self::ldexp::{ldexp, ldexpf}; pub use self::lgamma::lgamma; pub use self::lgamma_r::lgamma_r; pub use self::lgammaf::lgammaf; @@ -278,57 +295,74 @@ pub use self::remainder::remainder; pub use self::remainderf::remainderf; pub use self::remquo::remquo; pub use self::remquof::remquof; -pub use self::rint::rint; -pub use self::rintf::rintf; -pub use self::round::round; -pub use self::roundf::roundf; -pub use self::scalbn::scalbn; -pub use self::scalbnf::scalbnf; +pub use self::rint::{rint, rintf}; +pub use self::round::{round, roundf}; +pub use self::roundeven::{roundeven, roundevenf}; +pub use self::scalbn::{scalbn, scalbnf}; pub use self::sin::sin; pub use self::sincos::sincos; pub use self::sincosf::sincosf; pub use self::sinf::sinf; pub use self::sinh::sinh; pub use self::sinhf::sinhf; -pub use self::sqrt::sqrt; -pub use self::sqrtf::sqrtf; +pub use self::sqrt::{sqrt, sqrtf}; pub use self::tan::tan; pub use self::tanf::tanf; pub use self::tanh::tanh; pub use self::tanhf::tanhf; pub use self::tgamma::tgamma; pub use self::tgammaf::tgammaf; -pub use self::trunc::trunc; -pub use self::truncf::truncf; +pub use self::trunc::{trunc, truncf}; -// Private modules -mod expo2; -mod fenv; -mod k_cos; -mod k_cosf; -mod k_expo2; -mod k_expo2f; -mod k_sin; -mod k_sinf; -mod k_tan; -mod k_tanf; -mod rem_pio2; -mod rem_pio2_large; -mod rem_pio2f; +cfg_if! { + if #[cfg(f16_enabled)] { + // verify-sorted-start + pub use self::ceil::ceilf16; + pub use self::copysign::copysignf16; + pub use self::fabs::fabsf16; + pub use self::fdim::fdimf16; + pub use self::floor::floorf16; + pub use self::fmin_fmax::{fmaxf16, fminf16}; + pub use self::fminimum_fmaximum::{fmaximumf16, fminimumf16}; + pub use self::fminimum_fmaximum_num::{fmaximum_numf16, fminimum_numf16}; + pub use self::fmod::fmodf16; + pub use self::ldexp::ldexpf16; + pub use self::rint::rintf16; + pub use self::round::roundf16; + pub use self::roundeven::roundevenf16; + pub use self::scalbn::scalbnf16; + pub use self::sqrt::sqrtf16; + pub use self::trunc::truncf16; + // verify-sorted-end -// Private re-imports -use self::expo2::expo2; -use self::k_cos::k_cos; -use self::k_cosf::k_cosf; -use self::k_expo2::k_expo2; -use self::k_expo2f::k_expo2f; -use self::k_sin::k_sin; -use self::k_sinf::k_sinf; -use self::k_tan::k_tan; -use self::k_tanf::k_tanf; -use self::rem_pio2::rem_pio2; -use self::rem_pio2_large::rem_pio2_large; -use self::rem_pio2f::rem_pio2f; + #[allow(unused_imports)] + pub(crate) use self::fma_wide::fmaf16; + } +} + +cfg_if! { + if #[cfg(f128_enabled)] { + // verify-sorted-start + pub use self::ceil::ceilf128; + pub use self::copysign::copysignf128; + pub use self::fabs::fabsf128; + pub use self::fdim::fdimf128; + pub use self::floor::floorf128; + pub use self::fma::fmaf128; + pub use self::fmin_fmax::{fmaxf128, fminf128}; + pub use self::fminimum_fmaximum::{fmaximumf128, fminimumf128}; + pub use self::fminimum_fmaximum_num::{fmaximum_numf128, fminimum_numf128}; + pub use self::fmod::fmodf128; + pub use self::ldexp::ldexpf128; + pub use self::rint::rintf128; + pub use self::round::roundf128; + pub use self::roundeven::roundevenf128; + pub use self::scalbn::scalbnf128; + pub use self::sqrt::sqrtf128; + pub use self::trunc::truncf128; + // verify-sorted-end + } +} #[inline] fn get_high_word(x: f64) -> u32 { @@ -358,5 +392,5 @@ fn with_set_low_word(f: f64, lo: u32) -> f64 { #[inline] fn combine_words(hi: u32, lo: u32) -> f64 { - f64::from_bits((hi as u64) << 32 | lo as u64) + f64::from_bits(((hi as u64) << 32) | lo as u64) } diff --git a/src/math/modf.rs b/libm/src/math/modf.rs similarity index 86% rename from src/math/modf.rs rename to libm/src/math/modf.rs index bcab33a81..6541862cd 100644 --- a/src/math/modf.rs +++ b/libm/src/math/modf.rs @@ -1,8 +1,9 @@ +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn modf(x: f64) -> (f64, f64) { let rv2: f64; let mut u = x.to_bits(); let mask: u64; - let e = ((u >> 52 & 0x7ff) as i32) - 0x3ff; + let e = (((u >> 52) & 0x7ff) as i32) - 0x3ff; /* no fractional part */ if e >= 52 { diff --git a/src/math/modff.rs b/libm/src/math/modff.rs similarity index 86% rename from src/math/modff.rs rename to libm/src/math/modff.rs index 56ece12e3..90c6bca7d 100644 --- a/src/math/modff.rs +++ b/libm/src/math/modff.rs @@ -1,8 +1,9 @@ +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn modff(x: f32) -> (f32, f32) { let rv2: f32; let mut u: u32 = x.to_bits(); let mask: u32; - let e = ((u >> 23 & 0xff) as i32) - 0x7f; + let e = (((u >> 23) & 0xff) as i32) - 0x7f; /* no fractional part */ if e >= 23 { diff --git a/src/math/nextafter.rs b/libm/src/math/nextafter.rs similarity index 76% rename from src/math/nextafter.rs rename to libm/src/math/nextafter.rs index 057626191..c991ff6f2 100644 --- a/src/math/nextafter.rs +++ b/libm/src/math/nextafter.rs @@ -10,20 +10,20 @@ pub fn nextafter(x: f64, y: f64) -> f64 { return y; } - let ax = ux_i & !1_u64 / 2; - let ay = uy_i & !1_u64 / 2; + let ax = ux_i & (!1_u64 / 2); + let ay = uy_i & (!1_u64 / 2); if ax == 0 { if ay == 0 { return y; } - ux_i = (uy_i & 1_u64 << 63) | 1; - } else if ax > ay || ((ux_i ^ uy_i) & 1_u64 << 63) != 0 { + ux_i = (uy_i & (1_u64 << 63)) | 1; + } else if ax > ay || ((ux_i ^ uy_i) & (1_u64 << 63)) != 0 { ux_i -= 1; } else { ux_i += 1; } - let e = ux_i >> 52 & 0x7ff; + let e = (ux_i >> 52) & 0x7ff; // raise overflow if ux.f is infinite and x is finite if e == 0x7ff { force_eval!(x + x); diff --git a/src/math/nextafterf.rs b/libm/src/math/nextafterf.rs similarity index 100% rename from src/math/nextafterf.rs rename to libm/src/math/nextafterf.rs diff --git a/src/math/pow.rs b/libm/src/math/pow.rs similarity index 87% rename from src/math/pow.rs rename to libm/src/math/pow.rs index 7ecad291d..80b2a2499 100644 --- a/src/math/pow.rs +++ b/libm/src/math/pow.rs @@ -98,8 +98,8 @@ pub fn pow(x: f64, y: f64) -> f64 { let (hx, lx): (i32, u32) = ((x.to_bits() >> 32) as i32, x.to_bits() as u32); let (hy, ly): (i32, u32) = ((y.to_bits() >> 32) as i32, y.to_bits() as u32); - let mut ix: i32 = (hx & 0x7fffffff) as i32; - let iy: i32 = (hy & 0x7fffffff) as i32; + let mut ix: i32 = hx & 0x7fffffff_i32; + let iy: i32 = hy & 0x7fffffff_i32; /* x**0 = 1, even if x is NaN */ if ((iy as u32) | ly) == 0 { @@ -355,7 +355,7 @@ pub fn pow(x: f64, y: f64) -> f64 { } /* compute 2**(p_h+p_l) */ - let i: i32 = j & (0x7fffffff as i32); + let i: i32 = j & 0x7fffffff_i32; k = (i >> 20) - 0x3ff; let mut n: i32 = 0; @@ -398,7 +398,6 @@ mod tests { extern crate core; use self::core::f64::consts::{E, PI}; - use self::core::f64::{EPSILON, INFINITY, MAX, MIN, MIN_POSITIVE, NAN, NEG_INFINITY}; use super::pow; const POS_ZERO: &[f64] = &[0.0]; @@ -407,15 +406,15 @@ mod tests { const NEG_ONE: &[f64] = &[-1.0]; const POS_FLOATS: &[f64] = &[99.0 / 70.0, E, PI]; const NEG_FLOATS: &[f64] = &[-99.0 / 70.0, -E, -PI]; - const POS_SMALL_FLOATS: &[f64] = &[(1.0 / 2.0), MIN_POSITIVE, EPSILON]; - const NEG_SMALL_FLOATS: &[f64] = &[-(1.0 / 2.0), -MIN_POSITIVE, -EPSILON]; - const POS_EVENS: &[f64] = &[2.0, 6.0, 8.0, 10.0, 22.0, 100.0, MAX]; - const NEG_EVENS: &[f64] = &[MIN, -100.0, -22.0, -10.0, -8.0, -6.0, -2.0]; + const POS_SMALL_FLOATS: &[f64] = &[(1.0 / 2.0), f64::MIN_POSITIVE, f64::EPSILON]; + const NEG_SMALL_FLOATS: &[f64] = &[-(1.0 / 2.0), -f64::MIN_POSITIVE, -f64::EPSILON]; + const POS_EVENS: &[f64] = &[2.0, 6.0, 8.0, 10.0, 22.0, 100.0, f64::MAX]; + const NEG_EVENS: &[f64] = &[f64::MIN, -100.0, -22.0, -10.0, -8.0, -6.0, -2.0]; const POS_ODDS: &[f64] = &[3.0, 7.0]; const NEG_ODDS: &[f64] = &[-7.0, -3.0]; - const NANS: &[f64] = &[NAN]; - const POS_INF: &[f64] = &[INFINITY]; - const NEG_INF: &[f64] = &[NEG_INFINITY]; + const NANS: &[f64] = &[f64::NAN]; + const POS_INF: &[f64] = &[f64::INFINITY]; + const NEG_INF: &[f64] = &[f64::NEG_INFINITY]; const ALL: &[&[f64]] = &[ POS_ZERO, @@ -492,83 +491,83 @@ mod tests { #[test] fn nan_inputs() { // NAN as the base: - // (NAN ^ anything *but 0* should be NAN) - test_sets_as_exponent(NAN, &ALL[2..], NAN); + // (f64::NAN ^ anything *but 0* should be f64::NAN) + test_sets_as_exponent(f64::NAN, &ALL[2..], f64::NAN); - // NAN as the exponent: - // (anything *but 1* ^ NAN should be NAN) - test_sets_as_base(&ALL[..(ALL.len() - 2)], NAN, NAN); + // f64::NAN as the exponent: + // (anything *but 1* ^ f64::NAN should be f64::NAN) + test_sets_as_base(&ALL[..(ALL.len() - 2)], f64::NAN, f64::NAN); } #[test] fn infinity_as_base() { // Positive Infinity as the base: - // (+Infinity ^ positive anything but 0 and NAN should be +Infinity) - test_sets_as_exponent(INFINITY, &POS[1..], INFINITY); + // (+Infinity ^ positive anything but 0 and f64::NAN should be +Infinity) + test_sets_as_exponent(f64::INFINITY, &POS[1..], f64::INFINITY); - // (+Infinity ^ negative anything except 0 and NAN should be 0.0) - test_sets_as_exponent(INFINITY, &NEG[1..], 0.0); + // (+Infinity ^ negative anything except 0 and f64::NAN should be 0.0) + test_sets_as_exponent(f64::INFINITY, &NEG[1..], 0.0); // Negative Infinity as the base: // (-Infinity ^ positive odd ints should be -Infinity) - test_sets_as_exponent(NEG_INFINITY, &[POS_ODDS], NEG_INFINITY); + test_sets_as_exponent(f64::NEG_INFINITY, &[POS_ODDS], f64::NEG_INFINITY); // (-Infinity ^ anything but odd ints should be == -0 ^ (-anything)) // We can lump in pos/neg odd ints here because they don't seem to // cause panics (div by zero) in release mode (I think). - test_sets(ALL, &|v: f64| pow(NEG_INFINITY, v), &|v: f64| pow(-0.0, -v)); + test_sets(ALL, &|v: f64| pow(f64::NEG_INFINITY, v), &|v: f64| pow(-0.0, -v)); } #[test] fn infinity_as_exponent() { // Positive/Negative base greater than 1: - // (pos/neg > 1 ^ Infinity should be Infinity - note this excludes NAN as the base) - test_sets_as_base(&ALL[5..(ALL.len() - 2)], INFINITY, INFINITY); + // (pos/neg > 1 ^ Infinity should be Infinity - note this excludes f64::NAN as the base) + test_sets_as_base(&ALL[5..(ALL.len() - 2)], f64::INFINITY, f64::INFINITY); // (pos/neg > 1 ^ -Infinity should be 0.0) - test_sets_as_base(&ALL[5..ALL.len() - 2], NEG_INFINITY, 0.0); + test_sets_as_base(&ALL[5..ALL.len() - 2], f64::NEG_INFINITY, 0.0); // Positive/Negative base less than 1: let base_below_one = &[POS_ZERO, NEG_ZERO, NEG_SMALL_FLOATS, POS_SMALL_FLOATS]; - // (pos/neg < 1 ^ Infinity should be 0.0 - this also excludes NAN as the base) - test_sets_as_base(base_below_one, INFINITY, 0.0); + // (pos/neg < 1 ^ Infinity should be 0.0 - this also excludes f64::NAN as the base) + test_sets_as_base(base_below_one, f64::INFINITY, 0.0); // (pos/neg < 1 ^ -Infinity should be Infinity) - test_sets_as_base(base_below_one, NEG_INFINITY, INFINITY); + test_sets_as_base(base_below_one, f64::NEG_INFINITY, f64::INFINITY); // Positive/Negative 1 as the base: // (pos/neg 1 ^ Infinity should be 1) - test_sets_as_base(&[NEG_ONE, POS_ONE], INFINITY, 1.0); + test_sets_as_base(&[NEG_ONE, POS_ONE], f64::INFINITY, 1.0); // (pos/neg 1 ^ -Infinity should be 1) - test_sets_as_base(&[NEG_ONE, POS_ONE], NEG_INFINITY, 1.0); + test_sets_as_base(&[NEG_ONE, POS_ONE], f64::NEG_INFINITY, 1.0); } #[test] fn zero_as_base() { // Positive Zero as the base: - // (+0 ^ anything positive but 0 and NAN should be +0) + // (+0 ^ anything positive but 0 and f64::NAN should be +0) test_sets_as_exponent(0.0, &POS[1..], 0.0); - // (+0 ^ anything negative but 0 and NAN should be Infinity) + // (+0 ^ anything negative but 0 and f64::NAN should be Infinity) // (this should panic because we're dividing by zero) - test_sets_as_exponent(0.0, &NEG[1..], INFINITY); + test_sets_as_exponent(0.0, &NEG[1..], f64::INFINITY); // Negative Zero as the base: - // (-0 ^ anything positive but 0, NAN, and odd ints should be +0) + // (-0 ^ anything positive but 0, f64::NAN, and odd ints should be +0) test_sets_as_exponent(-0.0, &POS[3..], 0.0); - // (-0 ^ anything negative but 0, NAN, and odd ints should be Infinity) + // (-0 ^ anything negative but 0, f64::NAN, and odd ints should be Infinity) // (should panic because of divide by zero) - test_sets_as_exponent(-0.0, &NEG[3..], INFINITY); + test_sets_as_exponent(-0.0, &NEG[3..], f64::INFINITY); // (-0 ^ positive odd ints should be -0) test_sets_as_exponent(-0.0, &[POS_ODDS], -0.0); // (-0 ^ negative odd ints should be -Infinity) // (should panic because of divide by zero) - test_sets_as_exponent(-0.0, &[NEG_ODDS], NEG_INFINITY); + test_sets_as_exponent(-0.0, &[NEG_ODDS], f64::NEG_INFINITY); } #[test] @@ -583,21 +582,17 @@ mod tests { // Factoring -1 out: // (negative anything ^ integer should be (-1 ^ integer) * (positive anything ^ integer)) - (&[POS_ZERO, NEG_ZERO, POS_ONE, NEG_ONE, POS_EVENS, NEG_EVENS]).iter().for_each( - |int_set| { - int_set.iter().for_each(|int| { - test_sets(ALL, &|v: f64| pow(-v, *int), &|v: f64| { - pow(-1.0, *int) * pow(v, *int) - }); - }) - }, - ); + [POS_ZERO, NEG_ZERO, POS_ONE, NEG_ONE, POS_EVENS, NEG_EVENS].iter().for_each(|int_set| { + int_set.iter().for_each(|int| { + test_sets(ALL, &|v: f64| pow(-v, *int), &|v: f64| pow(-1.0, *int) * pow(v, *int)); + }) + }); // Negative base (imaginary results): // (-anything except 0 and Infinity ^ non-integer should be NAN) - (&NEG[1..(NEG.len() - 1)]).iter().for_each(|set| { + NEG[1..(NEG.len() - 1)].iter().for_each(|set| { set.iter().for_each(|val| { - test_sets(&ALL[3..7], &|v: f64| pow(*val, v), &|_| NAN); + test_sets(&ALL[3..7], &|v: f64| pow(*val, v), &|_| f64::NAN); }) }); } diff --git a/src/math/powf.rs b/libm/src/math/powf.rs similarity index 97% rename from src/math/powf.rs rename to libm/src/math/powf.rs index 2d9d1e4bb..839c6c23d 100644 --- a/src/math/powf.rs +++ b/libm/src/math/powf.rs @@ -13,6 +13,8 @@ * ==================================================== */ +use core::cmp::Ordering; + use super::{fabsf, scalbnf, sqrtf}; const BP: [f32; 2] = [1.0, 1.5]; @@ -115,15 +117,13 @@ pub fn powf(x: f32, y: f32) -> f32 { /* special value of y */ if iy == 0x7f800000 { /* y is +-inf */ - if ix == 0x3f800000 { + match ix.cmp(&0x3f800000) { /* (-1)**+-inf is 1 */ - return 1.0; - } else if ix > 0x3f800000 { + Ordering::Equal => return 1.0, /* (|x|>1)**+-inf = inf,0 */ - return if hy >= 0 { y } else { 0.0 }; - } else { + Ordering::Greater => return if hy >= 0 { y } else { 0.0 }, /* (|x|<1)**+-inf = 0,inf */ - return if hy >= 0 { 0.0 } else { -y }; + Ordering::Less => return if hy >= 0 { 0.0 } else { -y }, } } if iy == 0x3f800000 { diff --git a/src/math/rem_pio2.rs b/libm/src/math/rem_pio2.rs similarity index 99% rename from src/math/rem_pio2.rs rename to libm/src/math/rem_pio2.rs index 4dfb8c658..917e90819 100644 --- a/src/math/rem_pio2.rs +++ b/libm/src/math/rem_pio2.rs @@ -50,7 +50,7 @@ pub(crate) fn rem_pio2(x: f64) -> (i32, f64, f64) { fn medium(x: f64, ix: u32) -> (i32, f64, f64) { /* rint(x/(pi/2)), Assume round-to-nearest. */ - let tmp = x as f64 * INV_PIO2 + TO_INT; + let tmp = x * INV_PIO2 + TO_INT; // force rounding of tmp to it's storage format on x87 to avoid // excess precision issues. #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))] diff --git a/src/math/rem_pio2_large.rs b/libm/src/math/rem_pio2_large.rs similarity index 99% rename from src/math/rem_pio2_large.rs rename to libm/src/math/rem_pio2_large.rs index 1dfbba3b1..6d679bbe9 100644 --- a/src/math/rem_pio2_large.rs +++ b/libm/src/math/rem_pio2_large.rs @@ -226,8 +226,9 @@ pub(crate) fn rem_pio2_large(x: &[f64], y: &mut [f64], e0: i32, prec: usize) -> let x1p24 = f64::from_bits(0x4170000000000000); // 0x1p24 === 2 ^ 24 let x1p_24 = f64::from_bits(0x3e70000000000000); // 0x1p_24 === 2 ^ (-24) - #[cfg(all(target_pointer_width = "64", feature = "checked"))] - assert!(e0 <= 16360); + if cfg!(target_pointer_width = "64") { + debug_assert!(e0 <= 16360); + } let nx = x.len(); @@ -425,8 +426,6 @@ pub(crate) fn rem_pio2_large(x: &[f64], y: &mut [f64], e0: i32, prec: usize) -> for i in (0..=jz).rev() { fw += i!(fq, i); } - // TODO: drop excess precision here once double_t is used - fw = fw as f64; i!(y, 0, =, if ih == 0 { fw } else { -fw }); fw = i!(fq, 0) - fw; for i in 1..=jz { diff --git a/src/math/rem_pio2f.rs b/libm/src/math/rem_pio2f.rs similarity index 100% rename from src/math/rem_pio2f.rs rename to libm/src/math/rem_pio2f.rs diff --git a/src/math/remainder.rs b/libm/src/math/remainder.rs similarity index 100% rename from src/math/remainder.rs rename to libm/src/math/remainder.rs diff --git a/src/math/remainderf.rs b/libm/src/math/remainderf.rs similarity index 100% rename from src/math/remainderf.rs rename to libm/src/math/remainderf.rs diff --git a/src/math/remquo.rs b/libm/src/math/remquo.rs similarity index 100% rename from src/math/remquo.rs rename to libm/src/math/remquo.rs diff --git a/src/math/remquof.rs b/libm/src/math/remquof.rs similarity index 100% rename from src/math/remquof.rs rename to libm/src/math/remquof.rs diff --git a/libm/src/math/rint.rs b/libm/src/math/rint.rs new file mode 100644 index 000000000..e1c32c943 --- /dev/null +++ b/libm/src/math/rint.rs @@ -0,0 +1,51 @@ +use super::support::Round; + +/// Round `x` to the nearest integer, breaking ties toward even. +#[cfg(f16_enabled)] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn rintf16(x: f16) -> f16 { + select_implementation! { + name: rintf16, + use_arch: all(target_arch = "aarch64", target_feature = "fp16"), + args: x, + } + + super::generic::rint_round(x, Round::Nearest).val +} + +/// Round `x` to the nearest integer, breaking ties toward even. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn rintf(x: f32) -> f32 { + select_implementation! { + name: rintf, + use_arch: any( + all(target_arch = "aarch64", target_feature = "neon"), + all(target_arch = "wasm32", intrinsics_enabled), + ), + args: x, + } + + super::generic::rint_round(x, Round::Nearest).val +} + +/// Round `x` to the nearest integer, breaking ties toward even. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn rint(x: f64) -> f64 { + select_implementation! { + name: rint, + use_arch: any( + all(target_arch = "aarch64", target_feature = "neon"), + all(target_arch = "wasm32", intrinsics_enabled), + ), + args: x, + } + + super::generic::rint_round(x, Round::Nearest).val +} + +/// Round `x` to the nearest integer, breaking ties toward even. +#[cfg(f128_enabled)] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn rintf128(x: f128) -> f128 { + super::generic::rint_round(x, Round::Nearest).val +} diff --git a/libm/src/math/round.rs b/libm/src/math/round.rs new file mode 100644 index 000000000..6cd091cd7 --- /dev/null +++ b/libm/src/math/round.rs @@ -0,0 +1,25 @@ +/// Round `x` to the nearest integer, breaking ties away from zero. +#[cfg(f16_enabled)] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn roundf16(x: f16) -> f16 { + super::generic::round(x) +} + +/// Round `x` to the nearest integer, breaking ties away from zero. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn roundf(x: f32) -> f32 { + super::generic::round(x) +} + +/// Round `x` to the nearest integer, breaking ties away from zero. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn round(x: f64) -> f64 { + super::generic::round(x) +} + +/// Round `x` to the nearest integer, breaking ties away from zero. +#[cfg(f128_enabled)] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn roundf128(x: f128) -> f128 { + super::generic::round(x) +} diff --git a/libm/src/math/roundeven.rs b/libm/src/math/roundeven.rs new file mode 100644 index 000000000..6e621d762 --- /dev/null +++ b/libm/src/math/roundeven.rs @@ -0,0 +1,36 @@ +use super::support::{Float, Round}; + +/// Round `x` to the nearest integer, breaking ties toward even. This is IEEE 754 +/// `roundToIntegralTiesToEven`. +#[cfg(f16_enabled)] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn roundevenf16(x: f16) -> f16 { + roundeven_impl(x) +} + +/// Round `x` to the nearest integer, breaking ties toward even. This is IEEE 754 +/// `roundToIntegralTiesToEven`. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn roundevenf(x: f32) -> f32 { + roundeven_impl(x) +} + +/// Round `x` to the nearest integer, breaking ties toward even. This is IEEE 754 +/// `roundToIntegralTiesToEven`. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn roundeven(x: f64) -> f64 { + roundeven_impl(x) +} + +/// Round `x` to the nearest integer, breaking ties toward even. This is IEEE 754 +/// `roundToIntegralTiesToEven`. +#[cfg(f128_enabled)] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn roundevenf128(x: f128) -> f128 { + roundeven_impl(x) +} + +#[inline] +pub fn roundeven_impl(x: F) -> F { + super::generic::rint_round(x, Round::Nearest).val +} diff --git a/libm/src/math/roundf.rs b/libm/src/math/roundf.rs new file mode 100644 index 000000000..b5d7c9d69 --- /dev/null +++ b/libm/src/math/roundf.rs @@ -0,0 +1,5 @@ +/// Round `x` to the nearest integer, breaking ties away from zero. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn roundf(x: f32) -> f32 { + super::generic::round(x) +} diff --git a/libm/src/math/roundf128.rs b/libm/src/math/roundf128.rs new file mode 100644 index 000000000..fc3164929 --- /dev/null +++ b/libm/src/math/roundf128.rs @@ -0,0 +1,5 @@ +/// Round `x` to the nearest integer, breaking ties away from zero. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn roundf128(x: f128) -> f128 { + super::generic::round(x) +} diff --git a/libm/src/math/roundf16.rs b/libm/src/math/roundf16.rs new file mode 100644 index 000000000..8b356eaab --- /dev/null +++ b/libm/src/math/roundf16.rs @@ -0,0 +1,5 @@ +/// Round `x` to the nearest integer, breaking ties away from zero. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn roundf16(x: f16) -> f16 { + super::generic::round(x) +} diff --git a/libm/src/math/scalbn.rs b/libm/src/math/scalbn.rs new file mode 100644 index 000000000..ed73c3f94 --- /dev/null +++ b/libm/src/math/scalbn.rs @@ -0,0 +1,87 @@ +#[cfg(f16_enabled)] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn scalbnf16(x: f16, n: i32) -> f16 { + super::generic::scalbn(x, n) +} + +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn scalbnf(x: f32, n: i32) -> f32 { + super::generic::scalbn(x, n) +} + +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn scalbn(x: f64, n: i32) -> f64 { + super::generic::scalbn(x, n) +} + +#[cfg(f128_enabled)] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn scalbnf128(x: f128, n: i32) -> f128 { + super::generic::scalbn(x, n) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::support::{CastFrom, CastInto, Float}; + + // Tests against N3220 + fn spec_test(f: impl Fn(F, i32) -> F) + where + u32: CastInto, + F::Int: CastFrom, + F::Int: CastFrom, + { + // `scalbn(±0, n)` returns `±0`. + assert_biteq!(f(F::NEG_ZERO, 10), F::NEG_ZERO); + assert_biteq!(f(F::NEG_ZERO, 0), F::NEG_ZERO); + assert_biteq!(f(F::NEG_ZERO, -10), F::NEG_ZERO); + assert_biteq!(f(F::ZERO, 10), F::ZERO); + assert_biteq!(f(F::ZERO, 0), F::ZERO); + assert_biteq!(f(F::ZERO, -10), F::ZERO); + + // `scalbn(x, 0)` returns `x`. + assert_biteq!(f(F::MIN, 0), F::MIN); + assert_biteq!(f(F::MAX, 0), F::MAX); + assert_biteq!(f(F::INFINITY, 0), F::INFINITY); + assert_biteq!(f(F::NEG_INFINITY, 0), F::NEG_INFINITY); + assert_biteq!(f(F::ZERO, 0), F::ZERO); + assert_biteq!(f(F::NEG_ZERO, 0), F::NEG_ZERO); + + // `scalbn(±∞, n)` returns `±∞`. + assert_biteq!(f(F::INFINITY, 10), F::INFINITY); + assert_biteq!(f(F::INFINITY, -10), F::INFINITY); + assert_biteq!(f(F::NEG_INFINITY, 10), F::NEG_INFINITY); + assert_biteq!(f(F::NEG_INFINITY, -10), F::NEG_INFINITY); + + // NaN should remain NaNs. + assert!(f(F::NAN, 10).is_nan()); + assert!(f(F::NAN, 0).is_nan()); + assert!(f(F::NAN, -10).is_nan()); + assert!(f(-F::NAN, 10).is_nan()); + assert!(f(-F::NAN, 0).is_nan()); + assert!(f(-F::NAN, -10).is_nan()); + } + + #[test] + #[cfg(f16_enabled)] + fn spec_test_f16() { + spec_test::(scalbnf16); + } + + #[test] + fn spec_test_f32() { + spec_test::(scalbnf); + } + + #[test] + fn spec_test_f64() { + spec_test::(scalbn); + } + + #[test] + #[cfg(f128_enabled)] + fn spec_test_f128() { + spec_test::(scalbnf128); + } +} diff --git a/libm/src/math/scalbnf.rs b/libm/src/math/scalbnf.rs new file mode 100644 index 000000000..57e7ba76f --- /dev/null +++ b/libm/src/math/scalbnf.rs @@ -0,0 +1,4 @@ +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn scalbnf(x: f32, n: i32) -> f32 { + super::generic::scalbn(x, n) +} diff --git a/libm/src/math/scalbnf128.rs b/libm/src/math/scalbnf128.rs new file mode 100644 index 000000000..c1d2b4855 --- /dev/null +++ b/libm/src/math/scalbnf128.rs @@ -0,0 +1,4 @@ +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn scalbnf128(x: f128, n: i32) -> f128 { + super::generic::scalbn(x, n) +} diff --git a/libm/src/math/scalbnf16.rs b/libm/src/math/scalbnf16.rs new file mode 100644 index 000000000..2209e1a17 --- /dev/null +++ b/libm/src/math/scalbnf16.rs @@ -0,0 +1,4 @@ +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn scalbnf16(x: f16, n: i32) -> f16 { + super::generic::scalbn(x, n) +} diff --git a/src/math/sin.rs b/libm/src/math/sin.rs similarity index 87% rename from src/math/sin.rs rename to libm/src/math/sin.rs index e04e0d6a0..229fa4bef 100644 --- a/src/math/sin.rs +++ b/libm/src/math/sin.rs @@ -81,12 +81,15 @@ pub fn sin(x: f64) -> f64 { } } -#[test] -fn test_near_pi() { - let x = f64::from_bits(0x400921fb000FD5DD); // 3.141592026217707 - let sx = f64::from_bits(0x3ea50d15ced1a4a2); // 6.273720864039205e-7 - let result = sin(x); - #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))] - let result = force_eval!(result); - assert_eq!(result, sx); +#[cfg(test)] +mod tests { + use super::*; + + #[test] + #[cfg_attr(x86_no_sse, ignore = "FIXME(i586): possible incorrect rounding")] + fn test_near_pi() { + let x = f64::from_bits(0x400921fb000FD5DD); // 3.141592026217707 + let sx = f64::from_bits(0x3ea50d15ced1a4a2); // 6.273720864039205e-7 + assert_eq!(sin(x), sx); + } } diff --git a/src/math/sincos.rs b/libm/src/math/sincos.rs similarity index 100% rename from src/math/sincos.rs rename to libm/src/math/sincos.rs diff --git a/src/math/sincosf.rs b/libm/src/math/sincosf.rs similarity index 90% rename from src/math/sincosf.rs rename to libm/src/math/sincosf.rs index 423845e44..f33607676 100644 --- a/src/math/sincosf.rs +++ b/libm/src/math/sincosf.rs @@ -67,14 +67,12 @@ pub fn sincosf(x: f32) -> (f32, f32) { } } /* -sin(x+c) is not correct if x+c could be 0: -0 vs +0 */ - else { - if sign { - s = -k_sinf(x as f64 + S2PIO2); - c = -k_cosf(x as f64 + S2PIO2); - } else { - s = -k_sinf(x as f64 - S2PIO2); - c = -k_cosf(x as f64 - S2PIO2); - } + else if sign { + s = -k_sinf(x as f64 + S2PIO2); + c = -k_cosf(x as f64 + S2PIO2); + } else { + s = -k_sinf(x as f64 - S2PIO2); + c = -k_cosf(x as f64 - S2PIO2); } return (s, c); @@ -91,14 +89,12 @@ pub fn sincosf(x: f32) -> (f32, f32) { s = -k_cosf(x as f64 - S3PIO2); c = k_sinf(x as f64 - S3PIO2); } + } else if sign { + s = k_sinf(x as f64 + S4PIO2); + c = k_cosf(x as f64 + S4PIO2); } else { - if sign { - s = k_sinf(x as f64 + S4PIO2); - c = k_cosf(x as f64 + S4PIO2); - } else { - s = k_sinf(x as f64 - S4PIO2); - c = k_cosf(x as f64 - S4PIO2); - } + s = k_sinf(x as f64 - S4PIO2); + c = k_cosf(x as f64 - S4PIO2); } return (s, c); diff --git a/src/math/sinf.rs b/libm/src/math/sinf.rs similarity index 100% rename from src/math/sinf.rs rename to libm/src/math/sinf.rs diff --git a/src/math/sinh.rs b/libm/src/math/sinh.rs similarity index 100% rename from src/math/sinh.rs rename to libm/src/math/sinh.rs diff --git a/src/math/sinhf.rs b/libm/src/math/sinhf.rs similarity index 100% rename from src/math/sinhf.rs rename to libm/src/math/sinhf.rs diff --git a/libm/src/math/sqrt.rs b/libm/src/math/sqrt.rs new file mode 100644 index 000000000..76bc240cf --- /dev/null +++ b/libm/src/math/sqrt.rs @@ -0,0 +1,51 @@ +/// The square root of `x` (f16). +#[cfg(f16_enabled)] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn sqrtf16(x: f16) -> f16 { + select_implementation! { + name: sqrtf16, + use_arch: all(target_arch = "aarch64", target_feature = "fp16"), + args: x, + } + + return super::generic::sqrt(x); +} + +/// The square root of `x` (f32). +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn sqrtf(x: f32) -> f32 { + select_implementation! { + name: sqrtf, + use_arch: any( + all(target_arch = "aarch64", target_feature = "neon"), + all(target_arch = "wasm32", intrinsics_enabled), + target_feature = "sse2" + ), + args: x, + } + + super::generic::sqrt(x) +} + +/// The square root of `x` (f64). +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn sqrt(x: f64) -> f64 { + select_implementation! { + name: sqrt, + use_arch: any( + all(target_arch = "aarch64", target_feature = "neon"), + all(target_arch = "wasm32", intrinsics_enabled), + target_feature = "sse2" + ), + args: x, + } + + super::generic::sqrt(x) +} + +/// The square root of `x` (f128). +#[cfg(f128_enabled)] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn sqrtf128(x: f128) -> f128 { + return super::generic::sqrt(x); +} diff --git a/libm/src/math/sqrtf.rs b/libm/src/math/sqrtf.rs new file mode 100644 index 000000000..c28a705e3 --- /dev/null +++ b/libm/src/math/sqrtf.rs @@ -0,0 +1,15 @@ +/// The square root of `x` (f32). +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn sqrtf(x: f32) -> f32 { + select_implementation! { + name: sqrtf, + use_arch: any( + all(target_arch = "aarch64", target_feature = "neon"), + all(target_arch = "wasm32", intrinsics_enabled), + target_feature = "sse2" + ), + args: x, + } + + super::generic::sqrt(x) +} diff --git a/libm/src/math/sqrtf128.rs b/libm/src/math/sqrtf128.rs new file mode 100644 index 000000000..eaef6ae0c --- /dev/null +++ b/libm/src/math/sqrtf128.rs @@ -0,0 +1,5 @@ +/// The square root of `x` (f128). +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn sqrtf128(x: f128) -> f128 { + return super::generic::sqrt(x); +} diff --git a/libm/src/math/sqrtf16.rs b/libm/src/math/sqrtf16.rs new file mode 100644 index 000000000..7bedb7f8b --- /dev/null +++ b/libm/src/math/sqrtf16.rs @@ -0,0 +1,11 @@ +/// The square root of `x` (f16). +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn sqrtf16(x: f16) -> f16 { + select_implementation! { + name: sqrtf16, + use_arch: all(target_arch = "aarch64", target_feature = "fp16"), + args: x, + } + + return super::generic::sqrt(x); +} diff --git a/libm/src/math/support/big.rs b/libm/src/math/support/big.rs new file mode 100644 index 000000000..eae08238e --- /dev/null +++ b/libm/src/math/support/big.rs @@ -0,0 +1,239 @@ +//! Integers used for wide operations, larger than `u128`. + +#[cfg(test)] +mod tests; + +use core::ops; + +use super::{DInt, HInt, Int, MinInt}; + +const U128_LO_MASK: u128 = u64::MAX as u128; + +/// A 256-bit unsigned integer represented as two 128-bit native-endian limbs. +#[allow(non_camel_case_types)] +#[derive(Clone, Copy, Debug, PartialEq, PartialOrd)] +pub struct u256 { + pub lo: u128, + pub hi: u128, +} + +impl u256 { + #[cfg(any(test, feature = "unstable-public-internals"))] + pub const MAX: Self = Self { lo: u128::MAX, hi: u128::MAX }; + + /// Reinterpret as a signed integer + pub fn signed(self) -> i256 { + i256 { lo: self.lo, hi: self.hi } + } +} + +/// A 256-bit signed integer represented as two 128-bit native-endian limbs. +#[allow(non_camel_case_types)] +#[derive(Clone, Copy, Debug, PartialEq, PartialOrd)] +pub struct i256 { + pub lo: u128, + pub hi: u128, +} + +impl i256 { + /// Reinterpret as an unsigned integer + #[cfg(any(test, feature = "unstable-public-internals"))] + pub fn unsigned(self) -> u256 { + u256 { lo: self.lo, hi: self.hi } + } +} + +impl MinInt for u256 { + type OtherSign = i256; + + type Unsigned = u256; + + const SIGNED: bool = false; + const BITS: u32 = 256; + const ZERO: Self = Self { lo: 0, hi: 0 }; + const ONE: Self = Self { lo: 1, hi: 0 }; + const MIN: Self = Self { lo: 0, hi: 0 }; + const MAX: Self = Self { lo: u128::MAX, hi: u128::MAX }; +} + +impl MinInt for i256 { + type OtherSign = u256; + + type Unsigned = u256; + + const SIGNED: bool = false; + const BITS: u32 = 256; + const ZERO: Self = Self { lo: 0, hi: 0 }; + const ONE: Self = Self { lo: 1, hi: 0 }; + const MIN: Self = Self { lo: 0, hi: 1 << 127 }; + const MAX: Self = Self { lo: u128::MAX, hi: u128::MAX << 1 }; +} + +macro_rules! impl_common { + ($ty:ty) => { + impl ops::BitOr for $ty { + type Output = Self; + + fn bitor(mut self, rhs: Self) -> Self::Output { + self.lo |= rhs.lo; + self.hi |= rhs.hi; + self + } + } + + impl ops::Not for $ty { + type Output = Self; + + fn not(mut self) -> Self::Output { + self.lo = !self.lo; + self.hi = !self.hi; + self + } + } + + impl ops::Shl for $ty { + type Output = Self; + + fn shl(self, _rhs: u32) -> Self::Output { + unimplemented!("only used to meet trait bounds") + } + } + }; +} + +impl_common!(i256); +impl_common!(u256); + +impl ops::Add for u256 { + type Output = Self; + + fn add(self, rhs: Self) -> Self::Output { + let (lo, carry) = self.lo.overflowing_add(rhs.lo); + let hi = self.hi.wrapping_add(carry as u128).wrapping_add(rhs.hi); + + Self { lo, hi } + } +} + +impl ops::Shr for u256 { + type Output = Self; + + fn shr(mut self, rhs: u32) -> Self::Output { + debug_assert!(rhs < Self::BITS, "attempted to shift right with overflow"); + if rhs >= Self::BITS { + return Self::ZERO; + } + + if rhs == 0 { + return self; + } + + if rhs < 128 { + self.lo >>= rhs; + self.lo |= self.hi << (128 - rhs); + } else { + self.lo = self.hi >> (rhs - 128); + } + + if rhs < 128 { + self.hi >>= rhs; + } else { + self.hi = 0; + } + + self + } +} + +impl HInt for u128 { + type D = u256; + + fn widen(self) -> Self::D { + u256 { lo: self, hi: 0 } + } + + fn zero_widen(self) -> Self::D { + self.widen() + } + + fn zero_widen_mul(self, rhs: Self) -> Self::D { + let l0 = self & U128_LO_MASK; + let l1 = rhs & U128_LO_MASK; + let h0 = self >> 64; + let h1 = rhs >> 64; + + let p_ll: u128 = l0.overflowing_mul(l1).0; + let p_lh: u128 = l0.overflowing_mul(h1).0; + let p_hl: u128 = h0.overflowing_mul(l1).0; + let p_hh: u128 = h0.overflowing_mul(h1).0; + + let s0 = p_hl + (p_ll >> 64); + let s1 = (p_ll & U128_LO_MASK) + (s0 << 64); + let s2 = p_lh + (s1 >> 64); + + let lo = (p_ll & U128_LO_MASK) + (s2 << 64); + let hi = p_hh + (s0 >> 64) + (s2 >> 64); + + u256 { lo, hi } + } + + fn widen_mul(self, rhs: Self) -> Self::D { + self.zero_widen_mul(rhs) + } + + fn widen_hi(self) -> Self::D { + self.widen() << ::BITS + } +} + +impl HInt for i128 { + type D = i256; + + fn widen(self) -> Self::D { + let mut ret = self.unsigned().zero_widen().signed(); + if self.is_negative() { + ret.hi = u128::MAX; + } + ret + } + + fn zero_widen(self) -> Self::D { + self.unsigned().zero_widen().signed() + } + + fn zero_widen_mul(self, rhs: Self) -> Self::D { + self.unsigned().zero_widen_mul(rhs.unsigned()).signed() + } + + fn widen_mul(self, _rhs: Self) -> Self::D { + unimplemented!("signed i128 widening multiply is not used") + } + + fn widen_hi(self) -> Self::D { + self.widen() << ::BITS + } +} + +impl DInt for u256 { + type H = u128; + + fn lo(self) -> Self::H { + self.lo + } + + fn hi(self) -> Self::H { + self.hi + } +} + +impl DInt for i256 { + type H = i128; + + fn lo(self) -> Self::H { + self.lo as i128 + } + + fn hi(self) -> Self::H { + self.hi as i128 + } +} diff --git a/libm/src/math/support/big/tests.rs b/libm/src/math/support/big/tests.rs new file mode 100644 index 000000000..2c71191ba --- /dev/null +++ b/libm/src/math/support/big/tests.rs @@ -0,0 +1,149 @@ +extern crate std; +use std::string::String; +use std::{eprintln, format}; + +use super::{HInt, MinInt, i256, u256}; + +const LOHI_SPLIT: u128 = 0xaaaaaaaaaaaaaaaaffffffffffffffff; + +/// Print a `u256` as hex since we can't add format implementations +fn hexu(v: u256) -> String { + format!("0x{:032x}{:032x}", v.hi, v.lo) +} + +#[test] +fn widen_u128() { + assert_eq!(u128::MAX.widen(), u256 { lo: u128::MAX, hi: 0 }); + assert_eq!(LOHI_SPLIT.widen(), u256 { lo: LOHI_SPLIT, hi: 0 }); +} + +#[test] +fn widen_i128() { + assert_eq!((-1i128).widen(), u256::MAX.signed()); + assert_eq!((LOHI_SPLIT as i128).widen(), i256 { lo: LOHI_SPLIT, hi: u128::MAX }); + assert_eq!((-1i128).zero_widen().unsigned(), (u128::MAX).widen()); +} + +#[test] +fn widen_mul_u128() { + let tests = [ + (u128::MAX / 2, 2_u128, u256 { lo: u128::MAX - 1, hi: 0 }), + (u128::MAX, 2_u128, u256 { lo: u128::MAX - 1, hi: 1 }), + (u128::MAX, u128::MAX, u256 { lo: 1, hi: u128::MAX - 1 }), + (0, 0, u256::ZERO), + (1234u128, 0, u256::ZERO), + (0, 1234, u256::ZERO), + ]; + + let mut has_errors = false; + let mut add_error = |i, a, b, expected, actual| { + has_errors = true; + eprintln!( + "\ + FAILURE ({i}): {a:#034x} * {b:#034x}\n\ + expected: {}\n\ + got: {}\ + ", + hexu(expected), + hexu(actual) + ); + }; + + for (i, (a, b, exp)) in tests.iter().copied().enumerate() { + let res = a.widen_mul(b); + let res_z = a.zero_widen_mul(b); + assert_eq!(res, res_z); + if res != exp { + add_error(i, a, b, exp, res); + } + } + + assert!(!has_errors); +} + +#[test] +fn not_u256() { + assert_eq!(!u256::ZERO, u256::MAX); +} + +#[test] +fn shr_u256() { + let only_low = [1, u16::MAX.into(), u32::MAX.into(), u64::MAX.into(), u128::MAX]; + let mut has_errors = false; + + let mut add_error = |a, b, expected, actual| { + has_errors = true; + eprintln!( + "\ + FAILURE: {} >> {b}\n\ + expected: {}\n\ + actual: {}\ + ", + hexu(a), + hexu(expected), + hexu(actual), + ); + }; + + for a in only_low { + for perturb in 0..10 { + let a = a.saturating_add(perturb); + for shift in 0..128 { + let res = a.widen() >> shift; + let expected = (a >> shift).widen(); + if res != expected { + add_error(a.widen(), shift, expected, res); + } + } + } + } + + let check = [ + (u256::MAX, 1, u256 { lo: u128::MAX, hi: u128::MAX >> 1 }), + (u256::MAX, 5, u256 { lo: u128::MAX, hi: u128::MAX >> 5 }), + (u256::MAX, 63, u256 { lo: u128::MAX, hi: u64::MAX as u128 | (1 << 64) }), + (u256::MAX, 64, u256 { lo: u128::MAX, hi: u64::MAX as u128 }), + (u256::MAX, 65, u256 { lo: u128::MAX, hi: (u64::MAX >> 1) as u128 }), + (u256::MAX, 127, u256 { lo: u128::MAX, hi: 1 }), + (u256::MAX, 128, u256 { lo: u128::MAX, hi: 0 }), + (u256::MAX, 129, u256 { lo: u128::MAX >> 1, hi: 0 }), + (u256::MAX, 191, u256 { lo: u64::MAX as u128 | 1 << 64, hi: 0 }), + (u256::MAX, 192, u256 { lo: u64::MAX as u128, hi: 0 }), + (u256::MAX, 193, u256 { lo: u64::MAX as u128 >> 1, hi: 0 }), + (u256::MAX, 254, u256 { lo: 0b11, hi: 0 }), + (u256::MAX, 255, u256 { lo: 1, hi: 0 }), + ( + u256 { hi: LOHI_SPLIT, lo: 0 }, + 64, + u256 { lo: 0xffffffffffffffff0000000000000000, hi: 0xaaaaaaaaaaaaaaaa }, + ), + ]; + + for (input, shift, expected) in check { + let res = input >> shift; + if res != expected { + add_error(input, shift, expected, res); + } + } + + assert!(!has_errors); +} + +#[test] +#[should_panic] +#[cfg(debug_assertions)] +// FIXME(ppc): ppc64le seems to have issues with `should_panic` tests. +#[cfg(not(all(target_arch = "powerpc64", target_endian = "little")))] +fn shr_u256_overflow() { + // Like regular shr, panic on overflow with debug assertions + let _ = u256::MAX >> 256; +} + +#[test] +#[cfg(not(debug_assertions))] +fn shr_u256_overflow() { + // No panic without debug assertions + assert_eq!(u256::MAX >> 256, u256::ZERO); + assert_eq!(u256::MAX >> 257, u256::ZERO); + assert_eq!(u256::MAX >> u32::MAX, u256::ZERO); +} diff --git a/libm/src/math/support/env.rs b/libm/src/math/support/env.rs new file mode 100644 index 000000000..796309372 --- /dev/null +++ b/libm/src/math/support/env.rs @@ -0,0 +1,127 @@ +//! Support for rounding directions and status flags as specified by IEEE 754. +//! +//! Rust does not support the floating point environment so rounding mode is passed as an argument +//! and status flags are returned as part of the result. There is currently not much support for +//! this; most existing ports from musl use a form of `force_eval!` to raise exceptions, but this +//! has no side effects in Rust. Further, correct behavior relies on elementary operations making +//! use of the correct rounding and raising relevant exceptions, which is not the case for Rust. +//! +//! This module exists so no functionality is lost when porting algorithms that respect floating +//! point environment, and so that some functionality may be tested (that which does not rely on +//! side effects from elementary operations). Full support would require wrappers around basic +//! operations, but there is no plan to add this at the current time. + +/// A value combined with a floating point status. +pub struct FpResult { + pub val: T, + #[cfg_attr(not(feature = "unstable-public-internals"), allow(dead_code))] + pub status: Status, +} + +impl FpResult { + pub fn new(val: T, status: Status) -> Self { + Self { val, status } + } + + /// Return `val` with `Status::OK`. + pub fn ok(val: T) -> Self { + Self { val, status: Status::OK } + } +} + +/// IEEE 754 rounding mode, excluding the optional `roundTiesToAway` version of nearest. +/// +/// Integer representation comes from what CORE-MATH uses for indexing. +#[cfg_attr(not(feature = "unstable-public-internals"), allow(dead_code))] +#[derive(Clone, Copy, Debug, PartialEq)] +pub enum Round { + /// IEEE 754 nearest, `roundTiesToEven`. + Nearest = 0, + /// IEEE 754 `roundTowardNegative`. + Negative = 1, + /// IEEE 754 `roundTowardPositive`. + Positive = 2, + /// IEEE 754 `roundTowardZero`. + Zero = 3, +} + +/// IEEE 754 exception status flags. +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub struct Status(u8); + +impl Status { + /// Default status indicating no errors. + pub const OK: Self = Self(0); + + /// No definable result. + /// + /// Includes: + /// - Any ops on sNaN, with a few exceptions. + /// - `0 * inf`, `inf * 0`. + /// - `fma(0, inf, c)` or `fma(inf, 0, c)`, possibly excluding `c = qNaN`. + /// - `+inf + -inf` and similar (includes subtraction and fma). + /// - `0.0 / 0.0`, `inf / inf` + /// - `remainder(x, y)` if `y == 0.0` or `x == inf`, and neither is NaN. + /// - `sqrt(x)` with `x < 0.0`. + pub const INVALID: Self = Self(1); + + /// Division by zero. + /// + /// The default result for division is +/-inf based on operand sign. For `logB`, the default + /// result is -inf. + /// `x / y` when `x != 0.0` and `y == 0.0`, + #[cfg_attr(not(feature = "unstable-public-internals"), allow(dead_code))] + pub const DIVIDE_BY_ZERO: Self = Self(1 << 2); + + /// The result exceeds the maximum finite value. + /// + /// The default result depends on rounding mode. `Nearest*` rounds to +/- infinity, sign based + /// on the intermediate result. `Zero` rounds to the signed maximum finite. `Positive` and + /// `Negative` round to signed maximum finite in one direction, signed infinity in the other. + #[cfg_attr(not(feature = "unstable-public-internals"), allow(dead_code))] + pub const OVERFLOW: Self = Self(1 << 3); + + /// The result is subnormal and lost precision. + pub const UNDERFLOW: Self = Self(1 << 4); + + /// The finite-precision result does not match that of infinite precision, and the reason + /// is not represented by one of the other flags. + pub const INEXACT: Self = Self(1 << 5); + + /// True if `UNDERFLOW` is set. + #[cfg_attr(not(feature = "unstable-public-internals"), allow(dead_code))] + pub const fn underflow(self) -> bool { + self.0 & Self::UNDERFLOW.0 != 0 + } + + /// True if `OVERFLOW` is set. + #[cfg_attr(not(feature = "unstable-public-internals"), allow(dead_code))] + pub const fn overflow(self) -> bool { + self.0 & Self::OVERFLOW.0 != 0 + } + + pub fn set_underflow(&mut self, val: bool) { + self.set_flag(val, Self::UNDERFLOW); + } + + /// True if `INEXACT` is set. + pub const fn inexact(self) -> bool { + self.0 & Self::INEXACT.0 != 0 + } + + pub fn set_inexact(&mut self, val: bool) { + self.set_flag(val, Self::INEXACT); + } + + fn set_flag(&mut self, val: bool, mask: Self) { + if val { + self.0 |= mask.0; + } else { + self.0 &= !mask.0; + } + } + + pub(crate) const fn with(self, rhs: Self) -> Self { + Self(self.0 | rhs.0) + } +} diff --git a/libm/src/math/support/float_traits.rs b/libm/src/math/support/float_traits.rs new file mode 100644 index 000000000..fac104832 --- /dev/null +++ b/libm/src/math/support/float_traits.rs @@ -0,0 +1,484 @@ +use core::{fmt, mem, ops}; + +use super::int_traits::{CastFrom, Int, MinInt}; + +/// Trait for some basic operations on floats +// #[allow(dead_code)] +pub trait Float: + Copy + + fmt::Debug + + PartialEq + + PartialOrd + + ops::AddAssign + + ops::MulAssign + + ops::Add + + ops::Sub + + ops::Mul + + ops::Div + + ops::Rem + + ops::Neg + + 'static +{ + /// A uint of the same width as the float + type Int: Int; + + /// A int of the same width as the float + type SignedInt: Int + + MinInt + + ops::Neg; + + const ZERO: Self; + const NEG_ZERO: Self; + const ONE: Self; + const NEG_ONE: Self; + const INFINITY: Self; + const NEG_INFINITY: Self; + const NAN: Self; + const NEG_NAN: Self; + const MAX: Self; + const MIN: Self; + const EPSILON: Self; + const PI: Self; + const NEG_PI: Self; + const FRAC_PI_2: Self; + + const MIN_POSITIVE_NORMAL: Self; + + /// The bitwidth of the float type + const BITS: u32; + + /// The bitwidth of the significand + const SIG_BITS: u32; + + /// The bitwidth of the exponent + const EXP_BITS: u32 = Self::BITS - Self::SIG_BITS - 1; + + /// The saturated (maximum bitpattern) value of the exponent, i.e. the infinite + /// representation. + /// + /// This shifted fully right, use `EXP_MASK` for the shifted value. + const EXP_SAT: u32 = (1 << Self::EXP_BITS) - 1; + + /// The exponent bias value + const EXP_BIAS: u32 = Self::EXP_SAT >> 1; + + /// Maximum unbiased exponent value. + const EXP_MAX: i32 = Self::EXP_BIAS as i32; + + /// Minimum *NORMAL* unbiased exponent value. + const EXP_MIN: i32 = -(Self::EXP_MAX - 1); + + /// Minimum subnormal exponent value. + const EXP_MIN_SUBNORM: i32 = Self::EXP_MIN - Self::SIG_BITS as i32; + + /// A mask for the sign bit + const SIGN_MASK: Self::Int; + + /// A mask for the significand + const SIG_MASK: Self::Int; + + /// A mask for the exponent + const EXP_MASK: Self::Int; + + /// The implicit bit of the float format + const IMPLICIT_BIT: Self::Int; + + /// Returns `self` transmuted to `Self::Int` + fn to_bits(self) -> Self::Int; + + /// Returns `self` transmuted to `Self::SignedInt` + #[allow(dead_code)] + fn to_bits_signed(self) -> Self::SignedInt { + self.to_bits().signed() + } + + /// Check bitwise equality. + #[allow(dead_code)] + fn biteq(self, rhs: Self) -> bool { + self.to_bits() == rhs.to_bits() + } + + /// Checks if two floats have the same bit representation. *Except* for NaNs! NaN can be + /// represented in multiple different ways. + /// + /// This method returns `true` if two NaNs are compared. Use [`biteq`](Self::biteq) instead + /// if `NaN` should not be treated separately. + #[allow(dead_code)] + fn eq_repr(self, rhs: Self) -> bool { + if self.is_nan() && rhs.is_nan() { true } else { self.biteq(rhs) } + } + + /// Returns true if the value is NaN. + fn is_nan(self) -> bool; + + /// Returns true if the value is +inf or -inf. + fn is_infinite(self) -> bool; + + /// Returns true if the sign is negative. Extracts the sign bit regardless of zero or NaN. + fn is_sign_negative(self) -> bool; + + /// Returns true if the sign is positive. Extracts the sign bit regardless of zero or NaN. + fn is_sign_positive(self) -> bool { + !self.is_sign_negative() + } + + /// Returns if `self` is subnormal. + #[allow(dead_code)] + fn is_subnormal(self) -> bool { + (self.to_bits() & Self::EXP_MASK) == Self::Int::ZERO + } + + /// Returns the exponent, not adjusting for bias, not accounting for subnormals or zero. + fn ex(self) -> u32 { + u32::cast_from(self.to_bits() >> Self::SIG_BITS) & Self::EXP_SAT + } + + /// Extract the exponent and adjust it for bias, not accounting for subnormals or zero. + fn exp_unbiased(self) -> i32 { + self.ex().signed() - (Self::EXP_BIAS as i32) + } + + /// Returns the significand with no implicit bit (or the "fractional" part) + #[allow(dead_code)] + fn frac(self) -> Self::Int { + self.to_bits() & Self::SIG_MASK + } + + /// Returns a `Self::Int` transmuted back to `Self` + fn from_bits(a: Self::Int) -> Self; + + /// Constructs a `Self` from its parts. Inputs are treated as bits and shifted into position. + fn from_parts(negative: bool, exponent: u32, significand: Self::Int) -> Self { + let sign = if negative { Self::Int::ONE } else { Self::Int::ZERO }; + Self::from_bits( + (sign << (Self::BITS - 1)) + | (Self::Int::cast_from(exponent & Self::EXP_SAT) << Self::SIG_BITS) + | (significand & Self::SIG_MASK), + ) + } + + #[allow(dead_code)] + fn abs(self) -> Self; + + /// Returns a number composed of the magnitude of self and the sign of sign. + fn copysign(self, other: Self) -> Self; + + /// Fused multiply add, rounding once. + fn fma(self, y: Self, z: Self) -> Self; + + /// Returns (normalized exponent, normalized significand) + #[allow(dead_code)] + fn normalize(significand: Self::Int) -> (i32, Self::Int); + + /// Returns a number that represents the sign of self. + #[allow(dead_code)] + fn signum(self) -> Self { + if self.is_nan() { self } else { Self::ONE.copysign(self) } + } +} + +/// Access the associated `Int` type from a float (helper to avoid ambiguous associated types). +pub type IntTy = ::Int; + +macro_rules! float_impl { + ( + $ty:ident, + $ity:ident, + $sity:ident, + $bits:expr, + $significand_bits:expr, + $from_bits:path, + $to_bits:path, + $fma_fn:ident, + $fma_intrinsic:ident + ) => { + impl Float for $ty { + type Int = $ity; + type SignedInt = $sity; + + const ZERO: Self = 0.0; + const NEG_ZERO: Self = -0.0; + const ONE: Self = 1.0; + const NEG_ONE: Self = -1.0; + const INFINITY: Self = Self::INFINITY; + const NEG_INFINITY: Self = Self::NEG_INFINITY; + const NAN: Self = Self::NAN; + // NAN isn't guaranteed to be positive but it usually is. We only use this for + // tests. + const NEG_NAN: Self = $from_bits($to_bits(Self::NAN) | Self::SIGN_MASK); + const MAX: Self = -Self::MIN; + // Sign bit set, saturated mantissa, saturated exponent with last bit zeroed + const MIN: Self = $from_bits(Self::Int::MAX & !(1 << Self::SIG_BITS)); + const EPSILON: Self = <$ty>::EPSILON; + + // Exponent is a 1 in the LSB + const MIN_POSITIVE_NORMAL: Self = $from_bits(1 << Self::SIG_BITS); + + const PI: Self = core::$ty::consts::PI; + const NEG_PI: Self = -Self::PI; + const FRAC_PI_2: Self = core::$ty::consts::FRAC_PI_2; + + const BITS: u32 = $bits; + const SIG_BITS: u32 = $significand_bits; + + const SIGN_MASK: Self::Int = 1 << (Self::BITS - 1); + const SIG_MASK: Self::Int = (1 << Self::SIG_BITS) - 1; + const EXP_MASK: Self::Int = !(Self::SIGN_MASK | Self::SIG_MASK); + const IMPLICIT_BIT: Self::Int = 1 << Self::SIG_BITS; + + fn to_bits(self) -> Self::Int { + self.to_bits() + } + fn is_nan(self) -> bool { + self.is_nan() + } + fn is_infinite(self) -> bool { + self.is_infinite() + } + fn is_sign_negative(self) -> bool { + self.is_sign_negative() + } + fn from_bits(a: Self::Int) -> Self { + Self::from_bits(a) + } + fn abs(self) -> Self { + cfg_if! { + // FIXME(msrv): `abs` is available in `core` starting with 1.85. + if #[cfg(intrinsics_enabled)] { + self.abs() + } else { + super::super::generic::fabs(self) + } + } + } + fn copysign(self, other: Self) -> Self { + cfg_if! { + // FIXME(msrv): `copysign` is available in `core` starting with 1.85. + if #[cfg(intrinsics_enabled)] { + self.copysign(other) + } else { + super::super::generic::copysign(self, other) + } + } + } + fn fma(self, y: Self, z: Self) -> Self { + cfg_if! { + // fma is not yet available in `core` + if #[cfg(intrinsics_enabled)] { + unsafe{ core::intrinsics::$fma_intrinsic(self, y, z) } + } else { + super::super::$fma_fn(self, y, z) + } + } + } + fn normalize(significand: Self::Int) -> (i32, Self::Int) { + let shift = significand.leading_zeros().wrapping_sub(Self::EXP_BITS); + (1i32.wrapping_sub(shift as i32), significand << shift as Self::Int) + } + } + }; +} + +#[cfg(f16_enabled)] +float_impl!(f16, u16, i16, 16, 10, f16::from_bits, f16::to_bits, fmaf16, fmaf16); +float_impl!(f32, u32, i32, 32, 23, f32_from_bits, f32_to_bits, fmaf, fmaf32); +float_impl!(f64, u64, i64, 64, 52, f64_from_bits, f64_to_bits, fma, fmaf64); +#[cfg(f128_enabled)] +float_impl!(f128, u128, i128, 128, 112, f128::from_bits, f128::to_bits, fmaf128, fmaf128); + +/* FIXME(msrv): vendor some things that are not const stable at our MSRV */ + +/// `f32::from_bits` +pub const fn f32_from_bits(bits: u32) -> f32 { + // SAFETY: POD cast with no preconditions + unsafe { mem::transmute::(bits) } +} + +/// `f32::to_bits` +pub const fn f32_to_bits(x: f32) -> u32 { + // SAFETY: POD cast with no preconditions + unsafe { mem::transmute::(x) } +} + +/// `f64::from_bits` +pub const fn f64_from_bits(bits: u64) -> f64 { + // SAFETY: POD cast with no preconditions + unsafe { mem::transmute::(bits) } +} + +/// `f64::to_bits` +pub const fn f64_to_bits(x: f64) -> u64 { + // SAFETY: POD cast with no preconditions + unsafe { mem::transmute::(x) } +} + +/// Trait for floats twice the bit width of another integer. +pub trait DFloat: Float { + /// Float that is half the bit width of the floatthis trait is implemented for. + type H: HFloat; + + /// Narrow the float type. + fn narrow(self) -> Self::H; +} + +/// Trait for floats half the bit width of another float. +pub trait HFloat: Float { + /// Float that is double the bit width of the float this trait is implemented for. + type D: DFloat; + + /// Widen the float type. + fn widen(self) -> Self::D; +} + +macro_rules! impl_d_float { + ($($X:ident $D:ident),*) => { + $( + impl DFloat for $D { + type H = $X; + + fn narrow(self) -> Self::H { + self as $X + } + } + )* + }; +} + +macro_rules! impl_h_float { + ($($H:ident $X:ident),*) => { + $( + impl HFloat for $H { + type D = $X; + + fn widen(self) -> Self::D { + self as $X + } + } + )* + }; +} + +impl_d_float!(f32 f64); +#[cfg(f16_enabled)] +impl_d_float!(f16 f32); +#[cfg(f128_enabled)] +impl_d_float!(f64 f128); + +impl_h_float!(f32 f64); +#[cfg(f16_enabled)] +impl_h_float!(f16 f32); +#[cfg(f128_enabled)] +impl_h_float!(f64 f128); + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + #[cfg(f16_enabled)] + fn check_f16() { + // Constants + assert_eq!(f16::EXP_SAT, 0b11111); + assert_eq!(f16::EXP_BIAS, 15); + assert_eq!(f16::EXP_MAX, 15); + assert_eq!(f16::EXP_MIN, -14); + assert_eq!(f16::EXP_MIN_SUBNORM, -24); + + // `exp_unbiased` + assert_eq!(f16::FRAC_PI_2.exp_unbiased(), 0); + assert_eq!((1.0f16 / 2.0).exp_unbiased(), -1); + assert_eq!(f16::MAX.exp_unbiased(), 15); + assert_eq!(f16::MIN.exp_unbiased(), 15); + assert_eq!(f16::MIN_POSITIVE.exp_unbiased(), -14); + // This is a convenience method and not ldexp, `exp_unbiased` does not return correct + // results for zero and subnormals. + assert_eq!(f16::ZERO.exp_unbiased(), -15); + assert_eq!(f16::from_bits(0x1).exp_unbiased(), -15); + assert_eq!(f16::MIN_POSITIVE, f16::MIN_POSITIVE_NORMAL); + + // `from_parts` + assert_biteq!(f16::from_parts(true, f16::EXP_BIAS, 0), -1.0f16); + assert_biteq!(f16::from_parts(false, 0, 1), f16::from_bits(0x1)); + } + + #[test] + fn check_f32() { + // Constants + assert_eq!(f32::EXP_SAT, 0b11111111); + assert_eq!(f32::EXP_BIAS, 127); + assert_eq!(f32::EXP_MAX, 127); + assert_eq!(f32::EXP_MIN, -126); + assert_eq!(f32::EXP_MIN_SUBNORM, -149); + + // `exp_unbiased` + assert_eq!(f32::FRAC_PI_2.exp_unbiased(), 0); + assert_eq!((1.0f32 / 2.0).exp_unbiased(), -1); + assert_eq!(f32::MAX.exp_unbiased(), 127); + assert_eq!(f32::MIN.exp_unbiased(), 127); + assert_eq!(f32::MIN_POSITIVE.exp_unbiased(), -126); + // This is a convenience method and not ldexp, `exp_unbiased` does not return correct + // results for zero and subnormals. + assert_eq!(f32::ZERO.exp_unbiased(), -127); + assert_eq!(f32::from_bits(0x1).exp_unbiased(), -127); + assert_eq!(f32::MIN_POSITIVE, f32::MIN_POSITIVE_NORMAL); + + // `from_parts` + assert_biteq!(f32::from_parts(true, f32::EXP_BIAS, 0), -1.0f32); + assert_biteq!(f32::from_parts(false, 10 + f32::EXP_BIAS, 0), hf32!("0x1p10")); + assert_biteq!(f32::from_parts(false, 0, 1), f32::from_bits(0x1)); + } + + #[test] + fn check_f64() { + // Constants + assert_eq!(f64::EXP_SAT, 0b11111111111); + assert_eq!(f64::EXP_BIAS, 1023); + assert_eq!(f64::EXP_MAX, 1023); + assert_eq!(f64::EXP_MIN, -1022); + assert_eq!(f64::EXP_MIN_SUBNORM, -1074); + + // `exp_unbiased` + assert_eq!(f64::FRAC_PI_2.exp_unbiased(), 0); + assert_eq!((1.0f64 / 2.0).exp_unbiased(), -1); + assert_eq!(f64::MAX.exp_unbiased(), 1023); + assert_eq!(f64::MIN.exp_unbiased(), 1023); + assert_eq!(f64::MIN_POSITIVE.exp_unbiased(), -1022); + // This is a convenience method and not ldexp, `exp_unbiased` does not return correct + // results for zero and subnormals. + assert_eq!(f64::ZERO.exp_unbiased(), -1023); + assert_eq!(f64::from_bits(0x1).exp_unbiased(), -1023); + assert_eq!(f64::MIN_POSITIVE, f64::MIN_POSITIVE_NORMAL); + + // `from_parts` + assert_biteq!(f64::from_parts(true, f64::EXP_BIAS, 0), -1.0f64); + assert_biteq!(f64::from_parts(false, 10 + f64::EXP_BIAS, 0), hf64!("0x1p10")); + assert_biteq!(f64::from_parts(false, 0, 1), f64::from_bits(0x1)); + } + + #[test] + #[cfg(f128_enabled)] + fn check_f128() { + // Constants + assert_eq!(f128::EXP_SAT, 0b111111111111111); + assert_eq!(f128::EXP_BIAS, 16383); + assert_eq!(f128::EXP_MAX, 16383); + assert_eq!(f128::EXP_MIN, -16382); + assert_eq!(f128::EXP_MIN_SUBNORM, -16494); + + // `exp_unbiased` + assert_eq!(f128::FRAC_PI_2.exp_unbiased(), 0); + assert_eq!((1.0f128 / 2.0).exp_unbiased(), -1); + assert_eq!(f128::MAX.exp_unbiased(), 16383); + assert_eq!(f128::MIN.exp_unbiased(), 16383); + assert_eq!(f128::MIN_POSITIVE.exp_unbiased(), -16382); + // This is a convenience method and not ldexp, `exp_unbiased` does not return correct + // results for zero and subnormals. + assert_eq!(f128::ZERO.exp_unbiased(), -16383); + assert_eq!(f128::from_bits(0x1).exp_unbiased(), -16383); + assert_eq!(f128::MIN_POSITIVE, f128::MIN_POSITIVE_NORMAL); + + // `from_parts` + assert_biteq!(f128::from_parts(true, f128::EXP_BIAS, 0), -1.0f128); + assert_biteq!(f128::from_parts(false, 0, 1), f128::from_bits(0x1)); + } +} diff --git a/libm/src/math/support/hex_float.rs b/libm/src/math/support/hex_float.rs new file mode 100644 index 000000000..819e2f56e --- /dev/null +++ b/libm/src/math/support/hex_float.rs @@ -0,0 +1,1155 @@ +//! Utilities for working with hex float formats. + +use core::fmt; + +use super::{Float, Round, Status, f32_from_bits, f64_from_bits}; + +/// Construct a 16-bit float from hex float representation (C-style) +#[cfg(f16_enabled)] +pub const fn hf16(s: &str) -> f16 { + match parse_hex_exact(s, 16, 10) { + Ok(bits) => f16::from_bits(bits as u16), + Err(HexFloatParseError(s)) => panic!("{}", s), + } +} + +/// Construct a 32-bit float from hex float representation (C-style) +#[allow(unused)] +pub const fn hf32(s: &str) -> f32 { + match parse_hex_exact(s, 32, 23) { + Ok(bits) => f32_from_bits(bits as u32), + Err(HexFloatParseError(s)) => panic!("{}", s), + } +} + +/// Construct a 64-bit float from hex float representation (C-style) +pub const fn hf64(s: &str) -> f64 { + match parse_hex_exact(s, 64, 52) { + Ok(bits) => f64_from_bits(bits as u64), + Err(HexFloatParseError(s)) => panic!("{}", s), + } +} + +/// Construct a 128-bit float from hex float representation (C-style) +#[cfg(f128_enabled)] +pub const fn hf128(s: &str) -> f128 { + match parse_hex_exact(s, 128, 112) { + Ok(bits) => f128::from_bits(bits), + Err(HexFloatParseError(s)) => panic!("{}", s), + } +} +#[derive(Copy, Clone, Debug)] +pub struct HexFloatParseError(&'static str); + +/// Parses any float to its bitwise representation, returning an error if it cannot be represented exactly +pub const fn parse_hex_exact( + s: &str, + bits: u32, + sig_bits: u32, +) -> Result { + match parse_any(s, bits, sig_bits, Round::Nearest) { + Err(e) => Err(e), + Ok((bits, Status::OK)) => Ok(bits), + Ok((_, status)) if status.overflow() => Err(HexFloatParseError("the value is too huge")), + Ok((_, status)) if status.underflow() => Err(HexFloatParseError("the value is too tiny")), + Ok((_, status)) if status.inexact() => Err(HexFloatParseError("the value is too precise")), + Ok(_) => unreachable!(), + } +} + +/// Parse any float from hex to its bitwise representation. +pub const fn parse_any( + s: &str, + bits: u32, + sig_bits: u32, + round: Round, +) -> Result<(u128, Status), HexFloatParseError> { + let mut b = s.as_bytes(); + + if sig_bits > 119 || bits > 128 || bits < sig_bits + 3 || bits > sig_bits + 30 { + return Err(HexFloatParseError("unsupported target float configuration")); + } + + let neg = matches!(b, [b'-', ..]); + if let &[b'-' | b'+', ref rest @ ..] = b { + b = rest; + } + + let sign_bit = 1 << (bits - 1); + let quiet_bit = 1 << (sig_bits - 1); + let nan = sign_bit - quiet_bit; + let inf = nan - quiet_bit; + + let (mut x, status) = match *b { + [b'i' | b'I', b'n' | b'N', b'f' | b'F'] => (inf, Status::OK), + [b'n' | b'N', b'a' | b'A', b'n' | b'N'] => (nan, Status::OK), + [b'0', b'x' | b'X', ref rest @ ..] => { + let round = match (neg, round) { + // parse("-x", Round::Positive) == -parse("x", Round::Negative) + (true, Round::Positive) => Round::Negative, + (true, Round::Negative) => Round::Positive, + // rounding toward nearest or zero are symmetric + (true, Round::Nearest | Round::Zero) | (false, _) => round, + }; + match parse_finite(rest, bits, sig_bits, round) { + Err(e) => return Err(e), + Ok(res) => res, + } + } + _ => return Err(HexFloatParseError("no hex indicator")), + }; + + if neg { + x ^= sign_bit; + } + + Ok((x, status)) +} + +const fn parse_finite( + b: &[u8], + bits: u32, + sig_bits: u32, + rounding_mode: Round, +) -> Result<(u128, Status), HexFloatParseError> { + let exp_bits: u32 = bits - sig_bits - 1; + let max_msb: i32 = (1 << (exp_bits - 1)) - 1; + // The exponent of one ULP in the subnormals + let min_lsb: i32 = 1 - max_msb - sig_bits as i32; + + let (mut sig, mut exp) = match parse_hex(b) { + Err(e) => return Err(e), + Ok(Parsed { sig: 0, .. }) => return Ok((0, Status::OK)), + Ok(Parsed { sig, exp }) => (sig, exp), + }; + + let mut round_bits = u128_ilog2(sig) as i32 - sig_bits as i32; + + // Round at least up to min_lsb + if exp < min_lsb - round_bits { + round_bits = min_lsb - exp; + } + + let mut status = Status::OK; + + exp += round_bits; + + if round_bits > 0 { + // first, prepare for rounding exactly two bits + if round_bits == 1 { + sig <<= 1; + } else if round_bits > 2 { + sig = shr_odd_rounding(sig, (round_bits - 2) as u32); + } + + if sig & 0b11 != 0 { + status = Status::INEXACT; + } + + sig = shr2_round(sig, rounding_mode); + } else if round_bits < 0 { + sig <<= -round_bits; + } + + // The parsed value is X = sig * 2^exp + // Expressed as a multiple U of the smallest subnormal value: + // X = U * 2^min_lsb, so U = sig * 2^(exp-min_lsb) + let uexp = (exp - min_lsb) as u128; + let uexp = uexp << sig_bits; + + // Note that it is possible for the exponent bits to equal 2 here + // if the value rounded up, but that means the mantissa is all zeroes + // so the value is still correct + debug_assert!(sig <= 2 << sig_bits); + + let inf = ((1 << exp_bits) - 1) << sig_bits; + + let bits = match sig.checked_add(uexp) { + Some(bits) if bits < inf => { + // inexact subnormal or zero? + if status.inexact() && bits < (1 << sig_bits) { + status = status.with(Status::UNDERFLOW); + } + bits + } + _ => { + // overflow to infinity + status = status.with(Status::OVERFLOW).with(Status::INEXACT); + match rounding_mode { + Round::Positive | Round::Nearest => inf, + Round::Negative | Round::Zero => inf - 1, + } + } + }; + Ok((bits, status)) +} + +/// Shift right, rounding all inexact divisions to the nearest odd number +/// E.g. (0 >> 4) -> 0, (1..=31 >> 4) -> 1, (32 >> 4) -> 2, ... +/// +/// Useful for reducing a number before rounding the last two bits, since +/// the result of the final rounding is preserved for all rounding modes. +const fn shr_odd_rounding(x: u128, k: u32) -> u128 { + if k < 128 { + let inexact = x.trailing_zeros() < k; + (x >> k) | (inexact as u128) + } else { + (x != 0) as u128 + } +} + +/// Divide by 4, rounding with the given mode +const fn shr2_round(mut x: u128, round: Round) -> u128 { + let t = (x as u32) & 0b111; + x >>= 2; + match round { + // Look-up-table on the last three bits for when to round up + Round::Nearest => x + ((0b11001000_u8 >> t) & 1) as u128, + + Round::Negative => x, + Round::Zero => x, + Round::Positive => x + (t & 0b11 != 0) as u128, + } +} + +/// A parsed finite and unsigned floating point number. +struct Parsed { + /// Absolute value sig * 2^exp + sig: u128, + exp: i32, +} + +/// Parse a hexadecimal float x +const fn parse_hex(mut b: &[u8]) -> Result { + let mut sig: u128 = 0; + let mut exp: i32 = 0; + + let mut seen_point = false; + let mut some_digits = false; + let mut inexact = false; + + while let &[c, ref rest @ ..] = b { + b = rest; + + match c { + b'.' => { + if seen_point { + return Err(HexFloatParseError("unexpected '.' parsing fractional digits")); + } + seen_point = true; + continue; + } + b'p' | b'P' => break, + c => { + let digit = match hex_digit(c) { + Some(d) => d, + None => return Err(HexFloatParseError("expected hexadecimal digit")), + }; + some_digits = true; + + if (sig >> 124) == 0 { + sig <<= 4; + sig |= digit as u128; + } else { + // FIXME: it is technically possible for exp to overflow if parsing a string with >500M digits + exp += 4; + inexact |= digit != 0; + } + // Up until the fractional point, the value grows + // with more digits, but after it the exponent is + // compensated to match. + if seen_point { + exp -= 4; + } + } + } + } + // If we've set inexact, the exact value has more than 125 + // significant bits, and lies somewhere between sig and sig + 1. + // Because we'll round off at least two of the trailing bits, + // setting the last bit gives correct rounding for inexact values. + sig |= inexact as u128; + + if !some_digits { + return Err(HexFloatParseError("at least one digit is required")); + }; + + some_digits = false; + + let negate_exp = matches!(b, [b'-', ..]); + if let &[b'-' | b'+', ref rest @ ..] = b { + b = rest; + } + + let mut pexp: u32 = 0; + while let &[c, ref rest @ ..] = b { + b = rest; + let digit = match dec_digit(c) { + Some(d) => d, + None => return Err(HexFloatParseError("expected decimal digit")), + }; + some_digits = true; + pexp = pexp.saturating_mul(10); + pexp += digit as u32; + } + + if !some_digits { + return Err(HexFloatParseError("at least one exponent digit is required")); + }; + + { + let e; + if negate_exp { + e = (exp as i64) - (pexp as i64); + } else { + e = (exp as i64) + (pexp as i64); + }; + + exp = if e < i32::MIN as i64 { + i32::MIN + } else if e > i32::MAX as i64 { + i32::MAX + } else { + e as i32 + }; + } + /* FIXME(msrv): once MSRV >= 1.66, replace the above workaround block with: + if negate_exp { + exp = exp.saturating_sub_unsigned(pexp); + } else { + exp = exp.saturating_add_unsigned(pexp); + }; + */ + + Ok(Parsed { sig, exp }) +} + +const fn dec_digit(c: u8) -> Option { + match c { + b'0'..=b'9' => Some(c - b'0'), + _ => None, + } +} + +const fn hex_digit(c: u8) -> Option { + match c { + b'0'..=b'9' => Some(c - b'0'), + b'a'..=b'f' => Some(c - b'a' + 10), + b'A'..=b'F' => Some(c - b'A' + 10), + _ => None, + } +} + +/* FIXME(msrv): vendor some things that are not const stable at our MSRV */ + +/// `u128::ilog2` +const fn u128_ilog2(v: u128) -> u32 { + assert!(v != 0); + u128::BITS - 1 - v.leading_zeros() +} + +/// Format a floating point number as its IEEE hex (`%a`) representation. +pub struct Hexf(pub F); + +// Adapted from https://github.com/ericseppanen/hexfloat2/blob/a5c27932f0ff/src/format.rs +#[cfg(not(feature = "compiler-builtins"))] +fn fmt_any_hex(x: &F, f: &mut fmt::Formatter<'_>) -> fmt::Result { + if x.is_sign_negative() { + write!(f, "-")?; + } + + if x.is_nan() { + return write!(f, "NaN"); + } else if x.is_infinite() { + return write!(f, "inf"); + } else if *x == F::ZERO { + return write!(f, "0x0p+0"); + } + + let mut exponent = x.exp_unbiased(); + let sig = x.to_bits() & F::SIG_MASK; + + let bias = F::EXP_BIAS as i32; + // The mantissa MSB needs to be shifted up to the nearest nibble. + let mshift = (4 - (F::SIG_BITS % 4)) % 4; + let sig = sig << mshift; + // The width is rounded up to the nearest char (4 bits) + let mwidth = (F::SIG_BITS as usize + 3) / 4; + let leading = if exponent == -bias { + // subnormal number means we shift our output by 1 bit. + exponent += 1; + "0." + } else { + "1." + }; + + write!(f, "0x{leading}{sig:0mwidth$x}p{exponent:+}") +} + +#[cfg(feature = "compiler-builtins")] +fn fmt_any_hex(_x: &F, _f: &mut fmt::Formatter<'_>) -> fmt::Result { + unimplemented!() +} + +impl fmt::LowerHex for Hexf { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + cfg_if! { + if #[cfg(feature = "compiler-builtins")] { + let _ = f; + unimplemented!() + } else { + fmt_any_hex(&self.0, f) + } + } + } +} + +impl fmt::LowerHex for Hexf<(F, F)> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + cfg_if! { + if #[cfg(feature = "compiler-builtins")] { + let _ = f; + unimplemented!() + } else { + write!(f, "({:x}, {:x})", Hexf(self.0.0), Hexf(self.0.1)) + } + } + } +} + +impl fmt::LowerHex for Hexf<(F, i32)> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + cfg_if! { + if #[cfg(feature = "compiler-builtins")] { + let _ = f; + unimplemented!() + } else { + write!(f, "({:x}, {:x})", Hexf(self.0.0), Hexf(self.0.1)) + } + } + } +} + +impl fmt::LowerHex for Hexf { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + cfg_if! { + if #[cfg(feature = "compiler-builtins")] { + let _ = f; + unimplemented!() + } else { + fmt::LowerHex::fmt(&self.0, f) + } + } + } +} + +impl fmt::Debug for Hexf +where + Hexf: fmt::LowerHex, +{ + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + cfg_if! { + if #[cfg(feature = "compiler-builtins")] { + let _ = f; + unimplemented!() + } else { + fmt::LowerHex::fmt(self, f) + } + } + } +} + +impl fmt::Display for Hexf +where + Hexf: fmt::LowerHex, +{ + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + cfg_if! { + if #[cfg(feature = "compiler-builtins")] { + let _ = f; + unimplemented!() + } else { + fmt::LowerHex::fmt(self, f) + } + } + } +} + +#[cfg(test)] +mod parse_tests { + extern crate std; + use std::{format, println}; + + use super::*; + + #[cfg(f16_enabled)] + fn rounding_properties(s: &str) -> Result<(), HexFloatParseError> { + let (xd, s0) = parse_any(s, 16, 10, Round::Negative)?; + let (xu, s1) = parse_any(s, 16, 10, Round::Positive)?; + let (xz, s2) = parse_any(s, 16, 10, Round::Zero)?; + let (xn, s3) = parse_any(s, 16, 10, Round::Nearest)?; + + // FIXME: A value between the least normal and largest subnormal + // could have underflow status depend on rounding mode. + + if let Status::OK = s0 { + // an exact result is the same for all rounding modes + assert_eq!(s0, s1); + assert_eq!(s0, s2); + assert_eq!(s0, s3); + + assert_eq!(xd, xu); + assert_eq!(xd, xz); + assert_eq!(xd, xn); + } else { + assert!([s0, s1, s2, s3].into_iter().all(Status::inexact)); + + let xd = f16::from_bits(xd as u16); + let xu = f16::from_bits(xu as u16); + let xz = f16::from_bits(xz as u16); + let xn = f16::from_bits(xn as u16); + + assert_biteq!(xd.next_up(), xu, "s={s}, xd={xd:?}, xu={xu:?}"); + + let signs = [xd, xu, xz, xn].map(f16::is_sign_negative); + + if signs == [true; 4] { + assert_biteq!(xz, xu); + } else { + assert_eq!(signs, [false; 4]); + assert_biteq!(xz, xd); + } + + if xn.to_bits() != xd.to_bits() { + assert_biteq!(xn, xu); + } + } + Ok(()) + } + #[test] + #[cfg(f16_enabled)] + fn test_rounding() { + let n = 1_i32 << 14; + for i in -n..n { + let u = i.rotate_right(11) as u32; + let s = format!("{}", Hexf(f32::from_bits(u))); + assert!(rounding_properties(&s).is_ok()); + } + } + + #[test] + fn test_parse_any() { + for k in -149..=127 { + let s = format!("0x1p{k}"); + let x = hf32(&s); + let y = if k < 0 { 0.5f32.powi(-k) } else { 2.0f32.powi(k) }; + assert_eq!(x, y); + } + + let mut s = *b"0x.0000000p-121"; + for e in 0..40 { + for k in 0..(1 << 15) { + let expected = f32::from_bits(k) * 2.0f32.powi(e); + let x = hf32(std::str::from_utf8(&s).unwrap()); + assert_eq!( + x.to_bits(), + expected.to_bits(), + "\ + e={e}\n\ + k={k}\n\ + x={x}\n\ + expected={expected}\n\ + s={}\n\ + f32::from_bits(k)={}\n\ + 2.0f32.powi(e)={}\ + ", + std::str::from_utf8(&s).unwrap(), + f32::from_bits(k), + 2.0f32.powi(e), + ); + for i in (3..10).rev() { + if s[i] == b'f' { + s[i] = b'0'; + } else if s[i] == b'9' { + s[i] = b'a'; + break; + } else { + s[i] += 1; + break; + } + } + } + for i in (12..15).rev() { + if s[i] == b'0' { + s[i] = b'9'; + } else { + s[i] -= 1; + break; + } + } + for i in (3..10).rev() { + s[i] = b'0'; + } + } + } + + // FIXME: this test is causing failures that are likely UB on various platforms + #[cfg(all(target_arch = "x86_64", target_os = "linux"))] + #[test] + #[cfg(f128_enabled)] + fn rounding() { + let pi = std::f128::consts::PI; + let s = format!("{}", Hexf(pi)); + + for k in 0..=111 { + let (bits, status) = parse_any(&s, 128 - k, 112 - k, Round::Nearest).unwrap(); + let scale = (1u128 << (112 - k - 1)) as f128; + let expected = (pi * scale).round_ties_even() / scale; + assert_eq!(bits << k, expected.to_bits(), "k = {k}, s = {s}"); + assert_eq!(expected != pi, status.inexact()); + } + } + #[test] + fn rounding_extreme_underflow() { + for k in 1..1000 { + let s = format!("0x1p{}", -149 - k); + let Ok((bits, status)) = parse_any(&s, 32, 23, Round::Nearest) else { unreachable!() }; + assert_eq!(bits, 0, "{s} should round to zero, got bits={bits}"); + assert!(status.underflow(), "should indicate underflow when parsing {s}"); + assert!(status.inexact(), "should indicate inexact when parsing {s}"); + } + } + #[test] + fn long_tail() { + for k in 1..1000 { + let s = format!("0x1.{}p0", "0".repeat(k)); + let Ok(bits) = parse_hex_exact(&s, 32, 23) else { panic!("parsing {s} failed") }; + assert_eq!(f32::from_bits(bits as u32), 1.0); + + let s = format!("0x1.{}1p0", "0".repeat(k)); + let Ok((bits, status)) = parse_any(&s, 32, 23, Round::Nearest) else { unreachable!() }; + if status.inexact() { + assert!(1.0 == f32::from_bits(bits as u32)); + } else { + assert!(1.0 < f32::from_bits(bits as u32)); + } + } + } + // HACK(msrv): 1.63 rejects unknown width float literals at an AST level, so use a macro to + // hide them from the AST. + #[cfg(f16_enabled)] + macro_rules! f16_tests { + () => { + #[test] + fn test_f16() { + let checks = [ + ("0x.1234p+16", (0x1234 as f16).to_bits()), + ("0x1.234p+12", (0x1234 as f16).to_bits()), + ("0x12.34p+8", (0x1234 as f16).to_bits()), + ("0x123.4p+4", (0x1234 as f16).to_bits()), + ("0x1234p+0", (0x1234 as f16).to_bits()), + ("0x1234.p+0", (0x1234 as f16).to_bits()), + ("0x1234.0p+0", (0x1234 as f16).to_bits()), + ("0x1.ffcp+15", f16::MAX.to_bits()), + ("0x1.0p+1", 2.0f16.to_bits()), + ("0x1.0p+0", 1.0f16.to_bits()), + ("0x1.ffp+8", 0x5ffc), + ("+0x1.ffp+8", 0x5ffc), + ("0x1p+0", 0x3c00), + ("0x1.998p-4", 0x2e66), + ("0x1.9p+6", 0x5640), + ("0x0.0p0", 0.0f16.to_bits()), + ("-0x0.0p0", (-0.0f16).to_bits()), + ("0x1.0p0", 1.0f16.to_bits()), + ("0x1.998p-4", (0.1f16).to_bits()), + ("-0x1.998p-4", (-0.1f16).to_bits()), + ("0x0.123p-12", 0x0123), + ("0x1p-24", 0x0001), + ("nan", f16::NAN.to_bits()), + ("-nan", (-f16::NAN).to_bits()), + ("inf", f16::INFINITY.to_bits()), + ("-inf", f16::NEG_INFINITY.to_bits()), + ]; + for (s, exp) in checks { + println!("parsing {s}"); + assert!(rounding_properties(s).is_ok()); + let act = hf16(s).to_bits(); + assert_eq!( + act, exp, + "parsing {s}: {act:#06x} != {exp:#06x}\nact: {act:#018b}\nexp: {exp:#018b}" + ); + } + } + + #[test] + fn test_macros_f16() { + assert_eq!(hf16!("0x1.ffp+8").to_bits(), 0x5ffc_u16); + } + }; + } + + #[cfg(f16_enabled)] + f16_tests!(); + + #[test] + fn test_f32() { + let checks = [ + ("0x.1234p+16", (0x1234 as f32).to_bits()), + ("0x1.234p+12", (0x1234 as f32).to_bits()), + ("0x12.34p+8", (0x1234 as f32).to_bits()), + ("0x123.4p+4", (0x1234 as f32).to_bits()), + ("0x1234p+0", (0x1234 as f32).to_bits()), + ("0x1234.p+0", (0x1234 as f32).to_bits()), + ("0x1234.0p+0", (0x1234 as f32).to_bits()), + ("0x1.fffffep+127", f32::MAX.to_bits()), + ("0x1.0p+1", 2.0f32.to_bits()), + ("0x1.0p+0", 1.0f32.to_bits()), + ("0x1.ffep+8", 0x43fff000), + ("+0x1.ffep+8", 0x43fff000), + ("0x1p+0", 0x3f800000), + ("0x1.99999ap-4", 0x3dcccccd), + ("0x1.9p+6", 0x42c80000), + ("0x1.2d5ed2p+20", 0x4996af69), + ("-0x1.348eb8p+10", 0xc49a475c), + ("-0x1.33dcfep-33", 0xaf19ee7f), + ("0x0.0p0", 0.0f32.to_bits()), + ("-0x0.0p0", (-0.0f32).to_bits()), + ("0x1.0p0", 1.0f32.to_bits()), + ("0x1.99999ap-4", (0.1f32).to_bits()), + ("-0x1.99999ap-4", (-0.1f32).to_bits()), + ("0x1.111114p-127", 0x00444445), + ("0x1.23456p-130", 0x00091a2b), + ("0x1p-149", 0x00000001), + ("nan", f32::NAN.to_bits()), + ("-nan", (-f32::NAN).to_bits()), + ("inf", f32::INFINITY.to_bits()), + ("-inf", f32::NEG_INFINITY.to_bits()), + ]; + for (s, exp) in checks { + println!("parsing {s}"); + let act = hf32(s).to_bits(); + assert_eq!( + act, exp, + "parsing {s}: {act:#010x} != {exp:#010x}\nact: {act:#034b}\nexp: {exp:#034b}" + ); + } + } + + #[test] + fn test_f64() { + let checks = [ + ("0x.1234p+16", (0x1234 as f64).to_bits()), + ("0x1.234p+12", (0x1234 as f64).to_bits()), + ("0x12.34p+8", (0x1234 as f64).to_bits()), + ("0x123.4p+4", (0x1234 as f64).to_bits()), + ("0x1234p+0", (0x1234 as f64).to_bits()), + ("0x1234.p+0", (0x1234 as f64).to_bits()), + ("0x1234.0p+0", (0x1234 as f64).to_bits()), + ("0x1.ffep+8", 0x407ffe0000000000), + ("0x1p+0", 0x3ff0000000000000), + ("0x1.999999999999ap-4", 0x3fb999999999999a), + ("0x1.9p+6", 0x4059000000000000), + ("0x1.2d5ed1fe1da7bp+20", 0x4132d5ed1fe1da7b), + ("-0x1.348eb851eb852p+10", 0xc09348eb851eb852), + ("-0x1.33dcfe54a3803p-33", 0xbde33dcfe54a3803), + ("0x1.0p0", 1.0f64.to_bits()), + ("0x0.0p0", 0.0f64.to_bits()), + ("-0x0.0p0", (-0.0f64).to_bits()), + ("0x1.999999999999ap-4", 0.1f64.to_bits()), + ("0x1.999999999998ap-4", (0.1f64 - f64::EPSILON).to_bits()), + ("-0x1.999999999999ap-4", (-0.1f64).to_bits()), + ("-0x1.999999999998ap-4", (-0.1f64 + f64::EPSILON).to_bits()), + ("0x0.8000000000001p-1022", 0x0008000000000001), + ("0x0.123456789abcdp-1022", 0x000123456789abcd), + ("0x0.0000000000002p-1022", 0x0000000000000002), + ("nan", f64::NAN.to_bits()), + ("-nan", (-f64::NAN).to_bits()), + ("inf", f64::INFINITY.to_bits()), + ("-inf", f64::NEG_INFINITY.to_bits()), + ]; + for (s, exp) in checks { + println!("parsing {s}"); + let act = hf64(s).to_bits(); + assert_eq!( + act, exp, + "parsing {s}: {act:#018x} != {exp:#018x}\nact: {act:#066b}\nexp: {exp:#066b}" + ); + } + } + + // HACK(msrv): 1.63 rejects unknown width float literals at an AST level, so use a macro to + // hide them from the AST. + #[cfg(f128_enabled)] + macro_rules! f128_tests { + () => { + #[test] + fn test_f128() { + let checks = [ + ("0x.1234p+16", (0x1234 as f128).to_bits()), + ("0x1.234p+12", (0x1234 as f128).to_bits()), + ("0x12.34p+8", (0x1234 as f128).to_bits()), + ("0x123.4p+4", (0x1234 as f128).to_bits()), + ("0x1234p+0", (0x1234 as f128).to_bits()), + ("0x1234.p+0", (0x1234 as f128).to_bits()), + ("0x1234.0p+0", (0x1234 as f128).to_bits()), + ("0x1.ffffffffffffffffffffffffffffp+16383", f128::MAX.to_bits()), + ("0x1.0p+1", 2.0f128.to_bits()), + ("0x1.0p+0", 1.0f128.to_bits()), + ("0x1.ffep+8", 0x4007ffe0000000000000000000000000), + ("+0x1.ffep+8", 0x4007ffe0000000000000000000000000), + ("0x1p+0", 0x3fff0000000000000000000000000000), + ("0x1.999999999999999999999999999ap-4", 0x3ffb999999999999999999999999999a), + ("0x1.9p+6", 0x40059000000000000000000000000000), + ("0x0.0p0", 0.0f128.to_bits()), + ("-0x0.0p0", (-0.0f128).to_bits()), + ("0x1.0p0", 1.0f128.to_bits()), + ("0x1.999999999999999999999999999ap-4", (0.1f128).to_bits()), + ("-0x1.999999999999999999999999999ap-4", (-0.1f128).to_bits()), + ("0x0.abcdef0123456789abcdef012345p-16382", 0x0000abcdef0123456789abcdef012345), + ("0x1p-16494", 0x00000000000000000000000000000001), + ("nan", f128::NAN.to_bits()), + ("-nan", (-f128::NAN).to_bits()), + ("inf", f128::INFINITY.to_bits()), + ("-inf", f128::NEG_INFINITY.to_bits()), + ]; + for (s, exp) in checks { + println!("parsing {s}"); + let act = hf128(s).to_bits(); + assert_eq!( + act, exp, + "parsing {s}: {act:#034x} != {exp:#034x}\nact: {act:#0130b}\nexp: {exp:#0130b}" + ); + } + } + + #[test] + fn test_macros_f128() { + assert_eq!(hf128!("0x1.ffep+8").to_bits(), 0x4007ffe0000000000000000000000000_u128); + } + } + } + + #[cfg(f128_enabled)] + f128_tests!(); + + #[test] + fn test_macros() { + #[cfg(f16_enabled)] + assert_eq!(hf16!("0x1.ffp+8").to_bits(), 0x5ffc_u16); + assert_eq!(hf32!("0x1.ffep+8").to_bits(), 0x43fff000_u32); + assert_eq!(hf64!("0x1.ffep+8").to_bits(), 0x407ffe0000000000_u64); + #[cfg(f128_enabled)] + assert_eq!(hf128!("0x1.ffep+8").to_bits(), 0x4007ffe0000000000000000000000000_u128); + } +} + +#[cfg(test)] +// FIXME(ppc): something with `should_panic` tests cause a SIGILL with ppc64le +#[cfg(not(all(target_arch = "powerpc64", target_endian = "little")))] +mod tests_panicking { + extern crate std; + use super::*; + + // HACK(msrv): 1.63 rejects unknown width float literals at an AST level, so use a macro to + // hide them from the AST. + #[cfg(f16_enabled)] + macro_rules! f16_tests { + () => { + #[test] + fn test_f16_almost_extra_precision() { + // Exact maximum precision allowed + hf16("0x1.ffcp+0"); + } + + #[test] + #[should_panic(expected = "the value is too precise")] + fn test_f16_extra_precision() { + // One bit more than the above. + hf16("0x1.ffdp+0"); + } + + #[test] + #[should_panic(expected = "the value is too huge")] + fn test_f16_overflow() { + // One bit more than the above. + hf16("0x1p+16"); + } + + #[test] + fn test_f16_tiniest() { + let x = hf16("0x1.p-24"); + let y = hf16("0x0.001p-12"); + let z = hf16("0x0.8p-23"); + assert_eq!(x, y); + assert_eq!(x, z); + } + + #[test] + #[should_panic(expected = "the value is too tiny")] + fn test_f16_too_tiny() { + hf16("0x1.p-25"); + } + + #[test] + #[should_panic(expected = "the value is too tiny")] + fn test_f16_also_too_tiny() { + hf16("0x0.8p-24"); + } + + #[test] + #[should_panic(expected = "the value is too tiny")] + fn test_f16_again_too_tiny() { + hf16("0x0.001p-13"); + } + }; + } + + #[cfg(f16_enabled)] + f16_tests!(); + + #[test] + fn test_f32_almost_extra_precision() { + // Exact maximum precision allowed + hf32("0x1.abcdeep+0"); + } + + #[test] + #[should_panic] + fn test_f32_extra_precision2() { + // One bit more than the above. + hf32("0x1.ffffffp+127"); + } + + #[test] + #[should_panic(expected = "the value is too huge")] + fn test_f32_overflow() { + // One bit more than the above. + hf32("0x1p+128"); + } + + #[test] + #[should_panic(expected = "the value is too precise")] + fn test_f32_extra_precision() { + // One bit more than the above. + hf32("0x1.abcdefp+0"); + } + + #[test] + fn test_f32_tiniest() { + let x = hf32("0x1.p-149"); + let y = hf32("0x0.0000000000000001p-85"); + let z = hf32("0x0.8p-148"); + assert_eq!(x, y); + assert_eq!(x, z); + } + + #[test] + #[should_panic(expected = "the value is too tiny")] + fn test_f32_too_tiny() { + hf32("0x1.p-150"); + } + + #[test] + #[should_panic(expected = "the value is too tiny")] + fn test_f32_also_too_tiny() { + hf32("0x0.8p-149"); + } + + #[test] + #[should_panic(expected = "the value is too tiny")] + fn test_f32_again_too_tiny() { + hf32("0x0.0000000000000001p-86"); + } + + #[test] + fn test_f64_almost_extra_precision() { + // Exact maximum precision allowed + hf64("0x1.abcdabcdabcdfp+0"); + } + + #[test] + #[should_panic(expected = "the value is too precise")] + fn test_f64_extra_precision() { + // One bit more than the above. + hf64("0x1.abcdabcdabcdf8p+0"); + } + + // HACK(msrv): 1.63 rejects unknown width float literals at an AST level, so use a macro to + // hide them from the AST. + #[cfg(f128_enabled)] + macro_rules! f128_tests { + () => { + #[test] + fn test_f128_almost_extra_precision() { + // Exact maximum precision allowed + hf128("0x1.ffffffffffffffffffffffffffffp+16383"); + } + + #[test] + #[should_panic(expected = "the value is too precise")] + fn test_f128_extra_precision() { + // Just below the maximum finite. + hf128("0x1.fffffffffffffffffffffffffffe8p+16383"); + } + #[test] + #[should_panic(expected = "the value is too huge")] + fn test_f128_extra_precision_overflow() { + // One bit more than the above. Should overflow. + hf128("0x1.ffffffffffffffffffffffffffff8p+16383"); + } + + #[test] + #[should_panic(expected = "the value is too huge")] + fn test_f128_overflow() { + // One bit more than the above. + hf128("0x1p+16384"); + } + + #[test] + fn test_f128_tiniest() { + let x = hf128("0x1.p-16494"); + let y = hf128("0x0.0000000000000001p-16430"); + let z = hf128("0x0.8p-16493"); + assert_eq!(x, y); + assert_eq!(x, z); + } + + #[test] + #[should_panic(expected = "the value is too tiny")] + fn test_f128_too_tiny() { + hf128("0x1.p-16495"); + } + + #[test] + #[should_panic(expected = "the value is too tiny")] + fn test_f128_again_too_tiny() { + hf128("0x0.0000000000000001p-16431"); + } + + #[test] + #[should_panic(expected = "the value is too tiny")] + fn test_f128_also_too_tiny() { + hf128("0x0.8p-16494"); + } + }; + } + + #[cfg(f128_enabled)] + f128_tests!(); +} + +#[cfg(test)] +mod print_tests { + extern crate std; + use std::string::ToString; + + use super::*; + + #[test] + #[cfg(f16_enabled)] + fn test_f16() { + use std::format; + // Exhaustively check that `f16` roundtrips. + for x in 0..=u16::MAX { + let f = f16::from_bits(x); + let s = format!("{}", Hexf(f)); + let from_s = hf16(&s); + + if f.is_nan() && from_s.is_nan() { + continue; + } + + assert_eq!( + f.to_bits(), + from_s.to_bits(), + "{f:?} formatted as {s} but parsed as {from_s:?}" + ); + } + } + + #[test] + #[cfg(f16_enabled)] + fn test_f16_to_f32() { + use std::format; + // Exhaustively check that these are equivalent for all `f16`: + // - `f16 -> f32` + // - `f16 -> str -> f32` + // - `f16 -> f32 -> str -> f32` + // - `f16 -> f32 -> str -> f16 -> f32` + for x in 0..=u16::MAX { + let f16 = f16::from_bits(x); + let s16 = format!("{}", Hexf(f16)); + let f32 = f16 as f32; + let s32 = format!("{}", Hexf(f32)); + + let a = hf32(&s16); + let b = hf32(&s32); + let c = hf16(&s32); + + if f32.is_nan() && a.is_nan() && b.is_nan() && c.is_nan() { + continue; + } + + assert_eq!( + f32.to_bits(), + a.to_bits(), + "{f16:?} : f16 formatted as {s16} which parsed as {a:?} : f16" + ); + assert_eq!( + f32.to_bits(), + b.to_bits(), + "{f32:?} : f32 formatted as {s32} which parsed as {b:?} : f32" + ); + assert_eq!( + f32.to_bits(), + (c as f32).to_bits(), + "{f32:?} : f32 formatted as {s32} which parsed as {c:?} : f16" + ); + } + } + #[test] + fn spot_checks() { + assert_eq!(Hexf(f32::MAX).to_string(), "0x1.fffffep+127"); + assert_eq!(Hexf(f64::MAX).to_string(), "0x1.fffffffffffffp+1023"); + + assert_eq!(Hexf(f32::MIN).to_string(), "-0x1.fffffep+127"); + assert_eq!(Hexf(f64::MIN).to_string(), "-0x1.fffffffffffffp+1023"); + + assert_eq!(Hexf(f32::ZERO).to_string(), "0x0p+0"); + assert_eq!(Hexf(f64::ZERO).to_string(), "0x0p+0"); + + assert_eq!(Hexf(f32::NEG_ZERO).to_string(), "-0x0p+0"); + assert_eq!(Hexf(f64::NEG_ZERO).to_string(), "-0x0p+0"); + + assert_eq!(Hexf(f32::NAN).to_string(), "NaN"); + assert_eq!(Hexf(f64::NAN).to_string(), "NaN"); + + assert_eq!(Hexf(f32::INFINITY).to_string(), "inf"); + assert_eq!(Hexf(f64::INFINITY).to_string(), "inf"); + + assert_eq!(Hexf(f32::NEG_INFINITY).to_string(), "-inf"); + assert_eq!(Hexf(f64::NEG_INFINITY).to_string(), "-inf"); + + #[cfg(f16_enabled)] + { + assert_eq!(Hexf(f16::MAX).to_string(), "0x1.ffcp+15"); + assert_eq!(Hexf(f16::MIN).to_string(), "-0x1.ffcp+15"); + assert_eq!(Hexf(f16::ZERO).to_string(), "0x0p+0"); + assert_eq!(Hexf(f16::NEG_ZERO).to_string(), "-0x0p+0"); + assert_eq!(Hexf(f16::NAN).to_string(), "NaN"); + assert_eq!(Hexf(f16::INFINITY).to_string(), "inf"); + assert_eq!(Hexf(f16::NEG_INFINITY).to_string(), "-inf"); + } + + #[cfg(f128_enabled)] + { + assert_eq!(Hexf(f128::MAX).to_string(), "0x1.ffffffffffffffffffffffffffffp+16383"); + assert_eq!(Hexf(f128::MIN).to_string(), "-0x1.ffffffffffffffffffffffffffffp+16383"); + assert_eq!(Hexf(f128::ZERO).to_string(), "0x0p+0"); + assert_eq!(Hexf(f128::NEG_ZERO).to_string(), "-0x0p+0"); + assert_eq!(Hexf(f128::NAN).to_string(), "NaN"); + assert_eq!(Hexf(f128::INFINITY).to_string(), "inf"); + assert_eq!(Hexf(f128::NEG_INFINITY).to_string(), "-inf"); + } + } +} diff --git a/libm/src/math/support/int_traits.rs b/libm/src/math/support/int_traits.rs new file mode 100644 index 000000000..491adb1f2 --- /dev/null +++ b/libm/src/math/support/int_traits.rs @@ -0,0 +1,451 @@ +use core::{cmp, fmt, ops}; + +/// Minimal integer implementations needed on all integer types, including wide integers. +pub trait MinInt: + Copy + + fmt::Debug + + ops::BitOr + + ops::Not + + ops::Shl +{ + /// Type with the same width but other signedness + type OtherSign: MinInt; + /// Unsigned version of Self + type Unsigned: MinInt; + + /// If `Self` is a signed integer + const SIGNED: bool; + + /// The bitwidth of the int type + const BITS: u32; + + const ZERO: Self; + const ONE: Self; + const MIN: Self; + const MAX: Self; +} + +/// Access the associated `OtherSign` type from an int (helper to avoid ambiguous associated +/// types). +pub type OtherSign = ::OtherSign; + +/// Trait for some basic operations on integers +#[allow(dead_code)] +pub trait Int: + MinInt + + fmt::Display + + fmt::Binary + + fmt::LowerHex + + PartialEq + + PartialOrd + + ops::AddAssign + + ops::SubAssign + + ops::BitAndAssign + + ops::BitOrAssign + + ops::BitXorAssign + + ops::ShlAssign + + ops::ShlAssign + + ops::ShrAssign + + ops::ShrAssign + + ops::Add + + ops::Sub + + ops::Mul + + ops::Div + + ops::Shl + + ops::Shl + + ops::Shr + + ops::Shr + + ops::BitXor + + ops::BitAnd + + cmp::Ord + + From + + CastFrom + + CastFrom + + CastFrom + + CastFrom + + CastFrom + + CastInto + + CastInto + + CastInto + + CastInto + + CastInto +{ + fn signed(self) -> OtherSign; + fn unsigned(self) -> Self::Unsigned; + fn from_unsigned(unsigned: Self::Unsigned) -> Self; + fn abs(self) -> Self; + + fn from_bool(b: bool) -> Self; + + /// Prevents the need for excessive conversions between signed and unsigned + fn logical_shr(self, other: u32) -> Self; + + /// Absolute difference between two integers. + fn abs_diff(self, other: Self) -> Self::Unsigned; + + // copied from primitive integers, but put in a trait + fn is_zero(self) -> bool; + fn checked_add(self, other: Self) -> Option; + fn checked_sub(self, other: Self) -> Option; + fn wrapping_neg(self) -> Self; + fn wrapping_add(self, other: Self) -> Self; + fn wrapping_mul(self, other: Self) -> Self; + fn wrapping_sub(self, other: Self) -> Self; + fn wrapping_shl(self, other: u32) -> Self; + fn wrapping_shr(self, other: u32) -> Self; + fn rotate_left(self, other: u32) -> Self; + fn overflowing_add(self, other: Self) -> (Self, bool); + fn overflowing_sub(self, other: Self) -> (Self, bool); + fn leading_zeros(self) -> u32; + fn ilog2(self) -> u32; +} + +macro_rules! int_impl_common { + ($ty:ty) => { + fn from_bool(b: bool) -> Self { + b as $ty + } + + fn logical_shr(self, other: u32) -> Self { + Self::from_unsigned(self.unsigned().wrapping_shr(other)) + } + + fn is_zero(self) -> bool { + self == Self::ZERO + } + + fn checked_add(self, other: Self) -> Option { + self.checked_add(other) + } + + fn checked_sub(self, other: Self) -> Option { + self.checked_sub(other) + } + + fn wrapping_neg(self) -> Self { + ::wrapping_neg(self) + } + + fn wrapping_add(self, other: Self) -> Self { + ::wrapping_add(self, other) + } + + fn wrapping_mul(self, other: Self) -> Self { + ::wrapping_mul(self, other) + } + + fn wrapping_sub(self, other: Self) -> Self { + ::wrapping_sub(self, other) + } + + fn wrapping_shl(self, other: u32) -> Self { + ::wrapping_shl(self, other) + } + + fn wrapping_shr(self, other: u32) -> Self { + ::wrapping_shr(self, other) + } + + fn rotate_left(self, other: u32) -> Self { + ::rotate_left(self, other) + } + + fn overflowing_add(self, other: Self) -> (Self, bool) { + ::overflowing_add(self, other) + } + + fn overflowing_sub(self, other: Self) -> (Self, bool) { + ::overflowing_sub(self, other) + } + + fn leading_zeros(self) -> u32 { + ::leading_zeros(self) + } + + fn ilog2(self) -> u32 { + // On our older MSRV, this resolves to the trait method. Which won't actually work, + // but this is only called behind other gates. + #[allow(clippy::incompatible_msrv)] + ::ilog2(self) + } + }; +} + +macro_rules! int_impl { + ($ity:ty, $uty:ty) => { + impl MinInt for $uty { + type OtherSign = $ity; + type Unsigned = $uty; + + const BITS: u32 = ::ZERO.count_zeros(); + const SIGNED: bool = Self::MIN != Self::ZERO; + + const ZERO: Self = 0; + const ONE: Self = 1; + const MIN: Self = ::MIN; + const MAX: Self = ::MAX; + } + + impl Int for $uty { + fn signed(self) -> $ity { + self as $ity + } + + fn unsigned(self) -> Self { + self + } + + fn abs(self) -> Self { + unimplemented!() + } + + // It makes writing macros easier if this is implemented for both signed and unsigned + #[allow(clippy::wrong_self_convention)] + fn from_unsigned(me: $uty) -> Self { + me + } + + fn abs_diff(self, other: Self) -> Self { + self.abs_diff(other) + } + + int_impl_common!($uty); + } + + impl MinInt for $ity { + type OtherSign = $uty; + type Unsigned = $uty; + + const BITS: u32 = ::ZERO.count_zeros(); + const SIGNED: bool = Self::MIN != Self::ZERO; + + const ZERO: Self = 0; + const ONE: Self = 1; + const MIN: Self = ::MIN; + const MAX: Self = ::MAX; + } + + impl Int for $ity { + fn signed(self) -> Self { + self + } + + fn unsigned(self) -> $uty { + self as $uty + } + + fn abs(self) -> Self { + self.abs() + } + + fn from_unsigned(me: $uty) -> Self { + me as $ity + } + + fn abs_diff(self, other: Self) -> $uty { + self.abs_diff(other) + } + + int_impl_common!($ity); + } + }; +} + +int_impl!(isize, usize); +int_impl!(i8, u8); +int_impl!(i16, u16); +int_impl!(i32, u32); +int_impl!(i64, u64); +int_impl!(i128, u128); + +/// Trait for integers twice the bit width of another integer. This is implemented for all +/// primitives except for `u8`, because there is not a smaller primitive. +pub trait DInt: MinInt { + /// Integer that is half the bit width of the integer this trait is implemented for + type H: HInt; + + /// Returns the low half of `self` + fn lo(self) -> Self::H; + /// Returns the high half of `self` + fn hi(self) -> Self::H; + /// Returns the low and high halves of `self` as a tuple + fn lo_hi(self) -> (Self::H, Self::H) { + (self.lo(), self.hi()) + } + /// Constructs an integer using lower and higher half parts + #[allow(unused)] + fn from_lo_hi(lo: Self::H, hi: Self::H) -> Self { + lo.zero_widen() | hi.widen_hi() + } +} + +/// Trait for integers half the bit width of another integer. This is implemented for all +/// primitives except for `u128`, because it there is not a larger primitive. +pub trait HInt: Int { + /// Integer that is double the bit width of the integer this trait is implemented for + type D: DInt + MinInt; + + // NB: some of the below methods could have default implementations (e.g. `widen_hi`), but for + // unknown reasons this can cause infinite recursion when optimizations are disabled. See + // for context. + + /// Widens (using default extension) the integer to have double bit width + fn widen(self) -> Self::D; + /// Widens (zero extension only) the integer to have double bit width. This is needed to get + /// around problems with associated type bounds (such as `Int`) being unstable + fn zero_widen(self) -> Self::D; + /// Widens the integer to have double bit width and shifts the integer into the higher bits + #[allow(unused)] + fn widen_hi(self) -> Self::D; + /// Widening multiplication with zero widening. This cannot overflow. + fn zero_widen_mul(self, rhs: Self) -> Self::D; + /// Widening multiplication. This cannot overflow. + fn widen_mul(self, rhs: Self) -> Self::D; +} + +macro_rules! impl_d_int { + ($($X:ident $D:ident),*) => { + $( + impl DInt for $D { + type H = $X; + + fn lo(self) -> Self::H { + self as $X + } + fn hi(self) -> Self::H { + (self >> <$X as MinInt>::BITS) as $X + } + } + )* + }; +} + +macro_rules! impl_h_int { + ($($H:ident $uH:ident $X:ident),*) => { + $( + impl HInt for $H { + type D = $X; + + fn widen(self) -> Self::D { + self as $X + } + fn zero_widen(self) -> Self::D { + (self as $uH) as $X + } + fn zero_widen_mul(self, rhs: Self) -> Self::D { + self.zero_widen().wrapping_mul(rhs.zero_widen()) + } + fn widen_mul(self, rhs: Self) -> Self::D { + self.widen().wrapping_mul(rhs.widen()) + } + fn widen_hi(self) -> Self::D { + (self as $X) << ::BITS + } + } + )* + }; +} + +impl_d_int!(u8 u16, u16 u32, u32 u64, u64 u128, i8 i16, i16 i32, i32 i64, i64 i128); +impl_h_int!( + u8 u8 u16, + u16 u16 u32, + u32 u32 u64, + u64 u64 u128, + i8 u8 i16, + i16 u16 i32, + i32 u32 i64, + i64 u64 i128 +); + +/// Trait to express (possibly lossy) casting of integers +pub trait CastInto: Copy { + /// By default, casts should be exact. + fn cast(self) -> T; + + /// Call for casts that are expected to truncate. + fn cast_lossy(self) -> T; +} + +pub trait CastFrom: Copy { + /// By default, casts should be exact. + fn cast_from(value: T) -> Self; + + /// Call for casts that are expected to truncate. + fn cast_from_lossy(value: T) -> Self; +} + +impl + Copy> CastFrom for T { + fn cast_from(value: U) -> Self { + value.cast() + } + + fn cast_from_lossy(value: U) -> Self { + value.cast_lossy() + } +} + +macro_rules! cast_into { + ($ty:ty) => { + cast_into!($ty; usize, isize, u8, i8, u16, i16, u32, i32, u64, i64, u128, i128); + }; + ($ty:ty; $($into:ty),*) => {$( + impl CastInto<$into> for $ty { + fn cast(self) -> $into { + // All we can really do to enforce casting rules is check the rules when in + // debug mode. + #[cfg(not(feature = "compiler-builtins"))] + debug_assert!(<$into>::try_from(self).is_ok(), "failed cast from {self}"); + self as $into + } + + fn cast_lossy(self) -> $into { + self as $into + } + } + )*}; +} + +macro_rules! cast_into_float { + ($ty:ty) => { + #[cfg(f16_enabled)] + cast_into_float!($ty; f16); + + cast_into_float!($ty; f32, f64); + + #[cfg(f128_enabled)] + cast_into_float!($ty; f128); + }; + ($ty:ty; $($into:ty),*) => {$( + impl CastInto<$into> for $ty { + fn cast(self) -> $into { + #[cfg(not(feature = "compiler-builtins"))] + debug_assert_eq!(self as $into as $ty, self, "inexact float cast"); + self as $into + } + + fn cast_lossy(self) -> $into { + self as $into + } + } + )*}; +} + +cast_into!(usize); +cast_into!(isize); +cast_into!(u8); +cast_into!(i8); +cast_into!(u16); +cast_into!(i16); +cast_into!(u32); +cast_into!(i32); +cast_into!(u64); +cast_into!(i64); +cast_into!(u128); +cast_into!(i128); + +cast_into_float!(i8); +cast_into_float!(i16); +cast_into_float!(i32); +cast_into_float!(i64); +cast_into_float!(i128); diff --git a/libm/src/math/support/macros.rs b/libm/src/math/support/macros.rs new file mode 100644 index 000000000..0b72db0e4 --- /dev/null +++ b/libm/src/math/support/macros.rs @@ -0,0 +1,157 @@ +/// `libm` cannot have dependencies, so this is vendored directly from the `cfg-if` crate +/// (with some comments stripped for compactness). +macro_rules! cfg_if { + // match if/else chains with a final `else` + ($( + if #[cfg($meta:meta)] { $($tokens:tt)* } + ) else * else { + $($tokens2:tt)* + }) => { + cfg_if! { @__items () ; $( ( ($meta) ($($tokens)*) ), )* ( () ($($tokens2)*) ), } + }; + + // match if/else chains lacking a final `else` + ( + if #[cfg($i_met:meta)] { $($i_tokens:tt)* } + $( else if #[cfg($e_met:meta)] { $($e_tokens:tt)* } )* + ) => { + cfg_if! { + @__items + () ; + ( ($i_met) ($($i_tokens)*) ), + $( ( ($e_met) ($($e_tokens)*) ), )* + ( () () ), + } + }; + + // Internal and recursive macro to emit all the items + // + // Collects all the negated cfgs in a list at the beginning and after the + // semicolon is all the remaining items + (@__items ($($not:meta,)*) ; ) => {}; + (@__items ($($not:meta,)*) ; ( ($($m:meta),*) ($($tokens:tt)*) ), $($rest:tt)*) => { + #[cfg(all($($m,)* not(any($($not),*))))] cfg_if! { @__identity $($tokens)* } + cfg_if! { @__items ($($not,)* $($m,)*) ; $($rest)* } + }; + + // Internal macro to make __apply work out right for different match types, + // because of how macros matching/expand stuff. + (@__identity $($tokens:tt)*) => { $($tokens)* }; +} + +/// Choose between using an arch-specific implementation and the function body. Returns directly +/// if the arch implementation is used, otherwise continue with the rest of the function. +/// +/// Specify a `use_arch` meta field if an architecture-specific implementation is provided. +/// These live in the `math::arch::some_target_arch` module. +/// +/// Specify a `use_arch_required` meta field if something architecture-specific must be used +/// regardless of feature configuration (`force-soft-floats`). +/// +/// The passed meta options do not need to account for the `arch` target feature. +macro_rules! select_implementation { + ( + name: $fn_name:ident, + // Configuration meta for when to use arch-specific implementation that requires hard + // float ops + $( use_arch: $use_arch:meta, )? + // Configuration meta for when to use the arch module regardless of whether softfloats + // have been requested. + $( use_arch_required: $use_arch_required:meta, )? + args: $($arg:ident),+ , + ) => { + // FIXME: these use paths that are a pretty fragile (`super`). We should figure out + // something better w.r.t. how this is vendored into compiler-builtins. + + // However, we do need a few things from `arch` that are used even with soft floats. + select_implementation! { + @cfg $($use_arch_required)?; + if true { + return super::arch::$fn_name( $($arg),+ ); + } + } + + // By default, never use arch-specific implementations if we have force-soft-floats + #[cfg(arch_enabled)] + select_implementation! { + @cfg $($use_arch)?; + // Wrap in `if true` to avoid unused warnings + if true { + return super::arch::$fn_name( $($arg),+ ); + } + } + }; + + // Coalesce helper to construct an expression only if a config is provided + (@cfg ; $ex:expr) => { }; + (@cfg $provided:meta; $ex:expr) => { #[cfg($provided)] $ex }; +} + +/// Construct a 16-bit float from hex float representation (C-style), guaranteed to +/// evaluate at compile time. +#[cfg(f16_enabled)] +#[cfg_attr(feature = "unstable-public-internals", macro_export)] +#[allow(unused_macros)] +macro_rules! hf16 { + ($s:literal) => {{ + const X: f16 = $crate::support::hf16($s); + X + }}; +} + +/// Construct a 32-bit float from hex float representation (C-style), guaranteed to +/// evaluate at compile time. +#[allow(unused_macros)] +#[cfg_attr(feature = "unstable-public-internals", macro_export)] +macro_rules! hf32 { + ($s:literal) => {{ + const X: f32 = $crate::support::hf32($s); + X + }}; +} + +/// Construct a 64-bit float from hex float representation (C-style), guaranteed to +/// evaluate at compile time. +#[allow(unused_macros)] +#[cfg_attr(feature = "unstable-public-internals", macro_export)] +macro_rules! hf64 { + ($s:literal) => {{ + const X: f64 = $crate::support::hf64($s); + X + }}; +} + +/// Construct a 128-bit float from hex float representation (C-style), guaranteed to +/// evaluate at compile time. +#[cfg(f128_enabled)] +#[allow(unused_macros)] +#[cfg_attr(feature = "unstable-public-internals", macro_export)] +macro_rules! hf128 { + ($s:literal) => {{ + const X: f128 = $crate::support::hf128($s); + X + }}; +} + +/// Assert `F::biteq` with better messages. +#[cfg(test)] +macro_rules! assert_biteq { + ($left:expr, $right:expr, $($tt:tt)*) => {{ + use $crate::support::Int; + let l = $left; + let r = $right; + let bits = Int::leading_zeros(l.to_bits() - l.to_bits()); // hack to get the width from the value + assert!( + l.biteq(r), + "{}\nl: {l:?} ({lb:#0width$x})\nr: {r:?} ({rb:#0width$x})", + format_args!($($tt)*), + lb = l.to_bits(), + rb = r.to_bits(), + width = ((bits / 4) + 2) as usize, + + ); + }}; + ($left:expr, $right:expr $(,)?) => { + assert_biteq!($left, $right, "") + }; +} diff --git a/libm/src/math/support/mod.rs b/libm/src/math/support/mod.rs new file mode 100644 index 000000000..ee3f2bbdf --- /dev/null +++ b/libm/src/math/support/mod.rs @@ -0,0 +1,29 @@ +#[macro_use] +pub mod macros; +mod big; +mod env; +mod float_traits; +pub mod hex_float; +mod int_traits; + +#[allow(unused_imports)] +pub use big::{i256, u256}; +pub use env::{FpResult, Round, Status}; +#[allow(unused_imports)] +pub use float_traits::{DFloat, Float, HFloat, IntTy}; +pub(crate) use float_traits::{f32_from_bits, f64_from_bits}; +#[cfg(f16_enabled)] +#[allow(unused_imports)] +pub use hex_float::hf16; +#[cfg(f128_enabled)] +#[allow(unused_imports)] +pub use hex_float::hf128; +#[allow(unused_imports)] +pub use hex_float::{Hexf, hf32, hf64}; +pub use int_traits::{CastFrom, CastInto, DInt, HInt, Int, MinInt}; + +/// Hint to the compiler that the current path is cold. +pub fn cold_path() { + #[cfg(intrinsics_enabled)] + core::intrinsics::cold_path(); +} diff --git a/src/math/tan.rs b/libm/src/math/tan.rs similarity index 100% rename from src/math/tan.rs rename to libm/src/math/tan.rs diff --git a/src/math/tanf.rs b/libm/src/math/tanf.rs similarity index 100% rename from src/math/tanf.rs rename to libm/src/math/tanf.rs diff --git a/src/math/tanh.rs b/libm/src/math/tanh.rs similarity index 100% rename from src/math/tanh.rs rename to libm/src/math/tanh.rs diff --git a/src/math/tanhf.rs b/libm/src/math/tanhf.rs similarity index 100% rename from src/math/tanhf.rs rename to libm/src/math/tanhf.rs diff --git a/src/math/tgamma.rs b/libm/src/math/tgamma.rs similarity index 97% rename from src/math/tgamma.rs rename to libm/src/math/tgamma.rs index 3f38c0b1d..305986064 100644 --- a/src/math/tgamma.rs +++ b/libm/src/math/tgamma.rs @@ -45,7 +45,8 @@ fn sinpi(mut x: f64) -> f64 { 1 => k_cos(x, 0.0), 2 => k_sin(-x, 0.0, 0), 3 => -k_cos(x, 0.0), - 0 | _ => k_sin(x, 0.0, 0), + // 0 + _ => k_sin(x, 0.0, 0), } } @@ -129,6 +130,7 @@ fn s(x: f64) -> f64 { return num / den; } +/// The [Gamma function](https://en.wikipedia.org/wiki/Gamma_function) (f64). #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn tgamma(mut x: f64) -> f64 { let u: u64 = x.to_bits(); @@ -143,7 +145,7 @@ pub fn tgamma(mut x: f64) -> f64 { /* special cases */ if ix >= 0x7ff00000 { /* tgamma(nan)=nan, tgamma(inf)=inf, tgamma(-inf)=nan with invalid */ - return x + core::f64::INFINITY; + return x + f64::INFINITY; } if ix < ((0x3ff - 54) << 20) { /* |x| < 2^-54: tgamma(x) ~ 1/x, +-0 raises div-by-zero */ diff --git a/src/math/tgammaf.rs b/libm/src/math/tgammaf.rs similarity index 64% rename from src/math/tgammaf.rs rename to libm/src/math/tgammaf.rs index 23e3814f9..fe178f7a3 100644 --- a/src/math/tgammaf.rs +++ b/libm/src/math/tgammaf.rs @@ -1,5 +1,6 @@ use super::tgamma; +/// The [Gamma function](https://en.wikipedia.org/wiki/Gamma_function) (f32). #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn tgammaf(x: f32) -> f32 { tgamma(x as f64) as f32 diff --git a/libm/src/math/trunc.rs b/libm/src/math/trunc.rs new file mode 100644 index 000000000..fa50d55e1 --- /dev/null +++ b/libm/src/math/trunc.rs @@ -0,0 +1,53 @@ +/// Rounds the number toward 0 to the closest integral value (f16). +/// +/// This effectively removes the decimal part of the number, leaving the integral part. +#[cfg(f16_enabled)] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn truncf16(x: f16) -> f16 { + super::generic::trunc(x) +} + +/// Rounds the number toward 0 to the closest integral value (f32). +/// +/// This effectively removes the decimal part of the number, leaving the integral part. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn truncf(x: f32) -> f32 { + select_implementation! { + name: truncf, + use_arch: all(target_arch = "wasm32", intrinsics_enabled), + args: x, + } + + super::generic::trunc(x) +} + +/// Rounds the number toward 0 to the closest integral value (f64). +/// +/// This effectively removes the decimal part of the number, leaving the integral part. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn trunc(x: f64) -> f64 { + select_implementation! { + name: trunc, + use_arch: all(target_arch = "wasm32", intrinsics_enabled), + args: x, + } + + super::generic::trunc(x) +} + +/// Rounds the number toward 0 to the closest integral value (f128). +/// +/// This effectively removes the decimal part of the number, leaving the integral part. +#[cfg(f128_enabled)] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn truncf128(x: f128) -> f128 { + super::generic::trunc(x) +} + +#[cfg(test)] +mod tests { + #[test] + fn sanity_check() { + assert_eq!(super::truncf(1.1), 1.0); + } +} diff --git a/libm/src/math/truncf.rs b/libm/src/math/truncf.rs new file mode 100644 index 000000000..14533a267 --- /dev/null +++ b/libm/src/math/truncf.rs @@ -0,0 +1,23 @@ +/// Rounds the number toward 0 to the closest integral value (f32). +/// +/// This effectively removes the decimal part of the number, leaving the integral part. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn truncf(x: f32) -> f32 { + select_implementation! { + name: truncf, + use_arch: all(target_arch = "wasm32", intrinsics_enabled), + args: x, + } + + super::generic::trunc(x) +} + +// PowerPC tests are failing on LLVM 13: https://github.com/rust-lang/rust/issues/88520 +#[cfg(not(target_arch = "powerpc64"))] +#[cfg(test)] +mod tests { + #[test] + fn sanity_check() { + assert_eq!(super::truncf(1.1), 1.0); + } +} diff --git a/libm/src/math/truncf128.rs b/libm/src/math/truncf128.rs new file mode 100644 index 000000000..9dccc0d0e --- /dev/null +++ b/libm/src/math/truncf128.rs @@ -0,0 +1,7 @@ +/// Rounds the number toward 0 to the closest integral value (f128). +/// +/// This effectively removes the decimal part of the number, leaving the integral part. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn truncf128(x: f128) -> f128 { + super::generic::trunc(x) +} diff --git a/libm/src/math/truncf16.rs b/libm/src/math/truncf16.rs new file mode 100644 index 000000000..d7c3d225c --- /dev/null +++ b/libm/src/math/truncf16.rs @@ -0,0 +1,7 @@ +/// Rounds the number toward 0 to the closest integral value (f16). +/// +/// This effectively removes the decimal part of the number, leaving the integral part. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn truncf16(x: f16) -> f16 { + super::generic::trunc(x) +} diff --git a/src/lib.rs b/src/lib.rs deleted file mode 100644 index 6d95fa173..000000000 --- a/src/lib.rs +++ /dev/null @@ -1,47 +0,0 @@ -//! libm in pure Rust -#![no_std] -#![cfg_attr(feature = "unstable", allow(internal_features))] -#![cfg_attr(feature = "unstable", feature(core_intrinsics))] -#![allow(clippy::assign_op_pattern)] -#![allow(clippy::deprecated_cfg_attr)] -#![allow(clippy::eq_op)] -#![allow(clippy::float_cmp)] -#![allow(clippy::int_plus_one)] -#![allow(clippy::many_single_char_names)] -#![allow(clippy::mixed_case_hex_literals)] -#![allow(clippy::needless_return)] -#![allow(clippy::unreadable_literal)] - -mod libm_helper; -mod math; - -use core::{f32, f64}; - -pub use libm_helper::*; - -pub use self::math::*; - -/// Approximate equality with 1 ULP of tolerance -#[doc(hidden)] -#[inline] -pub fn _eqf(a: f32, b: f32) -> Result<(), u32> { - if a.is_nan() && b.is_nan() { - Ok(()) - } else { - let err = (a.to_bits() as i32).wrapping_sub(b.to_bits() as i32).abs(); - - if err <= 1 { Ok(()) } else { Err(err as u32) } - } -} - -#[doc(hidden)] -#[inline] -pub fn _eq(a: f64, b: f64) -> Result<(), u64> { - if a.is_nan() && b.is_nan() { - Ok(()) - } else { - let err = (a.to_bits() as i64).wrapping_sub(b.to_bits() as i64).abs(); - - if err <= 1 { Ok(()) } else { Err(err as u64) } - } -} diff --git a/src/math/cbrt.rs b/src/math/cbrt.rs deleted file mode 100644 index b4e77eaa2..000000000 --- a/src/math/cbrt.rs +++ /dev/null @@ -1,113 +0,0 @@ -/* origin: FreeBSD /usr/src/lib/msun/src/s_cbrt.c */ -/* - * ==================================================== - * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. - * - * Developed at SunPro, a Sun Microsystems, Inc. business. - * Permission to use, copy, modify, and distribute this - * software is freely granted, provided that this notice - * is preserved. - * ==================================================== - * - * Optimized by Bruce D. Evans. - */ -/* cbrt(x) - * Return cube root of x - */ - -use core::f64; - -const B1: u32 = 715094163; /* B1 = (1023-1023/3-0.03306235651)*2**20 */ -const B2: u32 = 696219795; /* B2 = (1023-1023/3-54/3-0.03306235651)*2**20 */ - -/* |1/cbrt(x) - p(x)| < 2**-23.5 (~[-7.93e-8, 7.929e-8]). */ -const P0: f64 = 1.87595182427177009643; /* 0x3ffe03e6, 0x0f61e692 */ -const P1: f64 = -1.88497979543377169875; /* 0xbffe28e0, 0x92f02420 */ -const P2: f64 = 1.621429720105354466140; /* 0x3ff9f160, 0x4a49d6c2 */ -const P3: f64 = -0.758397934778766047437; /* 0xbfe844cb, 0xbee751d9 */ -const P4: f64 = 0.145996192886612446982; /* 0x3fc2b000, 0xd4e4edd7 */ - -// Cube root (f64) -/// -/// Computes the cube root of the argument. -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] -pub fn cbrt(x: f64) -> f64 { - let x1p54 = f64::from_bits(0x4350000000000000); // 0x1p54 === 2 ^ 54 - - let mut ui: u64 = x.to_bits(); - let mut r: f64; - let s: f64; - let mut t: f64; - let w: f64; - let mut hx: u32 = (ui >> 32) as u32 & 0x7fffffff; - - if hx >= 0x7ff00000 { - /* cbrt(NaN,INF) is itself */ - return x + x; - } - - /* - * Rough cbrt to 5 bits: - * cbrt(2**e*(1+m) ~= 2**(e/3)*(1+(e%3+m)/3) - * where e is integral and >= 0, m is real and in [0, 1), and "/" and - * "%" are integer division and modulus with rounding towards minus - * infinity. The RHS is always >= the LHS and has a maximum relative - * error of about 1 in 16. Adding a bias of -0.03306235651 to the - * (e%3+m)/3 term reduces the error to about 1 in 32. With the IEEE - * floating point representation, for finite positive normal values, - * ordinary integer divison of the value in bits magically gives - * almost exactly the RHS of the above provided we first subtract the - * exponent bias (1023 for doubles) and later add it back. We do the - * subtraction virtually to keep e >= 0 so that ordinary integer - * division rounds towards minus infinity; this is also efficient. - */ - if hx < 0x00100000 { - /* zero or subnormal? */ - ui = (x * x1p54).to_bits(); - hx = (ui >> 32) as u32 & 0x7fffffff; - if hx == 0 { - return x; /* cbrt(0) is itself */ - } - hx = hx / 3 + B2; - } else { - hx = hx / 3 + B1; - } - ui &= 1 << 63; - ui |= (hx as u64) << 32; - t = f64::from_bits(ui); - - /* - * New cbrt to 23 bits: - * cbrt(x) = t*cbrt(x/t**3) ~= t*P(t**3/x) - * where P(r) is a polynomial of degree 4 that approximates 1/cbrt(r) - * to within 2**-23.5 when |r - 1| < 1/10. The rough approximation - * has produced t such than |t/cbrt(x) - 1| ~< 1/32, and cubing this - * gives us bounds for r = t**3/x. - * - * Try to optimize for parallel evaluation as in __tanf.c. - */ - r = (t * t) * (t / x); - t = t * ((P0 + r * (P1 + r * P2)) + ((r * r) * r) * (P3 + r * P4)); - - /* - * Round t away from zero to 23 bits (sloppily except for ensuring that - * the result is larger in magnitude than cbrt(x) but not much more than - * 2 23-bit ulps larger). With rounding towards zero, the error bound - * would be ~5/6 instead of ~4/6. With a maximum error of 2 23-bit ulps - * in the rounded t, the infinite-precision error in the Newton - * approximation barely affects third digit in the final error - * 0.667; the error in the rounded t can be up to about 3 23-bit ulps - * before the final error is larger than 0.667 ulps. - */ - ui = t.to_bits(); - ui = (ui + 0x80000000) & 0xffffffffc0000000; - t = f64::from_bits(ui); - - /* one step Newton iteration to 53 bits with error < 0.667 ulps */ - s = t * t; /* t*t is exact */ - r = x / s; /* error <= 0.5 ulps; |r| < |t| */ - w = t + t; /* t+t is exact */ - r = (r - t) / (w + r); /* r-t is exact; w+r ~= 3*t */ - t = t + t * r; /* error <= 0.5 + 0.5/3 + epsilon */ - t -} diff --git a/src/math/ceil.rs b/src/math/ceil.rs deleted file mode 100644 index 1593fdaff..000000000 --- a/src/math/ceil.rs +++ /dev/null @@ -1,75 +0,0 @@ -#![allow(unreachable_code)] -use core::f64; - -const TOINT: f64 = 1. / f64::EPSILON; - -/// Ceil (f64) -/// -/// Finds the nearest integer greater than or equal to `x`. -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] -pub fn ceil(x: f64) -> f64 { - // On wasm32 we know that LLVM's intrinsic will compile to an optimized - // `f64.ceil` native instruction, so we can leverage this for both code size - // and speed. - llvm_intrinsically_optimized! { - #[cfg(target_arch = "wasm32")] { - return unsafe { ::core::intrinsics::ceilf64(x) } - } - } - #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))] - { - //use an alternative implementation on x86, because the - //main implementation fails with the x87 FPU used by - //debian i386, probably due to excess precision issues. - //basic implementation taken from https://github.com/rust-lang/libm/issues/219 - use super::fabs; - if fabs(x).to_bits() < 4503599627370496.0_f64.to_bits() { - let truncated = x as i64 as f64; - if truncated < x { - return truncated + 1.0; - } else { - return truncated; - } - } else { - return x; - } - } - let u: u64 = x.to_bits(); - let e: i64 = (u >> 52 & 0x7ff) as i64; - let y: f64; - - if e >= 0x3ff + 52 || x == 0. { - return x; - } - // y = int(x) - x, where int(x) is an integer neighbor of x - y = if (u >> 63) != 0 { x - TOINT + TOINT - x } else { x + TOINT - TOINT - x }; - // special case because of non-nearest rounding modes - if e < 0x3ff { - force_eval!(y); - return if (u >> 63) != 0 { -0. } else { 1. }; - } - if y < 0. { x + y + 1. } else { x + y } -} - -#[cfg(test)] -mod tests { - use core::f64::*; - - use super::*; - - #[test] - fn sanity_check() { - assert_eq!(ceil(1.1), 2.0); - assert_eq!(ceil(2.9), 3.0); - } - - /// The spec: https://en.cppreference.com/w/cpp/numeric/math/ceil - #[test] - fn spec_tests() { - // Not Asserted: that the current rounding mode has no effect. - assert!(ceil(NAN).is_nan()); - for f in [0.0, -0.0, INFINITY, NEG_INFINITY].iter().copied() { - assert_eq!(ceil(f), f); - } - } -} diff --git a/src/math/ceilf.rs b/src/math/ceilf.rs deleted file mode 100644 index bf9ba1227..000000000 --- a/src/math/ceilf.rs +++ /dev/null @@ -1,66 +0,0 @@ -use core::f32; - -/// Ceil (f32) -/// -/// Finds the nearest integer greater than or equal to `x`. -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] -pub fn ceilf(x: f32) -> f32 { - // On wasm32 we know that LLVM's intrinsic will compile to an optimized - // `f32.ceil` native instruction, so we can leverage this for both code size - // and speed. - llvm_intrinsically_optimized! { - #[cfg(target_arch = "wasm32")] { - return unsafe { ::core::intrinsics::ceilf32(x) } - } - } - let mut ui = x.to_bits(); - let e = (((ui >> 23) & 0xff).wrapping_sub(0x7f)) as i32; - - if e >= 23 { - return x; - } - if e >= 0 { - let m = 0x007fffff >> e; - if (ui & m) == 0 { - return x; - } - force_eval!(x + f32::from_bits(0x7b800000)); - if ui >> 31 == 0 { - ui += m; - } - ui &= !m; - } else { - force_eval!(x + f32::from_bits(0x7b800000)); - if ui >> 31 != 0 { - return -0.0; - } else if ui << 1 != 0 { - return 1.0; - } - } - f32::from_bits(ui) -} - -// PowerPC tests are failing on LLVM 13: https://github.com/rust-lang/rust/issues/88520 -#[cfg(not(target_arch = "powerpc64"))] -#[cfg(test)] -mod tests { - use core::f32::*; - - use super::*; - - #[test] - fn sanity_check() { - assert_eq!(ceilf(1.1), 2.0); - assert_eq!(ceilf(2.9), 3.0); - } - - /// The spec: https://en.cppreference.com/w/cpp/numeric/math/ceil - #[test] - fn spec_tests() { - // Not Asserted: that the current rounding mode has no effect. - assert!(ceilf(NAN).is_nan()); - for f in [0.0, -0.0, INFINITY, NEG_INFINITY].iter().copied() { - assert_eq!(ceilf(f), f); - } - } -} diff --git a/src/math/copysign.rs b/src/math/copysign.rs deleted file mode 100644 index 1f4a35a33..000000000 --- a/src/math/copysign.rs +++ /dev/null @@ -1,12 +0,0 @@ -/// Sign of Y, magnitude of X (f64) -/// -/// Constructs a number with the magnitude (absolute value) of its -/// first argument, `x`, and the sign of its second argument, `y`. -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] -pub fn copysign(x: f64, y: f64) -> f64 { - let mut ux = x.to_bits(); - let uy = y.to_bits(); - ux &= (!0) >> 1; - ux |= uy & (1 << 63); - f64::from_bits(ux) -} diff --git a/src/math/fabs.rs b/src/math/fabs.rs deleted file mode 100644 index 3b0628aa6..000000000 --- a/src/math/fabs.rs +++ /dev/null @@ -1,42 +0,0 @@ -use core::u64; - -/// Absolute value (magnitude) (f64) -/// Calculates the absolute value (magnitude) of the argument `x`, -/// by direct manipulation of the bit representation of `x`. -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] -pub fn fabs(x: f64) -> f64 { - // On wasm32 we know that LLVM's intrinsic will compile to an optimized - // `f64.abs` native instruction, so we can leverage this for both code size - // and speed. - llvm_intrinsically_optimized! { - #[cfg(target_arch = "wasm32")] { - return unsafe { ::core::intrinsics::fabsf64(x) } - } - } - f64::from_bits(x.to_bits() & (u64::MAX / 2)) -} - -#[cfg(test)] -mod tests { - use core::f64::*; - - use super::*; - - #[test] - fn sanity_check() { - assert_eq!(fabs(-1.0), 1.0); - assert_eq!(fabs(2.8), 2.8); - } - - /// The spec: https://en.cppreference.com/w/cpp/numeric/math/fabs - #[test] - fn spec_tests() { - assert!(fabs(NAN).is_nan()); - for f in [0.0, -0.0].iter().copied() { - assert_eq!(fabs(f), 0.0); - } - for f in [INFINITY, NEG_INFINITY].iter().copied() { - assert_eq!(fabs(f), INFINITY); - } - } -} diff --git a/src/math/fdim.rs b/src/math/fdim.rs deleted file mode 100644 index 014930097..000000000 --- a/src/math/fdim.rs +++ /dev/null @@ -1,22 +0,0 @@ -use core::f64; - -/// Positive difference (f64) -/// -/// Determines the positive difference between arguments, returning: -/// * x - y if x > y, or -/// * +0 if x <= y, or -/// * NAN if either argument is NAN. -/// -/// A range error may occur. -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] -pub fn fdim(x: f64, y: f64) -> f64 { - if x.is_nan() { - x - } else if y.is_nan() { - y - } else if x > y { - x - y - } else { - 0.0 - } -} diff --git a/src/math/fenv.rs b/src/math/fenv.rs deleted file mode 100644 index c91272e82..000000000 --- a/src/math/fenv.rs +++ /dev/null @@ -1,27 +0,0 @@ -// src: musl/src/fenv/fenv.c -/* Dummy functions for archs lacking fenv implementation */ - -pub(crate) const FE_UNDERFLOW: i32 = 0; -pub(crate) const FE_INEXACT: i32 = 0; - -pub(crate) const FE_TONEAREST: i32 = 0; - -#[inline] -pub(crate) fn feclearexcept(_mask: i32) -> i32 { - 0 -} - -#[inline] -pub(crate) fn feraiseexcept(_mask: i32) -> i32 { - 0 -} - -#[inline] -pub(crate) fn fetestexcept(_mask: i32) -> i32 { - 0 -} - -#[inline] -pub(crate) fn fegetround() -> i32 { - FE_TONEAREST -} diff --git a/src/math/floor.rs b/src/math/floor.rs deleted file mode 100644 index e8fb21e58..000000000 --- a/src/math/floor.rs +++ /dev/null @@ -1,74 +0,0 @@ -#![allow(unreachable_code)] -use core::f64; - -const TOINT: f64 = 1. / f64::EPSILON; - -/// Floor (f64) -/// -/// Finds the nearest integer less than or equal to `x`. -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] -pub fn floor(x: f64) -> f64 { - // On wasm32 we know that LLVM's intrinsic will compile to an optimized - // `f64.floor` native instruction, so we can leverage this for both code size - // and speed. - llvm_intrinsically_optimized! { - #[cfg(target_arch = "wasm32")] { - return unsafe { ::core::intrinsics::floorf64(x) } - } - } - #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))] - { - //use an alternative implementation on x86, because the - //main implementation fails with the x87 FPU used by - //debian i386, probably due to excess precision issues. - //basic implementation taken from https://github.com/rust-lang/libm/issues/219 - use super::fabs; - if fabs(x).to_bits() < 4503599627370496.0_f64.to_bits() { - let truncated = x as i64 as f64; - if truncated > x { - return truncated - 1.0; - } else { - return truncated; - } - } else { - return x; - } - } - let ui = x.to_bits(); - let e = ((ui >> 52) & 0x7ff) as i32; - - if (e >= 0x3ff + 52) || (x == 0.) { - return x; - } - /* y = int(x) - x, where int(x) is an integer neighbor of x */ - let y = if (ui >> 63) != 0 { x - TOINT + TOINT - x } else { x + TOINT - TOINT - x }; - /* special case because of non-nearest rounding modes */ - if e < 0x3ff { - force_eval!(y); - return if (ui >> 63) != 0 { -1. } else { 0. }; - } - if y > 0. { x + y - 1. } else { x + y } -} - -#[cfg(test)] -mod tests { - use core::f64::*; - - use super::*; - - #[test] - fn sanity_check() { - assert_eq!(floor(1.1), 1.0); - assert_eq!(floor(2.9), 2.0); - } - - /// The spec: https://en.cppreference.com/w/cpp/numeric/math/floor - #[test] - fn spec_tests() { - // Not Asserted: that the current rounding mode has no effect. - assert!(floor(NAN).is_nan()); - for f in [0.0, -0.0, INFINITY, NEG_INFINITY].iter().copied() { - assert_eq!(floor(f), f); - } - } -} diff --git a/src/math/floorf.rs b/src/math/floorf.rs deleted file mode 100644 index f66cab74f..000000000 --- a/src/math/floorf.rs +++ /dev/null @@ -1,67 +0,0 @@ -use core::f32; - -/// Floor (f32) -/// -/// Finds the nearest integer less than or equal to `x`. -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] -pub fn floorf(x: f32) -> f32 { - // On wasm32 we know that LLVM's intrinsic will compile to an optimized - // `f32.floor` native instruction, so we can leverage this for both code size - // and speed. - llvm_intrinsically_optimized! { - #[cfg(target_arch = "wasm32")] { - return unsafe { ::core::intrinsics::floorf32(x) } - } - } - let mut ui = x.to_bits(); - let e = (((ui >> 23) as i32) & 0xff) - 0x7f; - - if e >= 23 { - return x; - } - if e >= 0 { - let m: u32 = 0x007fffff >> e; - if (ui & m) == 0 { - return x; - } - force_eval!(x + f32::from_bits(0x7b800000)); - if ui >> 31 != 0 { - ui += m; - } - ui &= !m; - } else { - force_eval!(x + f32::from_bits(0x7b800000)); - if ui >> 31 == 0 { - ui = 0; - } else if ui << 1 != 0 { - return -1.0; - } - } - f32::from_bits(ui) -} - -// PowerPC tests are failing on LLVM 13: https://github.com/rust-lang/rust/issues/88520 -#[cfg(not(target_arch = "powerpc64"))] -#[cfg(test)] -mod tests { - use core::f32::*; - - use super::*; - - #[test] - fn sanity_check() { - assert_eq!(floorf(0.5), 0.0); - assert_eq!(floorf(1.1), 1.0); - assert_eq!(floorf(2.9), 2.0); - } - - /// The spec: https://en.cppreference.com/w/cpp/numeric/math/floor - #[test] - fn spec_tests() { - // Not Asserted: that the current rounding mode has no effect. - assert!(floorf(NAN).is_nan()); - for f in [0.0, -0.0, INFINITY, NEG_INFINITY].iter().copied() { - assert_eq!(floorf(f), f); - } - } -} diff --git a/src/math/fma.rs b/src/math/fma.rs deleted file mode 100644 index bb2028fa7..000000000 --- a/src/math/fma.rs +++ /dev/null @@ -1,226 +0,0 @@ -use core::{f32, f64}; - -use super::scalbn; - -const ZEROINFNAN: i32 = 0x7ff - 0x3ff - 52 - 1; - -struct Num { - m: u64, - e: i32, - sign: i32, -} - -fn normalize(x: f64) -> Num { - let x1p63: f64 = f64::from_bits(0x43e0000000000000); // 0x1p63 === 2 ^ 63 - - let mut ix: u64 = x.to_bits(); - let mut e: i32 = (ix >> 52) as i32; - let sign: i32 = e & 0x800; - e &= 0x7ff; - if e == 0 { - ix = (x * x1p63).to_bits(); - e = (ix >> 52) as i32 & 0x7ff; - e = if e != 0 { e - 63 } else { 0x800 }; - } - ix &= (1 << 52) - 1; - ix |= 1 << 52; - ix <<= 1; - e -= 0x3ff + 52 + 1; - Num { m: ix, e, sign } -} - -#[inline] -fn mul(x: u64, y: u64) -> (u64, u64) { - let t = (x as u128).wrapping_mul(y as u128); - ((t >> 64) as u64, t as u64) -} - -/// Floating multiply add (f64) -/// -/// Computes `(x*y)+z`, rounded as one ternary operation: -/// Computes the value (as if) to infinite precision and rounds once to the result format, -/// according to the rounding mode characterized by the value of FLT_ROUNDS. -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] -pub fn fma(x: f64, y: f64, z: f64) -> f64 { - let x1p63: f64 = f64::from_bits(0x43e0000000000000); // 0x1p63 === 2 ^ 63 - let x0_ffffff8p_63 = f64::from_bits(0x3bfffffff0000000); // 0x0.ffffff8p-63 - - /* normalize so top 10bits and last bit are 0 */ - let nx = normalize(x); - let ny = normalize(y); - let nz = normalize(z); - - if nx.e >= ZEROINFNAN || ny.e >= ZEROINFNAN { - return x * y + z; - } - if nz.e >= ZEROINFNAN { - if nz.e > ZEROINFNAN { - /* z==0 */ - return x * y + z; - } - return z; - } - - /* mul: r = x*y */ - let zhi: u64; - let zlo: u64; - let (mut rhi, mut rlo) = mul(nx.m, ny.m); - /* either top 20 or 21 bits of rhi and last 2 bits of rlo are 0 */ - - /* align exponents */ - let mut e: i32 = nx.e + ny.e; - let mut d: i32 = nz.e - e; - /* shift bits z<<=kz, r>>=kr, so kz+kr == d, set e = e+kr (== ez-kz) */ - if d > 0 { - if d < 64 { - zlo = nz.m << d; - zhi = nz.m >> (64 - d); - } else { - zlo = 0; - zhi = nz.m; - e = nz.e - 64; - d -= 64; - if d == 0 { - } else if d < 64 { - rlo = rhi << (64 - d) | rlo >> d | ((rlo << (64 - d)) != 0) as u64; - rhi = rhi >> d; - } else { - rlo = 1; - rhi = 0; - } - } - } else { - zhi = 0; - d = -d; - if d == 0 { - zlo = nz.m; - } else if d < 64 { - zlo = nz.m >> d | ((nz.m << (64 - d)) != 0) as u64; - } else { - zlo = 1; - } - } - - /* add */ - let mut sign: i32 = nx.sign ^ ny.sign; - let samesign: bool = (sign ^ nz.sign) == 0; - let mut nonzero: i32 = 1; - if samesign { - /* r += z */ - rlo = rlo.wrapping_add(zlo); - rhi += zhi + (rlo < zlo) as u64; - } else { - /* r -= z */ - let (res, borrow) = rlo.overflowing_sub(zlo); - rlo = res; - rhi = rhi.wrapping_sub(zhi.wrapping_add(borrow as u64)); - if (rhi >> 63) != 0 { - rlo = (rlo as i64).wrapping_neg() as u64; - rhi = (rhi as i64).wrapping_neg() as u64 - (rlo != 0) as u64; - sign = (sign == 0) as i32; - } - nonzero = (rhi != 0) as i32; - } - - /* set rhi to top 63bit of the result (last bit is sticky) */ - if nonzero != 0 { - e += 64; - d = rhi.leading_zeros() as i32 - 1; - /* note: d > 0 */ - rhi = rhi << d | rlo >> (64 - d) | ((rlo << d) != 0) as u64; - } else if rlo != 0 { - d = rlo.leading_zeros() as i32 - 1; - if d < 0 { - rhi = rlo >> 1 | (rlo & 1); - } else { - rhi = rlo << d; - } - } else { - /* exact +-0 */ - return x * y + z; - } - e -= d; - - /* convert to double */ - let mut i: i64 = rhi as i64; /* i is in [1<<62,(1<<63)-1] */ - if sign != 0 { - i = -i; - } - let mut r: f64 = i as f64; /* |r| is in [0x1p62,0x1p63] */ - - if e < -1022 - 62 { - /* result is subnormal before rounding */ - if e == -1022 - 63 { - let mut c: f64 = x1p63; - if sign != 0 { - c = -c; - } - if r == c { - /* min normal after rounding, underflow depends - on arch behaviour which can be imitated by - a double to float conversion */ - let fltmin: f32 = (x0_ffffff8p_63 * f32::MIN_POSITIVE as f64 * r) as f32; - return f64::MIN_POSITIVE / f32::MIN_POSITIVE as f64 * fltmin as f64; - } - /* one bit is lost when scaled, add another top bit to - only round once at conversion if it is inexact */ - if (rhi << 53) != 0 { - i = (rhi >> 1 | (rhi & 1) | 1 << 62) as i64; - if sign != 0 { - i = -i; - } - r = i as f64; - r = 2. * r - c; /* remove top bit */ - - /* raise underflow portably, such that it - cannot be optimized away */ - { - let tiny: f64 = f64::MIN_POSITIVE / f32::MIN_POSITIVE as f64 * r; - r += (tiny * tiny) * (r - r); - } - } - } else { - /* only round once when scaled */ - d = 10; - i = ((rhi >> d | ((rhi << (64 - d)) != 0) as u64) << d) as i64; - if sign != 0 { - i = -i; - } - r = i as f64; - } - } - scalbn(r, e) -} - -#[cfg(test)] -mod tests { - use super::*; - #[test] - fn fma_segfault() { - // These two inputs cause fma to segfault on release due to overflow: - assert_eq!( - fma( - -0.0000000000000002220446049250313, - -0.0000000000000002220446049250313, - -0.0000000000000002220446049250313 - ), - -0.00000000000000022204460492503126, - ); - - let result = fma(-0.992, -0.992, -0.992); - //force rounding to storage format on x87 to prevent superious errors. - #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))] - let result = force_eval!(result); - assert_eq!(result, -0.007936000000000007,); - } - - #[test] - fn fma_sbb() { - assert_eq!(fma(-(1.0 - f64::EPSILON), f64::MIN, f64::MIN), -3991680619069439e277); - } - - #[test] - fn fma_underflow() { - assert_eq!(fma(1.1102230246251565e-16, -9.812526705433188e-305, 1.0894e-320), 0.0,); - } -} diff --git a/src/math/fmaf.rs b/src/math/fmaf.rs deleted file mode 100644 index 10bdaeab3..000000000 --- a/src/math/fmaf.rs +++ /dev/null @@ -1,113 +0,0 @@ -/* origin: FreeBSD /usr/src/lib/msun/src/s_fmaf.c */ -/*- - * Copyright (c) 2005-2011 David Schultz - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -use core::f32; -use core::ptr::read_volatile; - -use super::fenv::{ - FE_INEXACT, FE_TONEAREST, FE_UNDERFLOW, feclearexcept, fegetround, feraiseexcept, fetestexcept, -}; - -/* - * Fused multiply-add: Compute x * y + z with a single rounding error. - * - * A double has more than twice as much precision than a float, so - * direct double-precision arithmetic suffices, except where double - * rounding occurs. - */ - -/// Floating multiply add (f32) -/// -/// Computes `(x*y)+z`, rounded as one ternary operation: -/// Computes the value (as if) to infinite precision and rounds once to the result format, -/// according to the rounding mode characterized by the value of FLT_ROUNDS. -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] -pub fn fmaf(x: f32, y: f32, mut z: f32) -> f32 { - let xy: f64; - let mut result: f64; - let mut ui: u64; - let e: i32; - - xy = x as f64 * y as f64; - result = xy + z as f64; - ui = result.to_bits(); - e = (ui >> 52) as i32 & 0x7ff; - /* Common case: The double precision result is fine. */ - if ( - /* not a halfway case */ - ui & 0x1fffffff) != 0x10000000 || - /* NaN */ - e == 0x7ff || - /* exact */ - (result - xy == z as f64 && result - z as f64 == xy) || - /* not round-to-nearest */ - fegetround() != FE_TONEAREST - { - /* - underflow may not be raised correctly, example: - fmaf(0x1p-120f, 0x1p-120f, 0x1p-149f) - */ - if e < 0x3ff - 126 && e >= 0x3ff - 149 && fetestexcept(FE_INEXACT) != 0 { - feclearexcept(FE_INEXACT); - // prevent `xy + vz` from being CSE'd with `xy + z` above - let vz: f32 = unsafe { read_volatile(&z) }; - result = xy + vz as f64; - if fetestexcept(FE_INEXACT) != 0 { - feraiseexcept(FE_UNDERFLOW); - } else { - feraiseexcept(FE_INEXACT); - } - } - z = result as f32; - return z; - } - - /* - * If result is inexact, and exactly halfway between two float values, - * we need to adjust the low-order bit in the direction of the error. - */ - let neg = ui >> 63 != 0; - let err = if neg == (z as f64 > xy) { xy - result + z as f64 } else { z as f64 - result + xy }; - if neg == (err < 0.0) { - ui += 1; - } else { - ui -= 1; - } - f64::from_bits(ui) as f32 -} - -#[cfg(test)] -mod tests { - #[test] - fn issue_263() { - let a = f32::from_bits(1266679807); - let b = f32::from_bits(1300234242); - let c = f32::from_bits(1115553792); - let expected = f32::from_bits(1501560833); - assert_eq!(super::fmaf(a, b, c), expected); - } -} diff --git a/src/math/fmax.rs b/src/math/fmax.rs deleted file mode 100644 index 93c97bc61..000000000 --- a/src/math/fmax.rs +++ /dev/null @@ -1,12 +0,0 @@ -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] -pub fn fmax(x: f64, y: f64) -> f64 { - // IEEE754 says: maxNum(x, y) is the canonicalized number y if x < y, x if y < x, the - // canonicalized number if one operand is a number and the other a quiet NaN. Otherwise it - // is either x or y, canonicalized (this means results might differ among implementations). - // When either x or y is a signalingNaN, then the result is according to 6.2. - // - // Since we do not support sNaN in Rust yet, we do not need to handle them. - // FIXME(nagisa): due to https://bugs.llvm.org/show_bug.cgi?id=33303 we canonicalize by - // multiplying by 1.0. Should switch to the `canonicalize` when it works. - (if x.is_nan() || x < y { y } else { x }) * 1.0 -} diff --git a/src/math/fmaxf.rs b/src/math/fmaxf.rs deleted file mode 100644 index 607746647..000000000 --- a/src/math/fmaxf.rs +++ /dev/null @@ -1,12 +0,0 @@ -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] -pub fn fmaxf(x: f32, y: f32) -> f32 { - // IEEE754 says: maxNum(x, y) is the canonicalized number y if x < y, x if y < x, the - // canonicalized number if one operand is a number and the other a quiet NaN. Otherwise it - // is either x or y, canonicalized (this means results might differ among implementations). - // When either x or y is a signalingNaN, then the result is according to 6.2. - // - // Since we do not support sNaN in Rust yet, we do not need to handle them. - // FIXME(nagisa): due to https://bugs.llvm.org/show_bug.cgi?id=33303 we canonicalize by - // multiplying by 1.0. Should switch to the `canonicalize` when it works. - (if x.is_nan() || x < y { y } else { x }) * 1.0 -} diff --git a/src/math/fmin.rs b/src/math/fmin.rs deleted file mode 100644 index ab1509f34..000000000 --- a/src/math/fmin.rs +++ /dev/null @@ -1,12 +0,0 @@ -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] -pub fn fmin(x: f64, y: f64) -> f64 { - // IEEE754 says: minNum(x, y) is the canonicalized number x if x < y, y if y < x, the - // canonicalized number if one operand is a number and the other a quiet NaN. Otherwise it - // is either x or y, canonicalized (this means results might differ among implementations). - // When either x or y is a signalingNaN, then the result is according to 6.2. - // - // Since we do not support sNaN in Rust yet, we do not need to handle them. - // FIXME(nagisa): due to https://bugs.llvm.org/show_bug.cgi?id=33303 we canonicalize by - // multiplying by 1.0. Should switch to the `canonicalize` when it works. - (if y.is_nan() || x < y { x } else { y }) * 1.0 -} diff --git a/src/math/fminf.rs b/src/math/fminf.rs deleted file mode 100644 index 0049e7117..000000000 --- a/src/math/fminf.rs +++ /dev/null @@ -1,12 +0,0 @@ -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] -pub fn fminf(x: f32, y: f32) -> f32 { - // IEEE754 says: minNum(x, y) is the canonicalized number x if x < y, y if y < x, the - // canonicalized number if one operand is a number and the other a quiet NaN. Otherwise it - // is either x or y, canonicalized (this means results might differ among implementations). - // When either x or y is a signalingNaN, then the result is according to 6.2. - // - // Since we do not support sNaN in Rust yet, we do not need to handle them. - // FIXME(nagisa): due to https://bugs.llvm.org/show_bug.cgi?id=33303 we canonicalize by - // multiplying by 1.0. Should switch to the `canonicalize` when it works. - (if y.is_nan() || x < y { x } else { y }) * 1.0 -} diff --git a/src/math/fmod.rs b/src/math/fmod.rs deleted file mode 100644 index d892ffd8b..000000000 --- a/src/math/fmod.rs +++ /dev/null @@ -1,80 +0,0 @@ -use core::u64; - -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] -pub fn fmod(x: f64, y: f64) -> f64 { - let mut uxi = x.to_bits(); - let mut uyi = y.to_bits(); - let mut ex = (uxi >> 52 & 0x7ff) as i64; - let mut ey = (uyi >> 52 & 0x7ff) as i64; - let sx = uxi >> 63; - let mut i; - - if uyi << 1 == 0 || y.is_nan() || ex == 0x7ff { - return (x * y) / (x * y); - } - if uxi << 1 <= uyi << 1 { - if uxi << 1 == uyi << 1 { - return 0.0 * x; - } - return x; - } - - /* normalize x and y */ - if ex == 0 { - i = uxi << 12; - while i >> 63 == 0 { - ex -= 1; - i <<= 1; - } - uxi <<= -ex + 1; - } else { - uxi &= u64::MAX >> 12; - uxi |= 1 << 52; - } - if ey == 0 { - i = uyi << 12; - while i >> 63 == 0 { - ey -= 1; - i <<= 1; - } - uyi <<= -ey + 1; - } else { - uyi &= u64::MAX >> 12; - uyi |= 1 << 52; - } - - /* x mod y */ - while ex > ey { - i = uxi.wrapping_sub(uyi); - if i >> 63 == 0 { - if i == 0 { - return 0.0 * x; - } - uxi = i; - } - uxi <<= 1; - ex -= 1; - } - i = uxi.wrapping_sub(uyi); - if i >> 63 == 0 { - if i == 0 { - return 0.0 * x; - } - uxi = i; - } - while uxi >> 52 == 0 { - uxi <<= 1; - ex -= 1; - } - - /* scale result */ - if ex > 0 { - uxi -= 1 << 52; - uxi |= (ex as u64) << 52; - } else { - uxi >>= -ex + 1; - } - uxi |= (sx as u64) << 63; - - f64::from_bits(uxi) -} diff --git a/src/math/fmodf.rs b/src/math/fmodf.rs deleted file mode 100644 index 1d8001384..000000000 --- a/src/math/fmodf.rs +++ /dev/null @@ -1,88 +0,0 @@ -use core::{f32, u32}; - -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] -pub fn fmodf(x: f32, y: f32) -> f32 { - let mut uxi = x.to_bits(); - let mut uyi = y.to_bits(); - let mut ex = (uxi >> 23 & 0xff) as i32; - let mut ey = (uyi >> 23 & 0xff) as i32; - let sx = uxi & 0x80000000; - let mut i; - - if uyi << 1 == 0 || y.is_nan() || ex == 0xff { - return (x * y) / (x * y); - } - - if uxi << 1 <= uyi << 1 { - if uxi << 1 == uyi << 1 { - return 0.0 * x; - } - - return x; - } - - /* normalize x and y */ - if ex == 0 { - i = uxi << 9; - while i >> 31 == 0 { - ex -= 1; - i <<= 1; - } - - uxi <<= -ex + 1; - } else { - uxi &= u32::MAX >> 9; - uxi |= 1 << 23; - } - - if ey == 0 { - i = uyi << 9; - while i >> 31 == 0 { - ey -= 1; - i <<= 1; - } - - uyi <<= -ey + 1; - } else { - uyi &= u32::MAX >> 9; - uyi |= 1 << 23; - } - - /* x mod y */ - while ex > ey { - i = uxi.wrapping_sub(uyi); - if i >> 31 == 0 { - if i == 0 { - return 0.0 * x; - } - uxi = i; - } - uxi <<= 1; - - ex -= 1; - } - - i = uxi.wrapping_sub(uyi); - if i >> 31 == 0 { - if i == 0 { - return 0.0 * x; - } - uxi = i; - } - - while uxi >> 23 == 0 { - uxi <<= 1; - ex -= 1; - } - - /* scale result up */ - if ex > 0 { - uxi -= 1 << 23; - uxi |= (ex as u32) << 23; - } else { - uxi >>= -ex + 1; - } - uxi |= sx; - - f32::from_bits(uxi) -} diff --git a/src/math/jnf.rs b/src/math/jnf.rs deleted file mode 100644 index e5afda448..000000000 --- a/src/math/jnf.rs +++ /dev/null @@ -1,253 +0,0 @@ -/* origin: FreeBSD /usr/src/lib/msun/src/e_jnf.c */ -/* - * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com. - */ -/* - * ==================================================== - * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. - * - * Developed at SunPro, a Sun Microsystems, Inc. business. - * Permission to use, copy, modify, and distribute this - * software is freely granted, provided that this notice - * is preserved. - * ==================================================== - */ - -use super::{fabsf, j0f, j1f, logf, y0f, y1f}; - -/// Integer order of the [Bessel function](https://en.wikipedia.org/wiki/Bessel_function) of the first kind (f32). -pub fn jnf(n: i32, mut x: f32) -> f32 { - let mut ix: u32; - let mut nm1: i32; - let mut sign: bool; - let mut i: i32; - let mut a: f32; - let mut b: f32; - let mut temp: f32; - - ix = x.to_bits(); - sign = (ix >> 31) != 0; - ix &= 0x7fffffff; - if ix > 0x7f800000 { - /* nan */ - return x; - } - - /* J(-n,x) = J(n,-x), use |n|-1 to avoid overflow in -n */ - if n == 0 { - return j0f(x); - } - if n < 0 { - nm1 = -(n + 1); - x = -x; - sign = !sign; - } else { - nm1 = n - 1; - } - if nm1 == 0 { - return j1f(x); - } - - sign &= (n & 1) != 0; /* even n: 0, odd n: signbit(x) */ - x = fabsf(x); - if ix == 0 || ix == 0x7f800000 { - /* if x is 0 or inf */ - b = 0.0; - } else if (nm1 as f32) < x { - /* Safe to use J(n+1,x)=2n/x *J(n,x)-J(n-1,x) */ - a = j0f(x); - b = j1f(x); - i = 0; - while i < nm1 { - i += 1; - temp = b; - b = b * (2.0 * (i as f32) / x) - a; - a = temp; - } - } else { - if ix < 0x35800000 { - /* x < 2**-20 */ - /* x is tiny, return the first Taylor expansion of J(n,x) - * J(n,x) = 1/n!*(x/2)^n - ... - */ - if nm1 > 8 { - /* underflow */ - nm1 = 8; - } - temp = 0.5 * x; - b = temp; - a = 1.0; - i = 2; - while i <= nm1 + 1 { - a *= i as f32; /* a = n! */ - b *= temp; /* b = (x/2)^n */ - i += 1; - } - b = b / a; - } else { - /* use backward recurrence */ - /* x x^2 x^2 - * J(n,x)/J(n-1,x) = ---- ------ ------ ..... - * 2n - 2(n+1) - 2(n+2) - * - * 1 1 1 - * (for large x) = ---- ------ ------ ..... - * 2n 2(n+1) 2(n+2) - * -- - ------ - ------ - - * x x x - * - * Let w = 2n/x and h=2/x, then the above quotient - * is equal to the continued fraction: - * 1 - * = ----------------------- - * 1 - * w - ----------------- - * 1 - * w+h - --------- - * w+2h - ... - * - * To determine how many terms needed, let - * Q(0) = w, Q(1) = w(w+h) - 1, - * Q(k) = (w+k*h)*Q(k-1) - Q(k-2), - * When Q(k) > 1e4 good for single - * When Q(k) > 1e9 good for double - * When Q(k) > 1e17 good for quadruple - */ - /* determine k */ - let mut t: f32; - let mut q0: f32; - let mut q1: f32; - let mut w: f32; - let h: f32; - let mut z: f32; - let mut tmp: f32; - let nf: f32; - let mut k: i32; - - nf = (nm1 as f32) + 1.0; - w = 2.0 * (nf as f32) / x; - h = 2.0 / x; - z = w + h; - q0 = w; - q1 = w * z - 1.0; - k = 1; - while q1 < 1.0e4 { - k += 1; - z += h; - tmp = z * q1 - q0; - q0 = q1; - q1 = tmp; - } - t = 0.0; - i = k; - while i >= 0 { - t = 1.0 / (2.0 * ((i as f32) + nf) / x - t); - i -= 1; - } - a = t; - b = 1.0; - /* estimate log((2/x)^n*n!) = n*log(2/x)+n*ln(n) - * Hence, if n*(log(2n/x)) > ... - * single 8.8722839355e+01 - * double 7.09782712893383973096e+02 - * long double 1.1356523406294143949491931077970765006170e+04 - * then recurrent value may overflow and the result is - * likely underflow to zero - */ - tmp = nf * logf(fabsf(w)); - if tmp < 88.721679688 { - i = nm1; - while i > 0 { - temp = b; - b = 2.0 * (i as f32) * b / x - a; - a = temp; - i -= 1; - } - } else { - i = nm1; - while i > 0 { - temp = b; - b = 2.0 * (i as f32) * b / x - a; - a = temp; - /* scale b to avoid spurious overflow */ - let x1p60 = f32::from_bits(0x5d800000); // 0x1p60 == 2^60 - if b > x1p60 { - a /= b; - t /= b; - b = 1.0; - } - i -= 1; - } - } - z = j0f(x); - w = j1f(x); - if fabsf(z) >= fabsf(w) { - b = t * z / b; - } else { - b = t * w / a; - } - } - } - - if sign { -b } else { b } -} - -/// Integer order of the [Bessel function](https://en.wikipedia.org/wiki/Bessel_function) of the second kind (f32). -pub fn ynf(n: i32, x: f32) -> f32 { - let mut ix: u32; - let mut ib: u32; - let nm1: i32; - let mut sign: bool; - let mut i: i32; - let mut a: f32; - let mut b: f32; - let mut temp: f32; - - ix = x.to_bits(); - sign = (ix >> 31) != 0; - ix &= 0x7fffffff; - if ix > 0x7f800000 { - /* nan */ - return x; - } - if sign && ix != 0 { - /* x < 0 */ - return 0.0 / 0.0; - } - if ix == 0x7f800000 { - return 0.0; - } - - if n == 0 { - return y0f(x); - } - if n < 0 { - nm1 = -(n + 1); - sign = (n & 1) != 0; - } else { - nm1 = n - 1; - sign = false; - } - if nm1 == 0 { - if sign { - return -y1f(x); - } else { - return y1f(x); - } - } - - a = y0f(x); - b = y1f(x); - /* quit if b is -inf */ - ib = b.to_bits(); - i = 0; - while i < nm1 && ib != 0xff800000 { - i += 1; - temp = b; - b = (2.0 * (i as f32) / x) * b - a; - ib = b.to_bits(); - a = temp; - } - - if sign { -b } else { b } -} diff --git a/src/math/ldexp.rs b/src/math/ldexp.rs deleted file mode 100644 index e46242e55..000000000 --- a/src/math/ldexp.rs +++ /dev/null @@ -1,4 +0,0 @@ -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] -pub fn ldexp(x: f64, n: i32) -> f64 { - super::scalbn(x, n) -} diff --git a/src/math/rint.rs b/src/math/rint.rs deleted file mode 100644 index 618b26e54..000000000 --- a/src/math/rint.rs +++ /dev/null @@ -1,50 +0,0 @@ -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] -pub fn rint(x: f64) -> f64 { - let one_over_e = 1.0 / f64::EPSILON; - let as_u64: u64 = x.to_bits(); - let exponent: u64 = as_u64 >> 52 & 0x7ff; - let is_positive = (as_u64 >> 63) == 0; - if exponent >= 0x3ff + 52 { - x - } else { - let ans = if is_positive { - #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))] - let x = force_eval!(x); - let xplusoneovere = x + one_over_e; - #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))] - let xplusoneovere = force_eval!(xplusoneovere); - xplusoneovere - one_over_e - } else { - #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))] - let x = force_eval!(x); - let xminusoneovere = x - one_over_e; - #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))] - let xminusoneovere = force_eval!(xminusoneovere); - xminusoneovere + one_over_e - }; - - if ans == 0.0 { if is_positive { 0.0 } else { -0.0 } } else { ans } - } -} - -// PowerPC tests are failing on LLVM 13: https://github.com/rust-lang/rust/issues/88520 -#[cfg(not(target_arch = "powerpc64"))] -#[cfg(test)] -mod tests { - use super::rint; - - #[test] - fn negative_zero() { - assert_eq!(rint(-0.0_f64).to_bits(), (-0.0_f64).to_bits()); - } - - #[test] - fn sanity_check() { - assert_eq!(rint(-1.0), -1.0); - assert_eq!(rint(2.8), 3.0); - assert_eq!(rint(-0.5), -0.0); - assert_eq!(rint(0.5), 0.0); - assert_eq!(rint(-1.5), -2.0); - assert_eq!(rint(1.5), 2.0); - } -} diff --git a/src/math/rintf.rs b/src/math/rintf.rs deleted file mode 100644 index 0726d83ba..000000000 --- a/src/math/rintf.rs +++ /dev/null @@ -1,50 +0,0 @@ -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] -pub fn rintf(x: f32) -> f32 { - let one_over_e = 1.0 / f32::EPSILON; - let as_u32: u32 = x.to_bits(); - let exponent: u32 = as_u32 >> 23 & 0xff; - let is_positive = (as_u32 >> 31) == 0; - if exponent >= 0x7f + 23 { - x - } else { - let ans = if is_positive { - #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))] - let x = force_eval!(x); - let xplusoneovere = x + one_over_e; - #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))] - let xplusoneovere = force_eval!(xplusoneovere); - xplusoneovere - one_over_e - } else { - #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))] - let x = force_eval!(x); - let xminusoneovere = x - one_over_e; - #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))] - let xminusoneovere = force_eval!(xminusoneovere); - xminusoneovere + one_over_e - }; - - if ans == 0.0 { if is_positive { 0.0 } else { -0.0 } } else { ans } - } -} - -// PowerPC tests are failing on LLVM 13: https://github.com/rust-lang/rust/issues/88520 -#[cfg(not(target_arch = "powerpc64"))] -#[cfg(test)] -mod tests { - use super::rintf; - - #[test] - fn negative_zero() { - assert_eq!(rintf(-0.0_f32).to_bits(), (-0.0_f32).to_bits()); - } - - #[test] - fn sanity_check() { - assert_eq!(rintf(-1.0), -1.0); - assert_eq!(rintf(2.8), 3.0); - assert_eq!(rintf(-0.5), -0.0); - assert_eq!(rintf(0.5), 0.0); - assert_eq!(rintf(-1.5), -2.0); - assert_eq!(rintf(1.5), 2.0); - } -} diff --git a/src/math/round.rs b/src/math/round.rs deleted file mode 100644 index b81ebaa1d..000000000 --- a/src/math/round.rs +++ /dev/null @@ -1,28 +0,0 @@ -use core::f64; - -use super::{copysign, trunc}; - -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] -pub fn round(x: f64) -> f64 { - trunc(x + copysign(0.5 - 0.25 * f64::EPSILON, x)) -} - -#[cfg(test)] -mod tests { - use super::round; - - #[test] - fn negative_zero() { - assert_eq!(round(-0.0_f64).to_bits(), (-0.0_f64).to_bits()); - } - - #[test] - fn sanity_check() { - assert_eq!(round(-1.0), -1.0); - assert_eq!(round(2.8), 3.0); - assert_eq!(round(-0.5), -1.0); - assert_eq!(round(0.5), 1.0); - assert_eq!(round(-1.5), -2.0); - assert_eq!(round(1.5), 2.0); - } -} diff --git a/src/math/roundf.rs b/src/math/roundf.rs deleted file mode 100644 index fb974bbfe..000000000 --- a/src/math/roundf.rs +++ /dev/null @@ -1,30 +0,0 @@ -use core::f32; - -use super::{copysignf, truncf}; - -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] -pub fn roundf(x: f32) -> f32 { - truncf(x + copysignf(0.5 - 0.25 * f32::EPSILON, x)) -} - -// PowerPC tests are failing on LLVM 13: https://github.com/rust-lang/rust/issues/88520 -#[cfg(not(target_arch = "powerpc64"))] -#[cfg(test)] -mod tests { - use super::roundf; - - #[test] - fn negative_zero() { - assert_eq!(roundf(-0.0_f32).to_bits(), (-0.0_f32).to_bits()); - } - - #[test] - fn sanity_check() { - assert_eq!(roundf(-1.0), -1.0); - assert_eq!(roundf(2.8), 3.0); - assert_eq!(roundf(-0.5), -1.0); - assert_eq!(roundf(0.5), 1.0); - assert_eq!(roundf(-1.5), -2.0); - assert_eq!(roundf(1.5), 2.0); - } -} diff --git a/src/math/scalbn.rs b/src/math/scalbn.rs deleted file mode 100644 index 00c455a10..000000000 --- a/src/math/scalbn.rs +++ /dev/null @@ -1,33 +0,0 @@ -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] -pub fn scalbn(x: f64, mut n: i32) -> f64 { - let x1p1023 = f64::from_bits(0x7fe0000000000000); // 0x1p1023 === 2 ^ 1023 - let x1p53 = f64::from_bits(0x4340000000000000); // 0x1p53 === 2 ^ 53 - let x1p_1022 = f64::from_bits(0x0010000000000000); // 0x1p-1022 === 2 ^ (-1022) - - let mut y = x; - - if n > 1023 { - y *= x1p1023; - n -= 1023; - if n > 1023 { - y *= x1p1023; - n -= 1023; - if n > 1023 { - n = 1023; - } - } - } else if n < -1022 { - /* make sure final n < -53 to avoid double - rounding in the subnormal range */ - y *= x1p_1022 * x1p53; - n += 1022 - 53; - if n < -1022 { - y *= x1p_1022 * x1p53; - n += 1022 - 53; - if n < -1022 { - n = -1022; - } - } - } - y * f64::from_bits(((0x3ff + n) as u64) << 52) -} diff --git a/src/math/scalbnf.rs b/src/math/scalbnf.rs deleted file mode 100644 index 73f4bb57a..000000000 --- a/src/math/scalbnf.rs +++ /dev/null @@ -1,29 +0,0 @@ -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] -pub fn scalbnf(mut x: f32, mut n: i32) -> f32 { - let x1p127 = f32::from_bits(0x7f000000); // 0x1p127f === 2 ^ 127 - let x1p_126 = f32::from_bits(0x800000); // 0x1p-126f === 2 ^ -126 - let x1p24 = f32::from_bits(0x4b800000); // 0x1p24f === 2 ^ 24 - - if n > 127 { - x *= x1p127; - n -= 127; - if n > 127 { - x *= x1p127; - n -= 127; - if n > 127 { - n = 127; - } - } - } else if n < -126 { - x *= x1p_126 * x1p24; - n += 126 - 24; - if n < -126 { - x *= x1p_126 * x1p24; - n += 126 - 24; - if n < -126 { - n = -126; - } - } - } - x * f32::from_bits(((0x7f + n) as u32) << 23) -} diff --git a/src/math/sqrt.rs b/src/math/sqrt.rs deleted file mode 100644 index e2907384d..000000000 --- a/src/math/sqrt.rs +++ /dev/null @@ -1,282 +0,0 @@ -/* origin: FreeBSD /usr/src/lib/msun/src/e_sqrt.c */ -/* - * ==================================================== - * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. - * - * Developed at SunSoft, a Sun Microsystems, Inc. business. - * Permission to use, copy, modify, and distribute this - * software is freely granted, provided that this notice - * is preserved. - * ==================================================== - */ -/* sqrt(x) - * Return correctly rounded sqrt. - * ------------------------------------------ - * | Use the hardware sqrt if you have one | - * ------------------------------------------ - * Method: - * Bit by bit method using integer arithmetic. (Slow, but portable) - * 1. Normalization - * Scale x to y in [1,4) with even powers of 2: - * find an integer k such that 1 <= (y=x*2^(2k)) < 4, then - * sqrt(x) = 2^k * sqrt(y) - * 2. Bit by bit computation - * Let q = sqrt(y) truncated to i bit after binary point (q = 1), - * i 0 - * i+1 2 - * s = 2*q , and y = 2 * ( y - q ). (1) - * i i i i - * - * To compute q from q , one checks whether - * i+1 i - * - * -(i+1) 2 - * (q + 2 ) <= y. (2) - * i - * -(i+1) - * If (2) is false, then q = q ; otherwise q = q + 2 . - * i+1 i i+1 i - * - * With some algebraic manipulation, it is not difficult to see - * that (2) is equivalent to - * -(i+1) - * s + 2 <= y (3) - * i i - * - * The advantage of (3) is that s and y can be computed by - * i i - * the following recurrence formula: - * if (3) is false - * - * s = s , y = y ; (4) - * i+1 i i+1 i - * - * otherwise, - * -i -(i+1) - * s = s + 2 , y = y - s - 2 (5) - * i+1 i i+1 i i - * - * One may easily use induction to prove (4) and (5). - * Note. Since the left hand side of (3) contain only i+2 bits, - * it does not necessary to do a full (53-bit) comparison - * in (3). - * 3. Final rounding - * After generating the 53 bits result, we compute one more bit. - * Together with the remainder, we can decide whether the - * result is exact, bigger than 1/2ulp, or less than 1/2ulp - * (it will never equal to 1/2ulp). - * The rounding mode can be detected by checking whether - * huge + tiny is equal to huge, and whether huge - tiny is - * equal to huge for some floating point number "huge" and "tiny". - * - * Special cases: - * sqrt(+-0) = +-0 ... exact - * sqrt(inf) = inf - * sqrt(-ve) = NaN ... with invalid signal - * sqrt(NaN) = NaN ... with invalid signal for signaling NaN - */ - -use core::f64; - -/// The square root of `x` (f64). -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] -pub fn sqrt(x: f64) -> f64 { - // On wasm32 we know that LLVM's intrinsic will compile to an optimized - // `f64.sqrt` native instruction, so we can leverage this for both code size - // and speed. - llvm_intrinsically_optimized! { - #[cfg(target_arch = "wasm32")] { - return if x < 0.0 { - f64::NAN - } else { - unsafe { ::core::intrinsics::sqrtf64(x) } - } - } - } - #[cfg(all(target_feature = "sse2", not(feature = "force-soft-floats")))] - { - // Note: This path is unlikely since LLVM will usually have already - // optimized sqrt calls into hardware instructions if sse2 is available, - // but if someone does end up here they'll appreciate the speed increase. - #[cfg(target_arch = "x86")] - use core::arch::x86::*; - #[cfg(target_arch = "x86_64")] - use core::arch::x86_64::*; - unsafe { - let m = _mm_set_sd(x); - let m_sqrt = _mm_sqrt_pd(m); - _mm_cvtsd_f64(m_sqrt) - } - } - #[cfg(any(not(target_feature = "sse2"), feature = "force-soft-floats"))] - { - use core::num::Wrapping; - - const TINY: f64 = 1.0e-300; - - let mut z: f64; - let sign: Wrapping = Wrapping(0x80000000); - let mut ix0: i32; - let mut s0: i32; - let mut q: i32; - let mut m: i32; - let mut t: i32; - let mut i: i32; - let mut r: Wrapping; - let mut t1: Wrapping; - let mut s1: Wrapping; - let mut ix1: Wrapping; - let mut q1: Wrapping; - - ix0 = (x.to_bits() >> 32) as i32; - ix1 = Wrapping(x.to_bits() as u32); - - /* take care of Inf and NaN */ - if (ix0 & 0x7ff00000) == 0x7ff00000 { - return x * x + x; /* sqrt(NaN)=NaN, sqrt(+inf)=+inf, sqrt(-inf)=sNaN */ - } - /* take care of zero */ - if ix0 <= 0 { - if ((ix0 & !(sign.0 as i32)) | ix1.0 as i32) == 0 { - return x; /* sqrt(+-0) = +-0 */ - } - if ix0 < 0 { - return (x - x) / (x - x); /* sqrt(-ve) = sNaN */ - } - } - /* normalize x */ - m = ix0 >> 20; - if m == 0 { - /* subnormal x */ - while ix0 == 0 { - m -= 21; - ix0 |= (ix1 >> 11).0 as i32; - ix1 <<= 21; - } - i = 0; - while (ix0 & 0x00100000) == 0 { - i += 1; - ix0 <<= 1; - } - m -= i - 1; - ix0 |= (ix1 >> (32 - i) as usize).0 as i32; - ix1 = ix1 << i as usize; - } - m -= 1023; /* unbias exponent */ - ix0 = (ix0 & 0x000fffff) | 0x00100000; - if (m & 1) == 1 { - /* odd m, double x to make it even */ - ix0 += ix0 + ((ix1 & sign) >> 31).0 as i32; - ix1 += ix1; - } - m >>= 1; /* m = [m/2] */ - - /* generate sqrt(x) bit by bit */ - ix0 += ix0 + ((ix1 & sign) >> 31).0 as i32; - ix1 += ix1; - q = 0; /* [q,q1] = sqrt(x) */ - q1 = Wrapping(0); - s0 = 0; - s1 = Wrapping(0); - r = Wrapping(0x00200000); /* r = moving bit from right to left */ - - while r != Wrapping(0) { - t = s0 + r.0 as i32; - if t <= ix0 { - s0 = t + r.0 as i32; - ix0 -= t; - q += r.0 as i32; - } - ix0 += ix0 + ((ix1 & sign) >> 31).0 as i32; - ix1 += ix1; - r >>= 1; - } - - r = sign; - while r != Wrapping(0) { - t1 = s1 + r; - t = s0; - if t < ix0 || (t == ix0 && t1 <= ix1) { - s1 = t1 + r; - if (t1 & sign) == sign && (s1 & sign) == Wrapping(0) { - s0 += 1; - } - ix0 -= t; - if ix1 < t1 { - ix0 -= 1; - } - ix1 -= t1; - q1 += r; - } - ix0 += ix0 + ((ix1 & sign) >> 31).0 as i32; - ix1 += ix1; - r >>= 1; - } - - /* use floating add to find out rounding direction */ - if (ix0 as u32 | ix1.0) != 0 { - z = 1.0 - TINY; /* raise inexact flag */ - if z >= 1.0 { - z = 1.0 + TINY; - if q1.0 == 0xffffffff { - q1 = Wrapping(0); - q += 1; - } else if z > 1.0 { - if q1.0 == 0xfffffffe { - q += 1; - } - q1 += Wrapping(2); - } else { - q1 += q1 & Wrapping(1); - } - } - } - ix0 = (q >> 1) + 0x3fe00000; - ix1 = q1 >> 1; - if (q & 1) == 1 { - ix1 |= sign; - } - ix0 += m << 20; - f64::from_bits((ix0 as u64) << 32 | ix1.0 as u64) - } -} - -#[cfg(test)] -mod tests { - use core::f64::*; - - use super::*; - - #[test] - fn sanity_check() { - assert_eq!(sqrt(100.0), 10.0); - assert_eq!(sqrt(4.0), 2.0); - } - - /// The spec: https://en.cppreference.com/w/cpp/numeric/math/sqrt - #[test] - fn spec_tests() { - // Not Asserted: FE_INVALID exception is raised if argument is negative. - assert!(sqrt(-1.0).is_nan()); - assert!(sqrt(NAN).is_nan()); - for f in [0.0, -0.0, INFINITY].iter().copied() { - assert_eq!(sqrt(f), f); - } - } - - #[test] - fn conformance_tests() { - let values = [3.14159265359, 10000.0, f64::from_bits(0x0000000f), INFINITY]; - let results = [ - 4610661241675116657u64, - 4636737291354636288u64, - 2197470602079456986u64, - 9218868437227405312u64, - ]; - - for i in 0..values.len() { - let bits = f64::to_bits(sqrt(values[i])); - assert_eq!(results[i], bits); - } - } -} diff --git a/src/math/sqrtf.rs b/src/math/sqrtf.rs deleted file mode 100644 index a738fc0b6..000000000 --- a/src/math/sqrtf.rs +++ /dev/null @@ -1,167 +0,0 @@ -/* origin: FreeBSD /usr/src/lib/msun/src/e_sqrtf.c */ -/* - * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com. - */ -/* - * ==================================================== - * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. - * - * Developed at SunPro, a Sun Microsystems, Inc. business. - * Permission to use, copy, modify, and distribute this - * software is freely granted, provided that this notice - * is preserved. - * ==================================================== - */ - -/// The square root of `x` (f32). -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] -pub fn sqrtf(x: f32) -> f32 { - // On wasm32 we know that LLVM's intrinsic will compile to an optimized - // `f32.sqrt` native instruction, so we can leverage this for both code size - // and speed. - llvm_intrinsically_optimized! { - #[cfg(target_arch = "wasm32")] { - return if x < 0.0 { - ::core::f32::NAN - } else { - unsafe { ::core::intrinsics::sqrtf32(x) } - } - } - } - #[cfg(all(target_feature = "sse", not(feature = "force-soft-floats")))] - { - // Note: This path is unlikely since LLVM will usually have already - // optimized sqrt calls into hardware instructions if sse is available, - // but if someone does end up here they'll appreciate the speed increase. - #[cfg(target_arch = "x86")] - use core::arch::x86::*; - #[cfg(target_arch = "x86_64")] - use core::arch::x86_64::*; - unsafe { - let m = _mm_set_ss(x); - let m_sqrt = _mm_sqrt_ss(m); - _mm_cvtss_f32(m_sqrt) - } - } - #[cfg(any(not(target_feature = "sse"), feature = "force-soft-floats"))] - { - const TINY: f32 = 1.0e-30; - - let mut z: f32; - let sign: i32 = 0x80000000u32 as i32; - let mut ix: i32; - let mut s: i32; - let mut q: i32; - let mut m: i32; - let mut t: i32; - let mut i: i32; - let mut r: u32; - - ix = x.to_bits() as i32; - - /* take care of Inf and NaN */ - if (ix as u32 & 0x7f800000) == 0x7f800000 { - return x * x + x; /* sqrt(NaN)=NaN, sqrt(+inf)=+inf, sqrt(-inf)=sNaN */ - } - - /* take care of zero */ - if ix <= 0 { - if (ix & !sign) == 0 { - return x; /* sqrt(+-0) = +-0 */ - } - if ix < 0 { - return (x - x) / (x - x); /* sqrt(-ve) = sNaN */ - } - } - - /* normalize x */ - m = ix >> 23; - if m == 0 { - /* subnormal x */ - i = 0; - while ix & 0x00800000 == 0 { - ix <<= 1; - i = i + 1; - } - m -= i - 1; - } - m -= 127; /* unbias exponent */ - ix = (ix & 0x007fffff) | 0x00800000; - if m & 1 == 1 { - /* odd m, double x to make it even */ - ix += ix; - } - m >>= 1; /* m = [m/2] */ - - /* generate sqrt(x) bit by bit */ - ix += ix; - q = 0; - s = 0; - r = 0x01000000; /* r = moving bit from right to left */ - - while r != 0 { - t = s + r as i32; - if t <= ix { - s = t + r as i32; - ix -= t; - q += r as i32; - } - ix += ix; - r >>= 1; - } - - /* use floating add to find out rounding direction */ - if ix != 0 { - z = 1.0 - TINY; /* raise inexact flag */ - if z >= 1.0 { - z = 1.0 + TINY; - if z > 1.0 { - q += 2; - } else { - q += q & 1; - } - } - } - - ix = (q >> 1) + 0x3f000000; - ix += m << 23; - f32::from_bits(ix as u32) - } -} - -// PowerPC tests are failing on LLVM 13: https://github.com/rust-lang/rust/issues/88520 -#[cfg(not(target_arch = "powerpc64"))] -#[cfg(test)] -mod tests { - use core::f32::*; - - use super::*; - - #[test] - fn sanity_check() { - assert_eq!(sqrtf(100.0), 10.0); - assert_eq!(sqrtf(4.0), 2.0); - } - - /// The spec: https://en.cppreference.com/w/cpp/numeric/math/sqrt - #[test] - fn spec_tests() { - // Not Asserted: FE_INVALID exception is raised if argument is negative. - assert!(sqrtf(-1.0).is_nan()); - assert!(sqrtf(NAN).is_nan()); - for f in [0.0, -0.0, INFINITY].iter().copied() { - assert_eq!(sqrtf(f), f); - } - } - - #[test] - fn conformance_tests() { - let values = [3.14159265359f32, 10000.0f32, f32::from_bits(0x0000000f), INFINITY]; - let results = [1071833029u32, 1120403456u32, 456082799u32, 2139095040u32]; - - for i in 0..values.len() { - let bits = f32::to_bits(sqrtf(values[i])); - assert_eq!(results[i], bits); - } - } -} diff --git a/src/math/trunc.rs b/src/math/trunc.rs deleted file mode 100644 index f7892a2c5..000000000 --- a/src/math/trunc.rs +++ /dev/null @@ -1,40 +0,0 @@ -use core::f64; - -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] -pub fn trunc(x: f64) -> f64 { - // On wasm32 we know that LLVM's intrinsic will compile to an optimized - // `f64.trunc` native instruction, so we can leverage this for both code size - // and speed. - llvm_intrinsically_optimized! { - #[cfg(target_arch = "wasm32")] { - return unsafe { ::core::intrinsics::truncf64(x) } - } - } - let x1p120 = f64::from_bits(0x4770000000000000); // 0x1p120f === 2 ^ 120 - - let mut i: u64 = x.to_bits(); - let mut e: i64 = (i >> 52 & 0x7ff) as i64 - 0x3ff + 12; - let m: u64; - - if e >= 52 + 12 { - return x; - } - if e < 12 { - e = 1; - } - m = -1i64 as u64 >> e; - if (i & m) == 0 { - return x; - } - force_eval!(x + x1p120); - i &= !m; - f64::from_bits(i) -} - -#[cfg(test)] -mod tests { - #[test] - fn sanity_check() { - assert_eq!(super::trunc(1.1), 1.0); - } -} diff --git a/src/math/truncf.rs b/src/math/truncf.rs deleted file mode 100644 index 20d5b73bd..000000000 --- a/src/math/truncf.rs +++ /dev/null @@ -1,42 +0,0 @@ -use core::f32; - -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] -pub fn truncf(x: f32) -> f32 { - // On wasm32 we know that LLVM's intrinsic will compile to an optimized - // `f32.trunc` native instruction, so we can leverage this for both code size - // and speed. - llvm_intrinsically_optimized! { - #[cfg(target_arch = "wasm32")] { - return unsafe { ::core::intrinsics::truncf32(x) } - } - } - let x1p120 = f32::from_bits(0x7b800000); // 0x1p120f === 2 ^ 120 - - let mut i: u32 = x.to_bits(); - let mut e: i32 = (i >> 23 & 0xff) as i32 - 0x7f + 9; - let m: u32; - - if e >= 23 + 9 { - return x; - } - if e < 9 { - e = 1; - } - m = -1i32 as u32 >> e; - if (i & m) == 0 { - return x; - } - force_eval!(x + x1p120); - i &= !m; - f32::from_bits(i) -} - -// PowerPC tests are failing on LLVM 13: https://github.com/rust-lang/rust/issues/88520 -#[cfg(not(target_arch = "powerpc64"))] -#[cfg(test)] -mod tests { - #[test] - fn sanity_check() { - assert_eq!(super::truncf(1.1), 1.0); - } -}