diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml
new file mode 100644
index 000000000..5ce0dbc26
--- /dev/null
+++ b/.github/workflows/main.yaml
@@ -0,0 +1,324 @@
+name: CI
+on:
+  push:
+    branches:
+      - master
+  pull_request:
+
+concurrency:
+  # Make sure that new pushes cancel running jobs
+  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
+  cancel-in-progress: true
+
+env:
+  CARGO_TERM_COLOR: always
+  RUSTDOCFLAGS: -Dwarnings
+  RUSTFLAGS: -Dwarnings
+  RUST_BACKTRACE: full
+  BENCHMARK_RUSTC: nightly-2025-01-16 # Pin the toolchain for reproducible results
+
+jobs:
+  test:
+    name: Build and test
+    timeout-minutes: 60
+    strategy:
+      fail-fast: false # Let the remaining targets finish when one of them fails
+      matrix:
+        include:
+        - target: aarch64-apple-darwin
+          os: macos-15
+          # FIXME: pinned due to https://github.com/llvm/llvm-project/issues/127804
+          channel: nightly-2025-02-07
+        - target: aarch64-unknown-linux-gnu
+          os: ubuntu-24.04-arm
+          # FIXME: pinned due to https://github.com/llvm/llvm-project/issues/127804
+          channel: nightly-2025-02-07
+        - target: aarch64-pc-windows-msvc
+          os: windows-2025
+          build_only: 1 # Can't run on x86 hosts
+        - target: arm-unknown-linux-gnueabi
+          os: ubuntu-24.04
+        - target: arm-unknown-linux-gnueabihf
+          os: ubuntu-24.04
+        - target: armv7-unknown-linux-gnueabihf
+          os: ubuntu-24.04
+        - target: i586-unknown-linux-gnu
+          os: ubuntu-24.04
+        - target: i686-unknown-linux-gnu
+          os: ubuntu-24.04
+        - target: loongarch64-unknown-linux-gnu
+          os: ubuntu-24.04
+        - target: powerpc-unknown-linux-gnu
+          os: ubuntu-24.04
+        - target: powerpc64-unknown-linux-gnu
+          os: ubuntu-24.04
+        - target: powerpc64le-unknown-linux-gnu
+          os: ubuntu-24.04
+        - target: riscv64gc-unknown-linux-gnu
+          os: ubuntu-24.04
+        - target: thumbv6m-none-eabi
+          os: ubuntu-24.04
+        - target: thumbv7em-none-eabi
+          os: ubuntu-24.04
+        - target: thumbv7em-none-eabihf
+          os: ubuntu-24.04
+        - target: thumbv7m-none-eabi
+          os: ubuntu-24.04
+        - target: x86_64-unknown-linux-gnu
+          os: ubuntu-24.04
+        - target: x86_64-apple-darwin
+          os: macos-13
+        - target: wasm32-unknown-unknown
+          os: ubuntu-24.04
+          build_only: 1 # Passed to the steps as BUILD_ONLY through the job-level env
+        - target: i686-pc-windows-msvc
+          os: windows-2025
+        - target: x86_64-pc-windows-msvc
+          os: windows-2025
+        - target: i686-pc-windows-gnu
+          os: windows-2025
+          # FIXME: pinned due to https://github.com/rust-lang/rust/issues/136795
+          channel: nightly-2025-02-07-i686-gnu
+        - target: x86_64-pc-windows-gnu
+          os: windows-2025
+          channel: nightly-x86_64-gnu
+    runs-on: ${{ matrix.os }}
+    env:
+      BUILD_ONLY: ${{ matrix.build_only }} # Empty unless the matrix entry sets build_only
+    steps:
+    - name: Print runner information
+      run: uname -a
+    - uses: actions/checkout@v4
+      with:
+        submodules: true # The musl sources are a git submodule (see .gitmodules)
+    - name: Install Rust (rustup)
+      shell: bash
+      run: |
+        channel="nightly"
+        # Account for channels that have required components (MinGW)
+        [ -n "${{ matrix.channel }}" ] && channel="${{ matrix.channel }}"
+        rustup update "$channel" --no-self-update
+        rustup default "$channel"
+        rustup target add "${{ matrix.target }}"
+        rustup component add clippy llvm-tools-preview
+    - uses: taiki-e/install-action@nextest
+    - uses: Swatinem/rust-cache@v2
+      with:
+        key: ${{ matrix.target }} # Added to the cache key so each target gets its own cache
+
+    - name: Verify API list
+      if: matrix.os == 'ubuntu-24.04'
+      run: python3 etc/update-api-list.py --check
+
+    # Non-linux tests just use our raw script
+    - name: Run locally
+      if: matrix.os != 'ubuntu-24.04' || contains(matrix.target, 'wasm')
+      shell: bash
+      run: ./ci/run.sh ${{ matrix.target }}
+
+    # Otherwise we use our docker containers to run builds
+    - name: Run in Docker
+      if: matrix.os == 'ubuntu-24.04' && !contains(matrix.target, 'wasm')
+      run: |
+        rustup target add x86_64-unknown-linux-musl
+        cargo generate-lockfile && ./ci/run-docker.sh ${{ matrix.target }}
+
+    - name: Print test logs if available
+      if: always()
+      run: if [ -f "target/test-log.txt" ]; then cat target/test-log.txt; fi
+      shell: bash
+
+  clippy:
+    name: Clippy
+    runs-on: ubuntu-24.04
+    timeout-minutes: 10
+    steps:
+    - uses: actions/checkout@v4 # Release tag rather than the mutable `master` branch
+      with:
+        submodules: true
+    - name: Install Rust
+      run: |
+        rustup update nightly --no-self-update
+        rustup default nightly
+        rustup component add clippy
+    - uses: Swatinem/rust-cache@v2
+    - run: cargo clippy --all --all-features --all-targets
+
+  builtins:
+    name: Check use with compiler-builtins
+    runs-on: ubuntu-24.04
+    timeout-minutes: 10
+    steps:
+    - uses: actions/checkout@v4 # Release tag rather than the mutable `master` branch
+    - name: Install Rust
+      run: rustup update nightly --no-self-update && rustup default nightly
+    - uses: Swatinem/rust-cache@v2
+    - run: cargo check --manifest-path crates/compiler-builtins-smoke-test/Cargo.toml
+    - run: cargo test --manifest-path crates/compiler-builtins-smoke-test/Cargo.toml
+
+  benchmarks:
+    name: Benchmarks
+    runs-on: ubuntu-24.04
+    timeout-minutes: 20
+    steps:
+    - uses: actions/checkout@v4 # Release tag rather than the mutable `master` branch
+      with:
+        submodules: true
+    - uses: taiki-e/install-action@cargo-binstall
+
+    - name: Set up dependencies
+      run: |
+        sudo apt update
+        # valgrind, gdb and libc6-dbg are needed for iai-callgrind
+        sudo apt install -y valgrind gdb libc6-dbg
+        rustup update "$BENCHMARK_RUSTC" --no-self-update
+        rustup default "$BENCHMARK_RUSTC"
+        # Install the version of iai-callgrind-runner that is specified in Cargo.toml
+        iai_version="$(cargo metadata --format-version=1 --features icount |
+           jq -r '.packages[] | select(.name == "iai-callgrind").version')"
+        cargo binstall -y iai-callgrind-runner --version "$iai_version"
+
+    - uses: Swatinem/rust-cache@v2
+
+    - name: Run icount benchmarks
+      env:
+        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        PR_NUMBER: ${{ github.event.pull_request.number }}
+      run: ./ci/bench-icount.sh
+
+    - name: Upload the benchmark baseline
+      uses: actions/upload-artifact@v4
+      with:
+        name: ${{ env.BASELINE_NAME }}
+        path: ${{ env.BASELINE_NAME }}.tar.xz
+
+    - name: Run wall time benchmarks
+      run: |
+        # Always use the same seed for benchmarks. Ideally we should switch to a
+        # non-random generator.
+        export LIBM_SEED=benchesbenchesbenchesbencheswoo!
+        cargo bench --all --features short-benchmarks,build-musl,force-soft-floats
+
+    - name: Print test logs if available
+      if: always()
+      run: if [ -f "target/test-log.txt" ]; then cat target/test-log.txt; fi
+      shell: bash
+
+  msrv:
+    name: Check MSRV
+    runs-on: ubuntu-24.04
+    timeout-minutes: 10
+    env:
+      RUSTFLAGS: # No need to check warnings on old MSRV, unset `-Dwarnings`
+    steps:
+    - uses: actions/checkout@v4 # Release tag rather than the mutable `master` branch
+    - name: Install Rust
+      run: |
+        msrv="$(perl -ne 'print if s/rust-version\s*=\s*"(.*)"/\1/g' libm/Cargo.toml)"
+        echo "MSRV: $msrv"
+        rustup update "$msrv" --no-self-update && rustup default "$msrv"
+    - uses: Swatinem/rust-cache@v2
+    - run: |
+        # FIXME(msrv): Remove the workspace Cargo.toml so 1.63 cargo doesn't see
+        # `edition = "2024"` and get spooked.
+        rm Cargo.toml
+        cargo build --manifest-path libm/Cargo.toml
+
+  rustfmt:
+    name: Rustfmt
+    runs-on: ubuntu-24.04
+    timeout-minutes: 10
+    steps:
+    - uses: actions/checkout@v4 # Release tag rather than the mutable `master` branch
+    - name: Install Rust
+      run: |
+        rustup update nightly --no-self-update
+        rustup default nightly
+        rustup component add rustfmt
+    - run: cargo fmt -- --check
+
+  # Determine which extensive tests should be run based on changed files.
+  calculate_extensive_matrix:
+    name: Calculate job matrix
+    runs-on: ubuntu-24.04
+    timeout-minutes: 10
+    env:
+      GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+      PR_NUMBER: ${{ github.event.pull_request.number }} # Empty on push events
+    outputs:
+      matrix: ${{ steps.script.outputs.matrix }} # Consumed by the `extensive` job
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 100 # NOTE(review): presumably enough history for ci-util.py; confirm
+      - name: Fetch pull request ref
+        run: git fetch origin "$GITHUB_REF:$GITHUB_REF"
+        if: github.event_name == 'pull_request'
+      - run: python3 ci/ci-util.py generate-matrix >> "$GITHUB_OUTPUT"
+        id: script
+
+  extensive:
+    name: Extensive tests for ${{ matrix.ty }}
+    needs:
+      # Wait on `clippy` so we have some confidence that the crate will build
+      - clippy
+      - calculate_extensive_matrix
+    runs-on: ubuntu-24.04
+    timeout-minutes: 240 # 4 hours
+    strategy:
+      matrix:
+        # Use the output from `calculate_extensive_matrix` to calculate the matrix
+        # FIXME: it would be better to run all jobs (i.e. all types) but mark those that
+        # didn't change as skipped, rather than completely excluding the job. However,
+        # this is not currently possible https://github.com/actions/runner/issues/1985.
+        include: ${{ fromJSON(needs.calculate_extensive_matrix.outputs.matrix).matrix }}
+    env:
+      TO_TEST: ${{ matrix.to_test }}
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          submodules: true
+      - name: Install Rust
+        run: |
+          rustup update nightly --no-self-update
+          rustup default nightly
+      - uses: Swatinem/rust-cache@v2
+      - name: Run extensive tests
+        run: |
+          echo "Tests to run: '$TO_TEST'"
+          if [ -z "$TO_TEST" ]; then
+            echo "No tests to run, exiting."
+            exit
+          fi
+          set -x
+
+          # Run the non-extensive tests first to catch any easy failures
+          cargo t --profile release-checked -- "$TO_TEST"
+
+          LIBM_EXTENSIVE_TESTS="$TO_TEST" cargo t \
+            --features build-mpfr,unstable,force-soft-floats \
+            --profile release-checked \
+            -- extensive
+      - name: Print test logs if available
+        if: always() # Without this the step is skipped exactly when the tests fail
+        run: if [ -f "target/test-log.txt" ]; then cat target/test-log.txt; fi
+        shell: bash
+
+  success:
+    needs:
+      - test
+      - builtins
+      - benchmarks
+      - msrv
+      - rustfmt
+      - extensive # Also gates on `clippy` and `calculate_extensive_matrix`, which it needs
+    runs-on: ubuntu-24.04
+    timeout-minutes: 10
+    # GitHub branch protection is exceedingly silly and treats "jobs skipped because a dependency
+    # failed" as success. So we have to do some contortions to ensure the job fails if any of its
+    # dependencies fails.
+    if: always() # make sure this is never "skipped"
+    steps:
+      # Manually check the status of all dependencies. `if: failure()` does not work.
+      - name: check if any dependency failed
+        run: jq --exit-status 'all(.result == "success")' <<< '${{ toJson(needs) }}'
diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
deleted file mode 100644
index 15eba6e89..000000000
--- a/.github/workflows/main.yml
+++ /dev/null
@@ -1,180 +0,0 @@
-name: CI
-on: [push, pull_request]
-
-env:
-  CARGO_TERM_VERBOSE: true
-  RUSTDOCFLAGS: -Dwarnings
-  RUSTFLAGS: -Dwarnings
-  RUST_BACKTRACE: full
-
-jobs:
-  test:
-    name: Docker
-    timeout-minutes: 20
-    strategy:
-      fail-fast: false
-      matrix:
-        include:
-        - target: aarch64-apple-darwin
-          os: macos-latest
-        - target: aarch64-unknown-linux-gnu
-          os: ubuntu-latest
-        - target: aarch64-pc-windows-msvc
-          os: windows-latest
-          build_only: 1 # Can't run on x86 hosts
-        - target: arm-unknown-linux-gnueabi
-          os: ubuntu-latest
-        - target: arm-unknown-linux-gnueabihf
-          os: ubuntu-latest
-        - target: armv7-unknown-linux-gnueabihf
-          os: ubuntu-latest
-        - target: i586-unknown-linux-gnu
-          os: ubuntu-latest
-        - target: i686-unknown-linux-gnu
-          os: ubuntu-latest
-        - target: powerpc-unknown-linux-gnu
-          os: ubuntu-latest
-        - target: powerpc64-unknown-linux-gnu
-          os: ubuntu-latest
-        - target: powerpc64le-unknown-linux-gnu
-          os: ubuntu-latest
-        - target: riscv64gc-unknown-linux-gnu
-          os: ubuntu-latest
-        - target: thumbv6m-none-eabi
-          os: ubuntu-latest
-        - target: thumbv7em-none-eabi
-          os: ubuntu-latest
-        - target: thumbv7em-none-eabihf
-          os: ubuntu-latest
-        - target: thumbv7m-none-eabi
-          os: ubuntu-latest
-        - target: x86_64-unknown-linux-gnu
-          os: ubuntu-latest
-        - target: x86_64-apple-darwin
-          os: macos-13
-        - target: i686-pc-windows-msvc
-          os: windows-latest
-        - target: x86_64-pc-windows-msvc
-          os: windows-latest
-        - target: i686-pc-windows-gnu
-          os: windows-latest
-          channel: nightly-i686-gnu
-        - target: x86_64-pc-windows-gnu
-          os: windows-latest
-          channel: nightly-x86_64-gnu
-    runs-on: ${{ matrix.os }}
-    env:
-      BUILD_ONLY: ${{ matrix.build_only }}
-    steps:
-    - name: Print runner information
-      run: uname -a
-    - uses: actions/checkout@v4
-    - name: Install Rust (rustup)
-      shell: bash
-      run: |
-        channel="nightly"
-        # Account for channels that have required components (MinGW)
-        [ -n "${{ matrix.channel }}" ] && channel="${{ matrix.channel }}"
-        rustup update "$channel" --no-self-update
-        rustup default "$channel"
-        rustup target add ${{ matrix.target }}
-        rustup component add llvm-tools-preview
-    - uses: Swatinem/rust-cache@v2
-      with:
-        key: ${{ matrix.target }}
-
-    - name: Download musl source
-      run: ./ci/download-musl.sh
-      shell: bash
-
-    # Non-linux tests just use our raw script
-    - name: Run locally
-      if: matrix.os != 'ubuntu-latest'
-      shell: bash
-      run: ./ci/run.sh ${{ matrix.target }}
-
-    # Otherwise we use our docker containers to run builds
-    - name: Run in Docker
-      if: matrix.os == 'ubuntu-latest'
-      run: |
-        rustup target add x86_64-unknown-linux-musl
-        cargo generate-lockfile && ./ci/run-docker.sh ${{ matrix.target }}
-
-  wasm:
-    name: WebAssembly
-    runs-on: ubuntu-latest
-    steps:
-    - uses: actions/checkout@master
-    - name: Install Rust
-      run: rustup update nightly --no-self-update && rustup default nightly
-    - run: rustup target add wasm32-unknown-unknown
-    - uses: Swatinem/rust-cache@v2
-    - run: cargo build --target wasm32-unknown-unknown
-
-  builtins:
-    name: "The compiler-builtins crate works"
-    runs-on: ubuntu-latest
-    steps:
-    - uses: actions/checkout@master
-    - name: Install Rust
-      run: rustup update nightly --no-self-update && rustup default nightly
-    - uses: Swatinem/rust-cache@v2
-    - run: cargo build -p cb
-
-  benchmarks:
-    name: Benchmarks
-    runs-on: ubuntu-latest
-    steps:
-    - uses: actions/checkout@master
-    - name: Install Rust
-      run: rustup update nightly --no-self-update && rustup default nightly
-    - uses: Swatinem/rust-cache@v2
-    - name: Download musl source
-      run: ./ci/download-musl.sh
-    - run: cargo bench --all
-
-  msrv:
-    name: Check MSRV
-    runs-on: ubuntu-latest
-    env:
-      RUSTFLAGS: # No need to check warnings on old MSRV, unset `-Dwarnings`
-    steps:
-    - uses: actions/checkout@master
-    - run: |
-        msrv="$(perl -ne 'print if s/rust-version\s*=\s*"(.*)"/\1/g' Cargo.toml)"
-        echo "MSRV: $msrv"
-        echo "MSRV=$msrv" >> "$GITHUB_ENV"
-    - name: Install Rust
-      run: rustup update "$MSRV" --no-self-update && rustup default "$MSRV"
-    - uses: Swatinem/rust-cache@v2
-    - run: cargo build -p libm
-
-  rustfmt:
-    name: Rustfmt
-    runs-on: ubuntu-latest
-    steps:
-    - uses: actions/checkout@master
-    - name: Install Rust
-      run: |
-        rustup update nightly --no-self-update
-        rustup default nightly
-        rustup component add rustfmt
-    - run: cargo fmt -- --check
-
-  success:
-    needs:
-      - test
-      - wasm
-      - builtins
-      - benchmarks
-      - msrv
-      - rustfmt
-    runs-on: ubuntu-latest
-    # GitHub branch protection is exceedingly silly and treats "jobs skipped because a dependency
-    # failed" as success. So we have to do some contortions to ensure the job fails if any of its
-    # dependencies fails.
-    if: always() # make sure this is never "skipped"
-    steps:
-      # Manually check the status of all dependencies. `if: failure()` does not work.
-      - name: check if any dependency failed
-        run: jq --exit-status 'all(.result == "success")' <<< '${{ toJson(needs) }}'
diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yaml
similarity index 95%
rename from .github/workflows/publish.yml
rename to .github/workflows/publish.yaml
index e715c6187..15904079d 100644
--- a/.github/workflows/publish.yml
+++ b/.github/workflows/publish.yaml
@@ -12,7 +12,7 @@ on:
 jobs:
   release-plz:
     name: Release-plz
-    runs-on: ubuntu-latest
+    runs-on: ubuntu-24.04
     steps:
       - name: Checkout repository
         uses: actions/checkout@v4
diff --git a/.gitignore b/.gitignore
index b6a532751..d5caba1a0 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,8 +2,10 @@
 .#*
 /bin
 /math/src
-/math/target
-/target
+target
 Cargo.lock
-musl/
 **.tar.gz
+
+# Benchmark cache
+iai-home
+baseline-*
diff --git a/.gitmodules b/.gitmodules
new file mode 100644
index 000000000..35b269ead
--- /dev/null
+++ b/.gitmodules
@@ -0,0 +1,4 @@
+[submodule "musl"]
+	path = crates/musl-math-sys/musl
+	url = https://git.musl-libc.org/git/musl
+	shallow = true
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index a39623696..dc4006035 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -6,9 +6,8 @@
   `src/math/mod.rs` accordingly. Also, uncomment the corresponding trait method
   in `src/lib.rs`.
 - Write some simple tests in your module (using `#[test]`)
-- Run `cargo test` to make sure it works
-- Run `cargo test --features libm-test/test-musl-serialized` to compare your
-  implementation against musl's
+- Run `cargo test` to make sure it works. Full tests are only run when enabling
+  features, see [Testing](#testing) below.
 - Send us a pull request! Make sure to run `cargo fmt` on your code before
   sending the PR. Also include "closes #42" in the PR description to close the
   corresponding issue.
@@ -44,52 +43,40 @@ Check [PR #65] for an example.
   `mod.rs`.
 
 - You may encounter weird literals like `0x1p127f` in the MUSL code. These are hexadecimal floating
-  point literals. Rust (the language) doesn't support these kind of literals. The best way I have
-  found to deal with these literals is to turn them into their integer representation using the
-  [`hexf!`] macro and then turn them back into floats. See below:
+  point literals. Rust (the language) doesn't support this kind of literal. This crate provides
+  two macros, `hf32!` and `hf64!`, which convert string literals to floats at compile time.
 
-[`hexf!`]: https://crates.io/crates/hexf
-
-``` rust
-// Step 1: write a program to convert the float into its integer representation
-#[macro_use]
-extern crate hexf;
-
-fn main() {
-    println!("{:#x}", hexf32!("0x1.0p127").to_bits());
-}
-```
-
-``` console
-$ # Step 2: run the program
-$ cargo run
-0x7f000000
-```
-
-``` rust
-// Step 3: copy paste the output into libm
-let x1p127 = f32::from_bits(0x7f000000); // 0x1p127f === 2 ^ 12
-```
+  ```rust
+  assert_eq!(hf32!("0x1.ffep+8").to_bits(), 0x43fff000);
+  assert_eq!(hf64!("0x1.ffep+8").to_bits(), 0x407ffe0000000000);
+  ```
 
 - Rust code panics on arithmetic overflows when not optimized. You may need to use the [`Wrapping`]
-  newtype to avoid this problem.
+  newtype to avoid this problem, or individual methods like [`wrapping_add`].
 
 [`Wrapping`]: https://doc.rust-lang.org/std/num/struct.Wrapping.html
+[`wrapping_add`]: https://doc.rust-lang.org/std/primitive.u32.html#method.wrapping_add
 
 ## Testing
 
 Normal tests can be executed with:
 
 ```sh
-cargo test
+# Tests against musl require that the submodule is up to date.
+git submodule init
+git submodule update
+
+# `--release` enables more test cases
+cargo test --release
 ```
 
-If you'd like to run tests with randomized inputs that get compared against musl
-itself, you'll need to be on a Linux system and then you can execute:
+If you are on a system that cannot build musl or MPFR, passing
+`--no-default-features` will run some limited tests.
 
-```sh
-cargo test --features libm-test/test-musl-serialized
-```
+The multiprecision tests use the [`rug`] crate for bindings to MPFR. MPFR can
+be difficult to build on non-Unix systems, refer to [`gmp_mpfr_sys`] for help.
+
+`build-musl` does not build with MSVC, Wasm, or Thumb.
 
-Note that you may need to pass `--release` to Cargo if there are errors related
-to integer overflow.
+[`rug`]: https://docs.rs/rug/latest/rug/
+[`gmp_mpfr_sys`]: https://docs.rs/gmp-mpfr-sys/1.6.4/gmp_mpfr_sys/
diff --git a/Cargo.toml b/Cargo.toml
index aa6c08ddb..268b6fb0e 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,47 +1,37 @@
-[package]
-authors = ["Jorge Aparicio <jorge@japaric.io>"]
-categories = ["no-std"]
-description = "libm in pure Rust"
-documentation = "https://docs.rs/libm"
-keywords = ["libm", "math"]
-license = "MIT AND (MIT OR Apache-2.0)"
-name = "libm"
-readme = "README.md"
-repository = "https://github.com/rust-lang/libm"
-version = "0.2.11"
-edition = "2021"
-exclude = ["/ci/", "/.github/workflows/"]
-rust-version = "1.63"
-
-[features]
-default = []
-
-# This tells the compiler to assume that a Nightly toolchain is being used and
-# that it should activate any useful Nightly things accordingly.
-unstable = []
-
-# Used to prevent using any intrinsics or arch-specific code.
-force-soft-floats = []
-
 [workspace]
 resolver = "2"
 members = [
-  "crates/compiler-builtins-smoke-test",
-  "crates/libm-bench",
+  "libm",
   "crates/libm-macros",
   "crates/libm-test",
   "crates/musl-math-sys",
+  "crates/util",
 ]
 default-members = [
-  ".",
+  "libm",
   "crates/libm-macros",
-  "crates/libm-test",
+  "crates/libm-test"
+]
+exclude = [
+  # Requires `panic = abort` so can't be a member of the workspace
+  "crates/compiler-builtins-smoke-test",
 ]
 
-[dev-dependencies]
-no-panic = "0.1.30"
+# The default release profile is unchanged.
 
+# Release mode with debug assertions
+[profile.release-checked]
+inherits = "release"
+debug-assertions = true
+overflow-checks = true
 
-# This is needed for no-panic to correctly detect the lack of panics
-[profile.release]
+# Release with maximum optimizations, which is very slow to build. This is also
+# what is needed to check `no-panic`.
+[profile.release-opt]
+inherits = "release"
+codegen-units = 1
 lto = "fat"
+
+[profile.bench]
+# Required for iai-callgrind
+debug = true
diff --git a/README.md b/README.md
index e5d64bd2d..c120a7588 100644
--- a/README.md
+++ b/README.md
@@ -2,6 +2,10 @@
 
 A port of [MUSL]'s libm to Rust.
 
+> [!NOTE]  
+> The `libm` crate has been merged into the `compiler-builtins` repository. Future
+> development work will take place there: https://github.com/rust-lang/compiler-builtins.
+
 [MUSL]: https://musl.libc.org/
 
 ## Goals
@@ -41,14 +45,14 @@ This crate supports rustc 1.63 and newer.
 ## License
 
 Usage is licensed under the MIT license ([LICENSE-MIT](LICENSE-MIT) or
-http://opensource.org/licenses/MIT).
+https://opensource.org/licenses/MIT).
 
 
 ### Contribution
 
 Contributions are licensed under both the MIT license and the Apache License,
 Version 2.0 ([LICENSE-APACHE](LICENSE-APACHE) or
-http://www.apache.org/licenses/LICENSE-2.0). Unless you explicitly state
+https://www.apache.org/licenses/LICENSE-2.0). Unless you explicitly state
 otherwise, any contribution intentionally submitted for inclusion in the work
 by you, as defined in the Apache-2.0 license, shall be dual licensed as
 mentioned, without any additional terms or conditions.
diff --git a/build.rs b/build.rs
deleted file mode 100644
index b683557e4..000000000
--- a/build.rs
+++ /dev/null
@@ -1,17 +0,0 @@
-use std::env;
-
-fn main() {
-    println!("cargo:rerun-if-changed=build.rs");
-    println!("cargo:rustc-check-cfg=cfg(assert_no_panic)");
-    println!("cargo:rustc-check-cfg=cfg(feature, values(\"unstable\"))");
-
-    println!("cargo:rustc-check-cfg=cfg(feature, values(\"checked\"))");
-
-    #[allow(unexpected_cfgs)]
-    if !cfg!(feature = "checked") {
-        let lvl = env::var("OPT_LEVEL").unwrap();
-        if lvl != "0" {
-            println!("cargo:rustc-cfg=assert_no_panic");
-        }
-    }
-}
diff --git a/ci/bench-icount.sh b/ci/bench-icount.sh
new file mode 100755
index 000000000..4d93e257a
--- /dev/null
+++ b/ci/bench-icount.sh
@@ -0,0 +1,58 @@
+#!/bin/bash
+
+set -eux
+
+iai_home="iai-home"
+
+# Download the baseline from master
+./ci/ci-util.py locate-baseline --download --extract
+
+# Run benchmarks once
+function run_icount_benchmarks() {
+    cargo_args=(
+        "--bench" "icount"
+        "--no-default-features"
+        "--features" "unstable,unstable-float,icount"
+    )
+
+    iai_args=(
+        "--home" "$(pwd)/$iai_home"
+        "--regression=ir=5.0"
+        "--save-summary"
+    )
+
+    # Parse `cargo_arg0 cargo_arg1 -- iai_arg0 iai_arg1` syntax
+    parsing_iai_args=0
+    while [ "$#" -gt 0 ]; do
+        if [ "$parsing_iai_args" == "1" ]; then
+            iai_args+=("$1")
+        elif [ "$1" == "--" ]; then
+            parsing_iai_args=1
+        else
+            cargo_args+=("$1")
+        fi
+
+        shift
+    done
+
+    # Run iai-callgrind benchmarks
+    cargo bench "${cargo_args[@]}" -- "${iai_args[@]}"
+
+    # NB: iai-callgrind should exit on error but does not, so we inspect the summary
+    # for errors. See https://github.com/iai-callgrind/iai-callgrind/issues/337
+    if [ -n "${PR_NUMBER:-}" ]; then
+        # If this is for a pull request, ignore regressions if specified.
+        ./ci/ci-util.py check-regressions --home "$iai_home" --allow-pr-override "$PR_NUMBER"
+    else
+        ./ci/ci-util.py check-regressions --home "$iai_home" || true
+    fi
+}
+
+# Run once with softfloats, once with arch instructions enabled
+run_icount_benchmarks --features force-soft-floats -- --save-baseline=softfloat
+run_icount_benchmarks -- --save-baseline=hardfloat
+
+# Name and tar the new baseline
+name="baseline-icount-$(date -u +'%Y%m%d%H%M')-${GITHUB_SHA:0:12}"
+echo "BASELINE_NAME=$name" >>"$GITHUB_ENV"
+tar cJf "$name.tar.xz" "$iai_home"
diff --git a/ci/ci-util.py b/ci/ci-util.py
new file mode 100755
index 000000000..d9e402d6b
--- /dev/null
+++ b/ci/ci-util.py
@@ -0,0 +1,416 @@
+#!/usr/bin/env python3
+"""Utilities for CI.
+
+This dynamically prepares a list of routines that had a source file change based on
+git history.
+"""
+
+import json
+import os
+import subprocess as sp
+import sys
+from dataclasses import dataclass
+from glob import glob, iglob
+from inspect import cleandoc
+from os import getenv
+from pathlib import Path
+from typing import TypedDict, Self
+
+USAGE = cleandoc(  # Help text; printed for `--help` and on unrecognized arguments
+    """
+    usage:
+
+    ./ci/ci-util.py <COMMAND> [flags]
+
+    COMMAND:
+        generate-matrix
+            Calculate a matrix of which functions had source change, print that as
+            a JSON object.
+
+        locate-baseline [--download] [--extract]
+            Locate the most recent benchmark baseline available in CI and, if flags
+            specify, download and extract it. Never exits with nonzero status if
+            downloading fails.
+
+            Note that `--extract` will overwrite files in `iai-home`.
+
+        check-regressions [--home iai-home] [--allow-pr-override pr_number]
+            Check `--home` (or `iai-home` if unspecified) for `summary.json`
+            files and see if there are any regressions. This is used as a workaround
+            for `iai-callgrind` not exiting with error status; see
+            <https://github.com/iai-callgrind/iai-callgrind/issues/337>.
+
+            If `--allow-pr-override` is specified, the regression check will not exit
+            with failure if any line in the PR starts with `ci: allow-regressions`.
+    """
+)
+
+REPO_ROOT = Path(__file__).parent.parent
+GIT = ["git", "-C", REPO_ROOT]
+DEFAULT_BRANCH = "master"
+WORKFLOW_NAME = "CI"  # Workflow that generates the benchmark artifacts
+ARTIFACT_GLOB = "baseline-icount*"
+# Place this in a PR body to skip regression checks (must be at the start of a line).
+REGRESSION_DIRECTIVE = "ci: allow-regressions"
+# Place this in a PR body to skip extensive tests
+SKIP_EXTENSIVE_DIRECTIVE = "ci: skip-extensive"
+# Place this in a PR body to allow running a large number of extensive tests. If not
+# set, this script will error out if a threshold is exceeded in order to avoid
+# accidentally spending huge amounts of CI time.
+ALLOW_MANY_EXTENSIVE_DIRECTIVE = "ci: allow-many-extensive"
+MANY_EXTENSIVE_THRESHOLD = 20
+
+# Don't run exhaustive tests if these files change, even if they contain a function
+# definition.
+IGNORE_FILES = [
+    "libm/src/math/support/",
+    "libm/src/libm_helper.rs",
+    "libm/src/math/arch/intrinsics.rs",
+]
+
+TYPES = ["f16", "f32", "f64", "f128"]
+
+
+def eprint(*args, **kwargs):
+    """Print to stderr."""
+    print(*args, file=sys.stderr, **kwargs)
+
+
+@dataclass
+class PrInfo:
+    """GitHub PR fields, as returned by the `gh pr view` query in `load`"""
+
+    body: str
+    commits: list[str]
+    created_at: str
+    number: int
+
+    @classmethod
+    def load(cls, pr_number: int | str) -> Self:
+        """Query `gh` for the body, commit hashes, creation time and number of a PR"""
+        pr_info = sp.check_output(
+            [
+                "gh",
+                "pr",
+                "view",
+                str(pr_number),
+                "--json=number,commits,body,createdAt",
+                # Flatten the commit list to only hashes, change a key to snake naming
+                "--jq=.commits |= map(.oid) | .created_at = .createdAt | del(.createdAt)",
+            ],
+            text=True,
+        )
+        eprint("PR info:", json.dumps(parsed := json.loads(pr_info), indent=4))
+        return cls(**parsed)  # parsed once above so the log is actually pretty-printed
+
+    def contains_directive(self, directive: str) -> bool:
+        """Return true if any line of the PR body starts with the provided directive"""
+        lines = self.body.splitlines()
+        return any(line.startswith(directive) for line in lines)
+
+
+class FunctionDef(TypedDict):
+    """Type for an entry in `function-definitions.json`"""
+
+    sources: list[str]
+    type: str
+
+
+@dataclass
+class Context:
+    gh_ref: str | None
+    changed: list[Path]
+    defs: dict[str, FunctionDef]
+
+    def __init__(self) -> None:
+        self.gh_ref = getenv("GITHUB_REF")
+        self.changed = []
+        self._init_change_list()
+
+        with open(REPO_ROOT.joinpath("etc/function-definitions.json")) as f:
+            defs = json.load(f)
+
+        defs.pop("__comment", None)
+        self.defs = defs
+
+    def _init_change_list(self):
+        """Set `self.changed` to the files changed by the PR. This needs `GITHUB_REF`
+        to be a PR merge ref; otherwise `self.changed` is not modified.
+        """
+
+        # For pull requests, GitHub creates a ref `refs/pull/1234/merge` (1234 being
+        # the PR number), and sets this as `GITHUB_REF`.
+        ref = self.gh_ref
+        eprint(f"using ref `{ref}`")
+        if ref is None or "merge" not in ref:
+            # If the ref is not for `merge` then we are not in PR CI
+            eprint("No diff available for ref")
+            return
+
+        # The ref is for a dummy merge commit. We can extract the merge base by
+        # inspecting all parents (`^@`).
+        merge_sha = sp.check_output(
+            GIT + ["show-ref", "--hash", ref], text=True
+        ).strip()
+        merge_log = sp.check_output(GIT + ["log", "-1", merge_sha], text=True)
+        eprint(f"Merge:\n{merge_log}\n")
+
+        parents = (
+            sp.check_output(GIT + ["rev-parse", f"{merge_sha}^@"], text=True)
+            .strip()
+            .splitlines()
+        )
+        assert len(parents) == 2, f"expected two-parent merge but got:\n{parents}"
+        base = parents[0].strip()
+        incoming = parents[1].strip()
+
+        eprint(f"base: {base}, incoming: {incoming}")
+        textlist = sp.check_output(
+            GIT + ["diff", base, incoming, "--name-only"], text=True
+        )
+        self.changed = [Path(p) for p in textlist.splitlines()]
+
+    @staticmethod
+    def _ignore_file(fname: str) -> bool:
+        return any(fname.startswith(pfx) for pfx in IGNORE_FILES)
+
+    def changed_routines(self) -> dict[str, list[str]]:
+        """Map each type to the sorted names of its routines for which one or more
+        non-ignored source files are in `self.changed`.
+        """
+        routines = set()
+        for name, meta in self.defs.items():
+            # Don't update if changes to the file should be ignored
+            sources = (f for f in meta["sources"] if not self._ignore_file(f))
+
+            # Select changed files
+            changed = [f for f in sources if Path(f) in self.changed]
+
+            if len(changed) > 0:
+                eprint(f"changed files for {name}: {changed}")
+                routines.add(name)
+
+        ret: dict[str, list[str]] = {}
+        for r in sorted(routines):
+            ret.setdefault(self.defs[r]["type"], []).append(r)
+
+        return ret
+
+    def make_workflow_output(self) -> str:
+        """Create a JSON object with one list item per type in `TYPES`, giving the
+        changed routines and those to test. Exits with status 1 if too many tests.
+        """
+
+        pr_number = os.environ.get("PR_NUMBER")
+        skip_tests = False
+        error_on_many_tests = False
+
+        if pr_number is not None and len(pr_number) > 0:
+            pr = PrInfo.load(pr_number)
+            skip_tests = pr.contains_directive(SKIP_EXTENSIVE_DIRECTIVE)
+            error_on_many_tests = not pr.contains_directive(
+                ALLOW_MANY_EXTENSIVE_DIRECTIVE
+            )
+
+            if skip_tests:
+                eprint("Skipping all extensive tests")
+
+        changed = self.changed_routines()
+        ret = []
+        total_to_test = 0
+
+        for ty in TYPES:
+            ty_changed = changed.get(ty, [])
+            ty_to_test = [] if skip_tests else ty_changed
+            total_to_test += len(ty_to_test)
+
+            item = {
+                "ty": ty,
+                "changed": ",".join(ty_changed),
+                "to_test": ",".join(ty_to_test),
+            }
+
+            ret.append(item)
+        output = json.dumps({"matrix": ret}, separators=(",", ":"))
+        eprint(f"output: {output}")
+        eprint(f"total extensive tests: {total_to_test}")
+
+        if error_on_many_tests and total_to_test > MANY_EXTENSIVE_THRESHOLD:
+            eprint(
+                f"More than {MANY_EXTENSIVE_THRESHOLD} tests would be run; add"
+                f" `{ALLOW_MANY_EXTENSIVE_DIRECTIVE}` to the PR body if this is intentional"
+            )
+            sys.exit(1)  # `sys.exit` rather than the `site`-provided `exit` builtin
+
+        return output
+
+
+def locate_baseline(flags: list[str]) -> None:
+    """Find the most recent baseline from CI, download it if specified.
+
+    This returns rather than erroring, even if the `gh` commands fail. This is to avoid
+    erroring in CI if the baseline is unavailable (artifact time limit exceeded, first
+    run on the branch, etc).
+    """
+
+    download = False
+    extract = False
+
+    while len(flags) > 0:
+        match flags[0]:
+            case "--download":
+                download = True
+            case "--extract":
+                extract = True
+            case _:
+                eprint(USAGE)
+                sys.exit(1)
+        flags = flags[1:]
+
+    if extract and not download:
+        eprint("cannot extract without downloading")
+        sys.exit(1)
+
+    try:
+        # Locate the most recent job to complete with success on our branch
+        latest_job = sp.check_output(
+            [
+                "gh",
+                "run",
+                "list",
+                "--status=success",
+                f"--branch={DEFAULT_BRANCH}",
+                "--json=databaseId,url,headSha,conclusion,createdAt,"
+                "status,workflowDatabaseId,workflowName",
+                # Return the first array element matching our workflow name. NB: cannot
+                # just use `--limit=1`, jq filtering happens after limiting. We also
+                # cannot just use `--workflow` because GH gets confused from
+                # different file names in history.
+                f'--jq=[.[] | select(.workflowName == "{WORKFLOW_NAME}")][0]',
+            ],
+            text=True,
+        )
+    except sp.CalledProcessError as e:
+        eprint(f"failed to run github command: {e}")
+        return
+
+    try:
+        latest = json.loads(latest_job)
+        eprint("latest job: ", json.dumps(latest, indent=4))
+    except json.JSONDecodeError as e:
+        eprint(f"failed to decode json '{latest_job}', {e}")
+        return
+
+    if not download:
+        eprint("--download not specified, returning")
+        return
+
+    # A non-object result (e.g. JSON `null` when no run matched) has no job ID either
+    job_id = latest.get("databaseId") if isinstance(latest, dict) else None
+    if job_id is None:
+        eprint("skipping download step")
+        return
+    sp.run(
+        ["gh", "run", "download", str(job_id), f"--pattern={ARTIFACT_GLOB}"],
+        check=False,
+    )
+
+    if not extract:
+        eprint("skipping extraction step")
+        return
+
+    # Find the baseline with the most recent timestamp. GH downloads the files to e.g.
+    # `some-dirname/some-dirname.tar.xz`, so just glob the whole thing together.
+    candidate_baselines = glob(f"{ARTIFACT_GLOB}/{ARTIFACT_GLOB}")
+    if len(candidate_baselines) == 0:
+        eprint("no possible baseline directories found")
+        return
+
+    candidate_baselines.sort(reverse=True)
+    baseline_archive = candidate_baselines[0]
+    eprint(f"extracting {baseline_archive}")
+    sp.run(["tar", "xJvf", baseline_archive], check=True)
+    eprint("baseline extracted successfully")
+
+
+def check_iai_regressions(args: list[str]):
+    """Find regressions in iai summary.json files. Exits with status 1 if any are
+    found (unless the PR given by `--allow-pr-override` allows them) or no summaries exist.
+    """
+
+    iai_home_str = "iai-home"
+    pr_number = None
+
+    while len(args) > 0:
+        match args:
+            case ["--home", home, *rest]:
+                iai_home_str = home
+                args = rest
+            case ["--allow-pr-override", pr_num, *rest]:
+                pr_number = pr_num
+                args = rest
+            case _:
+                eprint(USAGE)
+                sys.exit(1)
+
+    iai_home = Path(iai_home_str)
+
+    found_summaries = False
+    regressions: list[dict] = []
+    for summary_path in iglob("**/summary.json", root_dir=iai_home, recursive=True):
+        found_summaries = True
+        with open(iai_home / summary_path, "r") as f:
+            summary = json.load(f)
+
+        summary_regs = []
+        run = summary["callgrind_summary"]["callgrind_run"]
+        fname = summary["function_name"]
+        bench_id = summary["id"]  # not named `id`, which would shadow the builtin
+        name_entry = {"name": f"{fname}.{bench_id}"}
+
+        for segment in run["segments"]:
+            summary_regs.extend(segment["regressions"])
+
+        summary_regs.extend(run["total"]["regressions"])
+
+        regressions.extend(name_entry | reg for reg in summary_regs)
+
+    if not found_summaries:
+        eprint(f"did not find any summary.json files within {iai_home}")
+        sys.exit(1)
+
+    if len(regressions) == 0:
+        eprint("No regressions found")
+        return
+
+    eprint("Found regressions:", json.dumps(regressions, indent=4))
+
+    if pr_number is not None:
+        pr = PrInfo.load(pr_number)
+        if pr.contains_directive(REGRESSION_DIRECTIVE):
+            eprint("PR allows regressions, returning")
+            return
+
+    sys.exit(1)
+
+
+def main():  # Dispatch on the subcommand in `sys.argv`; unknown input prints usage
+    match sys.argv[1:]:
+        case ["generate-matrix"]:
+            ctx = Context()
+            output = ctx.make_workflow_output()
+            print(f"matrix={output}")
+        case ["locate-baseline", *flags]:
+            locate_baseline(flags)
+        case ["check-regressions", *args]:
+            check_iai_regressions(args)
+        case ["--help" | "-h"]:
+            print(USAGE)
+            sys.exit()
+        case _:
+            eprint(USAGE)
+            sys.exit(1)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/ci/docker/aarch64-unknown-linux-gnu/Dockerfile b/ci/docker/aarch64-unknown-linux-gnu/Dockerfile
index a7b23cb9e..7fa06b286 100644
--- a/ci/docker/aarch64-unknown-linux-gnu/Dockerfile
+++ b/ci/docker/aarch64-unknown-linux-gnu/Dockerfile
@@ -3,7 +3,7 @@ FROM ubuntu:24.04
 RUN apt-get update && \
     apt-get install -y --no-install-recommends \
     gcc libc6-dev ca-certificates \
-    gcc-aarch64-linux-gnu libc6-dev-arm64-cross \
+    gcc-aarch64-linux-gnu m4 make libc6-dev-arm64-cross \
     qemu-user-static
 
 ENV TOOLCHAIN_PREFIX=aarch64-linux-gnu-
diff --git a/ci/docker/i586-unknown-linux-gnu/Dockerfile b/ci/docker/i586-unknown-linux-gnu/Dockerfile
index 3b0bfc0d3..37e206a84 100644
--- a/ci/docker/i586-unknown-linux-gnu/Dockerfile
+++ b/ci/docker/i586-unknown-linux-gnu/Dockerfile
@@ -2,4 +2,4 @@ FROM ubuntu:24.04
 
 RUN apt-get update && \
     apt-get install -y --no-install-recommends \
-    gcc-multilib libc6-dev ca-certificates
+    gcc-multilib m4 make libc6-dev ca-certificates
diff --git a/ci/docker/i686-unknown-linux-gnu/Dockerfile b/ci/docker/i686-unknown-linux-gnu/Dockerfile
index 3b0bfc0d3..37e206a84 100644
--- a/ci/docker/i686-unknown-linux-gnu/Dockerfile
+++ b/ci/docker/i686-unknown-linux-gnu/Dockerfile
@@ -2,4 +2,4 @@ FROM ubuntu:24.04
 
 RUN apt-get update && \
     apt-get install -y --no-install-recommends \
-    gcc-multilib libc6-dev ca-certificates
+    gcc-multilib m4 make libc6-dev ca-certificates
diff --git a/ci/docker/loongarch64-unknown-linux-gnu/Dockerfile b/ci/docker/loongarch64-unknown-linux-gnu/Dockerfile
new file mode 100644
index 000000000..a9ce320e8
--- /dev/null
+++ b/ci/docker/loongarch64-unknown-linux-gnu/Dockerfile
@@ -0,0 +1,13 @@
+FROM ubuntu:24.04
+
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends \
+    gcc libc6-dev qemu-user-static ca-certificates \
+    gcc-14-loongarch64-linux-gnu libc6-dev-loong64-cross
+
+ENV CARGO_TARGET_LOONGARCH64_UNKNOWN_LINUX_GNU_LINKER=loongarch64-linux-gnu-gcc-14 \
+    CARGO_TARGET_LOONGARCH64_UNKNOWN_LINUX_GNU_RUNNER=qemu-loongarch64-static \
+    AR_loongarch64_unknown_linux_gnu=loongarch64-linux-gnu-ar \
+    CC_loongarch64_unknown_linux_gnu=loongarch64-linux-gnu-gcc-14 \
+    QEMU_LD_PREFIX=/usr/loongarch64-linux-gnu \
+    RUST_TEST_THREADS=1
diff --git a/ci/docker/x86_64-unknown-linux-gnu/Dockerfile b/ci/docker/x86_64-unknown-linux-gnu/Dockerfile
index 15723ab57..c84a31c57 100644
--- a/ci/docker/x86_64-unknown-linux-gnu/Dockerfile
+++ b/ci/docker/x86_64-unknown-linux-gnu/Dockerfile
@@ -2,4 +2,4 @@ FROM ubuntu:24.04
 
 RUN apt-get update && \
     apt-get install -y --no-install-recommends \
-    gcc libc6-dev ca-certificates
+    gcc m4 make libc6-dev ca-certificates
diff --git a/ci/download-musl.sh b/ci/download-musl.sh
deleted file mode 100755
index d0d8b310e..000000000
--- a/ci/download-musl.sh
+++ /dev/null
@@ -1,24 +0,0 @@
-#!/bin/sh
-# Download the expected version of musl to a directory `musl`
-
-set -eux
-
-fname=musl-1.2.5.tar.gz
-sha=a9a118bbe84d8764da0ea0d28b3ab3fae8477fc7e4085d90102b8596fc7c75e4
-
-mkdir musl
-curl "https://musl.libc.org/releases/$fname" -O
-
-case "$(uname -s)" in
-    MINGW*)
-        # Need to extract the second line because certutil does human output
-        fsha=$(certutil -hashfile "$fname" SHA256 | sed -n '2p')
-        [ "$sha" = "$fsha" ] || exit 1
-    ;;
-    *)
-        echo "$sha  $fname" | shasum -a 256 --check || exit 1
-    ;;
-esac
-
-tar -xzf "$fname" -C musl --strip-components 1
-rm "$fname"
diff --git a/ci/run-docker.sh b/ci/run-docker.sh
index a040126df..6626e7226 100755
--- a/ci/run-docker.sh
+++ b/ci/run-docker.sh
@@ -24,11 +24,14 @@ run() {
     # will be owned by root
     mkdir -p target
 
-    docker build -t "$target" "ci/docker/$target"
+    set_env="HOME=/tmp PATH=\$PATH:/rust/bin:/cargo/bin"
+    docker build -t "libm-$target" "ci/docker/$target"
     docker run \
         --rm \
         --user "$(id -u):$(id -g)" \
+        -e CI \
         -e RUSTFLAGS \
+        -e CARGO_TERM_COLOR \
         -e CARGO_HOME=/cargo \
         -e CARGO_TARGET_DIR=/target \
         -e "EMULATED=$emulated" \
@@ -38,8 +41,8 @@ run() {
         -v "$(rustc --print sysroot):/rust:ro" \
         --init \
         -w /checkout \
-        "$target" \
-        sh -c "HOME=/tmp PATH=\$PATH:/rust/bin exec ci/run.sh $target"
+        "libm-$target" \
+        sh -c "$set_env exec ci/run.sh $target"
 }
 
 if [ -z "$1" ]; then
diff --git a/ci/run.sh b/ci/run.sh
index f61fff843..a946d325e 100755
--- a/ci/run.sh
+++ b/ci/run.sh
@@ -3,10 +3,10 @@
 set -eux
 
 export RUST_BACKTRACE="${RUST_BACKTRACE:-full}"
-# Needed for no-panic to correct detect a lack of panics
-export RUSTFLAGS="${RUSTFLAGS:-} -Ccodegen-units=1"
+export NEXTEST_STATUS_LEVEL=all
 
 target="${1:-}"
+flags=""
 
 if [ -z "$target" ]; then
     host_target=$(rustc -vV | awk '/^host/ { print $2 }')
@@ -14,15 +14,23 @@ if [ -z "$target" ]; then
     target="$host_target"
 fi
 
-extra_flags=""
+# We enumerate features manually.
+flags="$flags --no-default-features"
+
+# Enable arch-specific routines when available.
+flags="$flags --features arch"
+
+# Always enable `unstable-float` since it expands available API but does not
+# change any implementations.
+flags="$flags --features unstable-float"
 
 # We need to specifically skip tests for musl-math-sys on systems that can't
 # build musl since otherwise `--all` will activate it.
 case "$target" in
     # Can't build at all on MSVC, WASM, or thumb
-    *windows-msvc*) extra_flags="$extra_flags --exclude musl-math-sys" ;;
-    *wasm*) extra_flags="$extra_flags --exclude musl-math-sys" ;;
-    *thumb*) extra_flags="$extra_flags --exclude musl-math-sys" ;;
+    *windows-msvc*) flags="$flags --exclude musl-math-sys" ;;
+    *wasm*) flags="$flags --exclude musl-math-sys" ;;
+    *thumb*) flags="$flags --exclude musl-math-sys" ;;
 
     # We can build musl on MinGW but running tests gets a stack overflow
     *windows-gnu*) ;;
@@ -32,34 +40,91 @@ case "$target" in
     *powerpc64le*) ;;
 
     # Everything else gets musl enabled
-    *) extra_flags="$extra_flags --features libm-test/build-musl" ;;
+    *) flags="$flags --features libm-test/build-musl" ;;
+esac
+
+# Configure which targets test against MPFR
+case "$target" in
+    # MSVC cannot link MPFR
+    *windows-msvc*) ;;
+    # FIXME: MinGW should be able to build MPFR, but setup in CI is nontrivial.
+    *windows-gnu*) ;;
+    # Targets that aren't cross compiled in CI work fine
+    aarch64*apple*) flags="$flags --features libm-test/build-mpfr" ;;
+    aarch64*linux*) flags="$flags --features libm-test/build-mpfr" ;;
+    i586*) flags="$flags --features libm-test/build-mpfr --features gmp-mpfr-sys/force-cross" ;;
+    i686*) flags="$flags --features libm-test/build-mpfr" ;;
+    x86_64*) flags="$flags --features libm-test/build-mpfr" ;;
 esac
 
 # FIXME: `STATUS_DLL_NOT_FOUND` testing macros on CI.
 # <https://github.com/rust-lang/rust/issues/128944>
 case "$target" in
-    *windows-gnu) extra_flags="$extra_flags --exclude libm-macros" ;;
+    *windows-gnu) flags="$flags --exclude libm-macros" ;;
 esac
 
-if [ "$(uname -a)" = "Linux" ]; then
-    # also run the reference tests when we can. requires a Linux host.
-    extra_flags="$extra_flags --features libm-test/test-musl-serialized"
-fi
+# Make sure we can build with overriding features.
+cargo check -p libm --no-default-features
 
 if [ "${BUILD_ONLY:-}" = "1" ]; then
+    # If we are on targets that can't run tests, verify that we can build.
     cmd="cargo build --target $target --package libm"
     $cmd
-    $cmd --features 'unstable'
+    $cmd --features unstable-intrinsics
+
+    echo "can't run tests on $target; skipping"
+    exit
+fi
 
-    echo "can't run tests on $target"
-else
-    cmd="cargo test --all --target $target $extra_flags"
+flags="$flags --all --target $target"
+cmd="cargo test $flags"
+profile="--profile"
 
-    # stable by default
-    $cmd
-    $cmd --release
+# If nextest is available, use that
+command -v cargo-nextest && nextest=1 || nextest=0
+if [ "$nextest" = "1" ]; then
+    # Workaround for https://github.com/nextest-rs/nextest/issues/2066
+    if [ -f /.dockerenv ]; then
+        cfg_file="/tmp/nextest-config.toml"
+        echo "[store]" >> "$cfg_file"
+        echo "dir = \"$CARGO_TARGET_DIR/nextest\"" >> "$cfg_file"
+        cfg_flag="--config-file $cfg_file"
+    fi
 
-    # unstable with a feature
-    $cmd --features 'unstable'
-    $cmd --release --features 'unstable'
+    cmd="cargo nextest run ${cfg_flag:-} --max-fail=10 $flags"
+    profile="--cargo-profile"
 fi
+
+# Test once without intrinsics
+$cmd
+
+# Run doctests if they were excluded by nextest
+[ "$nextest" = "1" ] && cargo test --doc $flags
+
+# Exclude the macros and util crates from the rest of the tests to save CI
+# runtime, they shouldn't have anything feature- or opt-level-dependent.
+cmd="$cmd --exclude util --exclude libm-macros"
+
+# Test once with intrinsics enabled
+$cmd --features unstable-intrinsics
+$cmd --features unstable-intrinsics --benches
+
+# Test the same in release mode, which also increases coverage. Also ensure
+# the soft float routines are checked.
+$cmd "$profile" release-checked
+$cmd "$profile" release-checked --features force-soft-floats
+$cmd "$profile" release-checked --features unstable-intrinsics
+$cmd "$profile" release-checked --features unstable-intrinsics --benches
+
+# Ensure that the routines do not panic.
+#
+# `--tests` must be passed because no-panic is only enabled as a dev
+# dependency. The `release-opt` profile must be used to enable LTO and a
+# single CGU.
+ENSURE_NO_PANIC=1 cargo build \
+     -p libm \
+    --target "$target" \
+    --no-default-features \
+    --features unstable-float \
+    --tests \
+    --profile release-opt
diff --git a/crates/compiler-builtins-smoke-test/Cargo.toml b/crates/compiler-builtins-smoke-test/Cargo.toml
index 8d084ee34..38a511669 100644
--- a/crates/compiler-builtins-smoke-test/Cargo.toml
+++ b/crates/compiler-builtins-smoke-test/Cargo.toml
@@ -6,10 +6,33 @@ edition = "2021"
 publish = false
 
 [lib]
+crate-type = ["staticlib"]
 test = false
 bench = false
 
 [features]
-unstable = []
-checked = []
-force-soft-floats = []
+default = ["arch", "compiler-builtins", "unstable-float"]
+
+# Copied from `libm`'s root `Cargo.toml`.
+arch = []
+compiler-builtins = []
+unstable-float = []
+
+[lints.rust]
+unexpected_cfgs = { level = "warn", check-cfg = [
+  "cfg(arch_enabled)",
+  "cfg(assert_no_panic)",
+  "cfg(intrinsics_enabled)",
+  'cfg(feature, values("force-soft-floats"))',
+  'cfg(feature, values("unstable"))',
+  'cfg(feature, values("unstable-intrinsics"))',
+  'cfg(feature, values("unstable-public-internals"))',
+] }
+
+[profile.dev]
+panic = "abort"
+
+[profile.release]
+panic = "abort"
+codegen-units = 1
+lto = "fat"
diff --git a/crates/compiler-builtins-smoke-test/build.rs b/crates/compiler-builtins-smoke-test/build.rs
index 27d4a0e89..ef8d613c9 100644
--- a/crates/compiler-builtins-smoke-test/build.rs
+++ b/crates/compiler-builtins-smoke-test/build.rs
@@ -1,3 +1,8 @@
+#[path = "../../libm/configure.rs"]
+mod configure;
+
 fn main() {
-    println!("cargo::rustc-check-cfg=cfg(assert_no_panic)");
+    println!("cargo:rerun-if-changed=../../libm/configure.rs");
+    let cfg = configure::Config::from_env();
+    configure::emit_libm_config(&cfg);
 }
diff --git a/crates/compiler-builtins-smoke-test/src/lib.rs b/crates/compiler-builtins-smoke-test/src/lib.rs
index e65cb8da3..e70f6d9e0 100644
--- a/crates/compiler-builtins-smoke-test/src/lib.rs
+++ b/crates/compiler-builtins-smoke-test/src/lib.rs
@@ -1,11 +1,17 @@
 //! Fake compiler-builtins crate
 //!
 //! This is used to test that we can source import `libm` into the compiler-builtins crate.
+//! Additionally, it provides a `#[no_mangle]` C API that can be easier to inspect than the
+//! default `.rlib`.
 
+#![compiler_builtins]
 #![feature(core_intrinsics)]
+#![feature(compiler_builtins)]
+#![feature(f16)]
+#![feature(f128)]
 #![allow(internal_features)]
-#![allow(dead_code)]
 #![no_std]
 
-#[path = "../../../src/math/mod.rs"]
-pub mod libm;
+mod math;
+// Required for macro paths.
+use math::libm::support;
diff --git a/crates/compiler-builtins-smoke-test/src/math.rs b/crates/compiler-builtins-smoke-test/src/math.rs
new file mode 100644
index 000000000..58a5bfbb9
--- /dev/null
+++ b/crates/compiler-builtins-smoke-test/src/math.rs
@@ -0,0 +1,182 @@
+use core::ffi::c_int;
+
+#[allow(dead_code)]
+#[allow(clippy::all)] // We don't get `libm`'s list of `allow`s, so just ignore Clippy.
+#[allow(unused_imports)]
+#[path = "../../../libm/src/math/mod.rs"]
+pub mod libm;
+
+/// Export each listed function unmangled with the C ABI, forwarding to `libm::<name>`.
+macro_rules! no_mangle {
+    ($( $name:ident( $($tt:tt)+ ) -> $ret:ty; )+) => {
+        $( no_mangle!(@inner $name( $($tt)+ ) -> $ret); )+
+    };
+
+    // Simple functions: arguments are forwarded and the single result is returned
+    (@inner $name:ident( $($arg:ident: $aty:ty),+ ) -> $ret:ty) => {
+        #[unsafe(no_mangle)]
+        extern "C" fn $name($($arg: $aty),+) -> $ret {
+            libm::$name($($arg),+)
+        }
+    };
+
+    // Functions where `libm` returns a tuple: the first element is returned and the
+    // rest are written through the `&mut` arguments. Use `|` to separate inputs vs.
+    // outputs in the signature.
+    (
+        @inner $name:ident( $($arg:ident: $aty:ty),+ | $($rarg:ident: $rty:ty),+) -> $ret:ty
+    ) => {
+        #[unsafe(no_mangle)]
+        extern "C" fn $name($($arg: $aty,)+ $($rarg: $rty),+) -> $ret {
+            let ret;
+            (ret, $(*$rarg),+) = libm::$name($($arg),+);
+            ret
+        }
+    };
+}
+
+no_mangle! {
+    frexp(x: f64 | y: &mut c_int) -> f64;
+    frexpf(x: f32 | y: &mut c_int) -> f32;
+    acos(x: f64) -> f64;
+    acosf(x: f32) -> f32;
+    acosh(x: f64) -> f64;
+    acoshf(x: f32) -> f32;
+    asin(x: f64) -> f64;
+    asinf(x: f32) -> f32;
+    asinh(x: f64) -> f64;
+    asinhf(x: f32) -> f32;
+    atan(x: f64) -> f64;
+    atan2(x: f64, y: f64) -> f64;
+    atan2f(x: f32, y: f32) -> f32;
+    atanf(x: f32) -> f32;
+    atanh(x: f64) -> f64;
+    atanhf(x: f32) -> f32;
+    cbrt(x: f64) -> f64;
+    cbrtf(x: f32) -> f32;
+    ceil(x: f64) -> f64;
+    ceilf(x: f32) -> f32;
+    ceilf128(x: f128) -> f128;
+    ceilf16(x: f16) -> f16;
+    copysign(x: f64, y: f64) -> f64;
+    copysignf(x: f32, y: f32) -> f32;
+    copysignf128(x: f128, y: f128) -> f128;
+    copysignf16(x: f16, y: f16) -> f16;
+    cos(x: f64) -> f64;
+    cosf(x: f32) -> f32;
+    cosh(x: f64) -> f64;
+    coshf(x: f32) -> f32;
+    erf(x: f64) -> f64;
+    erfc(x: f64) -> f64;
+    erfcf(x: f32) -> f32;
+    erff(x: f32) -> f32;
+    exp(x: f64) -> f64;
+    exp10(x: f64) -> f64;
+    exp10f(x: f32) -> f32;
+    exp2(x: f64) -> f64;
+    exp2f(x: f32) -> f32;
+    expf(x: f32) -> f32;
+    expm1(x: f64) -> f64;
+    expm1f(x: f32) -> f32;
+    fabs(x: f64) -> f64;
+    fabsf(x: f32) -> f32;
+    fabsf128(x: f128) -> f128;
+    fabsf16(x: f16) -> f16;
+    fdim(x: f64, y: f64) -> f64;
+    fdimf(x: f32, y: f32) -> f32;
+    fdimf128(x: f128, y: f128) -> f128;
+    fdimf16(x: f16, y: f16) -> f16;
+    floor(x: f64) -> f64;
+    floorf(x: f32) -> f32;
+    floorf128(x: f128) -> f128;
+    floorf16(x: f16) -> f16;
+    fma(x: f64, y: f64, z: f64) -> f64;
+    fmaf(x: f32, y: f32, z: f32) -> f32;
+    fmax(x: f64, y: f64) -> f64;
+    fmaxf(x: f32, y: f32) -> f32;
+    fmin(x: f64, y: f64) -> f64;
+    fminf(x: f32, y: f32) -> f32;
+    fmod(x: f64, y: f64) -> f64;
+    fmodf(x: f32, y: f32) -> f32;
+    hypot(x: f64, y: f64) -> f64;
+    hypotf(x: f32, y: f32) -> f32;
+    ilogb(x: f64) -> c_int;
+    ilogbf(x: f32) -> c_int;
+    j0(x: f64) -> f64;
+    j0f(x: f32) -> f32;
+    j1(x: f64) -> f64;
+    j1f(x: f32) -> f32;
+    jn(x: c_int, y: f64) -> f64;
+    jnf(x: c_int, y: f32) -> f32;
+    ldexp(x: f64, y: c_int) -> f64;
+    ldexpf(x: f32, y: c_int) -> f32;
+    lgamma(x: f64) -> f64;
+    lgamma_r(x: f64 | r: &mut c_int) -> f64;
+    lgammaf(x: f32) -> f32;
+    lgammaf_r(x: f32 | r: &mut c_int) -> f32;
+    log(x: f64) -> f64;
+    log10(x: f64) -> f64;
+    log10f(x: f32) -> f32;
+    log1p(x: f64) -> f64;
+    log1pf(x: f32) -> f32;
+    log2(x: f64) -> f64;
+    log2f(x: f32) -> f32;
+    logf(x: f32) -> f32;
+    modf(x: f64 | r: &mut f64) -> f64;
+    modff(x: f32 | r: &mut f32) -> f32;
+    nextafter(x: f64, y: f64) -> f64;
+    nextafterf(x: f32, y: f32) -> f32;
+    pow(x: f64, y: f64) -> f64;
+    powf(x: f32, y: f32) -> f32;
+    remainder(x: f64, y: f64) -> f64;
+    remainderf(x: f32, y: f32) -> f32;
+    remquo(x: f64, y: f64 | q: &mut c_int) -> f64;
+    remquof(x: f32, y: f32 | q: &mut c_int) -> f32;
+    rint(x: f64) -> f64;
+    rintf(x: f32) -> f32;
+    rintf128(x: f128) -> f128;
+    rintf16(x: f16) -> f16;
+    round(x: f64) -> f64;
+    roundf(x: f32) -> f32;
+    scalbn(x: f64, y: c_int) -> f64;
+    scalbnf(x: f32, y: c_int) -> f32;
+    sin(x: f64) -> f64;
+    sinf(x: f32) -> f32;
+    sinh(x: f64) -> f64;
+    sinhf(x: f32) -> f32;
+    sqrt(x: f64) -> f64;
+    sqrtf(x: f32) -> f32;
+    tan(x: f64) -> f64;
+    tanf(x: f32) -> f32;
+    tanh(x: f64) -> f64;
+    tanhf(x: f32) -> f32;
+    tgamma(x: f64) -> f64;
+    tgammaf(x: f32) -> f32;
+    trunc(x: f64) -> f64;
+    truncf(x: f32) -> f32;
+    truncf128(x: f128) -> f128;
+    truncf16(x: f16) -> f16;
+    y0(x: f64) -> f64;
+    y0f(x: f32) -> f32;
+    y1(x: f64) -> f64;
+    y1f(x: f32) -> f32;
+    yn(x: c_int, y: f64) -> f64;
+    ynf(x: c_int, y: f32) -> f32;
+}
+
+/* sincos has no direct return type, not worth handling in the macro */
+
+#[unsafe(no_mangle)]
+extern "C" fn sincos(x: f64, s: &mut f64, c: &mut f64) {
+    (*s, *c) = libm::sincos(x);
+}
+
+#[unsafe(no_mangle)]
+extern "C" fn sincosf(x: f32, s: &mut f32, c: &mut f32) {
+    (*s, *c) = libm::sincosf(x);
+}
+
+#[panic_handler]
+fn panic(_info: &core::panic::PanicInfo) -> ! {
+    loop {}
+}
diff --git a/crates/libm-bench/Cargo.toml b/crates/libm-bench/Cargo.toml
deleted file mode 100644
index ee8c58200..000000000
--- a/crates/libm-bench/Cargo.toml
+++ /dev/null
@@ -1,16 +0,0 @@
-[package]
-name = "libm-bench"
-version = "0.1.0"
-authors = ["Gonzalo Brito Gadeschi <gonzalobg88@gmail.com>"]
-edition = "2021"
-license = "MIT OR Apache-2.0"
-publish = false
-
-[dependencies]
-libm = { path = "../..", default-features = false }
-rand = "0.8.5"
-paste = "1.0.15"
-
-[features]
-default = []
-unstable = [ "libm/unstable" ]
diff --git a/crates/libm-bench/benches/bench.rs b/crates/libm-bench/benches/bench.rs
deleted file mode 100644
index ca999b90f..000000000
--- a/crates/libm-bench/benches/bench.rs
+++ /dev/null
@@ -1,116 +0,0 @@
-#![feature(test)]
-extern crate test;
-
-use rand::Rng;
-use test::Bencher;
-
-macro_rules! unary {
-  ($($func:ident),*) => ($(
-      paste::item! {
-        #[bench]
-        pub fn [<$func>](bh: &mut Bencher) {
-            let mut rng = rand::thread_rng();
-            let x = rng.gen::<f64>();
-            bh.iter(|| test::black_box(libm::[<$func>](x)))
-        }
-        #[bench]
-        pub fn [<$func f>](bh: &mut Bencher) {
-            let mut rng = rand::thread_rng();
-            let x = rng.gen::<f32>();
-            bh.iter(|| test::black_box(libm::[<$func f>](x)))
-        }
-    }
-  )*);
-}
-macro_rules! binary {
-  ($($func:ident),*) => ($(
-      paste::item! {
-        #[bench]
-        pub fn [<$func>](bh: &mut Bencher) {
-            let mut rng = rand::thread_rng();
-            let x = rng.gen::<f64>();
-            let y = rng.gen::<f64>();
-            bh.iter(|| test::black_box(libm::[<$func>](x, y)))
-        }
-        #[bench]
-        pub fn [<$func f>](bh: &mut Bencher) {
-            let mut rng = rand::thread_rng();
-            let x = rng.gen::<f32>();
-            let y = rng.gen::<f32>();
-            bh.iter(|| test::black_box(libm::[<$func f>](x, y)))
-        }
-    }
-  )*);
-  ($($func:ident);*) => ($(
-      paste::item! {
-        #[bench]
-        pub fn [<$func>](bh: &mut Bencher) {
-            let mut rng = rand::thread_rng();
-            let x = rng.gen::<f64>();
-            let n = rng.gen::<i32>();
-            bh.iter(|| test::black_box(libm::[<$func>](x, n)))
-        }
-        #[bench]
-        pub fn [<$func f>](bh: &mut Bencher) {
-            let mut rng = rand::thread_rng();
-            let x = rng.gen::<f32>();
-            let n = rng.gen::<i32>();
-            bh.iter(|| test::black_box(libm::[<$func f>](x, n)))
-        }
-    }
-  )*);
-}
-macro_rules! trinary {
-  ($($func:ident),*) => ($(
-      paste::item! {
-        #[bench]
-        pub fn [<$func>](bh: &mut Bencher) {
-            let mut rng = rand::thread_rng();
-            let x = rng.gen::<f64>();
-            let y = rng.gen::<f64>();
-            let z = rng.gen::<f64>();
-            bh.iter(|| test::black_box(libm::[<$func>](x, y, z)))
-        }
-        #[bench]
-        pub fn [<$func f>](bh: &mut Bencher) {
-            let mut rng = rand::thread_rng();
-            let x = rng.gen::<f32>();
-            let y = rng.gen::<f32>();
-            let z = rng.gen::<f32>();
-            bh.iter(|| test::black_box(libm::[<$func f>](x, y, z)))
-        }
-    }
-  )*);
-}
-macro_rules! bessel {
-  ($($func:ident),*) => ($(
-      paste::item! {
-        #[bench]
-        pub fn [<$func>](bh: &mut Bencher) {
-            let mut rng = rand::thread_rng();
-            let mut n = rng.gen::<i32>();
-            n &= 0xffff;
-            let x = rng.gen::<f64>();
-            bh.iter(|| test::black_box(libm::[<$func>](n, x)))
-        }
-        #[bench]
-        pub fn [<$func f>](bh: &mut Bencher) {
-            let mut rng = rand::thread_rng();
-            let mut n = rng.gen::<i32>();
-            n &= 0xffff;
-            let x = rng.gen::<f32>();
-            bh.iter(|| test::black_box(libm::[<$func f>](n, x)))
-        }
-    }
-  )*);
-}
-
-unary!(
-    acos, acosh, asin, atan, cbrt, ceil, cos, cosh, erf, exp, exp2, exp10, expm1, fabs, floor, j0,
-    j1, lgamma, log, log1p, log2, log10, rint, round, sin, sinh, sqrt, tan, tanh, tgamma, trunc,
-    y0, y1
-);
-binary!(atan2, copysign, fdim, fmax, fmin, fmod, hypot, pow);
-trinary!(fma);
-bessel!(jn, yn);
-binary!(ldexp; scalbn);
diff --git a/crates/libm-macros/Cargo.toml b/crates/libm-macros/Cargo.toml
index 3da9d45a2..50c869db7 100644
--- a/crates/libm-macros/Cargo.toml
+++ b/crates/libm-macros/Cargo.toml
@@ -1,13 +1,21 @@
 [package]
 name = "libm-macros"
 version = "0.1.0"
-edition = "2021"
+edition = "2024"
 publish = false
 
 [lib]
 proc-macro = true
 
 [dependencies]
-proc-macro2 = "1.0.88"
-quote = "1.0.37"
-syn = { version = "2.0.79", features = ["full", "extra-traits", "visit-mut"] }
+heck = "0.5.0"
+proc-macro2 = "1.0.94"
+quote = "1.0.40"
+syn = { version = "2.0.100", features = ["full", "extra-traits", "visit-mut"] }
+
+[lints.rust]
+# Values used during testing
+unexpected_cfgs = { level = "warn", check-cfg = [
+  'cfg(f16_enabled)',
+  'cfg(f128_enabled)',
+] }
diff --git a/crates/libm-macros/src/enums.rs b/crates/libm-macros/src/enums.rs
new file mode 100644
index 000000000..864b625ea
--- /dev/null
+++ b/crates/libm-macros/src/enums.rs
@@ -0,0 +1,154 @@
+use heck::ToUpperCamelCase;
+use proc_macro2 as pm2;
+use proc_macro2::{Ident, Span};
+use quote::quote;
+use syn::spanned::Spanned;
+use syn::{Fields, ItemEnum, Variant};
+
+use crate::{ALL_OPERATIONS, base_name};
+
+/// Implement `#[function_enum]`, see documentation in `lib.rs`.
+///
+/// `item` must be an empty enum and `attributes` must be exactly one identifier, the name of the
+/// `BaseName` enum; anything else is reported as a `syn::Error`. A unit variant is appended for
+/// every entry in `ALL_OPERATIONS`, and an `impl` block providing `ALL`, `as_str`, `from_str`,
+/// `base_name` and `math_op` is emitted alongside the enum.
+pub fn function_enum(
+    mut item: ItemEnum,
+    attributes: pm2::TokenStream,
+) -> syn::Result<pm2::TokenStream> {
+    expect_empty_enum(&item)?;
+    let whole_attr_span = attributes.span();
+    let mut tokens = attributes.into_iter();
+
+    // The attribute should be the identifier of the `BaseName` enum and nothing else.
+    let base_enum = match (tokens.next(), tokens.next()) {
+        (None, _) => return Err(syn::Error::new(whole_attr_span, "expected one attribute")),
+        (Some(pm2::TokenTree::Ident(name)), None) => name,
+        (Some(pm2::TokenTree::Ident(_)), Some(extra)) => {
+            return Err(syn::Error::new(extra.span(), "unexpected token after identifier"));
+        }
+        (Some(other), _) => return Err(syn::Error::new(other.span(), "expected an identifier")),
+    };
+
+    // For each operation: add its variant and record one arm per generated `match`
+    let mut to_str = Vec::new();
+    let mut parse = Vec::new();
+    let mut to_base = Vec::new();
+    for op in ALL_OPERATIONS.iter() {
+        let name = op.name;
+        let variant = Ident::new(&name.to_upper_camel_case(), Span::call_site());
+        let base = Ident::new(&base_name(name).to_upper_camel_case(), Span::call_site());
+
+        to_str.push(quote! { Self::#variant => #name });
+        parse.push(quote! { #name => Self::#variant });
+        to_base.push(quote! { Self::#variant => #base_enum::#base });
+
+        item.variants.push(Variant {
+            attrs: Vec::new(),
+            ident: variant,
+            fields: Fields::Unit,
+            discriminant: None,
+        });
+    }
+
+    let enum_ident = &item.ident;
+    // Taken after the loop, once every variant has been pushed
+    let all_variants = item.variants.iter();
+
+    Ok(quote! {
+        // Instantiate the enum
+        #item
+
+        impl #enum_ident {
+            /// All variants of this enum.
+            pub const ALL: &[Self] = &[
+                #( Self::#all_variants, )*
+            ];
+
+            /// The stringified version of this function name.
+            pub const fn as_str(self) -> &'static str {
+                match self {
+                    #( #to_str , )*
+                }
+            }
+
+            /// If `s` is the name of a function, return it.
+            pub fn from_str(s: &str) -> Option<Self> {
+                let ret = match s {
+                    #( #parse , )*
+                    _ => return None,
+                };
+                Some(ret)
+            }
+
+            /// The base name enum for this function.
+            pub const fn base_name(self) -> #base_enum {
+                match self {
+                    #( #to_base, )*
+                }
+            }
+
+            /// Return information about this operation.
+            pub fn math_op(self) -> &'static crate::op::MathOpInfo {
+                crate::op::ALL_OPERATIONS.iter().find(|op| op.name == self.as_str()).unwrap()
+            }
+        }
+    })
+}
+
+/// Implement `#[base_name_enum]`, see documentation in `lib.rs`.
+///
+/// Requires an empty enum and no attribute arguments. Adds one unit variant per distinct base
+/// name in `ALL_OPERATIONS`, plus an `as_str` method mapping each variant back to that name.
+pub fn base_name_enum(
+    mut item: ItemEnum,
+    attributes: pm2::TokenStream,
+) -> syn::Result<pm2::TokenStream> {
+    expect_empty_enum(&item)?;
+    if !attributes.is_empty() {
+        return Err(syn::Error::new(attributes.span(), "no attributes expected"));
+    }
+
+    // Operations such as `sin`/`sinf` share a base name, so keep one sorted copy of each
+    let mut unique_names: Vec<_> = ALL_OPERATIONS.iter().map(|op| base_name(op.name)).collect();
+    unique_names.sort_unstable();
+    unique_names.dedup();
+
+    // Match arms for `fn as_str(self)`, built while the variants are appended to the enum
+    let mut str_arms = Vec::with_capacity(unique_names.len());
+    for name in unique_names {
+        let ident = Ident::new(&name.to_upper_camel_case(), Span::call_site());
+        str_arms.push(quote! { Self::#ident => #name });
+        item.variants.push(Variant {
+            attrs: Vec::new(),
+            ident,
+            fields: Fields::Unit,
+            discriminant: None,
+        });
+    }
+
+    let enum_ident = &item.ident;
+    Ok(quote! {
+        // Instantiate the enum
+        #item
+
+        impl #enum_ident {
+            /// The stringified version of this base name.
+            pub const fn as_str(self) -> &'static str {
+                match self {
+                    #( #str_arms ),*
+                }
+            }
+        }
+    })
+}
+
+/// Verify that an enum is empty, otherwise return an error
+fn expect_empty_enum(item: &ItemEnum) -> syn::Result<()> {
+    if !item.variants.is_empty() {
+        Err(syn::Error::new(item.variants.span(), "expected an empty enum"))
+    } else {
+        Ok(())
+    }
+}
diff --git a/crates/libm-macros/src/lib.rs b/crates/libm-macros/src/lib.rs
index dc78598ca..3cee5385b 100644
--- a/crates/libm-macros/src/lib.rs
+++ b/crates/libm-macros/src/lib.rs
@@ -1,242 +1,48 @@
+mod enums;
 mod parse;
-use std::sync::LazyLock;
+mod shared;
 
 use parse::{Invocation, StructuredInput};
 use proc_macro as pm;
 use proc_macro2::{self as pm2, Span};
 use quote::{ToTokens, quote};
-use syn::Ident;
+pub(crate) use shared::{ALL_OPERATIONS, FloatTy, MathOpInfo, Ty};
+use syn::spanned::Spanned;
 use syn::visit_mut::VisitMut;
+use syn::{Ident, ItemEnum};
 
-const ALL_FUNCTIONS: &[(Signature, Option<Signature>, &[&str])] = &[
-    (
-        // `fn(f32) -> f32`
-        Signature { args: &[Ty::F32], returns: &[Ty::F32] },
-        None,
-        &[
-            "acosf", "acoshf", "asinf", "asinhf", "atanf", "atanhf", "cbrtf", "ceilf", "cosf",
-            "coshf", "erff", "exp10f", "exp2f", "expf", "expm1f", "fabsf", "floorf", "j0f", "j1f",
-            "lgammaf", "log10f", "log1pf", "log2f", "logf", "rintf", "roundf", "sinf", "sinhf",
-            "sqrtf", "tanf", "tanhf", "tgammaf", "truncf",
-        ],
-    ),
-    (
-        // `(f64) -> f64`
-        Signature { args: &[Ty::F64], returns: &[Ty::F64] },
-        None,
-        &[
-            "acos", "acosh", "asin", "asinh", "atan", "atanh", "cbrt", "ceil", "cos", "cosh",
-            "erf", "exp10", "exp2", "exp", "expm1", "fabs", "floor", "j0", "j1", "lgamma", "log10",
-            "log1p", "log2", "log", "rint", "round", "sin", "sinh", "sqrt", "tan", "tanh",
-            "tgamma", "trunc",
-        ],
-    ),
-    (
-        // `(f32, f32) -> f32`
-        Signature { args: &[Ty::F32, Ty::F32], returns: &[Ty::F32] },
-        None,
-        &[
-            "atan2f",
-            "copysignf",
-            "fdimf",
-            "fmaxf",
-            "fminf",
-            "fmodf",
-            "hypotf",
-            "nextafterf",
-            "powf",
-            "remainderf",
-        ],
-    ),
-    (
-        // `(f64, f64) -> f64`
-        Signature { args: &[Ty::F64, Ty::F64], returns: &[Ty::F64] },
-        None,
-        &[
-            "atan2",
-            "copysign",
-            "fdim",
-            "fmax",
-            "fmin",
-            "fmod",
-            "hypot",
-            "nextafter",
-            "pow",
-            "remainder",
-        ],
-    ),
-    (
-        // `(f32, f32, f32) -> f32`
-        Signature { args: &[Ty::F32, Ty::F32, Ty::F32], returns: &[Ty::F32] },
-        None,
-        &["fmaf"],
-    ),
-    (
-        // `(f64, f64, f64) -> f64`
-        Signature { args: &[Ty::F64, Ty::F64, Ty::F64], returns: &[Ty::F64] },
-        None,
-        &["fma"],
-    ),
-    (
-        // `(f32) -> i32`
-        Signature { args: &[Ty::F32], returns: &[Ty::I32] },
-        None,
-        &["ilogbf"],
-    ),
-    (
-        // `(f64) -> i32`
-        Signature { args: &[Ty::F64], returns: &[Ty::I32] },
-        None,
-        &["ilogb"],
-    ),
-    (
-        // `(i32, f32) -> f32`
-        Signature { args: &[Ty::I32, Ty::F32], returns: &[Ty::F32] },
-        None,
-        &["jnf"],
-    ),
-    (
-        // `(i32, f64) -> f64`
-        Signature { args: &[Ty::I32, Ty::F64], returns: &[Ty::F64] },
-        None,
-        &["jn"],
-    ),
-    (
-        // `(f32, i32) -> f32`
-        Signature { args: &[Ty::F32, Ty::I32], returns: &[Ty::F32] },
-        None,
-        &["scalbnf", "ldexpf"],
-    ),
-    (
-        // `(f64, i64) -> f64`
-        Signature { args: &[Ty::F64, Ty::I32], returns: &[Ty::F64] },
-        None,
-        &["scalbn", "ldexp"],
-    ),
-    (
-        // `(f32, &mut f32) -> f32` as `(f32) -> (f32, f32)`
-        Signature { args: &[Ty::F32], returns: &[Ty::F32, Ty::F32] },
-        Some(Signature { args: &[Ty::F32, Ty::MutF32], returns: &[Ty::F32] }),
-        &["modff"],
-    ),
-    (
-        // `(f64, &mut f64) -> f64` as  `(f64) -> (f64, f64)`
-        Signature { args: &[Ty::F64], returns: &[Ty::F64, Ty::F64] },
-        Some(Signature { args: &[Ty::F64, Ty::MutF64], returns: &[Ty::F64] }),
-        &["modf"],
-    ),
-    (
-        // `(f32, &mut c_int) -> f32` as `(f32) -> (f32, i32)`
-        Signature { args: &[Ty::F32], returns: &[Ty::F32, Ty::I32] },
-        Some(Signature { args: &[Ty::F32, Ty::MutCInt], returns: &[Ty::F32] }),
-        &["frexpf", "lgammaf_r"],
-    ),
-    (
-        // `(f64, &mut c_int) -> f64` as `(f64) -> (f64, i32)`
-        Signature { args: &[Ty::F64], returns: &[Ty::F64, Ty::I32] },
-        Some(Signature { args: &[Ty::F64, Ty::MutCInt], returns: &[Ty::F64] }),
-        &["frexp", "lgamma_r"],
-    ),
-    (
-        // `(f32, f32, &mut c_int) -> f32` as `(f32, f32) -> (f32, i32)`
-        Signature { args: &[Ty::F32, Ty::F32], returns: &[Ty::F32, Ty::I32] },
-        Some(Signature { args: &[Ty::F32, Ty::F32, Ty::MutCInt], returns: &[Ty::F32] }),
-        &["remquof"],
-    ),
-    (
-        // `(f64, f64, &mut c_int) -> f64` as `(f64, f64) -> (f64, i32)`
-        Signature { args: &[Ty::F64, Ty::F64], returns: &[Ty::F64, Ty::I32] },
-        Some(Signature { args: &[Ty::F64, Ty::F64, Ty::MutCInt], returns: &[Ty::F64] }),
-        &["remquo"],
-    ),
-    (
-        // `(f32, &mut f32, &mut f32)` as `(f32) -> (f32, f32)`
-        Signature { args: &[Ty::F32], returns: &[Ty::F32, Ty::F32] },
-        Some(Signature { args: &[Ty::F32, Ty::MutF32, Ty::MutF32], returns: &[] }),
-        &["sincosf"],
-    ),
-    (
-        // `(f64, &mut f64, &mut f64)` as `(f64) -> (f64, f64)`
-        Signature { args: &[Ty::F64], returns: &[Ty::F64, Ty::F64] },
-        Some(Signature { args: &[Ty::F64, Ty::MutF64, Ty::MutF64], returns: &[] }),
-        &["sincos"],
-    ),
-];
-
-/// A type used in a function signature.
-#[allow(dead_code)]
-#[derive(Debug, Clone, Copy)]
-enum Ty {
-    F16,
-    F32,
-    F64,
-    F128,
-    I32,
-    CInt,
-    MutF16,
-    MutF32,
-    MutF64,
-    MutF128,
-    MutI32,
-    MutCInt,
-}
+const KNOWN_TYPES: &[&str] = &["FTy", "CFn", "CArgs", "CRet", "RustFn", "RustArgs", "RustRet"];
 
-impl ToTokens for Ty {
-    fn to_tokens(&self, tokens: &mut pm2::TokenStream) {
-        let ts = match self {
-            Ty::F16 => quote! { f16 },
-            Ty::F32 => quote! { f32 },
-            Ty::F64 => quote! { f64 },
-            Ty::F128 => quote! { f128 },
-            Ty::I32 => quote! { i32 },
-            Ty::CInt => quote! { ::core::ffi::c_int },
-            Ty::MutF16 => quote! { &mut f16 },
-            Ty::MutF32 => quote! { &mut f32 },
-            Ty::MutF64 => quote! { &mut f64 },
-            Ty::MutF128 => quote! { &mut f128 },
-            Ty::MutI32 => quote! { &mut i32 },
-            Ty::MutCInt => quote! { &mut core::ffi::c_int },
-        };
+/// Populate an enum with a variant representing each function. Names are in upper camel case.
+///
+/// Applied to an empty enum. Expects one attribute `#[function_enum(BaseName)]` that provides
+/// the name of the `BaseName` enum.
+#[proc_macro_attribute]
+pub fn function_enum(attributes: pm::TokenStream, tokens: pm::TokenStream) -> pm::TokenStream {
+    let item = syn::parse_macro_input!(tokens as ItemEnum);
+    let res = enums::function_enum(item, attributes.into());
 
-        tokens.extend(ts);
+    match res {
+        Ok(ts) => ts,
+        Err(e) => e.into_compile_error(),
     }
+    .into()
 }
 
-/// Representation of e.g. `(f32, f32) -> f32`
-#[derive(Debug, Clone)]
-struct Signature {
-    args: &'static [Ty],
-    returns: &'static [Ty],
-}
-
-/// Combined information about a function implementation.
-#[derive(Debug, Clone)]
-struct FunctionInfo {
-    name: &'static str,
-    /// Function signature for C implementations
-    c_sig: Signature,
-    /// Function signature for Rust implementations
-    rust_sig: Signature,
-}
-
-/// A flat representation of `ALL_FUNCTIONS`.
-static ALL_FUNCTIONS_FLAT: LazyLock<Vec<FunctionInfo>> = LazyLock::new(|| {
-    let mut ret = Vec::new();
+/// Create an enum representing all possible base names, with names in upper camel case.
+///
+/// Applied to an empty enum.
+#[proc_macro_attribute]
+pub fn base_name_enum(attributes: pm::TokenStream, tokens: pm::TokenStream) -> pm::TokenStream {
+    let item = syn::parse_macro_input!(tokens as ItemEnum);
+    let res = enums::base_name_enum(item, attributes.into());
 
-    for (rust_sig, c_sig, names) in ALL_FUNCTIONS {
-        for name in *names {
-            let api = FunctionInfo {
-                name,
-                rust_sig: rust_sig.clone(),
-                c_sig: c_sig.clone().unwrap_or_else(|| rust_sig.clone()),
-            };
-            ret.push(api);
-        }
+    match res {
+        Ok(ts) => ts,
+        Err(e) => e.into_compile_error(),
     }
-
-    ret.sort_by_key(|item| item.name);
-    ret
-});
+    .into()
+}
 
 /// Do something for each function present in this crate.
 ///
@@ -258,6 +64,8 @@ static ALL_FUNCTIONS_FLAT: LazyLock<Vec<FunctionInfo>> = LazyLock::new(|| {
 ///     (
 ///         // Name of that function
 ///         fn_name: $fn_name:ident,
+///         // The basic float type for this function (e.g. `f32`, `f64`)
+///         FTy: $FTy:ty,
 ///         // Function signature of the C version (e.g. `fn(f32, &mut f32) -> f32`)
 ///         CFn: $CFn:ty,
 ///         // A tuple representing the C version's arguments (e.g. `(f32, &mut f32)`)
@@ -271,7 +79,7 @@ static ALL_FUNCTIONS_FLAT: LazyLock<Vec<FunctionInfo>> = LazyLock::new(|| {
 ///         // The Rust version's return type (e.g. `(f32, f32)`)
 ///         RustRet: $RustRet:ty,
 ///         // Attributes for the current function, if any
-///         attrs: [$($meta:meta)*]
+///         attrs: [$($attr:meta),*],
 ///         // Extra tokens passed directly (if any)
 ///         extra: [$extra:ident],
 ///         // Extra function-tokens passed directly (if any)
@@ -279,17 +87,19 @@ static ALL_FUNCTIONS_FLAT: LazyLock<Vec<FunctionInfo>> = LazyLock::new(|| {
 ///     ) => { };
 /// }
 ///
+/// // All fields except for `callback` are optional.
 /// libm_macros::for_each_function! {
 ///     // The macro to invoke as a callback
 ///     callback: callback_macro,
+///     // Which types to include either as a list (`[CFn, RustFn, RustArgs]`) or "all"
+///     emit_types: all,
 ///     // Functions to skip, i.e. `callback` shouldn't be called at all for these.
-///     //
-///     // This is an optional field.
 ///     skip: [sin, cos],
 ///     // Attributes passed as `attrs` for specific functions. For example, here the invocation
 ///     // with `sinf` and that with `cosf` will both get `meta1` and `meta2`, but no others will.
 ///     //
-///     // This is an optional field.
+///     // Note that `f16_enabled` and `f128_enabled` will always get emitted regardless of whether
+///     // or not this is specified.
 ///     attributes: [
 ///         #[meta1]
 ///         #[meta2]
@@ -297,8 +107,6 @@ static ALL_FUNCTIONS_FLAT: LazyLock<Vec<FunctionInfo>> = LazyLock::new(|| {
 ///     ],
 ///     // Any tokens that should be passed directly to all invocations of the callback. This can
 ///     // be used to pass local variables or other things the macro needs access to.
-///     //
-///     // This is an optional field.
 ///     extra: [foo],
 ///     // Similar to `extra`, but allow providing a pattern for only specific functions. Uses
 ///     // a simplified match-like syntax.
@@ -313,7 +121,7 @@ pub fn for_each_function(tokens: pm::TokenStream) -> pm::TokenStream {
     let input = syn::parse_macro_input!(tokens as Invocation);
 
     let res = StructuredInput::from_fields(input)
-        .and_then(|s_in| validate(&s_in).map(|fn_list| (s_in, fn_list)))
+        .and_then(|mut s_in| validate(&mut s_in).map(|fn_list| (s_in, fn_list)))
         .and_then(|(s_in, fn_list)| expand(s_in, &fn_list));
 
     match res {
@@ -325,7 +133,7 @@ pub fn for_each_function(tokens: pm::TokenStream) -> pm::TokenStream {
 /// Check for any input that is structurally correct but has other problems.
 ///
 /// Returns the list of function names that we should expand for.
-fn validate(input: &StructuredInput) -> syn::Result<Vec<&'static FunctionInfo>> {
+fn validate(input: &mut StructuredInput) -> syn::Result<Vec<&'static MathOpInfo>> {
     // Collect lists of all functions that are provied as macro inputs in various fields (only,
     // skip, attributes).
     let attr_mentions = input
@@ -341,7 +149,7 @@ fn validate(input: &StructuredInput) -> syn::Result<Vec<&'static FunctionInfo>>
 
     // Make sure that every function mentioned is a real function
     for mentioned in all_mentioned_fns {
-        if !ALL_FUNCTIONS_FLAT.iter().any(|func| mentioned == func.name) {
+        if !ALL_OPERATIONS.iter().any(|func| mentioned == func.name) {
             let e = syn::Error::new(
                 mentioned.span(),
                 format!("unrecognized function name `{mentioned}`"),
@@ -353,14 +161,14 @@ fn validate(input: &StructuredInput) -> syn::Result<Vec<&'static FunctionInfo>>
     if !input.skip.is_empty() && input.only.is_some() {
         let e = syn::Error::new(
             input.only_span.unwrap(),
-            format!("only one of `skip` or `only` may be specified"),
+            "only one of `skip` or `only` may be specified",
         );
         return Err(e);
     }
 
     // Construct a list of what we intend to expand
     let mut fn_list = Vec::new();
-    for func in ALL_FUNCTIONS_FLAT.iter() {
+    for func in ALL_OPERATIONS.iter() {
         let fn_name = func.name;
         // If we have an `only` list and it does _not_ contain this function name, skip it
         if input.only.as_ref().is_some_and(|only| !only.iter().any(|o| o == fn_name)) {
@@ -376,6 +184,43 @@ fn validate(input: &StructuredInput) -> syn::Result<Vec<&'static FunctionInfo>>
         fn_list.push(func);
     }
 
+    // Types that the user would like us to provide in the macro
+    let mut add_all_types = false;
+    for ty in &input.emit_types {
+        let ty_name = ty.to_string();
+        if ty_name == "all" {
+            add_all_types = true;
+            continue;
+        }
+
+        // Check that all requested types are valid, pointing any error at the offending identifier
+        if !KNOWN_TYPES.contains(&ty_name.as_str()) {
+            let e = syn::Error::new(
+                ty.span(),
+                format!("unrecognized type identifier `{ty_name}`"),
+            );
+            return Err(e);
+        }
+    }
+
+    if add_all_types {
+        // Ensure that if `all` was specified that nothing else was
+        if input.emit_types.len() > 1 {
+            let e = syn::Error::new(
+                input.emit_types_span.unwrap(),
+                "if `all` is specified, no other type identifiers may be given",
+            );
+            return Err(e);
+        }
+
+        // ...and then add all types
+        input.emit_types.clear();
+        for ty in KNOWN_TYPES {
+            let ident = Ident::new(ty, Span::call_site());
+            input.emit_types.push(ident);
+        }
+    }
+
     if let Some(map) = &input.fn_extra {
         if !map.keys().any(|key| key == "_") {
             // No default provided; make sure every expected function is covered
@@ -404,7 +249,7 @@ fn validate(input: &StructuredInput) -> syn::Result<Vec<&'static FunctionInfo>>
 }
 
 /// Expand our structured macro input into invocations of the callback macro.
-fn expand(input: StructuredInput, fn_list: &[&FunctionInfo]) -> syn::Result<pm2::TokenStream> {
+fn expand(input: StructuredInput, fn_list: &[&MathOpInfo]) -> syn::Result<pm2::TokenStream> {
     let mut out = pm2::TokenStream::new();
     let default_ident = Ident::new("_", Span::call_site());
     let callback = input.callback;
@@ -413,16 +258,28 @@ fn expand(input: StructuredInput, fn_list: &[&FunctionInfo]) -> syn::Result<pm2:
         let fn_name = Ident::new(func.name, Span::call_site());
 
         // Prepare attributes in an `attrs: ...` field
-        let meta_field = match &input.attributes {
-            Some(attrs) => {
-                let meta = attrs
-                    .iter()
-                    .filter(|map| map.names.contains(&fn_name))
-                    .flat_map(|map| &map.meta);
-                quote! { attrs: [ #( #meta )* ]  }
-            }
-            None => pm2::TokenStream::new(),
-        };
+        let mut meta_fields = Vec::new();
+        if let Some(attrs) = &input.attributes {
+            let meta_iter = attrs
+                .iter()
+                .filter(|map| map.names.contains(&fn_name))
+                .flat_map(|map| &map.meta)
+                .map(|v| v.into_token_stream());
+
+            meta_fields.extend(meta_iter);
+        }
+
+        // Always emit f16 and f128 meta so this doesn't need to be repeated everywhere
+        if func.rust_sig.args.contains(&Ty::F16) || func.rust_sig.returns.contains(&Ty::F16) {
+            let ts = quote! { cfg(f16_enabled) };
+            meta_fields.push(ts);
+        }
+        if func.rust_sig.args.contains(&Ty::F128) || func.rust_sig.returns.contains(&Ty::F128) {
+            let ts = quote! { cfg(f128_enabled) };
+            meta_fields.push(ts);
+        }
+
+        let meta_field = quote! { attrs: [ #( #meta_fields ),* ], };
 
         // Prepare extra in an `extra: ...` field, running the replacer
         let extra_field = match input.extra.clone() {
@@ -451,20 +308,31 @@ fn expand(input: StructuredInput, fn_list: &[&FunctionInfo]) -> syn::Result<pm2:
             None => pm2::TokenStream::new(),
         };
 
+        let base_fty = func.float_ty;
         let c_args = &func.c_sig.args;
         let c_ret = &func.c_sig.returns;
         let rust_args = &func.rust_sig.args;
         let rust_ret = &func.rust_sig.returns;
 
+        let mut ty_fields = Vec::new();
+        for ty in &input.emit_types {
+            let field = match ty.to_string().as_str() {
+                "FTy" => quote! { FTy: #base_fty, },
+                "CFn" => quote! { CFn: fn( #(#c_args),* ,) -> ( #(#c_ret),* ), },
+                "CArgs" => quote! { CArgs: ( #(#c_args),* ,), },
+                "CRet" => quote! { CRet: ( #(#c_ret),* ), },
+                "RustFn" => quote! { RustFn: fn( #(#rust_args),* ,) -> ( #(#rust_ret),* ), },
+                "RustArgs" => quote! { RustArgs: ( #(#rust_args),* ,), },
+                "RustRet" => quote! { RustRet: ( #(#rust_ret),* ), },
+                _ => unreachable!("checked in validation"),
+            };
+            ty_fields.push(field);
+        }
+
         let new = quote! {
             #callback! {
                 fn_name: #fn_name,
-                CFn: fn( #(#c_args),* ,) -> ( #(#c_ret),* ),
-                CArgs: ( #(#c_args),* ,),
-                CRet: ( #(#c_ret),* ),
-                RustFn: fn( #(#rust_args),* ,) -> ( #(#rust_ret),* ),
-                RustArgs: ( #(#rust_args),* ,),
-                RustRet: ( #(#rust_ret),* ),
+                #( #ty_fields )*
                 #meta_field
                 #extra_field
                 #fn_extra_field
@@ -488,24 +356,7 @@ struct MacroReplace {
 
 impl MacroReplace {
     fn new(name: &'static str) -> Self {
-        // Keep this in sync with `libm_test::canonical_name`
-        let known_mappings = &[
-            ("erff", "erf"),
-            ("erf", "erf"),
-            ("lgammaf_r", "lgamma_r"),
-            ("modff", "modf"),
-            ("modf", "modf"),
-        ];
-
-        let norm_name = match known_mappings.iter().find(|known| known.0 == name) {
-            Some(found) => found.1,
-            None => name
-                .strip_suffix("f")
-                .or_else(|| name.strip_suffix("f16"))
-                .or_else(|| name.strip_suffix("f128"))
-                .unwrap_or(name),
-        };
-
+        let norm_name = base_name(name);
         Self { fn_name: name, norm_name: norm_name.to_owned(), error: None }
     }
 
@@ -539,3 +390,57 @@ impl VisitMut for MacroReplace {
         syn::visit_mut::visit_ident_mut(self, i);
     }
 }
+
+/// Strip the float-type suffix from a function name, returning its unsuffixed "base" form.
+///
+/// For example `abs` and `absf` both map to `abs`, and `lgamma_r` and `lgammaf_r` both map to
+/// `lgamma_r`. Names without a recognized suffix are returned unchanged.
+fn base_name(name: &str) -> &str {
+    // Suffixes are tried in this order, and the first one that matches is removed.
+    const SUFFIXES: [&str; 3] = ["f", "f16", "f128"];
+
+    match name {
+        // Spelled out explicitly: plain suffix stripping would wrongly shorten `erf` and `modf`,
+        // and cannot remove the `f` in the middle of `lgammaf_r`.
+        "erff" | "erf" => "erf",
+        "lgammaf_r" => "lgamma_r",
+        "modff" | "modf" => "modf",
+        _ => SUFFIXES
+            .iter()
+            .find_map(|suffix| name.strip_suffix(*suffix))
+            .unwrap_or(name),
+    }
+}
+
+/// Emit a `Ty` as Rust type tokens; `Mut*` variants borrow with a lifetime named `'a`.
+impl ToTokens for Ty {
+    fn to_tokens(&self, tokens: &mut pm2::TokenStream) {
+        tokens.extend(match self {
+            Ty::F16 => quote! { f16 },
+            Ty::F32 => quote! { f32 },
+            Ty::F64 => quote! { f64 },
+            Ty::F128 => quote! { f128 },
+            Ty::I32 => quote! { i32 },
+            // `c_int` is spelled with a leading `::` so a local `core` item cannot shadow it
+            Ty::CInt => quote! { ::core::ffi::c_int },
+            Ty::MutF16 => quote! { &'a mut f16 },
+            Ty::MutF32 => quote! { &'a mut f32 },
+            Ty::MutF64 => quote! { &'a mut f64 },
+            Ty::MutF128 => quote! { &'a mut f128 },
+            Ty::MutI32 => quote! { &'a mut i32 },
+            Ty::MutCInt => quote! { &'a mut ::core::ffi::c_int },
+        });
+    }
+}
+/// Emit a `FloatTy` as the identifier of the primitive float type it names (e.g. `f32`).
+impl ToTokens for FloatTy {
+    fn to_tokens(&self, tokens: &mut pm2::TokenStream) {
+        let name = match self {
+            FloatTy::F16 => "f16",
+            FloatTy::F32 => "f32",
+            FloatTy::F64 => "f64",
+            FloatTy::F128 => "f128",
+        };
+        Ident::new(name, Span::call_site()).to_tokens(tokens);
+    }
+}
diff --git a/crates/libm-macros/src/parse.rs b/crates/libm-macros/src/parse.rs
index ee9bd524b..369bbae2f 100644
--- a/crates/libm-macros/src/parse.rs
+++ b/crates/libm-macros/src/parse.rs
@@ -5,7 +5,7 @@ use quote::ToTokens;
 use syn::parse::{Parse, ParseStream, Parser};
 use syn::punctuated::Punctuated;
 use syn::spanned::Spanned;
-use syn::token::Comma;
+use syn::token::{self, Comma};
 use syn::{Arm, Attribute, Expr, ExprMatch, Ident, Meta, Token, bracketed};
 
 /// The input to our macro; just a list of `field: value` items.
@@ -39,6 +39,9 @@ impl Parse for Mapping {
 pub struct StructuredInput {
     /// Macro to invoke once per function
     pub callback: Ident,
+    /// Which of `CFn`, `CArgs`, `RustFn`, etc. to provide to the callback. This is really only
+    /// needed once per crate to set up the main trait.
+    pub emit_types: Vec<Ident>,
     /// Skip these functions
     pub skip: Vec<Ident>,
     /// Invoke only for these functions
@@ -50,6 +53,7 @@ pub struct StructuredInput {
     /// Per-function extra expressions to pass to the macro
     pub fn_extra: Option<BTreeMap<Ident, Expr>>,
     // For diagnostics
+    pub emit_types_span: Option<Span>,
     pub only_span: Option<Span>,
     pub fn_extra_span: Option<Span>,
 }
@@ -58,6 +62,7 @@ impl StructuredInput {
     pub fn from_fields(input: Invocation) -> syn::Result<Self> {
         let mut map: Vec<_> = input.fields.into_iter().collect();
         let cb_expr = expect_field(&mut map, "callback")?;
+        let emit_types_expr = expect_field(&mut map, "emit_types").ok();
         let skip_expr = expect_field(&mut map, "skip").ok();
         let only_expr = expect_field(&mut map, "only").ok();
         let attr_expr = expect_field(&mut map, "attributes").ok();
@@ -71,6 +76,12 @@ impl StructuredInput {
             ))?;
         }
 
+        let emit_types_span = emit_types_expr.as_ref().map(|expr| expr.span());
+        let emit_types = match emit_types_expr {
+            Some(expr) => Parser::parse2(parse_ident_or_array, expr.into_token_stream())?,
+            None => Vec::new(),
+        };
+
         let skip = match skip_expr {
             Some(expr) => Parser::parse2(parse_ident_array, expr.into_token_stream())?,
             None => Vec::new(),
@@ -103,6 +114,7 @@ impl StructuredInput {
 
         Ok(Self {
             callback: expect_ident(cb_expr)?,
+            emit_types,
             skip,
             only,
             only_span,
@@ -110,6 +122,7 @@ impl StructuredInput {
             extra,
             fn_extra,
             fn_extra_span,
+            emit_types_span,
         })
     }
 }
@@ -183,6 +196,15 @@ fn expect_ident(expr: Expr) -> syn::Result<Ident> {
     syn::parse2(expr.into_token_stream())
 }
 
+/// Parse either a bare identifier (`foo`) or a bracketed list of them (`[foo, bar, baz]`).
+fn parse_ident_or_array(input: ParseStream) -> syn::Result<Vec<Ident>> {
+    if input.peek(token::Bracket) {
+        parse_ident_array(input)
+    } else {
+        input.parse::<Ident>().map(|ident| vec![ident])
+    }
+}
+
 /// Parse an array of expressions.
 fn parse_expr_array(input: ParseStream) -> syn::Result<Vec<Expr>> {
     let content;
diff --git a/crates/libm-macros/src/shared.rs b/crates/libm-macros/src/shared.rs
new file mode 100644
index 000000000..5e58220eb
--- /dev/null
+++ b/crates/libm-macros/src/shared.rs
@@ -0,0 +1,444 @@
+/* List of all functions that is shared between `libm-macros` and `libm-test`. */
+
+use std::fmt;
+use std::sync::LazyLock;
+
+const ALL_OPERATIONS_NESTED: &[(FloatTy, Signature, Option<Signature>, &[&str])] = &[
+    (
+        // `(f16) -> f16`
+        FloatTy::F16,
+        Signature { args: &[Ty::F16], returns: &[Ty::F16] },
+        None,
+        &[
+            "ceilf16",
+            "fabsf16",
+            "floorf16",
+            "rintf16",
+            "roundevenf16",
+            "roundf16",
+            "sqrtf16",
+            "truncf16",
+        ],
+    ),
+    (
+        // `(f32) -> f32`
+        FloatTy::F32,
+        Signature { args: &[Ty::F32], returns: &[Ty::F32] },
+        None,
+        &[
+            "acosf",
+            "acoshf",
+            "asinf",
+            "asinhf",
+            "atanf",
+            "atanhf",
+            "cbrtf",
+            "ceilf",
+            "cosf",
+            "coshf",
+            "erfcf",
+            "erff",
+            "exp10f",
+            "exp2f",
+            "expf",
+            "expm1f",
+            "fabsf",
+            "floorf",
+            "j0f",
+            "j1f",
+            "lgammaf",
+            "log10f",
+            "log1pf",
+            "log2f",
+            "logf",
+            "rintf",
+            "roundevenf",
+            "roundf",
+            "sinf",
+            "sinhf",
+            "sqrtf",
+            "tanf",
+            "tanhf",
+            "tgammaf",
+            "truncf",
+            "y0f",
+            "y1f",
+        ],
+    ),
+    (
+        // `(f64) -> f64`
+        FloatTy::F64,
+        Signature { args: &[Ty::F64], returns: &[Ty::F64] },
+        None,
+        &[
+            "acos",
+            "acosh",
+            "asin",
+            "asinh",
+            "atan",
+            "atanh",
+            "cbrt",
+            "ceil",
+            "cos",
+            "cosh",
+            "erf",
+            "erfc",
+            "exp",
+            "exp10",
+            "exp2",
+            "expm1",
+            "fabs",
+            "floor",
+            "j0",
+            "j1",
+            "lgamma",
+            "log",
+            "log10",
+            "log1p",
+            "log2",
+            "rint",
+            "round",
+            "roundeven",
+            "sin",
+            "sinh",
+            "sqrt",
+            "tan",
+            "tanh",
+            "tgamma",
+            "trunc",
+            "y0",
+            "y1",
+        ],
+    ),
+    (
+        // `(f128) -> f128`
+        FloatTy::F128,
+        Signature { args: &[Ty::F128], returns: &[Ty::F128] },
+        None,
+        &[
+            "ceilf128",
+            "fabsf128",
+            "floorf128",
+            "rintf128",
+            "roundevenf128",
+            "roundf128",
+            "sqrtf128",
+            "truncf128",
+        ],
+    ),
+    (
+        // `(f16, f16) -> f16`
+        FloatTy::F16,
+        Signature { args: &[Ty::F16, Ty::F16], returns: &[Ty::F16] },
+        None,
+        &[
+            "copysignf16",
+            "fdimf16",
+            "fmaxf16",
+            "fmaximum_numf16",
+            "fmaximumf16",
+            "fminf16",
+            "fminimum_numf16",
+            "fminimumf16",
+            "fmodf16",
+        ],
+    ),
+    (
+        // `(f32, f32) -> f32`
+        FloatTy::F32,
+        Signature { args: &[Ty::F32, Ty::F32], returns: &[Ty::F32] },
+        None,
+        &[
+            "atan2f",
+            "copysignf",
+            "fdimf",
+            "fmaxf",
+            "fmaximum_numf",
+            "fmaximumf",
+            "fminf",
+            "fminimum_numf",
+            "fminimumf",
+            "fmodf",
+            "hypotf",
+            "nextafterf",
+            "powf",
+            "remainderf",
+        ],
+    ),
+    (
+        // `(f64, f64) -> f64`
+        FloatTy::F64,
+        Signature { args: &[Ty::F64, Ty::F64], returns: &[Ty::F64] },
+        None,
+        &[
+            "atan2",
+            "copysign",
+            "fdim",
+            "fmax",
+            "fmaximum",
+            "fmaximum_num",
+            "fmin",
+            "fminimum",
+            "fminimum_num",
+            "fmod",
+            "hypot",
+            "nextafter",
+            "pow",
+            "remainder",
+        ],
+    ),
+    (
+        // `(f128, f128) -> f128`
+        FloatTy::F128,
+        Signature { args: &[Ty::F128, Ty::F128], returns: &[Ty::F128] },
+        None,
+        &[
+            "copysignf128",
+            "fdimf128",
+            "fmaxf128",
+            "fmaximum_numf128",
+            "fmaximumf128",
+            "fminf128",
+            "fminimum_numf128",
+            "fminimumf128",
+            "fmodf128",
+        ],
+    ),
+    (
+        // `(f32, f32, f32) -> f32`
+        FloatTy::F32,
+        Signature { args: &[Ty::F32, Ty::F32, Ty::F32], returns: &[Ty::F32] },
+        None,
+        &["fmaf"],
+    ),
+    (
+        // `(f64, f64, f64) -> f64`
+        FloatTy::F64,
+        Signature { args: &[Ty::F64, Ty::F64, Ty::F64], returns: &[Ty::F64] },
+        None,
+        &["fma"],
+    ),
+    (
+        // `(f128, f128, f128) -> f128`
+        FloatTy::F128,
+        Signature { args: &[Ty::F128, Ty::F128, Ty::F128], returns: &[Ty::F128] },
+        None,
+        &["fmaf128"],
+    ),
+    (
+        // `(f32) -> i32`
+        FloatTy::F32,
+        Signature { args: &[Ty::F32], returns: &[Ty::I32] },
+        None,
+        &["ilogbf"],
+    ),
+    (
+        // `(f64) -> i32`
+        FloatTy::F64,
+        Signature { args: &[Ty::F64], returns: &[Ty::I32] },
+        None,
+        &["ilogb"],
+    ),
+    (
+        // `(i32, f32) -> f32`
+        FloatTy::F32,
+        Signature { args: &[Ty::I32, Ty::F32], returns: &[Ty::F32] },
+        None,
+        &["jnf", "ynf"],
+    ),
+    (
+        // `(i32, f64) -> f64`
+        FloatTy::F64,
+        Signature { args: &[Ty::I32, Ty::F64], returns: &[Ty::F64] },
+        None,
+        &["jn", "yn"],
+    ),
+    (
+        // `(f16, i32) -> f16`
+        FloatTy::F16,
+        Signature { args: &[Ty::F16, Ty::I32], returns: &[Ty::F16] },
+        None,
+        &["ldexpf16", "scalbnf16"],
+    ),
+    (
+        // `(f32, i32) -> f32`
+        FloatTy::F32,
+        Signature { args: &[Ty::F32, Ty::I32], returns: &[Ty::F32] },
+        None,
+        &["ldexpf", "scalbnf"],
+    ),
+    (
+        // `(f64, i32) -> f64`
+        FloatTy::F64,
+        Signature { args: &[Ty::F64, Ty::I32], returns: &[Ty::F64] },
+        None,
+        &["ldexp", "scalbn"],
+    ),
+    (
+        // `(f128, i32) -> f128`
+        FloatTy::F128,
+        Signature { args: &[Ty::F128, Ty::I32], returns: &[Ty::F128] },
+        None,
+        &["ldexpf128", "scalbnf128"],
+    ),
+    (
+        // `(f32, &mut f32) -> f32` as `(f32) -> (f32, f32)`
+        FloatTy::F32,
+        Signature { args: &[Ty::F32], returns: &[Ty::F32, Ty::F32] },
+        Some(Signature { args: &[Ty::F32, Ty::MutF32], returns: &[Ty::F32] }),
+        &["modff"],
+    ),
+    (
+        // `(f64, &mut f64) -> f64` as `(f64) -> (f64, f64)`
+        FloatTy::F64,
+        Signature { args: &[Ty::F64], returns: &[Ty::F64, Ty::F64] },
+        Some(Signature { args: &[Ty::F64, Ty::MutF64], returns: &[Ty::F64] }),
+        &["modf"],
+    ),
+    (
+        // `(f32, &mut c_int) -> f32` as `(f32) -> (f32, i32)`
+        FloatTy::F32,
+        Signature { args: &[Ty::F32], returns: &[Ty::F32, Ty::I32] },
+        Some(Signature { args: &[Ty::F32, Ty::MutCInt], returns: &[Ty::F32] }),
+        &["frexpf", "lgammaf_r"],
+    ),
+    (
+        // `(f64, &mut c_int) -> f64` as `(f64) -> (f64, i32)`
+        FloatTy::F64,
+        Signature { args: &[Ty::F64], returns: &[Ty::F64, Ty::I32] },
+        Some(Signature { args: &[Ty::F64, Ty::MutCInt], returns: &[Ty::F64] }),
+        &["frexp", "lgamma_r"],
+    ),
+    (
+        // `(f32, f32, &mut c_int) -> f32` as `(f32, f32) -> (f32, i32)`
+        FloatTy::F32,
+        Signature { args: &[Ty::F32, Ty::F32], returns: &[Ty::F32, Ty::I32] },
+        Some(Signature { args: &[Ty::F32, Ty::F32, Ty::MutCInt], returns: &[Ty::F32] }),
+        &["remquof"],
+    ),
+    (
+        // `(f64, f64, &mut c_int) -> f64` as `(f64, f64) -> (f64, i32)`
+        FloatTy::F64,
+        Signature { args: &[Ty::F64, Ty::F64], returns: &[Ty::F64, Ty::I32] },
+        Some(Signature { args: &[Ty::F64, Ty::F64, Ty::MutCInt], returns: &[Ty::F64] }),
+        &["remquo"],
+    ),
+    (
+        // `(f32, &mut f32, &mut f32)` as `(f32) -> (f32, f32)`
+        FloatTy::F32,
+        Signature { args: &[Ty::F32], returns: &[Ty::F32, Ty::F32] },
+        Some(Signature { args: &[Ty::F32, Ty::MutF32, Ty::MutF32], returns: &[] }),
+        &["sincosf"],
+    ),
+    (
+        // `(f64, &mut f64, &mut f64)` as `(f64) -> (f64, f64)`
+        FloatTy::F64,
+        Signature { args: &[Ty::F64], returns: &[Ty::F64, Ty::F64] },
+        Some(Signature { args: &[Ty::F64, Ty::MutF64, Ty::MutF64], returns: &[] }),
+        &["sincos"],
+    ),
+];
+
+/// A type that can appear in a [`Signature`], either as an argument or as a return value.
+#[allow(dead_code)]
+#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
+pub enum Ty {
+    F16,
+    F32,
+    F64,
+    F128,
+    I32,
+    CInt, // displayed as `::core::ffi::c_int`
+    MutF16, // the `Mut*` variants are displayed as `&mut` references to the matching type
+    MutF32,
+    MutF64,
+    MutF128,
+    MutI32,
+    MutCInt,
+}
+
+/// The subset of [`Ty`] that is a float type; recorded per operation in `MathOpInfo::float_ty`.
+#[allow(dead_code)]
+#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
+pub enum FloatTy {
+    F16,
+    F32,
+    F64,
+    F128,
+}
+
+/// Writes the type the way it is spelled in Rust source, e.g. `f32` or `&mut f64`.
+impl fmt::Display for Ty {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        f.write_str(match self {
+            Ty::F16 => "f16",
+            Ty::F32 => "f32",
+            Ty::F64 => "f64",
+            Ty::F128 => "f128",
+            Ty::I32 => "i32",
+            Ty::CInt => "::core::ffi::c_int",
+            Ty::MutF16 => "&mut f16",
+            Ty::MutF32 => "&mut f32",
+            Ty::MutF64 => "&mut f64",
+            Ty::MutF128 => "&mut f128",
+            Ty::MutI32 => "&mut i32",
+            Ty::MutCInt => "&mut ::core::ffi::c_int",
+        })
+    }
+}
+
+/// Writes the name of the primitive float type: `f16`, `f32`, `f64` or `f128`.
+impl fmt::Display for FloatTy {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        f.write_str(match *self {
+            FloatTy::F16 => "f16",
+            FloatTy::F32 => "f32",
+            FloatTy::F64 => "f64",
+            FloatTy::F128 => "f128",
+        })
+    }
+}
+
+/// Representation of a function signature, e.g. `(f32, f32) -> f32`.
+#[derive(Debug, Clone)]
+pub struct Signature {
+    pub args: &'static [Ty], // argument types, in declaration order
+    pub returns: &'static [Ty], // return types; several entries form a tuple
+}
+
+/// Combined information about one function: its name, float type, and C and Rust signatures.
+#[derive(Debug, Clone)]
+pub struct MathOpInfo {
+    pub name: &'static str, // function name as listed in the table, e.g. `sinf`
+    pub float_ty: FloatTy,
+    /// Function signature for C implementations; equal to `rust_sig` unless the table overrides it
+    pub c_sig: Signature,
+    /// Function signature for Rust implementations
+    pub rust_sig: Signature,
+}
+
+/// A flat, name-sorted view of `ALL_OPERATIONS_NESTED` with one entry per function.
+pub static ALL_OPERATIONS: LazyLock<Vec<MathOpInfo>> = LazyLock::new(|| {
+    let mut ret = Vec::new();
+
+    for (base_fty, rust_sig, c_sig, names) in ALL_OPERATIONS_NESTED {
+        // The name lists must stay sorted in the source; report the expected order if not.
+        if !names.is_sorted() {
+            let mut sorted = (*names).to_owned();
+            sorted.sort_unstable();
+            panic!("names list is not sorted: {names:?}\nExpected: {sorted:?}");
+        }
+
+        // Groups without a dedicated C signature share the Rust one.
+        let c_sig = c_sig.clone().unwrap_or_else(|| rust_sig.clone());
+        ret.extend(names.iter().copied().map(|name| MathOpInfo {
+            name,
+            float_ty: *base_fty,
+            rust_sig: rust_sig.clone(),
+            c_sig: c_sig.clone(),
+        }));
+    }
+
+    ret.sort_by_key(|item| item.name);
+    ret
+});
diff --git a/crates/libm-macros/tests/basic.rs b/crates/libm-macros/tests/basic.rs
index 8f8c09f1b..5314e84bb 100644
--- a/crates/libm-macros/tests/basic.rs
+++ b/crates/libm-macros/tests/basic.rs
@@ -1,34 +1,31 @@
+#![feature(f16)]
+#![feature(f128)]
 // `STATUS_DLL_NOT_FOUND` on i686 MinGW, not worth looking into.
 #![cfg(not(all(target_arch = "x86", target_os = "windows", target_env = "gnu")))]
 
 macro_rules! basic {
     (
         fn_name: $fn_name:ident,
+        FTy: $FTy:ty,
         CFn: $CFn:ty,
         CArgs: $CArgs:ty,
         CRet: $CRet:ty,
         RustFn: $RustFn:ty,
         RustArgs: $RustArgs:ty,
         RustRet: $RustRet:ty,
-        attrs: [$($meta:meta)*]
+        attrs: [$($attr:meta),*],
         extra: [$($extra_tt:tt)*],
         fn_extra: $fn_extra:expr,
     ) => {
-        $(#[$meta])*
-        mod $fn_name {
-            #[allow(unused)]
-            type CFnTy = $CFn;
-            // type CArgsTy<'_> = $CArgs;
-            // type CRetTy<'_> = $CRet;
-            #[allow(unused)]
+        $(#[$attr])*
+        #[allow(dead_code)]
+        pub mod $fn_name {
+            type FTy= $FTy;
+            type CFnTy<'a> = $CFn;
             type RustFnTy = $RustFn;
-            #[allow(unused)]
             type RustArgsTy = $RustArgs;
-            #[allow(unused)]
             type RustRetTy = $RustRet;
-            #[allow(unused)]
             const A: &[&str] = &[$($extra_tt)*];
-            #[allow(unused)]
             fn foo(a: f32) -> f32 {
                 $fn_extra(a)
             }
@@ -39,6 +36,7 @@ macro_rules! basic {
 mod test_basic {
     libm_macros::for_each_function! {
         callback: basic,
+        emit_types: all,
         skip: [sin, cos],
         attributes: [
             // just some random attributes
@@ -58,25 +56,10 @@ mod test_basic {
 macro_rules! basic_no_extra {
     (
         fn_name: $fn_name:ident,
-        CFn: $CFn:ty,
-        CArgs: $CArgs:ty,
-        CRet: $CRet:ty,
-        RustFn: $RustFn:ty,
-        RustArgs: $RustArgs:ty,
-        RustRet: $RustRet:ty,
+        attrs: [$($attr:meta),*],
     ) => {
-        mod $fn_name {
-            #[allow(unused)]
-            type CFnTy = $CFn;
-            // type CArgsTy<'_> = $CArgs;
-            // type CRetTy<'_> = $CRet;
-            #[allow(unused)]
-            type RustFnTy = $RustFn;
-            #[allow(unused)]
-            type RustArgsTy = $RustArgs;
-            #[allow(unused)]
-            type RustRetTy = $RustRet;
-        }
+        $(#[$attr])*
+        mod $fn_name {}
     };
 }
 
@@ -94,3 +77,27 @@ mod test_only {
         only: [sin, sinf],
     }
 }
+
+macro_rules! specified_types {
+    (
+        fn_name: $fn_name:ident,
+        RustFn: $RustFn:ty,
+        RustArgs: $RustArgs:ty,
+        attrs: [$($attr:meta),*],
+    ) => {
+        $(#[$attr])*
+        #[allow(dead_code)]
+        mod $fn_name {
+            type RustFnTy = $RustFn;
+            type RustArgsTy = $RustArgs;
+        }
+    };
+}
+
+mod test_emit_types {
+    // With `emit_types: [RustFn, RustArgs]` the callback must receive exactly those two types.
+    libm_macros::for_each_function! {
+        callback: specified_types,
+        emit_types: [RustFn, RustArgs],
+    }
+}
diff --git a/crates/libm-macros/tests/enum.rs b/crates/libm-macros/tests/enum.rs
new file mode 100644
index 000000000..93e209a0d
--- /dev/null
+++ b/crates/libm-macros/tests/enum.rs
@@ -0,0 +1,38 @@
+#[libm_macros::function_enum(BaseName)]
+#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
+pub enum Identifier {} // variants such as `Sin` and `Sinf` are filled in by the attribute macro
+
+#[libm_macros::base_name_enum]
+#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
+pub enum BaseName {} // variants are filled in by the attribute macro; `Sin` covers `sin` and `sinf`
+
+/// `as_str` gives back the function name that a variant stands for.
+#[test]
+fn as_str() {
+    assert_eq!([Identifier::Sin.as_str(), Identifier::Sinf.as_str()], ["sin", "sinf"]);
+}
+
+#[test]
+fn from_str() {
+    let parsed = ["sin", "sinf"].map(|name| Identifier::from_str(name).unwrap());
+    assert_eq!(parsed, [Identifier::Sin, Identifier::Sinf]);
+}
+
+#[test]
+fn basename() {
+    let bases = [Identifier::Sin, Identifier::Sinf].map(|id| id.base_name());
+    assert_eq!(bases, [BaseName::Sin; 2]);
+}
+
+#[test]
+fn math_op() {
+    let float_tys = [Identifier::Sin, Identifier::Sinf].map(|id| id.math_op().float_ty);
+    assert_eq!(float_tys, [FloatTy::F64, FloatTy::F32]);
+}
+
+// Replicate the structure that we have in `libm-test`: the shared list is included as `mod op`.
+mod op {
+    include!("../../libm-macros/src/shared.rs");
+}
+
+use op::FloatTy;
diff --git a/crates/libm-test/Cargo.toml b/crates/libm-test/Cargo.toml
index 703524bcd..5d150b4ae 100644
--- a/crates/libm-test/Cargo.toml
+++ b/crates/libm-test/Cargo.toml
@@ -1,31 +1,70 @@
 [package]
 name = "libm-test"
 version = "0.1.0"
-edition = "2021"
+edition = "2024"
 publish = false
 
 [features]
-default = []
+default = ["build-mpfr", "build-musl", "unstable-float"]
+
+# Propagated from libm because this affects which functions we test.
+unstable-float = ["libm/unstable-float", "rug?/nightly-float"]
 
 # Generate tests which are random inputs and the outputs are calculated with
 # musl libc.
-test-musl-serialized = ["rand"]
+build-mpfr = ["dep:rug", "dep:gmp-mpfr-sys"]
 
 # Build our own musl for testing and benchmarks
 build-musl = ["dep:musl-math-sys"]
 
+# Enable report generation without bringing in more dependencies by default
+benchmarking-reports = ["criterion/plotters", "criterion/html_reports"]
+
+# Enable icount benchmarks (requires iai-callgrind and valgrind)
+icount = ["dep:iai-callgrind"]
+
+# Run with a reduced set of benchmarks, such as for CI
+short-benchmarks = []
+
 [dependencies]
-anyhow = "1.0.90"
-libm = { path = "../.." }
+anyhow = "1.0.97"
+# This is not directly used but is required so we can enable `gmp-mpfr-sys/force-cross`.
+gmp-mpfr-sys = { version = "1.6.4", optional = true, default-features = false }
+iai-callgrind = { version = "0.14.0", optional = true }
+indicatif = { version = "0.17.11", default-features = false }
+libm = { path = "../../libm", features = ["unstable-public-internals"] }
 libm-macros = { path = "../libm-macros" }
 musl-math-sys = { path = "../musl-math-sys", optional = true }
 paste = "1.0.15"
-rand = "0.8.5"
-rand_chacha = "0.3.1"
-
-[target.'cfg(target_family = "wasm")'.dependencies]
-# Enable randomness on WASM
-getrandom = { version = "0.2", features = ["js"] }
+rand = "0.9.0"
+rand_chacha = "0.9.0"
+rayon = "1.10.0"
+rug = { version = "1.27.0", optional = true, default-features = false, features = ["float", "integer", "std"] }
 
 [build-dependencies]
-rand = { version = "0.8.5", optional = true }
+rand = { version = "0.9.0", optional = true }
+
+[dev-dependencies]
+criterion = { version = "0.5.1", default-features = false, features = ["cargo_bench_support"] }
+libtest-mimic = "0.8.1"
+
+[[bench]]
+name = "icount"
+harness = false
+required-features = ["icount"]
+
+[[bench]]
+name = "random"
+harness = false
+
+[[test]]
+# No harness so that we can skip tests at runtime based on env. Prefixed with
+# `z` so these tests get run last.
+name = "z_extensive"
+harness = false
+
+[lints.rust]
+# Values from the shared config.rs used by `libm` but not the test crate
+unexpected_cfgs = { level = "warn", check-cfg = [
+  'cfg(feature, values("arch", "force-soft-floats", "unstable-intrinsics"))',
+] }
diff --git a/crates/libm-test/benches/icount.rs b/crates/libm-test/benches/icount.rs
new file mode 100644
index 000000000..da8c6bfd1
--- /dev/null
+++ b/crates/libm-test/benches/icount.rs
@@ -0,0 +1,316 @@
+//! Benchmarks that use `iai-callgrind` to be reasonably CI-stable.
+
+use std::hint::black_box;
+
+use iai_callgrind::{library_benchmark, library_benchmark_group, main};
+use libm::support::{HInt, u256};
+use libm_test::generate::spaced;
+use libm_test::{CheckBasis, CheckCtx, GeneratorKind, MathOp, OpRustArgs, TupleCall, op};
+
+const BENCH_ITER_ITEMS: u64 = 500; // iteration override applied to each benchmark's `CheckCtx`
+
+macro_rules! icount_benches {
+    (
+        fn_name: $fn_name:ident,
+        attrs: [$($_attr:meta),*],
+    ) => {
+        paste::paste! {
+            // Collect benchmark inputs from the `spaced` generator (`GeneratorKind::QuickSpaced`).
+            fn [< setup_ $fn_name >]() -> Vec<OpRustArgs<op::$fn_name::Routine>> {
+                type Op = op::$fn_name::Routine;
+                let mut ctx = CheckCtx::new(
+                    Op::IDENTIFIER,
+                    CheckBasis::None,
+                    GeneratorKind::QuickSpaced
+                );
+                ctx.override_iterations(BENCH_ITER_ITEMS);
+                let ret = spaced::get_test_cases::<Op>(&ctx).0.collect::<Vec<_>>();
+                println!("operation {}, {} steps", Op::NAME, ret.len());
+                ret
+            }
+
+            // Call the routine on each input, with the function itself hidden behind `black_box`.
+            #[library_benchmark]
+            #[bench::logspace([< setup_ $fn_name >]())]
+            fn [< icount_bench_ $fn_name >](cases: Vec<OpRustArgs<op::$fn_name::Routine>>) {
+                type Op = op::$fn_name::Routine;
+                let f = black_box(Op::ROUTINE);
+                for input in cases.iter().copied() {
+                    input.call(f);
+                }
+            }
+
+            library_benchmark_group!(
+                name = [< icount_bench_ $fn_name _group  >];
+                benchmarks = [< icount_bench_ $fn_name >]
+            );
+        }
+    };
+}
+
+libm_macros::for_each_function! {
+    callback: icount_benches,
+}
+
+/// Inputs for the `u128` widening-multiply benchmark, stepping by `u128::MAX / 300`.
+///
+/// NOTE(review): `y` is never reset between outer iterations, so only the first pass (with
+/// `x == 0`) pushes anything and the result is 300 pairs `(0, k * step)`. This looks like it
+/// was meant to be a grid over both operands; confirm before changing, since
+/// `setup_u256_add` and `setup_u256_shift` reuse this output.
+fn setup_u128_mul() -> Vec<(u128, u128)> {
+    let step = u128::MAX / 300;
+    let (mut x, mut y) = (0u128, 0u128);
+    let mut v = Vec::new();
+
+    loop {
+        // Advance `y` in `step` increments until the next increment would overflow.
+        while let Some(next) = y.checked_add(step) {
+            y = next;
+            v.push((x, y));
+        }
+        // Then advance `x` the same way; stop once it can no longer be incremented.
+        let Some(next) = x.checked_add(step) else { break };
+        x = next;
+    }
+
+    v
+}
+
+/// Operand pairs for the `u256` addition benchmark.
+fn setup_u256_add() -> Vec<(u256, u256)> {
+    // Square the u128 inputs to cover most of the u256 range.
+    let mut v: Vec<_> =
+        setup_u128_mul().into_iter().map(|(x, y)| (x.widen_mul(x), y.widen_mul(y))).collect();
+
+    // Even `u128::MAX^2` is smaller than `u256::MAX`, so add the maximum explicitly.
+    v.push((u256::MAX, u256::MAX));
+    v
+}
+
+/// Inputs for the `u256` shift benchmark: every squared `x` paired with each shift in `0..256`.
+/// NOTE(review): `setup_u128_mul` currently yields only `x == 0`, so every value shifted here
+/// is zero; confirm whether that is intended.
+fn setup_u256_shift() -> Vec<(u256, u32)> {
+    setup_u128_mul()
+        .into_iter()
+        .flat_map(|(x, _)| {
+            let x2 = x.widen_mul(x);
+            (0u32..256).map(move |y| (x2, y))
+        })
+        .collect()
+}
+
+#[library_benchmark]
+#[bench::linspace(setup_u128_mul())]
+fn icount_bench_u128_widen_mul(cases: Vec<(u128, u128)>) {
+    for (x, y) in cases.iter().copied() {
+        black_box(black_box(x).zero_widen_mul(black_box(y))); // operands and result stay opaque
+    }
+}
+
+library_benchmark_group!(
+    name = icount_bench_u128_widen_mul_group; // listed in `main!` at the end of the file
+    benchmarks = icount_bench_u128_widen_mul
+);
+
+#[library_benchmark]
+#[bench::linspace(setup_u256_add())]
+fn icount_bench_u256_add(cases: Vec<(u256, u256)>) {
+    for (x, y) in cases.iter().copied() {
+        black_box(black_box(x) + black_box(y)); // operands and result stay opaque
+    }
+}
+
+library_benchmark_group!(
+    name = icount_bench_u256_add_group; // listed in `main!` at the end of the file
+    benchmarks = icount_bench_u256_add
+);
+
+#[library_benchmark]
+#[bench::linspace(setup_u256_shift())]
+fn icount_bench_u256_shr(cases: Vec<(u256, u32)>) {
+    for (x, y) in cases.iter().copied() {
+        black_box(black_box(x) >> black_box(y)); // value, shift amount and result stay opaque
+    }
+}
+
+library_benchmark_group!(
+    name = icount_bench_u256_shr_group; // listed in `main!` at the end of the file
+    benchmarks = icount_bench_u256_shr
+);
+
+main!(
+    library_benchmark_groups =
+    // u256-related benchmarks
+    icount_bench_u128_widen_mul_group,
+    icount_bench_u256_add_group,
+    icount_bench_u256_shr_group,
+    // verify-apilist-start
+    // verify-sorted-start
+    icount_bench_acos_group,
+    icount_bench_acosf_group,
+    icount_bench_acosh_group,
+    icount_bench_acoshf_group,
+    icount_bench_asin_group,
+    icount_bench_asinf_group,
+    icount_bench_asinh_group,
+    icount_bench_asinhf_group,
+    icount_bench_atan2_group,
+    icount_bench_atan2f_group,
+    icount_bench_atan_group,
+    icount_bench_atanf_group,
+    icount_bench_atanh_group,
+    icount_bench_atanhf_group,
+    icount_bench_cbrt_group,
+    icount_bench_cbrtf_group,
+    icount_bench_ceil_group,
+    icount_bench_ceilf128_group,
+    icount_bench_ceilf16_group,
+    icount_bench_ceilf_group,
+    icount_bench_copysign_group,
+    icount_bench_copysignf128_group,
+    icount_bench_copysignf16_group,
+    icount_bench_copysignf_group,
+    icount_bench_cos_group,
+    icount_bench_cosf_group,
+    icount_bench_cosh_group,
+    icount_bench_coshf_group,
+    icount_bench_erf_group,
+    icount_bench_erfc_group,
+    icount_bench_erfcf_group,
+    icount_bench_erff_group,
+    icount_bench_exp10_group,
+    icount_bench_exp10f_group,
+    icount_bench_exp2_group,
+    icount_bench_exp2f_group,
+    icount_bench_exp_group,
+    icount_bench_expf_group,
+    icount_bench_expm1_group,
+    icount_bench_expm1f_group,
+    icount_bench_fabs_group,
+    icount_bench_fabsf128_group,
+    icount_bench_fabsf16_group,
+    icount_bench_fabsf_group,
+    icount_bench_fdim_group,
+    icount_bench_fdimf128_group,
+    icount_bench_fdimf16_group,
+    icount_bench_fdimf_group,
+    icount_bench_floor_group,
+    icount_bench_floorf128_group,
+    icount_bench_floorf16_group,
+    icount_bench_floorf_group,
+    icount_bench_fma_group,
+    icount_bench_fmaf128_group,
+    icount_bench_fmaf_group,
+    icount_bench_fmax_group,
+    icount_bench_fmaxf128_group,
+    icount_bench_fmaxf16_group,
+    icount_bench_fmaxf_group,
+    icount_bench_fmaximum_group,
+    icount_bench_fmaximum_num_group,
+    icount_bench_fmaximum_numf128_group,
+    icount_bench_fmaximum_numf16_group,
+    icount_bench_fmaximum_numf_group,
+    icount_bench_fmaximumf128_group,
+    icount_bench_fmaximumf16_group,
+    icount_bench_fmaximumf_group,
+    icount_bench_fmin_group,
+    icount_bench_fminf128_group,
+    icount_bench_fminf16_group,
+    icount_bench_fminf_group,
+    icount_bench_fminimum_group,
+    icount_bench_fminimum_num_group,
+    icount_bench_fminimum_numf128_group,
+    icount_bench_fminimum_numf16_group,
+    icount_bench_fminimum_numf_group,
+    icount_bench_fminimumf128_group,
+    icount_bench_fminimumf16_group,
+    icount_bench_fminimumf_group,
+    icount_bench_fmod_group,
+    icount_bench_fmodf128_group,
+    icount_bench_fmodf16_group,
+    icount_bench_fmodf_group,
+    icount_bench_frexp_group,
+    icount_bench_frexpf_group,
+    icount_bench_hypot_group,
+    icount_bench_hypotf_group,
+    icount_bench_ilogb_group,
+    icount_bench_ilogbf_group,
+    icount_bench_j0_group,
+    icount_bench_j0f_group,
+    icount_bench_j1_group,
+    icount_bench_j1f_group,
+    icount_bench_jn_group,
+    icount_bench_jnf_group,
+    icount_bench_ldexp_group,
+    icount_bench_ldexpf128_group,
+    icount_bench_ldexpf16_group,
+    icount_bench_ldexpf_group,
+    icount_bench_lgamma_group,
+    icount_bench_lgamma_r_group,
+    icount_bench_lgammaf_group,
+    icount_bench_lgammaf_r_group,
+    icount_bench_log10_group,
+    icount_bench_log10f_group,
+    icount_bench_log1p_group,
+    icount_bench_log1pf_group,
+    icount_bench_log2_group,
+    icount_bench_log2f_group,
+    icount_bench_log_group,
+    icount_bench_logf_group,
+    icount_bench_modf_group,
+    icount_bench_modff_group,
+    icount_bench_nextafter_group,
+    icount_bench_nextafterf_group,
+    icount_bench_pow_group,
+    icount_bench_powf_group,
+    icount_bench_remainder_group,
+    icount_bench_remainderf_group,
+    icount_bench_remquo_group,
+    icount_bench_remquof_group,
+    icount_bench_rint_group,
+    icount_bench_rintf128_group,
+    icount_bench_rintf16_group,
+    icount_bench_rintf_group,
+    icount_bench_round_group,
+    icount_bench_roundeven_group,
+    icount_bench_roundevenf128_group,
+    icount_bench_roundevenf16_group,
+    icount_bench_roundevenf_group,
+    icount_bench_roundf128_group,
+    icount_bench_roundf16_group,
+    icount_bench_roundf_group,
+    icount_bench_scalbn_group,
+    icount_bench_scalbnf128_group,
+    icount_bench_scalbnf16_group,
+    icount_bench_scalbnf_group,
+    icount_bench_sin_group,
+    icount_bench_sincos_group,
+    icount_bench_sincosf_group,
+    icount_bench_sinf_group,
+    icount_bench_sinh_group,
+    icount_bench_sinhf_group,
+    icount_bench_sqrt_group,
+    icount_bench_sqrtf128_group,
+    icount_bench_sqrtf16_group,
+    icount_bench_sqrtf_group,
+    icount_bench_tan_group,
+    icount_bench_tanf_group,
+    icount_bench_tanh_group,
+    icount_bench_tanhf_group,
+    icount_bench_tgamma_group,
+    icount_bench_tgammaf_group,
+    icount_bench_trunc_group,
+    icount_bench_truncf128_group,
+    icount_bench_truncf16_group,
+    icount_bench_truncf_group,
+    icount_bench_y0_group,
+    icount_bench_y0f_group,
+    icount_bench_y1_group,
+    icount_bench_y1f_group,
+    icount_bench_yn_group,
+    icount_bench_ynf_group,
+    // verify-sorted-end
+    // verify-apilist-end
+);
diff --git a/crates/libm-test/benches/random.rs b/crates/libm-test/benches/random.rs
new file mode 100644
index 000000000..63d7e5c6d
--- /dev/null
+++ b/crates/libm-test/benches/random.rs
@@ -0,0 +1,207 @@
+use std::hint::black_box;
+use std::time::Duration;
+
+use criterion::{Criterion, criterion_main};
+use libm_test::generate::random;
+use libm_test::generate::random::RandomInput;
+use libm_test::{CheckBasis, CheckCtx, GeneratorKind, MathOp, TupleCall};
+
+/// Benchmark with this many items to get a variety
+const BENCH_ITER_ITEMS: usize = if cfg!(feature = "short-benchmarks") { 50 } else { 500 };
+
+/// Extra parameters we only care about if we are benchmarking against musl.
+#[allow(dead_code)]
+struct MuslExtra<F> {
+    musl_fn: Option<F>,
+    skip_on_i586: bool,
+}
+
+macro_rules! musl_rand_benches {
+    (
+        fn_name: $fn_name:ident,
+        attrs: [$($attr:meta),*],
+        fn_extra: ($skip_on_i586:expr, $musl_fn:expr),
+    ) => {
+        paste::paste! {
+            $(#[$attr])*
+            fn [< musl_bench_ $fn_name >](c: &mut Criterion) {
+                type Op = libm_test::op::$fn_name::Routine;
+
+                #[cfg(feature = "build-musl")]
+                let musl_extra = MuslExtra::<libm_test::OpCFn<Op>> {
+                    musl_fn: $musl_fn,
+                    skip_on_i586: $skip_on_i586,
+                };
+
+                #[cfg(not(feature = "build-musl"))]
+                let musl_extra = MuslExtra {
+                    musl_fn: None,
+                    skip_on_i586: $skip_on_i586,
+                };
+
+                bench_one::<Op>(c, musl_extra);
+            }
+        }
+    };
+}
+
+fn bench_one<Op>(c: &mut Criterion, musl_extra: MuslExtra<Op::CFn>)
+where
+    Op: MathOp,
+    Op::RustArgs: RandomInput,
+{
+    let name = Op::NAME;
+
+    let ctx = CheckCtx::new(Op::IDENTIFIER, CheckBasis::Musl, GeneratorKind::Random);
+    let benchvec: Vec<_> =
+        random::get_test_cases::<Op::RustArgs>(&ctx).0.take(BENCH_ITER_ITEMS).collect();
+
+    // Perform a sanity check that we are benchmarking the same thing
+    // Don't test against musl if it is not available
+    #[cfg(feature = "build-musl")]
+    for input in benchvec.iter().copied() {
+        use anyhow::Context;
+        use libm_test::CheckOutput;
+
+        if cfg!(x86_no_sse) && musl_extra.skip_on_i586 {
+            break;
+        }
+
+        let Some(musl_fn) = musl_extra.musl_fn else {
+            continue;
+        };
+        let musl_res = input.call(musl_fn);
+        let crate_res = input.call(Op::ROUTINE);
+
+        crate_res.validate(musl_res, input, &ctx).context(name).unwrap();
+    }
+
+    #[cfg(not(feature = "build-musl"))]
+    let _ = musl_extra; // silence unused warnings
+
+    /* Function pointers are black boxed to avoid inlining in the benchmark loop */
+
+    let mut group = c.benchmark_group(name);
+    group.bench_function("crate", |b| {
+        b.iter(|| {
+            let f = black_box(Op::ROUTINE);
+            for input in benchvec.iter().copied() {
+                input.call(f);
+            }
+        })
+    });
+
+    // Don't test against musl if it is not available
+    #[cfg(feature = "build-musl")]
+    {
+        if let Some(musl_fn) = musl_extra.musl_fn {
+            group.bench_function("musl", |b| {
+                b.iter(|| {
+                    let f = black_box(musl_fn);
+                    for input in benchvec.iter().copied() {
+                        input.call(f);
+                    }
+                })
+            });
+        }
+    }
+}
+
+libm_macros::for_each_function! {
+    callback: musl_rand_benches,
+    skip: [],
+    fn_extra: match MACRO_FN_NAME {
+        // We pass a tuple of `(skip_on_i586, musl_fn)`
+
+        // FIXME(correctness): exp functions have the wrong result on i586
+        exp10 | exp10f | exp2 | exp2f => (true, Some(musl_math_sys::MACRO_FN_NAME)),
+
+        // No musl counterpart: `f16`/`f128` routines and `fmaximum*`/`fminimum*`/`roundeven*`
+        ceilf128
+        | ceilf16
+        | copysignf128
+        | copysignf16
+        | fabsf128
+        | fabsf16
+        | fdimf128
+        | fdimf16
+        | floorf128
+        | floorf16
+        | fmaf128
+        | fmaxf128
+        | fmaxf16
+        | fmaximum
+        | fmaximum_num
+        | fmaximum_numf
+        | fmaximum_numf128
+        | fmaximum_numf16
+        | fmaximumf
+        | fmaximumf128
+        | fmaximumf16
+        | fminf128
+        | fminf16
+        | fminimum
+        | fminimum_num
+        | fminimum_numf
+        | fminimum_numf128
+        | fminimum_numf16
+        | fminimumf
+        | fminimumf128
+        | fminimumf16
+        | fmodf128
+        | fmodf16
+        | ldexpf128
+        | ldexpf16
+        | rintf128
+        | rintf16
+        | roundeven
+        | roundevenf
+        | roundevenf128
+        | roundevenf16
+        | roundf128
+        | roundf16
+        | scalbnf128
+        | scalbnf16
+        | sqrtf128
+        | sqrtf16
+        | truncf128
+        | truncf16 => (false, None),
+
+        // By default we never skip (false) and always have a musl function available
+        _ => (false, Some(musl_math_sys::MACRO_FN_NAME))
+    }
+}
+
+macro_rules! run_callback {
+    (
+        fn_name: $fn_name:ident,
+        attrs: [$($attr:meta),*],
+        extra: [$criterion:ident],
+    ) => {
+        paste::paste! {
+            $(#[$attr])*
+            [< musl_bench_ $fn_name >](&mut $criterion)
+        }
+    };
+}
+
+pub fn musl_random() {
+    let mut criterion = Criterion::default();
+
+    // For CI, run a short 0.2s warmup and 0.6s measurement. This makes benchmarks complete in
+    // about the same time as other tests.
+    if cfg!(feature = "short-benchmarks") {
+        criterion = criterion
+            .warm_up_time(Duration::from_millis(200))
+            .measurement_time(Duration::from_millis(600));
+    }
+
+    criterion = criterion.configure_from_args();
+
+    libm_macros::for_each_function! {
+        callback: run_callback,
+        extra: [criterion],
+    };
+}
+
+criterion_main!(musl_random);
diff --git a/crates/libm-test/build.rs b/crates/libm-test/build.rs
index 472dec9d3..f75e3dda5 100644
--- a/crates/libm-test/build.rs
+++ b/crates/libm-test/build.rs
@@ -1,531 +1,9 @@
-use std::fmt::Write;
-use std::path::PathBuf;
-use std::{env, fs};
+#[path = "../../libm/configure.rs"]
+mod configure;
+use configure::Config;
 
 fn main() {
+    println!("cargo:rerun-if-changed=../../libm/configure.rs");
     let cfg = Config::from_env();
-
-    emit_optimization_cfg(&cfg);
-    emit_cfg_shorthands(&cfg);
-    list_all_tests(&cfg);
-
-    #[cfg(feature = "test-musl-serialized")]
-    musl_serialized_tests::generate();
-}
-
-#[allow(dead_code)]
-struct Config {
-    manifest_dir: PathBuf,
-    out_dir: PathBuf,
-    opt_level: u8,
-    target_arch: String,
-    target_env: String,
-    target_family: Option<String>,
-    target_os: String,
-    target_string: String,
-    target_vendor: String,
-    target_features: Vec<String>,
-}
-
-impl Config {
-    fn from_env() -> Self {
-        let target_features = env::var("CARGO_CFG_TARGET_FEATURE")
-            .map(|feats| feats.split(',').map(ToOwned::to_owned).collect())
-            .unwrap_or_default();
-
-        Self {
-            manifest_dir: PathBuf::from(env::var("CARGO_MANIFEST_DIR").unwrap()),
-            out_dir: PathBuf::from(env::var("OUT_DIR").unwrap()),
-            opt_level: env::var("OPT_LEVEL").unwrap().parse().unwrap(),
-            target_arch: env::var("CARGO_CFG_TARGET_ARCH").unwrap(),
-            target_env: env::var("CARGO_CFG_TARGET_ENV").unwrap(),
-            target_family: env::var("CARGO_CFG_TARGET_FAMILY").ok(),
-            target_os: env::var("CARGO_CFG_TARGET_OS").unwrap(),
-            target_string: env::var("TARGET").unwrap(),
-            target_vendor: env::var("CARGO_CFG_TARGET_VENDOR").unwrap(),
-            target_features,
-        }
-    }
-}
-
-/// Some tests are extremely slow. Emit a config option based on optimization level.
-fn emit_optimization_cfg(cfg: &Config) {
-    println!("cargo::rustc-check-cfg=cfg(optimizations_enabled)");
-
-    if cfg.opt_level >= 2 {
-        println!("cargo::rustc-cfg=optimizations_enabled");
-    }
-}
-
-/// Provide an alias for common longer config combinations.
-fn emit_cfg_shorthands(cfg: &Config) {
-    println!("cargo::rustc-check-cfg=cfg(x86_no_sse)");
-    if cfg.target_arch == "x86" && !cfg.target_features.iter().any(|f| f == "sse") {
-        // Shorthand to detect i586 targets
-        println!("cargo::rustc-cfg=x86_no_sse");
-    }
-}
-
-/// Create a list of all source files in an array. This can be used for making sure that
-/// all functions are tested or otherwise covered in some way.
-// FIXME: it would probably be better to use rustdoc JSON output to get public functions.
-fn list_all_tests(cfg: &Config) {
-    let math_src = cfg.manifest_dir.join("../../src/math");
-
-    let mut files = fs::read_dir(math_src)
-        .unwrap()
-        .map(|f| f.unwrap().path())
-        .filter(|entry| entry.is_file())
-        .map(|f| f.file_stem().unwrap().to_str().unwrap().to_owned())
-        .collect::<Vec<_>>();
-    files.sort();
-
-    let mut s = "pub const ALL_FUNCTIONS: &[&str] = &[".to_owned();
-    for f in files {
-        if f == "mod" {
-            // skip mod.rs
-            continue;
-        }
-        write!(s, "\"{f}\",").unwrap();
-    }
-    write!(s, "];").unwrap();
-
-    let outfile = cfg.out_dir.join("all_files.rs");
-    fs::write(outfile, s).unwrap();
-}
-
-/// At build time, generate the output of what the corresponding `*musl` target does with a range
-/// of inputs.
-///
-/// Serialize that target's output, run the same thing with our symbols, then load and compare
-/// the resulting values.
-#[cfg(feature = "test-musl-serialized")]
-mod musl_serialized_tests {
-    use std::path::PathBuf;
-    use std::process::Command;
-    use std::{env, fs};
-
-    use rand::Rng;
-    use rand::seq::SliceRandom;
-
-    // Number of tests to generate for each function
-    const NTESTS: usize = 500;
-
-    // These files are all internal functions or otherwise miscellaneous, not
-    // defining a function we want to test.
-    const IGNORED_FILES: &[&str] = &[
-        "fenv.rs",
-        // These are giving slightly different results compared to musl
-        "lgamma.rs",
-        "lgammaf.rs",
-        "tgamma.rs",
-        "j0.rs",
-        "j0f.rs",
-        "jn.rs",
-        "jnf.rs",
-        "j1.rs",
-        "j1f.rs",
-    ];
-
-    struct Function {
-        name: String,
-        args: Vec<Ty>,
-        ret: Vec<Ty>,
-        tests: Vec<Test>,
-    }
-
-    enum Ty {
-        F32,
-        F64,
-        I32,
-        Bool,
-    }
-
-    struct Test {
-        inputs: Vec<i64>,
-        outputs: Vec<i64>,
-    }
-
-    pub fn generate() {
-        // PowerPC tests are failing on LLVM 13: https://github.com/rust-lang/rust/issues/88520
-        let target_arch = env::var("CARGO_CFG_TARGET_ARCH").unwrap();
-        let libm_test = PathBuf::from(env::var("CARGO_MANIFEST_DIR").unwrap());
-        let math_src = libm_test.join("../../src/math");
-
-        if target_arch == "powerpc64" {
-            return;
-        }
-
-        let files = fs::read_dir(math_src).unwrap().map(|f| f.unwrap().path()).collect::<Vec<_>>();
-
-        let mut math = Vec::new();
-        for file in files {
-            if IGNORED_FILES.iter().any(|f| file.ends_with(f)) {
-                continue;
-            }
-
-            println!("generating musl reference tests in {:?}", file);
-
-            let contents = fs::read_to_string(file).unwrap();
-            let mut functions = contents.lines().filter(|f| f.starts_with("pub fn"));
-            while let Some(function_to_test) = functions.next() {
-                math.push(parse(function_to_test));
-            }
-        }
-
-        // Generate a bunch of random inputs for each function. This will
-        // attempt to generate a good set of uniform test cases for exercising
-        // all the various functionality.
-        generate_random_tests(&mut math, &mut rand::thread_rng());
-
-        // After we have all our inputs, use the x86_64-unknown-linux-musl
-        // target to generate the expected output.
-        generate_test_outputs(&mut math);
-        //panic!("Boo");
-        // ... and now that we have both inputs and expected outputs, do a bunch
-        // of codegen to create the unit tests which we'll actually execute.
-        generate_unit_tests(&math);
-    }
-
-    /// A "poor man's" parser for the signature of a function
-    fn parse(s: &str) -> Function {
-        let s = eat(s, "pub fn ");
-        let pos = s.find('(').unwrap();
-        let name = &s[..pos];
-        let s = &s[pos + 1..];
-        let end = s.find(')').unwrap();
-        let args = s[..end]
-            .split(',')
-            .map(|arg| {
-                let colon = arg.find(':').unwrap();
-                parse_ty(arg[colon + 1..].trim())
-            })
-            .collect::<Vec<_>>();
-        let tail = &s[end + 1..];
-        let tail = eat(tail, " -> ");
-        let ret = parse_retty(tail.replace("{", "").trim());
-
-        return Function { name: name.to_string(), args, ret, tests: Vec::new() };
-
-        fn parse_ty(s: &str) -> Ty {
-            match s {
-                "f32" => Ty::F32,
-                "f64" => Ty::F64,
-                "i32" => Ty::I32,
-                "bool" => Ty::Bool,
-                other => panic!("unknown type `{}`", other),
-            }
-        }
-
-        fn parse_retty(s: &str) -> Vec<Ty> {
-            match s {
-                "(f32, f32)" => vec![Ty::F32, Ty::F32],
-                "(f32, i32)" => vec![Ty::F32, Ty::I32],
-                "(f64, f64)" => vec![Ty::F64, Ty::F64],
-                "(f64, i32)" => vec![Ty::F64, Ty::I32],
-                other => vec![parse_ty(other)],
-            }
-        }
-
-        fn eat<'a>(s: &'a str, prefix: &str) -> &'a str {
-            if s.starts_with(prefix) {
-                &s[prefix.len()..]
-            } else {
-                panic!("{:?} didn't start with {:?}", s, prefix)
-            }
-        }
-    }
-
-    fn generate_random_tests<R: Rng>(functions: &mut [Function], rng: &mut R) {
-        for function in functions {
-            for _ in 0..NTESTS {
-                function.tests.push(generate_test(function, rng));
-            }
-        }
-
-        fn generate_test<R: Rng>(function: &Function, rng: &mut R) -> Test {
-            let mut inputs = function.args.iter().map(|ty| ty.gen_i64(rng)).collect::<Vec<_>>();
-
-            // First argument to this function appears to be a number of
-            // iterations, so passing in massive random numbers causes it to
-            // take forever to execute, so make sure we're not running random
-            // math code until the heat death of the universe.
-            if function.name == "jn" || function.name == "jnf" {
-                inputs[0] &= 0xffff;
-            }
-
-            Test {
-                inputs,
-                // zero output for now since we'll generate it later
-                outputs: vec![],
-            }
-        }
-    }
-
-    impl Ty {
-        fn gen_i64<R: Rng>(&self, r: &mut R) -> i64 {
-            use std::{f32, f64};
-
-            return match self {
-                Ty::F32 => {
-                    if r.gen_range(0..20) < 1 {
-                        let i = *[f32::NAN, f32::INFINITY, f32::NEG_INFINITY].choose(r).unwrap();
-                        i.to_bits().into()
-                    } else {
-                        r.gen::<f32>().to_bits().into()
-                    }
-                }
-                Ty::F64 => {
-                    if r.gen_range(0..20) < 1 {
-                        let i = *[f64::NAN, f64::INFINITY, f64::NEG_INFINITY].choose(r).unwrap();
-                        i.to_bits() as i64
-                    } else {
-                        r.gen::<f64>().to_bits() as i64
-                    }
-                }
-                Ty::I32 => {
-                    if r.gen_range(0..10) < 1 {
-                        let i = *[i32::max_value(), 0, i32::min_value()].choose(r).unwrap();
-                        i.into()
-                    } else {
-                        r.gen::<i32>().into()
-                    }
-                }
-                Ty::Bool => r.gen::<bool>() as i64,
-            };
-        }
-
-        fn libc_ty(&self) -> &'static str {
-            match self {
-                Ty::F32 => "f32",
-                Ty::F64 => "f64",
-                Ty::I32 => "i32",
-                Ty::Bool => "i32",
-            }
-        }
-
-        fn libc_pty(&self) -> &'static str {
-            match self {
-                Ty::F32 => "*mut f32",
-                Ty::F64 => "*mut f64",
-                Ty::I32 => "*mut i32",
-                Ty::Bool => "*mut i32",
-            }
-        }
-
-        fn default(&self) -> &'static str {
-            match self {
-                Ty::F32 => "0_f32",
-                Ty::F64 => "0_f64",
-                Ty::I32 => "0_i32",
-                Ty::Bool => "false",
-            }
-        }
-
-        fn to_i64(&self) -> &'static str {
-            match self {
-                Ty::F32 => ".to_bits() as i64",
-                Ty::F64 => ".to_bits() as i64",
-                Ty::I32 => " as i64",
-                Ty::Bool => " as i64",
-            }
-        }
-    }
-
-    fn generate_test_outputs(functions: &mut [Function]) {
-        let mut src = String::new();
-        let dst = std::env::var("OUT_DIR").unwrap();
-
-        // Generate a program which will run all tests with all inputs in
-        // `functions`. This program will write all outputs to stdout (in a
-        // binary format).
-        src.push_str("use std::io::Write;");
-        src.push_str("fn main() {");
-        src.push_str("let mut result = Vec::new();");
-        for function in functions.iter_mut() {
-            src.push_str("unsafe {");
-            src.push_str("extern { fn ");
-            src.push_str(&function.name);
-            src.push_str("(");
-
-            let (ret, retptr) = match function.name.as_str() {
-                "sincos" | "sincosf" => (None, &function.ret[..]),
-                _ => (Some(&function.ret[0]), &function.ret[1..]),
-            };
-            for (i, arg) in function.args.iter().enumerate() {
-                src.push_str(&format!("arg{}: {},", i, arg.libc_ty()));
-            }
-            for (i, ret) in retptr.iter().enumerate() {
-                src.push_str(&format!("argret{}: {},", i, ret.libc_pty()));
-            }
-            src.push_str(")");
-            if let Some(ty) = ret {
-                src.push_str(" -> ");
-                src.push_str(ty.libc_ty());
-            }
-            src.push_str("; }");
-
-            src.push_str(&format!("static TESTS: &[[i64; {}]]", function.args.len()));
-            src.push_str(" = &[");
-            for test in function.tests.iter() {
-                src.push_str("[");
-                for val in test.inputs.iter() {
-                    src.push_str(&val.to_string());
-                    src.push_str(",");
-                }
-                src.push_str("],");
-            }
-            src.push_str("];");
-
-            src.push_str("for test in TESTS {");
-            for (i, arg) in retptr.iter().enumerate() {
-                src.push_str(&format!("let mut argret{} = {};", i, arg.default()));
-            }
-            src.push_str("let output = ");
-            src.push_str(&function.name);
-            src.push_str("(");
-            for (i, arg) in function.args.iter().enumerate() {
-                src.push_str(&match arg {
-                    Ty::F32 => format!("f32::from_bits(test[{}] as u32)", i),
-                    Ty::F64 => format!("f64::from_bits(test[{}] as u64)", i),
-                    Ty::I32 => format!("test[{}] as i32", i),
-                    Ty::Bool => format!("test[{}] as i32", i),
-                });
-                src.push_str(",");
-            }
-            for (i, _) in retptr.iter().enumerate() {
-                src.push_str(&format!("&mut argret{},", i));
-            }
-            src.push_str(");");
-            if let Some(ty) = &ret {
-                src.push_str(&format!("let output = output{};", ty.to_i64()));
-                src.push_str("result.extend_from_slice(&output.to_le_bytes());");
-            }
-
-            for (i, ret) in retptr.iter().enumerate() {
-                src.push_str(&format!(
-                    "result.extend_from_slice(&(argret{}{}).to_le_bytes());",
-                    i,
-                    ret.to_i64(),
-                ));
-            }
-            src.push_str("}");
-
-            src.push_str("}");
-        }
-
-        src.push_str("std::io::stdout().write_all(&result).unwrap();");
-
-        src.push_str("}");
-
-        let path = format!("{}/gen.rs", dst);
-        fs::write(&path, src).unwrap();
-
-        // Make it somewhat pretty if something goes wrong
-        drop(Command::new("rustfmt").arg(&path).status());
-
-        // Compile and execute this tests for the musl target, assuming we're an
-        // x86_64 host effectively.
-        let status = Command::new("rustc")
-            .current_dir(&dst)
-            .arg(&path)
-            .arg("--target=x86_64-unknown-linux-musl")
-            .status()
-            .unwrap();
-        assert!(status.success());
-        let output = Command::new("./gen").current_dir(&dst).output().unwrap();
-        assert!(output.status.success());
-        assert!(output.stderr.is_empty());
-
-        // Map all the output bytes back to an `i64` and then shove it all into
-        // the expected results.
-        let mut results = output.stdout.chunks_exact(8).map(|buf| {
-            let mut exact = [0; 8];
-            exact.copy_from_slice(buf);
-            i64::from_le_bytes(exact)
-        });
-
-        for f in functions.iter_mut() {
-            for test in f.tests.iter_mut() {
-                test.outputs = (0..f.ret.len()).map(|_| results.next().unwrap()).collect();
-            }
-        }
-        assert!(results.next().is_none());
-    }
-
-    /// Codegens a file which has a ton of `#[test]` annotations for all the
-    /// tests that we generated above.
-    fn generate_unit_tests(functions: &[Function]) {
-        let mut src = String::new();
-        let dst = std::env::var("OUT_DIR").unwrap();
-
-        for function in functions {
-            src.push_str("#[test]");
-            src.push_str("fn ");
-            src.push_str(&function.name);
-            src.push_str("_matches_musl() {");
-            src.push_str(&format!(
-                "static TESTS: &[([i64; {}], [i64; {}])]",
-                function.args.len(),
-                function.ret.len(),
-            ));
-            src.push_str(" = &[");
-            for test in function.tests.iter() {
-                src.push_str("([");
-                for val in test.inputs.iter() {
-                    src.push_str(&val.to_string());
-                    src.push_str(",");
-                }
-                src.push_str("],");
-                src.push_str("[");
-                for val in test.outputs.iter() {
-                    src.push_str(&val.to_string());
-                    src.push_str(",");
-                }
-                src.push_str("],");
-                src.push_str("),");
-            }
-            src.push_str("];");
-
-            src.push_str("for (test, expected) in TESTS {");
-            src.push_str("let output = libm::");
-            src.push_str(&function.name);
-            src.push_str("(");
-            for (i, arg) in function.args.iter().enumerate() {
-                src.push_str(&match arg {
-                    Ty::F32 => format!("f32::from_bits(test[{}] as u32)", i),
-                    Ty::F64 => format!("f64::from_bits(test[{}] as u64)", i),
-                    Ty::I32 => format!("test[{}] as i32", i),
-                    Ty::Bool => format!("test[{}] as i32", i),
-                });
-                src.push_str(",");
-            }
-            src.push_str(");");
-
-            for (i, ret) in function.ret.iter().enumerate() {
-                let get = if function.ret.len() == 1 { String::new() } else { format!(".{}", i) };
-                src.push_str(&(match ret {
-                    Ty::F32 => format!("if libm::_eqf(output{}, f32::from_bits(expected[{}] as u32)).is_ok() {{ continue }}", get, i),
-                    Ty::F64 => format!("if libm::_eq(output{}, f64::from_bits(expected[{}] as u64)).is_ok() {{ continue }}", get, i),
-                    Ty::I32 => format!("if output{} as i64 == expected[{}] {{ continue }}", get, i),
-                    Ty::Bool => unreachable!(),
-                }));
-            }
-
-            src.push_str(
-                r#"
-                panic!("INPUT: {:?} EXPECTED: {:?} ACTUAL {:?}", test, expected, output);
-            "#,
-            );
-            src.push_str("}");
-
-            src.push_str("}");
-        }
-
-        let path = format!("{}/musl-tests.rs", dst);
-        fs::write(&path, src).unwrap();
-
-        // Try to make it somewhat pretty
-        drop(Command::new("rustfmt").arg(&path).status());
-    }
+    configure::emit_test_config(&cfg);
 }
diff --git a/crates/libm-test/examples/plot_domains.rs b/crates/libm-test/examples/plot_domains.rs
new file mode 100644
index 000000000..78524761e
--- /dev/null
+++ b/crates/libm-test/examples/plot_domains.rs
@@ -0,0 +1,103 @@
+//! Program to write all inputs from a generator to a file, then invoke a Julia script to plot
+//! them. Output is in `target/plots`.
+//!
+//! Requires Julia with the `CairoMakie` dependency.
+//!
+//! Note that running in release mode by default generates a _lot_ more datapoints, which
+//! causes plotting to be extremely slow (some simplification to be done in the script).
+
+use std::fmt::Write as _;
+use std::io::{BufWriter, Write};
+use std::path::Path;
+use std::process::Command;
+use std::{env, fs};
+
+use libm_test::generate::spaced::SpacedInput;
+use libm_test::generate::{edge_cases, spaced};
+use libm_test::{CheckBasis, CheckCtx, GeneratorKind, MathOp, op};
+
+const JL_PLOT: &str = "examples/plot_file.jl";
+
+fn main() {
+    let manifest_env = env::var("CARGO_MANIFEST_DIR").unwrap();
+    let manifest_dir = Path::new(&manifest_env);
+    let out_dir = manifest_dir.join("../../target/plots");
+    // `create_dir_all` succeeds if the directory already exists and also creates any missing
+    // parent, so this no longer fails when `target/` itself is absent at this location.
+    fs::create_dir_all(&out_dir).unwrap();
+
+    let jl_script = manifest_dir.join(JL_PLOT);
+    let mut config = format!(r#"out_dir = "{}""#, out_dir.display());
+    config.write_str("\n\n").unwrap();
+
+    // Plot a few domains with some functions that use them.
+    plot_one_operator::<op::sqrtf::Routine>(&out_dir, &mut config);
+    plot_one_operator::<op::cosf::Routine>(&out_dir, &mut config);
+    plot_one_operator::<op::cbrtf::Routine>(&out_dir, &mut config);
+
+    let config_path = out_dir.join("config.toml");
+    fs::write(&config_path, config).unwrap();
+
+    // The script expects a path to `config.toml` to be passed as its only argument
+    let mut cmd = Command::new("julia");
+    if cfg!(optimizations_enabled) {
+        cmd.arg("-O3");
+    }
+    cmd.arg(jl_script).arg(config_path);
+
+    println!("launching script... {cmd:?}");
+    assert!(cmd.status().unwrap().success(), "julia plotting script failed");
+}
+
+/// Run multiple generators for a single operator.
+fn plot_one_operator<Op>(out_dir: &Path, config: &mut String)
+where
+    Op: MathOp<FTy = f32, RustArgs = (f32,)>,
+    Op::RustArgs: SpacedInput<Op>,
+{
+    let mut ctx = CheckCtx::new(Op::IDENTIFIER, CheckBasis::Mpfr, GeneratorKind::QuickSpaced);
+    plot_one_generator(out_dir, &ctx, "logspace", config, spaced::get_test_cases::<Op>(&ctx).0);
+    ctx.gen_kind = GeneratorKind::EdgeCases;
+    plot_one_generator(
+        out_dir,
+        &ctx,
+        "edge_cases",
+        config,
+        edge_cases::get_test_cases::<Op>(&ctx).0,
+    );
+}
+
+/// Write one generator's inputs to a text file and add a matching `[[input]]` entry to `config`.
+fn plot_one_generator(
+    out_dir: &Path,
+    ctx: &CheckCtx,
+    gen_name: &str,
+    config: &mut String,
+    generator: impl Iterator<Item = (f32,)>,
+) {
+    let fn_name = ctx.base_name_str;
+    let text_file = out_dir.join(format!("input-{fn_name}-{gen_name}.txt"));
+
+    let f = fs::File::create(&text_file).unwrap();
+    let mut w = BufWriter::new(f);
+    let mut count = 0u64;
+
+    for input in generator {
+        writeln!(w, "{:e}", input.0).unwrap();
+        count += 1;
+    }
+
+    w.flush().unwrap();
+    println!("generated {count} inputs for {fn_name}-{gen_name}");
+
+    writeln!(
+        config,
+        r#"[[input]]
+function = "{fn_name}"
+generator = "{gen_name}"
+input_file = "{}"
+"#,
+        text_file.to_str().unwrap()
+    )
+    .unwrap()
+}
diff --git a/crates/libm-test/examples/plot_file.jl b/crates/libm-test/examples/plot_file.jl
new file mode 100644
index 000000000..acffd9756
--- /dev/null
+++ b/crates/libm-test/examples/plot_file.jl
@@ -0,0 +1,171 @@
+"A quick script for plotting a list of floats.
+
+Takes a path to a TOML file (Julia has builtin TOML support but not JSON) which
+specifies a list of source files to plot. Plots are done with both a linear and
+a log scale.
+
+Requires [Makie] (specifically CairoMakie) for plotting.
+
+[Makie]: https://docs.makie.org/stable/
+"
+
+using CairoMakie
+using TOML
+
+function main()::Nothing
+    CairoMakie.activate!(px_per_unit = 10)
+    config_path = ARGS[1]
+
+    cfg = Dict()
+    open(config_path, "r") do f
+        cfg = TOML.parse(f)
+    end
+
+    out_dir = cfg["out_dir"]
+    for input in cfg["input"]
+        fn_name = input["function"]
+        gen_name = input["generator"]
+        input_file = input["input_file"]
+
+        plot_one(input_file, out_dir, fn_name, gen_name)
+    end
+end
+
+"Read inputs from a file, create both linear and log plots for one function"
+function plot_one(
+    input_file::String,
+    out_dir::String,
+    fn_name::String,
+    gen_name::String,
+)::Nothing
+    fig = Figure()
+
+    lin_out_file = joinpath(out_dir, "plot-$fn_name-$gen_name.png")
+    log_out_file = joinpath(out_dir, "plot-$fn_name-$gen_name-log.png")
+
+    # Map string function names to callable functions
+    if fn_name == "cos"
+        orig_func = cos
+        xlims = (-6.0, 6.0)
+        xlims_log = (-pi * 10, pi * 10)
+    elseif fn_name == "cbrt"
+        orig_func = cbrt
+        xlims = (-2.0, 2.0)
+        xlims_log = (-1000.0, 1000.0)
+    elseif fn_name == "sqrt"
+        orig_func = sqrt
+        xlims = (-1.1, 6.0)
+        xlims_log = (-1.1, 5000.0)
+    else
+        println("unrecognized function name `$fn_name`; update plot_file.jl")
+        exit(1)
+    end
+
+    # Edge cases don't do much beyond +/-1, except for infinity.
+    if gen_name == "edge_cases"
+        xlims = (-1.1, 1.1)
+        xlims_log = (-1.1, 1.1)
+    end
+
+    # Turn domain errors into NaN
+    func(x) = map_or(x, orig_func, NaN)
+
+    # Parse a series of X values produced by the generator
+    inputs = readlines(input_file)
+    gen_x = map((v) -> parse(Float32, v), inputs)
+
+    do_plot(
+        fig,
+        gen_x,
+        func,
+        xlims[1],
+        xlims[2],
+        "$fn_name $gen_name (linear scale)",
+        lin_out_file,
+        false,
+    )
+
+    do_plot(
+        fig,
+        gen_x,
+        func,
+        xlims_log[1],
+        xlims_log[2],
+        "$fn_name $gen_name (log scale)",
+        log_out_file,
+        true,
+    )
+end
+
+"Create a single plot"
+function do_plot(
+    fig::Figure,
+    gen_x::Vector{F},
+    func::Function,
+    xmin::AbstractFloat,
+    xmax::AbstractFloat,
+    title::String,
+    out_file::String,
+    logscale::Bool,
+)::Nothing where {F<:AbstractFloat}
+    println("plotting $title")
+
+    # `gen_x` is the values the generator produces. `actual_x` is for plotting a
+    # continuous function.
+    input_min = xmin - 1.0
+    input_max = xmax + 1.0
+    gen_x = filter((v) -> v >= input_min && v <= input_max, gen_x)
+    markersize = length(gen_x) < 10_000 ? 6.0 : 4.0
+
+    steps = 10_000
+    if logscale
+        r = LinRange(symlog10(input_min), symlog10(input_max), steps)
+        actual_x = sympow10.(r)
+        xscale = Makie.pseudolog10
+    else
+        actual_x = LinRange(input_min, input_max, steps)
+        xscale = identity
+    end
+
+    gen_y = @. func(gen_x)
+    actual_y = @. func(actual_x)
+
+    ax = Axis(fig[1, 1], xscale = xscale, title = title)
+
+    lines!(
+        ax,
+        actual_x,
+        actual_y,
+        color = (:lightblue, 0.6),
+        linewidth = 6.0,
+        label = "true function",
+    )
+    scatter!(
+        ax,
+        gen_x,
+        gen_y,
+        color = (:darkblue, 0.9),
+        markersize = markersize,
+        label = "checked inputs",
+    )
+    axislegend(ax, position = :rb, framevisible = false)
+
+    save(out_file, fig)
+    delete!(ax)
+end
+
+"Apply a function, returning the default if there is a domain error"
+function map_or(input::AbstractFloat, f::Function, default::Any)::Union{AbstractFloat,Any}
+    try
+        return f(input)
+    catch
+        return default
+    end
+end
+
+# Symmetric log10 transform pair; `sympow10` uses sign/abs so it inverts `symlog10` for x < 0 too
+C = 10
+symlog10(x::Number) = sign(x) * (log10(1 + abs(x) / (10^C)))
+sympow10(x::Number) = sign(x) * (10^C) * (10^abs(x) - 1)
+
+main()
diff --git a/crates/libm-test/src/domain.rs b/crates/libm-test/src/domain.rs
new file mode 100644
index 000000000..41e948461
--- /dev/null
+++ b/crates/libm-test/src/domain.rs
@@ -0,0 +1,265 @@
+//! Traits and operations related to bounds of a function.
+
+use std::fmt;
+use std::ops::Bound;
+
+use libm::support::Int;
+
+use crate::{BaseName, Float, FloatExt, Identifier};
+
+/// Representation of a single dimension of a function's domain.
+#[derive(Clone, Debug)]
+pub struct Domain<T> {
+    /// Start of the region for which a function is defined (ignoring poles).
+    pub start: Bound<T>,
+    /// End of the region for which a function is defined (ignoring poles).
+    pub end: Bound<T>,
+    /// Additional points to check closer around. These can be e.g. undefined asymptotes or
+    /// inflection points.
+    pub check_points: Option<fn() -> BoxIter<T>>,
+}
+
+type BoxIter<T> = Box<dyn Iterator<Item = T>>;
+
+impl<F: FloatExt> Domain<F> {
+    /// First value inside this domain; an unbounded start maps to negative infinity.
+    pub fn range_start(&self) -> F {
+        match self.start {
+            Bound::Unbounded => F::NEG_INFINITY,
+            Bound::Excluded(limit) => limit.next_up(),
+            Bound::Included(limit) => limit,
+        }
+    }
+
+    /// Last value inside this domain; an unbounded end maps to positive infinity.
+    pub fn range_end(&self) -> F {
+        match self.end {
+            Bound::Unbounded => F::INFINITY,
+            Bound::Excluded(limit) => limit.next_down(),
+            Bound::Included(limit) => limit,
+        }
+    }
+}
+
+/// A value that may be any float type or any integer type.
+#[derive(Clone, Debug)]
+pub enum EitherPrim<F, I> {
+    Float(F),
+    Int(I),
+}
+
+impl<F: fmt::Debug, I: fmt::Debug> EitherPrim<F, I> {
+    pub fn unwrap_float(self) -> F {
+        match self {
+            EitherPrim::Float(f) => f,
+            EitherPrim::Int(_) => panic!("expected float; got {self:?}"),
+        }
+    }
+
+    pub fn unwrap_int(self) -> I {
+        match self {
+            EitherPrim::Float(_) => panic!("expected int; got {self:?}"),
+            EitherPrim::Int(i) => i,
+        }
+    }
+}
+
+/// Convenience 1-dimensional float domains.
+impl<F: Float> Domain<F> {
+    /// x ∈ ℝ
+    const UNBOUNDED: Self =
+        Self { start: Bound::Unbounded, end: Bound::Unbounded, check_points: None };
+
+    /// x ∈ ℝ >= 0
+    const POSITIVE: Self =
+        Self { start: Bound::Included(F::ZERO), end: Bound::Unbounded, check_points: None };
+
+    /// x ∈ ℝ > 0
+    const STRICTLY_POSITIVE: Self =
+        Self { start: Bound::Excluded(F::ZERO), end: Bound::Unbounded, check_points: None };
+
+    /// Wrap in the float variant of [`EitherPrim`].
+    const fn into_prim_float<I>(self) -> EitherPrim<Self, Domain<I>> {
+        EitherPrim::Float(self)
+    }
+}
+
+/// Convenience 1-dimensional integer domains.
+impl<I: Int> Domain<I> {
+    /// x ∈ ℤ
+    const UNBOUNDED_INT: Self =
+        Self { start: Bound::Unbounded, end: Bound::Unbounded, check_points: None };
+
+    /// Wrap in the int variant of [`EitherPrim`].
+    const fn into_prim_int<F>(self) -> EitherPrim<Domain<F>, Self> {
+        EitherPrim::Int(self)
+    }
+}
+
+/// Multidimensional domains, represented as an array of 1-D domains.
+impl<F: Float, I: Int> EitherPrim<Domain<F>, Domain<I>> {
+    /// x ∈ ℝ
+    const UNBOUNDED1: [Self; 1] =
+        [Domain { start: Bound::Unbounded, end: Bound::Unbounded, check_points: None }
+            .into_prim_float()];
+
+    /// {x1, x2} ∈ ℝ
+    const UNBOUNDED2: [Self; 2] =
+        [Domain::UNBOUNDED.into_prim_float(), Domain::UNBOUNDED.into_prim_float()];
+
+    /// {x1, x2, x3} ∈ ℝ
+    const UNBOUNDED3: [Self; 3] = [
+        Domain::UNBOUNDED.into_prim_float(),
+        Domain::UNBOUNDED.into_prim_float(),
+        Domain::UNBOUNDED.into_prim_float(),
+    ];
+
+    /// {x1, x2} ∈ ℝ, one float and one int
+    const UNBOUNDED_F_I: [Self; 2] =
+        [Domain::UNBOUNDED.into_prim_float(), Domain::UNBOUNDED_INT.into_prim_int()];
+
+    /// x ∈ ℝ >= 0
+    const POSITIVE: [Self; 1] = [Domain::POSITIVE.into_prim_float()];
+
+    /// x ∈ ℝ > 0
+    const STRICTLY_POSITIVE: [Self; 1] = [Domain::STRICTLY_POSITIVE.into_prim_float()];
+
+    /// Used for versions of `asin` and `acos`.
+    const INVERSE_TRIG_PERIODIC: [Self; 1] = [Domain {
+        start: Bound::Included(F::NEG_ONE),
+        end: Bound::Included(F::ONE),
+        check_points: None,
+    }
+    .into_prim_float()];
+
+    /// Domain for `acosh`
+    const ACOSH: [Self; 1] =
+        [Domain { start: Bound::Included(F::ONE), end: Bound::Unbounded, check_points: None }
+            .into_prim_float()];
+
+    /// Domain for `atanh`
+    const ATANH: [Self; 1] = [Domain {
+        start: Bound::Excluded(F::NEG_ONE),
+        end: Bound::Excluded(F::ONE),
+        check_points: None,
+    }
+    .into_prim_float()];
+
+    /// Domain for `sin`, `cos`, and `tan`
+    const TRIG: [Self; 1] = [Domain {
+        // Trig functions have special behavior at fractions of π.
+        check_points: Some(|| Box::new([-F::PI, -F::FRAC_PI_2, F::FRAC_PI_2, F::PI].into_iter())),
+        ..Domain::UNBOUNDED
+    }
+    .into_prim_float()];
+
+    /// Domain for `log` in various bases
+    const LOG: [Self; 1] = Self::STRICTLY_POSITIVE;
+
+    /// Domain for `log1p` i.e. `log(1 + x)`
+    const LOG1P: [Self; 1] =
+        [Domain { start: Bound::Excluded(F::NEG_ONE), end: Bound::Unbounded, check_points: None }
+            .into_prim_float()];
+
+    /// Domain for `sqrt`
+    const SQRT: [Self; 1] = Self::POSITIVE;
+
+    /// Domain for `gamma`
+    const GAMMA: [Self; 1] = [Domain {
+        check_points: Some(|| {
+            // Negative integers are asymptotes
+            Box::new((0..u8::MAX).map(|scale| {
+                let mut base = F::ZERO;
+                for _ in 0..scale {
+                    base = base - F::ONE;
+                }
+                base
+            }))
+        }),
+        // Whether or not gamma is defined for negative numbers is implementation dependent
+        ..Domain::UNBOUNDED
+    }
+    .into_prim_float()];
+
+    /// Domain for `loggamma`
+    const LGAMMA: [Self; 1] = Self::STRICTLY_POSITIVE;
+
+    /// Domain for `jn` and `yn`.
+    // FIXME: the domain should provide some sort of "reasonable range" so we don't actually test
+    // the entire system unbounded.
+    const BESSEL_N: [Self; 2] =
+        [Domain::UNBOUNDED_INT.into_prim_int(), Domain::UNBOUNDED.into_prim_float()];
+}
+
+/// Get the domain for a given function.
+pub fn get_domain<F: Float, I: Int>(
+    id: Identifier,
+    argnum: usize,
+) -> EitherPrim<Domain<F>, Domain<I>> {
+    let x = match id.base_name() {
+        BaseName::Acos => &EitherPrim::INVERSE_TRIG_PERIODIC[..],
+        BaseName::Acosh => &EitherPrim::ACOSH[..],
+        BaseName::Asin => &EitherPrim::INVERSE_TRIG_PERIODIC[..],
+        BaseName::Asinh => &EitherPrim::UNBOUNDED1[..],
+        BaseName::Atan => &EitherPrim::UNBOUNDED1[..],
+        BaseName::Atan2 => &EitherPrim::UNBOUNDED2[..],
+        BaseName::Cbrt => &EitherPrim::UNBOUNDED1[..],
+        BaseName::Atanh => &EitherPrim::ATANH[..],
+        BaseName::Ceil => &EitherPrim::UNBOUNDED1[..],
+        BaseName::Cosh => &EitherPrim::UNBOUNDED1[..],
+        BaseName::Copysign => &EitherPrim::UNBOUNDED2[..],
+        BaseName::Cos => &EitherPrim::TRIG[..],
+        BaseName::Exp => &EitherPrim::UNBOUNDED1[..],
+        BaseName::Erf => &EitherPrim::UNBOUNDED1[..],
+        BaseName::Erfc => &EitherPrim::UNBOUNDED1[..],
+        BaseName::Expm1 => &EitherPrim::UNBOUNDED1[..],
+        BaseName::Exp10 => &EitherPrim::UNBOUNDED1[..],
+        BaseName::Exp2 => &EitherPrim::UNBOUNDED1[..],
+        BaseName::Frexp => &EitherPrim::UNBOUNDED1[..],
+        BaseName::Fabs => &EitherPrim::UNBOUNDED1[..],
+        BaseName::Fdim => &EitherPrim::UNBOUNDED2[..],
+        BaseName::Floor => &EitherPrim::UNBOUNDED1[..],
+        BaseName::Fma => &EitherPrim::UNBOUNDED3[..],
+        BaseName::Fmax => &EitherPrim::UNBOUNDED2[..],
+        BaseName::Fmaximum => &EitherPrim::UNBOUNDED2[..],
+        BaseName::FmaximumNum => &EitherPrim::UNBOUNDED2[..],
+        BaseName::Fmin => &EitherPrim::UNBOUNDED2[..],
+        BaseName::Fminimum => &EitherPrim::UNBOUNDED2[..],
+        BaseName::FminimumNum => &EitherPrim::UNBOUNDED2[..],
+        BaseName::Fmod => &EitherPrim::UNBOUNDED2[..],
+        BaseName::Hypot => &EitherPrim::UNBOUNDED2[..],
+        BaseName::Ilogb => &EitherPrim::UNBOUNDED1[..],
+        BaseName::J0 => &EitherPrim::UNBOUNDED1[..],
+        BaseName::J1 => &EitherPrim::UNBOUNDED1[..],
+        BaseName::Jn => &EitherPrim::BESSEL_N[..],
+        BaseName::Ldexp => &EitherPrim::UNBOUNDED_F_I[..],
+        BaseName::Lgamma => &EitherPrim::LGAMMA[..],
+        BaseName::LgammaR => &EitherPrim::LGAMMA[..],
+        BaseName::Log => &EitherPrim::LOG[..],
+        BaseName::Log10 => &EitherPrim::LOG[..],
+        BaseName::Log1p => &EitherPrim::LOG1P[..],
+        BaseName::Log2 => &EitherPrim::LOG[..],
+        BaseName::Modf => &EitherPrim::UNBOUNDED1[..],
+        BaseName::Nextafter => &EitherPrim::UNBOUNDED2[..],
+        BaseName::Pow => &EitherPrim::UNBOUNDED2[..],
+        BaseName::Remainder => &EitherPrim::UNBOUNDED2[..],
+        BaseName::Remquo => &EitherPrim::UNBOUNDED2[..],
+        BaseName::Rint => &EitherPrim::UNBOUNDED1[..],
+        BaseName::Round => &EitherPrim::UNBOUNDED1[..],
+        BaseName::Roundeven => &EitherPrim::UNBOUNDED1[..],
+        BaseName::Scalbn => &EitherPrim::UNBOUNDED_F_I[..],
+        BaseName::Sin => &EitherPrim::TRIG[..],
+        BaseName::Sincos => &EitherPrim::TRIG[..],
+        BaseName::Sinh => &EitherPrim::UNBOUNDED1[..],
+        BaseName::Sqrt => &EitherPrim::SQRT[..],
+        BaseName::Tan => &EitherPrim::TRIG[..],
+        BaseName::Tanh => &EitherPrim::UNBOUNDED1[..],
+        BaseName::Tgamma => &EitherPrim::GAMMA[..],
+        BaseName::Trunc => &EitherPrim::UNBOUNDED1[..],
+        BaseName::Y0 => &EitherPrim::UNBOUNDED1[..],
+        BaseName::Y1 => &EitherPrim::UNBOUNDED1[..],
+        BaseName::Yn => &EitherPrim::BESSEL_N[..],
+    };
+
+    x[argnum].clone()
+}
diff --git a/crates/libm-test/src/f8_impl.rs b/crates/libm-test/src/f8_impl.rs
new file mode 100644
index 000000000..ddb7bf90e
--- /dev/null
+++ b/crates/libm-test/src/f8_impl.rs
@@ -0,0 +1,503 @@
+//! An IEEE-compliant 8-bit float type for testing purposes.
+
+use std::cmp::{self, Ordering};
+use std::{fmt, ops};
+
+use crate::Float;
+
+/// Sometimes verifying float logic is easiest when all values can quickly be checked exhaustively
+/// or by hand.
+///
+/// IEEE-754 compliant type that includes a 1 bit sign, 4 bit exponent, and 3 bit significand.
+/// Bias is -7.
+///
+/// Based on <https://en.wikipedia.org/wiki/Minifloat#Example_8-bit_float_(1.4.3)>.
+#[derive(Clone, Copy)]
+#[repr(transparent)]
+#[allow(non_camel_case_types)]
+pub struct f8(u8);
+
+impl Float for f8 {
+    type Int = u8;
+    type SignedInt = i8;
+
+    const ZERO: Self = Self(0b0_0000_000);
+    const NEG_ZERO: Self = Self(0b1_0000_000);
+    const ONE: Self = Self(0b0_0111_000);
+    const NEG_ONE: Self = Self(0b1_0111_000);
+    const MAX: Self = Self(0b0_1110_111);
+    const MIN: Self = Self(0b1_1110_111);
+    const INFINITY: Self = Self(0b0_1111_000);
+    const NEG_INFINITY: Self = Self(0b1_1111_000);
+    const NAN: Self = Self(0b0_1111_100);
+    const NEG_NAN: Self = Self(0b1_1111_100);
+    const MIN_POSITIVE_NORMAL: Self = Self(1 << Self::SIG_BITS);
+    // FIXME: incorrect values
+    const EPSILON: Self = Self::ZERO;
+    const PI: Self = Self::ZERO;
+    const NEG_PI: Self = Self::ZERO;
+    const FRAC_PI_2: Self = Self::ZERO;
+
+    const BITS: u32 = 8;
+    const SIG_BITS: u32 = 3;
+    const SIGN_MASK: Self::Int = 0b1_0000_000;
+    const SIG_MASK: Self::Int = 0b0_0000_111;
+    const EXP_MASK: Self::Int = 0b0_1111_000;
+    const IMPLICIT_BIT: Self::Int = 0b0_0001_000;
+
+    fn to_bits(self) -> Self::Int {
+        self.0
+    }
+
+    fn to_bits_signed(self) -> Self::SignedInt {
+        self.0 as i8
+    }
+
+    fn is_nan(self) -> bool {
+        self.0 & Self::EXP_MASK == Self::EXP_MASK && self.0 & Self::SIG_MASK != 0
+    }
+
+    fn is_infinite(self) -> bool {
+        self.0 & Self::EXP_MASK == Self::EXP_MASK && self.0 & Self::SIG_MASK == 0
+    }
+
+    fn is_sign_negative(self) -> bool {
+        self.0 & Self::SIGN_MASK != 0
+    }
+
+    fn from_bits(a: Self::Int) -> Self {
+        Self(a)
+    }
+
+    fn abs(self) -> Self {
+        libm::generic::fabs(self)
+    }
+
+    fn copysign(self, other: Self) -> Self {
+        libm::generic::copysign(self, other)
+    }
+
+    fn fma(self, _y: Self, _z: Self) -> Self {
+        unimplemented!()
+    }
+
+    fn normalize(_significand: Self::Int) -> (i32, Self::Int) {
+        unimplemented!()
+    }
+}
+
+impl f8 {
+    pub const ALL_LEN: usize = 240;
+
+    /// All non-infinite non-NaN values of `f8`
+    pub const ALL: [Self; Self::ALL_LEN] = [
+        // -m*2^7
+        Self(0b1_1110_111), // -240
+        Self(0b1_1110_110),
+        Self(0b1_1110_101),
+        Self(0b1_1110_100),
+        Self(0b1_1110_011),
+        Self(0b1_1110_010),
+        Self(0b1_1110_001),
+        Self(0b1_1110_000), // -128
+        // -m*2^6
+        Self(0b1_1101_111), // -120
+        Self(0b1_1101_110),
+        Self(0b1_1101_101),
+        Self(0b1_1101_100),
+        Self(0b1_1101_011),
+        Self(0b1_1101_010),
+        Self(0b1_1101_001),
+        Self(0b1_1101_000), // -64
+        // -m*2^5
+        Self(0b1_1100_111), // -60
+        Self(0b1_1100_110),
+        Self(0b1_1100_101),
+        Self(0b1_1100_100),
+        Self(0b1_1100_011),
+        Self(0b1_1100_010),
+        Self(0b1_1100_001),
+        Self(0b1_1100_000), // -32
+        // -m*2^4
+        Self(0b1_1011_111), // -30
+        Self(0b1_1011_110),
+        Self(0b1_1011_101),
+        Self(0b1_1011_100),
+        Self(0b1_1011_011),
+        Self(0b1_1011_010),
+        Self(0b1_1011_001),
+        Self(0b1_1011_000), // -16
+        // -m*2^3
+        Self(0b1_1010_111), // -15
+        Self(0b1_1010_110),
+        Self(0b1_1010_101),
+        Self(0b1_1010_100),
+        Self(0b1_1010_011),
+        Self(0b1_1010_010),
+        Self(0b1_1010_001),
+        Self(0b1_1010_000), // -8
+        // -m*2^2
+        Self(0b1_1001_111), // -7.5
+        Self(0b1_1001_110),
+        Self(0b1_1001_101),
+        Self(0b1_1001_100),
+        Self(0b1_1001_011),
+        Self(0b1_1001_010),
+        Self(0b1_1001_001),
+        Self(0b1_1001_000), // -4
+        // -m*2^1
+        Self(0b1_1000_111), // -3.75
+        Self(0b1_1000_110),
+        Self(0b1_1000_101),
+        Self(0b1_1000_100),
+        Self(0b1_1000_011),
+        Self(0b1_1000_010),
+        Self(0b1_1000_001),
+        Self(0b1_1000_000), // -2
+        // -m*2^0
+        Self(0b1_0111_111), // -1.875
+        Self(0b1_0111_110),
+        Self(0b1_0111_101),
+        Self(0b1_0111_100),
+        Self(0b1_0111_011),
+        Self(0b1_0111_010),
+        Self(0b1_0111_001),
+        Self(0b1_0111_000), // -1
+        // -m*2^-1
+        Self(0b1_0110_111), // −0.9375
+        Self(0b1_0110_110),
+        Self(0b1_0110_101),
+        Self(0b1_0110_100),
+        Self(0b1_0110_011),
+        Self(0b1_0110_010),
+        Self(0b1_0110_001),
+        Self(0b1_0110_000), // -0.5
+        // -m*2^-2
+        Self(0b1_0101_111), // −0.46875
+        Self(0b1_0101_110),
+        Self(0b1_0101_101),
+        Self(0b1_0101_100),
+        Self(0b1_0101_011),
+        Self(0b1_0101_010),
+        Self(0b1_0101_001),
+        Self(0b1_0101_000), // -0.25
+        // -m*2^-3
+        Self(0b1_0100_111), // −0.234375
+        Self(0b1_0100_110),
+        Self(0b1_0100_101),
+        Self(0b1_0100_100),
+        Self(0b1_0100_011),
+        Self(0b1_0100_010),
+        Self(0b1_0100_001),
+        Self(0b1_0100_000), // -0.125
+        // -m*2^-4
+        Self(0b1_0011_111), // −0.1171875
+        Self(0b1_0011_110),
+        Self(0b1_0011_101),
+        Self(0b1_0011_100),
+        Self(0b1_0011_011),
+        Self(0b1_0011_010),
+        Self(0b1_0011_001),
+        Self(0b1_0011_000), // −0.0625
+        // -m*2^-5
+        Self(0b1_0010_111), // −0.05859375
+        Self(0b1_0010_110),
+        Self(0b1_0010_101),
+        Self(0b1_0010_100),
+        Self(0b1_0010_011),
+        Self(0b1_0010_010),
+        Self(0b1_0010_001),
+        Self(0b1_0010_000), // −0.03125
+        // -m*2^-6
+        Self(0b1_0001_111), // −0.029296875
+        Self(0b1_0001_110),
+        Self(0b1_0001_101),
+        Self(0b1_0001_100),
+        Self(0b1_0001_011),
+        Self(0b1_0001_010),
+        Self(0b1_0001_001),
+        Self(0b1_0001_000), // −0.015625
+        // -m*2^-7 subnormal numbers
+        Self(0b1_0000_111), // −0.013671875
+        Self(0b1_0000_110),
+        Self(0b1_0000_101),
+        Self(0b1_0000_100),
+        Self(0b1_0000_011),
+        Self(0b1_0000_010),
+        Self(0b1_0000_001), // −0.001953125
+        // Zeroes
+        Self(0b1_0000_000), // -0.0
+        Self(0b0_0000_000), // 0.0
+        // m*2^-7 // subnormal numbers
+        Self(0b0_0000_001),
+        Self(0b0_0000_010),
+        Self(0b0_0000_011),
+        Self(0b0_0000_100),
+        Self(0b0_0000_101),
+        Self(0b0_0000_110),
+        Self(0b0_0000_111), // 0.013671875
+        // m*2^-6
+        Self(0b0_0001_000), // 0.015625
+        Self(0b0_0001_001),
+        Self(0b0_0001_010),
+        Self(0b0_0001_011),
+        Self(0b0_0001_100),
+        Self(0b0_0001_101),
+        Self(0b0_0001_110),
+        Self(0b0_0001_111), // 0.029296875
+        // m*2^-5
+        Self(0b0_0010_000), // 0.03125
+        Self(0b0_0010_001),
+        Self(0b0_0010_010),
+        Self(0b0_0010_011),
+        Self(0b0_0010_100),
+        Self(0b0_0010_101),
+        Self(0b0_0010_110),
+        Self(0b0_0010_111), // 0.05859375
+        // m*2^-4
+        Self(0b0_0011_000), // 0.0625
+        Self(0b0_0011_001),
+        Self(0b0_0011_010),
+        Self(0b0_0011_011),
+        Self(0b0_0011_100),
+        Self(0b0_0011_101),
+        Self(0b0_0011_110),
+        Self(0b0_0011_111), // 0.1171875
+        // m*2^-3
+        Self(0b0_0100_000), // 0.125
+        Self(0b0_0100_001),
+        Self(0b0_0100_010),
+        Self(0b0_0100_011),
+        Self(0b0_0100_100),
+        Self(0b0_0100_101),
+        Self(0b0_0100_110),
+        Self(0b0_0100_111), // 0.234375
+        // m*2^-2
+        Self(0b0_0101_000), // 0.25
+        Self(0b0_0101_001),
+        Self(0b0_0101_010),
+        Self(0b0_0101_011),
+        Self(0b0_0101_100),
+        Self(0b0_0101_101),
+        Self(0b0_0101_110),
+        Self(0b0_0101_111), // 0.46875
+        // m*2^-1
+        Self(0b0_0110_000), // 0.5
+        Self(0b0_0110_001),
+        Self(0b0_0110_010),
+        Self(0b0_0110_011),
+        Self(0b0_0110_100),
+        Self(0b0_0110_101),
+        Self(0b0_0110_110),
+        Self(0b0_0110_111), // 0.9375
+        // m*2^0
+        Self(0b0_0111_000), // 1
+        Self(0b0_0111_001),
+        Self(0b0_0111_010),
+        Self(0b0_0111_011),
+        Self(0b0_0111_100),
+        Self(0b0_0111_101),
+        Self(0b0_0111_110),
+        Self(0b0_0111_111), // 1.875
+        // m*2^1
+        Self(0b0_1000_000), // 2
+        Self(0b0_1000_001),
+        Self(0b0_1000_010),
+        Self(0b0_1000_011),
+        Self(0b0_1000_100),
+        Self(0b0_1000_101),
+        Self(0b0_1000_110),
+        Self(0b0_1000_111), // 3.75
+        // m*2^2
+        Self(0b0_1001_000), // 4
+        Self(0b0_1001_001),
+        Self(0b0_1001_010),
+        Self(0b0_1001_011),
+        Self(0b0_1001_100),
+        Self(0b0_1001_101),
+        Self(0b0_1001_110),
+        Self(0b0_1001_111), // 7.5
+        // m*2^3
+        Self(0b0_1010_000), // 8
+        Self(0b0_1010_001),
+        Self(0b0_1010_010),
+        Self(0b0_1010_011),
+        Self(0b0_1010_100),
+        Self(0b0_1010_101),
+        Self(0b0_1010_110),
+        Self(0b0_1010_111), // 15
+        // m*2^4
+        Self(0b0_1011_000), // 16
+        Self(0b0_1011_001),
+        Self(0b0_1011_010),
+        Self(0b0_1011_011),
+        Self(0b0_1011_100),
+        Self(0b0_1011_101),
+        Self(0b0_1011_110),
+        Self(0b0_1011_111), // 30
+        // m*2^5
+        Self(0b0_1100_000), // 32
+        Self(0b0_1100_001),
+        Self(0b0_1100_010),
+        Self(0b0_1100_011),
+        Self(0b0_1100_100),
+        Self(0b0_1100_101),
+        Self(0b0_1100_110),
+        Self(0b0_1100_111), // 60
+        // m*2^6
+        Self(0b0_1101_000), // 64
+        Self(0b0_1101_001),
+        Self(0b0_1101_010),
+        Self(0b0_1101_011),
+        Self(0b0_1101_100),
+        Self(0b0_1101_101),
+        Self(0b0_1101_110),
+        Self(0b0_1101_111), // 120
+        // m*2^7
+        Self(0b0_1110_000), // 128
+        Self(0b0_1110_001),
+        Self(0b0_1110_010),
+        Self(0b0_1110_011),
+        Self(0b0_1110_100),
+        Self(0b0_1110_101),
+        Self(0b0_1110_110),
+        Self(0b0_1110_111), // 240
+    ];
+}
+
+impl ops::Add for f8 {
+    type Output = Self;
+    fn add(self, _rhs: Self) -> Self::Output {
+        unimplemented!()
+    }
+}
+
+impl ops::Sub for f8 {
+    type Output = Self;
+    fn sub(self, _rhs: Self) -> Self::Output {
+        unimplemented!()
+    }
+}
+impl ops::Mul for f8 {
+    type Output = Self;
+    fn mul(self, _rhs: Self) -> Self::Output {
+        unimplemented!()
+    }
+}
+impl ops::Div for f8 {
+    type Output = Self;
+    fn div(self, _rhs: Self) -> Self::Output {
+        unimplemented!()
+    }
+}
+
+impl ops::Neg for f8 {
+    type Output = Self;
+    fn neg(self) -> Self::Output {
+        Self(self.0 ^ Self::SIGN_MASK)
+    }
+}
+
+impl ops::Rem for f8 {
+    type Output = Self;
+    fn rem(self, _rhs: Self) -> Self::Output {
+        unimplemented!()
+    }
+}
+
+impl ops::AddAssign for f8 {
+    fn add_assign(&mut self, _rhs: Self) {
+        unimplemented!()
+    }
+}
+
+impl ops::SubAssign for f8 {
+    fn sub_assign(&mut self, _rhs: Self) {
+        unimplemented!()
+    }
+}
+
+impl ops::MulAssign for f8 {
+    fn mul_assign(&mut self, _rhs: Self) {
+        unimplemented!()
+    }
+}
+
+impl cmp::PartialEq for f8 {
+    /// NaN is unequal to everything, zeros of either sign are equal, else compare raw bits.
+    fn eq(&self, other: &Self) -> bool {
+        if self.is_nan() || other.is_nan() {
+            return false;
+        }
+        // No magnitude bits set in either operand means both are zero, whatever their sign.
+        let both_zero = (self.abs().to_bits() | other.abs().to_bits()) == 0;
+        both_zero || self.0 == other.0
+    }
+}
+impl cmp::PartialOrd for f8 {
+    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
+        let inf_rep = f8::EXP_MASK;
+
+        let a_abs = self.abs().to_bits();
+        let b_abs = other.abs().to_bits();
+
+        // If either a or b is NaN, they are unordered.
+        if a_abs > inf_rep || b_abs > inf_rep {
+            return None;
+        }
+
+        // If a and b are both zeros, they are equal.
+        if a_abs | b_abs == 0 {
+            return Some(Ordering::Equal);
+        }
+
+        let a_srep = self.to_bits_signed();
+        let b_srep = other.to_bits_signed();
+        let res = a_srep.cmp(&b_srep);
+
+        if a_srep & b_srep >= 0 {
+            // If at least one of a and b is positive, we get the same result comparing
+            // a and b as signed integers as we would with a floating-point compare.
+            Some(res)
+        } else {
+            // Otherwise, both are negative, so we need to flip the sense of the
+            // comparison to get the correct result.
+            Some(res.reverse())
+        }
+    }
+}
+impl fmt::Display for f8 {
+    fn fmt(&self, _f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        unimplemented!()
+    }
+}
+
+impl fmt::Debug for f8 {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        fmt::Binary::fmt(self, f)
+    }
+}
+
+impl fmt::Binary for f8 {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        let v = self.0;
+        write!(
+            f,
+            "0b{:b}_{:04b}_{:03b}",
+            v >> 7,
+            (v & Self::EXP_MASK) >> Self::SIG_BITS,
+            v & Self::SIG_MASK
+        )
+    }
+}
+
+impl fmt::LowerHex for f8 {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        self.0.fmt(f)
+    }
+}
+
+pub const fn hf8(s: &str) -> f8 {
+    let Ok(bits) = libm::support::hex_float::parse_hex_exact(s, 8, 3) else { panic!() };
+    f8(bits as u8)
+}
diff --git a/crates/libm-test/src/gen.rs b/crates/libm-test/src/gen.rs
deleted file mode 100644
index 3e9eca37a..000000000
--- a/crates/libm-test/src/gen.rs
+++ /dev/null
@@ -1,72 +0,0 @@
-//! Different generators that can create random or systematic bit patterns.
-
-use crate::GenerateInput;
-pub mod random;
-
-/// Helper type to turn any reusable input into a generator.
-#[derive(Clone, Debug, Default)]
-pub struct CachedInput {
-    pub inputs_f32: Vec<(f32, f32, f32)>,
-    pub inputs_f64: Vec<(f64, f64, f64)>,
-    pub inputs_i32: Vec<(i32, i32, i32)>,
-}
-
-impl GenerateInput<(f32,)> for CachedInput {
-    fn get_cases(&self) -> impl Iterator<Item = (f32,)> {
-        self.inputs_f32.iter().map(|f| (f.0,))
-    }
-}
-
-impl GenerateInput<(f32, f32)> for CachedInput {
-    fn get_cases(&self) -> impl Iterator<Item = (f32, f32)> {
-        self.inputs_f32.iter().map(|f| (f.0, f.1))
-    }
-}
-
-impl GenerateInput<(i32, f32)> for CachedInput {
-    fn get_cases(&self) -> impl Iterator<Item = (i32, f32)> {
-        self.inputs_i32.iter().zip(self.inputs_f32.iter()).map(|(i, f)| (i.0, f.0))
-    }
-}
-
-impl GenerateInput<(f32, i32)> for CachedInput {
-    fn get_cases(&self) -> impl Iterator<Item = (f32, i32)> {
-        GenerateInput::<(i32, f32)>::get_cases(self).map(|(i, f)| (f, i))
-    }
-}
-
-impl GenerateInput<(f32, f32, f32)> for CachedInput {
-    fn get_cases(&self) -> impl Iterator<Item = (f32, f32, f32)> {
-        self.inputs_f32.iter().copied()
-    }
-}
-
-impl GenerateInput<(f64,)> for CachedInput {
-    fn get_cases(&self) -> impl Iterator<Item = (f64,)> {
-        self.inputs_f64.iter().map(|f| (f.0,))
-    }
-}
-
-impl GenerateInput<(f64, f64)> for CachedInput {
-    fn get_cases(&self) -> impl Iterator<Item = (f64, f64)> {
-        self.inputs_f64.iter().map(|f| (f.0, f.1))
-    }
-}
-
-impl GenerateInput<(i32, f64)> for CachedInput {
-    fn get_cases(&self) -> impl Iterator<Item = (i32, f64)> {
-        self.inputs_i32.iter().zip(self.inputs_f64.iter()).map(|(i, f)| (i.0, f.0))
-    }
-}
-
-impl GenerateInput<(f64, i32)> for CachedInput {
-    fn get_cases(&self) -> impl Iterator<Item = (f64, i32)> {
-        GenerateInput::<(i32, f64)>::get_cases(self).map(|(i, f)| (f, i))
-    }
-}
-
-impl GenerateInput<(f64, f64, f64)> for CachedInput {
-    fn get_cases(&self) -> impl Iterator<Item = (f64, f64, f64)> {
-        self.inputs_f64.iter().copied()
-    }
-}
diff --git a/crates/libm-test/src/gen/random.rs b/crates/libm-test/src/gen/random.rs
deleted file mode 100644
index 601ef4f1d..000000000
--- a/crates/libm-test/src/gen/random.rs
+++ /dev/null
@@ -1,115 +0,0 @@
-//! A simple generator that produces deterministic random input, caching to use the same
-//! inputs for all functions.
-
-use std::sync::LazyLock;
-
-use rand::{Rng, SeedableRng};
-use rand_chacha::ChaCha8Rng;
-
-use super::CachedInput;
-use crate::GenerateInput;
-
-const SEED: [u8; 32] = *b"3.141592653589793238462643383279";
-
-/// Number of tests to run.
-const NTESTS: usize = {
-    if cfg!(optimizations_enabled) {
-        if crate::emulated()
-            || !cfg!(target_pointer_width = "64")
-            || cfg!(all(target_arch = "x86_64", target_vendor = "apple"))
-        {
-            // Tests are pretty slow on non-64-bit targets, x86 MacOS, and targets that run
-            // in QEMU.
-            100_000
-        } else {
-            5_000_000
-        }
-    } else {
-        // Without optimizations just run a quick check
-        800
-    }
-};
-
-/// Tested inputs.
-static TEST_CASES: LazyLock<CachedInput> = LazyLock::new(|| make_test_cases(NTESTS));
-
-/// The first argument to `jn` and `jnf` is the number of iterations. Make this a reasonable
-/// value so tests don't run forever.
-static TEST_CASES_JN: LazyLock<CachedInput> = LazyLock::new(|| {
-    // Start with regular test cases
-    let mut cases = (&*TEST_CASES).clone();
-
-    // These functions are extremely slow, limit them
-    cases.inputs_i32.truncate((NTESTS / 1000).max(80));
-    cases.inputs_f32.truncate((NTESTS / 1000).max(80));
-    cases.inputs_f64.truncate((NTESTS / 1000).max(80));
-
-    // It is easy to overflow the stack with these in debug mode
-    let max_iterations = if cfg!(optimizations_enabled) && cfg!(target_pointer_width = "64") {
-        0xffff
-    } else if cfg!(windows) {
-        0x00ff
-    } else {
-        0x0fff
-    };
-
-    let mut rng = ChaCha8Rng::from_seed(SEED);
-
-    for case in cases.inputs_i32.iter_mut() {
-        case.0 = rng.gen_range(3..=max_iterations);
-    }
-
-    cases
-});
-
-fn make_test_cases(ntests: usize) -> CachedInput {
-    let mut rng = ChaCha8Rng::from_seed(SEED);
-
-    // make sure we include some basic cases
-    let mut inputs_i32 = vec![(0, 0, 0), (1, 1, 1), (-1, -1, -1)];
-    let mut inputs_f32 = vec![
-        (0.0, 0.0, 0.0),
-        (f32::EPSILON, f32::EPSILON, f32::EPSILON),
-        (f32::INFINITY, f32::INFINITY, f32::INFINITY),
-        (f32::NEG_INFINITY, f32::NEG_INFINITY, f32::NEG_INFINITY),
-        (f32::MAX, f32::MAX, f32::MAX),
-        (f32::MIN, f32::MIN, f32::MIN),
-        (f32::MIN_POSITIVE, f32::MIN_POSITIVE, f32::MIN_POSITIVE),
-        (f32::NAN, f32::NAN, f32::NAN),
-    ];
-    let mut inputs_f64 = vec![
-        (0.0, 0.0, 0.0),
-        (f64::EPSILON, f64::EPSILON, f64::EPSILON),
-        (f64::INFINITY, f64::INFINITY, f64::INFINITY),
-        (f64::NEG_INFINITY, f64::NEG_INFINITY, f64::NEG_INFINITY),
-        (f64::MAX, f64::MAX, f64::MAX),
-        (f64::MIN, f64::MIN, f64::MIN),
-        (f64::MIN_POSITIVE, f64::MIN_POSITIVE, f64::MIN_POSITIVE),
-        (f64::NAN, f64::NAN, f64::NAN),
-    ];
-
-    inputs_i32.extend((0..(ntests - inputs_i32.len())).map(|_| rng.gen::<(i32, i32, i32)>()));
-
-    // Generate integers to get a full range of bitpatterns, then convert back to
-    // floats.
-    inputs_f32.extend((0..(ntests - inputs_f32.len())).map(|_| {
-        let ints = rng.gen::<(u32, u32, u32)>();
-        (f32::from_bits(ints.0), f32::from_bits(ints.1), f32::from_bits(ints.2))
-    }));
-    inputs_f64.extend((0..(ntests - inputs_f64.len())).map(|_| {
-        let ints = rng.gen::<(u64, u64, u64)>();
-        (f64::from_bits(ints.0), f64::from_bits(ints.1), f64::from_bits(ints.2))
-    }));
-
-    CachedInput { inputs_f32, inputs_f64, inputs_i32 }
-}
-
-/// Create a test case iterator.
-pub fn get_test_cases<RustArgs>(fname: &str) -> impl Iterator<Item = RustArgs>
-where
-    CachedInput: GenerateInput<RustArgs>,
-{
-    let inputs = if fname == "jn" || fname == "jnf" { &TEST_CASES_JN } else { &TEST_CASES };
-
-    CachedInput::get_cases(inputs)
-}
diff --git a/crates/libm-test/src/generate.rs b/crates/libm-test/src/generate.rs
new file mode 100644
index 000000000..89ca09a7a
--- /dev/null
+++ b/crates/libm-test/src/generate.rs
@@ -0,0 +1,43 @@
+//! Different generators that can create random or systematic bit patterns.
+
+pub mod case_list;
+pub mod edge_cases;
+pub mod random;
+pub mod spaced;
+
+/// Wraps any iterator as an `ExactSizeIterator` with a caller-supplied `total`. Exhausting the
+/// iterator asserts that exactly `total` items were produced; `size_hint` reports the remainder.
+#[derive(Debug)]
+pub struct KnownSize<I> {
+    total: u64,
+    current: u64,
+    iter: I,
+}
+
+impl<I> KnownSize<I> {
+    pub fn new(iter: I, total: u64) -> Self {
+        Self { total, current: 0, iter }
+    }
+}
+
+impl<I: Iterator> Iterator for KnownSize<I> {
+    type Item = I::Item;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        let next = self.iter.next();
+        if next.is_some() {
+            self.current += 1;
+            return next;
+        }
+        // The inner iterator is exhausted: verify that the promised count was accurate.
+        assert_eq!(self.current, self.total, "total items did not match expected");
+        None
+    }
+    /// Remaining item count; saturates at zero if the inner iterator yields more than `total`.
+    fn size_hint(&self) -> (usize, Option<usize>) {
+        let remaining = usize::try_from(self.total.saturating_sub(self.current)).unwrap();
+        (remaining, Some(remaining))
+    }
+}
+
+impl<I: Iterator> ExactSizeIterator for KnownSize<I> {}
diff --git a/crates/libm-test/src/generate/case_list.rs b/crates/libm-test/src/generate/case_list.rs
new file mode 100644
index 000000000..e3628d51c
--- /dev/null
+++ b/crates/libm-test/src/generate/case_list.rs
@@ -0,0 +1,853 @@
+//! Test cases to verify specific values.
+//!
+//! Each routine can have a set of inputs and, optionally, outputs. If an output is provided, it
+//! will be used to check against. If only inputs are provided, the case will be checked against
+//! a basis.
+//!
+//! This is useful for adding regression tests or expected failures.
+
+use libm::hf64;
+#[cfg(f128_enabled)]
+use libm::hf128;
+
+use crate::{CheckBasis, CheckCtx, GeneratorKind, MathOp, op};
+
+pub struct TestCase<Op: MathOp> {
+    pub input: Op::RustArgs,
+    pub output: Option<Op::RustRet>,
+}
+
+impl<Op: MathOp> TestCase<Op> {
+    #[expect(dead_code)]
+    fn append_inputs(v: &mut Vec<Self>, l: &[Op::RustArgs]) {
+        v.extend(l.iter().copied().map(|input| Self { input, output: None }));
+    }
+
+    fn append_pairs(v: &mut Vec<Self>, l: &[(Op::RustArgs, Option<Op::RustRet>)])
+    where
+        Op::RustRet: Copy,
+    {
+        v.extend(l.iter().copied().map(|(input, output)| Self { input, output }));
+    }
+}
+
+fn acos_cases() -> Vec<TestCase<op::acos::Routine>> {
+    vec![]
+}
+
+fn acosf_cases() -> Vec<TestCase<op::acosf::Routine>> {
+    vec![]
+}
+
+fn acosh_cases() -> Vec<TestCase<op::acosh::Routine>> {
+    vec![]
+}
+
+fn acoshf_cases() -> Vec<TestCase<op::acoshf::Routine>> {
+    vec![]
+}
+
+fn asin_cases() -> Vec<TestCase<op::asin::Routine>> {
+    vec![]
+}
+
+fn asinf_cases() -> Vec<TestCase<op::asinf::Routine>> {
+    vec![]
+}
+
+fn asinh_cases() -> Vec<TestCase<op::asinh::Routine>> {
+    vec![]
+}
+
+fn asinhf_cases() -> Vec<TestCase<op::asinhf::Routine>> {
+    vec![]
+}
+
+fn atan_cases() -> Vec<TestCase<op::atan::Routine>> {
+    vec![]
+}
+
+fn atan2_cases() -> Vec<TestCase<op::atan2::Routine>> {
+    vec![]
+}
+
+fn atan2f_cases() -> Vec<TestCase<op::atan2f::Routine>> {
+    vec![]
+}
+
+fn atanf_cases() -> Vec<TestCase<op::atanf::Routine>> {
+    vec![]
+}
+
+fn atanh_cases() -> Vec<TestCase<op::atanh::Routine>> {
+    vec![]
+}
+
+fn atanhf_cases() -> Vec<TestCase<op::atanhf::Routine>> {
+    vec![]
+}
+
+fn cbrt_cases() -> Vec<TestCase<op::cbrt::Routine>> {
+    vec![]
+}
+
+fn cbrtf_cases() -> Vec<TestCase<op::cbrtf::Routine>> {
+    vec![]
+}
+
+fn ceil_cases() -> Vec<TestCase<op::ceil::Routine>> {
+    vec![]
+}
+
+fn ceilf_cases() -> Vec<TestCase<op::ceilf::Routine>> {
+    vec![]
+}
+
+#[cfg(f128_enabled)]
+fn ceilf128_cases() -> Vec<TestCase<op::ceilf128::Routine>> {
+    vec![]
+}
+
+#[cfg(f16_enabled)]
+fn ceilf16_cases() -> Vec<TestCase<op::ceilf16::Routine>> {
+    vec![]
+}
+
+fn copysign_cases() -> Vec<TestCase<op::copysign::Routine>> {
+    vec![]
+}
+
+fn copysignf_cases() -> Vec<TestCase<op::copysignf::Routine>> {
+    vec![]
+}
+
+#[cfg(f128_enabled)]
+fn copysignf128_cases() -> Vec<TestCase<op::copysignf128::Routine>> {
+    vec![]
+}
+
+#[cfg(f16_enabled)]
+fn copysignf16_cases() -> Vec<TestCase<op::copysignf16::Routine>> {
+    vec![]
+}
+
+fn cos_cases() -> Vec<TestCase<op::cos::Routine>> {
+    vec![]
+}
+
+fn cosf_cases() -> Vec<TestCase<op::cosf::Routine>> {
+    vec![]
+}
+
+fn cosh_cases() -> Vec<TestCase<op::cosh::Routine>> {
+    vec![]
+}
+
+fn coshf_cases() -> Vec<TestCase<op::coshf::Routine>> {
+    vec![]
+}
+
+fn erf_cases() -> Vec<TestCase<op::erf::Routine>> {
+    vec![]
+}
+
+fn erfc_cases() -> Vec<TestCase<op::erfc::Routine>> {
+    vec![]
+}
+
+fn erfcf_cases() -> Vec<TestCase<op::erfcf::Routine>> {
+    vec![]
+}
+
+fn erff_cases() -> Vec<TestCase<op::erff::Routine>> {
+    vec![]
+}
+
+fn exp_cases() -> Vec<TestCase<op::exp::Routine>> {
+    vec![]
+}
+
+fn exp10_cases() -> Vec<TestCase<op::exp10::Routine>> {
+    vec![]
+}
+
+fn exp10f_cases() -> Vec<TestCase<op::exp10f::Routine>> {
+    vec![]
+}
+
+fn exp2_cases() -> Vec<TestCase<op::exp2::Routine>> {
+    vec![]
+}
+
+fn exp2f_cases() -> Vec<TestCase<op::exp2f::Routine>> {
+    vec![]
+}
+
+fn expf_cases() -> Vec<TestCase<op::expf::Routine>> {
+    vec![]
+}
+
+fn expm1_cases() -> Vec<TestCase<op::expm1::Routine>> {
+    vec![]
+}
+
+fn expm1f_cases() -> Vec<TestCase<op::expm1f::Routine>> {
+    vec![]
+}
+
+fn fabs_cases() -> Vec<TestCase<op::fabs::Routine>> {
+    vec![]
+}
+
+fn fabsf_cases() -> Vec<TestCase<op::fabsf::Routine>> {
+    vec![]
+}
+
+#[cfg(f128_enabled)]
+fn fabsf128_cases() -> Vec<TestCase<op::fabsf128::Routine>> {
+    vec![]
+}
+
+#[cfg(f16_enabled)]
+fn fabsf16_cases() -> Vec<TestCase<op::fabsf16::Routine>> {
+    vec![]
+}
+
+fn fdim_cases() -> Vec<TestCase<op::fdim::Routine>> {
+    vec![]
+}
+
+fn fdimf_cases() -> Vec<TestCase<op::fdimf::Routine>> {
+    vec![]
+}
+
+#[cfg(f128_enabled)]
+fn fdimf128_cases() -> Vec<TestCase<op::fdimf128::Routine>> {
+    vec![]
+}
+
+#[cfg(f16_enabled)]
+fn fdimf16_cases() -> Vec<TestCase<op::fdimf16::Routine>> {
+    vec![]
+}
+
+fn floor_cases() -> Vec<TestCase<op::floor::Routine>> {
+    vec![]
+}
+
+fn floorf_cases() -> Vec<TestCase<op::floorf::Routine>> {
+    vec![]
+}
+
+#[cfg(f128_enabled)]
+fn floorf128_cases() -> Vec<TestCase<op::floorf128::Routine>> {
+    vec![]
+}
+
+#[cfg(f16_enabled)]
+fn floorf16_cases() -> Vec<TestCase<op::floorf16::Routine>> {
+    vec![]
+}
+
+fn fma_cases() -> Vec<TestCase<op::fma::Routine>> {
+    let mut v = vec![];
+    TestCase::append_pairs(
+        &mut v,
+        &[
+            // Previous failure with incorrect sign
+            ((5e-324, -5e-324, 0.0), Some(-0.0)),
+        ],
+    );
+    v
+}
+
+fn fmaf_cases() -> Vec<TestCase<op::fmaf::Routine>> {
+    vec![]
+}
+
+#[cfg(f128_enabled)]
+fn fmaf128_cases() -> Vec<TestCase<op::fmaf128::Routine>> {
+    let mut v = vec![];
+    TestCase::append_pairs(
+        &mut v,
+        &[
+            (
+                // Tricky rounding case that previously failed in extensive tests
+                (
+                    hf128!("-0x1.1966cc01966cc01966cc01966f06p-25"),
+                    hf128!("-0x1.669933fe69933fe69933fe6997c9p-16358"),
+                    hf128!("-0x0.000000000000000000000000048ap-16382"),
+                ),
+                Some(hf128!("0x0.c5171470a3ff5e0f68d751491b18p-16382")),
+            ),
+            (
+                // Subnormal edge case that caused a failure
+                (
+                    hf128!("0x0.7ffffffffffffffffffffffffff7p-16382"),
+                    hf128!("0x1.ffffffffffffffffffffffffffffp-1"),
+                    hf128!("0x0.8000000000000000000000000009p-16382"),
+                ),
+                Some(hf128!("0x1.0000000000000000000000000000p-16382")),
+            ),
+        ],
+    );
+    v
+}
+
+#[cfg(f16_enabled)]
+fn fmaxf16_cases() -> Vec<TestCase<op::fmaxf16::Routine>> {
+    vec![]
+}
+
+fn fmaxf_cases() -> Vec<TestCase<op::fmaxf::Routine>> {
+    vec![]
+}
+
+fn fmax_cases() -> Vec<TestCase<op::fmax::Routine>> {
+    vec![]
+}
+
+#[cfg(f128_enabled)]
+fn fmaxf128_cases() -> Vec<TestCase<op::fmaxf128::Routine>> {
+    vec![]
+}
+
+#[cfg(f16_enabled)]
+fn fmaximumf16_cases() -> Vec<TestCase<op::fmaximumf16::Routine>> {
+    vec![]
+}
+
+fn fmaximumf_cases() -> Vec<TestCase<op::fmaximumf::Routine>> {
+    vec![]
+}
+
+fn fmaximum_cases() -> Vec<TestCase<op::fmaximum::Routine>> {
+    vec![]
+}
+
+#[cfg(f128_enabled)]
+fn fmaximumf128_cases() -> Vec<TestCase<op::fmaximumf128::Routine>> {
+    vec![]
+}
+
+#[cfg(f16_enabled)]
+fn fmaximum_numf16_cases() -> Vec<TestCase<op::fmaximum_numf16::Routine>> {
+    vec![]
+}
+
+fn fmaximum_numf_cases() -> Vec<TestCase<op::fmaximum_numf::Routine>> {
+    vec![]
+}
+
+fn fmaximum_num_cases() -> Vec<TestCase<op::fmaximum_num::Routine>> {
+    vec![]
+}
+
+#[cfg(f128_enabled)]
+fn fmaximum_numf128_cases() -> Vec<TestCase<op::fmaximum_numf128::Routine>> {
+    vec![]
+}
+
+#[cfg(f16_enabled)]
+fn fminf16_cases() -> Vec<TestCase<op::fminf16::Routine>> {
+    vec![]
+}
+
+fn fminf_cases() -> Vec<TestCase<op::fminf::Routine>> {
+    vec![]
+}
+
+fn fmin_cases() -> Vec<TestCase<op::fmin::Routine>> {
+    vec![]
+}
+
+#[cfg(f128_enabled)]
+fn fminf128_cases() -> Vec<TestCase<op::fminf128::Routine>> {
+    vec![]
+}
+
+#[cfg(f16_enabled)]
+fn fminimumf16_cases() -> Vec<TestCase<op::fminimumf16::Routine>> {
+    vec![]
+}
+
+fn fminimumf_cases() -> Vec<TestCase<op::fminimumf::Routine>> {
+    vec![]
+}
+
+fn fminimum_cases() -> Vec<TestCase<op::fminimum::Routine>> {
+    vec![]
+}
+
+#[cfg(f128_enabled)]
+fn fminimumf128_cases() -> Vec<TestCase<op::fminimumf128::Routine>> {
+    vec![]
+}
+
+#[cfg(f16_enabled)]
+fn fminimum_numf16_cases() -> Vec<TestCase<op::fminimum_numf16::Routine>> {
+    vec![]
+}
+
+fn fminimum_numf_cases() -> Vec<TestCase<op::fminimum_numf::Routine>> {
+    vec![]
+}
+
+fn fminimum_num_cases() -> Vec<TestCase<op::fminimum_num::Routine>> {
+    vec![]
+}
+
+#[cfg(f128_enabled)]
+fn fminimum_numf128_cases() -> Vec<TestCase<op::fminimum_numf128::Routine>> {
+    vec![]
+}
+
+fn fmod_cases() -> Vec<TestCase<op::fmod::Routine>> {
+    let mut v = vec![];
+    TestCase::append_pairs(
+        &mut v,
+        &[
+            // Previous failure with incorrect loop iteration
+            // <https://github.com/rust-lang/libm/pull/469#discussion_r2022337272>
+            ((2.1, 3.123e-320), Some(2.0696e-320)),
+            ((2.1, 2.253547e-318), Some(1.772535e-318)),
+        ],
+    );
+    v
+}
+
+fn fmodf_cases() -> Vec<TestCase<op::fmodf::Routine>> {
+    let mut v = vec![];
+    TestCase::append_pairs(
+        &mut v,
+        &[
+            // Previous failure with incorrect loop iteration
+            // <https://github.com/rust-lang/libm/pull/469#discussion_r2022337272>
+            ((2.1, 8.858e-42), Some(8.085e-42)),
+            ((2.1, 6.39164e-40), Some(6.1636e-40)),
+            ((5.5, 6.39164e-40), Some(4.77036e-40)),
+            ((-151.189, 6.39164e-40), Some(-5.64734e-40)),
+        ],
+    );
+    v
+}
+
+#[cfg(f128_enabled)]
+fn fmodf128_cases() -> Vec<TestCase<op::fmodf128::Routine>> {
+    vec![]
+}
+
+#[cfg(f16_enabled)]
+fn fmodf16_cases() -> Vec<TestCase<op::fmodf16::Routine>> {
+    vec![]
+}
+
+fn frexp_cases() -> Vec<TestCase<op::frexp::Routine>> {
+    vec![]
+}
+
+fn frexpf_cases() -> Vec<TestCase<op::frexpf::Routine>> {
+    vec![]
+}
+
+fn hypot_cases() -> Vec<TestCase<op::hypot::Routine>> {
+    vec![]
+}
+
+fn hypotf_cases() -> Vec<TestCase<op::hypotf::Routine>> {
+    vec![]
+}
+
+fn ilogb_cases() -> Vec<TestCase<op::ilogb::Routine>> {
+    vec![]
+}
+
+fn ilogbf_cases() -> Vec<TestCase<op::ilogbf::Routine>> {
+    vec![]
+}
+
+fn j0_cases() -> Vec<TestCase<op::j0::Routine>> {
+    vec![]
+}
+
+fn j0f_cases() -> Vec<TestCase<op::j0f::Routine>> {
+    vec![]
+}
+
+fn j1_cases() -> Vec<TestCase<op::j1::Routine>> {
+    vec![]
+}
+
+fn j1f_cases() -> Vec<TestCase<op::j1f::Routine>> {
+    vec![]
+}
+
+fn jn_cases() -> Vec<TestCase<op::jn::Routine>> {
+    vec![]
+}
+
+fn jnf_cases() -> Vec<TestCase<op::jnf::Routine>> {
+    vec![]
+}
+
+fn ldexp_cases() -> Vec<TestCase<op::ldexp::Routine>> {
+    vec![]
+}
+
+fn ldexpf_cases() -> Vec<TestCase<op::ldexpf::Routine>> {
+    vec![]
+}
+
+#[cfg(f128_enabled)]
+fn ldexpf128_cases() -> Vec<TestCase<op::ldexpf128::Routine>> {
+    vec![]
+}
+
+#[cfg(f16_enabled)]
+fn ldexpf16_cases() -> Vec<TestCase<op::ldexpf16::Routine>> {
+    vec![]
+}
+
+fn lgamma_cases() -> Vec<TestCase<op::lgamma::Routine>> {
+    vec![]
+}
+
+fn lgamma_r_cases() -> Vec<TestCase<op::lgamma_r::Routine>> {
+    vec![]
+}
+
+fn lgammaf_cases() -> Vec<TestCase<op::lgammaf::Routine>> {
+    vec![]
+}
+
+fn lgammaf_r_cases() -> Vec<TestCase<op::lgammaf_r::Routine>> {
+    vec![]
+}
+
+fn log_cases() -> Vec<TestCase<op::log::Routine>> {
+    vec![]
+}
+
+fn log10_cases() -> Vec<TestCase<op::log10::Routine>> {
+    vec![]
+}
+
+fn log10f_cases() -> Vec<TestCase<op::log10f::Routine>> {
+    vec![]
+}
+
+fn log1p_cases() -> Vec<TestCase<op::log1p::Routine>> {
+    vec![]
+}
+
+fn log1pf_cases() -> Vec<TestCase<op::log1pf::Routine>> {
+    vec![]
+}
+
+fn log2_cases() -> Vec<TestCase<op::log2::Routine>> {
+    vec![]
+}
+
+fn log2f_cases() -> Vec<TestCase<op::log2f::Routine>> {
+    vec![]
+}
+
+fn logf_cases() -> Vec<TestCase<op::logf::Routine>> {
+    vec![]
+}
+
+fn modf_cases() -> Vec<TestCase<op::modf::Routine>> {
+    vec![]
+}
+
+fn modff_cases() -> Vec<TestCase<op::modff::Routine>> {
+    vec![]
+}
+
+fn nextafter_cases() -> Vec<TestCase<op::nextafter::Routine>> {
+    vec![]
+}
+
+fn nextafterf_cases() -> Vec<TestCase<op::nextafterf::Routine>> {
+    vec![]
+}
+
+fn pow_cases() -> Vec<TestCase<op::pow::Routine>> {
+    vec![]
+}
+
+fn powf_cases() -> Vec<TestCase<op::powf::Routine>> {
+    vec![]
+}
+
+fn remainder_cases() -> Vec<TestCase<op::remainder::Routine>> {
+    vec![]
+}
+
+fn remainderf_cases() -> Vec<TestCase<op::remainderf::Routine>> {
+    vec![]
+}
+
+fn remquo_cases() -> Vec<TestCase<op::remquo::Routine>> {
+    vec![]
+}
+
+fn remquof_cases() -> Vec<TestCase<op::remquof::Routine>> {
+    vec![]
+}
+
+fn rint_cases() -> Vec<TestCase<op::rint::Routine>> {
+    let mut v = vec![];
+    TestCase::append_pairs(
+        &mut v,
+        &[
+            // Known failure on i586
+            #[cfg(not(x86_no_sse))]
+            ((hf64!("-0x1.e3f13ff995ffcp+38"),), Some(hf64!("-0x1.e3f13ff994000p+38"))),
+            #[cfg(x86_no_sse)]
+            ((hf64!("-0x1.e3f13ff995ffcp+38"),), Some(hf64!("-0x1.e3f13ff998000p+38"))),
+        ],
+    );
+    v
+}
+
+fn rintf_cases() -> Vec<TestCase<op::rintf::Routine>> {
+    vec![]
+}
+
+#[cfg(f128_enabled)]
+fn rintf128_cases() -> Vec<TestCase<op::rintf128::Routine>> {
+    vec![]
+}
+
+#[cfg(f16_enabled)]
+fn rintf16_cases() -> Vec<TestCase<op::rintf16::Routine>> {
+    vec![]
+}
+
+#[cfg(f16_enabled)]
+fn roundf16_cases() -> Vec<TestCase<op::roundf16::Routine>> {
+    vec![]
+}
+
+fn round_cases() -> Vec<TestCase<op::round::Routine>> {
+    vec![]
+}
+
+fn roundf_cases() -> Vec<TestCase<op::roundf::Routine>> {
+    vec![]
+}
+
+#[cfg(f128_enabled)]
+fn roundf128_cases() -> Vec<TestCase<op::roundf128::Routine>> {
+    vec![]
+}
+
+#[cfg(f16_enabled)]
+fn roundevenf16_cases() -> Vec<TestCase<op::roundevenf16::Routine>> {
+    vec![]
+}
+
+fn roundeven_cases() -> Vec<TestCase<op::roundeven::Routine>> {
+    let mut v = vec![];
+    TestCase::append_pairs(
+        &mut v,
+        &[
+            // Known failure on i586
+            #[cfg(not(x86_no_sse))]
+            ((hf64!("-0x1.e3f13ff995ffcp+38"),), Some(hf64!("-0x1.e3f13ff994000p+38"))),
+            #[cfg(x86_no_sse)]
+            ((hf64!("-0x1.e3f13ff995ffcp+38"),), Some(hf64!("-0x1.e3f13ff998000p+38"))),
+        ],
+    );
+    v
+}
+
+fn roundevenf_cases() -> Vec<TestCase<op::roundevenf::Routine>> {
+    vec![]
+}
+
+#[cfg(f128_enabled)]
+fn roundevenf128_cases() -> Vec<TestCase<op::roundevenf128::Routine>> {
+    vec![]
+}
+
+fn scalbn_cases() -> Vec<TestCase<op::scalbn::Routine>> {
+    vec![]
+}
+
+fn scalbnf_cases() -> Vec<TestCase<op::scalbnf::Routine>> {
+    vec![]
+}
+
+#[cfg(f128_enabled)]
+fn scalbnf128_cases() -> Vec<TestCase<op::scalbnf128::Routine>> {
+    vec![]
+}
+
+#[cfg(f16_enabled)]
+fn scalbnf16_cases() -> Vec<TestCase<op::scalbnf16::Routine>> {
+    vec![]
+}
+
+fn sin_cases() -> Vec<TestCase<op::sin::Routine>> {
+    vec![]
+}
+
+fn sincos_cases() -> Vec<TestCase<op::sincos::Routine>> {
+    vec![]
+}
+
+fn sincosf_cases() -> Vec<TestCase<op::sincosf::Routine>> {
+    vec![]
+}
+
+fn sinf_cases() -> Vec<TestCase<op::sinf::Routine>> {
+    vec![]
+}
+
+fn sinh_cases() -> Vec<TestCase<op::sinh::Routine>> {
+    vec![]
+}
+
+fn sinhf_cases() -> Vec<TestCase<op::sinhf::Routine>> {
+    vec![]
+}
+
+fn sqrt_cases() -> Vec<TestCase<op::sqrt::Routine>> {
+    vec![]
+}
+
+fn sqrtf_cases() -> Vec<TestCase<op::sqrtf::Routine>> {
+    vec![]
+}
+
+#[cfg(f128_enabled)]
+fn sqrtf128_cases() -> Vec<TestCase<op::sqrtf128::Routine>> {
+    vec![]
+}
+
+#[cfg(f16_enabled)]
+fn sqrtf16_cases() -> Vec<TestCase<op::sqrtf16::Routine>> {
+    vec![]
+}
+
+fn tan_cases() -> Vec<TestCase<op::tan::Routine>> {
+    vec![]
+}
+
+fn tanf_cases() -> Vec<TestCase<op::tanf::Routine>> {
+    vec![]
+}
+
+fn tanh_cases() -> Vec<TestCase<op::tanh::Routine>> {
+    vec![]
+}
+
+fn tanhf_cases() -> Vec<TestCase<op::tanhf::Routine>> {
+    vec![]
+}
+
+fn tgamma_cases() -> Vec<TestCase<op::tgamma::Routine>> {
+    vec![]
+}
+
+fn tgammaf_cases() -> Vec<TestCase<op::tgammaf::Routine>> {
+    vec![]
+}
+
+fn trunc_cases() -> Vec<TestCase<op::trunc::Routine>> {
+    vec![]
+}
+
+fn truncf_cases() -> Vec<TestCase<op::truncf::Routine>> {
+    vec![]
+}
+
+#[cfg(f128_enabled)]
+fn truncf128_cases() -> Vec<TestCase<op::truncf128::Routine>> {
+    vec![]
+}
+
+#[cfg(f16_enabled)]
+fn truncf16_cases() -> Vec<TestCase<op::truncf16::Routine>> {
+    vec![]
+}
+
+fn y0_cases() -> Vec<TestCase<op::y0::Routine>> {
+    vec![]
+}
+
+fn y0f_cases() -> Vec<TestCase<op::y0f::Routine>> {
+    vec![]
+}
+
+fn y1_cases() -> Vec<TestCase<op::y1::Routine>> {
+    vec![]
+}
+
+fn y1f_cases() -> Vec<TestCase<op::y1f::Routine>> {
+    vec![]
+}
+
+fn yn_cases() -> Vec<TestCase<op::yn::Routine>> {
+    vec![]
+}
+
+fn ynf_cases() -> Vec<TestCase<op::ynf::Routine>> {
+    vec![]
+}
+
+pub trait CaseListInput: MathOp + Sized {
+    fn get_cases() -> Vec<TestCase<Self>>;
+}
+
+macro_rules! impl_case_list {
+    (
+        fn_name: $fn_name:ident,
+        attrs: [$($attr:meta),*],
+    ) => {
+        paste::paste! {
+            $(#[$attr])*
+            impl CaseListInput for crate::op::$fn_name::Routine {
+                fn get_cases() -> Vec<TestCase<Self>> {
+                    [< $fn_name _cases >]()
+                }
+            }
+        }
+    };
+}
+
+libm_macros::for_each_function! {
+    callback: impl_case_list,
+}
+
+/// This is the test generator for standalone tests, i.e. those with no basis. For this, it
+/// only extracts tests with a known output.
+pub fn get_test_cases_standalone<Op>(
+    ctx: &CheckCtx,
+) -> impl Iterator<Item = (Op::RustArgs, Op::RustRet)> + use<'_, Op>
+where
+    Op: MathOp + CaseListInput,
+{
+    assert_eq!(ctx.basis, CheckBasis::None);
+    assert_eq!(ctx.gen_kind, GeneratorKind::List);
+    Op::get_cases().into_iter().filter_map(|x| x.output.map(|o| (x.input, o)))
+}
+
+/// Opposite of the above; extract only test cases that don't have a known output, to be run
+/// against a basis.
+pub fn get_test_cases_basis<Op>(
+    ctx: &CheckCtx,
+) -> (impl Iterator<Item = Op::RustArgs> + use<'_, Op>, u64)
+where
+    Op: MathOp + CaseListInput,
+{
+    assert_ne!(ctx.basis, CheckBasis::None);
+    assert_eq!(ctx.gen_kind, GeneratorKind::List);
+
+    let cases = Op::get_cases();
+    let count: u64 = cases.iter().filter(|case| case.output.is_none()).count().try_into().unwrap();
+
+    (cases.into_iter().filter(|x| x.output.is_none()).map(|x| x.input), count)
+}
diff --git a/crates/libm-test/src/generate/edge_cases.rs b/crates/libm-test/src/generate/edge_cases.rs
new file mode 100644
index 000000000..56cc9fa9a
--- /dev/null
+++ b/crates/libm-test/src/generate/edge_cases.rs
@@ -0,0 +1,310 @@
+//! A generator that checks a handful of cases near infinities, zeros, asymptotes, and NaNs.
+
+use libm::support::{CastInto, Float, Int, MinInt};
+
+use crate::domain::get_domain;
+use crate::generate::KnownSize;
+use crate::op::OpITy;
+use crate::run_cfg::{check_near_count, check_point_count};
+use crate::{BaseName, CheckCtx, FloatExt, FloatTy, MathOp, test_log};
+
+/// Generate a sequence of edge cases, e.g. numbers near zeroes and infinities.
+pub trait EdgeCaseInput<Op> {
+    fn get_cases(ctx: &CheckCtx) -> (impl Iterator<Item = Self> + Send, u64);
+}
+
+/// Create a list of values around interesting points (infinities, zeroes, NaNs).
+fn float_edge_cases<Op>(
+    ctx: &CheckCtx,
+    argnum: usize,
+) -> (impl Iterator<Item = Op::FTy> + Clone, u64)
+where
+    Op: MathOp,
+{
+    let mut ret = Vec::new();
+    let one = OpITy::<Op>::ONE;
+    let values = &mut ret;
+    let domain = get_domain::<_, i8>(ctx.fn_ident, argnum).unwrap_float();
+    let domain_start = domain.range_start();
+    let domain_end = domain.range_end();
+
+    let check_points = check_point_count(ctx);
+    let near_points = check_near_count(ctx);
+
+    // Check near some notable constants
+    count_up(Op::FTy::ONE, near_points, values);
+    count_up(Op::FTy::ZERO, near_points, values);
+    count_up(Op::FTy::NEG_ONE, near_points, values);
+    count_down(Op::FTy::ONE, near_points, values);
+    count_down(Op::FTy::ZERO, near_points, values);
+    count_down(Op::FTy::NEG_ONE, near_points, values);
+    values.push(Op::FTy::NEG_ZERO);
+
+    // Check values near the extremes
+    count_up(Op::FTy::NEG_INFINITY, near_points, values);
+    count_down(Op::FTy::INFINITY, near_points, values);
+    count_down(domain_end, near_points, values);
+    count_up(domain_start, near_points, values);
+    count_down(domain_start, near_points, values);
+    count_up(domain_end, near_points, values);
+    count_down(domain_end, near_points, values); // NOTE(review): duplicate; deduped below
+
+    // Check some special values that aren't included in the above ranges
+    values.push(Op::FTy::NAN);
+    values.extend(Op::FTy::consts().iter());
+
+    // Check around the maximum subnormal value
+    let sub_max = Op::FTy::from_bits(Op::FTy::SIG_MASK);
+    count_up(sub_max, near_points, values);
+    count_down(sub_max, near_points, values);
+    count_up(-sub_max, near_points, values);
+    count_down(-sub_max, near_points, values);
+
+    // Check a few values around the subnormal range (a single bit set below `SIG_BITS`)
+    for shift in (0..Op::FTy::SIG_BITS).step_by(Op::FTy::SIG_BITS as usize / 5) {
+        let v = Op::FTy::from_bits(one << shift);
+        count_up(v, 2, values);
+        count_down(v, 2, values);
+        count_up(-v, 2, values);
+        count_down(-v, 2, values);
+    }
+
+    // Check around asymptotes
+    if let Some(f) = domain.check_points {
+        let iter = f();
+        for x in iter.take(check_points) {
+            count_up(x, near_points, values);
+            count_down(x, near_points, values);
+        }
+    }
+
+    // Some results may overlap so deduplicate the vector to save test cycles.
+    values.sort_by_key(|x| x.to_bits());
+    values.dedup_by_key(|x| x.to_bits());
+
+    let count = ret.len().try_into().unwrap();
+
+    test_log(&format!(
+        "{gen_kind:?} {basis:?} {fn_ident} arg {arg}/{args}: {count} edge cases",
+        gen_kind = ctx.gen_kind,
+        basis = ctx.basis,
+        fn_ident = ctx.fn_ident,
+        arg = argnum + 1,
+        args = ctx.input_count(),
+    ));
+
+    (ret.into_iter(), count)
+}
+
+/// Add up to `points` values starting at and including `x` and counting up in the smallest
+/// possible increments (1 ULP). Never pushes positive infinity; panics if `x` is NaN.
+fn count_up<F: Float>(mut x: F, points: u64, values: &mut Vec<F>) {
+    assert!(!x.is_nan());
+
+    let mut count = 0;
+    while x < F::INFINITY && count < points {
+        values.push(x);
+        x = x.next_up();
+        count += 1;
+    }
+}
+
+/// Add up to `points` values starting at and including `x` and counting down in the smallest
+/// possible increments (1 ULP). Never pushes negative infinity; panics if `x` is NaN.
+fn count_down<F: Float>(mut x: F, points: u64, values: &mut Vec<F>) {
+    assert!(!x.is_nan());
+
+    let mut count = 0;
+    while x > F::NEG_INFINITY && count < points {
+        values.push(x);
+        x = x.next_down();
+        count += 1;
+    }
+}
+
+/// Create a list of values around interesting integer points (min, zero, max).
+pub fn int_edge_cases<I: Int>(
+    ctx: &CheckCtx,
+    argnum: usize,
+) -> (impl Iterator<Item = I> + Clone, u64)
+where
+    i32: CastInto<I>,
+{
+    let mut values = Vec::new();
+    let near_points = check_near_count(ctx);
+
+    // Check around max/min and zero
+    int_count_around(I::MIN, near_points, &mut values);
+    int_count_around(I::MAX, near_points, &mut values);
+    int_count_around(I::ZERO, near_points, &mut values);
+    int_count_around(I::ZERO, near_points, &mut values); // NOTE(review): duplicate; deduped below
+
+    if matches!(ctx.base_name, BaseName::Scalbn | BaseName::Ldexp) {
+        assert_eq!(argnum, 1, "scalbn integer argument should be arg1");
+        let (emax, emin, emin_sn) = match ctx.fn_ident.math_op().float_ty {
+            FloatTy::F16 => {
+                #[cfg(not(f16_enabled))]
+                unreachable!();
+                #[cfg(f16_enabled)]
+                (f16::EXP_MAX, f16::EXP_MIN, f16::EXP_MIN_SUBNORM)
+            }
+            FloatTy::F32 => (f32::EXP_MAX, f32::EXP_MIN, f32::EXP_MIN_SUBNORM),
+            FloatTy::F64 => (f64::EXP_MAX, f64::EXP_MIN, f64::EXP_MIN_SUBNORM),
+            FloatTy::F128 => {
+                #[cfg(not(f128_enabled))]
+                unreachable!();
+                #[cfg(f128_enabled)]
+                (f128::EXP_MAX, f128::EXP_MIN, f128::EXP_MIN_SUBNORM)
+            }
+        };
+
+        // `scalbn`/`ldexp` have their trickiest behavior around exponent limits
+        int_count_around(emax.cast(), near_points, &mut values);
+        int_count_around(emin.cast(), near_points, &mut values);
+        int_count_around(emin_sn.cast(), near_points, &mut values);
+        int_count_around((-emin_sn).cast(), near_points, &mut values);
+
+        // Also check values that cause the maximum possible difference in exponents
+        int_count_around((emax - emin).cast(), near_points, &mut values);
+        int_count_around((emin - emax).cast(), near_points, &mut values);
+        int_count_around((emax - emin_sn).cast(), near_points, &mut values);
+        int_count_around((emin_sn - emax).cast(), near_points, &mut values);
+    }
+
+    values.sort();
+    values.dedup();
+    let count = values.len().try_into().unwrap();
+
+    test_log(&format!(
+        "{gen_kind:?} {basis:?} {fn_ident} arg {arg}/{args}: {count} edge cases",
+        gen_kind = ctx.gen_kind,
+        basis = ctx.basis,
+        fn_ident = ctx.fn_ident,
+        arg = argnum + 1,
+        args = ctx.input_count(),
+    ));
+
+    (values.into_iter(), count)
+}
+
+/// Add up to `points` values in each direction from `x` (inclusive), stopping on overflow.
+fn int_count_around<I: Int>(x: I, points: u64, values: &mut Vec<I>) {
+    let mut current = x;
+    for _ in 0..points {
+        values.push(current);
+        current = match current.checked_add(I::ONE) {
+            Some(v) => v,
+            None => break,
+        };
+    }
+
+    current = x; // restart from `x` for the downward pass
+    for _ in 0..points {
+        values.push(current);
+        current = match current.checked_sub(I::ONE) {
+            Some(v) => v,
+            None => break,
+        };
+    }
+}
+
+macro_rules! impl_edge_case_input {
+    ($fty:ty) => {
+        impl<Op> EdgeCaseInput<Op> for ($fty,)
+        where
+            Op: MathOp<RustArgs = Self, FTy = $fty>,
+        {
+            fn get_cases(ctx: &CheckCtx) -> (impl Iterator<Item = Self>, u64) {
+                let (iter0, steps0) = float_edge_cases::<Op>(ctx, 0);
+                let iter0 = iter0.map(|v| (v,));
+                (iter0, steps0)
+            }
+        }
+
+        impl<Op> EdgeCaseInput<Op> for ($fty, $fty)
+        where
+            Op: MathOp<RustArgs = Self, FTy = $fty>,
+        {
+            fn get_cases(ctx: &CheckCtx) -> (impl Iterator<Item = Self>, u64) {
+                let (iter0, steps0) = float_edge_cases::<Op>(ctx, 0);
+                let (iter1, steps1) = float_edge_cases::<Op>(ctx, 1);
+                let iter =
+                    iter0.flat_map(move |first| iter1.clone().map(move |second| (first, second)));
+                let count = steps0.checked_mul(steps1).unwrap();
+                (iter, count)
+            }
+        }
+
+        impl<Op> EdgeCaseInput<Op> for ($fty, $fty, $fty)
+        where
+            Op: MathOp<RustArgs = Self, FTy = $fty>,
+        {
+            fn get_cases(ctx: &CheckCtx) -> (impl Iterator<Item = Self>, u64) {
+                let (iter0, steps0) = float_edge_cases::<Op>(ctx, 0);
+                let (iter1, steps1) = float_edge_cases::<Op>(ctx, 1);
+                let (iter2, steps2) = float_edge_cases::<Op>(ctx, 2);
+
+                let iter = iter0
+                    .flat_map(move |first| iter1.clone().map(move |second| (first, second)))
+                    .flat_map(move |(first, second)| {
+                        iter2.clone().map(move |third| (first, second, third))
+                    });
+                let count = steps0.checked_mul(steps1).unwrap().checked_mul(steps2).unwrap();
+
+                (iter, count)
+            }
+        }
+
+        impl<Op> EdgeCaseInput<Op> for (i32, $fty)
+        where
+            Op: MathOp<RustArgs = Self, FTy = $fty>,
+        {
+            fn get_cases(ctx: &CheckCtx) -> (impl Iterator<Item = Self>, u64) {
+                let (iter0, steps0) = int_edge_cases(ctx, 0);
+                let (iter1, steps1) = float_edge_cases::<Op>(ctx, 1);
+
+                let iter =
+                    iter0.flat_map(move |first| iter1.clone().map(move |second| (first, second)));
+                let count = steps0.checked_mul(steps1).unwrap();
+
+                (iter, count)
+            }
+        }
+
+        impl<Op> EdgeCaseInput<Op> for ($fty, i32)
+        where
+            Op: MathOp<RustArgs = Self, FTy = $fty>,
+        {
+            fn get_cases(ctx: &CheckCtx) -> (impl Iterator<Item = Self>, u64) {
+                let (iter0, steps0) = float_edge_cases::<Op>(ctx, 0);
+                let (iter1, steps1) = int_edge_cases(ctx, 1);
+
+                let iter =
+                    iter0.flat_map(move |first| iter1.clone().map(move |second| (first, second)));
+                let count = steps0.checked_mul(steps1).unwrap();
+
+                (iter, count)
+            }
+        }
+    };
+}
+
+#[cfg(f16_enabled)]
+impl_edge_case_input!(f16);
+impl_edge_case_input!(f32);
+impl_edge_case_input!(f64);
+#[cfg(f128_enabled)]
+impl_edge_case_input!(f128);
+
+pub fn get_test_cases<Op>(
+    ctx: &CheckCtx,
+) -> (impl Iterator<Item = Op::RustArgs> + Send + use<'_, Op>, u64)
+where
+    Op: MathOp,
+    Op::RustArgs: EdgeCaseInput<Op>,
+{
+    let (iter, count) = Op::RustArgs::get_cases(ctx);
+
+    // Wrap in `KnownSize` so we get an assertion if the count is wrong.
+    (KnownSize::new(iter, count), count)
+}
diff --git a/crates/libm-test/src/generate/random.rs b/crates/libm-test/src/generate/random.rs
new file mode 100644
index 000000000..e8a7ee905
--- /dev/null
+++ b/crates/libm-test/src/generate/random.rs
@@ -0,0 +1,125 @@
+use std::env;
+use std::ops::RangeInclusive;
+use std::sync::LazyLock;
+
+use libm::support::Float;
+use rand::distr::{Alphanumeric, StandardUniform};
+use rand::prelude::Distribution;
+use rand::{Rng, SeedableRng};
+use rand_chacha::ChaCha8Rng;
+
+use super::KnownSize;
+use crate::CheckCtx;
+use crate::run_cfg::{int_range, iteration_count};
+
+pub(crate) const SEED_ENV: &str = "LIBM_SEED";
+
+pub static SEED: LazyLock<[u8; 32]> = LazyLock::new(|| {
+    let s = env::var(SEED_ENV).unwrap_or_else(|_| {
+        let mut rng = rand::rng();
+        (0..32).map(|_| rng.sample(Alphanumeric) as char).collect()
+    });
+
+    s.as_bytes().try_into().unwrap_or_else(|_| {
+        panic!("Seed must be 32 characters, got `{s}`");
+    })
+});
+
+/// Generate a sequence of random values of this type.
+pub trait RandomInput: Sized {
+    fn get_cases(ctx: &CheckCtx) -> (impl Iterator<Item = Self> + Send, u64);
+}
+
+/// Generate a sequence of deterministically random floats.
+fn random_floats<F: Float>(count: u64) -> impl Iterator<Item = F>
+where
+    StandardUniform: Distribution<F::Int>,
+{
+    let mut rng = ChaCha8Rng::from_seed(*SEED);
+
+    // Generate integers to get a full range of bitpatterns (including NaNs), then convert back
+    // to the float type.
+    (0..count).map(move |_| F::from_bits(rng.random::<F::Int>()))
+}
+
+/// Generate a sequence of deterministically random `i32`s within a specified range.
+fn random_ints(count: u64, range: RangeInclusive<i32>) -> impl Iterator<Item = i32> {
+    let mut rng = ChaCha8Rng::from_seed(*SEED);
+    (0..count).map(move |_| rng.random_range::<i32, _>(range.clone()))
+}
+
+macro_rules! impl_random_input {
+    ($fty:ty) => {
+        impl RandomInput for ($fty,) {
+            fn get_cases(ctx: &CheckCtx) -> (impl Iterator<Item = Self>, u64) {
+                let count = iteration_count(ctx, 0);
+                let iter = random_floats(count).map(|f: $fty| (f,));
+                (iter, count)
+            }
+        }
+
+        impl RandomInput for ($fty, $fty) {
+            fn get_cases(ctx: &CheckCtx) -> (impl Iterator<Item = Self>, u64) {
+                let count0 = iteration_count(ctx, 0);
+                let count1 = iteration_count(ctx, 1);
+                let iter = random_floats(count0)
+                    .flat_map(move |f1: $fty| random_floats(count1).map(move |f2: $fty| (f1, f2)));
+                (iter, count0 * count1)
+            }
+        }
+
+        impl RandomInput for ($fty, $fty, $fty) {
+            fn get_cases(ctx: &CheckCtx) -> (impl Iterator<Item = Self>, u64) {
+                let count0 = iteration_count(ctx, 0);
+                let count1 = iteration_count(ctx, 1);
+                let count2 = iteration_count(ctx, 2);
+                let iter = random_floats(count0).flat_map(move |f1: $fty| {
+                    random_floats(count1).flat_map(move |f2: $fty| {
+                        random_floats(count2).map(move |f3: $fty| (f1, f2, f3))
+                    })
+                });
+                (iter, count0 * count1 * count2)
+            }
+        }
+
+        impl RandomInput for (i32, $fty) {
+            fn get_cases(ctx: &CheckCtx) -> (impl Iterator<Item = Self>, u64) {
+                let count0 = iteration_count(ctx, 0);
+                let count1 = iteration_count(ctx, 1);
+                let range0 = int_range(ctx, 0);
+                let iter = random_ints(count0, range0)
+                    .flat_map(move |f1: i32| random_floats(count1).map(move |f2: $fty| (f1, f2)));
+                (iter, count0 * count1)
+            }
+        }
+
+        impl RandomInput for ($fty, i32) {
+            fn get_cases(ctx: &CheckCtx) -> (impl Iterator<Item = Self>, u64) {
+                let count0 = iteration_count(ctx, 0);
+                let count1 = iteration_count(ctx, 1);
+                let range1 = int_range(ctx, 1);
+                let iter = random_floats(count0).flat_map(move |f1: $fty| {
+                    random_ints(count1, range1.clone()).map(move |f2: i32| (f1, f2))
+                });
+                (iter, count0 * count1)
+            }
+        }
+    };
+}
+
+#[cfg(f16_enabled)]
+impl_random_input!(f16);
+impl_random_input!(f32);
+impl_random_input!(f64);
+#[cfg(f128_enabled)]
+impl_random_input!(f128);
+
+/// Create a test case iterator. Also returns the total test case count.
+pub fn get_test_cases<RustArgs: RandomInput>(
+    ctx: &CheckCtx,
+) -> (impl Iterator<Item = RustArgs> + Send + use<'_, RustArgs>, u64) {
+    let (iter, count) = RustArgs::get_cases(ctx);
+
+    // Wrap in `KnownSize` so we get an assertion if the count is wrong.
+    (KnownSize::new(iter, count), count)
+}
diff --git a/crates/libm-test/src/generate/spaced.rs b/crates/libm-test/src/generate/spaced.rs
new file mode 100644
index 000000000..bea3f4c7e
--- /dev/null
+++ b/crates/libm-test/src/generate/spaced.rs
@@ -0,0 +1,253 @@
+use std::fmt;
+use std::ops::RangeInclusive;
+
+use libm::support::{Float, MinInt};
+
+use crate::domain::get_domain;
+use crate::op::OpITy;
+use crate::run_cfg::{int_range, iteration_count};
+use crate::{CheckCtx, MathOp, linear_ints, logspace};
+
+/// Generate a sequence of inputs that either cover the domain completely (for smaller float
+/// types and single argument functions) or provide evenly spaced inputs across the domain with
+/// approximately `u32::MAX` total iterations.
+pub trait SpacedInput<Op> {
+    fn get_cases(ctx: &CheckCtx) -> (impl Iterator<Item = Self> + Send, u64);
+}
+
+/// Construct an iterator from `logspace` and also calculate the total number of steps expected
+/// for that iterator.
+fn logspace_steps<Op>(
+    ctx: &CheckCtx,
+    argnum: usize,
+    max_steps: u64,
+) -> (impl Iterator<Item = Op::FTy> + Clone, u64)
+where
+    Op: MathOp,
+    OpITy<Op>: TryFrom<u64, Error: fmt::Debug>,
+    u64: TryFrom<OpITy<Op>, Error: fmt::Debug>,
+    RangeInclusive<OpITy<Op>>: Iterator,
+{
+    // i8 is a dummy type here, it can be any integer.
+    let domain = get_domain::<Op::FTy, i8>(ctx.fn_ident, argnum).unwrap_float();
+    let start = domain.range_start();
+    let end = domain.range_end();
+
+    let max_steps = OpITy::<Op>::try_from(max_steps).unwrap_or(OpITy::<Op>::MAX);
+    let (iter, steps) = logspace(start, end, max_steps);
+
+    // `steps` will be <= the original `max_steps`, which is a `u64`.
+    (iter, steps.try_into().unwrap())
+}
+
+/// Represents the iterator in either `A` or `B`.
+enum EitherIter<A, B> {
+    A(A),
+    B(B),
+}
+
+impl<T, A: Iterator<Item = T>, B: Iterator<Item = T>> Iterator for EitherIter<A, B> {
+    type Item = T;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        match self {
+            Self::A(iter) => iter.next(),
+            Self::B(iter) => iter.next(),
+        }
+    }
+
+    fn size_hint(&self) -> (usize, Option<usize>) {
+        match self {
+            Self::A(iter) => iter.size_hint(),
+            Self::B(iter) => iter.size_hint(),
+        }
+    }
+}
+
+/// Gets the total number of possible values, returning `None` if that number doesn't fit in a
+/// `u64`.
+fn value_count<F: Float>() -> Option<u64>
+where
+    u64: TryFrom<F::Int>,
+{
+    u64::try_from(F::Int::MAX).ok().and_then(|max| max.checked_add(1))
+}
+
+/// Returns an iterator of every possible value of type `F`.
+fn all_values<F: Float>() -> impl Iterator<Item = F>
+where
+    RangeInclusive<F::Int>: Iterator<Item = F::Int>,
+{
+    (F::Int::MIN..=F::Int::MAX).map(|bits| F::from_bits(bits))
+}
+
+macro_rules! impl_spaced_input {
+    ($fty:ty) => {
+        impl<Op> SpacedInput<Op> for ($fty,)
+        where
+            Op: MathOp<RustArgs = Self, FTy = $fty>,
+        {
+            fn get_cases(ctx: &CheckCtx) -> (impl Iterator<Item = Self>, u64) {
+                let max_steps0 = iteration_count(ctx, 0);
+                // `f16` and `f32` can have exhaustive tests.
+                match value_count::<Op::FTy>() {
+                    Some(steps0) if steps0 <= max_steps0 => {
+                        let iter0 = all_values();
+                        let iter0 = iter0.map(|v| (v,));
+                        (EitherIter::A(iter0), steps0)
+                    }
+                    _ => {
+                        let (iter0, steps0) = logspace_steps::<Op>(ctx, 0, max_steps0);
+                        let iter0 = iter0.map(|v| (v,));
+                        (EitherIter::B(iter0), steps0)
+                    }
+                }
+            }
+        }
+
+        impl<Op> SpacedInput<Op> for ($fty, $fty)
+        where
+            Op: MathOp<RustArgs = Self, FTy = $fty>,
+        {
+            fn get_cases(ctx: &CheckCtx) -> (impl Iterator<Item = Self>, u64) {
+                let max_steps0 = iteration_count(ctx, 0);
+                let max_steps1 = iteration_count(ctx, 1);
+                // `f16` can have exhaustive tests.
+                match value_count::<Op::FTy>() {
+                    Some(count) if count <= max_steps0 && count <= max_steps1 => {
+                        let iter = all_values()
+                            .flat_map(|first| all_values().map(move |second| (first, second)));
+                        (EitherIter::A(iter), count.checked_mul(count).unwrap())
+                    }
+                    _ => {
+                        let (iter0, steps0) = logspace_steps::<Op>(ctx, 0, max_steps0);
+                        let (iter1, steps1) = logspace_steps::<Op>(ctx, 1, max_steps1);
+                        let iter = iter0.flat_map(move |first| {
+                            iter1.clone().map(move |second| (first, second))
+                        });
+                        let count = steps0.checked_mul(steps1).unwrap();
+                        (EitherIter::B(iter), count)
+                    }
+                }
+            }
+        }
+
+        impl<Op> SpacedInput<Op> for ($fty, $fty, $fty)
+        where
+            Op: MathOp<RustArgs = Self, FTy = $fty>,
+        {
+            fn get_cases(ctx: &CheckCtx) -> (impl Iterator<Item = Self>, u64) {
+                let max_steps0 = iteration_count(ctx, 0);
+                let max_steps1 = iteration_count(ctx, 1);
+                let max_steps2 = iteration_count(ctx, 2);
+                // `f16` can be exhaustively tested if `LIBM_EXTENSIVE_TESTS` is increased.
+                match value_count::<Op::FTy>() {
+                    Some(count)
+                        if count <= max_steps0 && count <= max_steps1 && count <= max_steps2 =>
+                    {
+                        let iter = all_values().flat_map(|first| {
+                            all_values().flat_map(move |second| {
+                                all_values().map(move |third| (first, second, third))
+                            })
+                        });
+                        (EitherIter::A(iter), count.checked_pow(3).unwrap())
+                    }
+                    _ => {
+                        let (iter0, steps0) = logspace_steps::<Op>(ctx, 0, max_steps0);
+                        let (iter1, steps1) = logspace_steps::<Op>(ctx, 1, max_steps1);
+                        let (iter2, steps2) = logspace_steps::<Op>(ctx, 2, max_steps2);
+
+                        let iter = iter0
+                            .flat_map(move |first| iter1.clone().map(move |second| (first, second)))
+                            .flat_map(move |(first, second)| {
+                                iter2.clone().map(move |third| (first, second, third))
+                            });
+                        let count =
+                            steps0.checked_mul(steps1).unwrap().checked_mul(steps2).unwrap();
+
+                        (EitherIter::B(iter), count)
+                    }
+                }
+            }
+        }
+
+        impl<Op> SpacedInput<Op> for (i32, $fty)
+        where
+            Op: MathOp<RustArgs = Self, FTy = $fty>,
+        {
+            fn get_cases(ctx: &CheckCtx) -> (impl Iterator<Item = Self>, u64) {
+                let range0 = int_range(ctx, 0);
+                let max_steps0 = iteration_count(ctx, 0);
+                let max_steps1 = iteration_count(ctx, 1);
+                match value_count::<Op::FTy>() {
+                    Some(count1) if count1 <= max_steps1 => {
+                        let (iter0, steps0) = linear_ints(range0, max_steps0);
+                        let iter = iter0
+                            .flat_map(move |first| all_values().map(move |second| (first, second)));
+                        (EitherIter::A(iter), steps0.checked_mul(count1).unwrap())
+                    }
+                    _ => {
+                        let (iter0, steps0) = linear_ints(range0, max_steps0);
+                        let (iter1, steps1) = logspace_steps::<Op>(ctx, 1, max_steps1);
+
+                        let iter = iter0.flat_map(move |first| {
+                            iter1.clone().map(move |second| (first, second))
+                        });
+                        let count = steps0.checked_mul(steps1).unwrap();
+
+                        (EitherIter::B(iter), count)
+                    }
+                }
+            }
+        }
+
+        impl<Op> SpacedInput<Op> for ($fty, i32)
+        where
+            Op: MathOp<RustArgs = Self, FTy = $fty>,
+        {
+            fn get_cases(ctx: &CheckCtx) -> (impl Iterator<Item = Self>, u64) {
+                let max_steps0 = iteration_count(ctx, 0);
+                let range1 = int_range(ctx, 1);
+                let max_steps1 = iteration_count(ctx, 1);
+                match value_count::<Op::FTy>() {
+                    Some(count0) if count0 <= max_steps0 => {
+                        let (iter1, steps1) = linear_ints(range1, max_steps1);
+                        let iter = all_values().flat_map(move |first| {
+                            iter1.clone().map(move |second| (first, second))
+                        });
+                        (EitherIter::A(iter), count0.checked_mul(steps1).unwrap())
+                    }
+                    _ => {
+                        let (iter0, steps0) = logspace_steps::<Op>(ctx, 0, max_steps0);
+                        let (iter1, steps1) = linear_ints(range1, max_steps1);
+
+                        let iter = iter0.flat_map(move |first| {
+                            iter1.clone().map(move |second| (first, second))
+                        });
+                        let count = steps0.checked_mul(steps1).unwrap();
+
+                        (EitherIter::B(iter), count)
+                    }
+                }
+            }
+        }
+    };
+}
+
+#[cfg(f16_enabled)]
+impl_spaced_input!(f16);
+impl_spaced_input!(f32);
+impl_spaced_input!(f64);
+#[cfg(f128_enabled)]
+impl_spaced_input!(f128);
+
+/// Create a test case iterator for extensive inputs. Also returns the total test case count.
+pub fn get_test_cases<Op>(
+    ctx: &CheckCtx,
+) -> (impl Iterator<Item = Op::RustArgs> + Send + use<'_, Op>, u64)
+where
+    Op: MathOp,
+    Op::RustArgs: SpacedInput<Op>,
+{
+    Op::RustArgs::get_cases(ctx)
+}
diff --git a/crates/libm-test/src/lib.rs b/crates/libm-test/src/lib.rs
index 2abe7f605..485c01a47 100644
--- a/crates/libm-test/src/lib.rs
+++ b/crates/libm-test/src/lib.rs
@@ -1,68 +1,44 @@
-pub mod gen;
-mod num_traits;
-mod special_case;
+#![cfg_attr(f16_enabled, feature(f16))]
+#![cfg_attr(f128_enabled, feature(f128))]
+#![allow(clippy::unusual_byte_groupings)] // sometimes we group by sign_exp_sig
+
+pub mod domain;
+mod f8_impl;
+pub mod generate;
+#[cfg(feature = "build-mpfr")]
+pub mod mpfloat;
+mod num;
+pub mod op;
+mod precision;
+mod run_cfg;
 mod test_traits;
 
-pub use num_traits::{Float, Hex, Int};
-pub use special_case::{MaybeOverride, SpecialCase};
-pub use test_traits::{CheckBasis, CheckCtx, CheckOutput, GenerateInput, TupleCall};
+use std::env;
+use std::fs::File;
+use std::io::Write;
+use std::path::PathBuf;
+use std::sync::LazyLock;
+use std::time::SystemTime;
+
+pub use f8_impl::{f8, hf8};
+pub use libm::support::{Float, Int, IntTy, MinInt};
+pub use num::{FloatExt, linear_ints, logspace};
+pub use op::{
+    BaseName, FloatTy, Identifier, MathOp, OpCFn, OpCRet, OpFTy, OpRustArgs, OpRustFn, OpRustRet,
+    Ty,
+};
+pub use precision::{MaybeOverride, SpecialCase, default_ulp};
+use run_cfg::extensive_max_iterations;
+pub use run_cfg::{
+    CheckBasis, CheckCtx, EXTENSIVE_ENV, GeneratorKind, bigint_fuzz_iteration_count,
+    skip_extensive_test,
+};
+pub use test_traits::{CheckOutput, Hex, TupleCall};
 
 /// Result type for tests is usually from `anyhow`. Most times there is no success value to
 /// propagate.
 pub type TestResult<T = (), E = anyhow::Error> = Result<T, E>;
 
-// List of all files present in libm's source
-include!(concat!(env!("OUT_DIR"), "/all_files.rs"));
-
-/// ULP allowed to differ from musl (note that musl itself may not be accurate).
-const MUSL_DEFAULT_ULP: u32 = 2;
-
-/// Certain functions have different allowed ULP (consider these xfail).
-///
-/// Note that these results were obtained using 400,000,000 rounds of random inputs, which
-/// is not a value used by default.
-pub fn musl_allowed_ulp(name: &str) -> u32 {
-    match name {
-        #[cfg(x86_no_sse)]
-        "asinh" | "asinhf" => 6,
-        "lgamma" | "lgamma_r" | "lgammaf" | "lgammaf_r" => 400,
-        "tanh" | "tanhf" => 4,
-        "tgamma" => 20,
-        "j0" | "j0f" | "j1" | "j1f" => {
-            // Results seem very target-dependent
-            if cfg!(target_arch = "x86_64") { 4000 } else { 800_000 }
-        }
-        "jn" | "jnf" => 1000,
-        "sincosf" => 500,
-        #[cfg(not(target_pointer_width = "64"))]
-        "exp10" => 4,
-        #[cfg(not(target_pointer_width = "64"))]
-        "exp10f" => 4,
-        _ => MUSL_DEFAULT_ULP,
-    }
-}
-
-/// Return the unsuffixed version of a function name; e.g. `abs` and `absf` both return `abs`,
-/// `lgamma_r` and `lgammaf_r` both return `lgamma_r`.
-pub fn canonical_name(name: &str) -> &str {
-    let known_mappings = &[
-        ("erff", "erf"),
-        ("erf", "erf"),
-        ("lgammaf_r", "lgamma_r"),
-        ("modff", "modf"),
-        ("modf", "modf"),
-    ];
-
-    match known_mappings.iter().find(|known| known.0 == name) {
-        Some(found) => found.1,
-        None => name
-            .strip_suffix("f")
-            .or_else(|| name.strip_suffix("f16"))
-            .or_else(|| name.strip_suffix("f128"))
-            .unwrap_or(name),
-    }
-}
-
 /// True if `EMULATED` is set and nonempty. Used to determine how many iterations to run.
 pub const fn emulated() -> bool {
     match option_env!("EMULATED") {
@@ -71,3 +47,59 @@ pub const fn emulated() -> bool {
         Some(_) => true,
     }
 }
+
+/// True if `CI` is set and nonempty.
+pub const fn ci() -> bool {
+    match option_env!("CI") {
+        Some(s) if s.is_empty() => false,
+        None => false,
+        Some(_) => true,
+    }
+}
+
+/// Print to stderr and additionally log it to `target/test-log.txt`. This is useful for saving
+/// output that would otherwise be consumed by the test harness.
+pub fn test_log(s: &str) {
+    // Handle to a file opened in append mode, unless a suitable path can't be determined.
+    static OUTFILE: LazyLock<Option<File>> = LazyLock::new(|| {
+        // If the target directory is overridden, use that environment variable. Otherwise, save
+        // at the default path `{workspace_root}/target`.
+        let target_dir = match env::var("CARGO_TARGET_DIR") {
+            Ok(s) => PathBuf::from(s),
+            Err(_) => {
+                let Ok(x) = env::var("CARGO_MANIFEST_DIR") else {
+                    return None;
+                };
+
+                PathBuf::from(x).parent().unwrap().parent().unwrap().join("target")
+            }
+        };
+        let outfile = target_dir.join("test-log.txt");
+
+        let mut f = File::options()
+            .create(true)
+            .append(true)
+            .open(outfile)
+            .expect("failed to open logfile");
+        let now = SystemTime::now().duration_since(SystemTime::UNIX_EPOCH).unwrap();
+
+        writeln!(f, "\n\nTest run at {}", now.as_secs()).unwrap();
+        writeln!(f, "arch: {}", env::consts::ARCH).unwrap();
+        writeln!(f, "os: {}", env::consts::OS).unwrap();
+        writeln!(f, "bits: {}", usize::BITS).unwrap();
+        writeln!(f, "emulated: {}", emulated()).unwrap();
+        writeln!(f, "ci: {}", ci()).unwrap();
+        writeln!(f, "cargo features: {}", env!("CFG_CARGO_FEATURES")).unwrap();
+        writeln!(f, "opt level: {}", env!("CFG_OPT_LEVEL")).unwrap();
+        writeln!(f, "target features: {}", env!("CFG_TARGET_FEATURES")).unwrap();
+        writeln!(f, "extensive iterations {}", extensive_max_iterations()).unwrap();
+
+        Some(f)
+    });
+
+    eprintln!("{s}");
+
+    if let Some(mut f) = OUTFILE.as_ref() {
+        writeln!(f, "{s}").unwrap();
+    }
+}
diff --git a/crates/libm-test/src/mpfloat.rs b/crates/libm-test/src/mpfloat.rs
new file mode 100644
index 000000000..9b51dc605
--- /dev/null
+++ b/crates/libm-test/src/mpfloat.rs
@@ -0,0 +1,603 @@
+//! Interfaces needed to support testing with multi-precision floating point numbers.
+//!
+//! Within this module, the macros create a submodule for each `libm` function. These contain
+//! a struct named `Operation` that implements [`MpOp`].
+
+use std::cmp::Ordering;
+
+use rug::Assign;
+pub use rug::Float as MpFloat;
+use rug::az::{self, Az};
+use rug::float::Round::Nearest;
+use rug::ops::{PowAssignRound, RemAssignRound};
+
+use crate::{Float, MathOp};
+
+/// Create a multiple-precision float with the correct number of bits for a concrete float type.
+fn new_mpfloat<F: Float>() -> MpFloat {
+    MpFloat::new(F::SIG_BITS + 1)
+}
+
+/// Set subnormal emulation and convert to a concrete float type.
+fn prep_retval<F: Float>(mp: &mut MpFloat, ord: Ordering) -> F
+where
+    for<'a> &'a MpFloat: az::Cast<F>,
+{
+    mp.subnormalize_ieee_round(ord, Nearest);
+    (&*mp).az::<F>()
+}
+
+/// Structures that represent a float operation.
+/// Implemented for the per-function `Routine` types in `crate::op`.
+pub trait MpOp: MathOp {
+    /// The struct itself should hold any context that can be reused among calls to `run` (allocated
+    /// `MpFloat`s).
+    type MpTy;
+
+    /// Create a new instance.
+    fn new_mp() -> Self::MpTy;
+
+    /// Perform the operation.
+    ///
+    /// Usually this means assigning inputs to cached floats, performing the operation, applying
+    /// subnormal approximation, and converting the result back to concrete values.
+    fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet;
+}
+
+/// Implement `MpOp` for functions with a single return value.
+macro_rules! impl_mp_op {
+    // Matcher for unary functions
+    (
+        fn_name: $fn_name:ident,
+        RustFn: fn($_fty:ty,) -> $_ret:ty,
+        attrs: [$($attr:meta),*],
+        fn_extra: $fn_name_normalized:expr,
+    ) => {
+        paste::paste! {
+            $(#[$attr])*
+            impl MpOp for crate::op::$fn_name::Routine {
+                type MpTy = MpFloat;
+
+                fn new_mp() -> Self::MpTy {
+                    new_mpfloat::<Self::FTy>()
+                }
+
+                fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet {
+                    this.assign(input.0);
+                    let ord = this.[< $fn_name_normalized _round >](Nearest);
+                    prep_retval::<Self::RustRet>(this, ord)
+                }
+            }
+        }
+    };
+    // Matcher for binary functions
+    (
+        fn_name: $fn_name:ident,
+        RustFn: fn($_fty:ty, $_fty2:ty,) -> $_ret:ty,
+        attrs: [$($attr:meta),*],
+        fn_extra: $fn_name_normalized:expr,
+    ) => {
+        paste::paste! {
+            $(#[$attr])*
+            impl MpOp for crate::op::$fn_name::Routine {
+                type MpTy = (MpFloat, MpFloat);
+
+                fn new_mp() -> Self::MpTy {
+                    (new_mpfloat::<Self::FTy>(), new_mpfloat::<Self::FTy>())
+                }
+
+                fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet {
+                    this.0.assign(input.0);
+                    this.1.assign(input.1);
+                    let ord = this.0.[< $fn_name_normalized _round >](&this.1, Nearest);
+                    prep_retval::<Self::RustRet>(&mut this.0, ord)
+                }
+            }
+        }
+    };
+    // Matcher for ternary functions
+    (
+        fn_name: $fn_name:ident,
+        RustFn: fn($_fty:ty, $_fty2:ty, $_fty3:ty,) -> $_ret:ty,
+        attrs: [$($attr:meta),*],
+        fn_extra: $fn_name_normalized:expr,
+    ) => {
+        paste::paste! {
+            $(#[$attr])*
+            impl MpOp for crate::op::$fn_name::Routine {
+                type MpTy = (MpFloat, MpFloat, MpFloat);
+
+                fn new_mp() -> Self::MpTy {
+                    (
+                        new_mpfloat::<Self::FTy>(),
+                        new_mpfloat::<Self::FTy>(),
+                        new_mpfloat::<Self::FTy>(),
+                    )
+                }
+
+                fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet {
+                    this.0.assign(input.0);
+                    this.1.assign(input.1);
+                    this.2.assign(input.2);
+                    let ord = this.0.[< $fn_name_normalized _round >](&this.1, &this.2, Nearest);
+                    prep_retval::<Self::RustRet>(&mut this.0, ord)
+                }
+            }
+        }
+    };
+}
+
+libm_macros::for_each_function! {
+    callback: impl_mp_op,
+    emit_types: [RustFn],
+    skip: [
+        // Most of these need a manual implementation
+        // verify-sorted-start
+        ceil,
+        ceilf,
+        ceilf128,
+        ceilf16,
+        copysign,
+        copysignf,
+        copysignf128,
+        copysignf16,
+        fabs,
+        fabsf,
+        fabsf128,
+        fabsf16,
+        floor,
+        floorf,
+        floorf128,
+        floorf16,
+        fmaximum,
+        fmaximumf,
+        fmaximumf128,
+        fmaximumf16,
+        fminimum,
+        fminimumf,
+        fminimumf128,
+        fminimumf16,
+        fmod,
+        fmodf,
+        fmodf128,
+        fmodf16,
+        frexp,
+        frexpf,
+        ilogb,
+        ilogbf,
+        jn,
+        jnf,
+        ldexp,
+        ldexpf,
+        ldexpf128,
+        ldexpf16,
+        lgamma_r,
+        lgammaf_r,
+        modf,
+        modff,
+        nextafter,
+        nextafterf,
+        pow,
+        powf,
+        remquo,
+        remquof,
+        rint,
+        rintf,
+        rintf128,
+        rintf16,
+        round,
+        roundeven,
+        roundevenf,
+        roundevenf128,
+        roundevenf16,
+        roundf,
+        roundf128,
+        roundf16,
+        scalbn,
+        scalbnf,
+        scalbnf128,
+        scalbnf16,
+        sincos,
+        sincosf,
+        trunc,
+        truncf,
+        truncf128,
+        truncf16,
+        yn,
+        ynf,
+        // verify-sorted-end
+    ],
+    fn_extra: match MACRO_FN_NAME {
+        // Remap function names that are different between mpfr and libm
+        expm1 | expm1f => exp_m1,
+        fabs | fabsf => abs,
+        fdim | fdimf | fdimf16 | fdimf128  => positive_diff,
+        fma | fmaf | fmaf128 => mul_add,
+        fmax | fmaxf | fmaxf16 | fmaxf128 |
+        fmaximum_num | fmaximum_numf | fmaximum_numf16 | fmaximum_numf128 => max,
+        fmin | fminf | fminf16 | fminf128 |
+        fminimum_num | fminimum_numf | fminimum_numf16 | fminimum_numf128 => min,
+        lgamma | lgammaf => ln_gamma,
+        log | logf => ln,
+        log1p | log1pf => ln_1p,
+        tgamma | tgammaf => gamma,
+        _ => MACRO_FN_NAME_NORMALIZED
+    }
+}
+
+/// Implement unary functions that don't have a `_round` version
+macro_rules! impl_no_round {
+    // Unary matcher
+    ($($fn_name:ident => $rug_name:ident;)*) => {
+        paste::paste! {
+            $( impl_no_round!{ @inner_unary $fn_name, $rug_name } )*
+        }
+    };
+
+    (@inner_unary $fn_name:ident, $rug_name:ident) => {
+        impl MpOp for crate::op::$fn_name::Routine {
+            type MpTy = MpFloat;
+
+            fn new_mp() -> Self::MpTy {
+                new_mpfloat::<Self::FTy>()
+            }
+
+            fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet {
+                this.assign(input.0);
+                this.$rug_name();
+                prep_retval::<Self::RustRet>(this, Ordering::Equal)
+            }
+        }
+    };
+}
+
+impl_no_round! {
+    ceil => ceil_mut;
+    ceilf => ceil_mut;
+    fabs => abs_mut;
+    fabsf => abs_mut;
+    floor => floor_mut;
+    floorf => floor_mut;
+    rint => round_even_mut; // FIXME: respect rounding mode
+    rintf => round_even_mut; // FIXME: respect rounding mode
+    round => round_mut;
+    roundeven => round_even_mut;
+    roundevenf => round_even_mut;
+    roundf => round_mut;
+    trunc => trunc_mut;
+    truncf => trunc_mut;
+}
+
+#[cfg(f16_enabled)]
+impl_no_round! {
+    ceilf16 => ceil_mut;
+    fabsf16 => abs_mut;
+    floorf16 => floor_mut;
+    rintf16 => round_even_mut; // FIXME: respect rounding mode
+    roundf16 => round_mut;
+    roundevenf16 => round_even_mut;
+    truncf16 => trunc_mut;
+}
+
+#[cfg(f128_enabled)]
+impl_no_round! {
+    ceilf128 => ceil_mut;
+    fabsf128 => abs_mut;
+    floorf128 => floor_mut;
+    rintf128 => round_even_mut; // FIXME: respect rounding mode
+    roundf128 => round_mut;
+    roundevenf128 => round_even_mut;
+    truncf128 => trunc_mut;
+}
+
+/// Some functions are difficult to do in a generic way. Implement them here.
+macro_rules! impl_op_for_ty {
+    ($fty:ty, $suffix:literal) => {
+        paste::paste! {
+            impl MpOp for crate::op::[<modf $suffix>]::Routine {
+                type MpTy = (MpFloat, MpFloat);
+
+                fn new_mp() -> Self::MpTy {
+                    (new_mpfloat::<Self::FTy>(), new_mpfloat::<Self::FTy>())
+                }
+
+                fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet {
+                    this.0.assign(input.0);
+                    this.1.assign(&this.0);
+                    let (ord0, ord1) = this.0.trunc_fract_round(&mut this.1, Nearest);
+                    (
+                        prep_retval::<Self::FTy>(&mut this.1, ord0),
+                        prep_retval::<Self::FTy>(&mut this.0, ord1),
+                    )
+                }
+            }
+
+            impl MpOp for crate::op::[<pow $suffix>]::Routine {
+                type MpTy = (MpFloat, MpFloat);
+
+                fn new_mp() -> Self::MpTy {
+                    (new_mpfloat::<Self::FTy>(), new_mpfloat::<Self::FTy>())
+                }
+
+                fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet {
+                    this.0.assign(input.0);
+                    this.1.assign(input.1);
+                    let ord = this.0.pow_assign_round(&this.1, Nearest);
+                    prep_retval::<Self::RustRet>(&mut this.0, ord)
+                }
+            }
+
+            impl MpOp for crate::op::[<frexp $suffix>]::Routine {
+                type MpTy = MpFloat;
+
+                fn new_mp() -> Self::MpTy {
+                    new_mpfloat::<Self::FTy>()
+                }
+
+                fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet {
+                    this.assign(input.0);
+                    let exp = this.frexp_mut();
+                    (prep_retval::<Self::FTy>(this, Ordering::Equal), exp)
+                }
+            }
+
+            // `ilogb`: report the exponent of the input as an `i32`, with sentinel values for
+            // inputs that have no exponent.
+            impl MpOp for crate::op::[<ilogb $suffix>]::Routine {
+                type MpTy = MpFloat;
+
+                fn new_mp() -> Self::MpTy {
+                    new_mpfloat::<Self::FTy>()
+                }
+
+                fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet {
+                    this.assign(input.0);
+
+                    match this.get_exp() {
+                        // `get_exp` follows `frexp` for `0.5 <= |m| < 1.0`. Adjust the exponent
+                        // by one to scale the significand to `1.0 <= |m| < 2.0`.
+                        Some(exp) => exp - 1,
+                        // No exponent available: the input is infinite, zero, or NaN.
+                        None if this.is_infinite() => i32::MAX,
+                        // Zero or NaN
+                        None => i32::MIN,
+                    }
+                }
+            }
+
+            // `jn`: the input tuple is `(n, x)`; `x` is loaded into the scratch float and `n`
+            // is passed to `jn_round`.
+            impl MpOp for crate::op::[<jn $suffix>]::Routine {
+                type MpTy = MpFloat;
+
+                fn new_mp() -> Self::MpTy {
+                    new_mpfloat::<Self::FTy>()
+                }
+
+                fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet {
+                    this.assign(input.1);
+                    let rounding = this.jn_round(input.0, Nearest);
+                    prep_retval::<Self::FTy>(this, rounding)
+                }
+            }
+
+            // `sincos`: slot 0 starts as the input and ends as the sine; slot 1 receives the
+            // cosine.
+            impl MpOp for crate::op::[<sincos $suffix>]::Routine {
+                type MpTy = (MpFloat, MpFloat);
+
+                fn new_mp() -> Self::MpTy {
+                    let sin = new_mpfloat::<Self::FTy>();
+                    let cos = new_mpfloat::<Self::FTy>();
+                    (sin, cos)
+                }
+
+                fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet {
+                    this.0.assign(input.0);
+                    // Reset the second slot before it is overwritten with the cosine.
+                    this.1.assign(0.0);
+                    let (sin_rounding, cos_rounding) = this.0.sin_cos_round(&mut this.1, Nearest);
+                    let sin = prep_retval::<Self::FTy>(&mut this.0, sin_rounding);
+                    let cos = prep_retval::<Self::FTy>(&mut this.1, cos_rounding);
+                    (sin, cos)
+                }
+            }
+
+            // `remquo`: slot 0 holds the dividend and receives the remainder, slot 1 holds the
+            // divisor. The quotient value comes back from `remainder_quo31_round`.
+            impl MpOp for crate::op::[<remquo $suffix>]::Routine {
+                type MpTy = (MpFloat, MpFloat);
+
+                fn new_mp() -> Self::MpTy {
+                    (new_mpfloat::<Self::FTy>(), new_mpfloat::<Self::FTy>())
+                }
+
+                fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet {
+                    let (dividend, divisor) = input;
+                    this.0.assign(dividend);
+                    this.1.assign(divisor);
+                    let (rounding, quotient) = this.0.remainder_quo31_round(&this.1, Nearest);
+                    let remainder = prep_retval::<Self::FTy>(&mut this.0, rounding);
+                    (remainder, quotient)
+                }
+            }
+
+            // `yn`: the input tuple is `(n, x)`; `x` is loaded into the scratch float and `n`
+            // is passed to `yn_round`.
+            impl MpOp for crate::op::[<yn $suffix>]::Routine {
+                type MpTy = MpFloat;
+
+                fn new_mp() -> Self::MpTy {
+                    new_mpfloat::<Self::FTy>()
+                }
+
+                fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet {
+                    this.assign(input.1);
+                    let rounding = this.yn_round(input.0, Nearest);
+                    prep_retval::<Self::FTy>(this, rounding)
+                }
+            }
+        }
+    };
+}
+
+/// Version of `impl_op_for_ty` with only functions that have `f16` and `f128` implementations.
+///
+/// `$fty` is the float type and `$suffix` is the text appended to each base routine name (for
+/// example, `"f"` turns `fmod` into `fmodf`).
+macro_rules! impl_op_for_ty_all {
+    ($fty:ty, $suffix:literal) => {
+        paste::paste! {
+            // `copysign`: `copysign_mut` yields no rounding direction, so `Equal` is passed on.
+            impl MpOp for crate::op::[<copysign $suffix>]::Routine {
+                type MpTy = (MpFloat, MpFloat);
+
+                fn new_mp() -> Self::MpTy {
+                    (new_mpfloat::<Self::FTy>(), new_mpfloat::<Self::FTy>())
+                }
+
+                fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet {
+                    let (magnitude, sign) = input;
+                    this.0.assign(magnitude);
+                    this.1.assign(sign);
+                    this.0.copysign_mut(&this.1);
+                    prep_retval::<Self::RustRet>(&mut this.0, Ordering::Equal)
+                }
+            }
+
+            // `fmod`: remainder computed in place in slot 0 with slot 1 as the divisor.
+            impl MpOp for crate::op::[<fmod $suffix>]::Routine {
+                type MpTy = (MpFloat, MpFloat);
+
+                fn new_mp() -> Self::MpTy {
+                    (new_mpfloat::<Self::FTy>(), new_mpfloat::<Self::FTy>())
+                }
+
+                fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet {
+                    let (dividend, divisor) = input;
+                    this.0.assign(dividend);
+                    this.1.assign(divisor);
+                    let rounding = this.0.rem_assign_round(&this.1, Nearest);
+                    prep_retval::<Self::RustRet>(&mut this.0, rounding)
+                }
+            }
+
+            // `fmaximum`: any NaN operand forces a NaN result; otherwise defer to `max_round`.
+            impl MpOp for crate::op::[<fmaximum $suffix>]::Routine {
+                type MpTy = (MpFloat, MpFloat);
+
+                fn new_mp() -> Self::MpTy {
+                    (new_mpfloat::<Self::FTy>(), new_mpfloat::<Self::FTy>())
+                }
+
+                fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet {
+                    let (lhs, rhs) = input;
+                    this.0.assign(lhs);
+                    this.1.assign(rhs);
+                    let either_nan = this.0.is_nan() || this.1.is_nan();
+                    let rounding = if either_nan {
+                        this.0.assign($fty::NAN);
+                        Ordering::Equal
+                    } else {
+                        this.0.max_round(&this.1, Nearest)
+                    };
+                    prep_retval::<Self::RustRet>(&mut this.0, rounding)
+                }
+            }
+
+            // `fminimum`: any NaN operand forces a NaN result; otherwise defer to `min_round`.
+            impl MpOp for crate::op::[<fminimum $suffix>]::Routine {
+                type MpTy = (MpFloat, MpFloat);
+
+                fn new_mp() -> Self::MpTy {
+                    (new_mpfloat::<Self::FTy>(), new_mpfloat::<Self::FTy>())
+                }
+
+                fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet {
+                    let (lhs, rhs) = input;
+                    this.0.assign(lhs);
+                    this.1.assign(rhs);
+                    let either_nan = this.0.is_nan() || this.1.is_nan();
+                    let rounding = if either_nan {
+                        this.0.assign($fty::NAN);
+                        Ordering::Equal
+                    } else {
+                        this.0.min_round(&this.1, Nearest)
+                    };
+                    prep_retval::<Self::RustRet>(&mut this.0, rounding)
+                }
+            }
+
+            // `ldexp` and `scalbn` are the same for binary floating point, so just forward all
+            // methods.
+            impl MpOp for crate::op::[<ldexp $suffix>]::Routine {
+                type MpTy = <crate::op::[<scalbn $suffix>]::Routine as MpOp>::MpTy;
+
+                fn new_mp() -> Self::MpTy {
+                    <crate::op::[<scalbn $suffix>]::Routine as MpOp>::new_mp()
+                }
+
+                fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet {
+                    <crate::op::[<scalbn $suffix>]::Routine as MpOp>::run(this, input)
+                }
+            }
+
+            // `scalbn`: applies the integer argument with `<<=`; `Equal` is passed on as the
+            // rounding direction.
+            impl MpOp for crate::op::[<scalbn $suffix>]::Routine {
+                type MpTy = MpFloat;
+
+                fn new_mp() -> Self::MpTy {
+                    new_mpfloat::<Self::FTy>()
+                }
+
+                fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet {
+                    let (x, n) = input;
+                    this.assign(x);
+                    *this <<= n;
+                    prep_retval::<Self::FTy>(this, Ordering::Equal)
+                }
+            }
+        }
+    };
+}
+
+// Routines that are only instantiated for `f32` (suffix `f`) and `f64` (no suffix).
+impl_op_for_ty!(f32, "f");
+impl_op_for_ty!(f64, "");
+
+// Routines instantiated for every float type. The `f16` and `f128` instantiations are only
+// compiled when the matching `cfg` flag is set.
+#[cfg(f16_enabled)]
+impl_op_for_ty_all!(f16, "f16");
+impl_op_for_ty_all!(f32, "f");
+impl_op_for_ty_all!(f64, "");
+#[cfg(f128_enabled)]
+impl_op_for_ty_all!(f128, "f128");
+
+// `lgamma_r` is not a simple suffix so we can't use the above macro.
+//
+// Returns the converted result together with the sign value reported by
+// `ln_abs_gamma_round`, cast to `i32`.
+impl MpOp for crate::op::lgamma_r::Routine {
+    type MpTy = MpFloat;
+
+    fn new_mp() -> Self::MpTy {
+        new_mpfloat::<Self::FTy>()
+    }
+
+    fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet {
+        this.assign(input.0);
+        let (gamma_sign, rounding) = this.ln_abs_gamma_round(Nearest);
+        (prep_retval::<Self::FTy>(this, rounding), gamma_sign as i32)
+    }
+}
+
+// Same computation as the `lgamma_r` implementation, instantiated for `lgammaf_r`.
+impl MpOp for crate::op::lgammaf_r::Routine {
+    type MpTy = MpFloat;
+
+    fn new_mp() -> Self::MpTy {
+        new_mpfloat::<Self::FTy>()
+    }
+
+    fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet {
+        this.assign(input.0);
+        let (gamma_sign, rounding) = this.ln_abs_gamma_round(Nearest);
+        (prep_retval::<Self::FTy>(this, rounding), gamma_sign as i32)
+    }
+}
+
+/* stub implementations so we don't need to special case them */
+
+// Stub so that `nextafter` does not need special-casing; both methods panic if called.
+impl MpOp for crate::op::nextafter::Routine {
+    type MpTy = MpFloat;
+
+    fn new_mp() -> Self::MpTy {
+        unimplemented!("nextafter does not yet have a MPFR operation")
+    }
+
+    fn run(_mp: &mut Self::MpTy, _args: Self::RustArgs) -> Self::RustRet {
+        unimplemented!("nextafter does not yet have a MPFR operation")
+    }
+}
+
+// Stub so that `nextafterf` does not need special-casing; both methods panic if called.
+impl MpOp for crate::op::nextafterf::Routine {
+    type MpTy = MpFloat;
+
+    fn new_mp() -> Self::MpTy {
+        unimplemented!("nextafter does not yet have a MPFR operation")
+    }
+
+    fn run(_mp: &mut Self::MpTy, _args: Self::RustArgs) -> Self::RustRet {
+        unimplemented!("nextafter does not yet have a MPFR operation")
+    }
+}
diff --git a/crates/libm-test/src/num.rs b/crates/libm-test/src/num.rs
new file mode 100644
index 000000000..eed941423
--- /dev/null
+++ b/crates/libm-test/src/num.rs
@@ -0,0 +1,529 @@
+//! Helpful numeric operations.
+
+use std::cmp::min;
+use std::ops::RangeInclusive;
+
+use libm::support::Float;
+
+use crate::{Int, MinInt};
+
+/// Extension to `libm`'s `Float` trait with methods that are useful for tests but not
+/// needed in `libm` itself.
+///
+/// All stepping methods work on the integer bit representation: for a fixed sign, adding one
+/// to the bits moves one ULP away from zero.
+pub trait FloatExt: Float {
+    /// The bit representation of the minimum (positive) subnormal number.
+    const TINY_BITS: Self::Int = Self::Int::ONE;
+
+    /// Retrieve additional constants for this float type.
+    fn consts() -> Consts<Self> {
+        Consts::new()
+    }
+
+    /// Increment by one ULP, saturating at infinity.
+    ///
+    /// NaN and `+inf` are returned unchanged. Both `+0` and `-0` step to the smallest positive
+    /// subnormal.
+    fn next_up(self) -> Self {
+        let bits = self.to_bits();
+        if self.is_nan() || bits == Self::INFINITY.to_bits() {
+            return self;
+        }
+
+        // `bits == abs` below is how the sign is detected.
+        let abs = self.abs().to_bits();
+        let next_bits = if abs == Self::Int::ZERO {
+            // Next up from 0 is the smallest subnormal
+            Self::TINY_BITS
+        } else if bits == abs {
+            // Positive: counting up is more positive
+            bits + Self::Int::ONE
+        } else {
+            // Negative: counting down is more positive
+            bits - Self::Int::ONE
+        };
+        Self::from_bits(next_bits)
+    }
+
+    /// A faster way to effectively call `next_up` `n` times.
+    ///
+    /// NaN and `+inf` are returned unchanged, as is any input when `n` is zero. The result
+    /// saturates at `+inf`.
+    fn n_up(self, n: Self::Int) -> Self {
+        let bits = self.to_bits();
+        if self.is_nan() || bits == Self::INFINITY.to_bits() || n == Self::Int::ZERO {
+            return self;
+        }
+
+        let abs = self.abs().to_bits();
+        let is_positive = bits == abs;
+        // A negative input is `abs` steps below zero, so more than `abs` steps ends above zero.
+        let crosses_zero = !is_positive && n > abs;
+        let inf_bits = Self::INFINITY.to_bits();
+
+        let next_bits = if abs == Self::Int::ZERO {
+            // Starting at either zero: the result is `n` steps above zero, capped at +inf
+            min(n, inf_bits)
+        } else if crosses_zero {
+            // `abs` steps are spent reaching zero; the rest count up from zero, capped at +inf
+            min(n - abs, inf_bits)
+        } else if is_positive {
+            // Positive, counting up is more positive but this may overflow
+            match bits.checked_add(n) {
+                Some(v) if v >= inf_bits => inf_bits,
+                Some(v) => v,
+                None => inf_bits,
+            }
+        } else {
+            // Negative, counting down is more positive
+            // (`n <= abs` here since `crosses_zero` is false, so this cannot pass zero)
+            bits - n
+        };
+        Self::from_bits(next_bits)
+    }
+
+    /// Decrement by one ULP, saturating at negative infinity.
+    ///
+    /// NaN and `-inf` are returned unchanged. Both `+0` and `-0` step to the smallest negative
+    /// subnormal.
+    fn next_down(self) -> Self {
+        let bits = self.to_bits();
+        if self.is_nan() || bits == Self::NEG_INFINITY.to_bits() {
+            return self;
+        }
+
+        let abs = self.abs().to_bits();
+        let next_bits = if abs == Self::Int::ZERO {
+            // Next down from 0 is the smallest negative subnormal
+            Self::TINY_BITS | Self::SIGN_MASK
+        } else if bits == abs {
+            // Positive: counting down is more negative
+            bits - Self::Int::ONE
+        } else {
+            // Negative: counting up is more negative
+            bits + Self::Int::ONE
+        };
+        Self::from_bits(next_bits)
+    }
+
+    /// A faster way to effectively call `next_down` `n` times.
+    ///
+    /// NaN and `-inf` are returned unchanged, as is any input when `n` is zero. The result
+    /// saturates at `-inf`.
+    fn n_down(self, n: Self::Int) -> Self {
+        let bits = self.to_bits();
+        if self.is_nan() || bits == Self::NEG_INFINITY.to_bits() || n == Self::Int::ZERO {
+            return self;
+        }
+
+        let abs = self.abs().to_bits();
+        let is_positive = bits == abs;
+        // A positive input is `abs` steps above zero, so more than `abs` steps ends below zero.
+        let crosses_zero = is_positive && n > abs;
+        let inf_bits = Self::INFINITY.to_bits();
+        let ninf_bits = Self::NEG_INFINITY.to_bits();
+
+        let next_bits = if abs == Self::Int::ZERO {
+            // Starting at either zero: magnitude is `n` steps, capped at infinity, sign set
+            min(n, inf_bits) | Self::SIGN_MASK
+        } else if crosses_zero {
+            // `abs` steps are spent reaching zero; the rest count down from zero, capped at -inf
+            min(n - abs, inf_bits) | Self::SIGN_MASK
+        } else if is_positive {
+            // Positive, counting down is more negative
+            // (`n <= abs` here since `crosses_zero` is false, so this cannot pass zero)
+            bits - n
+        } else {
+            // Negative, counting up is more negative but this may overflow
+            match bits.checked_add(n) {
+                // Bit patterns past -inf are not ordinary numbers, so saturate
+                Some(v) if v > ninf_bits => ninf_bits,
+                Some(v) => v,
+                None => ninf_bits,
+            }
+        };
+        Self::from_bits(next_bits)
+    }
+}
+
+// Blanket implementation: every `Float` type gets the `FloatExt` defaults.
+impl<F> FloatExt for F where F: Float {}
+
+/// Extra constants that are useful for tests.
+///
+/// Every field is a NaN encoding; `Consts::iter` yields all of them.
+#[derive(Debug, Clone, Copy)]
+pub struct Consts<F> {
+    /// The default quiet NaN, which is also the minimum quiet NaN.
+    pub pos_nan: F,
+    /// The default quiet NaN with negative sign.
+    pub neg_nan: F,
+    /// NaN with maximum (unsigned) significand to be a quiet NaN. The significand is saturated.
+    pub max_qnan: F,
+    /// NaN with minimum (unsigned) significand to be a signaling NaN.
+    pub min_snan: F,
+    /// NaN with maximum (unsigned) significand to be a signaling NaN.
+    pub max_snan: F,
+    /// `max_qnan` with the sign bit set.
+    pub neg_max_qnan: F,
+    /// `min_snan` with the sign bit set.
+    pub neg_min_snan: F,
+    /// `max_snan` with the sign bit set.
+    pub neg_max_snan: F,
+}
+
+impl<F: FloatExt> Consts<F> {
+    /// Build every constant from the float type's exponent, significand, and sign masks.
+    fn new() -> Self {
+        // Top significand bit: set in the quiet NaN constants, clear in the signaling ones.
+        let quiet_bit = F::Int::ONE << (F::SIG_BITS - 1);
+        // Exponent and significand fully saturated.
+        let saturated = F::EXP_MASK | F::SIG_MASK;
+
+        let pos_nan = F::EXP_MASK | quiet_bit;
+        let max_qnan = saturated;
+        let min_snan = F::EXP_MASK | F::Int::ONE;
+        let max_snan = saturated ^ quiet_bit;
+
+        // Same bit pattern with the sign bit set.
+        let negative = |bits: F::Int| F::from_bits(bits | F::SIGN_MASK);
+
+        Self {
+            pos_nan: F::from_bits(pos_nan),
+            neg_nan: negative(pos_nan),
+            max_qnan: F::from_bits(max_qnan),
+            min_snan: F::from_bits(min_snan),
+            max_snan: F::from_bits(max_snan),
+            neg_max_qnan: negative(max_qnan),
+            neg_min_snan: negative(min_snan),
+            neg_max_snan: negative(max_snan),
+        }
+    }
+
+    /// Iterate over every constant in this struct.
+    pub fn iter(self) -> impl Iterator<Item = F> {
+        // Destructure so we get unused warnings if we forget a list entry.
+        let Self {
+            pos_nan,
+            neg_nan,
+            max_qnan,
+            min_snan,
+            max_snan,
+            neg_max_qnan,
+            neg_min_snan,
+            neg_max_snan,
+        } = self;
+
+        let all = [
+            pos_nan,
+            neg_nan,
+            max_qnan,
+            min_snan,
+            max_snan,
+            neg_max_qnan,
+            neg_min_snan,
+            neg_max_snan,
+        ];
+        all.into_iter()
+    }
+}
+
+/// Return the number of steps between two floats, returning `None` if either input is NaN.
+///
+/// This is the number of steps needed for `n_up` or `n_down` to go between values. Infinities
+/// are treated the same as those functions (will return the nearest finite value), and only one
+/// of `-0` or `+0` is counted. It does not matter which value is greater.
+pub fn ulp_between<F: Float>(x: F, y: F) -> Option<F::Int> {
+    // Place both values on a signed "steps from zero" axis, then take the distance.
+    let (from_x, from_y) = (as_ulp_steps(x)?, as_ulp_steps(y)?);
+    Some(from_x.abs_diff(from_y))
+}
+
+/// Return the (signed) number of steps from zero to `x`, or `None` if `x` is NaN.
+fn as_ulp_steps<F: Float>(x: F) -> Option<F::SignedInt> {
+    if x.is_nan() {
+        return None;
+    }
+
+    let signed_bits = x.to_bits_signed();
+    let steps = if signed_bits < F::SignedInt::ZERO {
+        // each increment from `s = F::SignedInt::MIN` is one step down from `x = -0.0`
+        F::SignedInt::MIN - signed_bits
+    } else {
+        // each increment from `s = 0` is one step up from `x = 0.0`
+        signed_bits
+    };
+    Some(steps)
+}
+
+/// An iterator that returns floats with linearly spaced integer representations, which translates
+/// to logarithmic spacing of their values.
+///
+/// Note that this tends to skip negative zero, so that needs to be checked explicitly.
+///
+/// The first item is always `start`. Items are `spacing` ULPs apart, where `spacing` is the ULP
+/// distance between `start` and `end` divided (rounding down) by `steps - 1`. If that division
+/// is not exact, the final item falls short of `end`. If `steps` exceeds the number of
+/// representable values in the range, every value is returned exactly once.
+///
+/// Returns `(iterator, iterator_length)`.
+///
+/// # Panics
+///
+/// Panics if `start` or `end` is NaN, if `end < start`, or if `steps` is less than 2.
+pub fn logspace<F: FloatExt>(
+    start: F,
+    end: F,
+    steps: F::Int,
+) -> (impl Iterator<Item = F> + Clone, F::Int)
+where
+    RangeInclusive<F::Int>: Iterator,
+{
+    assert!(!start.is_nan());
+    assert!(!end.is_nan());
+    assert!(end >= start);
+
+    // Work with the number of gaps rather than the number of points. Both 0 and 1 points are
+    // rejected: a single point leaves zero gaps, which would divide by zero below.
+    let steps = steps
+        .checked_sub(F::Int::ONE)
+        .filter(|gaps| *gaps != F::Int::ZERO)
+        .expect("`steps` must be at least 2");
+    let between = ulp_between(start, end).expect("`start` or `end` is NaN");
+    let spacing = (between / steps).max(F::Int::ONE);
+    let steps = steps.min(between); // At maximum, one step per ULP
+
+    let mut x = start;
+    (
+        (F::Int::ZERO..=steps).map(move |_| {
+            let ret = x;
+            // Advancing past the final item is harmless because `n_up` saturates.
+            x = x.n_up(spacing);
+            ret
+        }),
+        steps + F::Int::ONE,
+    )
+}
+
+/// Returns an iterator of up to `steps` integers evenly distributed.
+///
+/// The first item is always `range.start()`. Items are `spacing` apart, where `spacing` is the
+/// width of the range divided (rounding down) by `steps - 1`. If that division is not exact,
+/// the final item falls short of `range.end()`. If `steps` exceeds the number of integers in
+/// the range, every integer is returned exactly once.
+///
+/// Returns `(iterator, iterator_length)`.
+///
+/// # Panics
+///
+/// Panics if `steps` is less than 2.
+pub fn linear_ints(
+    range: RangeInclusive<i32>,
+    steps: u64,
+) -> (impl Iterator<Item = i32> + Clone, u64) {
+    // Work with the number of gaps rather than the number of points. Both 0 and 1 points are
+    // rejected: a single point leaves zero gaps, which would divide by zero below.
+    let steps = steps.checked_sub(1).filter(|gaps| *gaps != 0).expect("`steps` must be at least 2");
+    let between = u64::from(range.start().abs_diff(*range.end()));
+    // `between` comes from a `u32`, so the spacing always fits in a `u32`. It does not always
+    // fit in an `i32` (e.g. the full `i32` range with two steps), so keep it unsigned.
+    let spacing = u32::try_from((between / steps).max(1)).unwrap();
+    let steps = steps.min(between);
+    let mut x: i32 = *range.start();
+    (
+        (0..=steps).map(move |_| {
+            let res = x;
+            // Wrapping add to avoid panic on last item (where `x` could overflow past i32::MAX as
+            // there is no next item).
+            x = x.wrapping_add_unsigned(spacing);
+            res
+        }),
+        steps + 1,
+    )
+}
+
+#[cfg(test)]
+mod tests {
+    use std::cmp::max;
+
+    use super::*;
+    use crate::f8;
+
+    /// Walk `f8::ALL` and check that `next_down`/`next_up` land on the neighboring entries.
+    #[test]
+    fn test_next_up_down() {
+        for (i, v) in f8::ALL.into_iter().enumerate() {
+            let down = v.next_down().to_bits();
+            let up = v.next_up().to_bits();
+
+            if i == 0 {
+                // Nothing precedes the first entry, so stepping down must give -inf
+                assert_eq!(down, f8::NEG_INFINITY.to_bits(), "{i} next_down({v:#010b})");
+            } else {
+                // Stepping down from zero gives the smallest negative subnormal rather than
+                // the previous list entry
+                let expected =
+                    if v == f8::ZERO { 1 | f8::SIGN_MASK } else { f8::ALL[i - 1].to_bits() };
+                assert_eq!(down, expected, "{i} next_down({v:#010b})");
+            }
+
+            if i == f8::ALL_LEN - 1 {
+                // Nothing follows the last entry, so stepping up must give +inf
+                assert_eq!(up, f8::INFINITY.to_bits(), "{i} next_up({v:#010b})");
+            } else {
+                // Stepping up from zero gives the smallest positive subnormal rather than the
+                // next list entry
+                let expected = if v == f8::NEG_ZERO { 1 } else { f8::ALL[i + 1].to_bits() };
+                assert_eq!(up, expected, "{i} next_up({v:#010b})");
+            }
+        }
+    }
+
+    /// Single steps from the infinities and from NaN.
+    #[test]
+    fn test_next_up_down_inf_nan() {
+        let lowest = f8::ALL[0];
+        let highest = f8::ALL[f8::ALL_LEN - 1];
+
+        // (actual, expected) pairs, compared by bit pattern
+        let cases = [
+            (f8::NEG_INFINITY.next_up(), lowest),
+            (f8::NEG_INFINITY.next_down(), f8::NEG_INFINITY),
+            (f8::INFINITY.next_down(), highest),
+            (f8::INFINITY.next_up(), f8::INFINITY),
+            (f8::NAN.next_up(), f8::NAN),
+            (f8::NAN.next_down(), f8::NAN),
+        ];
+
+        for (actual, expected) in cases {
+            assert_eq!(actual.to_bits(), expected.to_bits());
+        }
+    }
+
+    /// Spot checks for `n_up`/`n_down` at the ends of `f8::ALL` and around zero.
+    #[test]
+    fn test_n_up_down_quick() {
+        let last = f8::ALL_LEN - 1;
+
+        // Four steps inward from either end of the list
+        assert_eq!(f8::ALL[0].n_up(4).to_bits(), f8::ALL[4].to_bits());
+        assert_eq!(f8::ALL[last].n_down(4).to_bits(), f8::ALL[last - 4].to_bits());
+
+        // Check around zero
+        let zero = f8::from_bits(0b0);
+        assert_eq!(zero.n_up(7).to_bits(), 0b0_0000_111);
+        assert_eq!(zero.n_down(7).to_bits(), 0b1_0000_111);
+
+        // Check across zero
+        let neg_seven = f8::from_bits(0b1_0000_111);
+        let pos_seven = f8::from_bits(0b0_0000_111);
+        assert_eq!(neg_seven.n_up(8).to_bits(), 0b0_0000_001);
+        assert_eq!(pos_seven.n_down(8).to_bits(), 0b1_0000_001);
+    }
+
+    /// `n_up(1)`/`n_down(1)` must agree with `next_up()`/`next_down()` for every `f8`.
+    #[test]
+    fn test_n_up_down_one() {
+        // Verify that `n_up(1)` and `n_down(1)` are the same as `next_up()` and `next_down()`.
+        // The range is inclusive so that the final bit pattern, `0xff`, is checked as well.
+        for i in 0..=u8::MAX {
+            let v = f8::from_bits(i);
+            assert_eq!(v.next_up().to_bits(), v.n_up(1).to_bits());
+            assert_eq!(v.next_down().to_bits(), v.n_down(1).to_bits());
+        }
+    }
+
+    /// `n_up`/`n_down` starting from the infinities, NaN, and the two zeros.
+    #[test]
+    fn test_n_up_down_inf_nan_zero() {
+        // From -inf: up through the list to +inf; stepping down stays at -inf
+        assert_eq!(f8::NEG_INFINITY.n_up(1).to_bits(), f8::ALL[0].to_bits());
+        assert_eq!(f8::NEG_INFINITY.n_up(239).to_bits(), f8::ALL[f8::ALL_LEN - 1].to_bits());
+        assert_eq!(f8::NEG_INFINITY.n_up(240).to_bits(), f8::INFINITY.to_bits());
+        assert_eq!(f8::NEG_INFINITY.n_down(u8::MAX).to_bits(), f8::NEG_INFINITY.to_bits());
+
+        // From +inf: down through the list to -inf; stepping up stays at +inf
+        assert_eq!(f8::INFINITY.n_down(1).to_bits(), f8::ALL[f8::ALL_LEN - 1].to_bits());
+        assert_eq!(f8::INFINITY.n_down(239).to_bits(), f8::ALL[0].to_bits());
+        assert_eq!(f8::INFINITY.n_down(240).to_bits(), f8::NEG_INFINITY.to_bits());
+        assert_eq!(f8::INFINITY.n_up(u8::MAX).to_bits(), f8::INFINITY.to_bits());
+
+        // NaN is returned unchanged in both directions
+        assert_eq!(f8::NAN.n_up(u8::MAX).to_bits(), f8::NAN.to_bits());
+        assert_eq!(f8::NAN.n_down(u8::MAX).to_bits(), f8::NAN.to_bits());
+
+        // One step from a zero of the "wrong" sign lands on the smallest subnormal
+        assert_eq!(f8::ZERO.n_down(1).to_bits(), f8::TINY_BITS | f8::SIGN_MASK);
+        assert_eq!(f8::NEG_ZERO.n_up(1).to_bits(), f8::TINY_BITS);
+    }
+
+    /// True if the specified range of `f8::ALL` includes both +0 and -0
+    fn crossed_zero(start: usize, end: usize) -> bool {
+        let window = &f8::ALL[start..=end];
+        // Compare by representation so that +0 and -0 are told apart
+        let contains = |needle: f8| window.iter().any(|f| f8::eq_repr(*f, needle));
+        contains(f8::ZERO) && contains(f8::NEG_ZERO)
+    }
+
+    /// Exhaustively check `n_up`/`n_down` for every entry of `f8::ALL` and every step count
+    /// below `f8::ALL_LEN`, using list indices to compute the expected value.
+    #[test]
+    fn test_n_up_down() {
+        for (i, v) in f8::ALL.into_iter().enumerate() {
+            for n in 0..f8::ALL_LEN {
+                let down = v.n_down(n as u8).to_bits();
+                let up = v.n_up(n as u8).to_bits();
+
+                if let Some(down_exp_idx) = i.checked_sub(n) {
+                    // No overflow
+                    let mut expected = f8::ALL[down_exp_idx].to_bits();
+                    if n >= 1 && crossed_zero(down_exp_idx, i) {
+                        // If both -0 and +0 are included, we need to adjust our expected value
+                        // (the list holds two zeros but stepping counts them once)
+                        match down_exp_idx.checked_sub(1) {
+                            Some(v) => expected = f8::ALL[v].to_bits(),
+                            // Saturate to -inf if we are out of values
+                            None => expected = f8::NEG_INFINITY.to_bits(),
+                        }
+                    }
+                    assert_eq!(down, expected, "{i} {n} n_down({v:#010b})");
+                } else {
+                    // Overflow to -inf
+                    assert_eq!(down, f8::NEG_INFINITY.to_bits(), "{i} {n} n_down({v:#010b})");
+                }
+
+                let mut up_exp_idx = i + n;
+                if up_exp_idx < f8::ALL_LEN {
+                    // No overflow
+                    // (the repeated `up_exp_idx < f8::ALL_LEN` check is already implied by the
+                    // enclosing `if`)
+                    if n >= 1 && up_exp_idx < f8::ALL_LEN && crossed_zero(i, up_exp_idx) {
+                        // If both -0 and +0 are included, we need to adjust our expected value
+                        up_exp_idx += 1;
+                    }
+
+                    // The adjustment above may have pushed the index one past the end
+                    let expected = if up_exp_idx >= f8::ALL_LEN {
+                        f8::INFINITY.to_bits()
+                    } else {
+                        f8::ALL[up_exp_idx].to_bits()
+                    };
+
+                    assert_eq!(up, expected, "{i} {n} n_up({v:#010b})");
+                } else {
+                    // Overflow to +inf
+                    assert_eq!(up, f8::INFINITY.to_bits(), "{i} {n} n_up({v:#010b})");
+                }
+            }
+        }
+    }
+
+    /// Check `ulp_between` for every pair in `f8::ALL` against the index distance, and check
+    /// that `n_up`/`n_down` by that many steps moves between the two values.
+    #[test]
+    fn test_ulp_between() {
+        for (i, x) in f8::ALL.into_iter().enumerate() {
+            for (j, y) in f8::ALL.into_iter().enumerate() {
+                let ulp = ulp_between(x, y).unwrap();
+                let make_msg = || format!("i: {i} j: {j} x: {x:b} y: {y:b} ulp {ulp}");
+
+                let i_low = min(i, j);
+                let i_hi = max(i, j);
+                let mut expected = u8::try_from(i_hi - i_low).unwrap();
+                // The list holds both zeros but only one of them counts as a step
+                if crossed_zero(i_low, i_hi) {
+                    expected -= 1;
+                }
+
+                assert_eq!(ulp, expected, "{}", make_msg());
+
+                // Skip if either are zero since `next_{up,down}` will count over it
+                let either_zero = x == f8::ZERO || y == f8::ZERO;
+                if x < y && !either_zero {
+                    assert_eq!(x.n_up(ulp).to_bits(), y.to_bits(), "{}", make_msg());
+                    assert_eq!(y.n_down(ulp).to_bits(), x.to_bits(), "{}", make_msg());
+                } else if !either_zero {
+                    assert_eq!(y.n_up(ulp).to_bits(), x.to_bits(), "{}", make_msg());
+                    assert_eq!(x.n_down(ulp).to_bits(), y.to_bits(), "{}", make_msg());
+                }
+            }
+        }
+    }
+
+    /// `ulp_between` with infinities, the two zeros, and NaN.
+    #[test]
+    fn test_ulp_between_inf_nan_zero() {
+        // The result does not depend on argument order
+        assert_eq!(ulp_between(f8::NEG_INFINITY, f8::INFINITY).unwrap(), f8::ALL_LEN as u8);
+        assert_eq!(ulp_between(f8::INFINITY, f8::NEG_INFINITY).unwrap(), f8::ALL_LEN as u8);
+        assert_eq!(
+            ulp_between(f8::NEG_INFINITY, f8::ALL[f8::ALL_LEN - 1]).unwrap(),
+            f8::ALL_LEN as u8 - 1
+        );
+        assert_eq!(ulp_between(f8::INFINITY, f8::ALL[0]).unwrap(), f8::ALL_LEN as u8 - 1);
+
+        // The two zeros are zero steps apart; any NaN input gives `None`
+        assert_eq!(ulp_between(f8::ZERO, f8::NEG_ZERO).unwrap(), 0);
+        assert_eq!(ulp_between(f8::NAN, f8::ZERO), None);
+        assert_eq!(ulp_between(f8::ZERO, f8::NAN), None);
+    }
+
+    /// `logspace` starting at bit pattern `0x0`, for a few end points and step counts.
+    #[test]
+    fn test_logspace() {
+        // (end bits, requested steps, expected bit patterns)
+        let cases: [(u8, u8, &[u8]); 3] = [
+            (0x4, 2, &[0x0, 0x4]),
+            (0x4, 3, &[0x0, 0x2, 0x4]),
+            // Check that we include all values with no repeats if `steps` exceeds the maximum
+            // number of steps.
+            (0x3, 10, &[0x0, 0x1, 0x2, 0x3]),
+        ];
+
+        for (end_bits, steps, expected_bits) in cases {
+            let (ls, count) = logspace(f8::from_bits(0x0), f8::from_bits(end_bits), steps);
+            let ls: Vec<_> = ls.collect();
+            let exp: Vec<_> = expected_bits.iter().map(|bits| f8::from_bits(*bits)).collect();
+            assert_eq!(ls, exp);
+            assert_eq!(ls.len(), usize::from(count));
+        }
+    }
+
+    /// `linear_ints` for a few ranges and step counts, including the top of the `i32` range.
+    #[test]
+    fn test_linear_ints() {
+        // (range, requested steps, expected output)
+        let cases: [(RangeInclusive<i32>, u64, &[i32]); 4] = [
+            (0..=4, 2, &[0, 4]),
+            (0..=4, 3, &[0, 2, 4]),
+            // Check that we include all values with no repeats if `steps` exceeds the maximum
+            // number of steps.
+            (0x0..=0x3, 10, &[0, 1, 2, 3]),
+            // Check that there are no panics around `i32::MAX`.
+            (i32::MAX - 1..=i32::MAX, 5, &[i32::MAX - 1, i32::MAX]),
+        ];
+
+        for (range, steps, exp) in cases {
+            let (ints, count) = linear_ints(range, steps);
+            let ints: Vec<_> = ints.collect();
+            assert_eq!(ints, exp);
+            assert_eq!(ints.len(), usize::try_from(count).unwrap());
+        }
+    }
+
+    /// Check the exact bit patterns of every `Consts` field for `f8`.
+    #[test]
+    fn test_consts() {
+        // Destructure without `..` so a newly added field fails to compile until it is
+        // covered here.
+        let Consts {
+            pos_nan,
+            neg_nan,
+            max_qnan,
+            min_snan,
+            max_snan,
+            neg_max_qnan,
+            neg_min_snan,
+            neg_max_snan,
+        } = f8::consts();
+
+        // Literals are grouped as sign _ exponent _ significand
+        assert_eq!(pos_nan.to_bits(), 0b0_1111_100);
+        assert_eq!(neg_nan.to_bits(), 0b1_1111_100);
+        assert_eq!(max_qnan.to_bits(), 0b0_1111_111);
+        assert_eq!(min_snan.to_bits(), 0b0_1111_001);
+        assert_eq!(max_snan.to_bits(), 0b0_1111_011);
+        assert_eq!(neg_max_qnan.to_bits(), 0b1_1111_111);
+        assert_eq!(neg_min_snan.to_bits(), 0b1_1111_001);
+        assert_eq!(neg_max_snan.to_bits(), 0b1_1111_011);
+    }
+}
diff --git a/crates/libm-test/src/num_traits.rs b/crates/libm-test/src/num_traits.rs
deleted file mode 100644
index e16f4e4dc..000000000
--- a/crates/libm-test/src/num_traits.rs
+++ /dev/null
@@ -1,214 +0,0 @@
-use std::fmt;
-
-use crate::{MaybeOverride, SpecialCase, TestResult};
-
-/// Common types and methods for floating point numbers.
-pub trait Float: Copy + fmt::Display + fmt::Debug + PartialEq<Self> {
-    type Int: Int<OtherSign = Self::SignedInt, Unsigned = Self::Int>;
-    type SignedInt: Int + Int<OtherSign = Self::Int, Unsigned = Self::Int>;
-
-    const ZERO: Self;
-    const ONE: Self;
-
-    /// The bitwidth of the float type
-    const BITS: u32;
-
-    /// The bitwidth of the significand
-    const SIGNIFICAND_BITS: u32;
-
-    /// The bitwidth of the exponent
-    const EXPONENT_BITS: u32 = Self::BITS - Self::SIGNIFICAND_BITS - 1;
-
-    fn is_nan(self) -> bool;
-    fn is_infinite(self) -> bool;
-    fn to_bits(self) -> Self::Int;
-    fn from_bits(bits: Self::Int) -> Self;
-    fn signum(self) -> Self;
-}
-
-macro_rules! impl_float {
-    ($($fty:ty, $ui:ty, $si:ty, $significand_bits:expr;)+) => {
-        $(
-            impl Float for $fty {
-                type Int = $ui;
-                type SignedInt = $si;
-
-                const ZERO: Self = 0.0;
-                const ONE: Self = 1.0;
-
-                const BITS: u32 = <$ui>::BITS;
-                const SIGNIFICAND_BITS: u32 = $significand_bits;
-
-                fn is_nan(self) -> bool {
-                    self.is_nan()
-                }
-                fn is_infinite(self) -> bool {
-                    self.is_infinite()
-                }
-                fn to_bits(self) -> Self::Int {
-                    self.to_bits()
-                }
-                fn from_bits(bits: Self::Int) -> Self {
-                    Self::from_bits(bits)
-                }
-                fn signum(self) -> Self {
-                    self.signum()
-                }
-            }
-
-            impl Hex for $fty {
-                fn hex(self) -> String {
-                    self.to_bits().hex()
-                }
-            }
-        )+
-    }
-}
-
-impl_float!(
-    f32, u32, i32, 23;
-    f64, u64, i64, 52;
-);
-
-/// Common types and methods for integers.
-pub trait Int: Copy + fmt::Display + fmt::Debug + PartialEq<Self> {
-    type OtherSign: Int;
-    type Unsigned: Int;
-    const BITS: u32;
-    const SIGNED: bool;
-
-    fn signed(self) -> <Self::Unsigned as Int>::OtherSign;
-    fn unsigned(self) -> Self::Unsigned;
-    fn checked_sub(self, other: Self) -> Option<Self>;
-    fn abs(self) -> Self;
-}
-
-macro_rules! impl_int {
-    ($($ui:ty, $si:ty ;)+) => {
-        $(
-            impl Int for $ui {
-                type OtherSign = $si;
-                type Unsigned = Self;
-                const BITS: u32 = <$ui>::BITS;
-                const SIGNED: bool = false;
-                fn signed(self) -> Self::OtherSign {
-                    self as $si
-                }
-                fn unsigned(self) -> Self {
-                    self
-                }
-                fn checked_sub(self, other: Self) -> Option<Self> {
-                    self.checked_sub(other)
-                }
-                fn abs(self) -> Self {
-                    unimplemented!()
-                }
-            }
-
-            impl Int for $si {
-                type OtherSign = $ui;
-                type Unsigned = $ui;
-                const BITS: u32 = <$ui>::BITS;
-                const SIGNED: bool = true;
-                fn signed(self) -> Self {
-                    self
-                }
-                fn unsigned(self) -> $ui {
-                    self as $ui
-                }
-                fn checked_sub(self, other: Self) -> Option<Self> {
-                    self.checked_sub(other)
-                }
-                fn abs(self) -> Self {
-                    self.abs()
-                }
-            }
-
-            impl_int!(@for_both $si);
-            impl_int!(@for_both $ui);
-
-        )+
-    };
-
-    (@for_both $ty:ty) => {
-        impl Hex for $ty {
-            fn hex(self) -> String {
-                format!("{self:#0width$x}", width = ((Self::BITS / 4) + 2) as usize)
-            }
-        }
-
-        impl<Input> $crate::CheckOutput<Input> for $ty
-        where
-            Input: Hex + fmt::Debug,
-            SpecialCase: MaybeOverride<Input>,
-        {
-            fn validate<'a>(
-                self,
-                expected: Self,
-                input: Input,
-                ctx: &$crate::CheckCtx,
-            ) -> TestResult {
-                if let Some(res) = SpecialCase::check_int(input, self, expected, ctx) {
-                    return res;
-                }
-
-                anyhow::ensure!(
-                    self == expected,
-                    "\
-                    \n    input:    {input:?} {ibits}\
-                    \n    expected: {expected:<22?} {expbits}\
-                    \n    actual:   {self:<22?} {actbits}\
-                    ",
-                    actbits = self.hex(),
-                    expbits = expected.hex(),
-                    ibits = input.hex(),
-                );
-
-                Ok(())
-            }
-        }
-    }
-}
-
-impl_int!(
-    u32, i32;
-    u64, i64;
-);
-
-/// A helper trait to print something as hex with the correct number of nibbles, e.g. a `u32`
-/// will always print with `0x` followed by 8 digits.
-///
-/// This is only used for printing errors so allocating is okay.
-pub trait Hex: Copy {
-    fn hex(self) -> String;
-}
-
-impl<T1> Hex for (T1,)
-where
-    T1: Hex,
-{
-    fn hex(self) -> String {
-        format!("({},)", self.0.hex())
-    }
-}
-
-impl<T1, T2> Hex for (T1, T2)
-where
-    T1: Hex,
-    T2: Hex,
-{
-    fn hex(self) -> String {
-        format!("({}, {})", self.0.hex(), self.1.hex())
-    }
-}
-
-impl<T1, T2, T3> Hex for (T1, T2, T3)
-where
-    T1: Hex,
-    T2: Hex,
-    T3: Hex,
-{
-    fn hex(self) -> String {
-        format!("({}, {}, {})", self.0.hex(), self.1.hex(), self.2.hex())
-    }
-}
diff --git a/crates/libm-test/src/op.rs b/crates/libm-test/src/op.rs
new file mode 100644
index 000000000..47d72ae58
--- /dev/null
+++ b/crates/libm-test/src/op.rs
@@ -0,0 +1,151 @@
+//! Types representing individual functions.
+//!
+//! Each routine gets a module with its name, e.g. `mod sinf { /* ... */ }`. The module
+//! contains a unit struct `Routine` which implements `MathOp`.
+//!
+//! Basically everything could be called a "function" here, so we loosely use the following
+//! terminology:
+//!
+//! - "Function": the math operation that does not have an associated precision. E.g. `f(x) = e^x`,
+//!   `f(x) = log(x)`.
+//! - "Routine": A code implementation of a math operation with a specific precision. E.g. `exp`,
+//!   `expf`, `expl`, `log`, `logf`.
+//! - "Operation" / "Op": Something that relates a routine to a function or is otherwise higher
+//!   level. `Op` is also used as the name for generic parameters since it is terse.
+
+use std::fmt;
+use std::panic::{RefUnwindSafe, UnwindSafe};
+
+pub use shared::{ALL_OPERATIONS, FloatTy, MathOpInfo, Ty};
+
+use crate::{CheckOutput, Float, TupleCall};
+
+mod shared {
+    include!("../../libm-macros/src/shared.rs");
+}
+
+/// An enum representing each possible symbol name (`sin`, `sinf`, `sinl`, etc).
+#[libm_macros::function_enum(BaseName)]
+#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
+pub enum Identifier {} // variants (e.g. `Sinf`) are supplied by the `function_enum` attribute
+
+impl fmt::Display for Identifier {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        f.write_str(self.as_str()) // writes the name verbatim; width/fill flags are ignored
+    }
+}
+
+/// The name without any type specifier, e.g. `sin` and `sinf` both become `sin`.
+#[libm_macros::base_name_enum]
+#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
+pub enum BaseName {} // variants (e.g. `Sin`) are supplied by the `base_name_enum` attribute
+
+impl fmt::Display for BaseName {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        f.write_str(self.as_str()) // writes the name verbatim; width/fill flags are ignored
+    }
+}
+
+/// Attributes ascribed to a `libm` routine including signature, type information,
+/// and naming.
+pub trait MathOp {
+    /// The float type used for this operation.
+    type FTy: Float;
+
+    /// The function type representing the signature in a C library.
+    type CFn: Copy;
+
+    /// Arguments passed to the C library function as a tuple. These may include `&mut` return
+    /// values.
+    type CArgs<'a>
+    where
+        Self: 'a;
+
+    /// The type returned by C implementations.
+    type CRet;
+
+    /// The signature of the Rust function as a `fn(...) -> ...` type.
+    type RustFn: Copy + UnwindSafe;
+
+    /// Arguments passed to the Rust library function as a tuple.
+    ///
+    /// The required `TupleCall` bounds ensure this type can be passed either to the C function or
+    /// to the Rust function.
+    type RustArgs: Copy
+        + TupleCall<Self::RustFn, Output = Self::RustRet>
+        + TupleCall<Self::CFn, Output = Self::RustRet>
+        + RefUnwindSafe;
+
+    /// Type returned from the Rust function.
+    type RustRet: CheckOutput<Self::RustArgs>;
+
+    /// The identifier of this routine, including any type suffix (e.g. `Sin`, `Sinf`).
+    const IDENTIFIER: Identifier;
+
+    /// The routine name as a string, taken from `IDENTIFIER.as_str()`.
+    const NAME: &'static str = Self::IDENTIFIER.as_str();
+
+    /// The name of the function excluding the type suffix, e.g. `sin` and `sinf` are both `sin`.
+    const BASE_NAME: BaseName = Self::IDENTIFIER.base_name();
+
+    /// The `libm` function implementing this routine; it can be called with `RustArgs`.
+    const ROUTINE: Self::RustFn;
+}
+
+/// Access the associated `FTy` type from an op (helper to avoid ambiguous associated types).
+pub type OpFTy<Op> = <Op as MathOp>::FTy;
+/// Access the associated `FTy::Int` type from an op (helper to avoid ambiguous associated types).
+pub type OpITy<Op> = <<Op as MathOp>::FTy as Float>::Int;
+/// Access the associated `CFn` type from an op (helper to avoid ambiguous associated types).
+pub type OpCFn<Op> = <Op as MathOp>::CFn;
+/// Access the associated `CRet` type from an op (helper to avoid ambiguous associated types).
+pub type OpCRet<Op> = <Op as MathOp>::CRet;
+/// Access the associated `RustFn` type from an op (helper to avoid ambiguous associated types).
+pub type OpRustFn<Op> = <Op as MathOp>::RustFn;
+/// Access the associated `RustArgs` type from an op (helper to avoid ambiguous associated types).
+pub type OpRustArgs<Op> = <Op as MathOp>::RustArgs;
+/// Access the associated `RustRet` type from an op (helper to avoid ambiguous associated types).
+pub type OpRustRet<Op> = <Op as MathOp>::RustRet;
+
+macro_rules! do_thing {
+    // The only matcher; `for_each_function!` below expands it for every routine, not just unary.
+    (
+        fn_name: $fn_name:ident,
+        FTy: $FTy:ty,
+        CFn: $CFn:ty,
+        CArgs: $CArgs:ty,
+        CRet: $CRet:ty,
+        RustFn: $RustFn:ty,
+        RustArgs: $RustArgs:ty,
+        RustRet: $RustRet:ty,
+        attrs: [$($attr:meta),*],
+
+    ) => {
+        paste::paste! {
+            $(#[$attr])*
+            pub mod $fn_name {
+                use super::*;
+                pub struct Routine; // unit type that carries this routine's `MathOp` impl
+
+                impl MathOp for Routine {
+                    type FTy = $FTy;
+                    type CFn = for<'a> $CFn;
+                    type CArgs<'a> = $CArgs where Self: 'a;
+                    type CRet = $CRet;
+                    type RustFn = $RustFn;
+                    type RustArgs = $RustArgs;
+                    type RustRet = $RustRet;
+
+                    const IDENTIFIER: Identifier = Identifier::[< $fn_name:camel >];
+                    const ROUTINE: Self::RustFn = libm::$fn_name;
+                }
+            }
+
+        }
+    };
+}
+
+libm_macros::for_each_function! {
+    callback: do_thing,
+    emit_types: all,
+}
diff --git a/crates/libm-test/src/precision.rs b/crates/libm-test/src/precision.rs
new file mode 100644
index 000000000..f5fb5f670
--- /dev/null
+++ b/crates/libm-test/src/precision.rs
@@ -0,0 +1,573 @@
+//! Configuration for skipping or changing the result for individual test cases (inputs) rather
+//! than ignoring entire tests.
+
+use core::f32;
+
+use CheckBasis::{Mpfr, Musl};
+use libm::support::CastFrom;
+use {BaseName as Bn, Identifier as Id};
+
+use crate::{BaseName, CheckBasis, CheckCtx, Float, Identifier, Int, TestResult};
+
+/// Type implementing [`MaybeOverride`] for each supported input tuple.
+pub struct SpecialCase;
+
+/// Default allowed ULP error for the routine in `ctx`, adjusted for the test basis and target.
+#[allow(clippy::single_match)] // the `target_arch = "x86"` match below has a single real arm
+pub fn default_ulp(ctx: &CheckCtx) -> u32 {
+    // ULP compared to the infinite-precision (MPFR) result.
+    let mut ulp = match ctx.base_name {
+        // Operations that require exact results. This list should correlate with what we
+        // have documented at <https://doc.rust-lang.org/std/primitive.f32.html>.
+        Bn::Ceil
+        | Bn::Copysign
+        | Bn::Fabs
+        | Bn::Fdim
+        | Bn::Floor
+        | Bn::Fma
+        | Bn::Fmax
+        | Bn::Fmaximum
+        | Bn::FmaximumNum
+        | Bn::Fmin
+        | Bn::Fminimum
+        | Bn::FminimumNum
+        | Bn::Fmod
+        | Bn::Frexp
+        | Bn::Ilogb
+        | Bn::Ldexp
+        | Bn::Modf
+        | Bn::Nextafter
+        | Bn::Remainder
+        | Bn::Remquo
+        | Bn::Rint
+        | Bn::Round
+        | Bn::Roundeven
+        | Bn::Scalbn
+        | Bn::Sqrt
+        | Bn::Trunc => 0,
+
+        // Operations that aren't required to be exact, but our implementations are.
+        Bn::Cbrt => 0,
+
+        // Bessel functions have large inaccuracies.
+        Bn::J0 | Bn::J1 | Bn::Y0 | Bn::Y1 | Bn::Jn | Bn::Yn => 8_000_000,
+
+        // For all other operations, specify our implementation's worst case precision.
+        Bn::Acos => 1,
+        Bn::Acosh => 4,
+        Bn::Asin => 1,
+        Bn::Asinh => 2,
+        Bn::Atan => 1,
+        Bn::Atan2 => 2,
+        Bn::Atanh => 2,
+        Bn::Cos => 1,
+        Bn::Cosh => 1,
+        Bn::Erf => 1,
+        Bn::Erfc => 4,
+        Bn::Exp => 1,
+        Bn::Exp10 => 6,
+        Bn::Exp2 => 1,
+        Bn::Expm1 => 1,
+        Bn::Hypot => 1,
+        Bn::Lgamma | Bn::LgammaR => 16,
+        Bn::Log => 1,
+        Bn::Log10 => 1,
+        Bn::Log1p => 1,
+        Bn::Log2 => 1,
+        Bn::Pow => 1,
+        Bn::Sin => 1,
+        Bn::Sincos => 1,
+        Bn::Sinh => 2,
+        Bn::Tan => 1,
+        Bn::Tanh => 2,
+        // tgammaf has higher accuracy than tgamma.
+        Bn::Tgamma if ctx.fn_ident != Id::Tgamma => 1,
+        Bn::Tgamma => 20,
+    };
+
+    // There are some cases where musl's approximation is less accurate than ours. For these
+    // cases, increase the ULP.
+    if ctx.basis == Musl {
+        match ctx.base_name {
+            Bn::Cosh => ulp = 2,
+            Bn::Exp10 if usize::BITS < 64 => ulp = 4,
+            Bn::Lgamma | Bn::LgammaR => ulp = 400,
+            Bn::Tanh => ulp = 4,
+            _ => (),
+        }
+
+        match ctx.fn_ident {
+            Id::Cbrt => ulp = 2,
+            // FIXME(#401): musl has an incorrect result here.
+            Id::Fdim => ulp = 2,
+            Id::Sincosf => ulp = 500,
+            Id::Tgamma => ulp = 20,
+            _ => (),
+        }
+    }
+
+    if cfg!(target_arch = "x86") {
+        match ctx.fn_ident {
+            // Input `fma(0.999999999999999, 1.0000000000000013, 0.0) = 1.0000000000000002` is
+            // incorrect on i586 and i686.
+            Id::Fma => ulp = 1,
+            _ => (),
+        }
+    }
+
+    // In some cases, our implementation is less accurate than musl on i586.
+    if cfg!(x86_no_sse) {
+        match ctx.fn_ident {
+            // FIXME(#401): these need to be correctly rounded but are not.
+            Id::Fmaf => ulp = 1,
+            Id::Fdim => ulp = 1,
+            Id::Round => ulp = 1,
+
+            Id::Asinh => ulp = 3,
+            Id::Asinhf => ulp = 3,
+            Id::Cbrt => ulp = 1,
+            Id::Exp10 | Id::Exp10f => ulp = 1_000_000,
+            Id::Exp2 | Id::Exp2f => ulp = 10_000_000,
+            Id::Log1p | Id::Log1pf => ulp = 2,
+            Id::Tan => ulp = 2,
+            _ => (),
+        }
+    }
+
+    ulp
+}
+
+/// Result of checking for possible overrides.
+#[derive(Debug, Default)]
+pub enum CheckAction {
+    /// The check should pass. Default case.
+    #[default]
+    AssertSuccess,
+
+    /// Override the ULP for this check.
+    AssertWithUlp(u32),
+
+    /// Failure is expected, ensure this is the case (xfail). Takes a context string to help trace
+    /// back exactly why we expect this to fail.
+    AssertFailure(&'static str),
+
+    /// The override somehow validated the result, here it is.
+    Custom(TestResult),
+
+    /// Disregard the output.
+    Skip,
+}
+
+/// Don't run further validation on this test case.
+const SKIP: CheckAction = CheckAction::Skip;
+
+/// Return this for a case that currently fails but shouldn't; the failure is asserted (xfail).
+/// Takes a description of context.
+const XFAIL: fn(&'static str) -> CheckAction = CheckAction::AssertFailure;
+
+/// Indicates that we expect a test to fail but we aren't asserting that it does (e.g. some results
+/// within a range do actually pass).
+///
+/// Same as `SKIP`, just indicates we have something to eventually fix.
+const XFAIL_NOCHECK: CheckAction = CheckAction::Skip;
+
+/// By default, all tests should pass.
+const DEFAULT: CheckAction = CheckAction::AssertSuccess;
+
+/// Allow overriding the outputs of specific test cases.
+///
+/// There are some cases where we want to xfail specific cases or handle certain inputs
+/// differently than the rest of calls to `validate`. This provides a hook to do that.
+///
+/// Returning [`CheckAction::AssertSuccess`] (the default) means the usual checks apply. The other
+/// variants expect a failure, supply a finished result, or disregard the output.
+///
+/// This gets implemented once per input type, then the functions provide further filtering
+/// based on function name and values.
+///
+/// To adjust the ULP for one specific test case, return [`CheckAction::AssertWithUlp`] with the
+/// new tolerance.
+pub trait MaybeOverride<Input> {
+    fn check_float<F: Float>(
+        _input: Input,
+        _actual: F,
+        _expected: F,
+        _ctx: &CheckCtx,
+    ) -> CheckAction {
+        DEFAULT
+    }
+
+    fn check_int<I: Int>(_input: Input, _actual: I, _expected: I, _ctx: &CheckCtx) -> CheckAction {
+        DEFAULT
+    }
+}
+
+#[cfg(f16_enabled)]
+impl MaybeOverride<(f16,)> for SpecialCase {}
+
+impl MaybeOverride<(f32,)> for SpecialCase {
+    fn check_float<F: Float>(input: (f32,), actual: F, expected: F, ctx: &CheckCtx) -> CheckAction {
+        if ctx.base_name == BaseName::Expm1
+            && !input.0.is_infinite()
+            && input.0 > 80.0
+            && actual.is_infinite()
+            && !expected.is_infinite()
+        {
+            // we return infinity but the number is representable
+            if ctx.basis == CheckBasis::Musl {
+                return XFAIL_NOCHECK;
+            }
+            return XFAIL("expm1 representable numbers");
+        }
+
+        if cfg!(x86_no_sse)
+            && ctx.base_name == BaseName::Exp2
+            && !expected.is_infinite()
+            && actual.is_infinite()
+        {
+            // We return infinity when there is a representable value. Test input: 127.97238
+            return XFAIL("586 exp2 representable numbers");
+        }
+
+        if ctx.base_name == BaseName::Sinh && input.0.abs() > 80.0 && actual.is_nan() {
+            // we return some NaN that should be real values or infinite
+            if ctx.basis == CheckBasis::Musl {
+                return XFAIL_NOCHECK;
+            }
+            return XFAIL("sinh unexpected NaN");
+        }
+
+        if (ctx.base_name == BaseName::Lgamma || ctx.base_name == BaseName::LgammaR)
+            && input.0 > 4e36
+            && expected.is_infinite()
+            && !actual.is_infinite()
+        {
+            // This result should saturate but we return a finite value.
+            return XFAIL_NOCHECK;
+        }
+
+        if ctx.base_name == BaseName::J0 && input.0 < -1e34 {
+            // Errors get huge close to -inf
+            return XFAIL_NOCHECK;
+        }
+
+        unop_common(input, actual, expected, ctx)
+    }
+
+    fn check_int<I: Int>(input: (f32,), actual: I, expected: I, ctx: &CheckCtx) -> CheckAction {
+        // Against MPFR, `lgammaf_r(-inf)` gives an integer result of -1 from us but +1 from MPFR;
+        // the results differ only in sign, hence the `abs` comparison.
+        if ctx.basis == CheckBasis::Mpfr
+            && ctx.base_name == BaseName::LgammaR
+            && input.0 == f32::NEG_INFINITY
+            && actual.abs() == expected.abs()
+        {
+            return XFAIL("lgammar integer result");
+        }
+
+        DEFAULT
+    }
+}
+
+impl MaybeOverride<(f64,)> for SpecialCase {
+    fn check_float<F: Float>(input: (f64,), actual: F, expected: F, ctx: &CheckCtx) -> CheckAction {
+        if cfg!(x86_no_sse)
+            && ctx.base_name == BaseName::Ceil
+            && ctx.basis == CheckBasis::Musl
+            && input.0 < 0.0
+            && input.0 > -1.0
+            && expected == F::ZERO
+            && actual == F::ZERO
+        {
+            // musl returns -0.0, we return +0.0
+            return XFAIL("i586 ceil signed zero");
+        }
+
+        if cfg!(x86_no_sse)
+            && (ctx.base_name == BaseName::Rint || ctx.base_name == BaseName::Roundeven)
+            && (expected - actual).abs() <= F::ONE
+            && (expected - actual).abs() > F::ZERO
+        {
+            // Our rounding mode is incorrect.
+            return XFAIL("i586 rint rounding mode");
+        }
+
+        if cfg!(x86_no_sse)
+            && (ctx.fn_ident == Identifier::Ceil || ctx.fn_ident == Identifier::Floor)
+            && expected.eq_repr(F::NEG_ZERO)
+            && actual.eq_repr(F::ZERO)
+        {
+            // FIXME: the x87 implementations do not keep the distinction between -0.0 and 0.0.
+            // See https://github.com/rust-lang/libm/pull/404#issuecomment-2572399955
+            return XFAIL("i586 ceil/floor signed zero");
+        }
+
+        if cfg!(x86_no_sse)
+            && (ctx.fn_ident == Identifier::Exp10 || ctx.fn_ident == Identifier::Exp2)
+        {
+            // FIXME: i586 has very imprecise results with ULP > u32::MAX for these
+            // operations so we can't reasonably provide a limit.
+            return XFAIL_NOCHECK;
+        }
+
+        if ctx.base_name == BaseName::J0 && input.0 < -1e300 {
+            // Errors get huge close to -inf
+            return XFAIL_NOCHECK;
+        }
+
+        // The remaining overrides are shared with the other single-float input types.
+        unop_common(input, actual, expected, ctx)
+    }
+
+    fn check_int<I: Int>(input: (f64,), actual: I, expected: I, ctx: &CheckCtx) -> CheckAction {
+        // Against MPFR, `lgamma_r(-inf)` gives an integer result of -1 from us but +1 from MPFR;
+        // the results differ only in sign, hence the `abs` comparison.
+        if ctx.basis == CheckBasis::Mpfr
+            && ctx.base_name == BaseName::LgammaR
+            && input.0 == f64::NEG_INFINITY
+            && actual.abs() == expected.abs()
+        {
+            return XFAIL("lgammar integer result");
+        }
+
+        DEFAULT
+    }
+}
+
+#[cfg(f128_enabled)]
+impl MaybeOverride<(f128,)> for SpecialCase {}
+
+// F1 and F2 are always the same type, this is just to please generics
+fn unop_common<F1: Float, F2: Float>(
+    input: (F1,),
+    actual: F2,
+    expected: F2,
+    ctx: &CheckCtx,
+) -> CheckAction {
+    if ctx.base_name == BaseName::Acosh
+        && input.0 < F1::NEG_ONE
+        && !(expected.is_nan() && actual.is_nan())
+    {
+        // acosh is undefined for x < 1.0, but we return arbitrary results for x < -1.0.
+
+        if ctx.basis == CheckBasis::Musl {
+            return XFAIL_NOCHECK;
+        }
+
+        return XFAIL("acoshf undefined");
+    }
+
+    if (ctx.base_name == BaseName::Lgamma || ctx.base_name == BaseName::LgammaR)
+        && input.0 < F1::ZERO
+        && !input.0.is_infinite()
+    {
+        // loggamma should not be defined for x < 0, yet we both return results
+        return XFAIL_NOCHECK;
+    }
+
+    // fabs must leave NaNs untouched; `copysign` takes two inputs and is not handled here.
+    if ctx.base_name == BaseName::Fabs && input.0.is_nan() {
+        // LLVM currently uses x87 instructions which quieten signalling NaNs to handle the i686
+        // `extern "C"` `f32`/`f64` return ABI.
+        // LLVM issue <https://github.com/llvm/llvm-project/issues/66803>
+        // Rust issue <https://github.com/rust-lang/rust/issues/115567>
+        if cfg!(target_arch = "x86") && ctx.basis == CheckBasis::Musl && actual.is_nan() {
+            return XFAIL_NOCHECK;
+        }
+
+        // MPFR only has one NaN bitpattern; allow the default `.is_nan()` checks to validate.
+        if ctx.basis == CheckBasis::Mpfr {
+            return DEFAULT;
+        }
+
+        // abs and copysign require signaling NaNs to be propagated, so verify bit equality.
+        if actual.to_bits() == expected.to_bits() {
+            return CheckAction::Custom(Ok(()));
+        } else {
+            return CheckAction::Custom(Err(anyhow::anyhow!("NaNs have different bitpatterns")));
+        }
+    }
+
+    DEFAULT
+}
+
+#[cfg(f16_enabled)]
+impl MaybeOverride<(f16, f16)> for SpecialCase {
+    fn check_float<F: Float>(
+        input: (f16, f16),
+        actual: F,
+        expected: F,
+        ctx: &CheckCtx,
+    ) -> CheckAction {
+        binop_common(input, actual, expected, ctx)
+    }
+}
+
+impl MaybeOverride<(f32, f32)> for SpecialCase {
+    fn check_float<F: Float>(
+        input: (f32, f32),
+        actual: F,
+        expected: F,
+        ctx: &CheckCtx,
+    ) -> CheckAction {
+        binop_common(input, actual, expected, ctx)
+    }
+}
+
+impl MaybeOverride<(f64, f64)> for SpecialCase {
+    fn check_float<F: Float>(
+        input: (f64, f64),
+        actual: F,
+        expected: F,
+        ctx: &CheckCtx,
+    ) -> CheckAction {
+        binop_common(input, actual, expected, ctx)
+    }
+}
+
+#[cfg(f128_enabled)]
+impl MaybeOverride<(f128, f128)> for SpecialCase {
+    fn check_float<F: Float>(
+        input: (f128, f128),
+        actual: F,
+        expected: F,
+        ctx: &CheckCtx,
+    ) -> CheckAction {
+        binop_common(input, actual, expected, ctx)
+    }
+}
+
+// Overrides for two-float inputs. F1 and F2 are always the same type, this is to please generics
+fn binop_common<F1: Float, F2: Float>(
+    input: (F1, F1),
+    actual: F2,
+    expected: F2,
+    ctx: &CheckCtx,
+) -> CheckAction {
+    // MPFR only has one NaN bitpattern, so `copysign` NaN results can't be compared bitwise. Skip
+    // if input 0 (magnitude) and both outputs are NaN, or if input 1 (sign source) is NaN.
+    if ctx.basis == CheckBasis::Mpfr
+        && ctx.base_name == BaseName::Copysign
+        && ((input.0.is_nan() && actual.is_nan() && expected.is_nan()) || input.1.is_nan())
+    {
+        return SKIP;
+    }
+
+    /* FIXME(#439): our fmin and fmax do not compare signed zeros */
+
+    if ctx.base_name == BaseName::Fmin
+        && input.0.biteq(F1::NEG_ZERO)
+        && input.1.biteq(F1::ZERO)
+        && expected.biteq(F2::NEG_ZERO)
+        && actual.biteq(F2::ZERO)
+    {
+        return XFAIL("fmin signed zeroes");
+    }
+
+    if ctx.base_name == BaseName::Fmax
+        && input.0.biteq(F1::NEG_ZERO)
+        && input.1.biteq(F1::ZERO)
+        && expected.biteq(F2::ZERO)
+        && actual.biteq(F2::NEG_ZERO)
+    {
+        return XFAIL("fmax signed zeroes");
+    }
+
+    // Musl propagates NaNs if one is provided as the input, but we return the other input.
+    if (ctx.base_name == BaseName::Fmax || ctx.base_name == BaseName::Fmin)
+        && ctx.basis == Musl
+        && (input.0.is_nan() ^ input.1.is_nan())
+        && expected.is_nan()
+    {
+        return XFAIL("fmax/fmin musl NaN");
+    }
+
+    DEFAULT
+}
+
+impl MaybeOverride<(i32, f32)> for SpecialCase {
+    fn check_float<F: Float>(
+        input: (i32, f32),
+        actual: F,
+        expected: F,
+        ctx: &CheckCtx,
+    ) -> CheckAction {
+        // `ynf(213, 109.15641) = -inf` with our library, should be finite.
+        if ctx.basis == Mpfr
+            && ctx.base_name == BaseName::Yn
+            && input.0 > 200
+            && !expected.is_infinite()
+            && actual.is_infinite()
+        {
+            return XFAIL("ynf infinity mismatch");
+        }
+
+        int_float_common(input, actual, expected, ctx)
+    }
+}
+
+impl MaybeOverride<(i32, f64)> for SpecialCase {
+    fn check_float<F: Float>(
+        input: (i32, f64),
+        actual: F,
+        expected: F,
+        ctx: &CheckCtx,
+    ) -> CheckAction {
+        int_float_common(input, actual, expected, ctx)
+    }
+}
+
+fn int_float_common<F1: Float, F2: Float>(
+    input: (i32, F1),
+    actual: F2,
+    expected: F2,
+    ctx: &CheckCtx,
+) -> CheckAction {
+    let (n, x) = input;
+    let is_jn_or_yn = matches!(ctx.base_name, BaseName::Jn | BaseName::Yn);
+    let both_zero = actual == F2::ZERO && expected == F2::ZERO;
+
+    // At `x = -inf` both results compare equal to zero, but we expect the check to fail.
+    if ctx.basis == Mpfr && is_jn_or_yn && x == F1::NEG_INFINITY && both_zero {
+        return XFAIL("we disagree with MPFR on the sign of zero");
+    }
+
+    // Values near infinity sometimes get cut off for us. `ynf(681, 509.90924) = -inf` but should
+    // be -3.2161271e38.
+    let cut_off = !expected.is_infinite() && actual.is_infinite();
+    if ctx.basis == Musl
+        && ctx.fn_ident == Identifier::Ynf
+        && cut_off
+        && (expected.abs().to_bits().abs_diff(actual.abs().to_bits())
+            < F2::Int::cast_from(10_000_000u32))
+    {
+        return XFAIL_NOCHECK;
+    }
+
+    // Our bessel functions blow up with large N values; nothing else needs an override.
+    if ctx.basis != Musl || !is_jn_or_yn {
+        return DEFAULT;
+    }
+
+    // Precision is especially bad on i586, not worth checking. Neither are very large orders.
+    if cfg!(x86_no_sse) || n > 4000 {
+        return XFAIL_NOCHECK;
+    }
+
+    if n > 100 {
+        return CheckAction::AssertWithUlp(1_000_000);
+    }
+    DEFAULT
+}
+
+#[cfg(f16_enabled)]
+impl MaybeOverride<(f16, i32)> for SpecialCase {}
+impl MaybeOverride<(f32, i32)> for SpecialCase {}
+impl MaybeOverride<(f64, i32)> for SpecialCase {}
+#[cfg(f128_enabled)]
+impl MaybeOverride<(f128, i32)> for SpecialCase {}
+
+impl MaybeOverride<(f32, f32, f32)> for SpecialCase {}
+impl MaybeOverride<(f64, f64, f64)> for SpecialCase {}
+#[cfg(f128_enabled)]
+impl MaybeOverride<(f128, f128, f128)> for SpecialCase {}
diff --git a/crates/libm-test/src/run_cfg.rs b/crates/libm-test/src/run_cfg.rs
new file mode 100644
index 000000000..b36164b00
--- /dev/null
+++ b/crates/libm-test/src/run_cfg.rs
@@ -0,0 +1,370 @@
+//! Configuration for how tests get run.
+
+use std::ops::RangeInclusive;
+use std::sync::LazyLock;
+use std::{env, str};
+
+use crate::generate::random::{SEED, SEED_ENV};
+use crate::{BaseName, FloatTy, Identifier, test_log};
+
+/// The environment variable indicating which extensive tests should be run.
+pub const EXTENSIVE_ENV: &str = "LIBM_EXTENSIVE_TESTS";
+
+/// Specify the number of iterations via this environment variable, rather than using the default.
+pub const EXTENSIVE_ITER_ENV: &str = "LIBM_EXTENSIVE_ITERATIONS";
+
+/// Iteration count parsed from [`EXTENSIVE_ITER_ENV`], or `None` if the variable can't be read.
+static EXTENSIVE_ITER_OVERRIDE: LazyLock<Option<u64>> = LazyLock::new(|| {
+    env::var(EXTENSIVE_ITER_ENV).ok().map(|s| s.parse().expect("failed to parse iteration count"))
+});
+
+/// Specific tests that need to have a reduced amount of iterations to complete in a reasonable
+/// amount of time.
+///
+/// Contains the identifier+generator combo to match on, plus the factor to reduce by.
+const EXTEMELY_SLOW_TESTS: &[(Identifier, GeneratorKind, u64)] = &[
+    (Identifier::Fmodf128, GeneratorKind::QuickSpaced, 50),
+    (Identifier::Fmodf128, GeneratorKind::Extensive, 50),
+];
+
+/// Upper bound on the iterations run for one routine by the extensive generator.
+///
+/// Unless [`EXTENSIVE_ITER_ENV`] supplies another value, this is `2^32` (`u32::MAX + 1`),
+/// which lets single-argument `f32` routines and single- or double-argument `f16` routines be
+/// covered exhaustively. `f64` and `f128` can't feasibly be covered exhaustively, but a larger
+/// override allows runs lasting multiple hours.
+pub fn extensive_max_iterations() -> u64 {
+    const DEFAULT_MAX: u64 = 1 << 32;
+    EXTENSIVE_ITER_OVERRIDE.unwrap_or(DEFAULT_MAX)
+}
+
+/// Context passed to [`CheckOutput`].
+#[derive(Clone, Debug, PartialEq, Eq)]
+pub struct CheckCtx {
+    /// Allowed ULP deviation. [`CheckCtx::new`] initializes this from `crate::default_ulp`.
+    pub ulp: u32,
+    pub fn_ident: Identifier, // the function under test
+    pub base_name: BaseName, // set from `fn_ident.base_name()` by `CheckCtx::new`
+    /// Function name.
+    pub fn_name: &'static str,
+    /// The unsuffixed version of the function name.
+    pub base_name_str: &'static str,
+    /// Source of truth for tests.
+    pub basis: CheckBasis,
+    pub gen_kind: GeneratorKind, // which kind of input generator is in use
+    /// If specified, this value will override the value returned by [`iteration_count`].
+    pub override_iterations: Option<u64>,
+}
+
+impl CheckCtx {
+    /// Build a context for `fn_ident`, with `ulp` set to that function's default tolerance.
+    pub fn new(fn_ident: Identifier, basis: CheckBasis, gen_kind: GeneratorKind) -> Self {
+        let placeholder = Self {
+            fn_ident,
+            basis,
+            gen_kind,
+            ulp: 0,
+            fn_name: fn_ident.as_str(),
+            base_name: fn_ident.base_name(),
+            base_name_str: fn_ident.base_name().as_str(),
+            override_iterations: None,
+        };
+        Self { ulp: crate::default_ulp(&placeholder), ..placeholder }
+    }
+
+    /// How many arguments the tested function takes, according to its Rust signature.
+    pub fn input_count(&self) -> usize {
+        self.fn_ident.math_op().rust_sig.args.len()
+    }
+
+    /// Record an iteration count to use in place of the computed one.
+    pub fn override_iterations(&mut self, count: u64) {
+        self.override_iterations = Some(count);
+    }
+}
+
+/// Possible sources of truth that results can be checked against.
+#[derive(Clone, Debug, PartialEq, Eq)]
+pub enum CheckBasis {
+    /// Check against Musl's math sources.
+    Musl,
+    /// Check against infinite precision (MPFR).
+    Mpfr,
+    /// Benchmarks or other times when this is not relevant.
+    None,
+}
+
+/// The different kinds of generators that provide test input, which account for input pattern
+/// and quantity.
+#[derive(Clone, Copy, Debug, PartialEq, Eq)]
+pub enum GeneratorKind {
+    EdgeCases, // `iteration_count` is unimplemented for this kind
+    Extensive, // iteration budget starts from `extensive_max_iterations`
+    QuickSpaced,
+    Random,
+    List, // `iteration_count` and `int_range` are unimplemented for this kind
+}
+
+/// The functions selected for extensive testing via [`EXTENSIVE_ENV`].
+///
+/// The variable is a comma-separated list of function names. `all` selects every function;
+/// `all_f16`, `all_f32`, `all_f64`, and `all_f128` select every function of that float type.
+static EXTENSIVE: LazyLock<Vec<Identifier>> = LazyLock::new(|| {
+    let raw = env::var(EXTENSIVE_ENV).unwrap_or_default();
+    let mut selected = Vec::new();
+
+    let add_float_ty = |out: &mut Vec<_>, fty: FloatTy| {
+        out.extend(Identifier::ALL.iter().copied().filter(|id| id.math_op().float_ty == fty));
+    };
+
+    // Empty entries (e.g. from an unset variable or a trailing comma) are ignored.
+    for name in raw.split(',').filter(|s| !s.is_empty()) {
+        match name {
+            "all" => selected = Identifier::ALL.to_owned(),
+            "all_f16" => add_float_ty(&mut selected, FloatTy::F16),
+            "all_f32" => add_float_ty(&mut selected, FloatTy::F32),
+            "all_f64" => add_float_ty(&mut selected, FloatTy::F64),
+            "all_f128" => add_float_ty(&mut selected, FloatTy::F128),
+            other => {
+                let Some(id) = Identifier::from_str(other) else {
+                    panic!("unrecognized test name `{other}`");
+                };
+                selected.push(id);
+            }
+        }
+    }
+
+    selected
+});
+
+/// Information about the function to be tested and the environment it is tested in.
+#[derive(Debug)]
+struct TestEnv {
+    /// Tests should be reduced because the platform is slow. E.g. 32-bit or emulated.
+    slow_platform: bool,
+    /// The float cannot be tested exhaustively, `f64` or `f128`.
+    large_float_ty: bool,
+    /// The function is listed in [`EXTENSIVE`], so its extensive test should be run.
+    should_run_extensive: bool,
+    /// Multiprecision tests will be run, i.e. the `build-mpfr` feature is enabled.
+    mp_tests_enabled: bool,
+    /// The number of inputs to the function.
+    input_count: usize,
+}
+
+impl TestEnv {
+    /// Collect the platform, build, and environment settings that apply to the function
+    /// identified by `ctx`.
+    fn from_env(ctx: &CheckCtx) -> Self {
+        let op = ctx.fn_ident.math_op();
+
+        // `f64` and `f128` are the types that cannot be tested exhaustively.
+        let large_float_ty = matches!(op.float_ty, FloatTy::F64 | FloatTy::F128);
+
+        // Extensive tests only run for functions listed in `EXTENSIVE`.
+        let should_run_extensive = EXTENSIVE.contains(&ctx.fn_ident);
+
+        let input_count = op.rust_sig.args.len();
+
+        Self {
+            slow_platform: slow_platform(),
+            large_float_ty,
+            should_run_extensive,
+            // Multiprecision tests depend only on the `build-mpfr` feature.
+            mp_tests_enabled: cfg!(feature = "build-mpfr"),
+            input_count,
+        }
+    }
+}
+
+/// Whether iteration counts should start reduced: only in CI, and only on emulated targets,
+/// targets with a `usize` narrower than 64 bits, or x86_64 Apple targets.
+fn slow_platform() -> bool {
+    let narrow_usize = usize::BITS < 64;
+    let x86_apple = cfg!(all(target_arch = "x86_64", target_vendor = "apple"));
+    let slow_on_ci = crate::emulated() || narrow_usize || x86_apple;
+
+    // If not running in CI, there is no need to reduce iteration count.
+    slow_on_ci && crate::ci()
+}
+
+/// The number of iterations to run per argument for a given test.
+///
+/// `argnum` is only used for logging. For multi-argument functions the result is the square or
+/// cube root (rounded up) of the total budget, so `result.pow(arg_count)` approximates it.
+pub fn iteration_count(ctx: &CheckCtx, argnum: usize) -> u64 {
+    let t_env = TestEnv::from_env(ctx);
+
+    // Baseline number of domain tests on fast platforms.
+    let mut domain_iter_count: u64 = 4_000_000;
+
+    // Start with a reduced number of tests on slow platforms.
+    if t_env.slow_platform {
+        domain_iter_count = 100_000;
+    }
+
+    // If we will be running tests against MPFR, we don't need to test as much against musl.
+    // However, there are some platforms where we have to test against musl since MPFR can't be
+    // built.
+    if t_env.mp_tests_enabled && ctx.basis == CheckBasis::Musl {
+        domain_iter_count /= 100;
+    }
+
+    // Run fewer random tests than domain tests.
+    let random_iter_count = domain_iter_count / 100;
+
+    let mut total_iterations = match ctx.gen_kind {
+        GeneratorKind::QuickSpaced => domain_iter_count,
+        GeneratorKind::Random => random_iter_count,
+        GeneratorKind::Extensive => extensive_max_iterations(),
+        GeneratorKind::EdgeCases | GeneratorKind::List => {
+            unimplemented!("shoudn't need `iteration_count` for {:?}", ctx.gen_kind)
+        }
+    };
+
+    // Larger float types get more iterations.
+    if t_env.large_float_ty {
+        if ctx.gen_kind == GeneratorKind::Extensive {
+            // Extensive already has a pretty high test count.
+            total_iterations *= 2;
+        } else {
+            total_iterations *= 4;
+        }
+    }
+
+    // Functions with more arguments get more iterations.
+    let arg_multiplier = 1 << (t_env.input_count - 1);
+    total_iterations *= arg_multiplier;
+
+    // FMA has a huge domain but is reasonably fast to run, so increase another 1.5x.
+    if ctx.base_name == BaseName::Fma {
+        total_iterations = 3 * total_iterations / 2;
+    }
+
+    // Some tests are significantly slower than others and need to be further reduced.
+    if let Some((_id, _gen, scale)) = EXTEMELY_SLOW_TESTS
+        .iter()
+        .find(|(id, generator, _scale)| *id == ctx.fn_ident && *generator == ctx.gen_kind)
+    {
+        // However, do not override if the extensive iteration count has been manually set.
+        if !(ctx.gen_kind == GeneratorKind::Extensive && EXTENSIVE_ITER_OVERRIDE.is_some()) {
+            total_iterations /= scale;
+        }
+    }
+
+    if cfg!(optimizations_enabled) {
+        // Always run at least 10,000 tests.
+        total_iterations = total_iterations.max(10_000);
+    } else {
+        // Without optimizations, just run a quick check regardless of other parameters.
+        total_iterations = 800;
+    }
+
+    let overridden = ctx.override_iterations.is_some();
+    total_iterations = ctx.override_iterations.unwrap_or(total_iterations);
+
+    // Adjust for the number of inputs
+    let ntests = match t_env.input_count {
+        1 => total_iterations,
+        2 => (total_iterations as f64).sqrt().ceil() as u64,
+        3 => (total_iterations as f64).cbrt().ceil() as u64,
+        _ => panic!("test has more than three arguments"),
+    };
+
+    let total = ntests.pow(t_env.input_count.try_into().unwrap());
+
+    let seed_msg = match ctx.gen_kind {
+        GeneratorKind::QuickSpaced | GeneratorKind::Extensive => String::new(),
+        GeneratorKind::Random => {
+            format!(" using `{SEED_ENV}={}`", str::from_utf8(SEED.as_slice()).unwrap())
+        }
+        GeneratorKind::EdgeCases | GeneratorKind::List => unimplemented!(),
+    };
+
+    test_log(&format!(
+        "{gen_kind:?} {basis:?} {fn_ident} arg {arg}/{args}: {ntests} iterations \
+         ({total} total){seed_msg}{omsg}",
+        gen_kind = ctx.gen_kind,
+        basis = ctx.basis,
+        fn_ident = ctx.fn_ident,
+        arg = argnum + 1,
+        args = t_env.input_count,
+        omsg = if overridden { " (overridden)" } else { "" }
+    ));
+
+    ntests
+}
+
+/// The range of values to use for an integer argument of the function under test.
+///
+/// Only `jn`/`yn` are restricted; every other function gets the full `i32` range.
+pub fn int_range(ctx: &CheckCtx, argnum: usize) -> RangeInclusive<i32> {
+    let t_env = TestEnv::from_env(ctx);
+
+    let is_bessel = matches!(ctx.base_name, BaseName::Jn | BaseName::Yn);
+    if !is_bessel {
+        // Nothing else needs its integer argument constrained.
+        return i32::MIN..=i32::MAX;
+    }
+
+    assert_eq!(argnum, 0, "For `jn`/`yn`, only the first argument takes an integer");
+
+    // The integer argument to `jn` is an iteration count. Limit this to ensure tests can be
+    // completed in a reasonable amount of time.
+    let reduced = t_env.slow_platform || !cfg!(optimizations_enabled);
+    let extensive_range = (-0xfff)..=0xfffff;
+
+    // Edge case tests use the same wide range as extensive tests.
+    match ctx.gen_kind {
+        GeneratorKind::Extensive | GeneratorKind::EdgeCases => extensive_range,
+        GeneratorKind::QuickSpaced | GeneratorKind::Random if reduced => (-0xf)..=0xff,
+        GeneratorKind::QuickSpaced | GeneratorKind::Random => (-0xff)..=0xffff,
+        GeneratorKind::List => unimplemented!("shoudn't need range for {:?}", ctx.gen_kind),
+    }
+}
+
+/// The cap on how many asymptotes or specified check points an edge case test visits.
+pub fn check_point_count(ctx: &CheckCtx) -> usize {
+    assert_eq!(
+        ctx.gen_kind,
+        GeneratorKind::EdgeCases,
+        "check_point_count is intended for edge case tests"
+    );
+    let reduced = TestEnv::from_env(ctx).slow_platform || !cfg!(optimizations_enabled);
+    if reduced { 4 } else { 10 }
+}
+
+/// How many values surrounding each point of interest (e.g. asymptotes, inflection points,
+/// extremes) an edge case test should also check.
+pub fn check_near_count(ctx: &CheckCtx) -> u64 {
+    assert_eq!(
+        ctx.gen_kind,
+        GeneratorKind::EdgeCases,
+        "check_near_count is intended for edge case tests"
+    );
+    if !cfg!(optimizations_enabled) {
+        return 8;
+    }
+
+    // Taper based on the number of inputs.
+    match ctx.input_count() {
+        1 | 2 => 100,
+        3 => 50,
+        other => panic!("unexpected argument count {other}"),
+    }
+}
+
+/// Whether the extensive test for this function should be skipped, i.e. it was not requested.
+pub fn skip_extensive_test(ctx: &CheckCtx) -> bool {
+    let requested = TestEnv::from_env(ctx).should_run_extensive;
+    !requested
+}
+
+/// Iteration budget for the `u256` fuzz tests: small without optimizations, reduced when slow.
+pub fn bigint_fuzz_iteration_count() -> u64 {
+    if cfg!(optimizations_enabled) {
+        if slow_platform() { 100_000 } else { 5_000_000 }
+    } else {
+        1000
+    }
+}
diff --git a/crates/libm-test/src/special_case.rs b/crates/libm-test/src/special_case.rs
deleted file mode 100644
index df263d742..000000000
--- a/crates/libm-test/src/special_case.rs
+++ /dev/null
@@ -1,239 +0,0 @@
-//! Configuration for skipping or changing the result for individual test cases (inputs) rather
-//! than ignoring entire tests.
-
-use core::f32;
-
-use crate::{CheckBasis, CheckCtx, Float, Int, TestResult};
-
-/// Type implementing [`IgnoreCase`].
-pub struct SpecialCase;
-
-/// Don't run further validation on this test case.
-const SKIP: Option<TestResult> = Some(Ok(()));
-
-/// Return this to skip checks on a test that currently fails but shouldn't. Looks
-/// the same as skip, but we keep them separate to better indicate purpose.
-const XFAIL: Option<TestResult> = Some(Ok(()));
-
-/// Allow overriding the outputs of specific test cases.
-///
-/// There are some cases where we want to xfail specific cases or handle certain inputs
-/// differently than the rest of calls to `validate`. This provides a hook to do that.
-///
-/// If `None` is returned, checks will proceed as usual. If `Some(result)` is returned, checks
-/// are skipped and the provided result is returned instead.
-///
-/// This gets implemented once per input type, then the functions provide further filtering
-/// based on function name and values.
-///
-/// `ulp` can also be set to adjust the ULP for that specific test, even if `None` is still
-/// returned.
-pub trait MaybeOverride<Input> {
-    fn check_float<F: Float>(
-        _input: Input,
-        _actual: F,
-        _expected: F,
-        _ulp: &mut u32,
-        _ctx: &CheckCtx,
-    ) -> Option<TestResult> {
-        None
-    }
-
-    fn check_int<I: Int>(
-        _input: Input,
-        _actual: I,
-        _expected: I,
-        _ctx: &CheckCtx,
-    ) -> Option<TestResult> {
-        None
-    }
-}
-
-impl MaybeOverride<(f32,)> for SpecialCase {
-    fn check_float<F: Float>(
-        input: (f32,),
-        actual: F,
-        expected: F,
-        _ulp: &mut u32,
-        ctx: &CheckCtx,
-    ) -> Option<TestResult> {
-        if ctx.basis == CheckBasis::Musl {
-            if ctx.fname == "acoshf" && input.0 < -1.0 {
-                // acoshf is undefined for x <= 1.0, but we return a random result at lower
-                // values.
-                return XFAIL;
-            }
-
-            if ctx.fname == "sincosf" {
-                let factor_frac_pi_2 = input.0.abs() / f32::consts::FRAC_PI_2;
-                if (factor_frac_pi_2 - factor_frac_pi_2.round()).abs() < 1e-2 {
-                    // we have a bad approximation near multiples of pi/2
-                    return XFAIL;
-                }
-            }
-
-            if ctx.fname == "expm1f" && input.0 > 80.0 && actual.is_infinite() {
-                // we return infinity but the number is representable
-                return XFAIL;
-            }
-
-            if ctx.fname == "sinhf" && input.0.abs() > 80.0 && actual.is_nan() {
-                // we return some NaN that should be real values or infinite
-                // doesn't seem to happen on x86
-                return XFAIL;
-            }
-
-            if ctx.fname == "lgammaf" || ctx.fname == "lgammaf_r" && input.0 < 0.0 {
-                // loggamma should not be defined for x < 0, yet we both return results
-                return XFAIL;
-            }
-        }
-
-        maybe_check_nan_bits(actual, expected, ctx)
-    }
-}
-
-impl MaybeOverride<(f64,)> for SpecialCase {
-    fn check_float<F: Float>(
-        input: (f64,),
-        actual: F,
-        expected: F,
-        _ulp: &mut u32,
-        ctx: &CheckCtx,
-    ) -> Option<TestResult> {
-        if ctx.basis == CheckBasis::Musl {
-            if cfg!(target_arch = "x86") && ctx.fname == "acosh" && input.0 < 1.0 {
-                // The function is undefined, both implementations return random results
-                return SKIP;
-            }
-
-            if cfg!(x86_no_sse)
-                && ctx.fname == "ceil"
-                && input.0 < 0.0
-                && input.0 > -1.0
-                && expected == F::ZERO
-                && actual == F::ZERO
-            {
-                // musl returns -0.0, we return +0.0
-                return XFAIL;
-            }
-
-            if ctx.fname == "lgamma" || ctx.fname == "lgamma_r" && input.0 < 0.0 {
-                // loggamma should not be defined for x < 0, yet we both return results
-                return XFAIL;
-            }
-        }
-
-        maybe_check_nan_bits(actual, expected, ctx)
-    }
-}
-
-/// Check NaN bits if the function requires it
-fn maybe_check_nan_bits<F: Float>(actual: F, expected: F, ctx: &CheckCtx) -> Option<TestResult> {
-    if !(ctx.canonical_name == "fabs" || ctx.canonical_name == "copysign") {
-        return None;
-    }
-
-    // LLVM currently uses x87 instructions which quieten signalling NaNs to handle the i686
-    // `extern "C"` `f32`/`f64` return ABI.
-    // LLVM issue <https://github.com/llvm/llvm-project/issues/66803>
-    // Rust issue <https://github.com/rust-lang/rust/issues/115567>
-    if cfg!(target_arch = "x86") && ctx.basis == CheckBasis::Musl {
-        return SKIP;
-    }
-
-    // abs and copysign require signaling NaNs to be propagated, so verify bit equality.
-    if actual.to_bits() == expected.to_bits() {
-        return SKIP;
-    } else {
-        Some(Err(anyhow::anyhow!("NaNs have different bitpatterns")))
-    }
-}
-
-impl MaybeOverride<(f32, f32)> for SpecialCase {
-    fn check_float<F: Float>(
-        input: (f32, f32),
-        _actual: F,
-        expected: F,
-        _ulp: &mut u32,
-        ctx: &CheckCtx,
-    ) -> Option<TestResult> {
-        maybe_skip_min_max_nan(input, expected, ctx)
-    }
-}
-impl MaybeOverride<(f64, f64)> for SpecialCase {
-    fn check_float<F: Float>(
-        input: (f64, f64),
-        _actual: F,
-        expected: F,
-        _ulp: &mut u32,
-        ctx: &CheckCtx,
-    ) -> Option<TestResult> {
-        maybe_skip_min_max_nan(input, expected, ctx)
-    }
-}
-
-/// Musl propagates NaNs if one is provided as the input, but we return the other input.
-// F1 and F2 are always the same type, this is just to please generics
-fn maybe_skip_min_max_nan<F1: Float, F2: Float>(
-    input: (F1, F1),
-    expected: F2,
-    ctx: &CheckCtx,
-) -> Option<TestResult> {
-    if (ctx.canonical_name == "fmax" || ctx.canonical_name == "fmin")
-        && (input.0.is_nan() || input.1.is_nan())
-        && expected.is_nan()
-    {
-        return XFAIL;
-    } else {
-        None
-    }
-}
-
-impl MaybeOverride<(i32, f32)> for SpecialCase {
-    fn check_float<F: Float>(
-        input: (i32, f32),
-        _actual: F,
-        _expected: F,
-        ulp: &mut u32,
-        ctx: &CheckCtx,
-    ) -> Option<TestResult> {
-        bessel_prec_dropoff(input, ulp, ctx)
-    }
-}
-impl MaybeOverride<(i32, f64)> for SpecialCase {
-    fn check_float<F: Float>(
-        input: (i32, f64),
-        _actual: F,
-        _expected: F,
-        ulp: &mut u32,
-        ctx: &CheckCtx,
-    ) -> Option<TestResult> {
-        bessel_prec_dropoff(input, ulp, ctx)
-    }
-}
-
-/// Our bessel functions blow up with large N values
-fn bessel_prec_dropoff<F: Float>(
-    input: (i32, F),
-    ulp: &mut u32,
-    ctx: &CheckCtx,
-) -> Option<TestResult> {
-    if ctx.canonical_name == "jn" {
-        if input.0 > 4000 {
-            return XFAIL;
-        } else if input.0 > 2000 {
-            // *ulp = 20_000;
-            *ulp = 20000;
-        } else if input.0 > 1000 {
-            *ulp = 4000;
-        }
-    }
-
-    None
-}
-
-impl MaybeOverride<(f32, f32, f32)> for SpecialCase {}
-impl MaybeOverride<(f64, f64, f64)> for SpecialCase {}
-impl MaybeOverride<(f32, i32)> for SpecialCase {}
-impl MaybeOverride<(f64, i32)> for SpecialCase {}
diff --git a/crates/libm-test/src/test_traits.rs b/crates/libm-test/src/test_traits.rs
index c24ac6e43..c560dade8 100644
--- a/crates/libm-test/src/test_traits.rs
+++ b/crates/libm-test/src/test_traits.rs
@@ -1,22 +1,21 @@
 //! Traits related to testing.
 //!
-//! There are three main traits in this module:
+//! There are two main traits in this module:
 //!
-//! - `GenerateInput`: implemented on any types that create test cases.
 //! - `TupleCall`: implemented on tuples to allow calling them as function arguments.
 //! - `CheckOutput`: implemented on anything that is an output type for validation against an
 //!   expected value.
 
-use std::fmt;
+use std::panic::{RefUnwindSafe, UnwindSafe};
+use std::{fmt, panic};
 
-use anyhow::{Context, bail, ensure};
+use anyhow::{Context, anyhow, bail, ensure};
+use libm::support::Hexf;
 
-use crate::{Float, Hex, Int, MaybeOverride, SpecialCase, TestResult};
-
-/// Implement this on types that can generate a sequence of tuples for test input.
-pub trait GenerateInput<TupleArgs> {
-    fn get_cases(&self) -> impl Iterator<Item = TupleArgs>;
-}
+use crate::precision::CheckAction;
+use crate::{
+    CheckBasis, CheckCtx, Float, GeneratorKind, Int, MaybeOverride, SpecialCase, TestResult,
+};
 
 /// Trait for calling a function with a tuple as arguments.
 ///
@@ -25,43 +24,45 @@ pub trait GenerateInput<TupleArgs> {
 pub trait TupleCall<Func>: fmt::Debug {
     type Output;
     fn call(self, f: Func) -> Self::Output;
-}
 
-/// Context passed to [`CheckOutput`].
-#[derive(Clone, Debug, PartialEq, Eq)]
-pub struct CheckCtx {
-    /// Allowed ULP deviation
-    pub ulp: u32,
-    /// Function name.
-    pub fname: &'static str,
-    /// Return the unsuffixed version of the function name.
-    pub canonical_name: &'static str,
-    /// Source of truth for tests.
-    pub basis: CheckBasis,
-}
-
-impl CheckCtx {
-    pub fn new(ulp: u32, fname: &'static str, basis: CheckBasis) -> Self {
-        let canonical_fname = crate::canonical_name(fname);
-        Self { ulp, fname, canonical_name: canonical_fname, basis }
+    /// Intercept panics and print the input to stderr before continuing.
+    fn call_intercept_panics(self, f: Func) -> Self::Output
+    where
+        Self: RefUnwindSafe + Copy,
+        Func: UnwindSafe,
+    {
+        let res = panic::catch_unwind(|| self.call(f));
+        match res {
+            Ok(v) => v,
+            Err(e) => {
+                eprintln!("panic with the following input: {self:?}");
+                panic::resume_unwind(e)
+            }
+        }
     }
 }
 
-/// Possible items to test against
-#[derive(Clone, Debug, PartialEq, Eq)]
-pub enum CheckBasis {
-    /// Check against Musl's math sources.
-    Musl,
-}
-
 /// A trait to implement on any output type so we can verify it in a generic way.
 pub trait CheckOutput<Input>: Sized {
     /// Validate `self` (actual) and `expected` are the same.
     ///
     /// `input` is only used here for error messages.
-    fn validate<'a>(self, expected: Self, input: Input, ctx: &CheckCtx) -> TestResult;
+    fn validate(self, expected: Self, input: Input, ctx: &CheckCtx) -> TestResult;
+}
+
+/// A helper trait to print something as hex with the correct number of nibbles, e.g. a `u32`
+/// will always print with `0x` followed by 8 digits.
+///
+/// This is only used for printing errors so allocating is okay.
+pub trait Hex: Copy {
+    /// Hex integer syntax.
+    fn hex(self) -> String;
+    /// Hex float syntax.
+    fn hexf(self) -> String;
 }
 
+/* implement `TupleCall` */
+
 impl<T1, R> TupleCall<fn(T1) -> R> for (T1,)
 where
     T1: fmt::Debug,
@@ -125,7 +126,7 @@ where
     }
 }
 
-impl<T1, T2, T3> TupleCall<fn(T1, &mut T2, &mut T3)> for (T1,)
+impl<T1, T2, T3> TupleCall<for<'a> fn(T1, &'a mut T2, &'a mut T3)> for (T1,)
 where
     T1: fmt::Debug,
     T2: fmt::Debug + Default,
@@ -133,7 +134,7 @@ where
 {
     type Output = (T2, T3);
 
-    fn call(self, f: fn(T1, &mut T2, &mut T3)) -> Self::Output {
+    fn call(self, f: for<'a> fn(T1, &'a mut T2, &'a mut T3)) -> Self::Output {
         let mut t2 = T2::default();
         let mut t3 = T3::default();
         f(self.0, &mut t2, &mut t3);
@@ -141,72 +142,267 @@ where
     }
 }
 
-// Implement for floats
-impl<F, Input> CheckOutput<Input> for F
+/* implement `Hex` */
+
+impl<T1> Hex for (T1,)
+where
+    T1: Hex,
+{
+    fn hex(self) -> String {
+        format!("({},)", self.0.hex())
+    }
+
+    fn hexf(self) -> String {
+        format!("({},)", self.0.hexf())
+    }
+}
+
+impl<T1, T2> Hex for (T1, T2)
 where
-    F: Float + Hex,
+    T1: Hex,
+    T2: Hex,
+{
+    fn hex(self) -> String {
+        format!("({}, {})", self.0.hex(), self.1.hex())
+    }
+
+    fn hexf(self) -> String {
+        format!("({}, {})", self.0.hexf(), self.1.hexf())
+    }
+}
+
+impl<T1, T2, T3> Hex for (T1, T2, T3)
+where
+    T1: Hex,
+    T2: Hex,
+    T3: Hex,
+{
+    fn hex(self) -> String {
+        format!("({}, {}, {})", self.0.hex(), self.1.hex(), self.2.hex())
+    }
+
+    fn hexf(self) -> String {
+        format!("({}, {}, {})", self.0.hexf(), self.1.hexf(), self.2.hexf())
+    }
+}
+
+/* trait implementations for ints */
+
+macro_rules! impl_int {
+    ($($ty:ty),*) => {
+        $(
+            impl Hex for $ty {
+                fn hex(self) -> String {
+                    format!("{self:#0width$x}", width = ((Self::BITS / 4) + 2) as usize)
+                }
+
+                fn hexf(self) -> String {
+                    String::new()
+                }
+            }
+
+            impl<Input> $crate::CheckOutput<Input> for $ty
+            where
+                Input: Hex + fmt::Debug,
+                SpecialCase: MaybeOverride<Input>,
+            {
+                fn validate<'a>(
+                    self,
+                    expected: Self,
+                    input: Input,
+                    ctx: &$crate::CheckCtx,
+                ) -> TestResult {
+                    validate_int(self, expected, input, ctx)
+                }
+            }
+        )*
+    };
+}
+
+fn validate_int<I, Input>(actual: I, expected: I, input: Input, ctx: &CheckCtx) -> TestResult
+where
+    I: Int + Hex,
     Input: Hex + fmt::Debug,
-    u32: TryFrom<F::SignedInt, Error: fmt::Debug>,
     SpecialCase: MaybeOverride<Input>,
 {
-    fn validate<'a>(self, expected: Self, input: Input, ctx: &CheckCtx) -> TestResult {
-        // Create a wrapper function so we only need to `.with_context` once.
-        let inner = || -> TestResult {
-            let mut allowed_ulp = ctx.ulp;
-
-            // If the tested function requires a nonstandard test, run it here.
-            if let Some(res) =
-                SpecialCase::check_float(input, self, expected, &mut allowed_ulp, ctx)
-            {
-                return res;
+    let (result, xfail_msg) = match SpecialCase::check_int(input, actual, expected, ctx) {
+        // `require_biteq` forbids overrides.
+        _ if ctx.gen_kind == GeneratorKind::List => (actual == expected, None),
+        CheckAction::AssertSuccess => (actual == expected, None),
+        CheckAction::AssertFailure(msg) => (actual != expected, Some(msg)),
+        CheckAction::Custom(res) => return res,
+        CheckAction::Skip => return Ok(()),
+        CheckAction::AssertWithUlp(_) => panic!("ulp has no meaning for integer checks"),
+    };
+
+    let make_xfail_msg = || match xfail_msg {
+        Some(m) => format!(
+            "expected failure but test passed. Does an XFAIL need to be updated?\n\
+            failed at: {m}",
+        ),
+        None => String::new(),
+    };
+
+    anyhow::ensure!(
+        result,
+        "\
+        \n    input:    {input:?} {ibits}\
+        \n    expected: {expected:<22?} {expbits}\
+        \n    actual:   {actual:<22?} {actbits}\
+        \n    {msg}\
+        ",
+        actbits = actual.hex(),
+        expbits = expected.hex(),
+        ibits = input.hex(),
+        msg = make_xfail_msg()
+    );
+
+    Ok(())
+}
+
+impl_int!(u32, i32, u64, i64);
+
+/* trait implementations for floats */
+
+macro_rules! impl_float {
+    ($($ty:ty),*) => {
+        $(
+            impl Hex for $ty {
+                fn hex(self) -> String {
+                    format!(
+                        "{:#0width$x}",
+                        self.to_bits(),
+                        width = ((Self::BITS / 4) + 2) as usize
+                    )
+                }
+
+                fn hexf(self) -> String {
+                    format!("{}", Hexf(self))
+                }
             }
 
-            // Check when both are NaNs
-            if self.is_nan() && expected.is_nan() {
-                // By default, NaNs have nothing special to check.
-                return Ok(());
-            } else if self.is_nan() || expected.is_nan() {
-                // Check when only one is a NaN
-                bail!("real value != NaN")
+            impl<Input> $crate::CheckOutput<Input> for $ty
+            where
+                Input: Hex + fmt::Debug,
+                SpecialCase: MaybeOverride<Input>,
+            {
+                fn validate<'a>(
+                    self,
+                    expected: Self,
+                    input: Input,
+                    ctx: &$crate::CheckCtx,
+                ) -> TestResult {
+                    validate_float(self, expected, input, ctx)
+                }
             }
+        )*
+    };
+}
 
-            // Make sure that the signs are the same before checing ULP to avoid wraparound
-            let act_sig = self.signum();
-            let exp_sig = expected.signum();
-            ensure!(act_sig == exp_sig, "mismatched signs {act_sig} {exp_sig}");
+fn validate_float<F, Input>(actual: F, expected: F, input: Input, ctx: &CheckCtx) -> TestResult
+where
+    F: Float + Hex,
+    Input: Hex + fmt::Debug,
+    u32: TryFrom<F::SignedInt, Error: fmt::Debug>,
+    SpecialCase: MaybeOverride<Input>,
+{
+    let mut assert_failure_msg = None;
+
+    // Create a wrapper function so we only need to `.with_context` once.
+    let mut inner = || -> TestResult {
+        let mut allowed_ulp = ctx.ulp;
+
+        // Forbid overrides if the items came from an explicit list, as long as we are checking
+        // against either MPFR or the result itself.
+        let require_biteq = ctx.gen_kind == GeneratorKind::List && ctx.basis != CheckBasis::Musl;
+
+        match SpecialCase::check_float(input, actual, expected, ctx) {
+            _ if require_biteq => (),
+            CheckAction::AssertSuccess => (),
+            CheckAction::AssertFailure(msg) => assert_failure_msg = Some(msg),
+            CheckAction::Custom(res) => return res,
+            CheckAction::Skip => return Ok(()),
+            CheckAction::AssertWithUlp(ulp_override) => allowed_ulp = ulp_override,
+        };
 
-            if self.is_infinite() ^ expected.is_infinite() {
-                bail!("mismatched infinities");
+        // Check when both are NaNs
+        if actual.is_nan() && expected.is_nan() {
+            if require_biteq && ctx.basis == CheckBasis::None {
+                ensure!(actual.to_bits() == expected.to_bits(), "mismatched NaN bitpatterns");
             }
+            // By default, NaNs have nothing special to check.
+            return Ok(());
+        } else if actual.is_nan() || expected.is_nan() {
+            // Check when only one is a NaN
+            bail!("real value != NaN")
+        }
 
-            let act_bits = self.to_bits().signed();
-            let exp_bits = expected.to_bits().signed();
+        // Make sure that the signs are the same before checking ULP to avoid wraparound
+        let act_sig = actual.signum();
+        let exp_sig = expected.signum();
+        ensure!(act_sig == exp_sig, "mismatched signs {act_sig:?} {exp_sig:?}");
 
-            let ulp_diff = act_bits.checked_sub(exp_bits).unwrap().abs();
+        if actual.is_infinite() ^ expected.is_infinite() {
+            bail!("mismatched infinities");
+        }
 
-            let ulp_u32 = u32::try_from(ulp_diff)
-                .map_err(|e| anyhow::anyhow!("{e:?}: ulp of {ulp_diff} exceeds u32::MAX"))?;
+        let act_bits = actual.to_bits().signed();
+        let exp_bits = expected.to_bits().signed();
 
-            ensure!(ulp_u32 <= allowed_ulp, "ulp {ulp_diff} > {allowed_ulp}",);
+        let ulp_diff = act_bits.checked_sub(exp_bits).unwrap().abs();
 
-            Ok(())
-        };
+        let ulp_u32 = u32::try_from(ulp_diff)
+            .map_err(|e| anyhow!("{e:?}: ulp of {ulp_diff} exceeds u32::MAX"))?;
+
+        ensure!(ulp_u32 <= allowed_ulp, "ulp {ulp_diff} > {allowed_ulp}",);
+
+        Ok(())
+    };
 
-        inner().with_context(|| {
-            format!(
-                "\
-                \n    input:    {input:?} {ibits}\
-                \n    expected: {expected:<22?} {expbits}\
-                \n    actual:   {self:<22?} {actbits}\
-                ",
-                actbits = self.hex(),
-                expbits = expected.hex(),
-                ibits = input.hex(),
-            )
-        })
+    let mut res = inner();
+
+    if let Some(msg) = assert_failure_msg {
+        // Invert `Ok` and `Err` if the test is an xfail.
+        if res.is_ok() {
+            let e = anyhow!(
+                "expected failure but test passed. Does an XFAIL need to be updated?\n\
+                failed at: {msg}",
+            );
+            res = Err(e)
+        } else {
+            res = Ok(())
+        }
     }
+
+    res.with_context(|| {
+        format!(
+            "\
+            \n    input:    {input:?}\
+            \n    as hex:   {ihex}\
+            \n    as bits:  {ibits}\
+            \n    expected: {expected:<22?} {exphex} {expbits}\
+            \n    actual:   {actual:<22?} {acthex} {actbits}\
+            ",
+            ihex = input.hexf(),
+            ibits = input.hex(),
+            exphex = expected.hexf(),
+            expbits = expected.hex(),
+            actbits = actual.hex(),
+            acthex = actual.hexf(),
+        )
+    })
 }
 
+impl_float!(f32, f64);
+
+#[cfg(f16_enabled)]
+impl_float!(f16);
+
+#[cfg(f128_enabled)]
+impl_float!(f128);
+
+/* trait implementations for compound types */
+
 /// Implement `CheckOutput` for combinations of types.
 macro_rules! impl_tuples {
     ($(($a:ty, $b:ty);)*) => {
@@ -227,12 +423,15 @@ macro_rules! impl_tuples {
                         .with_context(|| format!(
                             "full context:\
                             \n    input:    {input:?} {ibits}\
+                            \n    as hex:   {ihex}\
+                            \n    as bits:  {ibits}\
                             \n    expected: {expected:?} {expbits}\
                             \n    actual:   {self:?} {actbits}\
                             ",
-                            actbits = self.hex(),
-                            expbits = expected.hex(),
+                            ihex = input.hexf(),
                             ibits = input.hex(),
+                            expbits = expected.hex(),
+                            actbits = self.hex(),
                         ))
                 }
             }
diff --git a/crates/libm-test/tests/check_coverage.rs b/crates/libm-test/tests/check_coverage.rs
index ef6d21fdb..c23298686 100644
--- a/crates/libm-test/tests/check_coverage.rs
+++ b/crates/libm-test/tests/check_coverage.rs
@@ -1,60 +1,61 @@
 //! Ensure that `for_each_function!` isn't missing any symbols.
 
-/// Files in `src/` that do not export a testable symbol.
-const ALLOWED_SKIPS: &[&str] = &[
-    // Not a generic test function
-    "fenv",
-    // Nonpublic functions
-    "expo2",
-    "k_cos",
-    "k_cosf",
-    "k_expo2",
-    "k_expo2f",
-    "k_sin",
-    "k_sinf",
-    "k_tan",
-    "k_tanf",
-    "rem_pio2",
-    "rem_pio2_large",
-    "rem_pio2f",
-];
+use std::collections::HashSet;
+use std::env;
+use std::path::Path;
+use std::process::Command;
 
 macro_rules! callback {
     (
         fn_name: $name:ident,
-        CFn: $_CFn:ty,
-        CArgs: $_CArgs:ty,
-        CRet: $_CRet:ty,
-        RustFn: $_RustFn:ty,
-        RustArgs: $_RustArgs:ty,
-        RustRet: $_RustRet:ty,
-        extra: [$push_to:ident],
+        attrs: [$($attr:meta),*],
+        extra: [$set:ident],
     ) => {
-        $push_to.push(stringify!($name));
+        let name = stringify!($name);
+        let new = $set.insert(name);
+        assert!(new, "duplicate function `{name}` in `ALL_OPERATIONS`");
     };
 }
 
 #[test]
 fn test_for_each_function_all_included() {
-    let mut included = Vec::new();
-    let mut missing = Vec::new();
+    let all_functions: HashSet<_> = include_str!("../../../etc/function-list.txt")
+        .lines()
+        .filter(|line| !line.starts_with("#"))
+        .collect();
+
+    let mut tested = HashSet::new();
 
     libm_macros::for_each_function! {
         callback: callback,
-        extra: [included],
+        extra: [tested],
     };
 
-    for f in libm_test::ALL_FUNCTIONS {
-        if !included.contains(f) && !ALLOWED_SKIPS.contains(f) {
-            missing.push(f)
-        }
-    }
-
-    if !missing.is_empty() {
+    let untested = all_functions.difference(&tested);
+    if untested.clone().next().is_some() {
         panic!(
-            "missing tests for the following: {missing:#?} \
+            "missing tests for the following: {untested:#?} \
             \nmake sure any new functions are entered in \
-            `ALL_FUNCTIONS` (in `libm-macros`)."
+            `ALL_OPERATIONS` (in `libm-macros`)."
         );
     }
+    assert_eq!(all_functions, tested);
+}
+
+#[test]
+fn ensure_list_updated() {
+    if libm_test::ci() {
+        // Most CI tests run in Docker where we don't have Python or Rustdoc, so it's easiest
+        // to just run the python file directly when it is available.
+        eprintln!("skipping test; CI runs the python file directly");
+        return;
+    }
+
+    let res = Command::new("python3")
+        .arg(Path::new(env!("CARGO_MANIFEST_DIR")).join("../../etc/update-api-list.py"))
+        .arg("--check")
+        .status()
+        .unwrap();
+
+    assert!(res.success(), "May need to run `./etc/update-api-list.py`");
 }
diff --git a/crates/libm-test/tests/compare_built_musl.rs b/crates/libm-test/tests/compare_built_musl.rs
index 208b8e286..cbb4bd49b 100644
--- a/crates/libm-test/tests/compare_built_musl.rs
+++ b/crates/libm-test/tests/compare_built_musl.rs
@@ -9,44 +9,135 @@
 // There are some targets we can't build musl for
 #![cfg(feature = "build-musl")]
 
-use libm_test::gen::random;
-use libm_test::{CheckBasis, CheckCtx, CheckOutput, TupleCall, musl_allowed_ulp};
-use musl_math_sys as musl;
+use libm_test::generate::{case_list, edge_cases, random, spaced};
+use libm_test::{CheckBasis, CheckCtx, CheckOutput, GeneratorKind, MathOp, TupleCall};
 
-macro_rules! musl_rand_tests {
+const BASIS: CheckBasis = CheckBasis::Musl;
+
+fn musl_runner<Op: MathOp>(
+    ctx: &CheckCtx,
+    cases: impl Iterator<Item = Op::RustArgs>,
+    musl_fn: Op::CFn,
+) {
+    for input in cases {
+        let musl_res = input.call(musl_fn);
+        let crate_res = input.call_intercept_panics(Op::ROUTINE);
+
+        crate_res.validate(musl_res, input, ctx).unwrap();
+    }
+}
+
+/// Test against musl with generators from a domain.
+macro_rules! musl_tests {
     (
         fn_name: $fn_name:ident,
-        CFn: $CFn:ty,
-        CArgs: $CArgs:ty,
-        CRet: $CRet:ty,
-        RustFn: $RustFn:ty,
-        RustArgs: $RustArgs:ty,
-        RustRet: $RustRet:ty,
-        attrs: [$($meta:meta)*]
-    ) => { paste::paste! {
-        #[test]
-        $(#[$meta])*
-        fn [< musl_random_ $fn_name >]() {
-            let fname = stringify!($fn_name);
-            let ulp = musl_allowed_ulp(fname);
-            let cases = random::get_test_cases::<$RustArgs>(fname);
-            let ctx = CheckCtx::new(ulp, fname, CheckBasis::Musl);
-
-            for input in cases {
-                let musl_res = input.call(musl::$fn_name as $CFn);
-                let crate_res = input.call(libm::$fn_name as $RustFn);
-
-                crate_res.validate(musl_res, input, &ctx).unwrap();
+        attrs: [$($attr:meta),*],
+    ) => {
+        paste::paste! {
+            #[test]
+            $(#[$attr])*
+            fn [< musl_case_list_ $fn_name >]() {
+                type Op = libm_test::op::$fn_name::Routine;
+                let ctx = CheckCtx::new(Op::IDENTIFIER, BASIS, GeneratorKind::List);
+                let cases = case_list::get_test_cases_basis::<Op>(&ctx).0;
+                musl_runner::<Op>(&ctx, cases, musl_math_sys::$fn_name);
+            }
+
+            #[test]
+            $(#[$attr])*
+            fn [< musl_random_ $fn_name >]() {
+                type Op = libm_test::op::$fn_name::Routine;
+                let ctx = CheckCtx::new(Op::IDENTIFIER, BASIS, GeneratorKind::Random);
+                let cases = random::get_test_cases::<<Op as MathOp>::RustArgs>(&ctx).0;
+                musl_runner::<Op>(&ctx, cases, musl_math_sys::$fn_name);
+            }
+
+            #[test]
+            $(#[$attr])*
+            fn [< musl_edge_case_ $fn_name >]() {
+                type Op = libm_test::op::$fn_name::Routine;
+                let ctx = CheckCtx::new(Op::IDENTIFIER, BASIS, GeneratorKind::EdgeCases);
+                let cases = edge_cases::get_test_cases::<Op>(&ctx).0;
+                musl_runner::<Op>(&ctx, cases, musl_math_sys::$fn_name);
+            }
+
+            #[test]
+            $(#[$attr])*
+            fn [< musl_quickspace_ $fn_name >]() {
+                type Op = libm_test::op::$fn_name::Routine;
+                let ctx = CheckCtx::new(Op::IDENTIFIER, BASIS, GeneratorKind::QuickSpaced);
+                let cases = spaced::get_test_cases::<Op>(&ctx).0;
+                musl_runner::<Op>(&ctx, cases, musl_math_sys::$fn_name);
             }
         }
-    } };
+    };
 }
 
 libm_macros::for_each_function! {
-    callback: musl_rand_tests,
-    skip: [],
-    attributes: [
-        #[cfg_attr(x86_no_sse, ignore)] // FIXME(correctness): wrong result on i586
-        [exp10, exp10f, exp2, exp2f, rint]
+    callback: musl_tests,
+    attributes: [],
+    skip: [
+        // TODO integer inputs
+        jn,
+        jnf,
+        ldexp,
+        ldexpf,
+        scalbn,
+        scalbnf,
+        yn,
+        ynf,
+
+        // Not provided by musl
+        // verify-sorted-start
+        ceilf128,
+        ceilf16,
+        copysignf128,
+        copysignf16,
+        fabsf128,
+        fabsf16,
+        fdimf128,
+        fdimf16,
+        floorf128,
+        floorf16,
+        fmaf128,
+        fmaxf128,
+        fmaxf16,
+        fmaximum,
+        fmaximum_num,
+        fmaximum_numf,
+        fmaximum_numf128,
+        fmaximum_numf16,
+        fmaximumf,
+        fmaximumf128,
+        fmaximumf16,
+        fminf128,
+        fminf16,
+        fminimum,
+        fminimum_num,
+        fminimum_numf,
+        fminimum_numf128,
+        fminimum_numf16,
+        fminimumf,
+        fminimumf128,
+        fminimumf16,
+        fmodf128,
+        fmodf16,
+        ldexpf128,
+        ldexpf16,
+        rintf128,
+        rintf16,
+        roundeven,
+        roundevenf,
+        roundevenf128,
+        roundevenf16,
+        roundf128,
+        roundf16,
+        scalbnf128,
+        scalbnf16,
+        sqrtf128,
+        sqrtf16,
+        truncf128,
+        truncf16,
+        // verify-sorted-end
     ],
 }
diff --git a/crates/libm-test/tests/multiprecision.rs b/crates/libm-test/tests/multiprecision.rs
new file mode 100644
index 000000000..80b2c7868
--- /dev/null
+++ b/crates/libm-test/tests/multiprecision.rs
@@ -0,0 +1,79 @@
+//! Test with "infinite precision"
+
+#![cfg(feature = "build-mpfr")]
+
+use libm_test::generate::{case_list, edge_cases, random, spaced};
+use libm_test::mpfloat::MpOp;
+use libm_test::{CheckBasis, CheckCtx, CheckOutput, GeneratorKind, MathOp, TupleCall};
+
+const BASIS: CheckBasis = CheckBasis::Mpfr;
+
+fn mp_runner<Op: MathOp + MpOp>(ctx: &CheckCtx, cases: impl Iterator<Item = Op::RustArgs>) {
+    let mut mp_vals = Op::new_mp();
+    for input in cases {
+        let mp_res = Op::run(&mut mp_vals, input);
+        let crate_res = input.call_intercept_panics(Op::ROUTINE);
+
+        crate_res.validate(mp_res, input, ctx).unwrap();
+    }
+}
+
+macro_rules! mp_tests {
+    (
+        fn_name: $fn_name:ident,
+        attrs: [$($attr:meta),*],
+    ) => {
+        paste::paste! {
+            #[test]
+            $(#[$attr])*
+            fn [< mp_case_list_ $fn_name >]() {
+                type Op = libm_test::op::$fn_name::Routine;
+                let ctx = CheckCtx::new(Op::IDENTIFIER, BASIS, GeneratorKind::List);
+                let cases = case_list::get_test_cases_basis::<Op>(&ctx).0;
+                mp_runner::<Op>(&ctx, cases);
+            }
+
+            #[test]
+            $(#[$attr])*
+            fn [< mp_random_ $fn_name >]() {
+                type Op = libm_test::op::$fn_name::Routine;
+                let ctx = CheckCtx::new(Op::IDENTIFIER, BASIS, GeneratorKind::Random);
+                let cases = random::get_test_cases::<<Op as MathOp>::RustArgs>(&ctx).0;
+                mp_runner::<Op>(&ctx, cases);
+            }
+
+            #[test]
+            $(#[$attr])*
+            fn [< mp_edge_case_ $fn_name >]() {
+                type Op = libm_test::op::$fn_name::Routine;
+                let ctx = CheckCtx::new(Op::IDENTIFIER, BASIS, GeneratorKind::EdgeCases);
+                let cases = edge_cases::get_test_cases::<Op>(&ctx).0;
+                mp_runner::<Op>(&ctx, cases);
+            }
+
+            #[test]
+            $(#[$attr])*
+            fn [< mp_quickspace_ $fn_name >]() {
+                type Op = libm_test::op::$fn_name::Routine;
+                let ctx = CheckCtx::new(Op::IDENTIFIER, BASIS, GeneratorKind::QuickSpaced);
+                let cases = spaced::get_test_cases::<Op>(&ctx).0;
+                mp_runner::<Op>(&ctx, cases);
+            }
+        }
+    };
+}
+
+libm_macros::for_each_function! {
+    callback: mp_tests,
+    attributes: [
+        // Also an assertion failure on i686: at `MPFR_ASSERTN (! mpfr_erangeflag_p ())`
+        #[ignore = "large values are infeasible in MPFR"]
+        [jn, jnf, yn, ynf],
+    ],
+    skip: [
+        // FIXME: test needed, see
+        // https://github.com/rust-lang/libm/pull/311#discussion_r1818273392
+        nextafter,
+        nextafterf,
+    ],
+}
diff --git a/crates/libm-test/tests/musl_biteq.rs b/crates/libm-test/tests/musl_biteq.rs
deleted file mode 100644
index f586fd03d..000000000
--- a/crates/libm-test/tests/musl_biteq.rs
+++ /dev/null
@@ -1,6 +0,0 @@
-//! compare
-
-// PowerPC tests are failing on LLVM 13: https://github.com/rust-lang/rust/issues/88520
-#[cfg(not(target_arch = "powerpc64"))]
-#[cfg(all(test, feature = "test-musl-serialized"))]
-include!(concat!(env!("OUT_DIR"), "/musl-tests.rs"));
diff --git a/crates/libm-test/tests/standalone.rs b/crates/libm-test/tests/standalone.rs
new file mode 100644
index 000000000..7b30a3b48
--- /dev/null
+++ b/crates/libm-test/tests/standalone.rs
@@ -0,0 +1,38 @@
+//! Test cases that have both an input and an output, so do not require a basis.
+
+use libm_test::generate::case_list;
+use libm_test::{CheckBasis, CheckCtx, CheckOutput, GeneratorKind, MathOp, TupleCall};
+
+const BASIS: CheckBasis = CheckBasis::None;
+
+fn standalone_runner<Op: MathOp>(
+    ctx: &CheckCtx,
+    cases: impl Iterator<Item = (Op::RustArgs, Op::RustRet)>,
+) {
+    for (input, expected) in cases {
+        let crate_res = input.call_intercept_panics(Op::ROUTINE);
+        crate_res.validate(expected, input, ctx).unwrap();
+    }
+}
+
+/// Generate one `standalone_<fn>` test per routine from its standalone case list.
+macro_rules! standalone_tests {
+    (
+        fn_name: $fn_name:ident,
+        attrs: [$($attr:meta),*],
+    ) => {
+        paste::paste! {
+            #[test]
+            $(#[$attr])*
+            fn [< standalone_ $fn_name >]() {
+                type Op = libm_test::op::$fn_name::Routine;
+                let ctx = CheckCtx::new(Op::IDENTIFIER, BASIS, GeneratorKind::List);
+                let cases = case_list::get_test_cases_standalone::<Op>(&ctx);
+                standalone_runner::<Op>(&ctx, cases);
+            }
+        }
+    };
+}
+libm_macros::for_each_function! {
+    callback: standalone_tests,
+}
diff --git a/crates/libm-test/tests/u256.rs b/crates/libm-test/tests/u256.rs
new file mode 100644
index 000000000..4444036d0
--- /dev/null
+++ b/crates/libm-test/tests/u256.rs
@@ -0,0 +1,147 @@
+//! Test the u256 implementation. The ops already get exercised reasonably well through the `f128`
+//! routines, so this only does a few million fuzz iterations against GMP.
+
+#![cfg(feature = "build-mpfr")]
+
+use std::sync::LazyLock;
+
+use libm::support::{HInt, u256};
+type BigInt = rug::Integer;
+
+use libm_test::bigint_fuzz_iteration_count;
+use libm_test::generate::random::SEED;
+use rand::{Rng, SeedableRng};
+use rand_chacha::ChaCha8Rng;
+use rug::Assign;
+use rug::integer::Order;
+use rug::ops::NotAssign;
+
+static BIGINT_U256_MAX: LazyLock<BigInt> =
+    LazyLock::new(|| BigInt::from_digits(&[u128::MAX, u128::MAX], Order::Lsf));
+
+/// Copied from the test module.
+fn hexu(v: u256) -> String {
+    format!("0x{:032x}{:032x}", v.hi, v.lo)
+}
+
+fn random_u256(rng: &mut ChaCha8Rng) -> u256 {
+    let lo: u128 = rng.random();
+    let hi: u128 = rng.random();
+    u256 { lo, hi }
+}
+
+fn assign_bigint(bx: &mut BigInt, x: u256) {
+    bx.assign_digits(&[x.lo, x.hi], Order::Lsf);
+}
+
+fn from_bigint(bx: &mut BigInt) -> u256 {
+    // Mask to the low 256 bits so the result fits into `[u128; 2]`; ops thus wrap on overflow.
+    *bx &= &*BIGINT_U256_MAX;
+    let mut bres = [0u128, 0];
+    bx.write_digits(&mut bres, Order::Lsf);
+    bx.assign(0);
+    u256 { lo: bres[0], hi: bres[1] }
+}
+
+fn check_one(
+    x: impl FnOnce() -> String,
+    y: impl FnOnce() -> Option<String>,
+    actual: u256,
+    expected: &mut BigInt,
+) {
+    let expected = from_bigint(expected);
+    if actual != expected {
+        let xmsg = x();
+        let ymsg = y().map(|y| format!("y:        {y}\n")).unwrap_or_default();
+        panic!(
+            "Results do not match\n\
+            input:    {xmsg}\n\
+            {ymsg}\
+            actual:   {}\n\
+            expected: {}\
+            ",
+            hexu(actual),
+            hexu(expected),
+        )
+    }
+}
+
+#[test]
+fn mp_u256_bitor() {
+    let mut rng = ChaCha8Rng::from_seed(*SEED);
+    let mut bx = BigInt::new();
+    let mut by = BigInt::new();
+
+    for _ in 0..bigint_fuzz_iteration_count() {
+        let x = random_u256(&mut rng);
+        let y = random_u256(&mut rng);
+        assign_bigint(&mut bx, x);
+        assign_bigint(&mut by, y);
+        let actual = x | y;
+        bx |= &by;
+        check_one(|| hexu(x), || Some(hexu(y)), actual, &mut bx);
+    }
+}
+
+#[test]
+fn mp_u256_not() {
+    let mut rng = ChaCha8Rng::from_seed(*SEED);
+    let mut bx = BigInt::new();
+
+    for _ in 0..bigint_fuzz_iteration_count() {
+        let x = random_u256(&mut rng);
+        assign_bigint(&mut bx, x);
+        let actual = !x;
+        bx.not_assign();
+        check_one(|| hexu(x), || None, actual, &mut bx);
+    }
+}
+
+#[test]
+fn mp_u256_add() {
+    let mut rng = ChaCha8Rng::from_seed(*SEED);
+    let mut bx = BigInt::new();
+    let mut by = BigInt::new();
+
+    for _ in 0..bigint_fuzz_iteration_count() {
+        let x = random_u256(&mut rng);
+        let y = random_u256(&mut rng);
+        assign_bigint(&mut bx, x);
+        assign_bigint(&mut by, y);
+        let actual = x + y;
+        bx += &by;
+        check_one(|| hexu(x), || Some(hexu(y)), actual, &mut bx);
+    }
+}
+
+#[test]
+fn mp_u256_shr() {
+    let mut rng = ChaCha8Rng::from_seed(*SEED);
+    let mut bx = BigInt::new();
+
+    for _ in 0..bigint_fuzz_iteration_count() {
+        let x = random_u256(&mut rng);
+        let shift: u32 = rng.random_range(0..=255); // inclusive: also exercise a shift of 255
+        assign_bigint(&mut bx, x);
+        let actual = x >> shift;
+        bx >>= shift;
+        check_one(|| hexu(x), || Some(shift.to_string()), actual, &mut bx);
+    }
+}
+
+#[test]
+fn mp_u256_widen_mul() {
+    let mut rng = ChaCha8Rng::from_seed(*SEED);
+    let mut bx = BigInt::new();
+    let mut by = BigInt::new();
+
+    for _ in 0..bigint_fuzz_iteration_count() {
+        let x: u128 = rng.random();
+        let y: u128 = rng.random();
+        bx.assign(x);
+        by.assign(y);
+        let actual = x.widen_mul(y);
+        bx *= &by;
+        check_one(|| format!("{x:#034x}"), || Some(format!("{y:#034x}")), actual, &mut bx);
+    }
+}
diff --git a/crates/libm-test/tests/z_extensive/main.rs b/crates/libm-test/tests/z_extensive/main.rs
new file mode 100644
index 000000000..5448cb6ea
--- /dev/null
+++ b/crates/libm-test/tests/z_extensive/main.rs
@@ -0,0 +1,14 @@
+//! `main` is just a wrapper to handle configuration.
+
+#[cfg(not(feature = "build-mpfr"))]
+fn main() {
+    eprintln!("multiprecision not enabled; skipping extensive tests");
+}
+
+#[cfg(feature = "build-mpfr")]
+mod run;
+
+#[cfg(feature = "build-mpfr")]
+fn main() {
+    run::run();
+}
diff --git a/crates/libm-test/tests/z_extensive/run.rs b/crates/libm-test/tests/z_extensive/run.rs
new file mode 100644
index 000000000..b10c231d1
--- /dev/null
+++ b/crates/libm-test/tests/z_extensive/run.rs
@@ -0,0 +1,233 @@
+//! Exhaustive tests for `f16` and `f32`, high-iteration for `f64` and `f128`.
+
+use std::fmt;
+use std::io::{self, IsTerminal};
+use std::sync::atomic::{AtomicU64, Ordering};
+use std::time::Duration;
+
+use indicatif::{ProgressBar, ProgressStyle};
+use libm_test::generate::spaced;
+use libm_test::mpfloat::MpOp;
+use libm_test::{
+    CheckBasis, CheckCtx, CheckOutput, GeneratorKind, MathOp, TestResult, TupleCall,
+    skip_extensive_test,
+};
+use libtest_mimic::{Arguments, Trial};
+use rayon::prelude::*;
+use spaced::SpacedInput;
+
+const BASIS: CheckBasis = CheckBasis::Mpfr;
+const GEN_KIND: GeneratorKind = GeneratorKind::Extensive;
+
+/// Run the extensive test suite.
+pub fn run() {
+    let mut args = Arguments::from_args();
+    // Prevent multiple tests from running in parallel; each test gets parallelized internally.
+    args.test_threads = Some(1);
+    let tests = register_all_tests();
+
+    // With default parallelism, the CPU doesn't saturate. We don't need to be nice to
+    // other processes, so do 1.5x to make sure we use all available resources.
+    let threads = std::thread::available_parallelism().map(Into::into).unwrap_or(0) * 3 / 2;
+    rayon::ThreadPoolBuilder::new().num_threads(threads).build_global().unwrap();
+
+    libtest_mimic::run(&args, tests).exit();
+}
+
+macro_rules! mp_extensive_tests {
+    (
+        fn_name: $fn_name:ident,
+        attrs: [$($attr:meta),*],
+        extra: [$push_to:ident],
+    ) => {
+        $(#[$attr])*
+        register_single_test::<libm_test::op::$fn_name::Routine>(&mut $push_to);
+    };
+}
+
+/// Create a list of tests for consumption by `libtest_mimic`.
+fn register_all_tests() -> Vec<Trial> {
+    let mut all_tests = Vec::new();
+
+    libm_macros::for_each_function! {
+        callback: mp_extensive_tests,
+        extra: [all_tests],
+        skip: [
+            // FIXME: test needed, see
+            // https://github.com/rust-lang/libm/pull/311#discussion_r1818273392
+            nextafter,
+            nextafterf,
+        ],
+    }
+
+    all_tests
+}
+
+/// Add a single test to the list.
+fn register_single_test<Op>(all: &mut Vec<Trial>)
+where
+    Op: MathOp + MpOp,
+    Op::RustArgs: SpacedInput<Op> + Send,
+{
+    let test_name = format!("mp_extensive_{}", Op::NAME);
+    let ctx = CheckCtx::new(Op::IDENTIFIER, BASIS, GEN_KIND);
+    let skip = skip_extensive_test(&ctx);
+
+    let runner = move || {
+        if !cfg!(optimizations_enabled) {
+            panic!("extensive tests should be run with --release");
+        }
+
+        let res = run_single_test::<Op>(&ctx);
+        let e = match res {
+            Ok(()) => return Ok(()),
+            Err(e) => e,
+        };
+
+        // Format with the `Debug` implementation so we get the error cause chain, and print it
+        // here so we see the result immediately (rather than waiting for all tests to conclude).
+        let e = format!("{e:?}");
+        eprintln!("failure testing {}:{e}\n", Op::IDENTIFIER);
+
+        Err(e.into())
+    };
+
+    all.push(Trial::test(test_name, runner).with_ignored_flag(skip));
+}
+
+/// Test runner for a single routine.
+fn run_single_test<Op>(ctx: &CheckCtx) -> TestResult
+where
+    Op: MathOp + MpOp,
+    Op::RustArgs: SpacedInput<Op> + Send,
+{
+    // Small delay before printing anything so other output from the runner has a chance to flush.
+    std::thread::sleep(Duration::from_millis(500));
+    eprintln!();
+
+    let completed = AtomicU64::new(0);
+    let (ref mut cases, total) = spaced::get_test_cases::<Op>(ctx);
+    let pb = Progress::new(Op::NAME, total);
+
+    let test_single_chunk = |mp_vals: &mut Op::MpTy, input_vec: Vec<Op::RustArgs>| -> TestResult {
+        for input in input_vec {
+            // Test the input.
+            let mp_res = Op::run(mp_vals, input);
+            let crate_res = input.call_intercept_panics(Op::ROUTINE);
+            crate_res.validate(mp_res, input, ctx)?;
+
+            let completed = completed.fetch_add(1, Ordering::Relaxed) + 1;
+            pb.update(completed, input);
+        }
+
+        Ok(())
+    };
+
+    // Chunk the cases so Rayon doesn't switch threads between each iterator item. 50k seems near
+    // a performance sweet spot. Ideally we would reuse these allocations rather than discarding,
+    // but that is difficult with Rayon's API.
+    let chunk_size = 50_000;
+    let chunks = std::iter::from_fn(move || {
+        let mut v = Vec::with_capacity(chunk_size);
+        v.extend(cases.take(chunk_size));
+        (!v.is_empty()).then_some(v)
+    });
+
+    // Run the actual tests
+    let res = chunks.par_bridge().try_for_each_init(Op::new_mp, test_single_chunk);
+
+    let real_total = completed.load(Ordering::Relaxed);
+    pb.complete(real_total);
+
+    if res.is_ok() && real_total != total {
+        // Fail the test if our estimate of the total count needs to be updated.
+        panic!("total run {real_total} does not match expected {total}");
+    }
+
+    res
+}
+
+/// Wrapper around a `ProgressBar` that handles styles and non-TTY messages.
+struct Progress {
+    pb: ProgressBar,
+    name_padded: String,
+    final_style: ProgressStyle,
+    is_tty: bool,
+}
+
+impl Progress {
+    const PB_TEMPLATE: &str = "[{elapsed:3} {percent:3}%] {bar:20.cyan/blue} NAME \
+        {human_pos:>13}/{human_len:13} {per_sec:18} eta {eta:8} {msg}";
+    const PB_TEMPLATE_FINAL: &str = "[{elapsed:3} {percent:3}%] {bar:20.cyan/blue} NAME \
+        {human_pos:>13}/{human_len:13} {per_sec:18} done in {elapsed_precise}";
+
+    fn new(name: &str, total: u64) -> Self {
+        eprintln!("starting extensive tests for `{name}`");
+        let name_padded = format!("{name:9}");
+        let is_tty = io::stderr().is_terminal();
+
+        let initial_style =
+            ProgressStyle::with_template(&Self::PB_TEMPLATE.replace("NAME", &name_padded))
+                .unwrap()
+                .progress_chars("##-");
+
+        let final_style =
+            ProgressStyle::with_template(&Self::PB_TEMPLATE_FINAL.replace("NAME", &name_padded))
+                .unwrap()
+                .progress_chars("##-");
+
+        let pb = ProgressBar::new(total);
+        pb.set_style(initial_style);
+
+        Self { pb, final_style, name_padded, is_tty }
+    }
+
+    fn update(&self, completed: u64, input: impl fmt::Debug) {
+        // Infrequently update the progress bar.
+        if completed % 20_000 == 0 {
+            self.pb.set_position(completed);
+        }
+
+        if completed % 500_000 == 0 {
+            self.pb.set_message(format!("input: {input:<24?}"));
+        }
+
+        if !self.is_tty && completed % 5_000_000 == 0 {
+            let len = self.pb.length().unwrap_or_default();
+            eprintln!(
+                "[{elapsed:3?}s {percent:3.0}%] {name} \
+                {human_pos:>10}/{human_len:<10} {per_sec:14.2}/s eta {eta:4}s {input:<24?}",
+                elapsed = self.pb.elapsed().as_secs(),
+                percent = completed as f32 * 100.0 / len as f32,
+                name = self.name_padded,
+                human_pos = completed,
+                human_len = len,
+                per_sec = self.pb.per_sec(),
+                eta = self.pb.eta().as_secs()
+            );
+        }
+    }
+
+    fn complete(self, real_total: u64) {
+        self.pb.set_style(self.final_style);
+        self.pb.set_position(real_total);
+        self.pb.abandon();
+
+        if !self.is_tty {
+            let len = self.pb.length().unwrap_or_default();
+            eprintln!(
+                "[{elapsed:3}s {percent:3.0}%] {name} \
+                {human_pos:>10}/{human_len:<10} {per_sec:14.2}/s done in {elapsed_precise}",
+                elapsed = self.pb.elapsed().as_secs(),
+                percent = real_total as f32 * 100.0 / len as f32,
+                name = self.name_padded,
+                human_pos = real_total,
+                human_len = len,
+                per_sec = self.pb.per_sec(),
+                elapsed_precise = self.pb.elapsed().as_secs(),
+            );
+        }
+
+        eprintln!();
+    }
+}
diff --git a/crates/musl-math-sys/Cargo.toml b/crates/musl-math-sys/Cargo.toml
index 7f6272d79..9e866a970 100644
--- a/crates/musl-math-sys/Cargo.toml
+++ b/crates/musl-math-sys/Cargo.toml
@@ -1,13 +1,13 @@
 [package]
 name = "musl-math-sys"
 version = "0.1.0"
-edition = "2021"
+edition = "2024"
 publish = false
 
 [dependencies]
 
 [dev-dependencies]
-libm = { path = "../../" }
+libm = { path = "../../libm" }
 
 [build-dependencies]
-cc = "1.1.24"
+cc = "1.2.16"
diff --git a/crates/musl-math-sys/build.rs b/crates/musl-math-sys/build.rs
index 03df06c79..f06d84ee2 100644
--- a/crates/musl-math-sys/build.rs
+++ b/crates/musl-math-sys/build.rs
@@ -79,17 +79,12 @@ impl Config {
         let target_features = env::var("CARGO_CFG_TARGET_FEATURE")
             .map(|feats| feats.split(',').map(ToOwned::to_owned).collect())
             .unwrap_or_default();
-
-        // Default to the `{workspace_root}/musl` if not specified
-        let musl_dir = env::var("MUSL_SOURCE_DIR")
-            .map(PathBuf::from)
-            .unwrap_or_else(|_| manifest_dir.parent().unwrap().parent().unwrap().join("musl"));
+        let musl_dir = manifest_dir.join("musl");
 
         let target_arch = env::var("CARGO_CFG_TARGET_ARCH").unwrap();
         let musl_arch = if target_arch == "x86" { "i386".to_owned() } else { target_arch.clone() };
 
         println!("cargo::rerun-if-changed={}/c_patches", manifest_dir.display());
-        println!("cargo::rerun-if-env-changed=MUSL_SOURCE_DIR");
         println!("cargo::rerun-if-changed={}", musl_dir.display());
 
         Self {
@@ -111,20 +106,17 @@ impl Config {
 /// Build musl math symbols to a static library
 fn build_musl_math(cfg: &Config) {
     let musl_dir = &cfg.musl_dir;
-    assert!(
-        musl_dir.exists(),
-        "musl source is missing. it can be downloaded with ./ci/download-musl.sh"
-    );
-
     let math = musl_dir.join("src/math");
     let arch_dir = musl_dir.join("arch").join(&cfg.musl_arch);
+    assert!(math.exists(), "musl source not found. Is the submodule up to date?");
+
     let source_map = find_math_source(&math, cfg);
     let out_path = cfg.out_dir.join(format!("lib{LIB_NAME}.a"));
 
     // Run configuration steps. Usually done as part of the musl `Makefile`.
     let obj_include = cfg.out_dir.join("musl_obj/include");
     fs::create_dir_all(&obj_include).unwrap();
-    fs::create_dir_all(&obj_include.join("bits")).unwrap();
+    fs::create_dir_all(obj_include.join("bits")).unwrap();
     let sed_stat = Command::new("sed")
         .arg("-f")
         .arg(musl_dir.join("tools/mkalltypes.sed"))
@@ -151,7 +143,6 @@ fn build_musl_math(cfg: &Config) {
         .flag_if_supported("-ffreestanding")
         .flag_if_supported("-nostdinc")
         .define("_ALL_SOURCE", "1")
-        .opt_level(3)
         .define(
             "ROOT_INCLUDE_FEATURES",
             Some(musl_dir.join("include/features.h").to_str().unwrap()),
diff --git a/crates/musl-math-sys/musl b/crates/musl-math-sys/musl
new file mode 160000
index 000000000..61399d4bd
--- /dev/null
+++ b/crates/musl-math-sys/musl
@@ -0,0 +1 @@
+Subproject commit 61399d4bd02ae1ec03068445aa7ffe9174466bfd
diff --git a/crates/musl-math-sys/src/lib.rs b/crates/musl-math-sys/src/lib.rs
index fe3c89229..6a4bf4859 100644
--- a/crates/musl-math-sys/src/lib.rs
+++ b/crates/musl-math-sys/src/lib.rs
@@ -7,14 +7,16 @@ use std::ffi::{c_char, c_int, c_long};
 /// unsound.
 macro_rules! functions {
     ( $(
+        $( #[$meta:meta] )*
         $pfx_name:ident: $name:ident( $($arg:ident: $aty:ty),+ ) -> $rty:ty;
     )* ) => {
-        extern "C" {
+        unsafe extern "C" {
             $( fn $pfx_name( $($arg: $aty),+ ) -> $rty; )*
         }
 
         $(
             // Expose a safe version
+            $( #[$meta] )*
             pub fn $name( $($arg: $aty),+ ) -> $rty {
                 // SAFETY: FFI calls with no preconditions
                 unsafe { $pfx_name( $($arg),+ ) }
@@ -231,8 +233,13 @@ functions! {
     musl_logf: logf(a: f32) -> f32;
     musl_modf: modf(a: f64, b: &mut f64) -> f64;
     musl_modff: modff(a: f32, b: &mut f32) -> f32;
+
+    // FIXME: these need to be unsafe
+    #[allow(clippy::not_unsafe_ptr_arg_deref)]
     musl_nan: nan(a: *const c_char) -> f64;
+    #[allow(clippy::not_unsafe_ptr_arg_deref)]
     musl_nanf: nanf(a: *const c_char) -> f32;
+
     musl_nearbyint: nearbyint(a: f64) -> f64;
     musl_nearbyintf: nearbyintf(a: f32) -> f32;
     musl_nextafter: nextafter(a: f64, b: f64) -> f64;
@@ -275,5 +282,6 @@ functions! {
     musl_y0f: y0f(a: f32) -> f32;
     musl_y1: y1(a: f64) -> f64;
     musl_y1f: y1f(a: f32) -> f32;
+    musl_yn: yn(a: c_int, b: f64) -> f64;
     musl_ynf: ynf(a: c_int, b: f32) -> f32;
 }
diff --git a/crates/util/Cargo.toml b/crates/util/Cargo.toml
new file mode 100644
index 000000000..4bcb97472
--- /dev/null
+++ b/crates/util/Cargo.toml
@@ -0,0 +1,18 @@
+[package]
+name = "util"
+version = "0.1.0"
+edition = "2024"
+publish = false
+
+[features]
+default = ["build-musl", "build-mpfr", "unstable-float"] # all three features below are on by default
+build-musl = ["libm-test/build-musl", "dep:musl-math-sys"] # also enables the optional `musl-math-sys` dependency
+build-mpfr = ["libm-test/build-mpfr", "dep:rug"] # also enables the optional `rug` dependency
+unstable-float = ["libm/unstable-float", "libm-test/unstable-float", "rug?/nightly-float"] # `rug?/` only takes effect if `rug` is enabled elsewhere
+
+[dependencies]
+libm = { path = "../../libm", default-features = false }
+libm-macros = { path = "../libm-macros" }
+libm-test = { path = "../libm-test", default-features = false }
+musl-math-sys = { path = "../musl-math-sys", optional = true }
+rug = { version = "1.27.0", optional = true, default-features = false, features = ["float", "std"] }
diff --git a/crates/util/build.rs b/crates/util/build.rs
new file mode 100644
index 000000000..a1be41275
--- /dev/null
+++ b/crates/util/build.rs
@@ -0,0 +1,10 @@
+#![allow(unexpected_cfgs)]
+
+#[path = "../../libm/configure.rs"]
+mod configure;
+
+/// Build-script entry point.
+///
+/// Asks Cargo to re-run this script when the shared `configure.rs` changes, then emits the
+/// libm configuration built from the environment.
+fn main() {
+    println!("cargo:rerun-if-changed=../../libm/configure.rs");
+    configure::emit_libm_config(&configure::Config::from_env());
+}
diff --git a/crates/util/src/main.rs b/crates/util/src/main.rs
new file mode 100644
index 000000000..ef70ec903
--- /dev/null
+++ b/crates/util/src/main.rs
@@ -0,0 +1,382 @@
+//! Helper CLI utility for common tasks.
+
+#![cfg_attr(f16_enabled, feature(f16))]
+#![cfg_attr(f128_enabled, feature(f128))]
+
+use std::any::type_name;
+use std::env;
+use std::num::ParseIntError;
+use std::str::FromStr;
+
+use libm::support::{Hexf, hf32, hf64};
+#[cfg(feature = "build-mpfr")]
+use libm_test::mpfloat::MpOp;
+use libm_test::{MathOp, TupleCall};
+#[cfg(feature = "build-mpfr")]
+use rug::az::{self, Az};
+
+/// Help text printed by `main` when the command line is not recognized.
+const USAGE: &str = "\
+usage:
+
+cargo run -p util -- <SUBCOMMAND>
+
+SUBCOMMAND:
+    eval <BASIS> <OP> inputs...
+        Evaluate the expression with a given basis. This can be useful for
+        running routines with a debugger, or quickly checking input. Examples:
+        * eval musl sinf 1.234 # print the results of musl sinf(1.234f32)
+        * eval mpfr pow 1.234 2.432 # print the results of mpfr pow(1.234, 2.432)
+";
+
+/// CLI entry point: dispatch `eval <BASIS> <OP> inputs...` to `do_eval`, otherwise print the
+/// usage text together with the unrecognized arguments and exit with status 1.
+fn main() {
+    let owned: Vec<String> = env::args().collect();
+    let str_args: Vec<&str> = owned.iter().map(String::as_str).collect();
+
+    // Skip the program name before matching on the subcommand.
+    if let ["eval", basis, op, inputs @ ..] = &str_args[1..] {
+        do_eval(basis, op, inputs);
+        return;
+    }
+
+    println!("{USAGE}\nunrecognized input `{str_args:?}`");
+    std::process::exit(1);
+}
+
+macro_rules! handle_call { // callback for `for_each_function!` in `do_eval`: one `if $op == "<fn_name>"` check per routine
+    (
+        fn_name: $fn_name:ident,
+        CFn: $CFn:ty,
+        RustFn: $RustFn:ty,
+        RustArgs: $RustArgs:ty,
+        attrs: [$($attr:meta),*],
+        extra: ($basis:ident, $op:ident, $inputs:ident), // names of `do_eval`'s three parameters
+        fn_extra: $musl_fn:expr, // `Option` of the musl function; `None` when `do_eval` lists the routine as having none
+    ) => {
+        $(#[$attr])*
+        if $op == stringify!($fn_name) {
+            type Op = libm_test::op::$fn_name::Routine;
+
+            let input = <$RustArgs>::parse($inputs); // argument tuple built from the CLI strings
+            let libm_fn: <Op as MathOp>::RustFn = libm::$fn_name;
+
+            let output = match $basis {
+                "libm" => input.call_intercept_panics(libm_fn),
+                #[cfg(feature = "build-musl")]
+                "musl" => {
+                    let musl_fn: <Op as MathOp>::CFn =
+                        $musl_fn.unwrap_or_else(|| panic!("no musl function for {}", $op));
+                    input.call(musl_fn)
+                }
+                #[cfg(feature = "build-mpfr")]
+                "mpfr" => {
+                    let mut mp = <Op as MpOp>::new_mp();
+                    Op::run(&mut mp, input)
+                }
+                _ => panic!("unrecognized or disabled basis '{}'", $basis), // also reached when the basis's feature is compiled out
+            };
+            println!("{output:?} {:x}", Hexf(output)); // `Debug` form, then the `Hexf` hex form
+            return; // leaves the enclosing function, skipping its trailing "no operation" panic
+        }
+    };
+}
+
+/// Evaluate the specified operation with a given basis and print the result.
+fn do_eval(basis: &str, op: &str, inputs: &[&str]) {
+    libm_macros::for_each_function! {
+        callback: handle_call,
+        emit_types: [CFn, RustFn, RustArgs],
+        extra: (basis, op, inputs),
+        fn_extra: match MACRO_FN_NAME { // value passed to `handle_call` as `fn_extra`
+            ceilf128 // routines in this list get `None`, i.e. no musl function
+            | ceilf16
+            | copysignf128
+            | copysignf16
+            | fabsf128
+            | fabsf16
+            | fdimf128
+            | fdimf16
+            | floorf128
+            | floorf16
+            | fmaf128
+            | fmaxf128
+            | fmaxf16
+            | fmaximum
+            | fmaximum_num
+            | fmaximum_numf
+            | fmaximum_numf128
+            | fmaximum_numf16
+            | fmaximumf
+            | fmaximumf128
+            | fmaximumf16
+            | fminf128
+            | fminf16
+            | fminimum
+            | fminimum_num
+            | fminimum_numf
+            | fminimum_numf128
+            | fminimum_numf16
+            | fminimumf
+            | fminimumf128
+            | fminimumf16
+            | fmodf128
+            | fmodf16
+            | ldexpf128
+            | ldexpf16
+            | rintf128
+            | rintf16
+            | roundeven
+            | roundevenf
+            | roundevenf128
+            | roundevenf16
+            | roundf128
+            | roundf16
+            | scalbnf128
+            | scalbnf16
+            | sqrtf128
+            | sqrtf16
+            | truncf128
+            | truncf16  => None,
+            _ => Some(musl_math_sys::MACRO_FN_NAME) // presumably `MACRO_FN_NAME` expands to each routine's name; confirm in libm-macros
+        }
+    }
+
+    panic!("no operation matching {op}"); // only reached when no `handle_call` check matched `op` and returned
+}
+
+/// Parse a tuple of routine arguments from a slice of strings, one string per tuple element.
+trait ParseTuple {
+    fn parse(input: &[&str]) -> Self;
+}
+
+/// Implement `ParseTuple` for the argument tuples built from float type `$fty`: `(F,)`,
+/// `(F, F)`, `(F, i32)`, `(i32, F)` and `(F, F, F)`.
+///
+/// Each impl first asserts that the number of input strings matches the tuple's arity, then
+/// parses the elements left to right with `parse`.
+macro_rules! impl_parse_tuple {
+    ($fty:ty) => {
+        impl ParseTuple for ($fty,) {
+            fn parse(input: &[&str]) -> Self {
+                assert_eq!(input.len(), 1, "expected a single argument, got {input:?}");
+                let only = parse(input, 0);
+                (only,)
+            }
+        }
+
+        impl ParseTuple for ($fty, $fty) {
+            fn parse(input: &[&str]) -> Self {
+                assert_eq!(input.len(), 2, "expected two arguments, got {input:?}");
+                let first = parse(input, 0);
+                let second = parse(input, 1);
+                (first, second)
+            }
+        }
+
+        impl ParseTuple for ($fty, i32) {
+            fn parse(input: &[&str]) -> Self {
+                assert_eq!(input.len(), 2, "expected two arguments, got {input:?}");
+                let float = parse(input, 0);
+                let int = parse(input, 1);
+                (float, int)
+            }
+        }
+
+        impl ParseTuple for (i32, $fty) {
+            fn parse(input: &[&str]) -> Self {
+                assert_eq!(input.len(), 2, "expected two arguments, got {input:?}");
+                let int = parse(input, 0);
+                let float = parse(input, 1);
+                (int, float)
+            }
+        }
+
+        impl ParseTuple for ($fty, $fty, $fty) {
+            fn parse(input: &[&str]) -> Self {
+                assert_eq!(input.len(), 3, "expected three arguments, got {input:?}");
+                let first = parse(input, 0);
+                let second = parse(input, 1);
+                let third = parse(input, 2);
+                (first, second, third)
+            }
+        }
+    };
+}
+
+/// Same set of `ParseTuple` impls as `impl_parse_tuple`, but float elements are parsed with
+/// `parse_rug` while `i32` elements still use `parse`.
+#[allow(unused_macros)]
+#[cfg(feature = "build-mpfr")]
+macro_rules! impl_parse_tuple_via_rug {
+    ($fty:ty) => {
+        impl ParseTuple for ($fty,) {
+            fn parse(input: &[&str]) -> Self {
+                assert_eq!(input.len(), 1, "expected a single argument, got {input:?}");
+                let only = parse_rug(input, 0);
+                (only,)
+            }
+        }
+
+        impl ParseTuple for ($fty, $fty) {
+            fn parse(input: &[&str]) -> Self {
+                assert_eq!(input.len(), 2, "expected two arguments, got {input:?}");
+                let first = parse_rug(input, 0);
+                let second = parse_rug(input, 1);
+                (first, second)
+            }
+        }
+
+        impl ParseTuple for ($fty, i32) {
+            fn parse(input: &[&str]) -> Self {
+                assert_eq!(input.len(), 2, "expected two arguments, got {input:?}");
+                let float = parse_rug(input, 0);
+                let int = parse(input, 1);
+                (float, int)
+            }
+        }
+
+        impl ParseTuple for (i32, $fty) {
+            fn parse(input: &[&str]) -> Self {
+                assert_eq!(input.len(), 2, "expected two arguments, got {input:?}");
+                let int = parse(input, 0);
+                let float = parse_rug(input, 1);
+                (int, float)
+            }
+        }
+
+        impl ParseTuple for ($fty, $fty, $fty) {
+            fn parse(input: &[&str]) -> Self {
+                assert_eq!(input.len(), 3, "expected three arguments, got {input:?}");
+                let first = parse_rug(input, 0);
+                let second = parse_rug(input, 1);
+                let third = parse_rug(input, 2);
+                (first, second, third)
+            }
+        }
+    };
+}
+
+// Fallback for when Rug is not built: the same `ParseTuple` impls exist, but every `parse` panics.
+#[allow(unused_macros)]
+#[cfg(not(feature = "build-mpfr"))]
+macro_rules! impl_parse_tuple_via_rug {
+    ($ty:ty) => {
+        impl ParseTuple for ($ty,) {
+            fn parse(_input: &[&str]) -> Self {
+                panic!("parsing this type requires the `build-mpfr` feature")
+            }
+        }
+
+        impl ParseTuple for ($ty, $ty) {
+            fn parse(_input: &[&str]) -> Self {
+                panic!("parsing this type requires the `build-mpfr` feature")
+            }
+        }
+
+        impl ParseTuple for ($ty, i32) {
+            fn parse(_input: &[&str]) -> Self {
+                panic!("parsing this type requires the `build-mpfr` feature")
+            }
+        }
+
+        impl ParseTuple for (i32, $ty) {
+            fn parse(_input: &[&str]) -> Self {
+                panic!("parsing this type requires the `build-mpfr` feature")
+            }
+        }
+
+        impl ParseTuple for ($ty, $ty, $ty) {
+            fn parse(_input: &[&str]) -> Self {
+                panic!("parsing this type requires the `build-mpfr` feature")
+            }
+        }
+    };
+}
+
+impl_parse_tuple!(f32); // `f32` and `f64` tuples are parsed with `parse`
+impl_parse_tuple!(f64);
+
+#[cfg(f16_enabled)]
+impl_parse_tuple_via_rug!(f16); // `f16`/`f128` tuples use `parse_rug`, or the panicking fallback without `build-mpfr`
+#[cfg(f128_enabled)]
+impl_parse_tuple_via_rug!(f128);
+
+/// Parse element `idx` of `input` as a `T`, panicking with a readable message on failure.
+///
+/// Strings starting with `0x` or `-0x` go through `FromStrRadix` with radix 16, strings
+/// starting with `0b` with radix 2, and everything else through `FromStr`.
+fn parse<T: FromStr + FromStrRadix>(input: &[&str], idx: usize) -> T {
+    let s = input[idx];
+
+    // Pick the radix from the prefix; `None` means plain `FromStr` parsing.
+    let radix = if s.starts_with("0x") || s.starts_with("-0x") {
+        Some(16)
+    } else if s.starts_with("0b") {
+        Some(2)
+    } else {
+        None
+    };
+
+    let parsed = match radix {
+        Some(radix) => T::from_str_radix(s, radix).ok(),
+        None => s.parse().ok(),
+    };
+
+    parsed.unwrap_or_else(|| panic!("invalid {} input '{s}'", type_name::<T>()))
+}
+
+/// Try to parse the float type going via `rug`, for `f16` and `f128` which don't yet implement
+/// `FromStr`.
+///
+/// Inputs starting with `0x` or `-0x` are handed to `FromStrRadix` with radix 16 and inputs
+/// starting with `0b` with radix 2. Anything else is parsed by `rug::Float::parse`, stored in a
+/// `rug::Float` created with `F::BITS` bits of precision, and cast to `F`.
+///
+/// Panics with an "invalid ... input" message when the string cannot be parsed, and panics if
+/// `idx` is out of bounds for `input`.
+#[cfg(feature = "build-mpfr")]
+fn parse_rug<F>(input: &[&str], idx: usize) -> F
+where
+    F: libm_test::Float + FromStrRadix,
+    rug::Float: az::Cast<F>,
+{
+    let s = input[idx];
+
+    let msg = || format!("invalid {} input '{s}'", type_name::<F>());
+
+    // Accept a leading minus sign on hex input, the same way `parse` does, so negative hex
+    // floats are not passed on to `rug::Float::parse`.
+    if s.starts_with("0x") || s.starts_with("-0x") {
+        return F::from_str_radix(s, 16).unwrap_or_else(|_| panic!("{}", msg()));
+    }
+
+    if s.starts_with("0b") {
+        return F::from_str_radix(s, 2).unwrap_or_else(|_| panic!("{}", msg()));
+    }
+
+    let x = rug::Float::parse(s).unwrap_or_else(|_| panic!("{}", msg()));
+    let x = rug::Float::with_val(F::BITS, x);
+    x.az()
+}
+
+trait FromStrRadix: Sized { // parse `Self` from a string in the given radix; the impls here expect the `0x`/`0b` prefix to still be present
+    fn from_str_radix(s: &str, radix: u32) -> Result<Self, ParseIntError>;
+}
+
+impl FromStrRadix for i32 {
+    /// Parse an `i32` from a prefixed string such as `0x1f`, `-0x1f` or `0b101`.
+    ///
+    /// A leading `-` is split off before the radix prefix is removed and then re-attached to
+    /// the digits. `parse` forwards `-0x...` input here, and without this step the prefix
+    /// stripping would panic because the string does not start with `0x`.
+    fn from_str_radix(s: &str, radix: u32) -> Result<Self, ParseIntError> {
+        let (sign, unsigned) = match s.strip_prefix('-') {
+            Some(rest) => ("-", rest),
+            None => ("", s),
+        };
+        let digits = strip_radix_prefix(unsigned, radix);
+
+        // Let the standard parser apply the sign so that `i32::MIN` is representable.
+        i32::from_str_radix(&format!("{sign}{digits}"), radix)
+    }
+}
+
+#[cfg(f16_enabled)]
+impl FromStrRadix for f16 {
+    /// Parse an `f16` as a hex float literal when `radix` is 16 and the input contains `p`;
+    /// otherwise treat the digits after the radix prefix as the `u16` bit pattern.
+    fn from_str_radix(s: &str, radix: u32) -> Result<Self, ParseIntError> {
+        let is_hex_float = radix == 16 && s.contains("p");
+        if is_hex_float {
+            Ok(libm::support::hf16(s))
+        } else {
+            let digits = strip_radix_prefix(s, radix);
+            let bits = u16::from_str_radix(digits, radix)?;
+            Ok(Self::from_bits(bits))
+        }
+    }
+}
+
+impl FromStrRadix for f32 {
+    /// Parse an `f32` either as a hex float literal (radix 16 input containing `p`) or as the
+    /// raw `u32` bit pattern written in the given radix.
+    fn from_str_radix(s: &str, radix: u32) -> Result<Self, ParseIntError> {
+        match (radix, s.contains("p")) {
+            // Parse as hex float
+            (16, true) => Ok(hf32(s)),
+            // Otherwise the digits are the integer representation of the float.
+            _ => {
+                let bits = u32::from_str_radix(strip_radix_prefix(s, radix), radix)?;
+                Ok(Self::from_bits(bits))
+            }
+        }
+    }
+}
+
+impl FromStrRadix for f64 {
+    /// Parse an `f64` either as a hex float literal (radix 16 input containing `p`) or as the
+    /// raw `u64` bit pattern written in the given radix.
+    fn from_str_radix(s: &str, radix: u32) -> Result<Self, ParseIntError> {
+        // Only radix-16 input can be a hex float. Checking the radix matches the `f16`, `f32`
+        // and `f128` impls and keeps malformed binary input on the `Result` path below.
+        if radix == 16 && s.contains("p") {
+            return Ok(hf64(s));
+        }
+
+        let s = strip_radix_prefix(s, radix);
+        u64::from_str_radix(s, radix).map(Self::from_bits)
+    }
+}
+
+#[cfg(f128_enabled)]
+impl FromStrRadix for f128 {
+    /// Parse an `f128` as a hex float literal when `radix` is 16 and the input contains `p`;
+    /// otherwise treat the digits after the radix prefix as the `u128` bit pattern.
+    fn from_str_radix(s: &str, radix: u32) -> Result<Self, ParseIntError> {
+        // Not a hex float: interpret the digits as raw bits.
+        if radix != 16 || !s.contains("p") {
+            let digits = strip_radix_prefix(s, radix);
+            return u128::from_str_radix(digits, radix).map(Self::from_bits);
+        }
+
+        Ok(libm::support::hf128(s))
+    }
+}
+
+/// Remove the `0x` (radix 16) or `0b` (radix 2) prefix from `s`; for any other radix `s` is
+/// returned as-is.
+///
+/// If the expected prefix is missing (for example `-0x10`, which `parse` forwards with radix
+/// 16), `s` is returned unchanged instead of panicking. The leftover prefix characters are not
+/// valid digits, so the integer parse that follows returns an error the caller can report.
+fn strip_radix_prefix(s: &str, radix: u32) -> &str {
+    let prefix = match radix {
+        16 => "0x",
+        2 => "0b",
+        _ => return s,
+    };
+    s.strip_prefix(prefix).unwrap_or(s)
+}
diff --git a/etc/function-definitions.json b/etc/function-definitions.json
new file mode 100644
index 000000000..3e33343c4
--- /dev/null
+++ b/etc/function-definitions.json
@@ -0,0 +1,1069 @@
+{
+    "__comment": "Autogenerated by update-api-list.py. List of files that define a function with a given name. This file is checked in to make it obvious if refactoring breaks things",
+    "acos": {
+        "sources": [
+            "libm/src/math/acos.rs"
+        ],
+        "type": "f64"
+    },
+    "acosf": {
+        "sources": [
+            "libm/src/math/acosf.rs"
+        ],
+        "type": "f32"
+    },
+    "acosh": {
+        "sources": [
+            "libm/src/math/acosh.rs"
+        ],
+        "type": "f64"
+    },
+    "acoshf": {
+        "sources": [
+            "libm/src/math/acoshf.rs"
+        ],
+        "type": "f32"
+    },
+    "asin": {
+        "sources": [
+            "libm/src/math/asin.rs"
+        ],
+        "type": "f64"
+    },
+    "asinf": {
+        "sources": [
+            "libm/src/math/asinf.rs"
+        ],
+        "type": "f32"
+    },
+    "asinh": {
+        "sources": [
+            "libm/src/math/asinh.rs"
+        ],
+        "type": "f64"
+    },
+    "asinhf": {
+        "sources": [
+            "libm/src/math/asinhf.rs"
+        ],
+        "type": "f32"
+    },
+    "atan": {
+        "sources": [
+            "libm/src/math/atan.rs"
+        ],
+        "type": "f64"
+    },
+    "atan2": {
+        "sources": [
+            "libm/src/math/atan2.rs"
+        ],
+        "type": "f64"
+    },
+    "atan2f": {
+        "sources": [
+            "libm/src/math/atan2f.rs"
+        ],
+        "type": "f32"
+    },
+    "atanf": {
+        "sources": [
+            "libm/src/math/atanf.rs"
+        ],
+        "type": "f32"
+    },
+    "atanh": {
+        "sources": [
+            "libm/src/math/atanh.rs"
+        ],
+        "type": "f64"
+    },
+    "atanhf": {
+        "sources": [
+            "libm/src/math/atanhf.rs"
+        ],
+        "type": "f32"
+    },
+    "cbrt": {
+        "sources": [
+            "libm/src/math/cbrt.rs"
+        ],
+        "type": "f64"
+    },
+    "cbrtf": {
+        "sources": [
+            "libm/src/math/cbrtf.rs"
+        ],
+        "type": "f32"
+    },
+    "ceil": {
+        "sources": [
+            "libm/src/math/arch/i586.rs",
+            "libm/src/math/arch/wasm32.rs",
+            "libm/src/math/ceil.rs",
+            "libm/src/math/generic/ceil.rs"
+        ],
+        "type": "f64"
+    },
+    "ceilf": {
+        "sources": [
+            "libm/src/math/arch/wasm32.rs",
+            "libm/src/math/ceil.rs",
+            "libm/src/math/generic/ceil.rs"
+        ],
+        "type": "f32"
+    },
+    "ceilf128": {
+        "sources": [
+            "libm/src/math/ceil.rs",
+            "libm/src/math/generic/ceil.rs"
+        ],
+        "type": "f128"
+    },
+    "ceilf16": {
+        "sources": [
+            "libm/src/math/ceil.rs",
+            "libm/src/math/generic/ceil.rs"
+        ],
+        "type": "f16"
+    },
+    "copysign": {
+        "sources": [
+            "libm/src/math/copysign.rs",
+            "libm/src/math/generic/copysign.rs"
+        ],
+        "type": "f64"
+    },
+    "copysignf": {
+        "sources": [
+            "libm/src/math/copysign.rs",
+            "libm/src/math/generic/copysign.rs"
+        ],
+        "type": "f32"
+    },
+    "copysignf128": {
+        "sources": [
+            "libm/src/math/copysign.rs",
+            "libm/src/math/generic/copysign.rs"
+        ],
+        "type": "f128"
+    },
+    "copysignf16": {
+        "sources": [
+            "libm/src/math/copysign.rs",
+            "libm/src/math/generic/copysign.rs"
+        ],
+        "type": "f16"
+    },
+    "cos": {
+        "sources": [
+            "libm/src/math/cos.rs"
+        ],
+        "type": "f64"
+    },
+    "cosf": {
+        "sources": [
+            "libm/src/math/cosf.rs"
+        ],
+        "type": "f32"
+    },
+    "cosh": {
+        "sources": [
+            "libm/src/math/cosh.rs"
+        ],
+        "type": "f64"
+    },
+    "coshf": {
+        "sources": [
+            "libm/src/math/coshf.rs"
+        ],
+        "type": "f32"
+    },
+    "erf": {
+        "sources": [
+            "libm/src/math/erf.rs"
+        ],
+        "type": "f64"
+    },
+    "erfc": {
+        "sources": [
+            "libm/src/math/erf.rs"
+        ],
+        "type": "f64"
+    },
+    "erfcf": {
+        "sources": [
+            "libm/src/math/erff.rs"
+        ],
+        "type": "f32"
+    },
+    "erff": {
+        "sources": [
+            "libm/src/math/erff.rs"
+        ],
+        "type": "f32"
+    },
+    "exp": {
+        "sources": [
+            "libm/src/math/exp.rs"
+        ],
+        "type": "f64"
+    },
+    "exp10": {
+        "sources": [
+            "libm/src/math/exp10.rs"
+        ],
+        "type": "f64"
+    },
+    "exp10f": {
+        "sources": [
+            "libm/src/math/exp10f.rs"
+        ],
+        "type": "f32"
+    },
+    "exp2": {
+        "sources": [
+            "libm/src/math/exp2.rs"
+        ],
+        "type": "f64"
+    },
+    "exp2f": {
+        "sources": [
+            "libm/src/math/exp2f.rs"
+        ],
+        "type": "f32"
+    },
+    "expf": {
+        "sources": [
+            "libm/src/math/expf.rs"
+        ],
+        "type": "f32"
+    },
+    "expm1": {
+        "sources": [
+            "libm/src/math/expm1.rs"
+        ],
+        "type": "f64"
+    },
+    "expm1f": {
+        "sources": [
+            "libm/src/math/expm1f.rs"
+        ],
+        "type": "f32"
+    },
+    "fabs": {
+        "sources": [
+            "libm/src/math/arch/wasm32.rs",
+            "libm/src/math/fabs.rs",
+            "libm/src/math/generic/fabs.rs"
+        ],
+        "type": "f64"
+    },
+    "fabsf": {
+        "sources": [
+            "libm/src/math/arch/wasm32.rs",
+            "libm/src/math/fabs.rs",
+            "libm/src/math/generic/fabs.rs"
+        ],
+        "type": "f32"
+    },
+    "fabsf128": {
+        "sources": [
+            "libm/src/math/fabs.rs",
+            "libm/src/math/generic/fabs.rs"
+        ],
+        "type": "f128"
+    },
+    "fabsf16": {
+        "sources": [
+            "libm/src/math/fabs.rs",
+            "libm/src/math/generic/fabs.rs"
+        ],
+        "type": "f16"
+    },
+    "fdim": {
+        "sources": [
+            "libm/src/math/fdim.rs",
+            "libm/src/math/generic/fdim.rs"
+        ],
+        "type": "f64"
+    },
+    "fdimf": {
+        "sources": [
+            "libm/src/math/fdim.rs",
+            "libm/src/math/generic/fdim.rs"
+        ],
+        "type": "f32"
+    },
+    "fdimf128": {
+        "sources": [
+            "libm/src/math/fdim.rs",
+            "libm/src/math/generic/fdim.rs"
+        ],
+        "type": "f128"
+    },
+    "fdimf16": {
+        "sources": [
+            "libm/src/math/fdim.rs",
+            "libm/src/math/generic/fdim.rs"
+        ],
+        "type": "f16"
+    },
+    "floor": {
+        "sources": [
+            "libm/src/math/arch/i586.rs",
+            "libm/src/math/arch/wasm32.rs",
+            "libm/src/math/floor.rs",
+            "libm/src/math/generic/floor.rs"
+        ],
+        "type": "f64"
+    },
+    "floorf": {
+        "sources": [
+            "libm/src/math/arch/wasm32.rs",
+            "libm/src/math/floor.rs",
+            "libm/src/math/generic/floor.rs"
+        ],
+        "type": "f32"
+    },
+    "floorf128": {
+        "sources": [
+            "libm/src/math/floor.rs",
+            "libm/src/math/generic/floor.rs"
+        ],
+        "type": "f128"
+    },
+    "floorf16": {
+        "sources": [
+            "libm/src/math/floor.rs",
+            "libm/src/math/generic/floor.rs"
+        ],
+        "type": "f16"
+    },
+    "fma": {
+        "sources": [
+            "libm/src/math/arch/aarch64.rs",
+            "libm/src/math/fma.rs"
+        ],
+        "type": "f64"
+    },
+    "fmaf": {
+        "sources": [
+            "libm/src/math/arch/aarch64.rs",
+            "libm/src/math/fma_wide.rs"
+        ],
+        "type": "f32"
+    },
+    "fmaf128": {
+        "sources": [
+            "libm/src/math/fma.rs"
+        ],
+        "type": "f128"
+    },
+    "fmax": {
+        "sources": [
+            "libm/src/math/fmin_fmax.rs",
+            "libm/src/math/generic/fmax.rs"
+        ],
+        "type": "f64"
+    },
+    "fmaxf": {
+        "sources": [
+            "libm/src/math/fmin_fmax.rs",
+            "libm/src/math/generic/fmax.rs"
+        ],
+        "type": "f32"
+    },
+    "fmaxf128": {
+        "sources": [
+            "libm/src/math/fmin_fmax.rs",
+            "libm/src/math/generic/fmax.rs"
+        ],
+        "type": "f128"
+    },
+    "fmaxf16": {
+        "sources": [
+            "libm/src/math/fmin_fmax.rs",
+            "libm/src/math/generic/fmax.rs"
+        ],
+        "type": "f16"
+    },
+    "fmaximum": {
+        "sources": [
+            "libm/src/math/fminimum_fmaximum.rs",
+            "libm/src/math/generic/fmaximum.rs"
+        ],
+        "type": "f64"
+    },
+    "fmaximum_num": {
+        "sources": [
+            "libm/src/math/fminimum_fmaximum_num.rs",
+            "libm/src/math/generic/fmaximum_num.rs"
+        ],
+        "type": "f64"
+    },
+    "fmaximum_numf": {
+        "sources": [
+            "libm/src/math/fminimum_fmaximum_num.rs",
+            "libm/src/math/generic/fmaximum_num.rs"
+        ],
+        "type": "f32"
+    },
+    "fmaximum_numf128": {
+        "sources": [
+            "libm/src/math/fminimum_fmaximum_num.rs",
+            "libm/src/math/generic/fmaximum_num.rs"
+        ],
+        "type": "f128"
+    },
+    "fmaximum_numf16": {
+        "sources": [
+            "libm/src/math/fminimum_fmaximum_num.rs",
+            "libm/src/math/generic/fmaximum_num.rs"
+        ],
+        "type": "f16"
+    },
+    "fmaximumf": {
+        "sources": [
+            "libm/src/math/fminimum_fmaximum.rs",
+            "libm/src/math/generic/fmaximum.rs"
+        ],
+        "type": "f32"
+    },
+    "fmaximumf128": {
+        "sources": [
+            "libm/src/math/fminimum_fmaximum.rs",
+            "libm/src/math/generic/fmaximum.rs"
+        ],
+        "type": "f128"
+    },
+    "fmaximumf16": {
+        "sources": [
+            "libm/src/math/fminimum_fmaximum.rs",
+            "libm/src/math/generic/fmaximum.rs"
+        ],
+        "type": "f16"
+    },
+    "fmin": {
+        "sources": [
+            "libm/src/math/fmin_fmax.rs",
+            "libm/src/math/generic/fmin.rs"
+        ],
+        "type": "f64"
+    },
+    "fminf": {
+        "sources": [
+            "libm/src/math/fmin_fmax.rs",
+            "libm/src/math/generic/fmin.rs"
+        ],
+        "type": "f32"
+    },
+    "fminf128": {
+        "sources": [
+            "libm/src/math/fmin_fmax.rs",
+            "libm/src/math/generic/fmin.rs"
+        ],
+        "type": "f128"
+    },
+    "fminf16": {
+        "sources": [
+            "libm/src/math/fmin_fmax.rs",
+            "libm/src/math/generic/fmin.rs"
+        ],
+        "type": "f16"
+    },
+    "fminimum": {
+        "sources": [
+            "libm/src/math/fminimum_fmaximum.rs",
+            "libm/src/math/generic/fminimum.rs"
+        ],
+        "type": "f64"
+    },
+    "fminimum_num": {
+        "sources": [
+            "libm/src/math/fminimum_fmaximum_num.rs",
+            "libm/src/math/generic/fminimum_num.rs"
+        ],
+        "type": "f64"
+    },
+    "fminimum_numf": {
+        "sources": [
+            "libm/src/math/fminimum_fmaximum_num.rs",
+            "libm/src/math/generic/fminimum_num.rs"
+        ],
+        "type": "f32"
+    },
+    "fminimum_numf128": {
+        "sources": [
+            "libm/src/math/fminimum_fmaximum_num.rs",
+            "libm/src/math/generic/fminimum_num.rs"
+        ],
+        "type": "f128"
+    },
+    "fminimum_numf16": {
+        "sources": [
+            "libm/src/math/fminimum_fmaximum_num.rs",
+            "libm/src/math/generic/fminimum_num.rs"
+        ],
+        "type": "f16"
+    },
+    "fminimumf": {
+        "sources": [
+            "libm/src/math/fminimum_fmaximum.rs",
+            "libm/src/math/generic/fminimum.rs"
+        ],
+        "type": "f32"
+    },
+    "fminimumf128": {
+        "sources": [
+            "libm/src/math/fminimum_fmaximum.rs",
+            "libm/src/math/generic/fminimum.rs"
+        ],
+        "type": "f128"
+    },
+    "fminimumf16": {
+        "sources": [
+            "libm/src/math/fminimum_fmaximum.rs",
+            "libm/src/math/generic/fminimum.rs"
+        ],
+        "type": "f16"
+    },
+    "fmod": {
+        "sources": [
+            "libm/src/math/fmod.rs",
+            "libm/src/math/generic/fmod.rs"
+        ],
+        "type": "f64"
+    },
+    "fmodf": {
+        "sources": [
+            "libm/src/math/fmod.rs",
+            "libm/src/math/generic/fmod.rs"
+        ],
+        "type": "f32"
+    },
+    "fmodf128": {
+        "sources": [
+            "libm/src/math/fmod.rs",
+            "libm/src/math/generic/fmod.rs"
+        ],
+        "type": "f128"
+    },
+    "fmodf16": {
+        "sources": [
+            "libm/src/math/fmod.rs",
+            "libm/src/math/generic/fmod.rs"
+        ],
+        "type": "f16"
+    },
+    "frexp": {
+        "sources": [
+            "libm/src/math/frexp.rs"
+        ],
+        "type": "f64"
+    },
+    "frexpf": {
+        "sources": [
+            "libm/src/math/frexpf.rs"
+        ],
+        "type": "f32"
+    },
+    "hypot": {
+        "sources": [
+            "libm/src/math/hypot.rs"
+        ],
+        "type": "f64"
+    },
+    "hypotf": {
+        "sources": [
+            "libm/src/math/hypotf.rs"
+        ],
+        "type": "f32"
+    },
+    "ilogb": {
+        "sources": [
+            "libm/src/math/ilogb.rs"
+        ],
+        "type": "f64"
+    },
+    "ilogbf": {
+        "sources": [
+            "libm/src/math/ilogbf.rs"
+        ],
+        "type": "f32"
+    },
+    "j0": {
+        "sources": [
+            "libm/src/math/j0.rs"
+        ],
+        "type": "f64"
+    },
+    "j0f": {
+        "sources": [
+            "libm/src/math/j0f.rs"
+        ],
+        "type": "f32"
+    },
+    "j1": {
+        "sources": [
+            "libm/src/math/j1.rs"
+        ],
+        "type": "f64"
+    },
+    "j1f": {
+        "sources": [
+            "libm/src/math/j1f.rs"
+        ],
+        "type": "f32"
+    },
+    "jn": {
+        "sources": [
+            "libm/src/math/jn.rs"
+        ],
+        "type": "f64"
+    },
+    "jnf": {
+        "sources": [
+            "libm/src/math/jnf.rs"
+        ],
+        "type": "f32"
+    },
+    "ldexp": {
+        "sources": [
+            "libm/src/math/ldexp.rs"
+        ],
+        "type": "f64"
+    },
+    "ldexpf": {
+        "sources": [
+            "libm/src/math/ldexp.rs"
+        ],
+        "type": "f32"
+    },
+    "ldexpf128": {
+        "sources": [
+            "libm/src/math/ldexp.rs"
+        ],
+        "type": "f128"
+    },
+    "ldexpf16": {
+        "sources": [
+            "libm/src/math/ldexp.rs"
+        ],
+        "type": "f16"
+    },
+    "lgamma": {
+        "sources": [
+            "libm/src/math/lgamma.rs"
+        ],
+        "type": "f64"
+    },
+    "lgamma_r": {
+        "sources": [
+            "libm/src/math/lgamma_r.rs"
+        ],
+        "type": "f64"
+    },
+    "lgammaf": {
+        "sources": [
+            "libm/src/math/lgammaf.rs"
+        ],
+        "type": "f32"
+    },
+    "lgammaf_r": {
+        "sources": [
+            "libm/src/math/lgammaf_r.rs"
+        ],
+        "type": "f32"
+    },
+    "log": {
+        "sources": [
+            "libm/src/math/log.rs"
+        ],
+        "type": "f64"
+    },
+    "log10": {
+        "sources": [
+            "libm/src/math/log10.rs"
+        ],
+        "type": "f64"
+    },
+    "log10f": {
+        "sources": [
+            "libm/src/math/log10f.rs"
+        ],
+        "type": "f32"
+    },
+    "log1p": {
+        "sources": [
+            "libm/src/math/log1p.rs"
+        ],
+        "type": "f64"
+    },
+    "log1pf": {
+        "sources": [
+            "libm/src/math/log1pf.rs"
+        ],
+        "type": "f32"
+    },
+    "log2": {
+        "sources": [
+            "libm/src/math/log2.rs"
+        ],
+        "type": "f64"
+    },
+    "log2f": {
+        "sources": [
+            "libm/src/math/log2f.rs"
+        ],
+        "type": "f32"
+    },
+    "logf": {
+        "sources": [
+            "libm/src/math/logf.rs"
+        ],
+        "type": "f32"
+    },
+    "modf": {
+        "sources": [
+            "libm/src/math/modf.rs"
+        ],
+        "type": "f64"
+    },
+    "modff": {
+        "sources": [
+            "libm/src/math/modff.rs"
+        ],
+        "type": "f32"
+    },
+    "nextafter": {
+        "sources": [
+            "libm/src/math/nextafter.rs"
+        ],
+        "type": "f64"
+    },
+    "nextafterf": {
+        "sources": [
+            "libm/src/math/nextafterf.rs"
+        ],
+        "type": "f32"
+    },
+    "pow": {
+        "sources": [
+            "libm/src/math/pow.rs"
+        ],
+        "type": "f64"
+    },
+    "powf": {
+        "sources": [
+            "libm/src/math/powf.rs"
+        ],
+        "type": "f32"
+    },
+    "remainder": {
+        "sources": [
+            "libm/src/math/remainder.rs"
+        ],
+        "type": "f64"
+    },
+    "remainderf": {
+        "sources": [
+            "libm/src/math/remainderf.rs"
+        ],
+        "type": "f32"
+    },
+    "remquo": {
+        "sources": [
+            "libm/src/math/remquo.rs"
+        ],
+        "type": "f64"
+    },
+    "remquof": {
+        "sources": [
+            "libm/src/math/remquof.rs"
+        ],
+        "type": "f32"
+    },
+    "rint": {
+        "sources": [
+            "libm/src/math/arch/aarch64.rs",
+            "libm/src/math/arch/wasm32.rs",
+            "libm/src/math/rint.rs"
+        ],
+        "type": "f64"
+    },
+    "rintf": {
+        "sources": [
+            "libm/src/math/arch/aarch64.rs",
+            "libm/src/math/arch/wasm32.rs",
+            "libm/src/math/rint.rs"
+        ],
+        "type": "f32"
+    },
+    "rintf128": {
+        "sources": [
+            "libm/src/math/rint.rs"
+        ],
+        "type": "f128"
+    },
+    "rintf16": {
+        "sources": [
+            "libm/src/math/arch/aarch64.rs",
+            "libm/src/math/rint.rs"
+        ],
+        "type": "f16"
+    },
+    "round": {
+        "sources": [
+            "libm/src/math/generic/round.rs",
+            "libm/src/math/round.rs"
+        ],
+        "type": "f64"
+    },
+    "roundeven": {
+        "sources": [
+            "libm/src/math/roundeven.rs"
+        ],
+        "type": "f64"
+    },
+    "roundevenf": {
+        "sources": [
+            "libm/src/math/roundeven.rs"
+        ],
+        "type": "f32"
+    },
+    "roundevenf128": {
+        "sources": [
+            "libm/src/math/roundeven.rs"
+        ],
+        "type": "f128"
+    },
+    "roundevenf16": {
+        "sources": [
+            "libm/src/math/roundeven.rs"
+        ],
+        "type": "f16"
+    },
+    "roundf": {
+        "sources": [
+            "libm/src/math/generic/round.rs",
+            "libm/src/math/round.rs"
+        ],
+        "type": "f32"
+    },
+    "roundf128": {
+        "sources": [
+            "libm/src/math/generic/round.rs",
+            "libm/src/math/round.rs"
+        ],
+        "type": "f128"
+    },
+    "roundf16": {
+        "sources": [
+            "libm/src/math/generic/round.rs",
+            "libm/src/math/round.rs"
+        ],
+        "type": "f16"
+    },
+    "scalbn": {
+        "sources": [
+            "libm/src/math/generic/scalbn.rs",
+            "libm/src/math/scalbn.rs"
+        ],
+        "type": "f64"
+    },
+    "scalbnf": {
+        "sources": [
+            "libm/src/math/generic/scalbn.rs",
+            "libm/src/math/scalbn.rs"
+        ],
+        "type": "f32"
+    },
+    "scalbnf128": {
+        "sources": [
+            "libm/src/math/generic/scalbn.rs",
+            "libm/src/math/scalbn.rs"
+        ],
+        "type": "f128"
+    },
+    "scalbnf16": {
+        "sources": [
+            "libm/src/math/generic/scalbn.rs",
+            "libm/src/math/scalbn.rs"
+        ],
+        "type": "f16"
+    },
+    "sin": {
+        "sources": [
+            "libm/src/math/sin.rs"
+        ],
+        "type": "f64"
+    },
+    "sincos": {
+        "sources": [
+            "libm/src/math/sincos.rs"
+        ],
+        "type": "f64"
+    },
+    "sincosf": {
+        "sources": [
+            "libm/src/math/sincosf.rs"
+        ],
+        "type": "f32"
+    },
+    "sinf": {
+        "sources": [
+            "libm/src/math/sinf.rs"
+        ],
+        "type": "f32"
+    },
+    "sinh": {
+        "sources": [
+            "libm/src/math/sinh.rs"
+        ],
+        "type": "f64"
+    },
+    "sinhf": {
+        "sources": [
+            "libm/src/math/sinhf.rs"
+        ],
+        "type": "f32"
+    },
+    "sqrt": {
+        "sources": [
+            "libm/src/math/arch/aarch64.rs",
+            "libm/src/math/arch/i686.rs",
+            "libm/src/math/arch/wasm32.rs",
+            "libm/src/math/generic/sqrt.rs",
+            "libm/src/math/sqrt.rs"
+        ],
+        "type": "f64"
+    },
+    "sqrtf": {
+        "sources": [
+            "libm/src/math/arch/aarch64.rs",
+            "libm/src/math/arch/i686.rs",
+            "libm/src/math/arch/wasm32.rs",
+            "libm/src/math/generic/sqrt.rs",
+            "libm/src/math/sqrt.rs"
+        ],
+        "type": "f32"
+    },
+    "sqrtf128": {
+        "sources": [
+            "libm/src/math/generic/sqrt.rs",
+            "libm/src/math/sqrt.rs"
+        ],
+        "type": "f128"
+    },
+    "sqrtf16": {
+        "sources": [
+            "libm/src/math/arch/aarch64.rs",
+            "libm/src/math/generic/sqrt.rs",
+            "libm/src/math/sqrt.rs"
+        ],
+        "type": "f16"
+    },
+    "tan": {
+        "sources": [
+            "libm/src/math/tan.rs"
+        ],
+        "type": "f64"
+    },
+    "tanf": {
+        "sources": [
+            "libm/src/math/tanf.rs"
+        ],
+        "type": "f32"
+    },
+    "tanh": {
+        "sources": [
+            "libm/src/math/tanh.rs"
+        ],
+        "type": "f64"
+    },
+    "tanhf": {
+        "sources": [
+            "libm/src/math/tanhf.rs"
+        ],
+        "type": "f32"
+    },
+    "tgamma": {
+        "sources": [
+            "libm/src/math/tgamma.rs"
+        ],
+        "type": "f64"
+    },
+    "tgammaf": {
+        "sources": [
+            "libm/src/math/tgammaf.rs"
+        ],
+        "type": "f32"
+    },
+    "trunc": {
+        "sources": [
+            "libm/src/math/arch/wasm32.rs",
+            "libm/src/math/generic/trunc.rs",
+            "libm/src/math/trunc.rs"
+        ],
+        "type": "f64"
+    },
+    "truncf": {
+        "sources": [
+            "libm/src/math/arch/wasm32.rs",
+            "libm/src/math/generic/trunc.rs",
+            "libm/src/math/trunc.rs"
+        ],
+        "type": "f32"
+    },
+    "truncf128": {
+        "sources": [
+            "libm/src/math/generic/trunc.rs",
+            "libm/src/math/trunc.rs"
+        ],
+        "type": "f128"
+    },
+    "truncf16": {
+        "sources": [
+            "libm/src/math/generic/trunc.rs",
+            "libm/src/math/trunc.rs"
+        ],
+        "type": "f16"
+    },
+    "y0": {
+        "sources": [
+            "libm/src/math/j0.rs"
+        ],
+        "type": "f64"
+    },
+    "y0f": {
+        "sources": [
+            "libm/src/math/j0f.rs"
+        ],
+        "type": "f32"
+    },
+    "y1": {
+        "sources": [
+            "libm/src/math/j1.rs"
+        ],
+        "type": "f64"
+    },
+    "y1f": {
+        "sources": [
+            "libm/src/math/j1f.rs"
+        ],
+        "type": "f32"
+    },
+    "yn": {
+        "sources": [
+            "libm/src/math/jn.rs"
+        ],
+        "type": "f64"
+    },
+    "ynf": {
+        "sources": [
+            "libm/src/math/jnf.rs"
+        ],
+        "type": "f32"
+    }
+}
diff --git a/etc/function-list.txt b/etc/function-list.txt
new file mode 100644
index 000000000..1f226c8c0
--- /dev/null
+++ b/etc/function-list.txt
@@ -0,0 +1,164 @@
+# autogenerated by update-api-list.py
+acos
+acosf
+acosh
+acoshf
+asin
+asinf
+asinh
+asinhf
+atan
+atan2
+atan2f
+atanf
+atanh
+atanhf
+cbrt
+cbrtf
+ceil
+ceilf
+ceilf128
+ceilf16
+copysign
+copysignf
+copysignf128
+copysignf16
+cos
+cosf
+cosh
+coshf
+erf
+erfc
+erfcf
+erff
+exp
+exp10
+exp10f
+exp2
+exp2f
+expf
+expm1
+expm1f
+fabs
+fabsf
+fabsf128
+fabsf16
+fdim
+fdimf
+fdimf128
+fdimf16
+floor
+floorf
+floorf128
+floorf16
+fma
+fmaf
+fmaf128
+fmax
+fmaxf
+fmaxf128
+fmaxf16
+fmaximum
+fmaximum_num
+fmaximum_numf
+fmaximum_numf128
+fmaximum_numf16
+fmaximumf
+fmaximumf128
+fmaximumf16
+fmin
+fminf
+fminf128
+fminf16
+fminimum
+fminimum_num
+fminimum_numf
+fminimum_numf128
+fminimum_numf16
+fminimumf
+fminimumf128
+fminimumf16
+fmod
+fmodf
+fmodf128
+fmodf16
+frexp
+frexpf
+hypot
+hypotf
+ilogb
+ilogbf
+j0
+j0f
+j1
+j1f
+jn
+jnf
+ldexp
+ldexpf
+ldexpf128
+ldexpf16
+lgamma
+lgamma_r
+lgammaf
+lgammaf_r
+log
+log10
+log10f
+log1p
+log1pf
+log2
+log2f
+logf
+modf
+modff
+nextafter
+nextafterf
+pow
+powf
+remainder
+remainderf
+remquo
+remquof
+rint
+rintf
+rintf128
+rintf16
+round
+roundeven
+roundevenf
+roundevenf128
+roundevenf16
+roundf
+roundf128
+roundf16
+scalbn
+scalbnf
+scalbnf128
+scalbnf16
+sin
+sincos
+sincosf
+sinf
+sinh
+sinhf
+sqrt
+sqrtf
+sqrtf128
+sqrtf16
+tan
+tanf
+tanh
+tanhf
+tgamma
+tgammaf
+trunc
+truncf
+truncf128
+truncf16
+y0
+y0f
+y1
+y1f
+yn
+ynf
diff --git a/etc/update-api-list.py b/etc/update-api-list.py
new file mode 100755
index 000000000..0770a8b20
--- /dev/null
+++ b/etc/update-api-list.py
@@ -0,0 +1,359 @@
+#!/usr/bin/env python3
+"""Create a text file listing all public API. This can be used to ensure that all
+functions are covered by our macros.
+
+This file additionally does tidy-esque checks that all functions are listed where
+needed, or that lists are sorted.
+"""
+
+import difflib
+import json
+import re
+import subprocess as sp
+import sys
+from dataclasses import dataclass
+from glob import glob
+from pathlib import Path
+from typing import Any, Callable, TypeAlias
+
+SELF_PATH = Path(__file__)
+ETC_DIR = SELF_PATH.parent
+ROOT_DIR = ETC_DIR.parent
+
+# These files do not trigger a retest.
+IGNORED_SOURCES = ["libm/src/libm_helper.rs", "libm/src/math/support/float_traits.rs"]
+
+IndexTy: TypeAlias = dict[str, dict[str, Any]]
+"""Type of the `index` item in rustdoc's JSON output"""
+
+
+def eprint(*args, **kwargs):
+    """Print to stderr."""
+    print(*args, file=sys.stderr, **kwargs)
+
+
+@dataclass
+class Crate:
+    """Representation of public interfaces and function definition locations in
+    `libm`.
+    """
+
+    public_functions: list[str]
+    """List of all public functions."""
+    defs: dict[str, list[str]]
+    """Map from `name->[source files]` to find all places that define a public
+    function. We track this to know which tests need to be rerun when specific files
+    get updated.
+    """
+    types: dict[str, str]
+    """Map from `name->type`."""
+
+    def __init__(self) -> None:
+        self.public_functions = []
+        self.defs = {}
+        self.types = {}
+
+        j = self.get_rustdoc_json()
+        index: IndexTy = j["index"]
+        self._init_function_list(index)
+        self._init_defs(index)
+        self._init_types()
+
+    @staticmethod
+    def get_rustdoc_json() -> dict[Any, Any]:
+        """Get rustdoc's JSON output for the `libm` crate."""
+
+        j = sp.check_output(
+            [
+                "rustdoc",
+                "libm/src/lib.rs",
+                "--edition=2021",
+                "--document-private-items",
+                "--output-format=json",
+                "--cfg=f16_enabled",
+                "--cfg=f128_enabled",
+                "-Zunstable-options",
+                "-o-",
+            ],
+            cwd=ROOT_DIR,
+            text=True,
+        )
+        j = json.loads(j)
+        return j
+
+    def _init_function_list(self, index: IndexTy) -> None:
+        """Get a list of public functions from rustdoc JSON output.
+
+        Note that this only finds functions that are reexported in `lib.rs`, this will
+        need to be adjusted if we need to account for functions that are defined there, or
+        glob reexports in other locations.
+        """
+        # Filter out items that are not public
+        public = [i for i in index.values() if i["visibility"] == "public"]
+
+        # Collect a list of source IDs for reexported items in `lib.rs` or `mod math`.
+        use = (i for i in public if "use" in i["inner"])
+        use = (
+            i
+            for i in use
+            if i["span"]["filename"] in ["libm/src/math/mod.rs", "libm/src/lib.rs"]
+        )
+        reexported_ids = [item["inner"]["use"]["id"] for item in use]
+
+        # Collect a list of reexported items that are functions
+        for id in reexported_ids:
+            srcitem = index.get(str(id))
+            # External crate
+            if srcitem is None:
+                continue
+
+            # Skip if not a function
+            if "function" not in srcitem["inner"]:
+                continue
+
+            self.public_functions.append(srcitem["name"])
+        self.public_functions.sort()
+
+    def _init_defs(self, index: IndexTy) -> None:
+        defs = {name: set() for name in self.public_functions}
+        funcs = (i for i in index.values() if "function" in i["inner"])
+        funcs = (f for f in funcs if f["name"] in self.public_functions)
+        for func in funcs:
+            defs[func["name"]].add(func["span"]["filename"])
+
+        # A lot of the `arch` module is often configured out so doesn't show up in docs. Use
+        # string matching as a fallback.
+        for fname in glob("libm/src/math/arch/**.rs", root_dir=ROOT_DIR):
+            contents = (ROOT_DIR.joinpath(fname)).read_text()
+
+            for name in self.public_functions:
+                if f"fn {name}" in contents:
+                    defs[name].add(fname)
+
+        for name, sources in defs.items():
+            base_sources = defs[base_name(name)[0]]
+            for src in (s for s in base_sources if "generic" in s):
+                sources.add(src)
+
+            for src in IGNORED_SOURCES:
+                sources.discard(src)
+
+        # Sort the set
+        self.defs = {k: sorted(v) for (k, v) in defs.items()}
+
+    def _init_types(self) -> None:
+        self.types = {name: base_name(name)[1] for name in self.public_functions}
+
+    def write_function_list(self, check: bool) -> None:
+        """Collect the list of public functions to a simple text file."""
+        output = "# autogenerated by update-api-list.py\n"
+        for name in self.public_functions:
+            output += f"{name}\n"
+
+        out_file = ETC_DIR.joinpath("function-list.txt")
+
+        if check:
+            with open(out_file, "r") as f:
+                current = f.read()
+            diff_and_exit(current, output, "function list")
+        else:
+            with open(out_file, "w") as f:
+                f.write(output)
+
+    def write_function_defs(self, check: bool) -> None:
+        """Collect the list of information about public functions to a JSON file."""
+        comment = (
+            "Autogenerated by update-api-list.py. "
+            "List of files that define a function with a given name. "
+            "This file is checked in to make it obvious if refactoring breaks things"
+        )
+
+        d = {"__comment": comment}
+        d |= {
+            name: {"sources": self.defs[name], "type": self.types[name]}
+            for name in self.public_functions
+        }
+
+        out_file = ETC_DIR.joinpath("function-definitions.json")
+        output = json.dumps(d, indent=4) + "\n"
+
+        if check:
+            with open(out_file, "r") as f:
+                current = f.read()
+            diff_and_exit(current, output, "source list")
+        else:
+            with open(out_file, "w") as f:
+                f.write(output)
+
+    def tidy_lists(self) -> None:
+        """In each file, check annotations indicating blocks of code should be sorted or should
+        include all public API.
+        """
+
+        flist = sp.check_output(["git", "ls-files"], cwd=ROOT_DIR, text=True)
+
+        for path in flist.splitlines():
+            fpath = ROOT_DIR.joinpath(path)
+            if fpath.is_dir() or fpath == SELF_PATH:
+                continue
+
+            lines = fpath.read_text().splitlines()
+
+            validate_delimited_block(
+                fpath,
+                lines,
+                "verify-sorted-start",
+                "verify-sorted-end",
+                ensure_sorted,
+            )
+
+            validate_delimited_block(
+                fpath,
+                lines,
+                "verify-apilist-start",
+                "verify-apilist-end",
+                lambda p, n, lines: self.ensure_contains_api(p, n, lines),
+            )
+
+    def ensure_contains_api(self, fpath: Path, line_num: int, lines: list[str]) -> None:
+        """Given a list of strings, ensure that each public function we have is named
+        somewhere. Otherwise report the missing names on stderr and exit with status 1.
+        """
+        not_found = []
+        for func in self.public_functions:
+            # The function name may be on its own or somewhere in a snake case string.
+            pat = re.compile(rf"(\b|_){func}(\b|_)")
+            found = any(pat.search(line) for line in lines)
+
+            if not found:
+                not_found.append(func)
+
+        if not not_found:
+            return
+
+        relpath = fpath.relative_to(ROOT_DIR)
+        eprint(f"functions not found at {relpath}:{line_num}: {not_found}")
+        sys.exit(1)
+
+
+def validate_delimited_block(
+    fpath: Path,
+    lines: list[str],
+    start: str,
+    end: str,
+    validate: Callable[[Path, int, list[str]], None],
+) -> None:
+    """Identify blocks of code wrapped within `start` and `end`, collect their contents
+    to a list of strings, and call `validate(fpath, start_line, contents)` for each.
+    Exits with status 1 if an `end` has no `start`, or a `start` is left open at EOF.
+    """
+    relpath = fpath.relative_to(ROOT_DIR)
+    block_lines = []
+    block_start_line: None | int = None
+    for line_num, line in enumerate(lines, 1):  # 1-indexed, as editors display them
+        # Marker lines delimit a block; they are never part of its contents.
+        if start in line:
+            block_start_line = line_num
+            continue
+
+        if end in line:
+            if block_start_line is None:
+                eprint(f"`{end}` without `{start}` at {relpath}:{line_num}")
+                sys.exit(1)
+
+            validate(fpath, block_start_line, block_lines)
+            block_lines = []
+            block_start_line = None
+            continue
+
+        if block_start_line is not None:
+            block_lines.append(line)
+
+    if block_start_line is not None:
+        eprint(f"`{start}` without `{end}` at {relpath}:{block_start_line}")
+        sys.exit(1)
+
+
+def ensure_sorted(fpath: Path, block_start_line: int, lines: list[str]) -> None:
+    """Ensure that a list of lines is sorted, otherwise print a diff and exit."""
+    relpath = fpath.relative_to(ROOT_DIR)
+    diff_and_exit(
+        "\n".join(lines),
+        "\n".join(sorted(lines)),
+        f"sorted block at {relpath}:{block_start_line}",
+    )
+
+
+def diff_and_exit(actual: str, expected: str, name: str) -> None:
+    """If the two strings are different, print a unified diff between them to stdout
+    and then exit with status 1. Otherwise print a success line and return.
+    """
+    if actual == expected:
+        print(f"{name} output matches expected; success")
+        return
+
+    # `unified_diff` wants lists of newline-terminated lines.
+    a = [f"{line}\n" for line in actual.splitlines()]
+    b = [f"{line}\n" for line in expected.splitlines()]
+    diff = difflib.unified_diff(a, b, "actual", "expected")
+    sys.stdout.writelines(diff)
+    print(f"mismatched {name}")
+    sys.exit(1)
+
+
+def base_name(name: str) -> tuple[str, str]:
+    """Return the basename and type from a full function name, e.g. `ceilf16` ->
+    `("ceil", "f16")`. Keep in sync with Rust's `fn base_name`.
+    """
+    known_mappings = [
+        ("erff", ("erf", "f32")),
+        ("erf", ("erf", "f64")),
+        ("modff", ("modf", "f32")),
+        ("modf", ("modf", "f64")),
+        ("lgammaf_r", ("lgamma_r", "f32")),
+        ("lgamma_r", ("lgamma_r", "f64")),
+    ]
+
+    found = next((base for (full, base) in known_mappings if full == name), None)
+    if found is not None:
+        return found
+    # Use `removesuffix`: `rstrip` strips a character set (`expm1f16` -> `expm`).
+    if name.endswith("f"):
+        return (name.removesuffix("f"), "f32")
+
+    if name.endswith("f16"):
+        return (name.removesuffix("f16"), "f16")
+
+    if name.endswith("f128"):
+        return (name.removesuffix("f128"), "f128")
+
+    return (name, "f64")
+
+
+def ensure_updated_list(check: bool) -> None:
+    """Runner to update the function list and JSON, or check that it is already up
+    to date.
+    """
+    crate = Crate()
+    crate.write_function_list(check)
+    crate.write_function_defs(check)
+
+    crate.tidy_lists()
+
+
+def main() -> None:
+    """By default overwrite the file. If `--check` is passed, print a diff instead and
+    error if the files are different. Any other arguments are rejected with status 1.
+    """
+    match sys.argv:
+        case [_]:
+            ensure_updated_list(False)
+        case [_, "--check"]:
+            ensure_updated_list(True)
+        case _:
+            eprint("unrecognized arguments")
+            sys.exit(1)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/libm/Cargo.toml b/libm/Cargo.toml
new file mode 100644
index 000000000..44154c1a8
--- /dev/null
+++ b/libm/Cargo.toml
@@ -0,0 +1,49 @@
+[package]
+authors = ["Jorge Aparicio <jorge@japaric.io>"]
+categories = ["no-std"]
+description = "libm in pure Rust"
+documentation = "https://docs.rs/libm"
+keywords = ["libm", "math"]
+license = "MIT"
+name = "libm"
+readme = "README.md"
+repository = "https://github.com/rust-lang/libm"
+version = "0.2.11"
+edition = "2021"
+rust-version = "1.63"
+
+[features]
+default = ["arch"]
+
+# Enable architecture-specific features such as SIMD or assembly routines.
+arch = []
+
+# This tells the compiler to assume that a Nightly toolchain is being used and
+# that it should activate any useful Nightly things accordingly.
+unstable = ["unstable-intrinsics", "unstable-float"]
+
+# Enable calls to functions in `core::intrinsics`
+unstable-intrinsics = []
+
+# Make some internal things public for testing.
+unstable-public-internals = []
+
+# Enable the nightly-only `f16` and `f128`.
+unstable-float = []
+
+# Used to prevent using any intrinsics or arch-specific code.
+#
+# HACK: this is a negative feature which is generally a bad idea in Cargo, but
+# we need it to be able to forbid other features when this crate is used in
+# Rust dependencies. Setting this overrides all features that may enable
+# hard float operations.
+force-soft-floats = []
+
+[dev-dependencies]
+no-panic = "0.1.35"
+
+[lints.rust]
+unexpected_cfgs = { level = "warn", check-cfg = [
+  # compiler-builtins sets this feature, but we use it in `libm`
+  'cfg(feature, values("compiler-builtins"))',
+] }
diff --git a/libm/LICENSE.txt b/libm/LICENSE.txt
new file mode 120000
index 000000000..4ab43736a
--- /dev/null
+++ b/libm/LICENSE.txt
@@ -0,0 +1 @@
+../LICENSE.txt
\ No newline at end of file
diff --git a/libm/README.md b/libm/README.md
new file mode 120000
index 000000000..32d46ee88
--- /dev/null
+++ b/libm/README.md
@@ -0,0 +1 @@
+../README.md
\ No newline at end of file
diff --git a/libm/build.rs b/libm/build.rs
new file mode 100644
index 000000000..07d08ed43
--- /dev/null
+++ b/libm/build.rs
@@ -0,0 +1,18 @@
+use std::env;
+
+mod configure;
+
+fn main() {
+    let cfg = configure::Config::from_env();
+
+    println!("cargo:rerun-if-changed=build.rs");
+    println!("cargo:rerun-if-changed=configure.rs");
+    println!("cargo:rustc-check-cfg=cfg(assert_no_panic)");
+
+    // If set, enable `no-panic`. Requires LTO (`release-opt` profile).
+    if env::var("ENSURE_NO_PANIC").is_ok() {
+        println!("cargo:rustc-cfg=assert_no_panic");
+    }
+
+    configure::emit_libm_config(&cfg);
+}
diff --git a/libm/configure.rs b/libm/configure.rs
new file mode 100644
index 000000000..8b8ba9815
--- /dev/null
+++ b/libm/configure.rs
@@ -0,0 +1,183 @@
+// Configuration shared with both libm and libm-test
+
+use std::env;
+use std::path::PathBuf;
+
+#[allow(dead_code)]
+pub struct Config {
+    pub manifest_dir: PathBuf,
+    pub out_dir: PathBuf,
+    pub opt_level: String,
+    pub cargo_features: Vec<String>,
+    pub target_arch: String,
+    pub target_env: String,
+    pub target_family: Option<String>,
+    pub target_os: String,
+    pub target_string: String,
+    pub target_vendor: String,
+    pub target_features: Vec<String>,
+}
+
+impl Config {
+    pub fn from_env() -> Self {
+        let target_features = env::var("CARGO_CFG_TARGET_FEATURE")
+            .map(|feats| feats.split(',').map(ToOwned::to_owned).collect())
+            .unwrap_or_default();
+        let cargo_features = env::vars()
+            .filter_map(|(name, _value)| name.strip_prefix("CARGO_FEATURE_").map(ToOwned::to_owned))
+            .map(|s| s.to_lowercase().replace("_", "-"))
+            .collect();
+
+        Self {
+            manifest_dir: PathBuf::from(env::var("CARGO_MANIFEST_DIR").unwrap()),
+            out_dir: PathBuf::from(env::var("OUT_DIR").unwrap()),
+            opt_level: env::var("OPT_LEVEL").unwrap(),
+            cargo_features,
+            target_arch: env::var("CARGO_CFG_TARGET_ARCH").unwrap(),
+            target_env: env::var("CARGO_CFG_TARGET_ENV").unwrap(),
+            target_family: env::var("CARGO_CFG_TARGET_FAMILY").ok(),
+            target_os: env::var("CARGO_CFG_TARGET_OS").unwrap(),
+            target_string: env::var("TARGET").unwrap(),
+            target_vendor: env::var("CARGO_CFG_TARGET_VENDOR").unwrap(),
+            target_features,
+        }
+    }
+}
+
+/// Libm gets most config options made available.
+#[allow(dead_code)]
+pub fn emit_libm_config(cfg: &Config) {
+    emit_intrinsics_cfg();
+    emit_arch_cfg();
+    emit_optimization_cfg(cfg);
+    emit_cfg_shorthands(cfg);
+    emit_cfg_env(cfg);
+    emit_f16_f128_cfg(cfg);
+}
+
+/// Tests don't need most feature-related config.
+#[allow(dead_code)]
+pub fn emit_test_config(cfg: &Config) {
+    emit_optimization_cfg(cfg);
+    emit_cfg_shorthands(cfg);
+    emit_cfg_env(cfg);
+    emit_f16_f128_cfg(cfg);
+}
+
+/// Simplify the feature logic for enabling intrinsics so code only needs to use
+/// `cfg(intrinsics_enabled)`.
+fn emit_intrinsics_cfg() {
+    println!("cargo:rustc-check-cfg=cfg(intrinsics_enabled)");
+
+    // Disabled by default; `unstable-intrinsics` enables again; `force-soft-floats` overrides
+    // to disable.
+    if cfg!(feature = "unstable-intrinsics") && !cfg!(feature = "force-soft-floats") {
+        println!("cargo:rustc-cfg=intrinsics_enabled");
+    }
+}
+
+/// Simplify the feature logic for enabling arch-specific features so code only needs to use
+/// `cfg(arch_enabled)`.
+fn emit_arch_cfg() {
+    println!("cargo:rustc-check-cfg=cfg(arch_enabled)");
+
+    // Enabled by default via the "arch" feature, `force-soft-floats` overrides to disable.
+    if cfg!(feature = "arch") && !cfg!(feature = "force-soft-floats") {
+        println!("cargo:rustc-cfg=arch_enabled");
+    }
+}
+
+/// Some tests are extremely slow. Emit a config option based on optimization level.
+fn emit_optimization_cfg(cfg: &Config) {
+    println!("cargo:rustc-check-cfg=cfg(optimizations_enabled)");
+
+    if !matches!(cfg.opt_level.as_str(), "0" | "1") {
+        println!("cargo:rustc-cfg=optimizations_enabled");
+    }
+}
+
+/// Provide an alias for common longer config combinations.
+fn emit_cfg_shorthands(cfg: &Config) {
+    println!("cargo:rustc-check-cfg=cfg(x86_no_sse)");
+    if cfg.target_arch == "x86" && !cfg.target_features.iter().any(|f| f == "sse") {
+        // Shorthand to detect i586 targets
+        println!("cargo:rustc-cfg=x86_no_sse");
+    }
+}
+
+/// Reemit config that we make use of for test logging.
+fn emit_cfg_env(cfg: &Config) {
+    println!("cargo:rustc-env=CFG_CARGO_FEATURES={:?}", cfg.cargo_features);
+    println!("cargo:rustc-env=CFG_OPT_LEVEL={}", cfg.opt_level);
+    println!("cargo:rustc-env=CFG_TARGET_FEATURES={:?}", cfg.target_features);
+}
+
+/// Configure whether or not `f16` and `f128` support should be enabled.
+fn emit_f16_f128_cfg(cfg: &Config) {
+    // Register both cfg names with rustc's check-cfg up front, before the early return below,
+    // so that they are known names even when they end up unset.
+    println!("cargo:rustc-check-cfg=cfg(f16_enabled)");
+    println!("cargo:rustc-check-cfg=cfg(f128_enabled)");
+
+    // `unstable-float` enables these features.
+    if !cfg!(feature = "unstable-float") {
+        return;
+    }
+
+    // Set whether or not `f16` and `f128` are supported at a basic level by LLVM. This only means
+    // that the backend will not crash when using these types and generates code that can be called
+    // without crashing (no infinite recursion). This does not mean that the platform doesn't have
+    // ABI or other bugs.
+    //
+    // We do this here rather than in `rust-lang/rust` because configuring via cargo features is
+    // not straightforward.
+    //
+    // Original source of this list:
+    // <https://github.com/rust-lang/compiler-builtins/pull/652#issuecomment-2266151350>
+    let f16_enabled = match cfg.target_arch.as_str() {
+        // Unsupported <https://github.com/llvm/llvm-project/issues/94434>
+        "arm64ec" => false,
+        // Selection failure <https://github.com/llvm/llvm-project/issues/50374>
+        "s390x" => false,
+        // Infinite recursion <https://github.com/llvm/llvm-project/issues/97981>
+        // FIXME(llvm): loongarch fixed by <https://github.com/llvm/llvm-project/pull/107791>
+        "csky" => false,
+        "hexagon" => false,
+        "loongarch64" => false,
+        "mips" | "mips64" | "mips32r6" | "mips64r6" => false,
+        "powerpc" | "powerpc64" => false,
+        "sparc" | "sparc64" => false,
+        "wasm32" | "wasm64" => false,
+        // Most everything else works as of LLVM 19
+        _ => true,
+    };
+
+    let f128_enabled = match cfg.target_arch.as_str() {
+        // Unsupported (libcall is not supported) <https://github.com/llvm/llvm-project/issues/121122>
+        "amdgpu" => false,
+        // Unsupported <https://github.com/llvm/llvm-project/issues/94434>
+        "arm64ec" => false,
+        // Selection failure <https://github.com/llvm/llvm-project/issues/96432>
+        "mips64" | "mips64r6" => false,
+        // Selection failure <https://github.com/llvm/llvm-project/issues/95471>
+        "nvptx64" => false,
+        // Selection failure <https://github.com/llvm/llvm-project/issues/101545>
+        "powerpc64" if &cfg.target_os == "aix" => false,
+        // Selection failure <https://github.com/llvm/llvm-project/issues/41838>
+        "sparc" => false,
+        // Most everything else works as of LLVM 19
+        _ => true,
+    };
+
+    // If cargo reports the `NO_F16_F128` feature as active, disable both types regardless of
+    // what the target supports.
+    let disable_both = env::var_os("CARGO_FEATURE_NO_F16_F128").is_some();
+
+    if f16_enabled && !disable_both {
+        println!("cargo:rustc-cfg=f16_enabled");
+    }
+
+    if f128_enabled && !disable_both {
+        println!("cargo:rustc-cfg=f128_enabled");
+    }
+}
diff --git a/libm/src/lib.rs b/libm/src/lib.rs
new file mode 100644
index 000000000..7e56bd079
--- /dev/null
+++ b/libm/src/lib.rs
@@ -0,0 +1,29 @@
+//! libm in pure Rust
+#![no_std]
+#![cfg_attr(intrinsics_enabled, allow(internal_features))]
+#![cfg_attr(intrinsics_enabled, feature(core_intrinsics))]
+#![cfg_attr(all(intrinsics_enabled, target_family = "wasm"), feature(wasm_numeric_instr))]
+#![cfg_attr(f128_enabled, feature(f128))]
+#![cfg_attr(f16_enabled, feature(f16))]
+#![allow(clippy::assign_op_pattern)]
+#![allow(clippy::deprecated_cfg_attr)]
+#![allow(clippy::eq_op)]
+#![allow(clippy::excessive_precision)]
+#![allow(clippy::float_cmp)]
+#![allow(clippy::int_plus_one)]
+#![allow(clippy::many_single_char_names)]
+#![allow(clippy::mixed_case_hex_literals)]
+#![allow(clippy::needless_late_init)]
+#![allow(clippy::needless_return)]
+#![allow(clippy::unreadable_literal)]
+#![allow(clippy::zero_divided_by_zero)]
+#![forbid(unsafe_op_in_unsafe_fn)]
+
+mod libm_helper;
+mod math;
+
+use core::{f32, f64};
+
+pub use libm_helper::*;
+
+pub use self::math::*;
diff --git a/src/libm_helper.rs b/libm/src/libm_helper.rs
similarity index 67%
rename from src/libm_helper.rs
rename to libm/src/libm_helper.rs
index 52d0c4c2a..dfa1ff77b 100644
--- a/src/libm_helper.rs
+++ b/libm/src/libm_helper.rs
@@ -30,7 +30,7 @@ macro_rules! libm_helper {
         }
     };
 
-    ({$($func:tt);*}) => {
+    ({$($func:tt;)*}) => {
         $(
             libm_helper! { $func }
         )*
@@ -44,9 +44,11 @@ macro_rules! libm_helper {
     };
 }
 
+// verify-apilist-start
 libm_helper! {
     f32,
     funcs: {
+        // verify-sorted-start
         (fn acos(x: f32) -> (f32);                  => acosf);
         (fn acosh(x: f32) -> (f32);                 => acoshf);
         (fn asin(x: f32) -> (f32);                  => asinf);
@@ -62,8 +64,8 @@ libm_helper! {
         (fn erf(x: f32) -> (f32);                   => erff);
         (fn erfc(x: f32) -> (f32);                  => erfcf);
         (fn exp(x: f32) -> (f32);                   => expf);
-        (fn exp2(x: f32) -> (f32);                  => exp2f);
         (fn exp10(x: f32) -> (f32);                 => exp10f);
+        (fn exp2(x: f32) -> (f32);                  => exp2f);
         (fn expm1(x: f32) -> (f32);                 => expm1f);
         (fn fabs(x: f32) -> (f32);                  => fabsf);
         (fn fdim(x: f32, y: f32) -> (f32);          => fdimf);
@@ -79,12 +81,12 @@ libm_helper! {
         (fn j1(x: f32) -> (f32);                    => j1f);
         (fn jn(n: i32, x: f32) -> (f32);            => jnf);
         (fn ldexp(x: f32, n: i32) -> (f32);         => ldexpf);
-        (fn lgamma_r(x: f32) -> (f32, i32);         => lgammaf_r);
         (fn lgamma(x: f32) -> (f32);                => lgammaf);
+        (fn lgamma_r(x: f32) -> (f32, i32);         => lgammaf_r);
         (fn log(x: f32) -> (f32);                   => logf);
+        (fn log10(x: f32) -> (f32);                 => log10f);
         (fn log1p(x: f32) -> (f32);                 => log1pf);
         (fn log2(x: f32) -> (f32);                  => log2f);
-        (fn log10(x: f32) -> (f32);                 => log10f);
         (fn modf(x: f32) -> (f32, f32);             => modff);
         (fn nextafter(x: f32, y: f32) -> (f32);     => nextafterf);
         (fn pow(x: f32, y: f32) -> (f32);           => powf);
@@ -92,6 +94,7 @@ libm_helper! {
         (fn remquo(x: f32, y: f32) -> (f32, i32);   => remquof);
         (fn rint(x: f32) -> (f32);                  => rintf);
         (fn round(x: f32) -> (f32);                 => roundf);
+        (fn roundeven(x: f32) -> (f32);             => roundevenf);
         (fn scalbn(x: f32, n: i32) -> (f32);        => scalbnf);
         (fn sin(x: f32) -> (f32);                   => sinf);
         (fn sincos(x: f32) -> (f32, f32);           => sincosf);
@@ -103,13 +106,15 @@ libm_helper! {
         (fn trunc(x: f32) -> (f32);                 => truncf);
         (fn y0(x: f32) -> (f32);                    => y0f);
         (fn y1(x: f32) -> (f32);                    => y1f);
-        (fn yn(n: i32, x: f32) -> (f32);            => ynf)
+        (fn yn(n: i32, x: f32) -> (f32);            => ynf);
+        // verify-sorted-end
     }
 }
 
 libm_helper! {
     f64,
     funcs: {
+        // verify-sorted-start
         (fn acos(x: f64) -> (f64);                  => acos);
         (fn acosh(x: f64) -> (f64);                 => acosh);
         (fn asin(x: f64) -> (f64);                  => asin);
@@ -125,15 +130,23 @@ libm_helper! {
         (fn erf(x: f64) -> (f64);                   => erf);
         (fn erfc(x: f64) -> (f64);                  => erfc);
         (fn exp(x: f64) -> (f64);                   => exp);
-        (fn exp2(x: f64) -> (f64);                  => exp2);
         (fn exp10(x: f64) -> (f64);                 => exp10);
+        (fn exp2(x: f64) -> (f64);                  => exp2);
         (fn expm1(x: f64) -> (f64);                 => expm1);
         (fn fabs(x: f64) -> (f64);                  => fabs);
         (fn fdim(x: f64, y: f64) -> (f64);          => fdim);
         (fn floor(x: f64) -> (f64);                 => floor);
         (fn fma(x: f64, y: f64, z: f64) -> (f64);   => fma);
         (fn fmax(x: f64, y: f64) -> (f64);          => fmax);
+        (fn fmaximum(x: f64, y: f64) -> (f64);      => fmaximum);
+        (fn fmaximum_num(x: f64, y: f64) -> (f64);  => fmaximum_num);
+        (fn fmaximum_numf(x: f32, y: f32) -> (f32); => fmaximum_numf);
+        (fn fmaximumf(x: f32, y: f32) -> (f32);     => fmaximumf);
         (fn fmin(x: f64, y: f64) -> (f64);          => fmin);
+        (fn fminimum(x: f64, y: f64) -> (f64);      => fminimum);
+        (fn fminimum_num(x: f64, y: f64) -> (f64);  => fminimum_num);
+        (fn fminimum_numf(x: f32, y: f32) -> (f32); => fminimum_numf);
+        (fn fminimumf(x: f32, y: f32) -> (f32);     => fminimumf);
         (fn fmod(x: f64, y: f64) -> (f64);          => fmod);
         (fn frexp(x: f64) -> (f64, i32);            => frexp);
         (fn hypot(x: f64, y: f64) -> (f64);         => hypot);
@@ -142,12 +155,12 @@ libm_helper! {
         (fn j1(x: f64) -> (f64);                    => j1);
         (fn jn(n: i32, x: f64) -> (f64);            => jn);
         (fn ldexp(x: f64, n: i32) -> (f64);         => ldexp);
-        (fn lgamma_r(x: f64) -> (f64, i32);         => lgamma_r);
         (fn lgamma(x: f64) -> (f64);                => lgamma);
+        (fn lgamma_r(x: f64) -> (f64, i32);         => lgamma_r);
         (fn log(x: f64) -> (f64);                   => log);
+        (fn log10(x: f64) -> (f64);                 => log10);
         (fn log1p(x: f64) -> (f64);                 => log1p);
         (fn log2(x: f64) -> (f64);                  => log2);
-        (fn log10(x: f64) -> (f64);                 => log10);
         (fn modf(x: f64) -> (f64, f64);             => modf);
         (fn nextafter(x: f64, y: f64) -> (f64);     => nextafter);
         (fn pow(x: f64, y: f64) -> (f64);           => pow);
@@ -155,6 +168,7 @@ libm_helper! {
         (fn remquo(x: f64, y: f64) -> (f64, i32);   => remquo);
         (fn rint(x: f64) -> (f64);                  => rint);
         (fn round(x: f64) -> (f64);                 => round);
+        (fn roundeven(x: f64) -> (f64);             => roundeven);
         (fn scalbn(x: f64, n: i32) -> (f64);        => scalbn);
         (fn sin(x: f64) -> (f64);                   => sin);
         (fn sincos(x: f64) -> (f64, f64);           => sincos);
@@ -166,6 +180,65 @@ libm_helper! {
         (fn trunc(x: f64) -> (f64);                 => trunc);
         (fn y0(x: f64) -> (f64);                    => y0);
         (fn y1(x: f64) -> (f64);                    => y1);
-        (fn yn(n: i32, x: f64) -> (f64);            => yn)
+        (fn yn(n: i32, x: f64) -> (f64);            => yn);
+        // verify-sorted-end
+    }
+}
+
+#[cfg(f16_enabled)]
+libm_helper! {
+    f16,
+    funcs: {
+        // verify-sorted-start
+        (fn ceil(x: f16) -> (f16);                  => ceilf16);
+        (fn copysign(x: f16, y: f16) -> (f16);      => copysignf16);
+        (fn fabs(x: f16) -> (f16);                  => fabsf16);
+        (fn fdim(x: f16, y: f16) -> (f16);          => fdimf16);
+        (fn floor(x: f16) -> (f16);                 => floorf16);
+        (fn fmax(x: f16, y: f16) -> (f16);          => fmaxf16);
+        (fn fmaximum_num(x: f16, y: f16) -> (f16);  => fmaximum_numf16);
+        (fn fmaximumf16(x: f16, y: f16) -> (f16);   => fmaximumf16);
+        (fn fmin(x: f16, y: f16) -> (f16);          => fminf16);
+        (fn fminimum(x: f16, y: f16) -> (f16);      => fminimumf16);
+        (fn fminimum_num(x: f16, y: f16) -> (f16);  => fminimum_numf16);
+        (fn fmod(x: f16, y: f16) -> (f16);          => fmodf16);
+        (fn ldexp(x: f16, n: i32) -> (f16);         => ldexpf16);
+        (fn rint(x: f16) -> (f16);                  => rintf16);
+        (fn round(x: f16) -> (f16);                 => roundf16);
+        (fn roundeven(x: f16) -> (f16);             => roundevenf16);
+        (fn scalbn(x: f16, n: i32) -> (f16);        => scalbnf16);
+        (fn sqrtf(x: f16) -> (f16);                 => sqrtf16);
+        (fn truncf(x: f16) -> (f16);                => truncf16);
+        // verify-sorted-end
+    }
+}
+
+#[cfg(f128_enabled)]
+libm_helper! {
+    f128,
+    funcs: {
+        // verify-sorted-start
+        (fn ceil(x: f128) -> (f128);                => ceilf128);
+        (fn copysign(x: f128, y: f128) -> (f128);   => copysignf128);
+        (fn fabs(x: f128) -> (f128);                => fabsf128);
+        (fn fdim(x: f128, y: f128) -> (f128);       => fdimf128);
+        (fn floor(x: f128) -> (f128);               => floorf128);
+        (fn fma(x: f128, y: f128, z: f128) -> (f128); => fmaf128);
+        (fn fmax(x: f128, y: f128) -> (f128);       => fmaxf128);
+        (fn fmaximum(x: f128, y: f128) -> (f128);      => fmaximumf128);
+        (fn fmaximum_num(x: f128, y: f128) -> (f128);  => fmaximum_numf128);
+        (fn fmin(x: f128, y: f128) -> (f128);       => fminf128);
+        (fn fminimum(x: f128, y: f128) -> (f128);      => fminimumf128);
+        (fn fminimum_num(x: f128, y: f128) -> (f128);  => fminimum_numf128);
+        (fn fmod(x: f128, y: f128) -> (f128);       => fmodf128);
+        (fn ldexp(x: f128, n: i32) -> (f128);       => ldexpf128);
+        (fn rint(x: f128) -> (f128);                => rintf128);
+        (fn round(x: f128) -> (f128);               => roundf128);
+        (fn roundeven(x: f128) -> (f128);           => roundevenf128);
+        (fn scalbn(x: f128, n: i32) -> (f128);      => scalbnf128);
+        (fn sqrt(x: f128) -> (f128);                => sqrtf128);
+        (fn trunc(x: f128) -> (f128);               => truncf128);
+        // verify-sorted-end
     }
 }
+// verify-apilist-end
diff --git a/src/math/acos.rs b/libm/src/math/acos.rs
similarity index 100%
rename from src/math/acos.rs
rename to libm/src/math/acos.rs
diff --git a/src/math/acosf.rs b/libm/src/math/acosf.rs
similarity index 98%
rename from src/math/acosf.rs
rename to libm/src/math/acosf.rs
index 1a60479e3..dd88eea5b 100644
--- a/src/math/acosf.rs
+++ b/libm/src/math/acosf.rs
@@ -13,7 +13,7 @@
  * ====================================================
  */
 
-use super::sqrtf::sqrtf;
+use super::sqrt::sqrtf;
 
 const PIO2_HI: f32 = 1.5707962513e+00; /* 0x3fc90fda */
 const PIO2_LO: f32 = 7.5497894159e-08; /* 0x33a22168 */
diff --git a/src/math/acosh.rs b/libm/src/math/acosh.rs
similarity index 100%
rename from src/math/acosh.rs
rename to libm/src/math/acosh.rs
diff --git a/src/math/acoshf.rs b/libm/src/math/acoshf.rs
similarity index 100%
rename from src/math/acoshf.rs
rename to libm/src/math/acoshf.rs
diff --git a/libm/src/math/arch/aarch64.rs b/libm/src/math/arch/aarch64.rs
new file mode 100644
index 000000000..020bb731c
--- /dev/null
+++ b/libm/src/math/arch/aarch64.rs
@@ -0,0 +1,115 @@
+//! Architecture-specific support for aarch64 with neon.
+
+use core::arch::asm;
+
+pub fn fma(mut x: f64, y: f64, z: f64) -> f64 {
+    // SAFETY: `fmadd` is available with neon and has no side effects.
+    unsafe {
+        asm!(
+            "fmadd {x:d}, {x:d}, {y:d}, {z:d}",
+            x = inout(vreg) x,
+            y = in(vreg) y,
+            z = in(vreg) z,
+            options(nomem, nostack, pure)
+        );
+    }
+    x
+}
+
+pub fn fmaf(mut x: f32, y: f32, z: f32) -> f32 {
+    // SAFETY: `fmadd` is available with neon and has no side effects.
+    unsafe {
+        asm!(
+            "fmadd {x:s}, {x:s}, {y:s}, {z:s}",
+            x = inout(vreg) x,
+            y = in(vreg) y,
+            z = in(vreg) z,
+            options(nomem, nostack, pure)
+        );
+    }
+    x
+}
+
+pub fn rint(mut x: f64) -> f64 {
+    // SAFETY: `frintn` is available with neon and has no side effects.
+    //
+    // `frintn` is always round-to-nearest which does not match the C specification, but Rust does
+    // not support rounding modes.
+    unsafe {
+        asm!(
+            "frintn {x:d}, {x:d}",
+            x = inout(vreg) x,
+            options(nomem, nostack, pure)
+        );
+    }
+    x
+}
+
+pub fn rintf(mut x: f32) -> f32 {
+    // SAFETY: `frintn` is available with neon and has no side effects.
+    //
+    // `frintn` is always round-to-nearest which does not match the C specification, but Rust does
+    // not support rounding modes.
+    unsafe {
+        asm!(
+            "frintn {x:s}, {x:s}",
+            x = inout(vreg) x,
+            options(nomem, nostack, pure)
+        );
+    }
+    x
+}
+
+#[cfg(all(f16_enabled, target_feature = "fp16"))]
+pub fn rintf16(mut x: f16) -> f16 {
+    // SAFETY: `frintn` is available for `f16` with `fp16` (implies `neon`) and has no side effects.
+    //
+    // `frintn` is always round-to-nearest which does not match the C specification, but Rust does
+    // not support rounding modes.
+    unsafe {
+        asm!(
+            "frintn {x:h}, {x:h}",
+            x = inout(vreg) x,
+            options(nomem, nostack, pure)
+        );
+    }
+    x
+}
+
+pub fn sqrt(mut x: f64) -> f64 {
+    // SAFETY: `fsqrt` is available with neon and has no side effects.
+    unsafe {
+        asm!(
+            "fsqrt {x:d}, {x:d}",
+            x = inout(vreg) x,
+            options(nomem, nostack, pure)
+        );
+    }
+    x
+}
+
+pub fn sqrtf(mut x: f32) -> f32 {
+    // SAFETY: `fsqrt` is available with neon and has no side effects.
+    unsafe {
+        asm!(
+            "fsqrt {x:s}, {x:s}",
+            x = inout(vreg) x,
+            options(nomem, nostack, pure)
+        );
+    }
+    x
+}
+
+#[cfg(all(f16_enabled, target_feature = "fp16"))]
+pub fn sqrtf16(mut x: f16) -> f16 {
+    // SAFETY: `fsqrt` is available for `f16` with `fp16` (implies `neon`) and has no
+    // side effects.
+    unsafe {
+        asm!(
+            "fsqrt {x:h}, {x:h}",
+            x = inout(vreg) x,
+            options(nomem, nostack, pure)
+        );
+    }
+    x
+}
diff --git a/libm/src/math/arch/i586.rs b/libm/src/math/arch/i586.rs
new file mode 100644
index 000000000..f92b9a2af
--- /dev/null
+++ b/libm/src/math/arch/i586.rs
@@ -0,0 +1,37 @@
+//! Architecture-specific support for x86-32 without SSE2
+
+use super::super::fabs;
+
+/// Use an alternative implementation on x86, because the
+/// main implementation fails with the x87 FPU used by
+/// debian i386, probably due to excess precision issues.
+/// Basic implementation taken from https://github.com/rust-lang/libm/issues/219.
+pub fn ceil(x: f64) -> f64 {
+    // 2^52: every finite `f64` at or above this magnitude is already an integer.
+    let int_threshold = 4503599627370496.0_f64.to_bits();
+
+    // Magnitudes are compared as bit patterns, so infinities and NaN also take this path.
+    if fabs(x).to_bits() >= int_threshold {
+        return x;
+    }
+
+    let whole = x as i64 as f64;
+    if whole < x { whole + 1.0 } else { whole }
+}
+
+/// Use an alternative implementation on x86, because the
+/// main implementation fails with the x87 FPU used by
+/// debian i386, probably due to excess precision issues.
+/// Basic implementation taken from https://github.com/rust-lang/libm/issues/219.
+pub fn floor(x: f64) -> f64 {
+    // 2^52: every finite `f64` at or above this magnitude is already an integer.
+    let int_threshold = 4503599627370496.0_f64.to_bits();
+
+    // Magnitudes are compared as bit patterns, so infinities and NaN also take this path.
+    if fabs(x).to_bits() >= int_threshold {
+        return x;
+    }
+
+    let whole = x as i64 as f64;
+    if whole > x { whole - 1.0 } else { whole }
+}
diff --git a/libm/src/math/arch/i686.rs b/libm/src/math/arch/i686.rs
new file mode 100644
index 000000000..3e1d19bfa
--- /dev/null
+++ b/libm/src/math/arch/i686.rs
@@ -0,0 +1,27 @@
+//! Architecture-specific support for x86-32 and x86-64 with SSE2
+
+pub fn sqrtf(mut x: f32) -> f32 {
+    // SAFETY: `sqrtss` is an SSE instruction; this module is gated behind `sse2`, which implies
+    // `sse`. It has no memory access or side effects.
+    unsafe {
+        core::arch::asm!(
+            "sqrtss {x}, {x}",
+            x = inout(xmm_reg) x,
+            options(nostack, nomem, pure),
+        )
+    };
+    x
+}
+
+pub fn sqrt(mut x: f64) -> f64 {
+    // SAFETY: `sqrtsd` is part of `sse2`, which this module is gated behind. It has no memory
+    // access or side effects.
+    unsafe {
+        core::arch::asm!(
+            "sqrtsd {x}, {x}",
+            x = inout(xmm_reg) x,
+            options(nostack, nomem, pure),
+        )
+    };
+    x
+}
diff --git a/libm/src/math/arch/mod.rs b/libm/src/math/arch/mod.rs
new file mode 100644
index 000000000..d9f2aad66
--- /dev/null
+++ b/libm/src/math/arch/mod.rs
@@ -0,0 +1,50 @@
+//! Architecture-specific routines and operations.
+//!
+//! LLVM will already optimize calls to some of these in cases that there are hardware
+//! instructions. Providing an implementation here just ensures that the faster implementation
+//! is used when calling the function directly. This helps anyone who uses `libm` directly, as
+//! well as improving things when these routines are called as part of other implementations.
+
+// Most implementations should be defined here, to ensure they are not made available when
+// soft floats are required.
+#[cfg(arch_enabled)]
+cfg_if! {
+    if #[cfg(all(target_arch = "wasm32", intrinsics_enabled))] {
+        mod wasm32;
+        pub use wasm32::{
+            ceil, ceilf, fabs, fabsf, floor, floorf, rint, rintf, sqrt, sqrtf, trunc, truncf,
+        };
+    } else if #[cfg(target_feature = "sse2")] {
+        mod i686;
+        pub use i686::{sqrt, sqrtf};
+    } else if #[cfg(all(
+        any(target_arch = "aarch64", target_arch = "arm64ec"),
+        target_feature = "neon"
+    ))] {
+        mod aarch64;
+
+        pub use aarch64::{
+            fma,
+            fmaf,
+            rint,
+            rintf,
+            sqrt,
+            sqrtf,
+        };
+
+        #[cfg(all(f16_enabled, target_feature = "fp16"))]
+        pub use aarch64::{
+            rintf16,
+            sqrtf16,
+        };
+    }
+}
+
+// There are certain architecture-specific implementations that are needed for correctness
+// even with `force-soft-float`. These are configured here.
+cfg_if! {
+    if #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))] {
+        mod i586;
+        pub use i586::{ceil, floor};
+    }
+}
diff --git a/libm/src/math/arch/wasm32.rs b/libm/src/math/arch/wasm32.rs
new file mode 100644
index 000000000..de80c8a58
--- /dev/null
+++ b/libm/src/math/arch/wasm32.rs
@@ -0,0 +1,50 @@
+//! Wasm has builtins for simple float operations. Use the unstable `core::arch` intrinsics which
+//! are significantly faster than soft float operations.
+
+pub fn ceil(x: f64) -> f64 {
+    core::arch::wasm32::f64_ceil(x)
+}
+
+pub fn ceilf(x: f32) -> f32 {
+    core::arch::wasm32::f32_ceil(x)
+}
+
+pub fn fabs(x: f64) -> f64 {
+    x.abs()
+}
+
+pub fn fabsf(x: f32) -> f32 {
+    x.abs()
+}
+
+pub fn floor(x: f64) -> f64 {
+    core::arch::wasm32::f64_floor(x)
+}
+
+pub fn floorf(x: f32) -> f32 {
+    core::arch::wasm32::f32_floor(x)
+}
+
+pub fn rint(x: f64) -> f64 {
+    core::arch::wasm32::f64_nearest(x)
+}
+
+pub fn rintf(x: f32) -> f32 {
+    core::arch::wasm32::f32_nearest(x)
+}
+
+pub fn sqrt(x: f64) -> f64 {
+    core::arch::wasm32::f64_sqrt(x)
+}
+
+pub fn sqrtf(x: f32) -> f32 {
+    core::arch::wasm32::f32_sqrt(x)
+}
+
+pub fn trunc(x: f64) -> f64 {
+    core::arch::wasm32::f64_trunc(x)
+}
+
+pub fn truncf(x: f32) -> f32 {
+    core::arch::wasm32::f32_trunc(x)
+}
diff --git a/src/math/asin.rs b/libm/src/math/asin.rs
similarity index 98%
rename from src/math/asin.rs
rename to libm/src/math/asin.rs
index 12fe08fc7..12d0cd35f 100644
--- a/src/math/asin.rs
+++ b/libm/src/math/asin.rs
@@ -90,7 +90,7 @@ pub fn asin(mut x: f64) -> f64 {
     /* |x| < 0.5 */
     if ix < 0x3fe00000 {
         /* if 0x1p-1022 <= |x| < 0x1p-26, avoid raising underflow */
-        if ix < 0x3e500000 && ix >= 0x00100000 {
+        if (0x00100000..0x3e500000).contains(&ix) {
             return x;
         } else {
             return x + x * comp_r(x * x);
diff --git a/src/math/asinf.rs b/libm/src/math/asinf.rs
similarity index 94%
rename from src/math/asinf.rs
rename to libm/src/math/asinf.rs
index 2c785abe2..ed6855567 100644
--- a/src/math/asinf.rs
+++ b/libm/src/math/asinf.rs
@@ -13,8 +13,8 @@
  * ====================================================
  */
 
-use super::fabsf::fabsf;
 use super::sqrt::sqrt;
+use super::support::Float;
 
 const PIO2: f64 = 1.570796326794896558e+00;
 
@@ -54,14 +54,14 @@ pub fn asinf(mut x: f32) -> f32 {
     if ix < 0x3f000000 {
         /* |x| < 0.5 */
         /* if 0x1p-126 <= |x| < 0x1p-12, avoid raising underflow */
-        if (ix < 0x39800000) && (ix >= 0x00800000) {
+        if (0x00800000..0x39800000).contains(&ix) {
             return x;
         }
         return x + x * r(x * x);
     }
 
     /* 1 > |x| >= 0.5 */
-    let z = (1. - fabsf(x)) * 0.5;
+    let z = (1. - Float::abs(x)) * 0.5;
     let s = sqrt(z as f64);
     x = (PIO2 - 2. * (s + s * (r(z) as f64))) as f32;
     if (hx >> 31) != 0 { -x } else { x }
diff --git a/src/math/asinh.rs b/libm/src/math/asinh.rs
similarity index 100%
rename from src/math/asinh.rs
rename to libm/src/math/asinh.rs
diff --git a/src/math/asinhf.rs b/libm/src/math/asinhf.rs
similarity index 100%
rename from src/math/asinhf.rs
rename to libm/src/math/asinhf.rs
diff --git a/src/math/atan.rs b/libm/src/math/atan.rs
similarity index 100%
rename from src/math/atan.rs
rename to libm/src/math/atan.rs
diff --git a/src/math/atan2.rs b/libm/src/math/atan2.rs
similarity index 89%
rename from src/math/atan2.rs
rename to libm/src/math/atan2.rs
index b9bf0da93..c668731cf 100644
--- a/src/math/atan2.rs
+++ b/libm/src/math/atan2.rs
@@ -114,12 +114,18 @@ pub fn atan2(y: f64, x: f64) -> f64 {
     }
 }
 
-#[test]
-fn sanity_check() {
-    assert_eq!(atan2(0.0, 1.0), 0.0);
-    assert_eq!(atan2(0.0, -1.0), PI);
-    assert_eq!(atan2(-0.0, -1.0), -PI);
-    assert_eq!(atan2(3.0, 2.0), atan(3.0 / 2.0));
-    assert_eq!(atan2(2.0, -1.0), atan(2.0 / -1.0) + PI);
-    assert_eq!(atan2(-2.0, -1.0), atan(-2.0 / -1.0) - PI);
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    #[cfg_attr(x86_no_sse, ignore = "FIXME(i586): possible incorrect rounding")]
+    fn sanity_check() {
+        assert_eq!(atan2(0.0, 1.0), 0.0);
+        assert_eq!(atan2(0.0, -1.0), PI);
+        assert_eq!(atan2(-0.0, -1.0), -PI);
+        assert_eq!(atan2(3.0, 2.0), atan(3.0 / 2.0));
+        assert_eq!(atan2(2.0, -1.0), atan(2.0 / -1.0) + PI);
+        assert_eq!(atan2(-2.0, -1.0), atan(-2.0 / -1.0) - PI);
+    }
 }
diff --git a/src/math/atan2f.rs b/libm/src/math/atan2f.rs
similarity index 78%
rename from src/math/atan2f.rs
rename to libm/src/math/atan2f.rs
index fa33f54f6..95b466fff 100644
--- a/src/math/atan2f.rs
+++ b/libm/src/math/atan2f.rs
@@ -42,9 +42,9 @@ pub fn atan2f(y: f32, x: f32) -> f32 {
     /* when y = 0 */
     if iy == 0 {
         return match m {
-            0 | 1 => y,   /* atan(+-0,+anything)=+-0 */
-            2 => PI,      /* atan(+0,-anything) = pi */
-            3 | _ => -PI, /* atan(-0,-anything) =-pi */
+            0 | 1 => y, /* atan(+-0,+anything)=+-0 */
+            2 => PI,    /* atan(+0,-anything) = pi */
+            _ => -PI,   /* atan(-0,-anything) =-pi */
         };
     }
     /* when x = 0 */
@@ -55,17 +55,17 @@ pub fn atan2f(y: f32, x: f32) -> f32 {
     if ix == 0x7f800000 {
         return if iy == 0x7f800000 {
             match m {
-                0 => PI / 4.,           /* atan(+INF,+INF) */
-                1 => -PI / 4.,          /* atan(-INF,+INF) */
-                2 => 3. * PI / 4.,      /* atan(+INF,-INF)*/
-                3 | _ => -3. * PI / 4., /* atan(-INF,-INF)*/
+                0 => PI / 4.,       /* atan(+INF,+INF) */
+                1 => -PI / 4.,      /* atan(-INF,+INF) */
+                2 => 3. * PI / 4.,  /* atan(+INF,-INF)*/
+                _ => -3. * PI / 4., /* atan(-INF,-INF)*/
             }
         } else {
             match m {
-                0 => 0.,      /* atan(+...,+INF) */
-                1 => -0.,     /* atan(-...,+INF) */
-                2 => PI,      /* atan(+...,-INF) */
-                3 | _ => -PI, /* atan(-...,-INF) */
+                0 => 0.,  /* atan(+...,+INF) */
+                1 => -0., /* atan(-...,+INF) */
+                2 => PI,  /* atan(+...,-INF) */
+                _ => -PI, /* atan(-...,-INF) */
             }
         };
     }
diff --git a/src/math/atanf.rs b/libm/src/math/atanf.rs
similarity index 100%
rename from src/math/atanf.rs
rename to libm/src/math/atanf.rs
diff --git a/src/math/atanh.rs b/libm/src/math/atanh.rs
similarity index 100%
rename from src/math/atanh.rs
rename to libm/src/math/atanh.rs
diff --git a/src/math/atanhf.rs b/libm/src/math/atanhf.rs
similarity index 95%
rename from src/math/atanhf.rs
rename to libm/src/math/atanhf.rs
index 3545411bb..80ccec1f6 100644
--- a/src/math/atanhf.rs
+++ b/libm/src/math/atanhf.rs
@@ -18,7 +18,7 @@ pub fn atanhf(mut x: f32) -> f32 {
         if u < 0x3f800000 - (32 << 23) {
             /* handle underflow */
             if u < (1 << 23) {
-                force_eval!((x * x) as f32);
+                force_eval!(x * x);
             }
         } else {
             /* |x| < 0.5, up to 1.7ulp error */
diff --git a/libm/src/math/cbrt.rs b/libm/src/math/cbrt.rs
new file mode 100644
index 000000000..9d3311cd6
--- /dev/null
+++ b/libm/src/math/cbrt.rs
@@ -0,0 +1,215 @@
+/* SPDX-License-Identifier: MIT */
+/* origin: core-math/src/binary64/cbrt/cbrt.c
+ * Copyright (c) 2021-2022 Alexei Sibidanov.
+ * Ported to Rust in 2025 by Trevor Gross.
+ */
+
+use super::Float;
+use super::support::{FpResult, Round, cold_path};
+
+/// Compute the cube root of the argument.
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn cbrt(x: f64) -> f64 {
+    cbrt_round(x, Round::Nearest).val
+}
+
+pub fn cbrt_round(x: f64, round: Round) -> FpResult<f64> {
+    const ESCALE: [f64; 3] = [
+        1.0,
+        hf64!("0x1.428a2f98d728bp+0"), /* 2^(1/3) */
+        hf64!("0x1.965fea53d6e3dp+0"), /* 2^(2/3) */
+    ];
+
+    /* the polynomial c0+c1*x+c2*x^2+c3*x^3 approximates x^(1/3) on [1,2]
+    with maximal error < 9.2e-5 (attained at x=2) */
+    const C: [f64; 4] = [
+        hf64!("0x1.1b0babccfef9cp-1"),
+        hf64!("0x1.2c9a3e94d1da5p-1"),
+        hf64!("-0x1.4dc30b1a1ddbap-3"),
+        hf64!("0x1.7a8d3e4ec9b07p-6"),
+    ];
+
+    let u0: f64 = hf64!("0x1.5555555555555p-2");
+    let u1: f64 = hf64!("0x1.c71c71c71c71cp-3");
+
+    let rsc = [1.0, -1.0, 0.5, -0.5, 0.25, -0.25];
+
+    let off = [hf64!("0x1p-53"), 0.0, 0.0, 0.0];
+
+    /* rm=0 for rounding to nearest, and other values for directed roundings */
+    let hx: u64 = x.to_bits();
+    let mut mant: u64 = hx & f64::SIG_MASK;
+    let sign: u64 = hx >> 63;
+
+    let mut e: u32 = (hx >> f64::SIG_BITS) as u32 & f64::EXP_SAT;
+
+    if ((e + 1) & f64::EXP_SAT) < 2 {
+        cold_path();
+
+        let ix: u64 = hx & !f64::SIGN_MASK;
+
+        /* 0, inf, nan: we return x + x instead of simply x,
+        so that for x a signaling NaN, it correctly triggers
+        the invalid exception. */
+        if e == f64::EXP_SAT || ix == 0 {
+            return FpResult::ok(x + x);
+        }
+
+        let nz = ix.leading_zeros() - 11; /* subnormal */
+        mant <<= nz;
+        mant &= f64::SIG_MASK;
+        e = e.wrapping_sub(nz - 1);
+    }
+
+    e = e.wrapping_add(3072);
+    let cvt1: u64 = mant | (0x3ffu64 << 52);
+    let mut cvt5: u64 = cvt1;
+
+    let et: u32 = e / 3;
+    let it: u32 = e % 3;
+
+    /* 2^(3k+it) <= x < 2^(3k+it+1), with 0 <= it <= 2 */
+    cvt5 += u64::from(it) << f64::SIG_BITS;
+    cvt5 |= sign << 63;
+    let zz: f64 = f64::from_bits(cvt5);
+
+    /* cbrt(x) = cbrt(zz)*2^(et-1365) where 1 <= zz < 8 */
+    let mut isc: u64 = ESCALE[it as usize].to_bits(); // todo: index
+    isc |= sign << 63;
+    let cvt2: u64 = isc;
+    let z: f64 = f64::from_bits(cvt1);
+
+    /* cbrt(zz) = cbrt(z)*isc, where isc encodes 1, 2^(1/3) or 2^(2/3),
+    and 1 <= z < 2 */
+    let r: f64 = 1.0 / z;
+    let rr: f64 = r * rsc[((it as usize) << 1) | sign as usize];
+    let z2: f64 = z * z;
+    let c0: f64 = C[0] + z * C[1];
+    let c2: f64 = C[2] + z * C[3];
+    let mut y: f64 = c0 + z2 * c2;
+    let mut y2: f64 = y * y;
+
+    /* y is an approximation of z^(1/3) */
+    let mut h: f64 = y2 * (y * r) - 1.0;
+
+    /* h determines the error between y and z^(1/3) */
+    y -= (h * y) * (u0 - u1 * h);
+
+    /* The correction y -= (h*y)*(u0 - u1*h) corresponds to a cubic variant
+    of Newton's method, with the function f(y) = 1-z/y^3. */
+    y *= f64::from_bits(cvt2);
+
+    /* Now y is an approximation of zz^(1/3),
+     * and rr an approximation of 1/zz. We now perform another iteration of
+     * Newton-Raphson, this time with a linear approximation only. */
+    y2 = y * y;
+    let mut y2l: f64 = y.fma(y, -y2);
+
+    /* y2 + y2l = y^2 exactly */
+    let mut y3: f64 = y2 * y;
+    let mut y3l: f64 = y.fma(y2, -y3) + y * y2l;
+
+    /* y3 + y3l approximates y^3 with about 106 bits of accuracy */
+    h = ((y3 - zz) + y3l) * rr;
+    let mut dy: f64 = h * (y * u0);
+
+    /* the approximation of zz^(1/3) is y - dy */
+    let mut y1: f64 = y - dy;
+    dy = (y - y1) - dy;
+
+    /* the approximation of zz^(1/3) is now y1 + dy, where |dy| < 1/2 ulp(y)
+     * (for rounding to nearest) */
+    let mut ady: f64 = dy.abs();
+
+    /* For directed roundings, ady0 is tiny when dy is tiny, or ady0 is near
+     * from ulp(1);
+     * for rounding to nearest, ady0 is tiny when dy is near from 1/2 ulp(1),
+     * or from 3/2 ulp(1). */
+    let mut ady0: f64 = (ady - off[round as usize]).abs();
+    let mut ady1: f64 = (ady - (hf64!("0x1p-52") + off[round as usize])).abs();
+
+    if ady0 < hf64!("0x1p-75") || ady1 < hf64!("0x1p-75") {
+        cold_path();
+
+        y2 = y1 * y1;
+        y2l = y1.fma(y1, -y2);
+        y3 = y2 * y1;
+        y3l = y1.fma(y2, -y3) + y1 * y2l;
+        h = ((y3 - zz) + y3l) * rr;
+        dy = h * (y1 * u0);
+        y = y1 - dy;
+        dy = (y1 - y) - dy;
+        y1 = y;
+        ady = dy.abs();
+        ady0 = (ady - off[round as usize]).abs();
+        ady1 = (ady - (hf64!("0x1p-52") + off[round as usize])).abs();
+
+        if ady0 < hf64!("0x1p-98") || ady1 < hf64!("0x1p-98") {
+            cold_path();
+            let azz: f64 = zz.abs();
+
+            // ~ 0x1.79d15d0e8d59b80000000000000ffc3dp+0
+            if azz == hf64!("0x1.9b78223aa307cp+1") {
+                y1 = hf64!("0x1.79d15d0e8d59cp+0").copysign(zz);
+            }
+
+            // ~ 0x1.de87aa837820e80000000000001c0f08p+0
+            if azz == hf64!("0x1.a202bfc89ddffp+2") {
+                y1 = hf64!("0x1.de87aa837820fp+0").copysign(zz);
+            }
+
+            if round != Round::Nearest {
+                let wlist = [
+                    (hf64!("0x1.3a9ccd7f022dbp+0"), hf64!("0x1.1236160ba9b93p+0")), // ~ 0x1.1236160ba9b930000000000001e7e8fap+0
+                    (hf64!("0x1.7845d2faac6fep+0"), hf64!("0x1.23115e657e49cp+0")), // ~ 0x1.23115e657e49c0000000000001d7a799p+0
+                    (hf64!("0x1.d1ef81cbbbe71p+0"), hf64!("0x1.388fb44cdcf5ap+0")), // ~ 0x1.388fb44cdcf5a0000000000002202c55p+0
+                    (hf64!("0x1.0a2014f62987cp+1"), hf64!("0x1.46bcbf47dc1e8p+0")), // ~ 0x1.46bcbf47dc1e8000000000000303aa2dp+0
+                    (hf64!("0x1.fe18a044a5501p+1"), hf64!("0x1.95decfec9c904p+0")), // ~ 0x1.95decfec9c9040000000000000159e8ep+0
+                    (hf64!("0x1.a6bb8c803147bp+2"), hf64!("0x1.e05335a6401dep+0")), // ~ 0x1.e05335a6401de00000000000027ca017p+0
+                    (hf64!("0x1.ac8538a031cbdp+2"), hf64!("0x1.e281d87098de8p+0")), // ~ 0x1.e281d87098de80000000000000ee9314p+0
+                ];
+
+                for (a, b) in wlist {
+                    if azz == a {
+                        let tmp = if round as u64 + sign == 2 { hf64!("0x1p-52") } else { 0.0 };
+                        y1 = (b + tmp).copysign(zz);
+                    }
+                }
+            }
+        }
+    }
+
+    let mut cvt3: u64 = y1.to_bits();
+    cvt3 = cvt3.wrapping_add(((et.wrapping_sub(342).wrapping_sub(1023)) as u64) << 52);
+    let m0: u64 = cvt3 << 30;
+    let m1 = ((m0 as i64) >> 63) as u64; // sign-extend: all ones iff m0's top bit is set
+
+    if (m0 ^ m1) <= (1u64 << 30) {
+        cold_path();
+
+        let mut cvt4: u64 = y1.to_bits();
+        cvt4 = (cvt4 + (164 << 15)) & 0xffffffffffff0000u64;
+
+        if ((f64::from_bits(cvt4) - y1) - dy).abs() < hf64!("0x1p-60") || (zz).abs() == 1.0 {
+            cvt3 = (cvt3 + (1u64 << 15)) & 0xffffffffffff0000u64;
+        }
+    }
+
+    FpResult::ok(f64::from_bits(cvt3))
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn spot_checks() {
+        if !cfg!(x86_no_sse) {
+            // Exposes a rounding mode problem. Ignored on i586 because of inaccurate FMA.
+            assert_biteq!(
+                cbrt(f64::from_bits(0xf7f792b28f600000)),
+                f64::from_bits(0xd29ce68655d962f3)
+            );
+        }
+    }
+}
diff --git a/src/math/cbrtf.rs b/libm/src/math/cbrtf.rs
similarity index 100%
rename from src/math/cbrtf.rs
rename to libm/src/math/cbrtf.rs
diff --git a/libm/src/math/ceil.rs b/libm/src/math/ceil.rs
new file mode 100644
index 000000000..4e1035457
--- /dev/null
+++ b/libm/src/math/ceil.rs
@@ -0,0 +1,46 @@
+/// Ceil (f16)
+///
+/// Finds the nearest integer greater than or equal to `x`.
+#[cfg(f16_enabled)]
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn ceilf16(x: f16) -> f16 {
+    super::generic::ceil(x)
+}
+
+/// Ceil (f32)
+///
+/// Finds the nearest integer greater than or equal to `x`.
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn ceilf(x: f32) -> f32 {
+    select_implementation! {
+        name: ceilf,
+        use_arch: all(target_arch = "wasm32", intrinsics_enabled),
+        args: x,
+    }
+
+    super::generic::ceil(x)
+}
+
+/// Ceil (f64)
+///
+/// Finds the nearest integer greater than or equal to `x`.
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn ceil(x: f64) -> f64 {
+    select_implementation! {
+        name: ceil,
+        use_arch: all(target_arch = "wasm32", intrinsics_enabled),
+        use_arch_required: all(target_arch = "x86", not(target_feature = "sse2")),
+        args: x,
+    }
+
+    super::generic::ceil(x)
+}
+
+/// Ceil (f128)
+///
+/// Finds the nearest integer greater than or equal to `x`.
+#[cfg(f128_enabled)]
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn ceilf128(x: f128) -> f128 {
+    super::generic::ceil(x)
+}
diff --git a/libm/src/math/copysign.rs b/libm/src/math/copysign.rs
new file mode 100644
index 000000000..d2a86e7fd
--- /dev/null
+++ b/libm/src/math/copysign.rs
@@ -0,0 +1,88 @@
+/// Sign of Y, magnitude of X (f16)
+///
+/// Returns a value that has the magnitude (absolute value) of the first argument, `x`,
+/// combined with the sign of the second argument, `y`.
+#[cfg(f16_enabled)]
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn copysignf16(x: f16, y: f16) -> f16 {
+    super::generic::copysign(x, y)
+}
+
+/// Sign of Y, magnitude of X (f32)
+///
+/// Returns a value that has the magnitude (absolute value) of the first argument, `x`,
+/// combined with the sign of the second argument, `y`.
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn copysignf(x: f32, y: f32) -> f32 {
+    super::generic::copysign(x, y)
+}
+
+/// Sign of Y, magnitude of X (f64)
+///
+/// Returns a value that has the magnitude (absolute value) of the first argument, `x`,
+/// combined with the sign of the second argument, `y`.
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn copysign(x: f64, y: f64) -> f64 {
+    super::generic::copysign(x, y)
+}
+
+/// Sign of Y, magnitude of X (f128)
+///
+/// Returns a value that has the magnitude (absolute value) of the first argument, `x`,
+/// combined with the sign of the second argument, `y`.
+#[cfg(f128_enabled)]
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn copysignf128(x: f128, y: f128) -> f128 {
+    super::generic::copysign(x, y)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::support::Float;
+
+    fn spec_test<F: Float>(f: impl Fn(F, F) -> F) {
+        assert_biteq!(f(F::ZERO, F::ZERO), F::ZERO);
+        assert_biteq!(f(F::NEG_ZERO, F::ZERO), F::ZERO);
+        assert_biteq!(f(F::ZERO, F::NEG_ZERO), F::NEG_ZERO);
+        assert_biteq!(f(F::NEG_ZERO, F::NEG_ZERO), F::NEG_ZERO);
+
+        assert_biteq!(f(F::ONE, F::ONE), F::ONE);
+        assert_biteq!(f(F::NEG_ONE, F::ONE), F::ONE);
+        assert_biteq!(f(F::ONE, F::NEG_ONE), F::NEG_ONE);
+        assert_biteq!(f(F::NEG_ONE, F::NEG_ONE), F::NEG_ONE);
+
+        assert_biteq!(f(F::INFINITY, F::INFINITY), F::INFINITY);
+        assert_biteq!(f(F::NEG_INFINITY, F::INFINITY), F::INFINITY);
+        assert_biteq!(f(F::INFINITY, F::NEG_INFINITY), F::NEG_INFINITY);
+        assert_biteq!(f(F::NEG_INFINITY, F::NEG_INFINITY), F::NEG_INFINITY);
+
+        // Not required, but we expect the sign of NaN inputs to be copied like any other sign
+        assert_biteq!(f(F::NAN, F::NAN), F::NAN);
+        assert_biteq!(f(F::NEG_NAN, F::NAN), F::NAN);
+        assert_biteq!(f(F::NAN, F::NEG_NAN), F::NEG_NAN);
+        assert_biteq!(f(F::NEG_NAN, F::NEG_NAN), F::NEG_NAN);
+    }
+
+    #[test]
+    #[cfg(f16_enabled)]
+    fn spec_tests_f16() {
+        spec_test::<f16>(copysignf16);
+    }
+
+    #[test]
+    fn spec_tests_f32() {
+        spec_test::<f32>(copysignf);
+    }
+
+    #[test]
+    fn spec_tests_f64() {
+        spec_test::<f64>(copysign);
+    }
+
+    #[test]
+    #[cfg(f128_enabled)]
+    fn spec_tests_f128() {
+        spec_test::<f128>(copysignf128);
+    }
+}
diff --git a/src/math/copysignf.rs b/libm/src/math/copysignf.rs
similarity index 68%
rename from src/math/copysignf.rs
rename to libm/src/math/copysignf.rs
index 6c346e3a5..8b9bed4c0 100644
--- a/src/math/copysignf.rs
+++ b/libm/src/math/copysignf.rs
@@ -4,9 +4,5 @@
 /// first argument, `x`, and the sign of its second argument, `y`.
 #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
 pub fn copysignf(x: f32, y: f32) -> f32 {
-    let mut ux = x.to_bits();
-    let uy = y.to_bits();
-    ux &= 0x7fffffff;
-    ux |= uy & 0x80000000;
-    f32::from_bits(ux)
+    super::generic::copysign(x, y)
 }
diff --git a/libm/src/math/copysignf128.rs b/libm/src/math/copysignf128.rs
new file mode 100644
index 000000000..7bd81d42b
--- /dev/null
+++ b/libm/src/math/copysignf128.rs
@@ -0,0 +1,8 @@
+/// Sign of Y, magnitude of X (f128)
+///
+/// Returns a value that has the magnitude (absolute value) of the first argument, `x`,
+/// combined with the sign of the second argument, `y`.
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn copysignf128(x: f128, y: f128) -> f128 {
+    super::generic::copysign(x, y)
+}
diff --git a/libm/src/math/copysignf16.rs b/libm/src/math/copysignf16.rs
new file mode 100644
index 000000000..820658686
--- /dev/null
+++ b/libm/src/math/copysignf16.rs
@@ -0,0 +1,8 @@
+/// Sign of Y, magnitude of X (f16)
+///
+/// Returns a value that has the magnitude (absolute value) of the first argument, `x`,
+/// combined with the sign of the second argument, `y`.
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn copysignf16(x: f16, y: f16) -> f16 {
+    super::generic::copysign(x, y)
+}
diff --git a/src/math/cos.rs b/libm/src/math/cos.rs
similarity index 100%
rename from src/math/cos.rs
rename to libm/src/math/cos.rs
diff --git a/src/math/cosf.rs b/libm/src/math/cosf.rs
similarity index 100%
rename from src/math/cosf.rs
rename to libm/src/math/cosf.rs
diff --git a/src/math/cosh.rs b/libm/src/math/cosh.rs
similarity index 100%
rename from src/math/cosh.rs
rename to libm/src/math/cosh.rs
diff --git a/src/math/coshf.rs b/libm/src/math/coshf.rs
similarity index 100%
rename from src/math/coshf.rs
rename to libm/src/math/coshf.rs
diff --git a/src/math/erf.rs b/libm/src/math/erf.rs
similarity index 100%
rename from src/math/erf.rs
rename to libm/src/math/erf.rs
diff --git a/src/math/erff.rs b/libm/src/math/erff.rs
similarity index 100%
rename from src/math/erff.rs
rename to libm/src/math/erff.rs
diff --git a/src/math/exp.rs b/libm/src/math/exp.rs
similarity index 100%
rename from src/math/exp.rs
rename to libm/src/math/exp.rs
diff --git a/src/math/exp10.rs b/libm/src/math/exp10.rs
similarity index 88%
rename from src/math/exp10.rs
rename to libm/src/math/exp10.rs
index 559930e10..7c33c92b6 100644
--- a/src/math/exp10.rs
+++ b/libm/src/math/exp10.rs
@@ -6,12 +6,13 @@ const P10: &[f64] = &[
     1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9, 1e10, 1e11, 1e12, 1e13, 1e14, 1e15,
 ];
 
+/// Calculates 10 raised to the power of `x` (f64).
 #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
 pub fn exp10(x: f64) -> f64 {
     let (mut y, n) = modf(x);
     let u: u64 = n.to_bits();
     /* fabs(n) < 16 without raising invalid on nan */
-    if (u >> 52 & 0x7ff) < 0x3ff + 4 {
+    if ((u >> 52) & 0x7ff) < 0x3ff + 4 {
         if y == 0.0 {
             return i!(P10, ((n as isize) + 15) as usize);
         }
diff --git a/src/math/exp10f.rs b/libm/src/math/exp10f.rs
similarity index 88%
rename from src/math/exp10f.rs
rename to libm/src/math/exp10f.rs
index 786305481..0520a41f2 100644
--- a/src/math/exp10f.rs
+++ b/libm/src/math/exp10f.rs
@@ -5,12 +5,13 @@ const LN10_F64: f64 = 3.32192809488736234787031942948939;
 const P10: &[f32] =
     &[1e-7, 1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 1e-1, 1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7];
 
+/// Calculates 10 raised to the power of `x` (f32).
 #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
 pub fn exp10f(x: f32) -> f32 {
     let (mut y, n) = modff(x);
     let u = n.to_bits();
     /* fabsf(n) < 8 without raising invalid on nan */
-    if (u >> 23 & 0xff) < 0x7f + 3 {
+    if ((u >> 23) & 0xff) < 0x7f + 3 {
         if y == 0.0 {
             return i!(P10, ((n as isize) + 7) as usize);
         }
diff --git a/src/math/exp2.rs b/libm/src/math/exp2.rs
similarity index 99%
rename from src/math/exp2.rs
rename to libm/src/math/exp2.rs
index dce2ab4df..6e98d066c 100644
--- a/src/math/exp2.rs
+++ b/libm/src/math/exp2.rs
@@ -341,7 +341,7 @@ pub fn exp2(mut x: f64) -> f64 {
 
     /* Filter out exceptional cases. */
     let ui = f64::to_bits(x);
-    let ix = ui >> 32 & 0x7fffffff;
+    let ix = (ui >> 32) & 0x7fffffff;
     if ix >= 0x408ff000 {
         /* |x| >= 1022 or nan */
         if ix >= 0x40900000 && ui >> 63 == 0 {
diff --git a/src/math/exp2f.rs b/libm/src/math/exp2f.rs
similarity index 98%
rename from src/math/exp2f.rs
rename to libm/src/math/exp2f.rs
index f4867b80e..f452b6a20 100644
--- a/src/math/exp2f.rs
+++ b/libm/src/math/exp2f.rs
@@ -95,7 +95,7 @@ pub fn exp2f(mut x: f32) -> f32 {
             /* NaN */
             return x;
         }
-        if ui >= 0x43000000 && ui < 0x80000000 {
+        if (0x43000000..0x80000000).contains(&ui) {
             /* x >= 128 */
             x *= x1p127;
             return x;
@@ -127,7 +127,7 @@ pub fn exp2f(mut x: f32) -> f32 {
     let z: f64 = (x - uf) as f64;
     /* Compute r = exp2(y) = exp2ft[i0] * p(z). */
     let r: f64 = f64::from_bits(i!(EXP2FT, i0 as usize));
-    let t: f64 = r as f64 * z;
+    let t: f64 = r * z;
     let r: f64 = r + t * (p1 as f64 + z * p2 as f64) + t * (z * z) * (p3 as f64 + z * p4 as f64);
 
     /* Scale by 2**k */
diff --git a/src/math/expf.rs b/libm/src/math/expf.rs
similarity index 100%
rename from src/math/expf.rs
rename to libm/src/math/expf.rs
diff --git a/src/math/expm1.rs b/libm/src/math/expm1.rs
similarity index 99%
rename from src/math/expm1.rs
rename to libm/src/math/expm1.rs
index 42608509a..f25153f32 100644
--- a/src/math/expm1.rs
+++ b/libm/src/math/expm1.rs
@@ -115,7 +115,7 @@ pub fn expm1(mut x: f64) -> f64 {
     }
     ui = ((0x3ff + k) as u64) << 52; /* 2^k */
     let twopk = f64::from_bits(ui);
-    if k < 0 || k > 56 {
+    if !(0..=56).contains(&k) {
         /* suffice to return exp(x)-1 */
         y = x - e + 1.0;
         if k == 1024 {
diff --git a/src/math/expm1f.rs b/libm/src/math/expm1f.rs
similarity index 99%
rename from src/math/expm1f.rs
rename to libm/src/math/expm1f.rs
index a862fe255..12c6f532b 100644
--- a/src/math/expm1f.rs
+++ b/libm/src/math/expm1f.rs
@@ -115,7 +115,7 @@ pub fn expm1f(mut x: f32) -> f32 {
         return 1. + 2. * (x - e);
     }
     let twopk = f32::from_bits(((0x7f + k) << 23) as u32); /* 2^k */
-    if (k < 0) || (k > 56) {
+    if !(0..=56).contains(&k) {
         /* suffice to return exp(x)-1 */
         let mut y = x - e + 1.;
         if k == 128 {
diff --git a/src/math/expo2.rs b/libm/src/math/expo2.rs
similarity index 100%
rename from src/math/expo2.rs
rename to libm/src/math/expo2.rs
diff --git a/libm/src/math/fabs.rs b/libm/src/math/fabs.rs
new file mode 100644
index 000000000..0050a309f
--- /dev/null
+++ b/libm/src/math/fabs.rs
@@ -0,0 +1,116 @@
+/// Absolute value (magnitude) (f16)
+///
+/// Calculates the absolute value (magnitude) of the argument `x` by direct manipulation of
+/// the bit representation of `x`; the sign is expected to be cleared even for NaN inputs.
+#[cfg(f16_enabled)]
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn fabsf16(x: f16) -> f16 {
+    super::generic::fabs(x)
+}
+
+/// Absolute value (magnitude) (f32)
+///
+/// Calculates the absolute value (magnitude) of the argument `x` by direct manipulation of
+/// the bit representation of `x`; the sign is expected to be cleared even for NaN inputs.
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn fabsf(x: f32) -> f32 {
+    select_implementation! {
+        name: fabsf,
+        use_arch: all(target_arch = "wasm32", intrinsics_enabled),
+        args: x,
+    }
+
+    super::generic::fabs(x)
+}
+
+/// Absolute value (magnitude) (f64)
+///
+/// Calculates the absolute value (magnitude) of the argument `x` by direct manipulation of
+/// the bit representation of `x`; the sign is expected to be cleared even for NaN inputs.
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn fabs(x: f64) -> f64 {
+    select_implementation! {
+        name: fabs,
+        use_arch: all(target_arch = "wasm32", intrinsics_enabled),
+        args: x,
+    }
+
+    super::generic::fabs(x)
+}
+
+/// Absolute value (magnitude) (f128)
+///
+/// Calculates the absolute value (magnitude) of the argument `x` by direct manipulation of
+/// the bit representation of `x`; the sign is expected to be cleared even for NaN inputs.
+#[cfg(f128_enabled)]
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn fabsf128(x: f128) -> f128 {
+    super::generic::fabs(x)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::support::Float;
+
+    /// Based on https://en.cppreference.com/w/cpp/numeric/math/fabs
+    fn spec_test<F: Float>(f: impl Fn(F) -> F) {
+        assert_biteq!(f(F::ZERO), F::ZERO);
+        assert_biteq!(f(F::NEG_ZERO), F::ZERO);
+        assert_biteq!(f(F::INFINITY), F::INFINITY);
+        assert_biteq!(f(F::NEG_INFINITY), F::INFINITY);
+        assert!(f(F::NAN).is_nan());
+
+        // Not required by the spec, but we expect the sign to be cleared for NaN as well
+        assert!(f(F::NAN).is_sign_positive());
+        assert!(f(F::from_bits(F::NAN.to_bits() | F::SIGN_MASK)).is_sign_positive());
+    }
+
+    #[test]
+    #[cfg(f16_enabled)]
+    fn sanity_check_f16() {
+        assert_eq!(fabsf16(-1.0f16), 1.0);
+        assert_eq!(fabsf16(2.8f16), 2.8);
+    }
+
+    #[test]
+    #[cfg(f16_enabled)]
+    fn spec_tests_f16() {
+        spec_test::<f16>(fabsf16);
+    }
+
+    #[test]
+    fn sanity_check_f32() {
+        assert_eq!(fabsf(-1.0f32), 1.0);
+        assert_eq!(fabsf(2.8f32), 2.8);
+    }
+
+    #[test]
+    fn spec_tests_f32() {
+        spec_test::<f32>(fabsf);
+    }
+
+    #[test]
+    fn sanity_check_f64() {
+        assert_eq!(fabs(-1.0f64), 1.0);
+        assert_eq!(fabs(2.8f64), 2.8);
+    }
+
+    #[test]
+    fn spec_tests_f64() {
+        spec_test::<f64>(fabs);
+    }
+
+    #[test]
+    #[cfg(f128_enabled)]
+    fn sanity_check_f128() {
+        assert_eq!(fabsf128(-1.0f128), 1.0);
+        assert_eq!(fabsf128(2.8f128), 2.8);
+    }
+
+    #[test]
+    #[cfg(f128_enabled)]
+    fn spec_tests_f128() {
+        spec_test::<f128>(fabsf128);
+    }
+}
diff --git a/src/math/fabsf.rs b/libm/src/math/fabsf.rs
similarity index 59%
rename from src/math/fabsf.rs
rename to libm/src/math/fabsf.rs
index f81c8ca44..e5820a26c 100644
--- a/src/math/fabsf.rs
+++ b/libm/src/math/fabsf.rs
@@ -1,25 +1,22 @@
 /// Absolute value (magnitude) (f32)
+///
 /// Calculates the absolute value (magnitude) of the argument `x`,
 /// by direct manipulation of the bit representation of `x`.
 #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
 pub fn fabsf(x: f32) -> f32 {
-    // On wasm32 we know that LLVM's intrinsic will compile to an optimized
-    // `f32.abs` native instruction, so we can leverage this for both code size
-    // and speed.
-    llvm_intrinsically_optimized! {
-        #[cfg(target_arch = "wasm32")] {
-            return unsafe { ::core::intrinsics::fabsf32(x) }
-        }
+    select_implementation! {
+        name: fabsf,
+        use_arch: all(target_arch = "wasm32", intrinsics_enabled),
+        args: x,
     }
-    f32::from_bits(x.to_bits() & 0x7fffffff)
+
+    super::generic::fabs(x)
 }
 
 // PowerPC tests are failing on LLVM 13: https://github.com/rust-lang/rust/issues/88520
 #[cfg(not(target_arch = "powerpc64"))]
 #[cfg(test)]
 mod tests {
-    use core::f32::*;
-
     use super::*;
 
     #[test]
@@ -31,12 +28,12 @@ mod tests {
     /// The spec: https://en.cppreference.com/w/cpp/numeric/math/fabs
     #[test]
     fn spec_tests() {
-        assert!(fabsf(NAN).is_nan());
+        assert!(fabsf(f32::NAN).is_nan());
         for f in [0.0, -0.0].iter().copied() {
             assert_eq!(fabsf(f), 0.0);
         }
-        for f in [INFINITY, NEG_INFINITY].iter().copied() {
-            assert_eq!(fabsf(f), INFINITY);
+        for f in [f32::INFINITY, f32::NEG_INFINITY].iter().copied() {
+            assert_eq!(fabsf(f), f32::INFINITY);
         }
     }
 }
diff --git a/libm/src/math/fabsf128.rs b/libm/src/math/fabsf128.rs
new file mode 100644
index 000000000..46429ca49
--- /dev/null
+++ b/libm/src/math/fabsf128.rs
@@ -0,0 +1,31 @@
+/// Absolute value (magnitude) (f128)
+///
+/// Returns the absolute value (magnitude) of the argument `x`, computed
+/// by direct manipulation of the bit representation of `x`.
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn fabsf128(x: f128) -> f128 {
+    super::generic::fabs(x)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn sanity_check() {
+        assert_eq!(fabsf128(-1.0), 1.0);
+        assert_eq!(fabsf128(2.8), 2.8);
+    }
+
+    /// Checks taken from the spec: https://en.cppreference.com/w/cpp/numeric/math/fabs
+    #[test]
+    fn spec_tests() {
+        assert!(fabsf128(f128::NAN).is_nan());
+        // Zeros of either sign compare equal to zero.
+        assert_eq!(fabsf128(0.0), 0.0);
+        assert_eq!(fabsf128(-0.0), 0.0);
+        // Infinities of either sign map to positive infinity.
+        assert_eq!(fabsf128(f128::INFINITY), f128::INFINITY);
+        assert_eq!(fabsf128(f128::NEG_INFINITY), f128::INFINITY);
+    }
+}
diff --git a/libm/src/math/fabsf16.rs b/libm/src/math/fabsf16.rs
new file mode 100644
index 000000000..eee42ac6a
--- /dev/null
+++ b/libm/src/math/fabsf16.rs
@@ -0,0 +1,31 @@
+/// Absolute value (magnitude) (f16)
+///
+/// Returns the absolute value (magnitude) of the argument `x`, computed
+/// by direct manipulation of the bit representation of `x`.
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn fabsf16(x: f16) -> f16 {
+    super::generic::fabs(x)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn sanity_check() {
+        assert_eq!(fabsf16(-1.0), 1.0);
+        assert_eq!(fabsf16(2.8), 2.8);
+    }
+
+    /// Checks taken from the spec: https://en.cppreference.com/w/cpp/numeric/math/fabs
+    #[test]
+    fn spec_tests() {
+        assert!(fabsf16(f16::NAN).is_nan());
+        // Zeros of either sign compare equal to zero.
+        assert_eq!(fabsf16(0.0), 0.0);
+        assert_eq!(fabsf16(-0.0), 0.0);
+        // Infinities of either sign map to positive infinity.
+        assert_eq!(fabsf16(f16::INFINITY), f16::INFINITY);
+        assert_eq!(fabsf16(f16::NEG_INFINITY), f16::INFINITY);
+    }
+}
diff --git a/libm/src/math/fdim.rs b/libm/src/math/fdim.rs
new file mode 100644
index 000000000..082c5478b
--- /dev/null
+++ b/libm/src/math/fdim.rs
@@ -0,0 +1,53 @@
+/// Positive difference (f16)
+///
+/// Determines the positive difference between the arguments, returning:
+/// * `x - y` if `x > y`, or
+/// * `+0`    if `x <= y`, or
+/// * NaN     if either argument is NaN.
+///
+/// A range error may occur.
+#[cfg(f16_enabled)]
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn fdimf16(x: f16, y: f16) -> f16 {
+    super::generic::fdim(x, y)
+}
+
+/// Positive difference (f32)
+///
+/// Determines the positive difference between the arguments, returning:
+/// * `x - y` if `x > y`, or
+/// * `+0`    if `x <= y`, or
+/// * NaN     if either argument is NaN.
+///
+/// A range error may occur.
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn fdimf(x: f32, y: f32) -> f32 {
+    super::generic::fdim(x, y)
+}
+
+/// Positive difference (f64)
+///
+/// Determines the positive difference between the arguments, returning:
+/// * `x - y` if `x > y`, or
+/// * `+0`    if `x <= y`, or
+/// * NaN     if either argument is NaN.
+///
+/// A range error may occur.
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn fdim(x: f64, y: f64) -> f64 {
+    super::generic::fdim(x, y)
+}
+
+/// Positive difference (f128)
+///
+/// Determines the positive difference between the arguments, returning:
+/// * `x - y` if `x > y`, or
+/// * `+0`    if `x <= y`, or
+/// * NaN     if either argument is NaN.
+///
+/// A range error may occur.
+#[cfg(f128_enabled)]
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn fdimf128(x: f128, y: f128) -> f128 {
+    super::generic::fdim(x, y)
+}
diff --git a/src/math/fdimf.rs b/libm/src/math/fdimf.rs
similarity index 50%
rename from src/math/fdimf.rs
rename to libm/src/math/fdimf.rs
index ea0b592d7..367ef517c 100644
--- a/src/math/fdimf.rs
+++ b/libm/src/math/fdimf.rs
@@ -1,22 +1,12 @@
-use core::f32;
-
 /// Positive difference (f32)
 ///
 /// Determines the positive difference between arguments, returning:
-/// * x - y	if x > y, or
-/// * +0	if x <= y, or
-/// * NAN	if either argument is NAN.
+/// * x - y if x > y, or
+/// * +0    if x <= y, or
+/// * NAN   if either argument is NAN.
 ///
 /// A range error may occur.
 #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
 pub fn fdimf(x: f32, y: f32) -> f32 {
-    if x.is_nan() {
-        x
-    } else if y.is_nan() {
-        y
-    } else if x > y {
-        x - y
-    } else {
-        0.0
-    }
+    super::generic::fdim(x, y)
 }
diff --git a/libm/src/math/fdimf128.rs b/libm/src/math/fdimf128.rs
new file mode 100644
index 000000000..6f3d1d0ff
--- /dev/null
+++ b/libm/src/math/fdimf128.rs
@@ -0,0 +1,12 @@
+/// Positive difference (f128)
+///
+/// Determines the positive difference between the arguments, returning:
+/// * `x - y` if `x > y`, or
+/// * `+0`    if `x <= y`, or
+/// * NaN     if either argument is NaN.
+///
+/// A range error may occur.
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn fdimf128(x: f128, y: f128) -> f128 {
+    super::generic::fdim(x, y)
+}
diff --git a/libm/src/math/fdimf16.rs b/libm/src/math/fdimf16.rs
new file mode 100644
index 000000000..37bd68858
--- /dev/null
+++ b/libm/src/math/fdimf16.rs
@@ -0,0 +1,12 @@
+/// Positive difference (f16)
+///
+/// Determines the positive difference between the arguments, returning:
+/// * `x - y` if `x > y`, or
+/// * `+0`    if `x <= y`, or
+/// * NaN     if either argument is NaN.
+///
+/// A range error may occur.
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn fdimf16(x: f16, y: f16) -> f16 {
+    super::generic::fdim(x, y)
+}
diff --git a/libm/src/math/floor.rs b/libm/src/math/floor.rs
new file mode 100644
index 000000000..3c5eab101
--- /dev/null
+++ b/libm/src/math/floor.rs
@@ -0,0 +1,46 @@
+/// Floor (f16)
+///
+/// Returns the largest integer value less than or equal to `x` (rounds toward -infinity).
+#[cfg(f16_enabled)]
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn floorf16(x: f16) -> f16 {
+    super::generic::floor(x)
+}
+
+/// Floor (f64)
+///
+/// Returns the largest integer value less than or equal to `x` (rounds toward -infinity).
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn floor(x: f64) -> f64 {
+    select_implementation! {
+        name: floor,
+        use_arch: all(target_arch = "wasm32", intrinsics_enabled),
+        use_arch_required: all(target_arch = "x86", not(target_feature = "sse2")),
+        args: x,
+    }
+
+    super::generic::floor(x)
+}
+
+/// Floor (f32)
+///
+/// Returns the largest integer value less than or equal to `x` (rounds toward -infinity).
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn floorf(x: f32) -> f32 {
+    select_implementation! {
+        name: floorf,
+        use_arch: all(target_arch = "wasm32", intrinsics_enabled),
+        args: x,
+    }
+
+    super::generic::floor(x)
+}
+
+/// Floor (f128)
+///
+/// Returns the largest integer value less than or equal to `x` (rounds toward -infinity).
+#[cfg(f128_enabled)]
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn floorf128(x: f128) -> f128 {
+    super::generic::floor(x)
+}
diff --git a/libm/src/math/floorf.rs b/libm/src/math/floorf.rs
new file mode 100644
index 000000000..16957b7f3
--- /dev/null
+++ b/libm/src/math/floorf.rs
@@ -0,0 +1,13 @@
+/// Floor (f32)
+///
+/// Returns the largest integer value less than or equal to `x` (rounds toward -infinity).
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn floorf(x: f32) -> f32 {
+    select_implementation! {
+        name: floorf,
+        use_arch: all(target_arch = "wasm32", intrinsics_enabled),
+        args: x,
+    }
+
+    super::generic::floor(x)
+}
diff --git a/libm/src/math/floorf128.rs b/libm/src/math/floorf128.rs
new file mode 100644
index 000000000..9a9fe4151
--- /dev/null
+++ b/libm/src/math/floorf128.rs
@@ -0,0 +1,7 @@
+/// Floor (f128)
+///
+/// Returns the largest integer value less than or equal to `x` (rounds toward -infinity).
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn floorf128(x: f128) -> f128 {
+    super::generic::floor(x)
+}
diff --git a/libm/src/math/floorf16.rs b/libm/src/math/floorf16.rs
new file mode 100644
index 000000000..f9b868e04
--- /dev/null
+++ b/libm/src/math/floorf16.rs
@@ -0,0 +1,7 @@
+/// Floor (f16)
+///
+/// Returns the largest integer value less than or equal to `x` (rounds toward -infinity).
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn floorf16(x: f16) -> f16 {
+    super::generic::floor(x)
+}
diff --git a/libm/src/math/fma.rs b/libm/src/math/fma.rs
new file mode 100644
index 000000000..e0b3347ac
--- /dev/null
+++ b/libm/src/math/fma.rs
@@ -0,0 +1,397 @@
+/* SPDX-License-Identifier: MIT */
+/* origin: musl src/math/fma.c. Ported to generic Rust algorithm in 2025, TG. */
+
+use super::support::{DInt, FpResult, HInt, IntTy, Round, Status};
+use super::{CastFrom, CastInto, Float, Int, MinInt};
+
+/// Fused multiply add (f64)
+///
+/// Computes `(x * y) + z` as one ternary operation, rounding only once at the end.
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn fma(x: f64, y: f64, z: f64) -> f64 {
+    select_implementation! {
+        name: fma,
+        use_arch: all(target_arch = "aarch64", target_feature = "neon"),
+        args: x, y, z,
+    }
+
+    fma_round(x, y, z, Round::Nearest).val
+}
+
+/// Fused multiply add (f128)
+///
+/// Computes `(x * y) + z` as one ternary operation, rounding only once at the end.
+#[cfg(f128_enabled)]
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn fmaf128(x: f128, y: f128, z: f128) -> f128 {
+    fma_round(x, y, z, Round::Nearest).val
+}
+
+/// Fused multiply-add that works when there is not a larger float size available. Computes
+/// `(x * y) + z`. The `_round` argument is not consulted anywhere in this implementation.
+#[inline]
+pub fn fma_round<F>(x: F, y: F, z: F, _round: Round) -> FpResult<F>
+where
+    F: Float,
+    F: CastFrom<F::SignedInt>,
+    F: CastFrom<i8>,
+    F::Int: HInt,
+    u32: CastInto<F::Int>,
+{
+    let one = IntTy::<F>::ONE;
+    let zero = IntTy::<F>::ZERO;
+
+    // Normalize such that the top of the mantissa is zero and we have a guard bit.
+    let nx = Norm::from_float(x);
+    let ny = Norm::from_float(y);
+    let nz = Norm::from_float(z);
+
+    if nx.is_zero_nan_inf() || ny.is_zero_nan_inf() {
+        // A factor is zero, infinity, or NaN; defer to non-fused operations.
+        return FpResult::ok(x * y + z);
+    }
+
+    if nz.is_zero_nan_inf() {
+        if nz.is_zero() {
+            // Empty add component means we only need to multiply.
+            return FpResult::ok(x * y);
+        }
+        // `z` is NaN or infinity, which sets the result.
+        return FpResult::ok(z);
+    }
+
+    // multiply: r = x * y
+    let zhi: F::Int;
+    let zlo: F::Int;
+    let (mut rlo, mut rhi) = nx.m.widen_mul(ny.m).lo_hi();
+
+    // Exponent result of multiplication
+    let mut e: i32 = nx.e + ny.e;
+    // Needed shift to align `z` to the multiplication result
+    let mut d: i32 = nz.e - e;
+    let sbits = F::BITS as i32;
+
+    // Scale `z`. Shift `z <<= kz`, `r >>= kr`, so `kz+kr == d`, set `e = e+kr` (== ez-kz)
+    if d > 0 {
+        // The magnitude of `z` is larger than `x * y`
+        if d < sbits {
+            // Maximum shift of one `F::BITS` means shifted `z` will fit into `2 * F::BITS`. Shift
+            // it into `(zhi, zlo)`. No exponent adjustment necessary.
+            zlo = nz.m << d;
+            zhi = nz.m >> (sbits - d);
+        } else {
+            // Shift larger than `sbits`, `z` only needs the top half `zhi`. Place it there (acts
+            // as a shift by `sbits`).
+            zlo = zero;
+            zhi = nz.m;
+            d -= sbits;
+
+            // `z`'s exponent is large enough that it now needs to be taken into account.
+            e = nz.e - sbits;
+
+            if d == 0 {
+                // Exactly `sbits`, nothing to do
+            } else if d < sbits {
+                // Remaining shift fits within `sbits`. Leave `z` in place, shift `x * y`
+                rlo = (rhi << (sbits - d)) | (rlo >> d);
+                // Set the sticky bit
+                rlo |= IntTy::<F>::from((rlo << (sbits - d)) != zero);
+                rhi = rhi >> d;
+            } else {
+                // `z`'s magnitude is enough that `x * y` is irrelevant. It was nonzero, so set
+                // the sticky bit.
+                rlo = one;
+                rhi = zero;
+            }
+        }
+    } else {
+        // `z`'s magnitude once shifted fits entirely within `zlo`
+        zhi = zero;
+        d = -d;
+        if d == 0 {
+            // No shift needed
+            zlo = nz.m;
+        } else if d < sbits {
+            // Shift s.t. `nz.m` fits into `zlo`
+            let sticky = IntTy::<F>::from((nz.m << (sbits - d)) != zero);
+            zlo = (nz.m >> d) | sticky;
+        } else {
+            // Would be entirely shifted out, only set the sticky bit
+            zlo = one;
+        }
+    }
+
+    /* addition */
+
+    let mut neg = nx.neg ^ ny.neg;
+    let samesign: bool = !neg ^ nz.neg;
+    let mut rhi_nonzero = true;
+
+    if samesign {
+        // r += z
+        rlo = rlo.wrapping_add(zlo);
+        rhi += zhi + IntTy::<F>::from(rlo < zlo);
+    } else {
+        // r -= z
+        let (res, borrow) = rlo.overflowing_sub(zlo);
+        rlo = res;
+        rhi = rhi.wrapping_sub(zhi.wrapping_add(IntTy::<F>::from(borrow)));
+        if (rhi >> (F::BITS - 1)) != zero {
+            rlo = rlo.signed().wrapping_neg().unsigned();
+            rhi = rhi.signed().wrapping_neg().unsigned() - IntTy::<F>::from(rlo != zero);
+            neg = !neg;
+        }
+        rhi_nonzero = rhi != zero;
+    }
+
+    /* Construct result */
+
+    // Shift result into `rhi`, left-aligned. Last bit is sticky
+    if rhi_nonzero {
+        // `d` > 0, need to shift both `rhi` and `rlo` into result
+        e += sbits;
+        d = rhi.leading_zeros() as i32 - 1;
+        rhi = (rhi << d) | (rlo >> (sbits - d));
+        // Update sticky
+        rhi |= IntTy::<F>::from((rlo << d) != zero);
+    } else if rlo != zero {
+        // `rhi` is zero, `rlo` is the entire result and needs to be shifted
+        d = rlo.leading_zeros() as i32 - 1;
+        if d < 0 {
+            // Shift and set sticky
+            rhi = (rlo >> 1) | (rlo & one);
+        } else {
+            rhi = rlo << d;
+        }
+    } else {
+        // exact +/- 0.0
+        return FpResult::ok(x * y + z);
+    }
+
+    e -= d;
+
+    // Use int->float conversion to populate the significand.
+    // i is in [1 << (BITS - 2), (1 << (BITS - 1)) - 1]
+    let mut i: F::SignedInt = rhi.signed();
+
+    if neg {
+        i = -i;
+    }
+
+    // `|r|` is in `[0x1p62,0x1p63]` for `f64`
+    let mut r: F = F::cast_from_lossy(i);
+
+    /* Account for subnormal and rounding */
+
+    // Unbiased exponent for the maximum value of `r`
+    let max_pow = F::BITS - 1 + F::EXP_BIAS;
+
+    let mut status = Status::OK;
+
+    if e < -(max_pow as i32 - 2) {
+        // Result is subnormal before rounding
+        if e == -(max_pow as i32 - 1) {
+            let mut c = F::from_parts(false, max_pow, zero);
+            if neg {
+                c = -c;
+            }
+
+            if r == c {
+                // Min normal after rounding.
+                status.set_underflow(true);
+                r = F::MIN_POSITIVE_NORMAL.copysign(r);
+                return FpResult::new(r, status);
+            }
+
+            if (rhi << (F::SIG_BITS + 1)) != zero {
+                // Account for truncated bits. One bit will be lost in the `scalbn` call, add
+                // another top bit to avoid double rounding if inexact.
+                let iu: F::Int = (rhi >> 1) | (rhi & one) | (one << (F::BITS - 2));
+                i = iu.signed();
+
+                if neg {
+                    i = -i;
+                }
+
+                r = F::cast_from_lossy(i);
+
+                // Remove the top bit
+                r = F::cast_from(2i8) * r - c;
+                status.set_underflow(true);
+            }
+        } else {
+            // Only round once when scaled
+            d = F::EXP_BITS as i32 - 1;
+            let sticky = IntTy::<F>::from(rhi << (F::BITS as i32 - d) != zero);
+            i = (((rhi >> d) | sticky) << d).signed();
+
+            if neg {
+                i = -i;
+            }
+
+            r = F::cast_from_lossy(i);
+        }
+    }
+
+    // Use our exponent to scale the final value.
+    FpResult::new(super::generic::scalbn(r, e), status)
+}
+
+/// Representation of `F` that has handled subnormals.
+#[derive(Clone, Copy, Debug)]
+struct Norm<F: Float> {
+    /// Normalized significand with one guard bit, unsigned.
+    m: F::Int,
+    /// Exponent of the mantissa such that `m * 2^e = x`. Accounts for the shift in the mantissa
+    /// and the guard bit; that is, an `f64` 1.0 will normalize as `m = 1 << 53` and `e = -53`.
+    e: i32,
+    neg: bool, // `true` when the source float had its sign bit set
+}
+
+impl<F: Float> Norm<F> {
+    /// Unbias the exponent and account for the mantissa's precision, including the guard bit.
+    const EXP_UNBIAS: u32 = F::EXP_BIAS + F::SIG_BITS + 1;
+
+    /// Exponents at or above this mark a special value: equal means the exponent was saturated
+    /// (infinity or NaN); strictly greater is the sentinel that `from_float` assigns to zero.
+    const ZERO_INF_NAN: u32 = F::EXP_SAT - Self::EXP_UNBIAS;
+
+    fn from_float(x: F) -> Self {
+        let mut ix = x.to_bits();
+        let mut e = x.ex() as i32;
+        let neg = x.is_sign_negative();
+        if e == 0 {
+            // Normalize subnormals by multiplication
+            let scale_i = F::BITS - 1;
+            let scale_f = F::from_parts(false, scale_i + F::EXP_BIAS, F::Int::ZERO);
+            let scaled = x * scale_f;
+            ix = scaled.to_bits();
+            e = scaled.ex() as i32;
+            e = if e == 0 {
+                // If the exponent is still zero, the input was zero. Artificially set this value
+                // such that the final `e` will exceed `ZERO_INF_NAN`.
+                1 << F::EXP_BITS
+            } else {
+                // Otherwise, account for the scaling we just did.
+                e - scale_i as i32
+            };
+        }
+
+        e -= Self::EXP_UNBIAS as i32;
+
+        // Keep only the significand, set the implicit bit, and shift to create a guard bit
+        ix &= F::SIG_MASK;
+        ix |= F::IMPLICIT_BIT;
+        ix <<= 1;
+
+        Self { m: ix, e, neg }
+    }
+
+    /// True if the value was zero, infinity, or NaN.
+    fn is_zero_nan_inf(self) -> bool {
+        self.e >= Self::ZERO_INF_NAN as i32
+    }
+
+    /// True only if the value was zero.
+    fn is_zero(self) -> bool {
+        // The only exponent that strictly exceeds this value is our sentinel value for zero.
+        self.e > Self::ZERO_INF_NAN as i32
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    /// Test the generic `fma_round` algorithm for a given float.
+    fn spec_test<F>()
+    where
+        F: Float,
+        F: CastFrom<F::SignedInt>,
+        F: CastFrom<i8>,
+        F::Int: HInt,
+        u32: CastInto<F::Int>,
+    {
+        let x = F::from_bits(F::Int::ONE);
+        let y = F::from_bits(F::Int::ONE);
+        let z = F::ZERO;
+
+        let fma = |x, y, z| fma_round(x, y, z, Round::Nearest).val;
+
+        // 754-2020 says "When the exact result of (a × b) + c is non-zero yet the result of
+        // fusedMultiplyAdd is zero because of rounding, the zero result takes the sign of the
+        // exact result"
+        assert_biteq!(fma(x, y, z), F::ZERO);
+        assert_biteq!(fma(x, -y, z), F::NEG_ZERO);
+        assert_biteq!(fma(-x, y, z), F::NEG_ZERO);
+        assert_biteq!(fma(-x, -y, z), F::ZERO);
+    }
+
+    #[test]
+    fn spec_test_f32() {
+        spec_test::<f32>();
+    }
+
+    #[test]
+    fn spec_test_f64() {
+        spec_test::<f64>();
+
+        let expect_underflow = [
+            (
+                hf64!("0x1.0p-1070"),
+                hf64!("0x1.0p-1070"),
+                hf64!("0x1.ffffffffffffp-1023"),
+                hf64!("0x0.ffffffffffff8p-1022"),
+            ),
+            (
+                // FIXME: we raise underflow but this should only be inexact (based on C and
+                // `rustc_apfloat`).
+                hf64!("0x1.0p-1070"),
+                hf64!("0x1.0p-1070"),
+                hf64!("-0x1.0p-1022"),
+                hf64!("-0x1.0p-1022"),
+            ),
+        ];
+
+        for (x, y, z, res) in expect_underflow {
+            let FpResult { val, status } = fma_round(x, y, z, Round::Nearest);
+            assert_biteq!(val, res);
+            assert_eq!(status, Status::UNDERFLOW);
+        }
+    }
+
+    #[test]
+    #[cfg(f128_enabled)]
+    fn spec_test_f128() {
+        spec_test::<f128>();
+    }
+
+    #[test]
+    fn fma_segfault() {
+        // These two inputs cause fma to segfault on release due to overflow:
+        assert_eq!(
+            fma(
+                -0.0000000000000002220446049250313,
+                -0.0000000000000002220446049250313,
+                -0.0000000000000002220446049250313
+            ),
+            -0.00000000000000022204460492503126,
+        );
+
+        let result = fma(-0.992, -0.992, -0.992);
+        // Force rounding to storage format on x87 to prevent spurious errors.
+        #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))]
+        let result = force_eval!(result);
+        assert_eq!(result, -0.007936000000000007,);
+    }
+
+    #[test]
+    fn fma_sbb() {
+        assert_eq!(fma(-(1.0 - f64::EPSILON), f64::MIN, f64::MIN), -3991680619069439e277);
+    }
+
+    #[test]
+    fn fma_underflow() {
+        assert_eq!(fma(1.1102230246251565e-16, -9.812526705433188e-305, 1.0894e-320), 0.0,);
+    }
+}
diff --git a/libm/src/math/fma_wide.rs b/libm/src/math/fma_wide.rs
new file mode 100644
index 000000000..08b78b022
--- /dev/null
+++ b/libm/src/math/fma_wide.rs
@@ -0,0 +1,104 @@
+/* SPDX-License-Identifier: MIT */
+/* origin: musl src/math/fmaf.c. Ported to generic Rust algorithm in 2025, TG. */
+
+use super::support::{FpResult, IntTy, Round, Status};
+use super::{CastFrom, CastInto, DFloat, Float, HFloat, MinInt};
+
+// Placeholder so we can have `fmaf16` in the `Float` trait.
+#[allow(unused)]
+#[cfg(f16_enabled)]
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub(crate) fn fmaf16(_x: f16, _y: f16, _z: f16) -> f16 {
+    unimplemented!()
+}
+
+/// Floating multiply add (f32)
+///
+/// Computes `(x*y)+z`, rounded as one ternary operation (i.e. calculated with infinite precision).
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn fmaf(x: f32, y: f32, z: f32) -> f32 {
+    select_implementation! {
+        name: fmaf,
+        use_arch: all(target_arch = "aarch64", target_feature = "neon"),
+        args: x, y, z,
+    }
+
+    fma_wide_round(x, y, z, Round::Nearest).val
+}
+
+/// Fma implementation when a hardware-backed larger float type is available. For `f32` and `f64`,
+/// `f64` has enough precision to represent the `f32` in its entirety, except for double rounding.
+#[inline]
+pub fn fma_wide_round<F, B>(x: F, y: F, z: F, round: Round) -> FpResult<F>
+where
+    F: Float + HFloat<D = B>,
+    B: Float + DFloat<H = F>,
+    B::Int: CastInto<i32>,
+    i32: CastFrom<i32>,
+{
+    let one = IntTy::<B>::ONE;
+
+    let xy: B = x.widen() * y.widen();
+    let mut result: B = xy + z.widen();
+    let mut ui: B::Int = result.to_bits();
+    let re = result.ex();
+    let zb: B = z.widen();
+
+    let prec_diff = B::SIG_BITS - F::SIG_BITS;
+    let excess_prec = ui & ((one << prec_diff) - one);
+    let halfway = one << (prec_diff - 1);
+
+    // Common case: the wide result can be narrowed directly if...
+    // The bits that narrowing will drop are not exactly the halfway pattern
+    if excess_prec != halfway
+        // Or the result is infinite or NaN (saturated exponent)
+        || re == B::EXP_SAT
+        // Or the result is exact
+        || (result - xy == zb && result - zb == xy)
+        // Or the mode is something other than round to nearest
+        || round != Round::Nearest
+    {
+        let min_inexact_exp = (B::EXP_BIAS as i32 + F::EXP_MIN_SUBNORM) as u32;
+        let max_inexact_exp = (B::EXP_BIAS as i32 + F::EXP_MIN) as u32;
+
+        let mut status = Status::OK;
+
+        if (min_inexact_exp..max_inexact_exp).contains(&re) && status.inexact() {
+            // This branch is never hit; requires previous operations to set a status
+            status.set_inexact(false);
+
+            result = xy + z.widen();
+            if status.inexact() {
+                status.set_underflow(true);
+            } else {
+                status.set_inexact(true);
+            }
+        }
+
+        return FpResult { val: result.narrow(), status };
+    }
+
+    let neg = ui >> (B::BITS - 1) != IntTy::<B>::ZERO;
+    let err = if neg == (zb > xy) { xy - result + zb } else { zb - result + xy };
+    if neg == (err < B::ZERO) {
+        ui += one;
+    } else {
+        ui -= one;
+    }
+
+    FpResult::ok(B::from_bits(ui).narrow())
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn issue_263() {
+        let a = f32::from_bits(1266679807);
+        let b = f32::from_bits(1300234242);
+        let c = f32::from_bits(1115553792);
+        let expected = f32::from_bits(1501560833);
+        assert_eq!(fmaf(a, b, c), expected);
+    }
+}
diff --git a/libm/src/math/fmin_fmax.rs b/libm/src/math/fmin_fmax.rs
new file mode 100644
index 000000000..2947b783e
--- /dev/null
+++ b/libm/src/math/fmin_fmax.rs
@@ -0,0 +1,167 @@
+/// Return the lesser of two arguments or, if either argument is NaN, the other argument.
+///
+/// This coincides with IEEE 754-2011 `minNum`. The result disregards signed zero (meaning if
+/// the inputs are -0.0 and +0.0, either may be returned).
+#[cfg(f16_enabled)]
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn fminf16(x: f16, y: f16) -> f16 {
+    super::generic::fmin(x, y)
+}
+
+/// Return the lesser of two arguments or, if either argument is NaN, the other argument.
+///
+/// This coincides with IEEE 754-2011 `minNum`. The result disregards signed zero (meaning if
+/// the inputs are -0.0 and +0.0, either may be returned).
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn fminf(x: f32, y: f32) -> f32 {
+    super::generic::fmin(x, y)
+}
+
+/// Return the lesser of two arguments or, if either argument is NaN, the other argument.
+///
+/// This coincides with IEEE 754-2011 `minNum`. The result disregards signed zero (meaning if
+/// the inputs are -0.0 and +0.0, either may be returned).
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn fmin(x: f64, y: f64) -> f64 {
+    super::generic::fmin(x, y)
+}
+
+/// Return the lesser of two arguments or, if either argument is NaN, the other argument.
+///
+/// This coincides with IEEE 754-2011 `minNum`. The result disregards signed zero (meaning if
+/// the inputs are -0.0 and +0.0, either may be returned).
+#[cfg(f128_enabled)]
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn fminf128(x: f128, y: f128) -> f128 {
+    super::generic::fmin(x, y)
+}
+
+/// Return the greater of two arguments or, if either argument is NaN, the other argument.
+///
+/// This coincides with IEEE 754-2011 `maxNum`. The result disregards signed zero (meaning if
+/// the inputs are -0.0 and +0.0, either may be returned).
+#[cfg(f16_enabled)]
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn fmaxf16(x: f16, y: f16) -> f16 {
+    super::generic::fmax(x, y)
+}
+
+/// Return the greater of two arguments or, if either argument is NaN, the other argument.
+///
+/// This coincides with IEEE 754-2011 `maxNum`. The result disregards signed zero (meaning if
+/// the inputs are -0.0 and +0.0, either may be returned).
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn fmaxf(x: f32, y: f32) -> f32 {
+    super::generic::fmax(x, y)
+}
+
+/// Return the greater of two arguments or, if either argument is NaN, the other argument.
+///
+/// This coincides with IEEE 754-2011 `maxNum`. The result disregards signed zero (meaning if
+/// the inputs are -0.0 and +0.0, either may be returned).
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn fmax(x: f64, y: f64) -> f64 {
+    super::generic::fmax(x, y)
+}
+
+/// Return the greater of two arguments or, if either argument is NaN, the other argument.
+///
+/// This coincides with IEEE 754-2011 `maxNum`. The result disregards signed zero (meaning if
+/// the inputs are -0.0 and +0.0, either may be returned).
+#[cfg(f128_enabled)]
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn fmaxf128(x: f128, y: f128) -> f128 {
+    super::generic::fmax(x, y)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::support::{Float, Hexf};
+
+    fn fmin_spec_test<F: Float>(f: impl Fn(F, F) -> F) {
+        let cases = [
+            (F::ZERO, F::ZERO, F::ZERO),
+            (F::ONE, F::ONE, F::ONE),
+            (F::ZERO, F::ONE, F::ZERO),
+            (F::ONE, F::ZERO, F::ZERO),
+            (F::ZERO, F::NEG_ONE, F::NEG_ONE),
+            (F::NEG_ONE, F::ZERO, F::NEG_ONE),
+            (F::INFINITY, F::ZERO, F::ZERO),
+            (F::NEG_INFINITY, F::ZERO, F::NEG_INFINITY),
+            (F::NAN, F::ZERO, F::ZERO),
+            (F::ZERO, F::NAN, F::ZERO),
+            (F::NAN, F::NAN, F::NAN),
+        ];
+
+        for (x, y, res) in cases {
+            let val = f(x, y);
+            assert_biteq!(val, res, "fmin({}, {})", Hexf(x), Hexf(y));
+        }
+    }
+
+    #[test]
+    #[cfg(f16_enabled)]
+    fn fmin_spec_tests_f16() {
+        fmin_spec_test::<f16>(fminf16);
+    }
+
+    #[test]
+    fn fmin_spec_tests_f32() {
+        fmin_spec_test::<f32>(fminf);
+    }
+
+    #[test]
+    fn fmin_spec_tests_f64() {
+        fmin_spec_test::<f64>(fmin);
+    }
+
+    #[test]
+    #[cfg(f128_enabled)]
+    fn fmin_spec_tests_f128() {
+        fmin_spec_test::<f128>(fminf128);
+    }
+
+    fn fmax_spec_test<F: Float>(f: impl Fn(F, F) -> F) {
+        let cases = [
+            (F::ZERO, F::ZERO, F::ZERO),
+            (F::ONE, F::ONE, F::ONE),
+            (F::ZERO, F::ONE, F::ONE),
+            (F::ONE, F::ZERO, F::ONE),
+            (F::ZERO, F::NEG_ONE, F::ZERO),
+            (F::NEG_ONE, F::ZERO, F::ZERO),
+            (F::INFINITY, F::ZERO, F::INFINITY),
+            (F::NEG_INFINITY, F::ZERO, F::ZERO),
+            (F::NAN, F::ZERO, F::ZERO),
+            (F::ZERO, F::NAN, F::ZERO),
+            (F::NAN, F::NAN, F::NAN),
+        ];
+
+        for (x, y, res) in cases {
+            let val = f(x, y);
+            assert_biteq!(val, res, "fmax({}, {})", Hexf(x), Hexf(y));
+        }
+    }
+
+    #[test]
+    #[cfg(f16_enabled)]
+    fn fmax_spec_tests_f16() {
+        fmax_spec_test::<f16>(fmaxf16);
+    }
+
+    #[test]
+    fn fmax_spec_tests_f32() {
+        fmax_spec_test::<f32>(fmaxf);
+    }
+
+    #[test]
+    fn fmax_spec_tests_f64() {
+        fmax_spec_test::<f64>(fmax);
+    }
+
+    #[test]
+    #[cfg(f128_enabled)]
+    fn fmax_spec_tests_f128() {
+        fmax_spec_test::<f128>(fmaxf128);
+    }
+}
diff --git a/libm/src/math/fminimum_fmaximum.rs b/libm/src/math/fminimum_fmaximum.rs
new file mode 100644
index 000000000..b7999e273
--- /dev/null
+++ b/libm/src/math/fminimum_fmaximum.rs
@@ -0,0 +1,163 @@
+/// Return the lesser of two arguments or, if either argument is NaN, NaN.
+///
+/// This coincides with IEEE 754-2019 `minimum`. The result orders -0.0 < 0.0.
+#[cfg(f16_enabled)]
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn fminimumf16(x: f16, y: f16) -> f16 {
+    super::generic::fminimum(x, y)
+}
+
+/// Return the lesser of two arguments or, if either argument is NaN, NaN.
+///
+/// This coincides with IEEE 754-2019 `minimum`. The result orders -0.0 < 0.0.
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn fminimum(x: f64, y: f64) -> f64 {
+    super::generic::fminimum(x, y)
+}
+
+/// Return the lesser of two arguments or, if either argument is NaN, NaN.
+///
+/// This coincides with IEEE 754-2019 `minimum`. The result orders -0.0 < 0.0.
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn fminimumf(x: f32, y: f32) -> f32 {
+    super::generic::fminimum(x, y)
+}
+
+/// Return the lesser of two arguments or, if either argument is NaN, NaN.
+///
+/// This coincides with IEEE 754-2019 `minimum`. The result orders -0.0 < 0.0.
+#[cfg(f128_enabled)]
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn fminimumf128(x: f128, y: f128) -> f128 {
+    super::generic::fminimum(x, y)
+}
+
+/// Return the greater of two arguments or, if either argument is NaN, NaN.
+///
+/// This coincides with IEEE 754-2019 `maximum`. The result orders -0.0 < 0.0.
+#[cfg(f16_enabled)]
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn fmaximumf16(x: f16, y: f16) -> f16 {
+    super::generic::fmaximum(x, y)
+}
+
+/// Return the greater of two arguments or, if either argument is NaN, NaN.
+///
+/// This coincides with IEEE 754-2019 `maximum`. The result orders -0.0 < 0.0.
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn fmaximumf(x: f32, y: f32) -> f32 {
+    super::generic::fmaximum(x, y)
+}
+
+/// Return the greater of two arguments or, if either argument is NaN, NaN.
+///
+/// This coincides with IEEE 754-2019 `maximum`. The result orders -0.0 < 0.0.
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn fmaximum(x: f64, y: f64) -> f64 {
+    super::generic::fmaximum(x, y)
+}
+
+/// Return the greater of two arguments or, if either argument is NaN, NaN.
+///
+/// This coincides with IEEE 754-2019 `maximum`. The result orders -0.0 < 0.0.
+#[cfg(f128_enabled)]
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn fmaximumf128(x: f128, y: f128) -> f128 {
+    super::generic::fmaximum(x, y)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::support::{Float, Hexf};
+
+    fn fminimum_spec_test<F: Float>(f: impl Fn(F, F) -> F) {
+        let cases = [
+            (F::ZERO, F::ZERO, F::ZERO),
+            (F::ONE, F::ONE, F::ONE),
+            (F::ZERO, F::ONE, F::ZERO),
+            (F::ONE, F::ZERO, F::ZERO),
+            (F::ZERO, F::NEG_ONE, F::NEG_ONE),
+            (F::NEG_ONE, F::ZERO, F::NEG_ONE),
+            (F::INFINITY, F::ZERO, F::ZERO),
+            (F::NEG_INFINITY, F::ZERO, F::NEG_INFINITY),
+            (F::NAN, F::ZERO, F::NAN),
+            (F::ZERO, F::NAN, F::NAN),
+            (F::NAN, F::NAN, F::NAN),
+            (F::ZERO, F::NEG_ZERO, F::NEG_ZERO),
+            (F::NEG_ZERO, F::ZERO, F::NEG_ZERO),
+        ];
+
+        for (x, y, res) in cases {
+            let val = f(x, y);
+            assert_biteq!(val, res, "fminimum({}, {})", Hexf(x), Hexf(y));
+        }
+    }
+
+    #[test]
+    #[cfg(f16_enabled)]
+    fn fminimum_spec_tests_f16() {
+        fminimum_spec_test::<f16>(fminimumf16);
+    }
+
+    #[test]
+    fn fminimum_spec_tests_f32() {
+        fminimum_spec_test::<f32>(fminimumf);
+    }
+
+    #[test]
+    fn fminimum_spec_tests_f64() {
+        fminimum_spec_test::<f64>(fminimum);
+    }
+
+    #[test]
+    #[cfg(f128_enabled)]
+    fn fminimum_spec_tests_f128() {
+        fminimum_spec_test::<f128>(fminimumf128);
+    }
+
+    fn fmaximum_spec_test<F: Float>(f: impl Fn(F, F) -> F) {
+        let cases = [
+            (F::ZERO, F::ZERO, F::ZERO),
+            (F::ONE, F::ONE, F::ONE),
+            (F::ZERO, F::ONE, F::ONE),
+            (F::ONE, F::ZERO, F::ONE),
+            (F::ZERO, F::NEG_ONE, F::ZERO),
+            (F::NEG_ONE, F::ZERO, F::ZERO),
+            (F::INFINITY, F::ZERO, F::INFINITY),
+            (F::NEG_INFINITY, F::ZERO, F::ZERO),
+            (F::NAN, F::ZERO, F::NAN),
+            (F::ZERO, F::NAN, F::NAN),
+            (F::NAN, F::NAN, F::NAN),
+            (F::ZERO, F::NEG_ZERO, F::ZERO),
+            (F::NEG_ZERO, F::ZERO, F::ZERO),
+        ];
+
+        for (x, y, res) in cases {
+            let val = f(x, y);
+            assert_biteq!(val, res, "fmaximum({}, {})", Hexf(x), Hexf(y));
+        }
+    }
+
+    #[test]
+    #[cfg(f16_enabled)]
+    fn fmaximum_spec_tests_f16() {
+        fmaximum_spec_test::<f16>(fmaximumf16);
+    }
+
+    #[test]
+    fn fmaximum_spec_tests_f32() {
+        fmaximum_spec_test::<f32>(fmaximumf);
+    }
+
+    #[test]
+    fn fmaximum_spec_tests_f64() {
+        fmaximum_spec_test::<f64>(fmaximum);
+    }
+
+    #[test]
+    #[cfg(f128_enabled)]
+    fn fmaximum_spec_tests_f128() {
+        fmaximum_spec_test::<f128>(fmaximumf128);
+    }
+}
diff --git a/libm/src/math/fminimum_fmaximum_num.rs b/libm/src/math/fminimum_fmaximum_num.rs
new file mode 100644
index 000000000..180d21f72
--- /dev/null
+++ b/libm/src/math/fminimum_fmaximum_num.rs
@@ -0,0 +1,163 @@
+/// Return the lesser of two arguments or, if either argument is NaN, the other argument.
+///
+/// This coincides with IEEE 754-2019 `minimumNumber`. The result orders -0.0 < 0.0.
+#[cfg(f16_enabled)]
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn fminimum_numf16(x: f16, y: f16) -> f16 {
+    super::generic::fminimum_num(x, y)
+}
+
+/// Return the lesser of two arguments or, if either argument is NaN, the other argument.
+///
+/// This coincides with IEEE 754-2019 `minimumNumber`. The result orders -0.0 < 0.0.
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn fminimum_numf(x: f32, y: f32) -> f32 {
+    super::generic::fminimum_num(x, y)
+}
+
+/// Return the lesser of two arguments or, if either argument is NaN, the other argument.
+///
+/// This coincides with IEEE 754-2019 `minimumNumber`. The result orders -0.0 < 0.0.
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn fminimum_num(x: f64, y: f64) -> f64 {
+    super::generic::fminimum_num(x, y)
+}
+
+/// Return the lesser of two arguments or, if either argument is NaN, the other argument.
+///
+/// This coincides with IEEE 754-2019 `minimumNumber`. The result orders -0.0 < 0.0.
+#[cfg(f128_enabled)]
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn fminimum_numf128(x: f128, y: f128) -> f128 {
+    super::generic::fminimum_num(x, y)
+}
+
+/// Return the greater of two arguments or, if either argument is NaN, the other argument.
+///
+/// This coincides with IEEE 754-2019 `maximumNumber`. The result orders -0.0 < 0.0.
+#[cfg(f16_enabled)]
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn fmaximum_numf16(x: f16, y: f16) -> f16 {
+    super::generic::fmaximum_num(x, y)
+}
+
+/// Return the greater of two arguments or, if either argument is NaN, the other argument.
+///
+/// This coincides with IEEE 754-2019 `maximumNumber`. The result orders -0.0 < 0.0.
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn fmaximum_numf(x: f32, y: f32) -> f32 {
+    super::generic::fmaximum_num(x, y)
+}
+
+/// Return the greater of two arguments or, if either argument is NaN, the other argument.
+///
+/// This coincides with IEEE 754-2019 `maximumNumber`. The result orders -0.0 < 0.0.
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn fmaximum_num(x: f64, y: f64) -> f64 {
+    super::generic::fmaximum_num(x, y)
+}
+
+/// Return the greater of two arguments or, if either argument is NaN, the other argument.
+///
+/// This coincides with IEEE 754-2019 `maximumNumber`. The result orders -0.0 < 0.0.
+#[cfg(f128_enabled)]
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn fmaximum_numf128(x: f128, y: f128) -> f128 {
+    super::generic::fmaximum_num(x, y)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::support::{Float, Hexf};
+
+    fn fminimum_num_spec_test<F: Float>(f: impl Fn(F, F) -> F) {
+        let cases = [
+            (F::ZERO, F::ZERO, F::ZERO),
+            (F::ONE, F::ONE, F::ONE),
+            (F::ZERO, F::ONE, F::ZERO),
+            (F::ONE, F::ZERO, F::ZERO),
+            (F::ZERO, F::NEG_ONE, F::NEG_ONE),
+            (F::NEG_ONE, F::ZERO, F::NEG_ONE),
+            (F::INFINITY, F::ZERO, F::ZERO),
+            (F::NEG_INFINITY, F::ZERO, F::NEG_INFINITY),
+            (F::NAN, F::ZERO, F::ZERO),
+            (F::ZERO, F::NAN, F::ZERO),
+            (F::NAN, F::NAN, F::NAN),
+            (F::ZERO, F::NEG_ZERO, F::NEG_ZERO),
+            (F::NEG_ZERO, F::ZERO, F::NEG_ZERO),
+        ];
+
+        for (x, y, res) in cases {
+            let val = f(x, y);
+            assert_biteq!(val, res, "fminimum_num({}, {})", Hexf(x), Hexf(y));
+        }
+    }
+
+    #[test]
+    #[cfg(f16_enabled)]
+    fn fminimum_num_spec_tests_f16() {
+        fminimum_num_spec_test::<f16>(fminimum_numf16);
+    }
+
+    #[test]
+    fn fminimum_num_spec_tests_f32() {
+        fminimum_num_spec_test::<f32>(fminimum_numf);
+    }
+
+    #[test]
+    fn fminimum_num_spec_tests_f64() {
+        fminimum_num_spec_test::<f64>(fminimum_num);
+    }
+
+    #[test]
+    #[cfg(f128_enabled)]
+    fn fminimum_num_spec_tests_f128() {
+        fminimum_num_spec_test::<f128>(fminimum_numf128);
+    }
+
+    fn fmaximum_num_spec_test<F: Float>(f: impl Fn(F, F) -> F) {
+        let cases = [
+            (F::ZERO, F::ZERO, F::ZERO),
+            (F::ONE, F::ONE, F::ONE),
+            (F::ZERO, F::ONE, F::ONE),
+            (F::ONE, F::ZERO, F::ONE),
+            (F::ZERO, F::NEG_ONE, F::ZERO),
+            (F::NEG_ONE, F::ZERO, F::ZERO),
+            (F::INFINITY, F::ZERO, F::INFINITY),
+            (F::NEG_INFINITY, F::ZERO, F::ZERO),
+            (F::NAN, F::ZERO, F::ZERO),
+            (F::ZERO, F::NAN, F::ZERO),
+            (F::NAN, F::NAN, F::NAN),
+            (F::ZERO, F::NEG_ZERO, F::ZERO),
+            (F::NEG_ZERO, F::ZERO, F::ZERO),
+        ];
+
+        for (x, y, res) in cases {
+            let val = f(x, y);
+            assert_biteq!(val, res, "fmaximum_num({}, {})", Hexf(x), Hexf(y));
+        }
+    }
+
+    #[test]
+    #[cfg(f16_enabled)]
+    fn fmaximum_num_spec_tests_f16() {
+        fmaximum_num_spec_test::<f16>(fmaximum_numf16);
+    }
+
+    #[test]
+    fn fmaximum_num_spec_tests_f32() {
+        fmaximum_num_spec_test::<f32>(fmaximum_numf);
+    }
+
+    #[test]
+    fn fmaximum_num_spec_tests_f64() {
+        fmaximum_num_spec_test::<f64>(fmaximum_num);
+    }
+
+    #[test]
+    #[cfg(f128_enabled)]
+    fn fmaximum_num_spec_tests_f128() {
+        fmaximum_num_spec_test::<f128>(fmaximum_numf128);
+    }
+}
diff --git a/libm/src/math/fmod.rs b/libm/src/math/fmod.rs
new file mode 100644
index 000000000..c4752b925
--- /dev/null
+++ b/libm/src/math/fmod.rs
@@ -0,0 +1,25 @@
+/// Calculate the remainder of `x / y`, the precise result of `x - trunc(x / y) * y`.
+#[cfg(f16_enabled)]
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn fmodf16(x: f16, y: f16) -> f16 {
+    super::generic::fmod(x, y)
+}
+
+/// Calculate the remainder of `x / y`, the precise result of `x - trunc(x / y) * y`.
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn fmodf(x: f32, y: f32) -> f32 {
+    super::generic::fmod(x, y)
+}
+
+/// Calculate the remainder of `x / y`, the precise result of `x - trunc(x / y) * y`.
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn fmod(x: f64, y: f64) -> f64 {
+    super::generic::fmod(x, y)
+}
+
+/// Calculate the remainder of `x / y`, the precise result of `x - trunc(x / y) * y`.
+#[cfg(f128_enabled)]
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn fmodf128(x: f128, y: f128) -> f128 {
+    super::generic::fmod(x, y)
+}
diff --git a/libm/src/math/fmodf.rs b/libm/src/math/fmodf.rs
new file mode 100644
index 000000000..4e95696e2
--- /dev/null
+++ b/libm/src/math/fmodf.rs
@@ -0,0 +1,5 @@
+/// Calculate the remainder of `x / y`, the precise result of `x - trunc(x / y) * y`.
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn fmodf(x: f32, y: f32) -> f32 {
+    super::generic::fmod(x, y)
+}
diff --git a/libm/src/math/fmodf128.rs b/libm/src/math/fmodf128.rs
new file mode 100644
index 000000000..ff0e0493e
--- /dev/null
+++ b/libm/src/math/fmodf128.rs
@@ -0,0 +1,5 @@
+/// Calculate the remainder of `x / y`, the precise result of `x - trunc(x / y) * y`.
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn fmodf128(x: f128, y: f128) -> f128 {
+    super::generic::fmod(x, y)
+}
diff --git a/libm/src/math/fmodf16.rs b/libm/src/math/fmodf16.rs
new file mode 100644
index 000000000..11972a7de
--- /dev/null
+++ b/libm/src/math/fmodf16.rs
@@ -0,0 +1,5 @@
+/// Calculate the remainder of `x / y`, the precise result of `x - trunc(x / y) * y`.
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn fmodf16(x: f16, y: f16) -> f16 {
+    super::generic::fmod(x, y)
+}
diff --git a/src/math/frexp.rs b/libm/src/math/frexp.rs
similarity index 89%
rename from src/math/frexp.rs
rename to libm/src/math/frexp.rs
index badad786a..de7a64fda 100644
--- a/src/math/frexp.rs
+++ b/libm/src/math/frexp.rs
@@ -1,3 +1,4 @@
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
 pub fn frexp(x: f64) -> (f64, i32) {
     let mut y = x.to_bits();
     let ee = ((y >> 52) & 0x7ff) as i32;
diff --git a/src/math/frexpf.rs b/libm/src/math/frexpf.rs
similarity index 88%
rename from src/math/frexpf.rs
rename to libm/src/math/frexpf.rs
index 2919c0ab0..0ec91c2d3 100644
--- a/src/math/frexpf.rs
+++ b/libm/src/math/frexpf.rs
@@ -1,3 +1,4 @@
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
 pub fn frexpf(x: f32) -> (f32, i32) {
     let mut y = x.to_bits();
     let ee: i32 = ((y >> 23) & 0xff) as i32;
diff --git a/libm/src/math/generic/ceil.rs b/libm/src/math/generic/ceil.rs
new file mode 100644
index 000000000..5c5bb4763
--- /dev/null
+++ b/libm/src/math/generic/ceil.rs
@@ -0,0 +1,168 @@
+/* SPDX-License-Identifier: MIT */
+/* origin: musl src/math/ceilf.c */
+
+//! Generic `ceil` algorithm.
+//!
+//! Note that this uses the algorithm from musl's `ceilf` rather than `ceil` or `ceill` because
+//! performance seems to be better (based on icount) and it does not seem to experience rounding
+//! errors on i386.
+
+use super::super::support::{FpResult, Status};
+use super::super::{Float, Int, IntTy, MinInt};
+
+#[inline]
+pub fn ceil<F: Float>(x: F) -> F {
+    ceil_status(x).val
+}
+
+/// Round `x` up to an integer; the status is `INEXACT` when rounding changed the value.
+#[inline]
+pub fn ceil_status<F: Float>(x: F) -> FpResult<F> {
+    let zero = IntTy::<F>::ZERO;
+
+    let mut ix = x.to_bits();
+    let e = x.exp_unbiased();
+
+    // If the represented value has no fractional part, no truncation is needed.
+    if e >= F::SIG_BITS as i32 {
+        return FpResult::ok(x);
+    }
+
+    let status;
+    let res = if e >= 0 {
+        // |x| >= 1.0
+        let m = F::SIG_MASK >> e.unsigned();
+        if (ix & m) == zero {
+            // Portion to be masked is already zero; no adjustment needed.
+            return FpResult::ok(x);
+        }
+
+        // Otherwise, raise an inexact exception.
+        status = Status::INEXACT;
+
+        if x.is_sign_positive() {
+            ix += m;
+        }
+
+        ix &= !m;
+        F::from_bits(ix)
+    } else {
+        // |x| < 1.0: the result is inexact unless x is +/-0.0. Test every bit except the sign
+        // rather than only the significand field, since powers of two such as 0.5 have a zero
+        // significand field but are still rounded (to 1.0 or -0.0).
+        let is_zero = (ix & !F::SIGN_MASK) == zero;
+        status = if is_zero { Status::OK } else { Status::INEXACT };
+
+        if x.is_sign_negative() {
+            // -1.0 < x <= -0.0; rounding up goes toward -0.0.
+            F::NEG_ZERO
+        } else if ix << 1 != zero {
+            // 0.0 < x < 1.0; rounding up goes toward +1.0.
+            F::ONE
+        } else {
+            // +0.0 remains unchanged
+            x
+        }
+    };
+
+    FpResult::new(res, status)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::support::Hexf;
+
+    /// Test against https://en.cppreference.com/w/cpp/numeric/math/ceil
+    fn spec_test<F: Float>(cases: &[(F, F, Status)]) {
+        let roundtrip = [F::ZERO, F::ONE, F::NEG_ONE, F::NEG_ZERO, F::INFINITY, F::NEG_INFINITY];
+
+        for x in roundtrip {
+            let FpResult { val, status } = ceil_status(x);
+            assert_biteq!(val, x, "{}", Hexf(x));
+            assert_eq!(status, Status::OK, "{}", Hexf(x));
+        }
+
+        for &(x, res, res_stat) in cases {
+            let FpResult { val, status } = ceil_status(x);
+            assert_biteq!(val, res, "{}", Hexf(x));
+            assert_eq!(status, res_stat, "{}", Hexf(x));
+        }
+    }
+
+    /* Skipping f16 / f128 "sanity_check"s due to rejected literal lexing at MSRV */
+
+    #[test]
+    #[cfg(f16_enabled)]
+    fn spec_tests_f16() {
+        let cases = [
+            (0.1, 1.0, Status::INEXACT),
+            (-0.1, -0.0, Status::INEXACT),
+            (0.9, 1.0, Status::INEXACT),
+            (-0.9, -0.0, Status::INEXACT),
+            (1.1, 2.0, Status::INEXACT),
+            (-1.1, -1.0, Status::INEXACT),
+            (1.9, 2.0, Status::INEXACT),
+            (-1.9, -1.0, Status::INEXACT),
+        ];
+        spec_test::<f16>(&cases);
+    }
+
+    #[test]
+    fn sanity_check_f32() {
+        assert_eq!(ceil(1.1f32), 2.0);
+        assert_eq!(ceil(2.9f32), 3.0);
+    }
+
+    #[test]
+    fn spec_tests_f32() {
+        let cases = [
+            (0.1, 1.0, Status::INEXACT),
+            (-0.1, -0.0, Status::INEXACT),
+            (0.9, 1.0, Status::INEXACT),
+            (-0.9, -0.0, Status::INEXACT),
+            (1.1, 2.0, Status::INEXACT),
+            (-1.1, -1.0, Status::INEXACT),
+            (1.9, 2.0, Status::INEXACT),
+            (-1.9, -1.0, Status::INEXACT),
+        ];
+        spec_test::<f32>(&cases);
+    }
+
+    #[test]
+    fn sanity_check_f64() {
+        assert_eq!(ceil(1.1f64), 2.0);
+        assert_eq!(ceil(2.9f64), 3.0);
+    }
+
+    #[test]
+    fn spec_tests_f64() {
+        let cases = [
+            (0.1, 1.0, Status::INEXACT),
+            (-0.1, -0.0, Status::INEXACT),
+            (0.9, 1.0, Status::INEXACT),
+            (-0.9, -0.0, Status::INEXACT),
+            (1.1, 2.0, Status::INEXACT),
+            (-1.1, -1.0, Status::INEXACT),
+            (1.9, 2.0, Status::INEXACT),
+            (-1.9, -1.0, Status::INEXACT),
+        ];
+        spec_test::<f64>(&cases);
+    }
+
+    #[test]
+    #[cfg(f128_enabled)]
+    fn spec_tests_f128() {
+        let cases = [
+            (0.1, 1.0, Status::INEXACT),
+            (-0.1, -0.0, Status::INEXACT),
+            (0.9, 1.0, Status::INEXACT),
+            (-0.9, -0.0, Status::INEXACT),
+            (1.1, 2.0, Status::INEXACT),
+            (-1.1, -1.0, Status::INEXACT),
+            (1.9, 2.0, Status::INEXACT),
+            (-1.9, -1.0, Status::INEXACT),
+        ];
+        spec_test::<f128>(&cases);
+    }
+}
diff --git a/libm/src/math/generic/copysign.rs b/libm/src/math/generic/copysign.rs
new file mode 100644
index 000000000..a61af22f0
--- /dev/null
+++ b/libm/src/math/generic/copysign.rs
@@ -0,0 +1,11 @@
+use super::super::Float;
+
+/// Return `x` with its sign bit replaced by the sign bit of `y`.
+#[inline]
+pub fn copysign<F: Float>(x: F, y: F) -> F {
+    // Every bit other than the sign comes from `x`...
+    let mut bits = x.to_bits() & !F::SIGN_MASK;
+    // ...and the sign bit alone is taken from `y`.
+    bits |= y.to_bits() & F::SIGN_MASK;
+    F::from_bits(bits)
+}
diff --git a/libm/src/math/generic/fabs.rs b/libm/src/math/generic/fabs.rs
new file mode 100644
index 000000000..0fa0edf9b
--- /dev/null
+++ b/libm/src/math/generic/fabs.rs
@@ -0,0 +1,8 @@
+use super::super::Float;
+
+/// Absolute value: returns `x` with its sign bit cleared.
+#[inline]
+pub fn fabs<F: Float>(x: F) -> F {
+    // Only the sign bit is masked off; all remaining bits of `x` pass through unchanged.
+    F::from_bits(x.to_bits() & !F::SIGN_MASK)
+}
diff --git a/libm/src/math/generic/fdim.rs b/libm/src/math/generic/fdim.rs
new file mode 100644
index 000000000..a63007b19
--- /dev/null
+++ b/libm/src/math/generic/fdim.rs
@@ -0,0 +1,6 @@
+use super::super::Float;
+/// Positive difference: zero when `x <= y`, otherwise `x - y` (so a NaN input yields NaN).
+#[inline]
+pub fn fdim<F: Float>(x: F, y: F) -> F {
+    if x <= y { F::ZERO } else { x - y }
+}
diff --git a/libm/src/math/generic/floor.rs b/libm/src/math/generic/floor.rs
new file mode 100644
index 000000000..243804625
--- /dev/null
+++ b/libm/src/math/generic/floor.rs
@@ -0,0 +1,151 @@
+/* SPDX-License-Identifier: MIT
+ * origin: musl src/math/floor.c */
+
+//! Generic `floor` algorithm.
+//!
+//! Note that this uses the algorithm from musl's `floorf` rather than `floor` or `floorl` because
+//! performance seems to be better (based on icount) and it does not seem to experience rounding
+//! errors on i386.
+
+use super::super::support::{FpResult, Status};
+use super::super::{Float, Int, IntTy, MinInt};
+/// Round `x` down to an integer, discarding the status that `floor_status` reports.
+#[inline]
+pub fn floor<F: Float>(x: F) -> F {
+    floor_status(x).val
+}
+
+/// Generic `floor` that also reports whether the result is exact.
+///
+/// Returns `x` rounded toward negative infinity. The status is `Status::INEXACT` whenever a
+/// nonzero fraction was discarded and OK otherwise; inputs without fractional bits and signed
+/// zeros are handed back unchanged.
+#[inline]
+pub fn floor_status<F: Float>(x: F) -> FpResult<F> {
+    let zero = IntTy::<F>::ZERO;
+
+    let mut ix = x.to_bits();
+    let e = x.exp_unbiased();
+
+    // If the represented value has no fractional part, no truncation is needed.
+    if e >= F::SIG_BITS as i32 {
+        return FpResult::ok(x);
+    }
+
+    let res = if e >= 0 {
+        // |x| >= 1.0: `m` selects the significand bits that hold the fractional part.
+        let m = F::SIG_MASK >> e.unsigned();
+        if ix & m == zero {
+            // Portion to be masked is already zero; no adjustment needed.
+            return FpResult::ok(x);
+        }
+
+        if x.is_sign_negative() {
+            // Carry into the integer part so that masking rounds toward negative infinity.
+            ix += m;
+        }
+
+        ix &= !m;
+        F::from_bits(ix)
+    } else if ix & !F::SIGN_MASK == zero {
+        // |x| < 1.0 is exact only for +/-0.0; test all non-sign bits since 0.5 has sig == 0.
+        return FpResult::ok(x);
+    } else if x.is_sign_positive() {
+        // 0.0 < x < 1.0; rounding down goes toward +0.0.
+        F::ZERO
+    } else {
+        // -1.0 < x < 0.0; rounding down goes toward -1.0.
+        F::NEG_ONE
+    };
+
+    // Every path reaching this point discarded a nonzero fraction.
+    FpResult::new(res, Status::INEXACT)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::support::Hexf;
+
+    /// Test against https://en.cppreference.com/w/cpp/numeric/math/floor
+    fn spec_test<F: Float>(cases: &[(F, F, Status)]) {
+        let roundtrip = [F::ZERO, F::ONE, F::NEG_ONE, F::NEG_ZERO, F::INFINITY, F::NEG_INFINITY];
+
+        for x in roundtrip {
+            let FpResult { val, status } = floor_status(x);
+            assert_biteq!(val, x, "{}", Hexf(x));
+            assert_eq!(status, Status::OK, "{}", Hexf(x));
+        }
+
+        for &(x, res, res_stat) in cases {
+            let FpResult { val, status } = floor_status(x);
+            assert_biteq!(val, res, "{}", Hexf(x));
+            assert_eq!(status, res_stat, "{}", Hexf(x));
+        }
+    }
+
+    /* Skipping f16 / f128 "sanity_check"s and spec cases due to rejected literal lexing at MSRV */
+
+    #[test]
+    #[cfg(f16_enabled)]
+    fn spec_tests_f16() {
+        let cases = [];
+        spec_test::<f16>(&cases);
+    }
+
+    #[test]
+    fn sanity_check_f32() {
+        assert_eq!(floor(0.5f32), 0.0);
+        assert_eq!(floor(1.1f32), 1.0);
+        assert_eq!(floor(2.9f32), 2.0);
+    }
+
+    #[test]
+    fn spec_tests_f32() {
+        let cases = [
+            (0.1, 0.0, Status::INEXACT),
+            (-0.1, -1.0, Status::INEXACT),
+            (0.9, 0.0, Status::INEXACT),
+            (-0.9, -1.0, Status::INEXACT),
+            (1.1, 1.0, Status::INEXACT),
+            (-1.1, -2.0, Status::INEXACT),
+            (1.9, 1.0, Status::INEXACT),
+            (-1.9, -2.0, Status::INEXACT),
+            // Powers of two below 1.0: the significand field is zero, yet rounding is inexact.
+            (0.5, 0.0, Status::INEXACT),
+            (-0.5, -1.0, Status::INEXACT),
+        ];
+        spec_test::<f32>(&cases);
+    }
+
+    #[test]
+    fn sanity_check_f64() {
+        assert_eq!(floor(1.1f64), 1.0);
+        assert_eq!(floor(2.9f64), 2.0);
+    }
+
+    #[test]
+    fn spec_tests_f64() {
+        let cases = [
+            (0.1, 0.0, Status::INEXACT),
+            (-0.1, -1.0, Status::INEXACT),
+            (0.9, 0.0, Status::INEXACT),
+            (-0.9, -1.0, Status::INEXACT),
+            (1.1, 1.0, Status::INEXACT),
+            (-1.1, -2.0, Status::INEXACT),
+            (1.9, 1.0, Status::INEXACT),
+            (-1.9, -2.0, Status::INEXACT),
+            // Powers of two below 1.0: the significand field is zero, yet rounding is inexact.
+            (0.5, 0.0, Status::INEXACT),
+            (-0.5, -1.0, Status::INEXACT),
+        ];
+        spec_test::<f64>(&cases);
+    }
+
+    #[test]
+    #[cfg(f128_enabled)]
+    fn spec_tests_f128() {
+        let cases = [];
+        spec_test::<f128>(&cases);
+    }
+}
diff --git a/libm/src/math/generic/fmax.rs b/libm/src/math/generic/fmax.rs
new file mode 100644
index 000000000..bf3f847e8
--- /dev/null
+++ b/libm/src/math/generic/fmax.rs
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: MIT OR Apache-2.0 */
+//! IEEE 754-2008 `maxNum`. This has been superseded by IEEE 754-2019 `maximumNumber`.
+//!
+//! Per the spec, returns the canonicalized result of:
+//! - `x` if `x > y`
+//! - `y` if `y > x`
+//! - The other number if one is NaN
+//! - Otherwise, either `x` or `y`, canonicalized
+//! - -0.0 and +0.0 may be disregarded (unlike newer operations)
+//!
+//! Excluded from our implementation is sNaN handling.
+//!
+//! More on the differences: [link].
+//!
+//! [link]: https://grouper.ieee.org/groups/msc/ANSI_IEEE-Std-754-2019/background/minNum_maxNum_Removal_Demotion_v3.pdf
+
+use super::super::Float;
+
+#[inline]
+pub fn fmax<F: Float>(x: F, y: F) -> F {
+    // `y` is picked when `x` is NaN or strictly smaller; otherwise `x` (also when `y` is NaN).
+    let pick_y = x.is_nan() || x < y;
+    (if pick_y { y } else { x }) * F::ONE // multiplying by one canonicalizes the result
+}
diff --git a/libm/src/math/generic/fmaximum.rs b/libm/src/math/generic/fmaximum.rs
new file mode 100644
index 000000000..387055af2
--- /dev/null
+++ b/libm/src/math/generic/fmaximum.rs
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: MIT OR Apache-2.0 */
+//! IEEE 754-2019 `maximum`.
+//!
+//! Per the spec, returns the canonicalized result of:
+//! - `x` if `x > y`
+//! - `y` if `y > x`
+//! - qNaN if either operand is NaN
+//! - Logic following +0.0 > -0.0
+//!
+//! Excluded from our implementation is sNaN handling.
+
+use super::super::Float;
+
+#[inline]
+pub fn fmaximum<F: Float>(x: F, y: F) -> F {
+    // A NaN operand is returned (`x` first when both are NaN), after canonicalization.
+    if x.is_nan() || y.is_nan() {
+        let nan = if x.is_nan() { x } else { y };
+        return nan * F::ONE;
+    }
+
+    // For numbers, `x` is chosen when it is strictly greater, or when `y` is -0.0 and `x` is
+    // positively signed; the latter ranks +0.0 above -0.0 even though they compare equal.
+    let y_is_neg_zero = y.to_bits() == F::NEG_ZERO.to_bits();
+    let pick_x = x > y || (y_is_neg_zero && x.is_sign_positive());
+    // Canonicalize
+    (if pick_x { x } else { y }) * F::ONE
+}
diff --git a/libm/src/math/generic/fmaximum_num.rs b/libm/src/math/generic/fmaximum_num.rs
new file mode 100644
index 000000000..f7efdde80
--- /dev/null
+++ b/libm/src/math/generic/fmaximum_num.rs
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: MIT OR Apache-2.0 */
+//! IEEE 754-2019 `maximumNumber`.
+//!
+//! Per the spec, returns:
+//! - `x` if `x > y`
+//! - `y` if `y > x`
+//! - Non-NaN if one operand is NaN
+//! - Logic following +0.0 > -0.0
+//! - Either `x` or `y` if `x == y` and the signs are the same
+//! - qNaN if both operands are NaN
+//!
+//! Excluded from our implementation is sNaN handling.
+
+use super::super::Float;
+
+/// IEEE 754-2019 `maximumNumber`: the larger operand; numbers beat NaN and +0.0 beats -0.0.
+#[inline]
+pub fn fmaximum_num<F: Float>(x: F, y: F) -> F {
+    // `y` replaces `x` when `x` is NaN, when `y` is strictly greater, or when `x` is -0.0 and `y`
+    // is a positively signed number. The zero tie-break has to exclude a NaN `y`: a NaN may have
+    // a clear sign bit, and `fmaximum_num(-0.0, NaN)` must return -0.0 rather than the NaN.
+    let x_is_neg_zero = x.to_bits() == F::NEG_ZERO.to_bits();
+    let take_y = x.is_nan() || x < y || (x_is_neg_zero && !y.is_nan() && y.is_sign_positive());
+    let res = if take_y { y } else { x };
+    // Canonicalize
+    res * F::ONE
+}
diff --git a/libm/src/math/generic/fmin.rs b/libm/src/math/generic/fmin.rs
new file mode 100644
index 000000000..cd3caeee4
--- /dev/null
+++ b/libm/src/math/generic/fmin.rs
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: MIT OR Apache-2.0 */
+//! IEEE 754-2008 `minNum`. This has been superseded by IEEE 754-2019 `minimumNumber`.
+//!
+//! Per the spec, returns the canonicalized result of:
+//! - `x` if `x < y`
+//! - `y` if `y < x`
+//! - The other number if one is NaN
+//! - Otherwise, either `x` or `y`, canonicalized
+//! - -0.0 and +0.0 may be disregarded (unlike newer operations)
+//!
+//! Excluded from our implementation is sNaN handling.
+//!
+//! More on the differences: [link].
+//!
+//! [link]: https://grouper.ieee.org/groups/msc/ANSI_IEEE-Std-754-2019/background/minNum_maxNum_Removal_Demotion_v3.pdf
+
+use super::super::Float;
+
+#[inline]
+pub fn fmin<F: Float>(x: F, y: F) -> F {
+    // `x` is kept when `y` is NaN or `x` is strictly smaller; otherwise `y` (also when `x` is NaN).
+    let keep_x = y.is_nan() || x < y;
+    (if keep_x { x } else { y }) * F::ONE // multiplying by one canonicalizes the result
+}
diff --git a/libm/src/math/generic/fminimum.rs b/libm/src/math/generic/fminimum.rs
new file mode 100644
index 000000000..4ddb36455
--- /dev/null
+++ b/libm/src/math/generic/fminimum.rs
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: MIT OR Apache-2.0 */
+//! IEEE 754-2019 `minimum`.
+//!
+//! Per the spec, returns the canonicalized result of:
+//! - `x` if `x < y`
+//! - `y` if `y < x`
+//! - qNaN if either operand is NaN
+//! - Logic following +0.0 > -0.0
+//!
+//! Excluded from our implementation is sNaN handling.
+
+use super::super::Float;
+
+#[inline]
+pub fn fminimum<F: Float>(x: F, y: F) -> F {
+    // A NaN operand is returned (`x` first when both are NaN), after canonicalization.
+    if x.is_nan() || y.is_nan() {
+        let nan = if x.is_nan() { x } else { y };
+        return nan * F::ONE;
+    }
+
+    // For numbers, `x` is chosen when it is strictly smaller, or when `x` is -0.0 and `y` is
+    // positively signed; the latter ranks -0.0 below +0.0 even though they compare equal.
+    let x_is_neg_zero = x.to_bits() == F::NEG_ZERO.to_bits();
+    let pick_x = x < y || (x_is_neg_zero && y.is_sign_positive());
+    // Canonicalize
+    (if pick_x { x } else { y }) * F::ONE
+}
diff --git a/libm/src/math/generic/fminimum_num.rs b/libm/src/math/generic/fminimum_num.rs
new file mode 100644
index 000000000..441c204a9
--- /dev/null
+++ b/libm/src/math/generic/fminimum_num.rs
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: MIT OR Apache-2.0 */
+//! IEEE 754-2019 `minimumNumber`.
+//!
+//! Per the spec, returns:
+//! - `x` if `x < y`
+//! - `y` if `y < x`
+//! - Non-NaN if one operand is NaN
+//! - Logic following +0.0 > -0.0
+//! - Either `x` or `y` if `x == y` and the signs are the same
+//! - qNaN if both operands are NaN
+//!
+//! Excluded from our implementation is sNaN handling.
+
+use super::super::Float;
+
+#[inline]
+pub fn fminimum_num<F: Float>(x: F, y: F) -> F {
+    // `x` is kept when `y` is NaN, when `x` is strictly smaller, or when `x` is -0.0 and `y` is
+    // positively signed (which orders -0.0 below +0.0). In every other case `y` is returned,
+    // and that includes a NaN `x` paired with a numeric `y`.
+    let x_is_neg_zero = x.to_bits() == F::NEG_ZERO.to_bits();
+    let keep_x = y.is_nan() || x < y || (x_is_neg_zero && y.is_sign_positive());
+    let res = if keep_x { x } else { y };
+
+    // Canonicalize
+    res * F::ONE
+}
diff --git a/libm/src/math/generic/fmod.rs b/libm/src/math/generic/fmod.rs
new file mode 100644
index 000000000..6414bbd25
--- /dev/null
+++ b/libm/src/math/generic/fmod.rs
@@ -0,0 +1,84 @@
+/* SPDX-License-Identifier: MIT */
+/* origin: musl src/math/fmod.c. Ported to generic Rust algorithm in 2025, TG. */
+
+use super::super::{CastFrom, Float, Int, MinInt};
+/// Generic `fmod`: remainder of `x / y` computed on the raw bits; the result has the sign of `x`.
+#[inline]
+pub fn fmod<F: Float>(x: F, y: F) -> F {
+    let zero = F::Int::ZERO;
+    let one = F::Int::ONE;
+    let mut ix = x.to_bits();
+    let mut iy = y.to_bits();
+    let mut ex = x.ex().signed();
+    let mut ey = y.ex().signed();
+    let sx = ix & F::SIGN_MASK; // sign bit of `x`, OR-ed back into the result at the end
+
+    if iy << 1 == zero || y.is_nan() || ex == F::EXP_SAT as i32 {
+        return (x * y) / (x * y); // `y` is zero or NaN, or `x` has a saturated exponent
+    }
+
+    if ix << 1 <= iy << 1 {
+        if ix << 1 == iy << 1 {
+            return F::ZERO * x; // |x| == |y|: zero remainder carrying the sign of `x`
+        }
+        return x; // |x| < |y|: `x` is already the remainder
+    }
+
+    /* normalize x and y: make the implicit bit explicit, shifting subnormals up into place */
+    if ex == 0 {
+        let i = ix << (F::EXP_BITS + 1);
+        ex -= i.leading_zeros() as i32;
+        ix <<= -ex + 1;
+    } else {
+        ix &= F::Int::MAX >> F::EXP_BITS;
+        ix |= one << F::SIG_BITS;
+    }
+
+    if ey == 0 {
+        let i = iy << (F::EXP_BITS + 1);
+        ey -= i.leading_zeros() as i32;
+        iy <<= -ey + 1;
+    } else {
+        iy &= F::Int::MAX >> F::EXP_BITS;
+        iy |= one << F::SIG_BITS;
+    }
+
+    /* x mod y: shift-and-subtract, consuming one exponent step per iteration */
+    while ex > ey {
+        let i = ix.wrapping_sub(iy);
+        if i >> (F::BITS - 1) == zero {
+            if i == zero {
+                return F::ZERO * x; // exact multiple: signed zero
+            }
+            ix = i; // top bit clear, i.e. the subtraction did not wrap: keep the reduced value
+        }
+
+        ix <<= 1;
+        ex -= 1;
+    }
+
+    let i = ix.wrapping_sub(iy); // final subtraction step once the exponents are level
+    if i >> (F::BITS - 1) == zero {
+        if i == zero {
+            return F::ZERO * x; // exact multiple: signed zero
+        }
+
+        ix = i;
+    }
+
+    let shift = ix.leading_zeros().saturating_sub(F::EXP_BITS);
+    ix <<= shift; // renormalize: bring the leading set bit back up to bit `SIG_BITS`
+    ex -= shift as i32;
+
+    /* scale result: rebuild the exponent field, or shift right when `ex` is not positive */
+    if ex > 0 {
+        ix -= one << F::SIG_BITS;
+        ix |= F::Int::cast_from(ex) << F::SIG_BITS;
+    } else {
+        ix >>= -ex + 1;
+    }
+
+    ix |= sx; // restore the sign of `x`
+
+    F::from_bits(ix)
+}
diff --git a/libm/src/math/generic/mod.rs b/libm/src/math/generic/mod.rs
new file mode 100644
index 000000000..35846351a
--- /dev/null
+++ b/libm/src/math/generic/mod.rs
@@ -0,0 +1,38 @@
+// Note: generic functions are marked `#[inline]` because, even though generic functions are
+// typically inlined, this does not seem to always be the case.
+
+mod ceil;
+mod copysign;
+mod fabs;
+mod fdim;
+mod floor;
+mod fmax;
+mod fmaximum;
+mod fmaximum_num;
+mod fmin;
+mod fminimum;
+mod fminimum_num;
+mod fmod;
+mod rint;
+mod round;
+mod scalbn;
+mod sqrt;
+mod trunc;
+
+pub use ceil::ceil;
+pub use copysign::copysign;
+pub use fabs::fabs;
+pub use fdim::fdim;
+pub use floor::floor;
+pub use fmax::fmax;
+pub use fmaximum::fmaximum;
+pub use fmaximum_num::fmaximum_num;
+pub use fmin::fmin;
+pub use fminimum::fminimum;
+pub use fminimum_num::fminimum_num;
+pub use fmod::fmod;
+pub use rint::rint_round;
+pub use round::round;
+pub use scalbn::scalbn;
+pub use sqrt::sqrt;
+pub use trunc::trunc;
diff --git a/libm/src/math/generic/rint.rs b/libm/src/math/generic/rint.rs
new file mode 100644
index 000000000..9cdeb1185
--- /dev/null
+++ b/libm/src/math/generic/rint.rs
@@ -0,0 +1,120 @@
+/* SPDX-License-Identifier: MIT */
+/* origin: musl src/math/rint.c */
+
+use super::super::Float;
+use super::super::support::{FpResult, Round};
+
+/// IEEE 754-2019 `roundToIntegralExact`. NOTE(review): `_round` is not consulted here, and the
+/// result is always built with `FpResult::ok`, so no inexact status is reported by this function.
+#[inline]
+pub fn rint_round<F: Float>(x: F, _round: Round) -> FpResult<F> {
+    let toint = F::ONE / F::EPSILON; // added and then removed below to shed fractional bits
+    let e = x.ex();
+    let positive = x.is_sign_positive();
+
+    // On i386 `force_eval!` must be used to force rounding via storage to memory. Otherwise,
+    // the excess precision from x87 would cause an incorrect final result.
+    let force = |x| {
+        if cfg!(x86_no_sse) && (F::BITS == 32 || F::BITS == 64) { force_eval!(x) } else { x }
+    };
+
+    let res = if e >= F::EXP_BIAS + F::SIG_BITS {
+        // No fractional part; exact result can be returned.
+        x
+    } else {
+        // Apply a net-zero adjustment that nudges `y` in the direction of the rounding mode. For
+        // Rust this is always nearest, but ideally it would take `round` into account.
+        let y = if positive {
+            force(force(x) + toint) - toint
+        } else {
+            force(force(x) - toint) + toint
+        };
+
+        if y == F::ZERO {
+            // A zero result takes the sign of the input.
+            if positive { F::ZERO } else { F::NEG_ZERO }
+        } else {
+            y
+        }
+    };
+
+    FpResult::ok(res)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::support::{Hexf, Status};
+
+    fn spec_test<F: Float>(cases: &[(F, F, Status)]) {
+        let roundtrip = [F::ZERO, F::ONE, F::NEG_ONE, F::NEG_ZERO, F::INFINITY, F::NEG_INFINITY];
+
+        for x in roundtrip {
+            let FpResult { val, status } = rint_round(x, Round::Nearest);
+            assert_biteq!(val, x, "rint_round({})", Hexf(x)); // must come back bit-for-bit
+            assert_eq!(status, Status::OK, "{}", Hexf(x));
+        }
+
+        for &(x, res, res_stat) in cases {
+            let FpResult { val, status } = rint_round(x, Round::Nearest);
+            assert_biteq!(val, res, "rint_round({})", Hexf(x));
+            assert_eq!(status, res_stat, "{}", Hexf(x));
+        }
+    }
+
+    #[test]
+    #[cfg(f16_enabled)]
+    fn spec_tests_f16() {
+        let cases = [];
+        spec_test::<f16>(&cases);
+    }
+
+    #[test]
+    fn spec_tests_f32() {
+        let cases = [
+            (0.1, 0.0, Status::OK), // status is OK throughout: `rint_round` always returns `ok`
+            (-0.1, -0.0, Status::OK), // a zero result keeps the sign of the input
+            (0.5, 0.0, Status::OK), // halfway case: goes to the even neighbor
+            (-0.5, -0.0, Status::OK),
+            (0.9, 1.0, Status::OK),
+            (-0.9, -1.0, Status::OK),
+            (1.1, 1.0, Status::OK),
+            (-1.1, -1.0, Status::OK),
+            (1.5, 2.0, Status::OK), // halfway case: goes to the even neighbor
+            (-1.5, -2.0, Status::OK),
+            (1.9, 2.0, Status::OK),
+            (-1.9, -2.0, Status::OK),
+            (2.8, 3.0, Status::OK),
+            (-2.8, -3.0, Status::OK),
+        ];
+        spec_test::<f32>(&cases);
+    }
+
+    #[test]
+    fn spec_tests_f64() {
+        let cases = [
+            (0.1, 0.0, Status::OK), // status is OK throughout: `rint_round` always returns `ok`
+            (-0.1, -0.0, Status::OK), // a zero result keeps the sign of the input
+            (0.5, 0.0, Status::OK), // halfway case: goes to the even neighbor
+            (-0.5, -0.0, Status::OK),
+            (0.9, 1.0, Status::OK),
+            (-0.9, -1.0, Status::OK),
+            (1.1, 1.0, Status::OK),
+            (-1.1, -1.0, Status::OK),
+            (1.5, 2.0, Status::OK), // halfway case: goes to the even neighbor
+            (-1.5, -2.0, Status::OK),
+            (1.9, 2.0, Status::OK),
+            (-1.9, -2.0, Status::OK),
+            (2.8, 3.0, Status::OK),
+            (-2.8, -3.0, Status::OK),
+        ];
+        spec_test::<f64>(&cases);
+    }
+
+    #[test]
+    #[cfg(f128_enabled)]
+    fn spec_tests_f128() {
+        let cases = [];
+        spec_test::<f128>(&cases);
+    }
+}
diff --git a/libm/src/math/generic/round.rs b/libm/src/math/generic/round.rs
new file mode 100644
index 000000000..01314ac70
--- /dev/null
+++ b/libm/src/math/generic/round.rs
@@ -0,0 +1,83 @@
+use super::super::{Float, MinInt};
+use super::{copysign, trunc};
+
+/// Round to the nearest integer, with halfway cases rounded away from zero.
+#[inline]
+pub fn round<F: Float>(x: F) -> F {
+    let pow2 = |biased_exp| F::from_parts(false, biased_exp, F::Int::ZERO);
+    let nudge = pow2(F::EXP_BIAS - 1) - pow2(F::EXP_BIAS - 2) * F::EPSILON; // 0.5 - 0.25 * eps
+    trunc(x + copysign(nudge, x)) // push the magnitude up by just under 0.5, then truncate
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    #[cfg(f16_enabled)]
+    fn zeroes_f16() {
+        assert_biteq!(round(0.0_f16), 0.0_f16);
+        assert_biteq!(round(-0.0_f16), -0.0_f16); // bitwise compare: the zero must stay negative
+    }
+
+    #[test]
+    #[cfg(f16_enabled)]
+    fn sanity_check_f16() {
+        assert_eq!(round(-1.0_f16), -1.0);
+        assert_eq!(round(2.8_f16), 3.0);
+        assert_eq!(round(-0.5_f16), -1.0); // halfway cases move away from zero
+        assert_eq!(round(0.5_f16), 1.0);
+        assert_eq!(round(-1.5_f16), -2.0);
+        assert_eq!(round(1.5_f16), 2.0);
+    }
+
+    #[test]
+    fn zeroes_f32() {
+        assert_biteq!(round(0.0_f32), 0.0_f32);
+        assert_biteq!(round(-0.0_f32), -0.0_f32); // bitwise compare: the zero must stay negative
+    }
+
+    #[test]
+    fn sanity_check_f32() {
+        assert_eq!(round(-1.0_f32), -1.0);
+        assert_eq!(round(2.8_f32), 3.0);
+        assert_eq!(round(-0.5_f32), -1.0); // halfway cases move away from zero
+        assert_eq!(round(0.5_f32), 1.0);
+        assert_eq!(round(-1.5_f32), -2.0);
+        assert_eq!(round(1.5_f32), 2.0);
+    }
+
+    #[test]
+    fn zeroes_f64() {
+        assert_biteq!(round(0.0_f64), 0.0_f64);
+        assert_biteq!(round(-0.0_f64), -0.0_f64); // bitwise compare: the zero must stay negative
+    }
+
+    #[test]
+    fn sanity_check_f64() {
+        assert_eq!(round(-1.0_f64), -1.0);
+        assert_eq!(round(2.8_f64), 3.0);
+        assert_eq!(round(-0.5_f64), -1.0); // halfway cases move away from zero
+        assert_eq!(round(0.5_f64), 1.0);
+        assert_eq!(round(-1.5_f64), -2.0);
+        assert_eq!(round(1.5_f64), 2.0);
+    }
+
+    #[test]
+    #[cfg(f128_enabled)]
+    fn zeroes_f128() {
+        assert_biteq!(round(0.0_f128), 0.0_f128);
+        assert_biteq!(round(-0.0_f128), -0.0_f128); // bitwise compare: the zero must stay negative
+    }
+
+    #[test]
+    #[cfg(f128_enabled)]
+    fn sanity_check_f128() {
+        assert_eq!(round(-1.0_f128), -1.0);
+        assert_eq!(round(2.8_f128), 3.0);
+        assert_eq!(round(-0.5_f128), -1.0); // halfway cases move away from zero
+        assert_eq!(round(0.5_f128), 1.0);
+        assert_eq!(round(-1.5_f128), -2.0);
+        assert_eq!(round(1.5_f128), 2.0);
+    }
+}
diff --git a/libm/src/math/generic/scalbn.rs b/libm/src/math/generic/scalbn.rs
new file mode 100644
index 000000000..a45db1b4a
--- /dev/null
+++ b/libm/src/math/generic/scalbn.rs
@@ -0,0 +1,121 @@
+use super::super::{CastFrom, CastInto, Float, IntTy, MinInt};
+
+/// Scale the exponent: compute `x * 2^n`, prescaling when `2^n` alone is not representable.
+///
+/// From N3220:
+///
+/// > The scalbn and scalbln functions compute `x * b^n`, where `b = FLT_RADIX` if the return type
+/// > of the function is a standard floating type, or `b = 10` if the return type of the function
+/// > is a decimal floating type. A range error occurs for some finite x, depending on n.
+/// >
+/// > [...]
+/// >
+/// > * `scalbn(±0, n)` returns `±0`.
+/// > * `scalbn(x, 0)` returns `x`.
+/// > * `scalbn(±∞, n)` returns `±∞`.
+/// >
+/// > If the calculation does not overflow or underflow, the returned value is exact and
+/// > independent of the current rounding direction mode.
+#[inline]
+pub fn scalbn<F: Float>(mut x: F, mut n: i32) -> F
+where
+    u32: CastInto<F::Int>,
+    F::Int: CastFrom<i32>,
+    F::Int: CastFrom<u32>,
+{
+    let zero = IntTy::<F>::ZERO;
+
+    // Bits including the implicit bit
+    let sig_total_bits = F::SIG_BITS + 1;
+
+    // Exponent bounds that `n` is compared against below (unbiased, judging by that use)
+    let exp_max = F::EXP_MAX;
+    let exp_min = F::EXP_MIN;
+
+    // 2 ^ Emax, maximum positive with null significand (0x1p1023 for f64)
+    let f_exp_max = F::from_parts(false, F::EXP_BIAS << 1, zero);
+
+    // 2 ^ Emin, minimum positive normal with null significand (0x1p-1022 for f64)
+    let f_exp_min = F::from_parts(false, 1, zero);
+
+    // 2 ^ sig_total_bits, multiplier to normalize subnormals (0x1p53 for f64)
+    let f_pow_subnorm = F::from_parts(false, sig_total_bits + F::EXP_BIAS, zero);
+
+    /*
+     * The goal is to multiply `x` by a scale factor that applies `n`. However, there are cases
+     * where `2^n` is not representable by `F` but the result should be, e.g. `x = 2^Emin` with
+     * `n = -EMin + 2` (one out of range of 2^Emax). To get around this, reduce the magnitude of
+     * the final scale operation by prescaling by the max/min power representable by `F`.
+     */
+
+    if n > exp_max {
+        // Worst case positive `n`: `x` is the minimum subnormal value, the result is `F::MAX`.
+        // This can be reached by three scaling multiplications (two here and one final).
+        debug_assert!(-exp_min + F::SIG_BITS as i32 + exp_max <= exp_max * 3);
+
+        x *= f_exp_max;
+        n -= exp_max;
+        if n > exp_max {
+            x *= f_exp_max;
+            n -= exp_max;
+            if n > exp_max {
+                n = exp_max;
+            }
+        }
+    } else if n < exp_min {
+        // When scaling toward 0, the prescaling is limited to a value that does not allow `x` to
+        // go subnormal. This avoids double rounding.
+        if F::BITS > 16 {
+            // `mul` s.t. `!(x * mul).is_subnormal() ∀ x`
+            let mul = f_exp_min * f_pow_subnorm;
+            let add = -exp_min - sig_total_bits as i32;
+
+            // Worst case negative `n`: `x` is the maximum positive value, the result is `F::MIN`.
+            // This must be reachable by three scaling multiplications (two here and one final).
+            debug_assert!(-exp_min + F::SIG_BITS as i32 + exp_max <= add * 2 + -exp_min);
+
+            x *= mul;
+            n += add;
+
+            if n < exp_min {
+                x *= mul;
+                n += add;
+
+                if n < exp_min {
+                    n = exp_min;
+                }
+            }
+        } else {
+            // `f16` is unique compared to other float types in that the difference between the
+            // minimum exponent and the significand bits (`add = -exp_min - sig_total_bits`) is
+            // small, only three. The above method depends on decrementing `n` by `add` two times;
+            // for other float types this works out because `add` is a substantial fraction of
+            // the exponent range. For `f16`, however, 3 is relatively small compared to the
+            // exponent range (which is 39), so that requires ~10 prescale rounds rather than two.
+            //
+            // Work around this by using a different algorithm that calculates the prescale
+            // dynamically based on the maximum possible value. This adds more operations per round
+            // since it needs to construct the scale, but works better in the general case.
+            let add = -(n + sig_total_bits as i32).clamp(exp_min, sig_total_bits as i32);
+            let mul = F::from_parts(false, (F::EXP_BIAS as i32 - add) as u32, zero);
+
+            x *= mul;
+            n += add;
+
+            if n < exp_min {
+                let add = -(n + sig_total_bits as i32).clamp(exp_min, sig_total_bits as i32);
+                let mul = F::from_parts(false, (F::EXP_BIAS as i32 - add) as u32, zero);
+
+                x *= mul;
+                n += add;
+
+                if n < exp_min {
+                    n = exp_min;
+                }
+            }
+        }
+    }
+
+    let scale = F::from_parts(false, (F::EXP_BIAS as i32 + n) as u32, zero); // 2^n, `n` now in range
+    x * scale
+}
diff --git a/libm/src/math/generic/sqrt.rs b/libm/src/math/generic/sqrt.rs
new file mode 100644
index 000000000..ec9ff22df
--- /dev/null
+++ b/libm/src/math/generic/sqrt.rs
@@ -0,0 +1,537 @@
+/* SPDX-License-Identifier: MIT */
+/* origin: musl src/math/sqrt.c. Ported to generic Rust algorithm in 2025, TG. */
+
+//! Generic square root algorithm.
+//!
+//! This routine operates around `m_u2`, a U.2 (fixed point with two integral bits) mantissa
+//! within the range [1, 4). A table lookup provides an initial estimate, then Goldschmidt
+//! iterations at various widths are used to approach the real values.
+//!
+//! For the iterations, `r` is a U0 number that approaches `1/sqrt(m_u2)`, and `s` is a U2 number
+//! that approaches `sqrt(m_u2)`. Recall that m_u2 ∈ [1, 4).
+//!
+//! With Newton-Raphson iterations, this would be:
+//!
+//! - `w = r * r           w ~ 1 / m`
+//! - `u = 3 - m * w       u ~ 3 - m * w = 3 - m / m = 2`
+//! - `r = r * u / 2       r ~ r`
+//!
+//! (Note that the righthand column does not show anything analytically meaningful (i.e. r ~ r),
+//! since the value of performing one iteration is in reducing the error representable by `~`).
+//!
+//! Instead of Newton-Raphson iterations, Goldschmidt iterations are used to calculate
+//! `s = m * r`:
+//!
+//! - `s = m * r           s ~ m / sqrt(m)`
+//! - `u = 3 - s * r       u ~ 3 - (m / sqrt(m)) * (1 / sqrt(m)) = 3 - m / m = 2`
+//! - `r = r * u / 2       r ~ r`
+//! - `s = s * u / 2       s ~ s`
+//!
+//! The above is precise because it uses the original value `m`. There is also a faster version
+//! that performs fewer steps but does not use `m`:
+//!
+//! - `u = 3 - s * r       u ~ 3 - 1`
+//! - `r = r * u / 2       r ~ r`
+//! - `s = s * u / 2       s ~ s`
+//!
+//! Rounding errors accumulate faster with the second version, so it is only used for subsequent
+//! iterations within the same width integer. The first version is always used for the first
+//! iteration at a new width in order to avoid this accumulation.
+//!
+//! Goldschmidt has the advantage over Newton-Raphson that `sqrt(x)` and `1/sqrt(x)` are
+//! computed at the same time, i.e. there is no need to calculate `1/sqrt(x)` and invert it.
+
+use super::super::support::{FpResult, IntTy, Round, Status, cold_path};
+use super::super::{CastFrom, CastInto, DInt, Float, HInt, Int, MinInt};
+
+/// Square root of `x` for any float type that implements `SqrtHelper`.
+///
+/// Forwards to `sqrt_round` with `Round::Nearest` and returns only the value, dropping the status.
+#[inline]
+pub fn sqrt<F>(x: F) -> F
+where
+    F: Float + SqrtHelper,
+    F::Int: HInt + From<u8> + From<F::ISet2> + CastInto<F::ISet1> + CastInto<F::ISet2>,
+    u32: CastInto<F::Int>,
+{
+    let FpResult { val, .. } = sqrt_round(x, Round::Nearest);
+    val
+}
+
+/// Compute `sqrt(x)` from a table lookup refined by Goldschmidt iterations, as an `FpResult`.
+/// `_round` is currently unused. NaN and negative nonzero inputs yield `Status::INVALID`.
+#[inline]
+pub fn sqrt_round<F>(x: F, _round: Round) -> FpResult<F>
+where
+    F: Float + SqrtHelper,
+    F::Int: HInt,
+    F::Int: From<u8>,
+    F::Int: From<F::ISet2>,
+    F::Int: CastInto<F::ISet1>,
+    F::Int: CastInto<F::ISet2>,
+    u32: CastInto<F::Int>,
+{
+    let zero = IntTy::<F>::ZERO;
+    let one = IntTy::<F>::ONE;
+
+    let mut ix = x.to_bits();
+
+    // Top is the exponent and sign, which may or may not be shifted. If the float fits into a
+    // `u32`, we can get by without paying shifting costs.
+    let noshift = F::BITS <= u32::BITS;
+    let (mut top, special_case) = if noshift {
+        let exp_lsb = one << F::SIG_BITS;
+        let special_case = ix.wrapping_sub(exp_lsb) >= F::EXP_MASK - exp_lsb;
+        (Exp::NoShift(()), special_case)
+    } else {
+        let top = u32::cast_from(ix >> F::SIG_BITS);
+        let special_case = top.wrapping_sub(1) >= F::EXP_SAT - 1;
+        (Exp::Shifted(top), special_case)
+    };
+
+    // Handle zero, infinity, NaN, out of domain (< 0), and subnormal inputs
+    if special_case {
+        cold_path();
+
+        // +/-0
+        if ix << 1 == zero {
+            return FpResult::ok(x);
+        }
+
+        // Positive infinity
+        if ix == F::EXP_MASK {
+            return FpResult::ok(x);
+        }
+
+        // NaN or negative
+        if ix > F::EXP_MASK {
+            return FpResult::new(F::NAN, Status::INVALID);
+        }
+
+        // Normalize subnormals by multiplying by 1.0 << SIG_BITS (e.g. 0x1p52 for doubles).
+        let scaled = x * F::from_parts(false, F::SIG_BITS + F::EXP_BIAS, zero);
+        ix = scaled.to_bits();
+        match top {
+            Exp::Shifted(ref mut v) => {
+                *v = scaled.ex();
+                *v = (*v).wrapping_sub(F::SIG_BITS);
+            }
+            Exp::NoShift(()) => {
+                ix = ix.wrapping_sub((F::SIG_BITS << F::SIG_BITS).cast());
+            }
+        }
+    }
+
+    // Reduce arguments such that `x = 4^e * m`:
+    // - m_u2 ∈ [1, 4), a fixed point U2.BITS number
+    // - 2^e is the exponent part of the result
+    let (m_u2, exp) = match top {
+        Exp::Shifted(top) => {
+            // We now know `x` is positive, so `top` is just its (biased) exponent
+            let mut e = top;
+            // Construct a fixed point representation of the mantissa.
+            let mut m_u2 = (ix | F::IMPLICIT_BIT) << F::EXP_BITS;
+            let even = (e & 1) != 0;
+            if even {
+                m_u2 >>= 1;
+            }
+            e = (e.wrapping_add(F::EXP_SAT >> 1)) >> 1;
+            (m_u2, Exp::Shifted(e))
+        }
+        Exp::NoShift(()) => {
+            let even = ix & (one << F::SIG_BITS) != zero;
+
+            // Exponent part of the return value
+            let mut e_noshift = ix >> 1;
+            e_noshift += (F::EXP_MASK ^ (F::SIGN_MASK >> 1)) >> 1;
+            e_noshift &= F::EXP_MASK;
+
+            let m1 = (ix << F::EXP_BITS) | F::SIGN_MASK;
+            let m0 = (ix << (F::EXP_BITS - 1)) & !F::SIGN_MASK;
+            let m_u2 = if even { m0 } else { m1 };
+
+            (m_u2, Exp::NoShift(e_noshift))
+        }
+    };
+
+    // Extract the top 6 bits of the significand with the lowest bit of the exponent.
+    let i = usize::cast_from(ix >> (F::SIG_BITS - 6)) & 0b1111111;
+
+    // Start with an initial guess for `r = 1 / sqrt(m)` from the table, and shift `m` as an
+    // initial value for `s = sqrt(m)`. See the module documentation for details.
+    let r1_u0: F::ISet1 = F::ISet1::cast_from(RSQRT_TAB[i]) << (F::ISet1::BITS - 16);
+    let s1_u2: F::ISet1 = ((m_u2) >> (F::BITS - F::ISet1::BITS)).cast();
+
+    // Perform iterations, if any, at quarter width (used for `f128`).
+    let (r1_u0, _s1_u2) = goldschmidt::<F, F::ISet1>(r1_u0, s1_u2, F::SET1_ROUNDS, false);
+
+    // Widen values and perform iterations at half width (used for `f64` and `f128`).
+    let r2_u0: F::ISet2 = F::ISet2::from(r1_u0) << (F::ISet2::BITS - F::ISet1::BITS);
+    let s2_u2: F::ISet2 = ((m_u2) >> (F::BITS - F::ISet2::BITS)).cast();
+    let (r2_u0, _s2_u2) = goldschmidt::<F, F::ISet2>(r2_u0, s2_u2, F::SET2_ROUNDS, false);
+
+    // Perform final iterations at full width (used for all float types).
+    let r_u0: F::Int = F::Int::from(r2_u0) << (F::BITS - F::ISet2::BITS);
+    let s_u2: F::Int = m_u2;
+    let (_r_u0, s_u2) = goldschmidt::<F, F::Int>(r_u0, s_u2, F::FINAL_ROUNDS, true);
+
+    // Shift back to mantissa position.
+    let mut m = s_u2 >> (F::EXP_BITS - 2);
+
+    // The musl source includes the following comment (with literals replaced):
+    //
+    // > s < sqrt(m) < s + 0x1.09p-SIG_BITS
+    // > compute nearest rounded result: the nearest result to SIG_BITS bits is either s or
+    // > s+0x1p-SIG_BITS, we can decide by comparing (2^SIG_BITS s + 0.5)^2 to 2^(2*SIG_BITS) m.
+    //
+    // Expanding this, with `SIG_BITS = p`, and adjusting based on the operations done to
+    // `d0` and `d1`:
+    //
+    // - `2^(2p)m ≟ ((2^p)m + 0.5)^2`
+    // - `2^(2p)m ≟ 2^(2p)m^2 + (2^p)m + 0.25`
+    // - `2^(2p)m - m^2 ≟ (2^(2p) - 1)m^2 + (2^p)m + 0.25`
+    // - `(1 - 2^(2p))m + m^2 ≟ (1 - 2^(2p))m^2 + (1 - 2^p)m + 0.25` (?)
+    //
+    // I do not follow how the rounding bit is extracted from this comparison with the below
+    // operations. In any case, the algorithm is well tested.
+
+    // The value needed to shift `m_u2` by to create `m*2^(2p)`. `2p = 2 * F::SIG_BITS`,
+    // `F::BITS - 2` accounts for the offset that `m_u2` already has.
+    let shift = 2 * F::SIG_BITS - (F::BITS - 2);
+
+    // `2^(2p)m - m^2`
+    let d0 = (m_u2 << shift).wrapping_sub(m.wrapping_mul(m));
+    // `m - 2^(2p)m + m^2`
+    let d1 = m.wrapping_sub(d0);
+    m += d1 >> (F::BITS - 1);
+    m &= F::SIG_MASK;
+
+    match exp {
+        Exp::Shifted(e) => m |= IntTy::<F>::cast_from(e) << F::SIG_BITS,
+        Exp::NoShift(e) => m |= e,
+    };
+
+    let mut y = F::from_bits(m);
+
+    // FIXME(f16): the fenv math does not work for `f16`
+    if F::BITS > 16 {
+        // Handle rounding and inexact. `(m + 1)^2 == 2^shift m` is exact; for all other cases, add
+        // a tiny value to cause fenv effects.
+        let d2 = d1.wrapping_add(m).wrapping_add(one);
+        let mut tiny = if d2 == zero {
+            cold_path();
+            zero
+        } else {
+            F::IMPLICIT_BIT
+        };
+
+        tiny |= (d1 ^ d2) & F::SIGN_MASK;
+        let t = F::from_bits(tiny);
+        y = y + t;
+    }
+
+    FpResult::ok(y)
+}
+
+/// Widening multiply of `a` by `b`, keeping only the upper half of the double-width product.
+fn wmulh<I: HInt>(a: I, b: I) -> I {
+    DInt::hi(HInt::widen_mul(a, b))
+}
+
+/// Run `count` Goldschmidt rounds on the estimates `(r_u0, s_u2)`, returning `(r_u0, s_u?)`.
+///
+/// - `r_u0`: the reciprocal estimate, `r ~ 1 / sqrt(m)`, as U0.
+/// - `s_u2`: the value refined toward the square root, `s ~ sqrt(m)`, as U2.
+/// - `count`: how many rounds to run; with zero rounds the inputs are returned unchanged.
+/// - `final_set`: true when this is the last set of rounds (same-sized integer). The returned
+///   `s` is then U3, for later shifting; otherwise the returned `s` is U2.
+///
+/// Performance relies on the optimizer unrolling the loop, which should be reasonably trivial
+/// because `count` is a constant at every call site.
+#[inline]
+fn goldschmidt<F, I>(mut r_u0: I, mut s_u2: I, count: u32, final_set: bool) -> (I, I)
+where
+    F: SqrtHelper,
+    I: HInt + From<u8>,
+{
+    // The constant 3 as U2: both integral bits set, fraction zero.
+    let three_u2 = I::from(0b11u8) << (I::BITS - 2);
+    // Multiplier applied to `s` each round; seeded with `r` so that round 0 forms `s = m*r`.
+    let mut mul_u0 = r_u0;
+
+    for round in 0..count {
+        // Round 0: `s = m*r`. Later rounds: `s = s*u/2`, with the halving completed below.
+        s_u2 = wmulh(s_u2, mul_u0);
+
+        // Apply the one-bit shift belonging to `s = s*u/2` unless either:
+        //
+        // - this is round 0, which computes `s = m*r` and needs no adjustment, or
+        // - this is the last round of the final set, where the shift is instead combined with
+        //   a later shift (moving `s` into the mantissa).
+        let first_round = round == 0;
+        let last_of_final_set = final_set && round + 1 >= count;
+        if !first_round && !last_of_final_set {
+            s_u2 <<= 1;
+        }
+
+        // u = 3 - s*r
+        let sr_u2 = wmulh(s_u2, r_u0);
+        mul_u0 = three_u2.wrapping_sub(sr_u2);
+
+        // r = r*u/2
+        r_u0 = wmulh(r_u0, mul_u0) << 1;
+    }
+
+    (r_u0, s_u2)
+}
+
+/// Tracks whether the exponent was shifted down into a `u32` or is handled in place within the
+/// float's integer representation, which saves the shift operations.
+enum Exp<T> {
+    /// The exponent has been shifted to a `u32` and is LSB-aligned.
+    Shifted(u32),
+    /// The exponent stays at its natural bit position; `T` is `()` or the in-place exponent bits.
+    NoShift(T),
+}
+
+/// Per-float-type integer widths and Goldschmidt round counts used by the square root routine.
+pub trait SqrtHelper: Float {
+    /// Integer for the first set of rounds. If unused, set to the same type as the next set.
+    type ISet1: HInt + Into<Self::ISet2> + CastFrom<Self::Int> + From<u8>;
+    /// Integer for the second set of rounds. If unused, set to the same type as the next set.
+    type ISet2: HInt + From<Self::ISet1> + From<u8>;
+
+    /// Number of rounds at `ISet1`; defaults to 0, meaning no rounds run at that width.
+    const SET1_ROUNDS: u32 = 0;
+    /// Number of rounds at `ISet2`; defaults to 0, meaning no rounds run at that width.
+    const SET2_ROUNDS: u32 = 0;
+    /// Number of rounds at `Self::Int` (full width); has no default, so implementors must set it.
+    const FINAL_ROUNDS: u32;
+}
+
+#[cfg(f16_enabled)]
+impl SqrtHelper for f16 {
+    type ISet1 = u16; // no rounds at this width (`SET1_ROUNDS` keeps its default of 0)
+    type ISet2 = u16; // no rounds at this width (`SET2_ROUNDS` keeps its default of 0)
+
+    const FINAL_ROUNDS: u32 = 2;
+}
+
+impl SqrtHelper for f32 {
+    type ISet1 = u32; // no rounds at this width (`SET1_ROUNDS` keeps its default of 0)
+    type ISet2 = u32; // no rounds at this width (`SET2_ROUNDS` keeps its default of 0)
+
+    const FINAL_ROUNDS: u32 = 3;
+}
+
+impl SqrtHelper for f64 {
+    type ISet1 = u32; // no rounds at this width (`SET1_ROUNDS` keeps its default of 0)
+    type ISet2 = u32; // half width of the `u64` representation
+
+    const SET2_ROUNDS: u32 = 2;
+    const FINAL_ROUNDS: u32 = 2;
+}
+
+#[cfg(f128_enabled)]
+impl SqrtHelper for f128 {
+    type ISet1 = u32; // quarter width of the `u128` representation
+    type ISet2 = u64; // half width of the `u128` representation
+
+    const SET1_ROUNDS: u32 = 1;
+    const SET2_ROUNDS: u32 = 2;
+    const FINAL_ROUNDS: u32 = 2;
+}
+
+/// Initial estimates of `1/sqrt(x)` in U0.16 fixed point, used to seed the iterations.
+///
+/// The index is a 7-bit number: the lowest exponent bit followed by the top 6 significand bits.
+#[rustfmt::skip]
+static RSQRT_TAB: [u16; 128] = [
+    0xb451, 0xb2f0, 0xb196, 0xb044, 0xaef9, 0xadb6, 0xac79, 0xab43,
+    0xaa14, 0xa8eb, 0xa7c8, 0xa6aa, 0xa592, 0xa480, 0xa373, 0xa26b,
+    0xa168, 0xa06a, 0x9f70, 0x9e7b, 0x9d8a, 0x9c9d, 0x9bb5, 0x9ad1,
+    0x99f0, 0x9913, 0x983a, 0x9765, 0x9693, 0x95c4, 0x94f8, 0x9430,
+    0x936b, 0x92a9, 0x91ea, 0x912e, 0x9075, 0x8fbe, 0x8f0a, 0x8e59,
+    0x8daa, 0x8cfe, 0x8c54, 0x8bac, 0x8b07, 0x8a64, 0x89c4, 0x8925,
+    0x8889, 0x87ee, 0x8756, 0x86c0, 0x862b, 0x8599, 0x8508, 0x8479,
+    0x83ec, 0x8361, 0x82d8, 0x8250, 0x81c9, 0x8145, 0x80c2, 0x8040,
+    0xff02, 0xfd0e, 0xfb25, 0xf947, 0xf773, 0xf5aa, 0xf3ea, 0xf234,
+    0xf087, 0xeee3, 0xed47, 0xebb3, 0xea27, 0xe8a3, 0xe727, 0xe5b2,
+    0xe443, 0xe2dc, 0xe17a, 0xe020, 0xdecb, 0xdd7d, 0xdc34, 0xdaf1,
+    0xd9b3, 0xd87b, 0xd748, 0xd61a, 0xd4f1, 0xd3cd, 0xd2ad, 0xd192,
+    0xd07b, 0xcf69, 0xce5b, 0xcd51, 0xcc4a, 0xcb48, 0xca4a, 0xc94f,
+    0xc858, 0xc764, 0xc674, 0xc587, 0xc49d, 0xc3b7, 0xc2d4, 0xc1f4,
+    0xc116, 0xc03c, 0xbf65, 0xbe90, 0xbdbe, 0xbcef, 0xbc23, 0xbb59,
+    0xba91, 0xb9cc, 0xb90a, 0xb84a, 0xb78c, 0xb6d0, 0xb617, 0xb560,
+];
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    /// Check the IEEE 754 `squareRoot` special cases: invalid inputs and unchanged roundtrips.
+    fn spec_test<F>()
+    where
+        F: Float + SqrtHelper,
+        F::Int: HInt,
+        F::Int: From<u8>,
+        F::Int: From<F::ISet2>,
+        F::Int: CastInto<F::ISet1>,
+        F::Int: CastInto<F::ISet2>,
+        u32: CastInto<F::Int>,
+    {
+        // Values that should return a NaN and raise invalid
+        let nan = [F::NEG_INFINITY, F::NEG_ONE, F::NAN, F::MIN];
+
+        // Values that are returned bit-for-bit unaltered with an OK status
+        let roundtrip = [F::ZERO, F::NEG_ZERO, F::INFINITY];
+
+        for x in nan {
+            let FpResult { val, status } = sqrt_round(x, Round::Nearest);
+            assert!(val.is_nan());
+            assert!(status == Status::INVALID);
+        }
+
+        for x in roundtrip {
+            let FpResult { val, status } = sqrt_round(x, Round::Nearest);
+            assert_biteq!(val, x);
+            assert!(status == Status::OK);
+        }
+    }
+
+    #[test]
+    #[cfg(f16_enabled)]
+    fn sanity_check_f16() {
+        assert_biteq!(sqrt(100.0f16), 10.0);
+        assert_biteq!(sqrt(4.0f16), 2.0);
+    }
+
+    #[test]
+    #[cfg(f16_enabled)]
+    fn spec_tests_f16() {
+        spec_test::<f16>();
+    }
+
+    #[test]
+    #[cfg(f16_enabled)]
+    #[allow(clippy::approx_constant)]
+    fn conformance_tests_f16() {
+        let cases = [
+            (f16::PI, 0x3f17_u16),
+            // 10_000.0, using a hex literal for MSRV hack (Rust < 1.67 checks literal widths as
+            // part of the AST, so the `cfg` is irrelevant here).
+            (f16::from_bits(0x70e2), 0x5640_u16),
+            (f16::from_bits(0x0000000f), 0x13bf_u16),
+            (f16::INFINITY, f16::INFINITY.to_bits()),
+        ];
+
+        for (input, output) in cases {
+            assert_biteq!(
+                sqrt(input),
+                f16::from_bits(output),
+                "input: {input:?} ({:#018x})",
+                input.to_bits()
+            );
+        }
+    }
+
+    #[test]
+    fn sanity_check_f32() {
+        assert_biteq!(sqrt(100.0f32), 10.0);
+        assert_biteq!(sqrt(4.0f32), 2.0);
+    }
+
+    #[test]
+    fn spec_tests_f32() {
+        spec_test::<f32>();
+    }
+
+    #[test]
+    #[allow(clippy::approx_constant)]
+    fn conformance_tests_f32() {
+        let cases = [
+            (f32::PI, 0x3fe2dfc5_u32),
+            (10000.0f32, 0x42c80000_u32),
+            (f32::from_bits(0x0000000f), 0x1b2f456f_u32),
+            (f32::INFINITY, f32::INFINITY.to_bits()),
+        ];
+
+        for (input, output) in cases {
+            assert_biteq!(
+                sqrt(input),
+                f32::from_bits(output),
+                "input: {input:?} ({:#018x})",
+                input.to_bits()
+            );
+        }
+    }
+
+    #[test]
+    fn sanity_check_f64() {
+        assert_biteq!(sqrt(100.0f64), 10.0);
+        assert_biteq!(sqrt(4.0f64), 2.0);
+    }
+
+    #[test]
+    fn spec_tests_f64() {
+        spec_test::<f64>();
+    }
+
+    #[test]
+    #[allow(clippy::approx_constant)]
+    fn conformance_tests_f64() {
+        let cases = [
+            (f64::PI, 0x3ffc5bf891b4ef6a_u64),
+            (10000.0, 0x4059000000000000_u64),
+            (f64::from_bits(0x0000000f), 0x1e7efbdeb14f4eda_u64),
+            (f64::INFINITY, f64::INFINITY.to_bits()),
+        ];
+
+        for (input, output) in cases {
+            assert_biteq!(
+                sqrt(input),
+                f64::from_bits(output),
+                "input: {input:?} ({:#018x})",
+                input.to_bits()
+            );
+        }
+    }
+
+    #[test]
+    #[cfg(f128_enabled)]
+    fn sanity_check_f128() {
+        assert_biteq!(sqrt(100.0f128), 10.0);
+        assert_biteq!(sqrt(4.0f128), 2.0);
+    }
+
+    #[test]
+    #[cfg(f128_enabled)]
+    fn spec_tests_f128() {
+        spec_test::<f128>();
+    }
+
+    #[test]
+    #[cfg(f128_enabled)]
+    #[allow(clippy::approx_constant)]
+    fn conformance_tests_f128() {
+        let cases = [
+            (f128::PI, 0x3fffc5bf891b4ef6aa79c3b0520d5db9_u128),
+            // 10_000.0 written as a bit pattern; see the `f16` conformance test for reasoning.
+            (
+                f128::from_bits(0x400c3880000000000000000000000000),
+                0x40059000000000000000000000000000_u128,
+            ),
+            (f128::from_bits(0x0000000f), 0x1fc9efbdeb14f4ed9b17ae807907e1e9_u128),
+            (f128::INFINITY, f128::INFINITY.to_bits()),
+        ];
+
+        for (input, output) in cases {
+            assert_biteq!(
+                sqrt(input),
+                f128::from_bits(output),
+                "input: {input:?} ({:#018x})",
+                input.to_bits()
+            );
+        }
+    }
+}
diff --git a/libm/src/math/generic/trunc.rs b/libm/src/math/generic/trunc.rs
new file mode 100644
index 000000000..25414ecf4
--- /dev/null
+++ b/libm/src/math/generic/trunc.rs
@@ -0,0 +1,138 @@
+/* SPDX-License-Identifier: MIT
+ * origin: musl src/math/trunc.c */
+
+use super::super::support::{FpResult, Status};
+use super::super::{Float, Int, IntTy, MinInt};
+
+/// Round `x` toward zero to an integral value, discarding the status from `trunc_status`.
+#[inline]
+pub fn trunc<F: Float>(x: F) -> F {
+    trunc_status(x).val
+}
+
+/// Round `x` toward zero; the status is `INEXACT` whenever nonzero bits were discarded.
+#[inline]
+pub fn trunc_status<F: Float>(x: F) -> FpResult<F> {
+    let mut xi: F::Int = x.to_bits();
+    let e: i32 = x.exp_unbiased();
+
+    // C1: The represented value has no fractional part, so no truncation is needed
+    if e >= F::SIG_BITS as i32 {
+        return FpResult::ok(x);
+    }
+
+    let mask = if e < 0 {
+        // C2: If the exponent is negative, the result will be zero so we mask out everything
+        // except the sign.
+        F::SIGN_MASK
+    } else {
+        // C3: Otherwise, keep the top `e` bits of the significand and mask out the rest.
+        !(F::SIG_MASK >> e.unsigned())
+    };
+
+    // C4: If the to-be-masked-out portion is already zero, we have an exact result
+    if (xi & !mask) == IntTy::<F>::ZERO {
+        return FpResult::ok(x);
+    }
+
+    // C5: Otherwise nonzero bits are discarded, so the result is always inexact. This includes
+    // inputs like `0x1p-1` whose significand field is zero. Mask the result and return.
+    xi &= mask;
+    FpResult::new(F::from_bits(xi), Status::INEXACT)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::support::Hexf;
+
+    fn spec_test<F: Float>(cases: &[(F, F, Status)]) {
+        let roundtrip = [F::ZERO, F::ONE, F::NEG_ONE, F::NEG_ZERO, F::INFINITY, F::NEG_INFINITY];
+
+        for x in roundtrip {
+            let FpResult { val, status } = trunc_status(x);
+            assert_biteq!(val, x, "{}", Hexf(x));
+            assert_eq!(status, Status::OK, "{}", Hexf(x));
+        }
+
+        for &(x, res, res_stat) in cases {
+            let FpResult { val, status } = trunc_status(x);
+            assert_biteq!(val, res, "{}", Hexf(x));
+            assert_eq!(status, res_stat, "{}", Hexf(x));
+        }
+    }
+
+    /* Skipping f16 / f128 "sanity_check"s and spec cases due to rejected literal lexing at MSRV */
+
+    #[test]
+    #[cfg(f16_enabled)]
+    fn spec_tests_f16() {
+        let cases = [];
+        spec_test::<f16>(&cases);
+    }
+
+    #[test]
+    fn sanity_check_f32() {
+        assert_eq!(trunc(0.5f32), 0.0);
+        assert_eq!(trunc(1.1f32), 1.0);
+        assert_eq!(trunc(2.9f32), 2.0);
+    }
+
+    #[test]
+    fn spec_tests_f32() {
+        let cases = [
+            (0.1, 0.0, Status::INEXACT),
+            (-0.1, -0.0, Status::INEXACT),
+            (0.9, 0.0, Status::INEXACT),
+            (-0.9, -0.0, Status::INEXACT),
+            (1.1, 1.0, Status::INEXACT),
+            (-1.1, -1.0, Status::INEXACT),
+            (1.9, 1.0, Status::INEXACT),
+            (-1.9, -1.0, Status::INEXACT),
+        ];
+        spec_test::<f32>(&cases);
+
+        assert_biteq!(trunc(1.1f32), 1.0);
+        assert_biteq!(trunc(1.1f64), 1.0);
+
+        // C1: the exponent is at least `SIG_BITS`, so the input is returned unchanged
+        assert_biteq!(trunc(hf32!("0x1p23")), hf32!("0x1p23"));
+        assert_biteq!(trunc(hf64!("0x1p52")), hf64!("0x1p52"));
+        assert_biteq!(trunc(hf32!("-0x1p23")), hf32!("-0x1p23"));
+        assert_biteq!(trunc(hf64!("-0x1p52")), hf64!("-0x1p52"));
+
+        // C2: a negative exponent truncates to a zero that keeps the sign of the input
+        assert_biteq!(trunc(hf32!("0x1p-1")), 0.0);
+        assert_biteq!(trunc(hf64!("0x1p-1")), 0.0);
+        assert_biteq!(trunc(hf32!("-0x1p-1")), -0.0);
+        assert_biteq!(trunc(hf64!("-0x1p-1")), -0.0);
+    }
+
+    #[test]
+    fn sanity_check_f64() {
+        assert_eq!(trunc(1.1f64), 1.0);
+        assert_eq!(trunc(2.9f64), 2.0);
+    }
+
+    #[test]
+    fn spec_tests_f64() {
+        let cases = [
+            (0.1, 0.0, Status::INEXACT),
+            (-0.1, -0.0, Status::INEXACT),
+            (0.9, 0.0, Status::INEXACT),
+            (-0.9, -0.0, Status::INEXACT),
+            (1.1, 1.0, Status::INEXACT),
+            (-1.1, -1.0, Status::INEXACT),
+            (1.9, 1.0, Status::INEXACT),
+            (-1.9, -1.0, Status::INEXACT),
+        ];
+        spec_test::<f64>(&cases);
+    }
+
+    #[test]
+    #[cfg(f128_enabled)]
+    fn spec_tests_f128() {
+        let cases = [];
+        spec_test::<f128>(&cases);
+    }
+}
diff --git a/src/math/hypot.rs b/libm/src/math/hypot.rs
similarity index 100%
rename from src/math/hypot.rs
rename to libm/src/math/hypot.rs
diff --git a/src/math/hypotf.rs b/libm/src/math/hypotf.rs
similarity index 100%
rename from src/math/hypotf.rs
rename to libm/src/math/hypotf.rs
diff --git a/src/math/ilogb.rs b/libm/src/math/ilogb.rs
similarity index 90%
rename from src/math/ilogb.rs
rename to libm/src/math/ilogb.rs
index 9d58d0608..ccc4914be 100644
--- a/src/math/ilogb.rs
+++ b/libm/src/math/ilogb.rs
@@ -21,7 +21,7 @@ pub fn ilogb(x: f64) -> i32 {
         e
     } else if e == 0x7ff {
         force_eval!(0.0 / 0.0);
-        if (i << 12) != 0 { FP_ILOGBNAN } else { i32::max_value() }
+        if (i << 12) != 0 { FP_ILOGBNAN } else { i32::MAX }
     } else {
         e - 0x3ff
     }
diff --git a/src/math/ilogbf.rs b/libm/src/math/ilogbf.rs
similarity index 90%
rename from src/math/ilogbf.rs
rename to libm/src/math/ilogbf.rs
index 85deb43c8..3585d6d36 100644
--- a/src/math/ilogbf.rs
+++ b/libm/src/math/ilogbf.rs
@@ -21,7 +21,7 @@ pub fn ilogbf(x: f32) -> i32 {
         e
     } else if e == 0xff {
         force_eval!(0.0 / 0.0);
-        if (i << 9) != 0 { FP_ILOGBNAN } else { i32::max_value() }
+        if (i << 9) != 0 { FP_ILOGBNAN } else { i32::MAX }
     } else {
         e - 0x7f
     }
diff --git a/src/math/j0.rs b/libm/src/math/j0.rs
similarity index 99%
rename from src/math/j0.rs
rename to libm/src/math/j0.rs
index 5e5e839f8..99d656f0d 100644
--- a/src/math/j0.rs
+++ b/libm/src/math/j0.rs
@@ -110,6 +110,7 @@ const S03: f64 = 5.13546550207318111446e-07; /* 0x3EA13B54, 0xCE84D5A9 */
 const S04: f64 = 1.16614003333790000205e-09; /* 0x3E1408BC, 0xF4745D8F */
 
 /// Zeroth order of the [Bessel function](https://en.wikipedia.org/wiki/Bessel_function) of the first kind (f64).
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
 pub fn j0(mut x: f64) -> f64 {
     let z: f64;
     let r: f64;
@@ -164,6 +165,7 @@ const V03: f64 = 2.59150851840457805467e-07; /* 0x3E91642D, 0x7FF202FD */
 const V04: f64 = 4.41110311332675467403e-10; /* 0x3DFE5018, 0x3BD6D9EF */
 
 /// Zeroth order of the [Bessel function](https://en.wikipedia.org/wiki/Bessel_function) of the second kind (f64).
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
 pub fn y0(x: f64) -> f64 {
     let z: f64;
     let u: f64;
diff --git a/src/math/j0f.rs b/libm/src/math/j0f.rs
similarity index 98%
rename from src/math/j0f.rs
rename to libm/src/math/j0f.rs
index afb6ee9ba..25e5b325c 100644
--- a/src/math/j0f.rs
+++ b/libm/src/math/j0f.rs
@@ -63,6 +63,7 @@ const S03: f32 = 5.1354652442e-07; /* 0x3509daa6 */
 const S04: f32 = 1.1661400734e-09; /* 0x30a045e8 */
 
 /// Zeroth order of the [Bessel function](https://en.wikipedia.org/wiki/Bessel_function) of the first kind (f32).
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
 pub fn j0f(mut x: f32) -> f32 {
     let z: f32;
     let r: f32;
@@ -109,6 +110,7 @@ const V03: f32 = 2.5915085189e-07; /* 0x348b216c */
 const V04: f32 = 4.4111031494e-10; /* 0x2ff280c2 */
 
 /// Zeroth order of the [Bessel function](https://en.wikipedia.org/wiki/Bessel_function) of the second kind (f32).
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
 pub fn y0f(x: f32) -> f32 {
     let z: f32;
     let u: f32;
diff --git a/src/math/j1.rs b/libm/src/math/j1.rs
similarity index 98%
rename from src/math/j1.rs
rename to libm/src/math/j1.rs
index cef17a63e..9b604d9e4 100644
--- a/src/math/j1.rs
+++ b/libm/src/math/j1.rs
@@ -114,6 +114,7 @@ const S04: f64 = 5.04636257076217042715e-09; /* 0x3E35AC88, 0xC97DFF2C */
 const S05: f64 = 1.23542274426137913908e-11; /* 0x3DAB2ACF, 0xCFB97ED8 */
 
 /// First order of the [Bessel function](https://en.wikipedia.org/wiki/Bessel_function) of the first kind (f64).
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
 pub fn j1(x: f64) -> f64 {
     let mut z: f64;
     let r: f64;
@@ -160,6 +161,7 @@ const V0: [f64; 5] = [
 ];
 
 /// First order of the [Bessel function](https://en.wikipedia.org/wiki/Bessel_function) of the second kind (f64).
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
 pub fn y1(x: f64) -> f64 {
     let z: f64;
     let u: f64;
@@ -171,10 +173,10 @@ pub fn y1(x: f64) -> f64 {
     lx = get_low_word(x);
 
     /* y1(nan)=nan, y1(<0)=nan, y1(0)=-inf, y1(inf)=0 */
-    if (ix << 1 | lx) == 0 {
+    if (ix << 1) | lx == 0 {
         return -1.0 / 0.0;
     }
-    if (ix >> 31) != 0 {
+    if ix >> 31 != 0 {
         return 0.0 / 0.0;
     }
     if ix >= 0x7ff00000 {
diff --git a/src/math/j1f.rs b/libm/src/math/j1f.rs
similarity index 98%
rename from src/math/j1f.rs
rename to libm/src/math/j1f.rs
index 02a3efd24..a47472401 100644
--- a/src/math/j1f.rs
+++ b/libm/src/math/j1f.rs
@@ -64,6 +64,7 @@ const S04: f32 = 5.0463624390e-09; /* 0x31ad6446 */
 const S05: f32 = 1.2354227016e-11; /* 0x2d59567e */
 
 /// First order of the [Bessel function](https://en.wikipedia.org/wiki/Bessel_function) of the first kind (f32).
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
 pub fn j1f(x: f32) -> f32 {
     let mut z: f32;
     let r: f32;
@@ -109,6 +110,7 @@ const V0: [f32; 5] = [
 ];
 
 /// First order of the [Bessel function](https://en.wikipedia.org/wiki/Bessel_function) of the second kind (f32).
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
 pub fn y1f(x: f32) -> f32 {
     let z: f32;
     let u: f32;
diff --git a/src/math/jn.rs b/libm/src/math/jn.rs
similarity index 58%
rename from src/math/jn.rs
rename to libm/src/math/jn.rs
index aff051f24..31f8d9c53 100644
--- a/src/math/jn.rs
+++ b/libm/src/math/jn.rs
@@ -39,6 +39,7 @@ use super::{cos, fabs, get_high_word, get_low_word, j0, j1, log, sin, sqrt, y0,
 const INVSQRTPI: f64 = 5.64189583547756279280e-01; /* 0x3FE20DD7, 0x50429B6D */
 
 /// Integer order of the [Bessel function](https://en.wikipedia.org/wiki/Bessel_function) of the first kind (f64).
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
 pub fn jn(n: i32, mut x: f64) -> f64 {
     let mut ix: u32;
     let lx: u32;
@@ -55,7 +56,7 @@ pub fn jn(n: i32, mut x: f64) -> f64 {
     ix &= 0x7fffffff;
 
     // -lx == !lx + 1
-    if (ix | (lx | ((!lx).wrapping_add(1))) >> 31) > 0x7ff00000 {
+    if ix | ((lx | (!lx).wrapping_add(1)) >> 31) > 0x7ff00000 {
         /* nan */
         return x;
     }
@@ -104,7 +105,8 @@ pub fn jn(n: i32, mut x: f64) -> f64 {
                 0 => -cos(x) + sin(x),
                 1 => -cos(x) - sin(x),
                 2 => cos(x) - sin(x),
-                3 | _ => cos(x) + sin(x),
+                // 3
+                _ => cos(x) + sin(x),
             };
             b = INVSQRTPI * temp / sqrt(x);
         } else {
@@ -118,130 +120,128 @@ pub fn jn(n: i32, mut x: f64) -> f64 {
                 a = temp;
             }
         }
-    } else {
-        if ix < 0x3e100000 {
-            /* x < 2**-29 */
-            /* x is tiny, return the first Taylor expansion of J(n,x)
-             * J(n,x) = 1/n!*(x/2)^n  - ...
-             */
-            if nm1 > 32 {
-                /* underflow */
-                b = 0.0;
-            } else {
-                temp = x * 0.5;
-                b = temp;
-                a = 1.0;
-                i = 2;
-                while i <= nm1 + 1 {
-                    a *= i as f64; /* a = n! */
-                    b *= temp; /* b = (x/2)^n */
-                    i += 1;
-                }
-                b = b / a;
-            }
+    } else if ix < 0x3e100000 {
+        /* x < 2**-29 */
+        /* x is tiny, return the first Taylor expansion of J(n,x)
+         * J(n,x) = 1/n!*(x/2)^n  - ...
+         */
+        if nm1 > 32 {
+            /* underflow */
+            b = 0.0;
         } else {
-            /* use backward recurrence */
-            /*                      x      x^2      x^2
-             *  J(n,x)/J(n-1,x) =  ----   ------   ------   .....
-             *                      2n  - 2(n+1) - 2(n+2)
-             *
-             *                      1      1        1
-             *  (for large x)   =  ----  ------   ------   .....
-             *                      2n   2(n+1)   2(n+2)
-             *                      -- - ------ - ------ -
-             *                       x     x         x
-             *
-             * Let w = 2n/x and h=2/x, then the above quotient
-             * is equal to the continued fraction:
-             *                  1
-             *      = -----------------------
-             *                     1
-             *         w - -----------------
-             *                        1
-             *              w+h - ---------
-             *                     w+2h - ...
-             *
-             * To determine how many terms needed, let
-             * Q(0) = w, Q(1) = w(w+h) - 1,
-             * Q(k) = (w+k*h)*Q(k-1) - Q(k-2),
-             * When Q(k) > 1e4      good for single
-             * When Q(k) > 1e9      good for double
-             * When Q(k) > 1e17     good for quadruple
-             */
-            /* determine k */
-            let mut t: f64;
-            let mut q0: f64;
-            let mut q1: f64;
-            let mut w: f64;
-            let h: f64;
-            let mut z: f64;
-            let mut tmp: f64;
-            let nf: f64;
+            temp = x * 0.5;
+            b = temp;
+            a = 1.0;
+            i = 2;
+            while i <= nm1 + 1 {
+                a *= i as f64; /* a = n! */
+                b *= temp; /* b = (x/2)^n */
+                i += 1;
+            }
+            b = b / a;
+        }
+    } else {
+        /* use backward recurrence */
+        /*                      x      x^2      x^2
+         *  J(n,x)/J(n-1,x) =  ----   ------   ------   .....
+         *                      2n  - 2(n+1) - 2(n+2)
+         *
+         *                      1      1        1
+         *  (for large x)   =  ----  ------   ------   .....
+         *                      2n   2(n+1)   2(n+2)
+         *                      -- - ------ - ------ -
+         *                       x     x         x
+         *
+         * Let w = 2n/x and h=2/x, then the above quotient
+         * is equal to the continued fraction:
+         *                  1
+         *      = -----------------------
+         *                     1
+         *         w - -----------------
+         *                        1
+         *              w+h - ---------
+         *                     w+2h - ...
+         *
+         * To determine how many terms needed, let
+         * Q(0) = w, Q(1) = w(w+h) - 1,
+         * Q(k) = (w+k*h)*Q(k-1) - Q(k-2),
+         * When Q(k) > 1e4      good for single
+         * When Q(k) > 1e9      good for double
+         * When Q(k) > 1e17     good for quadruple
+         */
+        /* determine k */
+        let mut t: f64;
+        let mut q0: f64;
+        let mut q1: f64;
+        let mut w: f64;
+        let h: f64;
+        let mut z: f64;
+        let mut tmp: f64;
+        let nf: f64;
 
-            let mut k: i32;
+        let mut k: i32;
 
-            nf = (nm1 as f64) + 1.0;
-            w = 2.0 * nf / x;
-            h = 2.0 / x;
-            z = w + h;
-            q0 = w;
-            q1 = w * z - 1.0;
-            k = 1;
-            while q1 < 1.0e9 {
-                k += 1;
-                z += h;
-                tmp = z * q1 - q0;
-                q0 = q1;
-                q1 = tmp;
-            }
-            t = 0.0;
-            i = k;
-            while i >= 0 {
-                t = 1.0 / (2.0 * ((i as f64) + nf) / x - t);
+        nf = (nm1 as f64) + 1.0;
+        w = 2.0 * nf / x;
+        h = 2.0 / x;
+        z = w + h;
+        q0 = w;
+        q1 = w * z - 1.0;
+        k = 1;
+        while q1 < 1.0e9 {
+            k += 1;
+            z += h;
+            tmp = z * q1 - q0;
+            q0 = q1;
+            q1 = tmp;
+        }
+        t = 0.0;
+        i = k;
+        while i >= 0 {
+            t = 1.0 / (2.0 * ((i as f64) + nf) / x - t);
+            i -= 1;
+        }
+        a = t;
+        b = 1.0;
+        /*  estimate log((2/x)^n*n!) = n*log(2/x)+n*ln(n)
+         *  Hence, if n*(log(2n/x)) > ...
+         *  single 8.8722839355e+01
+         *  double 7.09782712893383973096e+02
+         *  long double 1.1356523406294143949491931077970765006170e+04
+         *  then recurrent value may overflow and the result is
+         *  likely underflow to zero
+         */
+        tmp = nf * log(fabs(w));
+        if tmp < 7.09782712893383973096e+02 {
+            i = nm1;
+            while i > 0 {
+                temp = b;
+                b = b * (2.0 * (i as f64)) / x - a;
+                a = temp;
                 i -= 1;
             }
-            a = t;
-            b = 1.0;
-            /*  estimate log((2/x)^n*n!) = n*log(2/x)+n*ln(n)
-             *  Hence, if n*(log(2n/x)) > ...
-             *  single 8.8722839355e+01
-             *  double 7.09782712893383973096e+02
-             *  long double 1.1356523406294143949491931077970765006170e+04
-             *  then recurrent value may overflow and the result is
-             *  likely underflow to zero
-             */
-            tmp = nf * log(fabs(w));
-            if tmp < 7.09782712893383973096e+02 {
-                i = nm1;
-                while i > 0 {
-                    temp = b;
-                    b = b * (2.0 * (i as f64)) / x - a;
-                    a = temp;
-                    i -= 1;
-                }
-            } else {
-                i = nm1;
-                while i > 0 {
-                    temp = b;
-                    b = b * (2.0 * (i as f64)) / x - a;
-                    a = temp;
-                    /* scale b to avoid spurious overflow */
-                    let x1p500 = f64::from_bits(0x5f30000000000000); // 0x1p500 == 2^500
-                    if b > x1p500 {
-                        a /= b;
-                        t /= b;
-                        b = 1.0;
-                    }
-                    i -= 1;
+        } else {
+            i = nm1;
+            while i > 0 {
+                temp = b;
+                b = b * (2.0 * (i as f64)) / x - a;
+                a = temp;
+                /* scale b to avoid spurious overflow */
+                let x1p500 = f64::from_bits(0x5f30000000000000); // 0x1p500 == 2^500
+                if b > x1p500 {
+                    a /= b;
+                    t /= b;
+                    b = 1.0;
                 }
+                i -= 1;
             }
-            z = j0(x);
-            w = j1(x);
-            if fabs(z) >= fabs(w) {
-                b = t * z / b;
-            } else {
-                b = t * w / a;
-            }
+        }
+        z = j0(x);
+        w = j1(x);
+        if fabs(z) >= fabs(w) {
+            b = t * z / b;
+        } else {
+            b = t * w / a;
         }
     }
 
@@ -249,6 +249,7 @@ pub fn jn(n: i32, mut x: f64) -> f64 {
 }
 
 /// Integer order of the [Bessel function](https://en.wikipedia.org/wiki/Bessel_function) of the second kind (f64).
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
 pub fn yn(n: i32, x: f64) -> f64 {
     let mut ix: u32;
     let lx: u32;
@@ -266,7 +267,7 @@ pub fn yn(n: i32, x: f64) -> f64 {
     ix &= 0x7fffffff;
 
     // -lx == !lx + 1
-    if (ix | (lx | ((!lx).wrapping_add(1))) >> 31) > 0x7ff00000 {
+    if ix | ((lx | (!lx).wrapping_add(1)) >> 31) > 0x7ff00000 {
         /* nan */
         return x;
     }
@@ -315,7 +316,8 @@ pub fn yn(n: i32, x: f64) -> f64 {
             0 => -sin(x) - cos(x),
             1 => -sin(x) + cos(x),
             2 => sin(x) + cos(x),
-            3 | _ => sin(x) - cos(x),
+            // 3
+            _ => sin(x) - cos(x),
         };
         b = INVSQRTPI * temp / sqrt(x);
     } else {
diff --git a/libm/src/math/jnf.rs b/libm/src/math/jnf.rs
new file mode 100644
index 000000000..52cf7d8a8
--- /dev/null
+++ b/libm/src/math/jnf.rs
@@ -0,0 +1,253 @@
+/* origin: FreeBSD /usr/src/lib/msun/src/e_jnf.c */
+/*
+ * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com.
+ */
+/*
+ * ====================================================
+ * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
+ *
+ * Developed at SunPro, a Sun Microsystems, Inc. business.
+ * Permission to use, copy, modify, and distribute this
+ * software is freely granted, provided that this notice
+ * is preserved.
+ * ====================================================
+ */
+
+use super::{fabsf, j0f, j1f, logf, y0f, y1f};
+
+/// Integer order of the [Bessel function](https://en.wikipedia.org/wiki/Bessel_function) of the first kind (f32).
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn jnf(n: i32, mut x: f32) -> f32 {
+    let mut ix: u32;
+    let mut nm1: i32;
+    let mut sign: bool;
+    let mut i: i32;
+    let mut a: f32;
+    let mut b: f32;
+    let mut temp: f32;
+
+    ix = x.to_bits();
+    sign = (ix >> 31) != 0;
+    ix &= 0x7fffffff;
+    if ix > 0x7f800000 {
+        /* x is NaN: return it unchanged */
+        return x;
+    }
+
+    /* J(-n,x) = J(n,-x), use |n|-1 to avoid overflow in -n */
+    if n == 0 {
+        return j0f(x);
+    }
+    if n < 0 {
+        nm1 = -(n + 1);
+        x = -x;
+        sign = !sign;
+    } else {
+        nm1 = n - 1;
+    }
+    if nm1 == 0 {
+        return j1f(x);
+    }
+
+    sign &= (n & 1) != 0; /* even n: 0, odd n: signbit(x) */
+    x = fabsf(x);
+    if ix == 0 || ix == 0x7f800000 {
+        /* x is 0 or inf: the magnitude of the result is 0 (sign is applied on return) */
+        b = 0.0;
+    } else if (nm1 as f32) < x {
+        /* n-1 < x: safe to use forward recurrence J(n+1,x)=2n/x *J(n,x)-J(n-1,x) */
+        a = j0f(x);
+        b = j1f(x);
+        i = 0;
+        while i < nm1 {
+            i += 1;
+            temp = b;
+            b = b * (2.0 * (i as f32) / x) - a;
+            a = temp;
+        }
+    } else if ix < 0x35800000 {
+        /* x < 2**-20 */
+        /* x is tiny, return the first Taylor expansion of J(n,x)
+         * J(n,x) = 1/n!*(x/2)^n  - ...
+         */
+        if nm1 > 8 {
+            /* (x/2)^n is already 0 in f32 for n >= 9 here, so cap the loop count */
+            nm1 = 8;
+        }
+        temp = 0.5 * x;
+        b = temp;
+        a = 1.0;
+        i = 2;
+        while i <= nm1 + 1 {
+            a *= i as f32; /* a = n! */
+            b *= temp; /* b = (x/2)^n */
+            i += 1;
+        }
+        b = b / a;
+    } else {
+        /* use backward recurrence */
+        /*                      x      x^2      x^2
+         *  J(n,x)/J(n-1,x) =  ----   ------   ------   .....
+         *                      2n  - 2(n+1) - 2(n+2)
+         *
+         *                      1      1        1
+         *  (for large x)   =  ----  ------   ------   .....
+         *                      2n   2(n+1)   2(n+2)
+         *                      -- - ------ - ------ -
+         *                       x     x         x
+         *
+         * Let w = 2n/x and h=2/x, then the above quotient
+         * is equal to the continued fraction:
+         *                  1
+         *      = -----------------------
+         *                     1
+         *         w - -----------------
+         *                        1
+         *              w+h - ---------
+         *                     w+2h - ...
+         *
+         * To determine how many terms needed, let
+         * Q(0) = w, Q(1) = w(w+h) - 1,
+         * Q(k) = (w+k*h)*Q(k-1) - Q(k-2),
+         * When Q(k) > 1e4      good for single
+         * When Q(k) > 1e9      good for double
+         * When Q(k) > 1e17     good for quadruple
+         */
+        /* determine k: advance Q(k) until it reaches 1e4 (the single-precision bound) */
+        let mut t: f32;
+        let mut q0: f32;
+        let mut q1: f32;
+        let mut w: f32;
+        let h: f32;
+        let mut z: f32;
+        let mut tmp: f32;
+        let nf: f32;
+        let mut k: i32;
+
+        nf = (nm1 as f32) + 1.0;
+        w = 2.0 * nf / x;
+        h = 2.0 / x;
+        z = w + h;
+        q0 = w;
+        q1 = w * z - 1.0;
+        k = 1;
+        while q1 < 1.0e4 {
+            k += 1;
+            z += h;
+            tmp = z * q1 - q0;
+            q0 = q1;
+            q1 = tmp;
+        }
+        t = 0.0;
+        i = k;
+        while i >= 0 {
+            t = 1.0 / (2.0 * ((i as f32) + nf) / x - t);
+            i -= 1;
+        }
+        a = t;
+        b = 1.0;
+        /*  estimate log((2/x)^n*n!) = n*log(2/x)+n*ln(n)
+         *  Hence, if n*(log(2n/x)) > ...
+         *  single 8.8722839355e+01 (note: the check below compares against 88.721679688)
+         *  double 7.09782712893383973096e+02
+         *  long double 1.1356523406294143949491931077970765006170e+04
+         *  then recurrent value may overflow and the result is
+         *  likely underflow to zero
+         */
+        tmp = nf * logf(fabsf(w));
+        if tmp < 88.721679688 {
+            i = nm1;
+            while i > 0 {
+                temp = b;
+                b = 2.0 * (i as f32) * b / x - a;
+                a = temp;
+                i -= 1;
+            }
+        } else {
+            i = nm1;
+            while i > 0 {
+                temp = b;
+                b = 2.0 * (i as f32) * b / x - a;
+                a = temp;
+                /* scale b to avoid spurious overflow */
+                let x1p60 = f32::from_bits(0x5d800000); // 0x1p60 == 2^60
+                if b > x1p60 {
+                    a /= b;
+                    t /= b;
+                    b = 1.0;
+                }
+                i -= 1;
+            }
+        }
+        z = j0f(x);
+        w = j1f(x);
+        if fabsf(z) >= fabsf(w) {
+            b = t * z / b;
+        } else {
+            b = t * w / a;
+        }
+    }
+
+    if sign { -b } else { b }
+}
+
+/// Integer order of the [Bessel function](https://en.wikipedia.org/wiki/Bessel_function) of the second kind (f32).
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn ynf(n: i32, x: f32) -> f32 {
+    let mut ix: u32;
+    let mut ib: u32;
+    let nm1: i32;
+    let mut sign: bool;
+    let mut i: i32;
+    let mut a: f32;
+    let mut b: f32;
+    let mut temp: f32;
+
+    ix = x.to_bits();
+    sign = (ix >> 31) != 0;
+    ix &= 0x7fffffff;
+    if ix > 0x7f800000 {
+        /* x is NaN: return it unchanged */
+        return x;
+    }
+    if sign && ix != 0 {
+        /* x < 0 (negative zero is excluded by ix != 0): the result is NaN */
+        return 0.0 / 0.0;
+    }
+    if ix == 0x7f800000 {
+        return 0.0;
+    }
+
+    if n == 0 {
+        return y0f(x);
+    }
+    if n < 0 {
+        nm1 = -(n + 1);
+        sign = (n & 1) != 0;
+    } else {
+        nm1 = n - 1;
+        sign = false;
+    }
+    if nm1 == 0 {
+        if sign {
+            return -y1f(x);
+        } else {
+            return y1f(x);
+        }
+    }
+
+    a = y0f(x);
+    b = y1f(x);
+    /* forward recurrence from y0f/y1f; quit early once b is -inf (bits 0xff800000) */
+    ib = b.to_bits();
+    i = 0;
+    while i < nm1 && ib != 0xff800000 {
+        i += 1;
+        temp = b;
+        b = (2.0 * (i as f32) / x) * b - a;
+        ib = b.to_bits();
+        a = temp;
+    }
+
+    if sign { -b } else { b }
+}
diff --git a/src/math/k_cos.rs b/libm/src/math/k_cos.rs
similarity index 100%
rename from src/math/k_cos.rs
rename to libm/src/math/k_cos.rs
diff --git a/src/math/k_cosf.rs b/libm/src/math/k_cosf.rs
similarity index 100%
rename from src/math/k_cosf.rs
rename to libm/src/math/k_cosf.rs
diff --git a/src/math/k_expo2.rs b/libm/src/math/k_expo2.rs
similarity index 100%
rename from src/math/k_expo2.rs
rename to libm/src/math/k_expo2.rs
diff --git a/src/math/k_expo2f.rs b/libm/src/math/k_expo2f.rs
similarity index 100%
rename from src/math/k_expo2f.rs
rename to libm/src/math/k_expo2f.rs
diff --git a/src/math/k_sin.rs b/libm/src/math/k_sin.rs
similarity index 100%
rename from src/math/k_sin.rs
rename to libm/src/math/k_sin.rs
diff --git a/src/math/k_sinf.rs b/libm/src/math/k_sinf.rs
similarity index 100%
rename from src/math/k_sinf.rs
rename to libm/src/math/k_sinf.rs
diff --git a/src/math/k_tan.rs b/libm/src/math/k_tan.rs
similarity index 100%
rename from src/math/k_tan.rs
rename to libm/src/math/k_tan.rs
diff --git a/src/math/k_tanf.rs b/libm/src/math/k_tanf.rs
similarity index 100%
rename from src/math/k_tanf.rs
rename to libm/src/math/k_tanf.rs
diff --git a/libm/src/math/ldexp.rs b/libm/src/math/ldexp.rs
new file mode 100644
index 000000000..24899ba30
--- /dev/null
+++ b/libm/src/math/ldexp.rs
@@ -0,0 +1,21 @@
+#[cfg(f16_enabled)]
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn ldexpf16(x: f16, n: i32) -> f16 {
+    super::scalbnf16(x, n)
+}
+
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn ldexpf(x: f32, n: i32) -> f32 {
+    super::scalbnf(x, n)
+}
+
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn ldexp(x: f64, n: i32) -> f64 {
+    super::scalbn(x, n)
+}
+
+#[cfg(f128_enabled)]
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn ldexpf128(x: f128, n: i32) -> f128 {
+    super::scalbnf128(x, n)
+}
diff --git a/src/math/ldexpf.rs b/libm/src/math/ldexpf.rs
similarity index 100%
rename from src/math/ldexpf.rs
rename to libm/src/math/ldexpf.rs
diff --git a/libm/src/math/ldexpf128.rs b/libm/src/math/ldexpf128.rs
new file mode 100644
index 000000000..b35277d15
--- /dev/null
+++ b/libm/src/math/ldexpf128.rs
@@ -0,0 +1,4 @@
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn ldexpf128(x: f128, n: i32) -> f128 {
+    super::scalbnf128(x, n)
+}
diff --git a/libm/src/math/ldexpf16.rs b/libm/src/math/ldexpf16.rs
new file mode 100644
index 000000000..8de6cffd6
--- /dev/null
+++ b/libm/src/math/ldexpf16.rs
@@ -0,0 +1,4 @@
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn ldexpf16(x: f16, n: i32) -> f16 {
+    super::scalbnf16(x, n)
+}
diff --git a/src/math/lgamma.rs b/libm/src/math/lgamma.rs
similarity index 55%
rename from src/math/lgamma.rs
rename to libm/src/math/lgamma.rs
index a08bc5b64..8312dc186 100644
--- a/src/math/lgamma.rs
+++ b/libm/src/math/lgamma.rs
@@ -1,5 +1,7 @@
 use super::lgamma_r;
 
+/// The natural logarithm of the
+/// [Gamma function](https://en.wikipedia.org/wiki/Gamma_function) (f64).
 #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
 pub fn lgamma(x: f64) -> f64 {
     lgamma_r(x).0
diff --git a/src/math/lgamma_r.rs b/libm/src/math/lgamma_r.rs
similarity index 99%
rename from src/math/lgamma_r.rs
rename to libm/src/math/lgamma_r.rs
index b26177e6e..6becaad2c 100644
--- a/src/math/lgamma_r.rs
+++ b/libm/src/math/lgamma_r.rs
@@ -160,7 +160,8 @@ fn sin_pi(mut x: f64) -> f64 {
         1 => k_cos(x, 0.0),
         2 => k_sin(-x, 0.0, 0),
         3 => -k_cos(x, 0.0),
-        0 | _ => k_sin(x, 0.0, 0),
+        // 0
+        _ => k_sin(x, 0.0, 0),
     }
 }
 
diff --git a/src/math/lgammaf.rs b/libm/src/math/lgammaf.rs
similarity index 55%
rename from src/math/lgammaf.rs
rename to libm/src/math/lgammaf.rs
index a9c2da75b..d37512397 100644
--- a/src/math/lgammaf.rs
+++ b/libm/src/math/lgammaf.rs
@@ -1,5 +1,7 @@
 use super::lgammaf_r;
 
+/// The natural logarithm of the
+/// [Gamma function](https://en.wikipedia.org/wiki/Gamma_function) (f32).
 #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
 pub fn lgammaf(x: f32) -> f32 {
     lgammaf_r(x).0
diff --git a/src/math/lgammaf_r.rs b/libm/src/math/lgammaf_r.rs
similarity index 99%
rename from src/math/lgammaf_r.rs
rename to libm/src/math/lgammaf_r.rs
index 723c90daf..10cecee54 100644
--- a/src/math/lgammaf_r.rs
+++ b/libm/src/math/lgammaf_r.rs
@@ -95,7 +95,8 @@ fn sin_pi(mut x: f32) -> f32 {
         1 => k_cosf(y),
         2 => k_sinf(-y),
         3 => -k_cosf(y),
-        0 | _ => k_sinf(y),
+        // 0
+        _ => k_sinf(y),
     }
 }
 
diff --git a/src/math/log.rs b/libm/src/math/log.rs
similarity index 100%
rename from src/math/log.rs
rename to libm/src/math/log.rs
diff --git a/src/math/log10.rs b/libm/src/math/log10.rs
similarity index 98%
rename from src/math/log10.rs
rename to libm/src/math/log10.rs
index f9d118f12..8c9d68c49 100644
--- a/src/math/log10.rs
+++ b/libm/src/math/log10.rs
@@ -78,7 +78,7 @@ pub fn log10(mut x: f64) -> f64 {
     hx += 0x3ff00000 - 0x3fe6a09e;
     k += (hx >> 20) as i32 - 0x3ff;
     hx = (hx & 0x000fffff) + 0x3fe6a09e;
-    ui = (hx as u64) << 32 | (ui & 0xffffffff);
+    ui = ((hx as u64) << 32) | (ui & 0xffffffff);
     x = f64::from_bits(ui);
 
     f = x - 1.0;
diff --git a/src/math/log10f.rs b/libm/src/math/log10f.rs
similarity index 100%
rename from src/math/log10f.rs
rename to libm/src/math/log10f.rs
diff --git a/src/math/log1p.rs b/libm/src/math/log1p.rs
similarity index 98%
rename from src/math/log1p.rs
rename to libm/src/math/log1p.rs
index 80561ec74..b7f3fb09e 100644
--- a/src/math/log1p.rs
+++ b/libm/src/math/log1p.rs
@@ -125,7 +125,7 @@ pub fn log1p(x: f64) -> f64 {
         }
         /* reduce u into [sqrt(2)/2, sqrt(2)] */
         hu = (hu & 0x000fffff) + 0x3fe6a09e;
-        ui = (hu as u64) << 32 | (ui & 0xffffffff);
+        ui = ((hu as u64) << 32) | (ui & 0xffffffff);
         f = f64::from_bits(ui) - 1.;
     }
     hfsq = 0.5 * f * f;
diff --git a/src/math/log1pf.rs b/libm/src/math/log1pf.rs
similarity index 100%
rename from src/math/log1pf.rs
rename to libm/src/math/log1pf.rs
diff --git a/src/math/log2.rs b/libm/src/math/log2.rs
similarity index 98%
rename from src/math/log2.rs
rename to libm/src/math/log2.rs
index 59533340b..701f63c25 100644
--- a/src/math/log2.rs
+++ b/libm/src/math/log2.rs
@@ -75,7 +75,7 @@ pub fn log2(mut x: f64) -> f64 {
     hx += 0x3ff00000 - 0x3fe6a09e;
     k += (hx >> 20) as i32 - 0x3ff;
     hx = (hx & 0x000fffff) + 0x3fe6a09e;
-    ui = (hx as u64) << 32 | (ui & 0xffffffff);
+    ui = ((hx as u64) << 32) | (ui & 0xffffffff);
     x = f64::from_bits(ui);
 
     f = x - 1.0;
diff --git a/src/math/log2f.rs b/libm/src/math/log2f.rs
similarity index 100%
rename from src/math/log2f.rs
rename to libm/src/math/log2f.rs
diff --git a/src/math/logf.rs b/libm/src/math/logf.rs
similarity index 100%
rename from src/math/logf.rs
rename to libm/src/math/logf.rs
diff --git a/src/math/mod.rs b/libm/src/math/mod.rs
similarity index 66%
rename from src/math/mod.rs
rename to libm/src/math/mod.rs
index 85c9fc5bf..949c18b40 100644
--- a/src/math/mod.rs
+++ b/libm/src/math/mod.rs
@@ -60,31 +60,68 @@ macro_rules! i {
 // the time of this writing this is only used in a few places, and once
 // rust-lang/rust#72751 is fixed then this macro will no longer be necessary and
 // the native `/` operator can be used and panics won't be codegen'd.
-#[cfg(any(debug_assertions, not(feature = "unstable")))]
+#[cfg(any(debug_assertions, not(intrinsics_enabled)))]
 macro_rules! div {
     ($a:expr, $b:expr) => {
         $a / $b
     };
 }
 
-#[cfg(all(not(debug_assertions), feature = "unstable"))]
+#[cfg(all(not(debug_assertions), intrinsics_enabled))]
 macro_rules! div {
     ($a:expr, $b:expr) => {
         unsafe { core::intrinsics::unchecked_div($a, $b) }
     };
 }
 
-macro_rules! llvm_intrinsically_optimized {
-    (#[cfg($($clause:tt)*)] $e:expr) => {
-        #[cfg(all(feature = "unstable", not(feature = "force-soft-floats"), $($clause)*))]
-        {
-            if true { // thwart the dead code lint
-                $e
-            }
-        }
-    };
+// `support` may be public for testing
+#[macro_use]
+#[cfg(feature = "unstable-public-internals")]
+pub mod support;
+
+#[macro_use]
+#[cfg(not(feature = "unstable-public-internals"))]
+pub(crate) mod support;
+
+cfg_if! {
+    if #[cfg(feature = "unstable-public-internals")] {
+        pub mod generic;
+    } else {
+        mod generic;
+    }
 }
 
+// Private modules
+mod arch;
+mod expo2;
+mod k_cos;
+mod k_cosf;
+mod k_expo2;
+mod k_expo2f;
+mod k_sin;
+mod k_sinf;
+mod k_tan;
+mod k_tanf;
+mod rem_pio2;
+mod rem_pio2_large;
+mod rem_pio2f;
+
+// Private re-imports
+use self::expo2::expo2;
+use self::k_cos::k_cos;
+use self::k_cosf::k_cosf;
+use self::k_expo2::k_expo2;
+use self::k_expo2f::k_expo2f;
+use self::k_sin::k_sin;
+use self::k_sinf::k_sinf;
+use self::k_tan::k_tan;
+use self::k_tanf::k_tanf;
+use self::rem_pio2::rem_pio2;
+use self::rem_pio2_large::rem_pio2_large;
+use self::rem_pio2f::rem_pio2f;
+#[allow(unused_imports)]
+use self::support::{CastFrom, CastInto, DFloat, DInt, Float, HFloat, HInt, Int, IntTy, MinInt};
+
 // Public modules
 mod acos;
 mod acosf;
@@ -103,9 +140,7 @@ mod atanhf;
 mod cbrt;
 mod cbrtf;
 mod ceil;
-mod ceilf;
 mod copysign;
-mod copysignf;
 mod cos;
 mod cosf;
 mod cosh;
@@ -121,19 +156,14 @@ mod expf;
 mod expm1;
 mod expm1f;
 mod fabs;
-mod fabsf;
 mod fdim;
-mod fdimf;
 mod floor;
-mod floorf;
 mod fma;
-mod fmaf;
-mod fmax;
-mod fmaxf;
-mod fmin;
-mod fminf;
+mod fma_wide;
+mod fmin_fmax;
+mod fminimum_fmaximum;
+mod fminimum_fmaximum_num;
 mod fmod;
-mod fmodf;
 mod frexp;
 mod frexpf;
 mod hypot;
@@ -147,7 +177,6 @@ mod j1f;
 mod jn;
 mod jnf;
 mod ldexp;
-mod ldexpf;
 mod lgamma;
 mod lgamma_r;
 mod lgammaf;
@@ -171,11 +200,9 @@ mod remainderf;
 mod remquo;
 mod remquof;
 mod rint;
-mod rintf;
 mod round;
-mod roundf;
+mod roundeven;
 mod scalbn;
-mod scalbnf;
 mod sin;
 mod sincos;
 mod sincosf;
@@ -183,7 +210,6 @@ mod sinf;
 mod sinh;
 mod sinhf;
 mod sqrt;
-mod sqrtf;
 mod tan;
 mod tanf;
 mod tanh;
@@ -191,7 +217,6 @@ mod tanhf;
 mod tgamma;
 mod tgammaf;
 mod trunc;
-mod truncf;
 
 // Use separated imports instead of {}-grouped imports for easier merging.
 pub use self::acos::acos;
@@ -210,10 +235,8 @@ pub use self::atanh::atanh;
 pub use self::atanhf::atanhf;
 pub use self::cbrt::cbrt;
 pub use self::cbrtf::cbrtf;
-pub use self::ceil::ceil;
-pub use self::ceilf::ceilf;
-pub use self::copysign::copysign;
-pub use self::copysignf::copysignf;
+pub use self::ceil::{ceil, ceilf};
+pub use self::copysign::{copysign, copysignf};
 pub use self::cos::cos;
 pub use self::cosf::cosf;
 pub use self::cosh::cosh;
@@ -228,20 +251,15 @@ pub use self::exp10f::exp10f;
 pub use self::expf::expf;
 pub use self::expm1::expm1;
 pub use self::expm1f::expm1f;
-pub use self::fabs::fabs;
-pub use self::fabsf::fabsf;
-pub use self::fdim::fdim;
-pub use self::fdimf::fdimf;
-pub use self::floor::floor;
-pub use self::floorf::floorf;
+pub use self::fabs::{fabs, fabsf};
+pub use self::fdim::{fdim, fdimf};
+pub use self::floor::{floor, floorf};
 pub use self::fma::fma;
-pub use self::fmaf::fmaf;
-pub use self::fmax::fmax;
-pub use self::fmaxf::fmaxf;
-pub use self::fmin::fmin;
-pub use self::fminf::fminf;
-pub use self::fmod::fmod;
-pub use self::fmodf::fmodf;
+pub use self::fma_wide::fmaf;
+pub use self::fmin_fmax::{fmax, fmaxf, fmin, fminf};
+pub use self::fminimum_fmaximum::{fmaximum, fmaximumf, fminimum, fminimumf};
+pub use self::fminimum_fmaximum_num::{fmaximum_num, fmaximum_numf, fminimum_num, fminimum_numf};
+pub use self::fmod::{fmod, fmodf};
 pub use self::frexp::frexp;
 pub use self::frexpf::frexpf;
 pub use self::hypot::hypot;
@@ -254,8 +272,7 @@ pub use self::j1::{j1, y1};
 pub use self::j1f::{j1f, y1f};
 pub use self::jn::{jn, yn};
 pub use self::jnf::{jnf, ynf};
-pub use self::ldexp::ldexp;
-pub use self::ldexpf::ldexpf;
+pub use self::ldexp::{ldexp, ldexpf};
 pub use self::lgamma::lgamma;
 pub use self::lgamma_r::lgamma_r;
 pub use self::lgammaf::lgammaf;
@@ -278,57 +295,74 @@ pub use self::remainder::remainder;
 pub use self::remainderf::remainderf;
 pub use self::remquo::remquo;
 pub use self::remquof::remquof;
-pub use self::rint::rint;
-pub use self::rintf::rintf;
-pub use self::round::round;
-pub use self::roundf::roundf;
-pub use self::scalbn::scalbn;
-pub use self::scalbnf::scalbnf;
+pub use self::rint::{rint, rintf};
+pub use self::round::{round, roundf};
+pub use self::roundeven::{roundeven, roundevenf};
+pub use self::scalbn::{scalbn, scalbnf};
 pub use self::sin::sin;
 pub use self::sincos::sincos;
 pub use self::sincosf::sincosf;
 pub use self::sinf::sinf;
 pub use self::sinh::sinh;
 pub use self::sinhf::sinhf;
-pub use self::sqrt::sqrt;
-pub use self::sqrtf::sqrtf;
+pub use self::sqrt::{sqrt, sqrtf};
 pub use self::tan::tan;
 pub use self::tanf::tanf;
 pub use self::tanh::tanh;
 pub use self::tanhf::tanhf;
 pub use self::tgamma::tgamma;
 pub use self::tgammaf::tgammaf;
-pub use self::trunc::trunc;
-pub use self::truncf::truncf;
+pub use self::trunc::{trunc, truncf};
 
-// Private modules
-mod expo2;
-mod fenv;
-mod k_cos;
-mod k_cosf;
-mod k_expo2;
-mod k_expo2f;
-mod k_sin;
-mod k_sinf;
-mod k_tan;
-mod k_tanf;
-mod rem_pio2;
-mod rem_pio2_large;
-mod rem_pio2f;
+cfg_if! {
+    if #[cfg(f16_enabled)] {
+        // verify-sorted-start
+        pub use self::ceil::ceilf16;
+        pub use self::copysign::copysignf16;
+        pub use self::fabs::fabsf16;
+        pub use self::fdim::fdimf16;
+        pub use self::floor::floorf16;
+        pub use self::fmin_fmax::{fmaxf16, fminf16};
+        pub use self::fminimum_fmaximum::{fmaximumf16, fminimumf16};
+        pub use self::fminimum_fmaximum_num::{fmaximum_numf16, fminimum_numf16};
+        pub use self::fmod::fmodf16;
+        pub use self::ldexp::ldexpf16;
+        pub use self::rint::rintf16;
+        pub use self::round::roundf16;
+        pub use self::roundeven::roundevenf16;
+        pub use self::scalbn::scalbnf16;
+        pub use self::sqrt::sqrtf16;
+        pub use self::trunc::truncf16;
+        // verify-sorted-end
 
-// Private re-imports
-use self::expo2::expo2;
-use self::k_cos::k_cos;
-use self::k_cosf::k_cosf;
-use self::k_expo2::k_expo2;
-use self::k_expo2f::k_expo2f;
-use self::k_sin::k_sin;
-use self::k_sinf::k_sinf;
-use self::k_tan::k_tan;
-use self::k_tanf::k_tanf;
-use self::rem_pio2::rem_pio2;
-use self::rem_pio2_large::rem_pio2_large;
-use self::rem_pio2f::rem_pio2f;
+        #[allow(unused_imports)]
+        pub(crate) use self::fma_wide::fmaf16;
+    }
+}
+
+cfg_if! {
+    if #[cfg(f128_enabled)] {
+        // verify-sorted-start
+        pub use self::ceil::ceilf128;
+        pub use self::copysign::copysignf128;
+        pub use self::fabs::fabsf128;
+        pub use self::fdim::fdimf128;
+        pub use self::floor::floorf128;
+        pub use self::fma::fmaf128;
+        pub use self::fmin_fmax::{fmaxf128, fminf128};
+        pub use self::fminimum_fmaximum::{fmaximumf128, fminimumf128};
+        pub use self::fminimum_fmaximum_num::{fmaximum_numf128, fminimum_numf128};
+        pub use self::fmod::fmodf128;
+        pub use self::ldexp::ldexpf128;
+        pub use self::rint::rintf128;
+        pub use self::round::roundf128;
+        pub use self::roundeven::roundevenf128;
+        pub use self::scalbn::scalbnf128;
+        pub use self::sqrt::sqrtf128;
+        pub use self::trunc::truncf128;
+        // verify-sorted-end
+    }
+}
 
 #[inline]
 fn get_high_word(x: f64) -> u32 {
@@ -358,5 +392,5 @@ fn with_set_low_word(f: f64, lo: u32) -> f64 {
 
 #[inline]
 fn combine_words(hi: u32, lo: u32) -> f64 {
-    f64::from_bits((hi as u64) << 32 | lo as u64)
+    f64::from_bits(((hi as u64) << 32) | lo as u64)
 }
diff --git a/src/math/modf.rs b/libm/src/math/modf.rs
similarity index 86%
rename from src/math/modf.rs
rename to libm/src/math/modf.rs
index bcab33a81..6541862cd 100644
--- a/src/math/modf.rs
+++ b/libm/src/math/modf.rs
@@ -1,8 +1,9 @@
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
 pub fn modf(x: f64) -> (f64, f64) {
     let rv2: f64;
     let mut u = x.to_bits();
     let mask: u64;
-    let e = ((u >> 52 & 0x7ff) as i32) - 0x3ff;
+    let e = (((u >> 52) & 0x7ff) as i32) - 0x3ff;
 
     /* no fractional part */
     if e >= 52 {
diff --git a/src/math/modff.rs b/libm/src/math/modff.rs
similarity index 86%
rename from src/math/modff.rs
rename to libm/src/math/modff.rs
index 56ece12e3..90c6bca7d 100644
--- a/src/math/modff.rs
+++ b/libm/src/math/modff.rs
@@ -1,8 +1,9 @@
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
 pub fn modff(x: f32) -> (f32, f32) {
     let rv2: f32;
     let mut u: u32 = x.to_bits();
     let mask: u32;
-    let e = ((u >> 23 & 0xff) as i32) - 0x7f;
+    let e = (((u >> 23) & 0xff) as i32) - 0x7f;
 
     /* no fractional part */
     if e >= 23 {
diff --git a/src/math/nextafter.rs b/libm/src/math/nextafter.rs
similarity index 76%
rename from src/math/nextafter.rs
rename to libm/src/math/nextafter.rs
index 057626191..c991ff6f2 100644
--- a/src/math/nextafter.rs
+++ b/libm/src/math/nextafter.rs
@@ -10,20 +10,20 @@ pub fn nextafter(x: f64, y: f64) -> f64 {
         return y;
     }
 
-    let ax = ux_i & !1_u64 / 2;
-    let ay = uy_i & !1_u64 / 2;
+    let ax = ux_i & (!1_u64 / 2);
+    let ay = uy_i & (!1_u64 / 2);
     if ax == 0 {
         if ay == 0 {
             return y;
         }
-        ux_i = (uy_i & 1_u64 << 63) | 1;
-    } else if ax > ay || ((ux_i ^ uy_i) & 1_u64 << 63) != 0 {
+        ux_i = (uy_i & (1_u64 << 63)) | 1;
+    } else if ax > ay || ((ux_i ^ uy_i) & (1_u64 << 63)) != 0 {
         ux_i -= 1;
     } else {
         ux_i += 1;
     }
 
-    let e = ux_i >> 52 & 0x7ff;
+    let e = (ux_i >> 52) & 0x7ff;
     // raise overflow if ux.f is infinite and x is finite
     if e == 0x7ff {
         force_eval!(x + x);
diff --git a/src/math/nextafterf.rs b/libm/src/math/nextafterf.rs
similarity index 100%
rename from src/math/nextafterf.rs
rename to libm/src/math/nextafterf.rs
diff --git a/src/math/pow.rs b/libm/src/math/pow.rs
similarity index 87%
rename from src/math/pow.rs
rename to libm/src/math/pow.rs
index 7ecad291d..80b2a2499 100644
--- a/src/math/pow.rs
+++ b/libm/src/math/pow.rs
@@ -98,8 +98,8 @@ pub fn pow(x: f64, y: f64) -> f64 {
     let (hx, lx): (i32, u32) = ((x.to_bits() >> 32) as i32, x.to_bits() as u32);
     let (hy, ly): (i32, u32) = ((y.to_bits() >> 32) as i32, y.to_bits() as u32);
 
-    let mut ix: i32 = (hx & 0x7fffffff) as i32;
-    let iy: i32 = (hy & 0x7fffffff) as i32;
+    let mut ix: i32 = hx & 0x7fffffff_i32;
+    let iy: i32 = hy & 0x7fffffff_i32;
 
     /* x**0 = 1, even if x is NaN */
     if ((iy as u32) | ly) == 0 {
@@ -355,7 +355,7 @@ pub fn pow(x: f64, y: f64) -> f64 {
     }
 
     /* compute 2**(p_h+p_l) */
-    let i: i32 = j & (0x7fffffff as i32);
+    let i: i32 = j & 0x7fffffff_i32;
     k = (i >> 20) - 0x3ff;
     let mut n: i32 = 0;
 
@@ -398,7 +398,6 @@ mod tests {
     extern crate core;
 
     use self::core::f64::consts::{E, PI};
-    use self::core::f64::{EPSILON, INFINITY, MAX, MIN, MIN_POSITIVE, NAN, NEG_INFINITY};
     use super::pow;
 
     const POS_ZERO: &[f64] = &[0.0];
@@ -407,15 +406,15 @@ mod tests {
     const NEG_ONE: &[f64] = &[-1.0];
     const POS_FLOATS: &[f64] = &[99.0 / 70.0, E, PI];
     const NEG_FLOATS: &[f64] = &[-99.0 / 70.0, -E, -PI];
-    const POS_SMALL_FLOATS: &[f64] = &[(1.0 / 2.0), MIN_POSITIVE, EPSILON];
-    const NEG_SMALL_FLOATS: &[f64] = &[-(1.0 / 2.0), -MIN_POSITIVE, -EPSILON];
-    const POS_EVENS: &[f64] = &[2.0, 6.0, 8.0, 10.0, 22.0, 100.0, MAX];
-    const NEG_EVENS: &[f64] = &[MIN, -100.0, -22.0, -10.0, -8.0, -6.0, -2.0];
+    const POS_SMALL_FLOATS: &[f64] = &[(1.0 / 2.0), f64::MIN_POSITIVE, f64::EPSILON];
+    const NEG_SMALL_FLOATS: &[f64] = &[-(1.0 / 2.0), -f64::MIN_POSITIVE, -f64::EPSILON];
+    const POS_EVENS: &[f64] = &[2.0, 6.0, 8.0, 10.0, 22.0, 100.0, f64::MAX];
+    const NEG_EVENS: &[f64] = &[f64::MIN, -100.0, -22.0, -10.0, -8.0, -6.0, -2.0];
     const POS_ODDS: &[f64] = &[3.0, 7.0];
     const NEG_ODDS: &[f64] = &[-7.0, -3.0];
-    const NANS: &[f64] = &[NAN];
-    const POS_INF: &[f64] = &[INFINITY];
-    const NEG_INF: &[f64] = &[NEG_INFINITY];
+    const NANS: &[f64] = &[f64::NAN];
+    const POS_INF: &[f64] = &[f64::INFINITY];
+    const NEG_INF: &[f64] = &[f64::NEG_INFINITY];
 
     const ALL: &[&[f64]] = &[
         POS_ZERO,
@@ -492,83 +491,83 @@ mod tests {
     #[test]
     fn nan_inputs() {
         // NAN as the base:
-        // (NAN ^ anything *but 0* should be NAN)
-        test_sets_as_exponent(NAN, &ALL[2..], NAN);
+        // (f64::NAN ^ anything *but 0* should be f64::NAN)
+        test_sets_as_exponent(f64::NAN, &ALL[2..], f64::NAN);
 
-        // NAN as the exponent:
-        // (anything *but 1* ^ NAN should be NAN)
-        test_sets_as_base(&ALL[..(ALL.len() - 2)], NAN, NAN);
+        // f64::NAN as the exponent:
+        // (anything *but 1* ^ f64::NAN should be f64::NAN)
+        test_sets_as_base(&ALL[..(ALL.len() - 2)], f64::NAN, f64::NAN);
     }
 
     #[test]
     fn infinity_as_base() {
         // Positive Infinity as the base:
-        // (+Infinity ^ positive anything but 0 and NAN should be +Infinity)
-        test_sets_as_exponent(INFINITY, &POS[1..], INFINITY);
+        // (+Infinity ^ positive anything but 0 and f64::NAN should be +Infinity)
+        test_sets_as_exponent(f64::INFINITY, &POS[1..], f64::INFINITY);
 
-        // (+Infinity ^ negative anything except 0 and NAN should be 0.0)
-        test_sets_as_exponent(INFINITY, &NEG[1..], 0.0);
+        // (+Infinity ^ negative anything except 0 and f64::NAN should be 0.0)
+        test_sets_as_exponent(f64::INFINITY, &NEG[1..], 0.0);
 
         // Negative Infinity as the base:
         // (-Infinity ^ positive odd ints should be -Infinity)
-        test_sets_as_exponent(NEG_INFINITY, &[POS_ODDS], NEG_INFINITY);
+        test_sets_as_exponent(f64::NEG_INFINITY, &[POS_ODDS], f64::NEG_INFINITY);
 
         // (-Infinity ^ anything but odd ints should be == -0 ^ (-anything))
         // We can lump in pos/neg odd ints here because they don't seem to
         // cause panics (div by zero) in release mode (I think).
-        test_sets(ALL, &|v: f64| pow(NEG_INFINITY, v), &|v: f64| pow(-0.0, -v));
+        test_sets(ALL, &|v: f64| pow(f64::NEG_INFINITY, v), &|v: f64| pow(-0.0, -v));
     }
 
     #[test]
     fn infinity_as_exponent() {
         // Positive/Negative base greater than 1:
-        // (pos/neg > 1 ^ Infinity should be Infinity - note this excludes NAN as the base)
-        test_sets_as_base(&ALL[5..(ALL.len() - 2)], INFINITY, INFINITY);
+        // (pos/neg > 1 ^ Infinity should be Infinity - note this excludes f64::NAN as the base)
+        test_sets_as_base(&ALL[5..(ALL.len() - 2)], f64::INFINITY, f64::INFINITY);
 
         // (pos/neg > 1 ^ -Infinity should be 0.0)
-        test_sets_as_base(&ALL[5..ALL.len() - 2], NEG_INFINITY, 0.0);
+        test_sets_as_base(&ALL[5..ALL.len() - 2], f64::NEG_INFINITY, 0.0);
 
         // Positive/Negative base less than 1:
         let base_below_one = &[POS_ZERO, NEG_ZERO, NEG_SMALL_FLOATS, POS_SMALL_FLOATS];
 
-        // (pos/neg < 1 ^ Infinity should be 0.0 - this also excludes NAN as the base)
-        test_sets_as_base(base_below_one, INFINITY, 0.0);
+        // (pos/neg < 1 ^ Infinity should be 0.0 - this also excludes f64::NAN as the base)
+        test_sets_as_base(base_below_one, f64::INFINITY, 0.0);
 
         // (pos/neg < 1 ^ -Infinity should be Infinity)
-        test_sets_as_base(base_below_one, NEG_INFINITY, INFINITY);
+        test_sets_as_base(base_below_one, f64::NEG_INFINITY, f64::INFINITY);
 
         // Positive/Negative 1 as the base:
         // (pos/neg 1 ^ Infinity should be 1)
-        test_sets_as_base(&[NEG_ONE, POS_ONE], INFINITY, 1.0);
+        test_sets_as_base(&[NEG_ONE, POS_ONE], f64::INFINITY, 1.0);
 
         // (pos/neg 1 ^ -Infinity should be 1)
-        test_sets_as_base(&[NEG_ONE, POS_ONE], NEG_INFINITY, 1.0);
+        test_sets_as_base(&[NEG_ONE, POS_ONE], f64::NEG_INFINITY, 1.0);
     }
 
     #[test]
     fn zero_as_base() {
         // Positive Zero as the base:
-        // (+0 ^ anything positive but 0 and NAN should be +0)
+        // (+0 ^ anything positive but 0 and f64::NAN should be +0)
         test_sets_as_exponent(0.0, &POS[1..], 0.0);
 
-        // (+0 ^ anything negative but 0 and NAN should be Infinity)
+        // (+0 ^ anything negative but 0 and f64::NAN should be Infinity)
         // (this should panic because we're dividing by zero)
-        test_sets_as_exponent(0.0, &NEG[1..], INFINITY);
+        test_sets_as_exponent(0.0, &NEG[1..], f64::INFINITY);
 
         // Negative Zero as the base:
-        // (-0 ^ anything positive but 0, NAN, and odd ints should be +0)
+        // (-0 ^ anything positive but 0, f64::NAN, and odd ints should be +0)
         test_sets_as_exponent(-0.0, &POS[3..], 0.0);
 
-        // (-0 ^ anything negative but 0, NAN, and odd ints should be Infinity)
+        // (-0 ^ anything negative but 0, f64::NAN, and odd ints should be Infinity)
         // (should panic because of divide by zero)
-        test_sets_as_exponent(-0.0, &NEG[3..], INFINITY);
+        test_sets_as_exponent(-0.0, &NEG[3..], f64::INFINITY);
 
         // (-0 ^ positive odd ints should be -0)
         test_sets_as_exponent(-0.0, &[POS_ODDS], -0.0);
 
         // (-0 ^ negative odd ints should be -Infinity)
         // (should panic because of divide by zero)
-        test_sets_as_exponent(-0.0, &[NEG_ODDS], NEG_INFINITY);
+        test_sets_as_exponent(-0.0, &[NEG_ODDS], f64::NEG_INFINITY);
     }
 
     #[test]
@@ -583,21 +582,17 @@ mod tests {
 
         // Factoring -1 out:
         // (negative anything ^ integer should be (-1 ^ integer) * (positive anything ^ integer))
-        (&[POS_ZERO, NEG_ZERO, POS_ONE, NEG_ONE, POS_EVENS, NEG_EVENS]).iter().for_each(
-            |int_set| {
-                int_set.iter().for_each(|int| {
-                    test_sets(ALL, &|v: f64| pow(-v, *int), &|v: f64| {
-                        pow(-1.0, *int) * pow(v, *int)
-                    });
-                })
-            },
-        );
+        [POS_ZERO, NEG_ZERO, POS_ONE, NEG_ONE, POS_EVENS, NEG_EVENS].iter().for_each(|int_set| {
+            int_set.iter().for_each(|int| {
+                test_sets(ALL, &|v: f64| pow(-v, *int), &|v: f64| pow(-1.0, *int) * pow(v, *int));
+            })
+        });
 
         // Negative base (imaginary results):
         // (-anything except 0 and Infinity ^ non-integer should be NAN)
-        (&NEG[1..(NEG.len() - 1)]).iter().for_each(|set| {
+        NEG[1..(NEG.len() - 1)].iter().for_each(|set| {
             set.iter().for_each(|val| {
-                test_sets(&ALL[3..7], &|v: f64| pow(*val, v), &|_| NAN);
+                test_sets(&ALL[3..7], &|v: f64| pow(*val, v), &|_| f64::NAN);
             })
         });
     }
diff --git a/src/math/powf.rs b/libm/src/math/powf.rs
similarity index 97%
rename from src/math/powf.rs
rename to libm/src/math/powf.rs
index 2d9d1e4bb..839c6c23d 100644
--- a/src/math/powf.rs
+++ b/libm/src/math/powf.rs
@@ -13,6 +13,8 @@
  * ====================================================
  */
 
+use core::cmp::Ordering;
+
 use super::{fabsf, scalbnf, sqrtf};
 
 const BP: [f32; 2] = [1.0, 1.5];
@@ -115,15 +117,13 @@ pub fn powf(x: f32, y: f32) -> f32 {
     /* special value of y */
     if iy == 0x7f800000 {
         /* y is +-inf */
-        if ix == 0x3f800000 {
+        match ix.cmp(&0x3f800000) {
             /* (-1)**+-inf is 1 */
-            return 1.0;
-        } else if ix > 0x3f800000 {
+            Ordering::Equal => return 1.0,
             /* (|x|>1)**+-inf = inf,0 */
-            return if hy >= 0 { y } else { 0.0 };
-        } else {
+            Ordering::Greater => return if hy >= 0 { y } else { 0.0 },
             /* (|x|<1)**+-inf = 0,inf */
-            return if hy >= 0 { 0.0 } else { -y };
+            Ordering::Less => return if hy >= 0 { 0.0 } else { -y },
         }
     }
     if iy == 0x3f800000 {
diff --git a/src/math/rem_pio2.rs b/libm/src/math/rem_pio2.rs
similarity index 99%
rename from src/math/rem_pio2.rs
rename to libm/src/math/rem_pio2.rs
index 4dfb8c658..917e90819 100644
--- a/src/math/rem_pio2.rs
+++ b/libm/src/math/rem_pio2.rs
@@ -50,7 +50,7 @@ pub(crate) fn rem_pio2(x: f64) -> (i32, f64, f64) {
 
     fn medium(x: f64, ix: u32) -> (i32, f64, f64) {
         /* rint(x/(pi/2)), Assume round-to-nearest. */
-        let tmp = x as f64 * INV_PIO2 + TO_INT;
+        let tmp = x * INV_PIO2 + TO_INT;
         // force rounding of tmp to it's storage format on x87 to avoid
         // excess precision issues.
         #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))]
diff --git a/src/math/rem_pio2_large.rs b/libm/src/math/rem_pio2_large.rs
similarity index 99%
rename from src/math/rem_pio2_large.rs
rename to libm/src/math/rem_pio2_large.rs
index 1dfbba3b1..6d679bbe9 100644
--- a/src/math/rem_pio2_large.rs
+++ b/libm/src/math/rem_pio2_large.rs
@@ -226,8 +226,9 @@ pub(crate) fn rem_pio2_large(x: &[f64], y: &mut [f64], e0: i32, prec: usize) ->
     let x1p24 = f64::from_bits(0x4170000000000000); // 0x1p24 === 2 ^ 24
     let x1p_24 = f64::from_bits(0x3e70000000000000); // 0x1p_24 === 2 ^ (-24)
 
-    #[cfg(all(target_pointer_width = "64", feature = "checked"))]
-    assert!(e0 <= 16360);
+    if cfg!(target_pointer_width = "64") {
+        debug_assert!(e0 <= 16360);
+    }
 
     let nx = x.len();
 
@@ -425,8 +426,6 @@ pub(crate) fn rem_pio2_large(x: &[f64], y: &mut [f64], e0: i32, prec: usize) ->
             for i in (0..=jz).rev() {
                 fw += i!(fq, i);
             }
-            // TODO: drop excess precision here once double_t is used
-            fw = fw as f64;
             i!(y, 0, =, if ih == 0 { fw } else { -fw });
             fw = i!(fq, 0) - fw;
             for i in 1..=jz {
diff --git a/src/math/rem_pio2f.rs b/libm/src/math/rem_pio2f.rs
similarity index 100%
rename from src/math/rem_pio2f.rs
rename to libm/src/math/rem_pio2f.rs
diff --git a/src/math/remainder.rs b/libm/src/math/remainder.rs
similarity index 100%
rename from src/math/remainder.rs
rename to libm/src/math/remainder.rs
diff --git a/src/math/remainderf.rs b/libm/src/math/remainderf.rs
similarity index 100%
rename from src/math/remainderf.rs
rename to libm/src/math/remainderf.rs
diff --git a/src/math/remquo.rs b/libm/src/math/remquo.rs
similarity index 100%
rename from src/math/remquo.rs
rename to libm/src/math/remquo.rs
diff --git a/src/math/remquof.rs b/libm/src/math/remquof.rs
similarity index 100%
rename from src/math/remquof.rs
rename to libm/src/math/remquof.rs
diff --git a/libm/src/math/rint.rs b/libm/src/math/rint.rs
new file mode 100644
index 000000000..e1c32c943
--- /dev/null
+++ b/libm/src/math/rint.rs
@@ -0,0 +1,51 @@
+use super::support::Round;
+
+/// Round `x` to the nearest integer, breaking ties toward even (mode fixed to `Round::Nearest`).
+#[cfg(f16_enabled)]
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn rintf16(x: f16) -> f16 {
+    select_implementation! {
+        name: rintf16,
+        use_arch: all(target_arch = "aarch64", target_feature = "fp16"),
+        args: x,
+    }
+
+    super::generic::rint_round(x, Round::Nearest).val
+}
+
+/// Round `x` to the nearest integer, breaking ties toward even (mode fixed to `Round::Nearest`).
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn rintf(x: f32) -> f32 {
+    select_implementation! {
+        name: rintf,
+        use_arch: any(
+            all(target_arch = "aarch64", target_feature = "neon"),
+            all(target_arch = "wasm32", intrinsics_enabled),
+        ),
+        args: x,
+    }
+
+    super::generic::rint_round(x, Round::Nearest).val
+}
+
+/// Round `x` to the nearest integer, breaking ties toward even (mode fixed to `Round::Nearest`).
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn rint(x: f64) -> f64 {
+    select_implementation! {
+        name: rint,
+        use_arch: any(
+            all(target_arch = "aarch64", target_feature = "neon"),
+            all(target_arch = "wasm32", intrinsics_enabled),
+        ),
+        args: x,
+    }
+
+    super::generic::rint_round(x, Round::Nearest).val
+}
+
+/// Round `x` to the nearest integer, breaking ties toward even (mode fixed to `Round::Nearest`).
+#[cfg(f128_enabled)]
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn rintf128(x: f128) -> f128 {
+    super::generic::rint_round(x, Round::Nearest).val
+}
diff --git a/libm/src/math/round.rs b/libm/src/math/round.rs
new file mode 100644
index 000000000..6cd091cd7
--- /dev/null
+++ b/libm/src/math/round.rs
@@ -0,0 +1,25 @@
+/// Round `x` to the nearest integer, breaking ties away from zero (wraps `generic::round`).
+#[cfg(f16_enabled)]
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn roundf16(x: f16) -> f16 {
+    super::generic::round(x)
+}
+
+/// Round `x` to the nearest integer, breaking ties away from zero (wraps `generic::round`).
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn roundf(x: f32) -> f32 {
+    super::generic::round(x)
+}
+
+/// Round `x` to the nearest integer, breaking ties away from zero (wraps `generic::round`).
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn round(x: f64) -> f64 {
+    super::generic::round(x)
+}
+
+/// Round `x` to the nearest integer, breaking ties away from zero (wraps `generic::round`).
+#[cfg(f128_enabled)]
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn roundf128(x: f128) -> f128 {
+    super::generic::round(x)
+}
diff --git a/libm/src/math/roundeven.rs b/libm/src/math/roundeven.rs
new file mode 100644
index 000000000..6e621d762
--- /dev/null
+++ b/libm/src/math/roundeven.rs
@@ -0,0 +1,36 @@
+use super::support::{Float, Round};
+
+/// Round `x` to the nearest integer, breaking ties toward even (IEEE 754
+/// `roundToIntegralTiesToEven`). Forwards to `roundeven_impl`.
+#[cfg(f16_enabled)]
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn roundevenf16(x: f16) -> f16 {
+    roundeven_impl(x)
+}
+
+/// Round `x` to the nearest integer, breaking ties toward even (IEEE 754
+/// `roundToIntegralTiesToEven`). Forwards to `roundeven_impl`.
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn roundevenf(x: f32) -> f32 {
+    roundeven_impl(x)
+}
+
+/// Round `x` to the nearest integer, breaking ties toward even (IEEE 754
+/// `roundToIntegralTiesToEven`). Forwards to `roundeven_impl`.
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn roundeven(x: f64) -> f64 {
+    roundeven_impl(x)
+}
+
+/// Round `x` to the nearest integer, breaking ties toward even (IEEE 754
+/// `roundToIntegralTiesToEven`). Forwards to `roundeven_impl`.
+#[cfg(f128_enabled)]
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn roundevenf128(x: f128) -> f128 {
+    roundeven_impl(x)
+}
+
+#[inline]
+pub fn roundeven_impl<F: Float>(x: F) -> F {
+    super::generic::rint_round(x, Round::Nearest).val // shared body of every `roundeven*` wrapper
+}
diff --git a/libm/src/math/roundf.rs b/libm/src/math/roundf.rs
new file mode 100644
index 000000000..b5d7c9d69
--- /dev/null
+++ b/libm/src/math/roundf.rs
@@ -0,0 +1,5 @@
+/// Round `x` to the nearest integer, breaking ties away from zero (wraps `generic::round`).
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn roundf(x: f32) -> f32 {
+    super::generic::round(x)
+}
diff --git a/libm/src/math/roundf128.rs b/libm/src/math/roundf128.rs
new file mode 100644
index 000000000..fc3164929
--- /dev/null
+++ b/libm/src/math/roundf128.rs
@@ -0,0 +1,5 @@
+/// Round `x` to the nearest integer, breaking ties away from zero (wraps `generic::round`).
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn roundf128(x: f128) -> f128 {
+    super::generic::round(x)
+}
diff --git a/libm/src/math/roundf16.rs b/libm/src/math/roundf16.rs
new file mode 100644
index 000000000..8b356eaab
--- /dev/null
+++ b/libm/src/math/roundf16.rs
@@ -0,0 +1,5 @@
+/// Round `x` to the nearest integer, breaking ties away from zero (wraps `generic::round`).
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn roundf16(x: f16) -> f16 {
+    super::generic::round(x)
+}
diff --git a/libm/src/math/scalbn.rs b/libm/src/math/scalbn.rs
new file mode 100644
index 000000000..ed73c3f94
--- /dev/null
+++ b/libm/src/math/scalbn.rs
@@ -0,0 +1,87 @@
+#[cfg(f16_enabled)]
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn scalbnf16(x: f16, n: i32) -> f16 {
+    super::generic::scalbn(x, n) // thin wrapper: every float width forwards to the generic routine
+}
+
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn scalbnf(x: f32, n: i32) -> f32 {
+    super::generic::scalbn(x, n) // thin wrapper: every float width forwards to the generic routine
+}
+
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn scalbn(x: f64, n: i32) -> f64 {
+    super::generic::scalbn(x, n) // thin wrapper: every float width forwards to the generic routine
+}
+
+#[cfg(f128_enabled)]
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn scalbnf128(x: f128, n: i32) -> f128 {
+    super::generic::scalbn(x, n) // thin wrapper: every float width forwards to the generic routine
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::support::{CastFrom, CastInto, Float};
+
+    // Checks the special-case results of `scalbn` against N3220 for any float type `F`.
+    fn spec_test<F: Float>(f: impl Fn(F, i32) -> F)
+    where
+        u32: CastInto<F::Int>,
+        F::Int: CastFrom<i32>,
+        F::Int: CastFrom<u32>,
+    {
+        // `scalbn(±0, n)` returns `±0`.
+        assert_biteq!(f(F::NEG_ZERO, 10), F::NEG_ZERO);
+        assert_biteq!(f(F::NEG_ZERO, 0), F::NEG_ZERO);
+        assert_biteq!(f(F::NEG_ZERO, -10), F::NEG_ZERO);
+        assert_biteq!(f(F::ZERO, 10), F::ZERO);
+        assert_biteq!(f(F::ZERO, 0), F::ZERO);
+        assert_biteq!(f(F::ZERO, -10), F::ZERO);
+
+        // `scalbn(x, 0)` returns `x`.
+        assert_biteq!(f(F::MIN, 0), F::MIN);
+        assert_biteq!(f(F::MAX, 0), F::MAX);
+        assert_biteq!(f(F::INFINITY, 0), F::INFINITY);
+        assert_biteq!(f(F::NEG_INFINITY, 0), F::NEG_INFINITY);
+        assert_biteq!(f(F::ZERO, 0), F::ZERO);
+        assert_biteq!(f(F::NEG_ZERO, 0), F::NEG_ZERO);
+
+        // `scalbn(±∞, n)` returns `±∞`.
+        assert_biteq!(f(F::INFINITY, 10), F::INFINITY);
+        assert_biteq!(f(F::INFINITY, -10), F::INFINITY);
+        assert_biteq!(f(F::NEG_INFINITY, 10), F::NEG_INFINITY);
+        assert_biteq!(f(F::NEG_INFINITY, -10), F::NEG_INFINITY);
+
+        // NaNs of either sign must stay NaN; only `is_nan` is checked, not the exact bits.
+        assert!(f(F::NAN, 10).is_nan());
+        assert!(f(F::NAN, 0).is_nan());
+        assert!(f(F::NAN, -10).is_nan());
+        assert!(f(-F::NAN, 10).is_nan());
+        assert!(f(-F::NAN, 0).is_nan());
+        assert!(f(-F::NAN, -10).is_nan());
+    }
+
+    #[test]
+    #[cfg(f16_enabled)]
+    fn spec_test_f16() {
+        spec_test::<f16>(scalbnf16);
+    }
+
+    #[test]
+    fn spec_test_f32() {
+        spec_test::<f32>(scalbnf);
+    }
+
+    #[test]
+    fn spec_test_f64() {
+        spec_test::<f64>(scalbn);
+    }
+
+    #[test]
+    #[cfg(f128_enabled)]
+    fn spec_test_f128() {
+        spec_test::<f128>(scalbnf128);
+    }
+}
diff --git a/libm/src/math/scalbnf.rs b/libm/src/math/scalbnf.rs
new file mode 100644
index 000000000..57e7ba76f
--- /dev/null
+++ b/libm/src/math/scalbnf.rs
@@ -0,0 +1,4 @@
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn scalbnf(x: f32, n: i32) -> f32 {
+    super::generic::scalbn(x, n) // thin wrapper over the float-generic routine
+}
diff --git a/libm/src/math/scalbnf128.rs b/libm/src/math/scalbnf128.rs
new file mode 100644
index 000000000..c1d2b4855
--- /dev/null
+++ b/libm/src/math/scalbnf128.rs
@@ -0,0 +1,4 @@
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn scalbnf128(x: f128, n: i32) -> f128 {
+    super::generic::scalbn(x, n) // thin wrapper over the float-generic routine
+}
diff --git a/libm/src/math/scalbnf16.rs b/libm/src/math/scalbnf16.rs
new file mode 100644
index 000000000..2209e1a17
--- /dev/null
+++ b/libm/src/math/scalbnf16.rs
@@ -0,0 +1,4 @@
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn scalbnf16(x: f16, n: i32) -> f16 {
+    super::generic::scalbn(x, n) // thin wrapper over the float-generic routine
+}
diff --git a/src/math/sin.rs b/libm/src/math/sin.rs
similarity index 87%
rename from src/math/sin.rs
rename to libm/src/math/sin.rs
index e04e0d6a0..229fa4bef 100644
--- a/src/math/sin.rs
+++ b/libm/src/math/sin.rs
@@ -81,12 +81,15 @@ pub fn sin(x: f64) -> f64 {
     }
 }
 
-#[test]
-fn test_near_pi() {
-    let x = f64::from_bits(0x400921fb000FD5DD); // 3.141592026217707
-    let sx = f64::from_bits(0x3ea50d15ced1a4a2); // 6.273720864039205e-7
-    let result = sin(x);
-    #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))]
-    let result = force_eval!(result);
-    assert_eq!(result, sx);
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    #[cfg_attr(x86_no_sse, ignore = "FIXME(i586): possible incorrect rounding")]
+    fn test_near_pi() {
+        let x = f64::from_bits(0x400921fb000FD5DD); // 3.141592026217707
+        let sx = f64::from_bits(0x3ea50d15ced1a4a2); // 6.273720864039205e-7
+        assert_eq!(sin(x), sx);
+    }
 }
diff --git a/src/math/sincos.rs b/libm/src/math/sincos.rs
similarity index 100%
rename from src/math/sincos.rs
rename to libm/src/math/sincos.rs
diff --git a/src/math/sincosf.rs b/libm/src/math/sincosf.rs
similarity index 90%
rename from src/math/sincosf.rs
rename to libm/src/math/sincosf.rs
index 423845e44..f33607676 100644
--- a/src/math/sincosf.rs
+++ b/libm/src/math/sincosf.rs
@@ -67,14 +67,12 @@ pub fn sincosf(x: f32) -> (f32, f32) {
             }
         }
         /* -sin(x+c) is not correct if x+c could be 0: -0 vs +0 */
-        else {
-            if sign {
-                s = -k_sinf(x as f64 + S2PIO2);
-                c = -k_cosf(x as f64 + S2PIO2);
-            } else {
-                s = -k_sinf(x as f64 - S2PIO2);
-                c = -k_cosf(x as f64 - S2PIO2);
-            }
+        else if sign {
+            s = -k_sinf(x as f64 + S2PIO2);
+            c = -k_cosf(x as f64 + S2PIO2);
+        } else {
+            s = -k_sinf(x as f64 - S2PIO2);
+            c = -k_cosf(x as f64 - S2PIO2);
         }
 
         return (s, c);
@@ -91,14 +89,12 @@ pub fn sincosf(x: f32) -> (f32, f32) {
                 s = -k_cosf(x as f64 - S3PIO2);
                 c = k_sinf(x as f64 - S3PIO2);
             }
+        } else if sign {
+            s = k_sinf(x as f64 + S4PIO2);
+            c = k_cosf(x as f64 + S4PIO2);
         } else {
-            if sign {
-                s = k_sinf(x as f64 + S4PIO2);
-                c = k_cosf(x as f64 + S4PIO2);
-            } else {
-                s = k_sinf(x as f64 - S4PIO2);
-                c = k_cosf(x as f64 - S4PIO2);
-            }
+            s = k_sinf(x as f64 - S4PIO2);
+            c = k_cosf(x as f64 - S4PIO2);
         }
 
         return (s, c);
diff --git a/src/math/sinf.rs b/libm/src/math/sinf.rs
similarity index 100%
rename from src/math/sinf.rs
rename to libm/src/math/sinf.rs
diff --git a/src/math/sinh.rs b/libm/src/math/sinh.rs
similarity index 100%
rename from src/math/sinh.rs
rename to libm/src/math/sinh.rs
diff --git a/src/math/sinhf.rs b/libm/src/math/sinhf.rs
similarity index 100%
rename from src/math/sinhf.rs
rename to libm/src/math/sinhf.rs
diff --git a/libm/src/math/sqrt.rs b/libm/src/math/sqrt.rs
new file mode 100644
index 000000000..76bc240cf
--- /dev/null
+++ b/libm/src/math/sqrt.rs
@@ -0,0 +1,51 @@
+/// The square root of `x` (f16); the tail expression forwards to the float-generic `generic::sqrt`.
+#[cfg(f16_enabled)]
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn sqrtf16(x: f16) -> f16 {
+    select_implementation! {
+        name: sqrtf16,
+        use_arch: all(target_arch = "aarch64", target_feature = "fp16"),
+        args: x,
+    }
+
+    super::generic::sqrt(x)
+}
+
+/// The square root of `x` (f32); the tail expression forwards to the float-generic `generic::sqrt`.
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn sqrtf(x: f32) -> f32 {
+    select_implementation! {
+        name: sqrtf,
+        use_arch: any(
+            all(target_arch = "aarch64", target_feature = "neon"),
+            all(target_arch = "wasm32", intrinsics_enabled),
+            target_feature = "sse2"
+        ),
+        args: x,
+    }
+
+    super::generic::sqrt(x)
+}
+
+/// The square root of `x` (f64); the tail expression forwards to the float-generic `generic::sqrt`.
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn sqrt(x: f64) -> f64 {
+    select_implementation! {
+        name: sqrt,
+        use_arch: any(
+            all(target_arch = "aarch64", target_feature = "neon"),
+            all(target_arch = "wasm32", intrinsics_enabled),
+            target_feature = "sse2"
+        ),
+        args: x,
+    }
+
+    super::generic::sqrt(x)
+}
+
+/// The square root of `x` (f128); forwards directly to the float-generic `generic::sqrt`.
+#[cfg(f128_enabled)]
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn sqrtf128(x: f128) -> f128 {
+    super::generic::sqrt(x)
+}
diff --git a/libm/src/math/sqrtf.rs b/libm/src/math/sqrtf.rs
new file mode 100644
index 000000000..c28a705e3
--- /dev/null
+++ b/libm/src/math/sqrtf.rs
@@ -0,0 +1,15 @@
+/// The square root of `x` (f32); the tail expression forwards to the float-generic `generic::sqrt`.
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn sqrtf(x: f32) -> f32 {
+    select_implementation! {
+        name: sqrtf,
+        use_arch: any(
+            all(target_arch = "aarch64", target_feature = "neon"),
+            all(target_arch = "wasm32", intrinsics_enabled),
+            target_feature = "sse2"
+        ),
+        args: x,
+    }
+
+    super::generic::sqrt(x)
+}
diff --git a/libm/src/math/sqrtf128.rs b/libm/src/math/sqrtf128.rs
new file mode 100644
index 000000000..eaef6ae0c
--- /dev/null
+++ b/libm/src/math/sqrtf128.rs
@@ -0,0 +1,5 @@
+/// The square root of `x` (f128); forwards directly to the float-generic `generic::sqrt`.
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn sqrtf128(x: f128) -> f128 {
+    super::generic::sqrt(x)
+}
diff --git a/libm/src/math/sqrtf16.rs b/libm/src/math/sqrtf16.rs
new file mode 100644
index 000000000..7bedb7f8b
--- /dev/null
+++ b/libm/src/math/sqrtf16.rs
@@ -0,0 +1,11 @@
+/// The square root of `x` (f16); the tail expression forwards to the float-generic `generic::sqrt`.
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn sqrtf16(x: f16) -> f16 {
+    select_implementation! {
+        name: sqrtf16,
+        use_arch: all(target_arch = "aarch64", target_feature = "fp16"),
+        args: x,
+    }
+
+    super::generic::sqrt(x)
+}
diff --git a/libm/src/math/support/big.rs b/libm/src/math/support/big.rs
new file mode 100644
index 000000000..eae08238e
--- /dev/null
+++ b/libm/src/math/support/big.rs
@@ -0,0 +1,239 @@
+//! Integers used for wide operations, larger than `u128`.
+
+#[cfg(test)]
+mod tests;
+
+use core::ops;
+
+use super::{DInt, HInt, Int, MinInt};
+
+const U128_LO_MASK: u128 = u64::MAX as u128;
+
+/// A 256-bit unsigned integer represented as two 128-bit native-endian limbs.
+#[allow(non_camel_case_types)]
+#[derive(Clone, Copy, Debug, PartialEq, PartialOrd)]
+pub struct u256 {
+    pub lo: u128,
+    pub hi: u128,
+}
+
+impl u256 {
+    #[cfg(any(test, feature = "unstable-public-internals"))]
+    pub const MAX: Self = Self { lo: u128::MAX, hi: u128::MAX };
+
+    /// Reinterpret as a signed integer
+    pub fn signed(self) -> i256 {
+        i256 { lo: self.lo, hi: self.hi }
+    }
+}
+
+/// A 256-bit signed integer represented as two 128-bit native-endian limbs.
+#[allow(non_camel_case_types)]
+#[derive(Clone, Copy, Debug, PartialEq, PartialOrd)]
+pub struct i256 {
+    pub lo: u128,
+    pub hi: u128,
+}
+
+impl i256 {
+    /// Reinterpret as an unsigned integer
+    #[cfg(any(test, feature = "unstable-public-internals"))]
+    pub fn unsigned(self) -> u256 {
+        u256 { lo: self.lo, hi: self.hi }
+    }
+}
+
+impl MinInt for u256 {
+    type OtherSign = i256;
+
+    type Unsigned = u256;
+
+    const SIGNED: bool = false;
+    const BITS: u32 = 256;
+    const ZERO: Self = Self { lo: 0, hi: 0 };
+    const ONE: Self = Self { lo: 1, hi: 0 };
+    const MIN: Self = Self { lo: 0, hi: 0 };
+    const MAX: Self = Self { lo: u128::MAX, hi: u128::MAX };
+}
+
+impl MinInt for i256 {
+    type OtherSign = u256;
+    // Both limbs are stored as raw `u128` bits, so the unsigned counterpart is also `u256`.
+    type Unsigned = u256;
+    // Two's-complement representation: bit 127 of `hi` is the sign bit of the whole value.
+    const SIGNED: bool = true;
+    const BITS: u32 = 256;
+    const ZERO: Self = Self { lo: 0, hi: 0 };
+    const ONE: Self = Self { lo: 1, hi: 0 };
+    const MIN: Self = Self { lo: 0, hi: 1 << 127 }; // only the sign bit set
+    const MAX: Self = Self { lo: u128::MAX, hi: u128::MAX >> 1 }; // sign bit clear, rest set
+}
+
+macro_rules! impl_common { // Operator impls that are identical for both 256-bit types.
+    ($ty:ty) => {
+        impl ops::BitOr for $ty {
+            type Output = Self;
+            // Limb-wise OR; no interaction between `lo` and `hi`.
+            fn bitor(mut self, rhs: Self) -> Self::Output {
+                self.lo |= rhs.lo;
+                self.hi |= rhs.hi;
+                self
+            }
+        }
+
+        impl ops::Not for $ty {
+            type Output = Self;
+            // Limb-wise complement.
+            fn not(mut self) -> Self::Output {
+                self.lo = !self.lo;
+                self.hi = !self.hi;
+                self
+            }
+        }
+
+        impl ops::Shl<u32> for $ty {
+            type Output = Self;
+            // Placeholder: any use of `<<` on these types panics at runtime.
+            fn shl(self, _rhs: u32) -> Self::Output {
+                unimplemented!("only used to meet trait bounds")
+            }
+        }
+    };
+}
+
+impl_common!(i256);
+impl_common!(u256);
+
+impl ops::Add<Self> for u256 {
+    type Output = Self;
+    /// Wrapping 256-bit addition: a carry out of `lo` goes into `hi`; a carry out of `hi` is lost.
+    fn add(self, rhs: Self) -> Self::Output {
+        let (low_sum, carried) = self.lo.overflowing_add(rhs.lo);
+        let high_sum = self.hi.wrapping_add(rhs.hi).wrapping_add(u128::from(carried));
+        // Reassemble the two limbs.
+        Self { lo: low_sum, hi: high_sum }
+    }
+}
+
+impl ops::Shr<u32> for u256 {
+    type Output = Self;
+
+    /// Logical right shift by `rhs` bits.
+    ///
+    /// Shifting by `Self::BITS` or more fails the debug assertion, or yields zero without it.
+    fn shr(self, rhs: u32) -> Self::Output {
+        debug_assert!(rhs < Self::BITS, "attempted to shift right with overflow");
+
+        let Self { lo, hi } = self;
+        match rhs {
+            // Nothing moves.
+            0 => self,
+            // The bits leaving `hi` become the top bits of `lo`. `n` is nonzero in this arm,
+            // so `128 - n` is always a valid shift amount.
+            n if n < 128 => Self {
+                lo: (lo >> n) | (hi << (128 - n)),
+                hi: hi >> n,
+            },
+            // Only `hi` survives, and it lands entirely in `lo`.
+            n if n < Self::BITS => Self {
+                lo: hi >> (n - 128),
+                hi: 0,
+            },
+            // Out of range: every bit is shifted out.
+            _ => Self::ZERO,
+        }
+    }
+}
+
+impl HInt for u128 {
+    type D = u256;
+    /// Zero-extend `self` into the low limb of a `u256`.
+    fn widen(self) -> Self::D {
+        u256 { lo: self, hi: 0 }
+    }
+    /// Same as `widen`: an unsigned value is always extended with zeros.
+    fn zero_widen(self) -> Self::D {
+        self.widen()
+    }
+    /// Full 128x128 -> 256-bit product, assembled from four 64x64 -> 128-bit partial products.
+    fn zero_widen_mul(self, rhs: Self) -> Self::D {
+        let l0 = self & U128_LO_MASK;
+        let l1 = rhs & U128_LO_MASK;
+        let h0 = self >> 64;
+        let h1 = rhs >> 64;
+
+        let p_ll: u128 = l0.overflowing_mul(l1).0;
+        let p_lh: u128 = l0.overflowing_mul(h1).0;
+        let p_hl: u128 = h0.overflowing_mul(l1).0;
+        let p_hh: u128 = h0.overflowing_mul(h1).0;
+
+        let s0 = p_hl + (p_ll >> 64);
+        let s1 = (p_ll & U128_LO_MASK) + (s0 << 64);
+        let s2 = p_lh + (s1 >> 64);
+
+        let lo = (p_ll & U128_LO_MASK) + (s2 << 64);
+        let hi = p_hh + (s0 >> 64) + (s2 >> 64);
+
+        u256 { lo, hi }
+    }
+    /// Unsigned widening multiply; identical to `zero_widen_mul`.
+    fn widen_mul(self, rhs: Self) -> Self::D {
+        self.zero_widen_mul(rhs)
+    }
+    /// Put `self` in the high limb with a zero low limb; built directly as `<<` on `u256` panics.
+    fn widen_hi(self) -> Self::D {
+        u256 { lo: 0, hi: self }
+    }
+}
+
+impl HInt for i128 {
+    type D = i256;
+    /// Sign-extend into an `i256`: the high limb becomes all ones for negative inputs.
+    fn widen(self) -> Self::D {
+        let mut ret = self.unsigned().zero_widen().signed();
+        if self.is_negative() {
+            ret.hi = u128::MAX;
+        }
+        ret
+    }
+    /// Zero-extend the raw bits of `self`, ignoring its sign.
+    fn zero_widen(self) -> Self::D {
+        self.unsigned().zero_widen().signed()
+    }
+    /// Multiply the raw bits of both operands as unsigned values, keeping all 256 bits.
+    fn zero_widen_mul(self, rhs: Self) -> Self::D {
+        self.unsigned().zero_widen_mul(rhs.unsigned()).signed()
+    }
+    /// Signed widening multiply is deliberately unsupported; calling it always panics.
+    fn widen_mul(self, _rhs: Self) -> Self::D {
+        unimplemented!("signed i128 widening multiply is not used")
+    }
+    /// Put the bits of `self` in the high limb; built directly because `<<` on `i256` panics.
+    fn widen_hi(self) -> Self::D {
+        i256 { lo: 0, hi: self as u128 }
+    }
+}
+
+impl DInt for u256 {
+    type H = u128;
+
+    fn lo(self) -> Self::H {
+        self.lo
+    }
+
+    fn hi(self) -> Self::H {
+        self.hi
+    }
+}
+
+impl DInt for i256 {
+    type H = i128;
+
+    fn lo(self) -> Self::H {
+        self.lo as i128
+    }
+
+    fn hi(self) -> Self::H {
+        self.hi as i128
+    }
+}
diff --git a/libm/src/math/support/big/tests.rs b/libm/src/math/support/big/tests.rs
new file mode 100644
index 000000000..2c71191ba
--- /dev/null
+++ b/libm/src/math/support/big/tests.rs
@@ -0,0 +1,149 @@
+extern crate std;
+use std::string::String;
+use std::{eprintln, format};
+
+use super::{HInt, MinInt, i256, u256};
+
+const LOHI_SPLIT: u128 = 0xaaaaaaaaaaaaaaaaffffffffffffffff;
+
+/// Format a `u256` as a `0x`-prefixed hex string since we can't add format implementations
+fn hexu(v: u256) -> String {
+    format!("0x{:032x}{:032x}", v.hi, v.lo) // high limb first, each limb padded to 32 digits
+}
+
+#[test]
+fn widen_u128() {
+    assert_eq!(u128::MAX.widen(), u256 { lo: u128::MAX, hi: 0 });
+    assert_eq!(LOHI_SPLIT.widen(), u256 { lo: LOHI_SPLIT, hi: 0 });
+}
+
+#[test]
+fn widen_i128() {
+    assert_eq!((-1i128).widen(), u256::MAX.signed());
+    assert_eq!((LOHI_SPLIT as i128).widen(), i256 { lo: LOHI_SPLIT, hi: u128::MAX });
+    assert_eq!((-1i128).zero_widen().unsigned(), (u128::MAX).widen());
+}
+
+#[test]
+fn widen_mul_u128() {
+    let tests = [
+        (u128::MAX / 2, 2_u128, u256 { lo: u128::MAX - 1, hi: 0 }),
+        (u128::MAX, 2_u128, u256 { lo: u128::MAX - 1, hi: 1 }),
+        (u128::MAX, u128::MAX, u256 { lo: 1, hi: u128::MAX - 1 }),
+        (0, 0, u256::ZERO),
+        (1234u128, 0, u256::ZERO),
+        (0, 1234, u256::ZERO),
+    ];
+
+    let mut has_errors = false;
+    let mut add_error = |i, a, b, expected, actual| {
+        has_errors = true;
+        eprintln!(
+            "\
+            FAILURE ({i}): {a:#034x} * {b:#034x}\n\
+            expected: {}\n\
+            got:      {}\
+            ",
+            hexu(expected),
+            hexu(actual)
+        );
+    };
+
+    for (i, (a, b, exp)) in tests.iter().copied().enumerate() {
+        let res = a.widen_mul(b);
+        let res_z = a.zero_widen_mul(b);
+        assert_eq!(res, res_z);
+        if res != exp {
+            add_error(i, a, b, exp, res);
+        }
+    }
+
+    assert!(!has_errors);
+}
+
+#[test]
+fn not_u256() {
+    assert_eq!(!u256::ZERO, u256::MAX);
+}
+
+#[test]
+fn shr_u256() {
+    let only_low = [1, u16::MAX.into(), u32::MAX.into(), u64::MAX.into(), u128::MAX];
+    let mut has_errors = false;
+
+    let mut add_error = |a, b, expected, actual| {
+        has_errors = true;
+        eprintln!(
+            "\
+            FAILURE:  {} >> {b}\n\
+            expected: {}\n\
+            actual:   {}\
+            ",
+            hexu(a),
+            hexu(expected),
+            hexu(actual),
+        );
+    };
+
+    for a in only_low {
+        for perturb in 0..10 {
+            let a = a.saturating_add(perturb);
+            for shift in 0..128 {
+                let res = a.widen() >> shift;
+                let expected = (a >> shift).widen();
+                if res != expected {
+                    add_error(a.widen(), shift, expected, res);
+                }
+            }
+        }
+    }
+
+    let check = [
+        (u256::MAX, 1, u256 { lo: u128::MAX, hi: u128::MAX >> 1 }),
+        (u256::MAX, 5, u256 { lo: u128::MAX, hi: u128::MAX >> 5 }),
+        (u256::MAX, 63, u256 { lo: u128::MAX, hi: u64::MAX as u128 | (1 << 64) }),
+        (u256::MAX, 64, u256 { lo: u128::MAX, hi: u64::MAX as u128 }),
+        (u256::MAX, 65, u256 { lo: u128::MAX, hi: (u64::MAX >> 1) as u128 }),
+        (u256::MAX, 127, u256 { lo: u128::MAX, hi: 1 }),
+        (u256::MAX, 128, u256 { lo: u128::MAX, hi: 0 }),
+        (u256::MAX, 129, u256 { lo: u128::MAX >> 1, hi: 0 }),
+        (u256::MAX, 191, u256 { lo: u64::MAX as u128 | 1 << 64, hi: 0 }),
+        (u256::MAX, 192, u256 { lo: u64::MAX as u128, hi: 0 }),
+        (u256::MAX, 193, u256 { lo: u64::MAX as u128 >> 1, hi: 0 }),
+        (u256::MAX, 254, u256 { lo: 0b11, hi: 0 }),
+        (u256::MAX, 255, u256 { lo: 1, hi: 0 }),
+        (
+            u256 { hi: LOHI_SPLIT, lo: 0 },
+            64,
+            u256 { lo: 0xffffffffffffffff0000000000000000, hi: 0xaaaaaaaaaaaaaaaa },
+        ),
+    ];
+
+    for (input, shift, expected) in check {
+        let res = input >> shift;
+        if res != expected {
+            add_error(input, shift, expected, res);
+        }
+    }
+
+    assert!(!has_errors);
+}
+
+#[test]
+#[should_panic]
+#[cfg(debug_assertions)]
+// FIXME(ppc): ppc64le seems to have issues with `should_panic` tests.
+#[cfg(not(all(target_arch = "powerpc64", target_endian = "little")))]
+fn shr_u256_overflow() {
+    // Like regular shr, panic on overflow with debug assertions
+    let _ = u256::MAX >> 256;
+}
+
+#[test]
+#[cfg(not(debug_assertions))]
+fn shr_u256_overflow() {
+    // No panic without debug assertions
+    assert_eq!(u256::MAX >> 256, u256::ZERO);
+    assert_eq!(u256::MAX >> 257, u256::ZERO);
+    assert_eq!(u256::MAX >> u32::MAX, u256::ZERO);
+}
diff --git a/libm/src/math/support/env.rs b/libm/src/math/support/env.rs
new file mode 100644
index 000000000..796309372
--- /dev/null
+++ b/libm/src/math/support/env.rs
@@ -0,0 +1,127 @@
+//! Support for rounding directions and status flags as specified by IEEE 754.
+//!
+//! Rust does not support the floating point environment so rounding mode is passed as an argument
+//! and status flags are returned as part of the result. There is currently not much support for
+//! this; most existing ports from musl use a form of `force_eval!` to raise exceptions, but this
+//! has no side effects in Rust. Further, correct behavior relies on elementary operations making
+//! use of the correct rounding and raising relevant exceptions, which is not the case for Rust.
+//!
+//! This module exists so no functionality is lost when porting algorithms that respect floating
+//! point environment, and so that some functionality may be tested (that which does not rely on
+//! side effects from elementary operations). Full support would require wrappers around basic
+//! operations, but there is no plan to add this at the current time.
+
+/// A value combined with a floating point status.
+pub struct FpResult<T> {
+    pub val: T,
+    #[cfg_attr(not(feature = "unstable-public-internals"), allow(dead_code))]
+    pub status: Status,
+}
+
+impl<T> FpResult<T> {
+    pub fn new(val: T, status: Status) -> Self {
+        Self { val, status }
+    }
+
+    /// Return `val` with `Status::OK`.
+    pub fn ok(val: T) -> Self {
+        Self { val, status: Status::OK }
+    }
+}
+
+/// IEEE 754 rounding mode, excluding the optional `roundTiesToAway` version of nearest.
+///
+/// Integer representation comes from what CORE-MATH uses for indexing.
+#[cfg_attr(not(feature = "unstable-public-internals"), allow(dead_code))]
+#[derive(Clone, Copy, Debug, PartialEq)]
+pub enum Round {
+    /// IEEE 754 nearest, `roundTiesToEven`.
+    Nearest = 0,
+    /// IEEE 754 `roundTowardNegative`.
+    Negative = 1,
+    /// IEEE 754 `roundTowardPositive`.
+    Positive = 2,
+    /// IEEE 754 `roundTowardZero`.
+    Zero = 3,
+}
+
+/// IEEE 754 exception status flags.
+#[derive(Clone, Copy, Debug, PartialEq, Eq)]
+pub struct Status(u8);
+
+impl Status {
+    /// Default status indicating no errors.
+    pub const OK: Self = Self(0);
+
+    /// No definable result.
+    ///
+    /// Includes:
+    /// - Any ops on sNaN, with a few exceptions.
+    /// - `0 * inf`, `inf * 0`.
+    /// - `fma(0, inf, c)` or `fma(inf, 0, c)`, possibly excluding `c = qNaN`.
+    /// - `+inf + -inf` and similar (includes subtraction and fma).
+    /// - `0.0 / 0.0`, `inf / inf`
+    /// - `remainder(x, y)` if `y == 0.0` or `x == inf`, and neither is NaN.
+    /// - `sqrt(x)` with `x < 0.0`.
+    pub const INVALID: Self = Self(1);
+
+    /// Division by zero.
+    ///
+    /// Raised for `x / y` when `x != 0.0` and `y == 0.0`.
+    /// The default result for division is +/-inf based on operand sign. For `logB`, the default
+    /// result is -inf.
+    #[cfg_attr(not(feature = "unstable-public-internals"), allow(dead_code))]
+    pub const DIVIDE_BY_ZERO: Self = Self(1 << 2);
+
+    /// The result exceeds the maximum finite value.
+    ///
+    /// The default result depends on rounding mode. `Nearest*` rounds to +/- infinity, sign based
+    /// on the intermediate result. `Zero` rounds to the signed maximum finite. `Positive` and
+    /// `Negative` round to signed maximum finite in one direction, signed infinity in the other.
+    #[cfg_attr(not(feature = "unstable-public-internals"), allow(dead_code))]
+    pub const OVERFLOW: Self = Self(1 << 3);
+
+    /// The result is subnormal and lost precision.
+    pub const UNDERFLOW: Self = Self(1 << 4);
+
+    /// The finite-precision result does not match that of infinite precision, and the reason
+    /// is not represented by one of the other flags.
+    pub const INEXACT: Self = Self(1 << 5);
+
+    /// True if `UNDERFLOW` is set.
+    #[cfg_attr(not(feature = "unstable-public-internals"), allow(dead_code))]
+    pub const fn underflow(self) -> bool {
+        self.0 & Self::UNDERFLOW.0 != 0
+    }
+
+    /// True if `OVERFLOW` is set.
+    #[cfg_attr(not(feature = "unstable-public-internals"), allow(dead_code))]
+    pub const fn overflow(self) -> bool {
+        self.0 & Self::OVERFLOW.0 != 0
+    }
+
+    pub fn set_underflow(&mut self, val: bool) {
+        self.set_flag(val, Self::UNDERFLOW);
+    }
+
+    /// True if `INEXACT` is set.
+    pub const fn inexact(self) -> bool {
+        self.0 & Self::INEXACT.0 != 0
+    }
+
+    pub fn set_inexact(&mut self, val: bool) {
+        self.set_flag(val, Self::INEXACT);
+    }
+
+    fn set_flag(&mut self, val: bool, mask: Self) {
+        if val {
+            self.0 |= mask.0;
+        } else {
+            self.0 &= !mask.0;
+        }
+    }
+
+    pub(crate) const fn with(self, rhs: Self) -> Self {
+        Self(self.0 | rhs.0)
+    }
+}
diff --git a/libm/src/math/support/float_traits.rs b/libm/src/math/support/float_traits.rs
new file mode 100644
index 000000000..fac104832
--- /dev/null
+++ b/libm/src/math/support/float_traits.rs
@@ -0,0 +1,484 @@
+use core::{fmt, mem, ops};
+
+use super::int_traits::{CastFrom, Int, MinInt};
+
+/// Trait for some basic operations on floats
+// #[allow(dead_code)]
+pub trait Float:
+    Copy
+    + fmt::Debug
+    + PartialEq
+    + PartialOrd
+    + ops::AddAssign
+    + ops::MulAssign
+    + ops::Add<Output = Self>
+    + ops::Sub<Output = Self>
+    + ops::Mul<Output = Self>
+    + ops::Div<Output = Self>
+    + ops::Rem<Output = Self>
+    + ops::Neg<Output = Self>
+    + 'static
+{
+    /// A uint of the same width as the float
+    type Int: Int<OtherSign = Self::SignedInt, Unsigned = Self::Int>;
+
+    /// A int of the same width as the float
+    type SignedInt: Int
+        + MinInt<OtherSign = Self::Int, Unsigned = Self::Int>
+        + ops::Neg<Output = Self::SignedInt>;
+
+    const ZERO: Self;
+    const NEG_ZERO: Self;
+    const ONE: Self;
+    const NEG_ONE: Self;
+    const INFINITY: Self;
+    const NEG_INFINITY: Self;
+    const NAN: Self;
+    const NEG_NAN: Self;
+    const MAX: Self;
+    const MIN: Self;
+    const EPSILON: Self;
+    const PI: Self;
+    const NEG_PI: Self;
+    const FRAC_PI_2: Self;
+
+    const MIN_POSITIVE_NORMAL: Self;
+
+    /// The bitwidth of the float type
+    const BITS: u32;
+
+    /// The bitwidth of the significand
+    const SIG_BITS: u32;
+
+    /// The bitwidth of the exponent
+    const EXP_BITS: u32 = Self::BITS - Self::SIG_BITS - 1;
+
+    /// The saturated (maximum bitpattern) value of the exponent, i.e. the infinite
+    /// representation.
+    ///
+    /// This is shifted fully right; use `EXP_MASK` for the same bits in their in-place position.
+    const EXP_SAT: u32 = (1 << Self::EXP_BITS) - 1;
+
+    /// The exponent bias value
+    const EXP_BIAS: u32 = Self::EXP_SAT >> 1;
+
+    /// Maximum unbiased exponent value.
+    const EXP_MAX: i32 = Self::EXP_BIAS as i32;
+
+    /// Minimum *NORMAL* unbiased exponent value.
+    const EXP_MIN: i32 = -(Self::EXP_MAX - 1);
+
+    /// Minimum subnormal exponent value.
+    const EXP_MIN_SUBNORM: i32 = Self::EXP_MIN - Self::SIG_BITS as i32;
+
+    /// A mask for the sign bit
+    const SIGN_MASK: Self::Int;
+
+    /// A mask for the significand
+    const SIG_MASK: Self::Int;
+
+    /// A mask for the exponent
+    const EXP_MASK: Self::Int;
+
+    /// The implicit bit of the float format
+    const IMPLICIT_BIT: Self::Int;
+
+    /// Returns `self` transmuted to `Self::Int`
+    fn to_bits(self) -> Self::Int;
+
+    /// Returns `self` transmuted to `Self::SignedInt`
+    #[allow(dead_code)]
+    fn to_bits_signed(self) -> Self::SignedInt {
+        self.to_bits().signed()
+    }
+
+    /// Check bitwise equality.
+    #[allow(dead_code)]
+    fn biteq(self, rhs: Self) -> bool {
+        self.to_bits() == rhs.to_bits()
+    }
+
+    /// Checks if two floats have the same bit representation. *Except* for NaNs! NaN can be
+    /// represented in multiple different ways.
+    ///
+    /// This method returns `true` if two NaNs are compared. Use [`biteq`](Self::biteq) instead
+    /// if `NaN` should not be treated separately.
+    #[allow(dead_code)]
+    fn eq_repr(self, rhs: Self) -> bool {
+        if self.is_nan() && rhs.is_nan() { true } else { self.biteq(rhs) }
+    }
+
+    /// Returns true if the value is NaN.
+    fn is_nan(self) -> bool;
+
+    /// Returns true if the value is +inf or -inf.
+    fn is_infinite(self) -> bool;
+
+    /// Returns true if the sign is negative. Extracts the sign bit regardless of zero or NaN.
+    fn is_sign_negative(self) -> bool;
+
+    /// Returns true if the sign is positive. Extracts the sign bit regardless of zero or NaN.
+    fn is_sign_positive(self) -> bool {
+        !self.is_sign_negative()
+    }
+
+    /// Returns true if the exponent field is all zeros, i.e. `self` is subnormal or zero.
+    #[allow(dead_code)]
+    fn is_subnormal(self) -> bool {
+        (self.to_bits() & Self::EXP_MASK) == Self::Int::ZERO
+    }
+
+    /// Returns the exponent, not adjusting for bias, not accounting for subnormals or zero.
+    fn ex(self) -> u32 {
+        u32::cast_from(self.to_bits() >> Self::SIG_BITS) & Self::EXP_SAT
+    }
+
+    /// Extract the exponent and adjust it for bias, not accounting for subnormals or zero.
+    fn exp_unbiased(self) -> i32 {
+        self.ex().signed() - (Self::EXP_BIAS as i32)
+    }
+
+    /// Returns the significand with no implicit bit (or the "fractional" part)
+    #[allow(dead_code)]
+    fn frac(self) -> Self::Int {
+        self.to_bits() & Self::SIG_MASK
+    }
+
+    /// Returns a `Self::Int` transmuted back to `Self`
+    fn from_bits(a: Self::Int) -> Self;
+
+    /// Constructs a `Self` from its parts. Inputs are treated as bits and shifted into position.
+    fn from_parts(negative: bool, exponent: u32, significand: Self::Int) -> Self {
+        let sign = if negative { Self::Int::ONE } else { Self::Int::ZERO };
+        Self::from_bits(
+            (sign << (Self::BITS - 1))
+                | (Self::Int::cast_from(exponent & Self::EXP_SAT) << Self::SIG_BITS)
+                | (significand & Self::SIG_MASK),
+        )
+    }
+
+    #[allow(dead_code)]
+    fn abs(self) -> Self;
+
+    /// Returns a number composed of the magnitude of self and the sign of sign.
+    fn copysign(self, other: Self) -> Self;
+
+    /// Fused multiply add, rounding once.
+    fn fma(self, y: Self, z: Self) -> Self;
+
+    /// Returns (normalized exponent, normalized significand)
+    #[allow(dead_code)]
+    fn normalize(significand: Self::Int) -> (i32, Self::Int);
+
+    /// Returns a number that represents the sign of self.
+    #[allow(dead_code)]
+    fn signum(self) -> Self {
+        if self.is_nan() { self } else { Self::ONE.copysign(self) }
+    }
+}
+
+/// Access the associated `Int` type from a float (helper to avoid ambiguous associated types).
+pub type IntTy<F> = <F as Float>::Int;
+
+macro_rules! float_impl {
+    (
+        $ty:ident,
+        $ity:ident,
+        $sity:ident,
+        $bits:expr,
+        $significand_bits:expr,
+        $from_bits:path,
+        $to_bits:path,
+        $fma_fn:ident,
+        $fma_intrinsic:ident
+    ) => {
+        impl Float for $ty {
+            type Int = $ity;
+            type SignedInt = $sity;
+
+            const ZERO: Self = 0.0;
+            const NEG_ZERO: Self = -0.0;
+            const ONE: Self = 1.0;
+            const NEG_ONE: Self = -1.0;
+            const INFINITY: Self = Self::INFINITY;
+            const NEG_INFINITY: Self = Self::NEG_INFINITY;
+            const NAN: Self = Self::NAN;
+            // NAN isn't guaranteed to be positive but it usually is. We only use this for
+            // tests.
+            const NEG_NAN: Self = $from_bits($to_bits(Self::NAN) | Self::SIGN_MASK);
+            const MAX: Self = -Self::MIN;
+            // Sign bit set, saturated mantissa, saturated exponent with last bit zeroed
+            const MIN: Self = $from_bits(Self::Int::MAX & !(1 << Self::SIG_BITS));
+            const EPSILON: Self = <$ty>::EPSILON;
+
+            // Exponent is a 1 in the LSB
+            const MIN_POSITIVE_NORMAL: Self = $from_bits(1 << Self::SIG_BITS);
+
+            const PI: Self = core::$ty::consts::PI;
+            const NEG_PI: Self = -Self::PI;
+            const FRAC_PI_2: Self = core::$ty::consts::FRAC_PI_2;
+
+            const BITS: u32 = $bits;
+            const SIG_BITS: u32 = $significand_bits;
+
+            const SIGN_MASK: Self::Int = 1 << (Self::BITS - 1);
+            const SIG_MASK: Self::Int = (1 << Self::SIG_BITS) - 1;
+            const EXP_MASK: Self::Int = !(Self::SIGN_MASK | Self::SIG_MASK);
+            const IMPLICIT_BIT: Self::Int = 1 << Self::SIG_BITS;
+
+            fn to_bits(self) -> Self::Int {
+                self.to_bits()
+            }
+            fn is_nan(self) -> bool {
+                self.is_nan()
+            }
+            fn is_infinite(self) -> bool {
+                self.is_infinite()
+            }
+            fn is_sign_negative(self) -> bool {
+                self.is_sign_negative()
+            }
+            fn from_bits(a: Self::Int) -> Self {
+                Self::from_bits(a)
+            }
+            fn abs(self) -> Self {
+                cfg_if! {
+                    // FIXME(msrv): `abs` is available in `core` starting with 1.85.
+                    if #[cfg(intrinsics_enabled)] {
+                        self.abs()
+                    } else {
+                        super::super::generic::fabs(self)
+                    }
+                }
+            }
+            fn copysign(self, other: Self) -> Self {
+                cfg_if! {
+                    // FIXME(msrv): `copysign` is available in `core` starting with 1.85.
+                    if #[cfg(intrinsics_enabled)] {
+                        self.copysign(other)
+                    } else {
+                        super::super::generic::copysign(self, other)
+                    }
+                }
+            }
+            fn fma(self, y: Self, z: Self) -> Self {
+                cfg_if! {
+                    // fma is not yet available in `core`
+                    if #[cfg(intrinsics_enabled)] {
+                        unsafe{ core::intrinsics::$fma_intrinsic(self, y, z) }
+                    } else {
+                        super::super::$fma_fn(self, y, z)
+                    }
+                }
+            }
+            fn normalize(significand: Self::Int) -> (i32, Self::Int) {
+                let shift = significand.leading_zeros().wrapping_sub(Self::EXP_BITS);
+                (1i32.wrapping_sub(shift as i32), significand << shift as Self::Int)
+            }
+        }
+    };
+}
+
+#[cfg(f16_enabled)]
+float_impl!(f16, u16, i16, 16, 10, f16::from_bits, f16::to_bits, fmaf16, fmaf16);
+float_impl!(f32, u32, i32, 32, 23, f32_from_bits, f32_to_bits, fmaf, fmaf32);
+float_impl!(f64, u64, i64, 64, 52, f64_from_bits, f64_to_bits, fma, fmaf64);
+#[cfg(f128_enabled)]
+float_impl!(f128, u128, i128, 128, 112, f128::from_bits, f128::to_bits, fmaf128, fmaf128);
+
+/* FIXME(msrv): vendor some things that are not const stable at our MSRV */
+
+/// `f32::from_bits`
+pub const fn f32_from_bits(bits: u32) -> f32 {
+    // SAFETY: POD cast with no preconditions
+    unsafe { mem::transmute::<u32, f32>(bits) }
+}
+
+/// `f32::to_bits`
+pub const fn f32_to_bits(x: f32) -> u32 {
+    // SAFETY: POD cast with no preconditions
+    unsafe { mem::transmute::<f32, u32>(x) }
+}
+
+/// `f64::from_bits`
+pub const fn f64_from_bits(bits: u64) -> f64 {
+    // SAFETY: POD cast with no preconditions
+    unsafe { mem::transmute::<u64, f64>(bits) }
+}
+
+/// `f64::to_bits`
+pub const fn f64_to_bits(x: f64) -> u64 {
+    // SAFETY: POD cast with no preconditions
+    unsafe { mem::transmute::<f64, u64>(x) }
+}
+
+/// Trait for floats twice the bit width of another float.
+pub trait DFloat: Float {
+    /// Float that is half the bit width of the float this trait is implemented for.
+    type H: HFloat<D = Self>;
+
+    /// Narrow to the half-width float type `Self::H`.
+    fn narrow(self) -> Self::H;
+}
+
+/// Trait for floats half the bit width of another float.
+pub trait HFloat: Float {
+    /// Float that is double the bit width of the float this trait is implemented for.
+    type D: DFloat<H = Self>;
+
+    /// Widen the float type.
+    fn widen(self) -> Self::D;
+}
+
+macro_rules! impl_d_float {
+    ($($X:ident $D:ident),*) => {
+        $(
+            impl DFloat for $D {
+                type H = $X;
+
+                fn narrow(self) -> Self::H {
+                    self as $X
+                }
+            }
+        )*
+    };
+}
+
+macro_rules! impl_h_float {
+    ($($H:ident $X:ident),*) => {
+        $(
+            impl HFloat for $H {
+                type D = $X;
+
+                fn widen(self) -> Self::D {
+                    self as $X
+                }
+            }
+        )*
+    };
+}
+
+impl_d_float!(f32 f64);
+#[cfg(f16_enabled)]
+impl_d_float!(f16 f32);
+#[cfg(f128_enabled)]
+impl_d_float!(f64 f128);
+
+impl_h_float!(f32 f64);
+#[cfg(f16_enabled)]
+impl_h_float!(f16 f32);
+#[cfg(f128_enabled)]
+impl_h_float!(f64 f128);
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    #[cfg(f16_enabled)]
+    fn check_f16() {
+        // Constants
+        assert_eq!(f16::EXP_SAT, 0b11111);
+        assert_eq!(f16::EXP_BIAS, 15);
+        assert_eq!(f16::EXP_MAX, 15);
+        assert_eq!(f16::EXP_MIN, -14);
+        assert_eq!(f16::EXP_MIN_SUBNORM, -24);
+
+        // `exp_unbiased`
+        assert_eq!(f16::FRAC_PI_2.exp_unbiased(), 0);
+        assert_eq!((1.0f16 / 2.0).exp_unbiased(), -1);
+        assert_eq!(f16::MAX.exp_unbiased(), 15);
+        assert_eq!(f16::MIN.exp_unbiased(), 15);
+        assert_eq!(f16::MIN_POSITIVE.exp_unbiased(), -14);
+        // This is a convenience method and not ldexp, `exp_unbiased` does not return correct
+        // results for zero and subnormals.
+        assert_eq!(f16::ZERO.exp_unbiased(), -15);
+        assert_eq!(f16::from_bits(0x1).exp_unbiased(), -15);
+        assert_eq!(f16::MIN_POSITIVE, f16::MIN_POSITIVE_NORMAL);
+
+        // `from_parts`
+        assert_biteq!(f16::from_parts(true, f16::EXP_BIAS, 0), -1.0f16);
+        assert_biteq!(f16::from_parts(false, 0, 1), f16::from_bits(0x1));
+    }
+
+    #[test]
+    fn check_f32() {
+        // Constants
+        assert_eq!(f32::EXP_SAT, 0b11111111);
+        assert_eq!(f32::EXP_BIAS, 127);
+        assert_eq!(f32::EXP_MAX, 127);
+        assert_eq!(f32::EXP_MIN, -126);
+        assert_eq!(f32::EXP_MIN_SUBNORM, -149);
+
+        // `exp_unbiased`
+        assert_eq!(f32::FRAC_PI_2.exp_unbiased(), 0);
+        assert_eq!((1.0f32 / 2.0).exp_unbiased(), -1);
+        assert_eq!(f32::MAX.exp_unbiased(), 127);
+        assert_eq!(f32::MIN.exp_unbiased(), 127);
+        assert_eq!(f32::MIN_POSITIVE.exp_unbiased(), -126);
+        // This is a convenience method and not ldexp, `exp_unbiased` does not return correct
+        // results for zero and subnormals.
+        assert_eq!(f32::ZERO.exp_unbiased(), -127);
+        assert_eq!(f32::from_bits(0x1).exp_unbiased(), -127);
+        assert_eq!(f32::MIN_POSITIVE, f32::MIN_POSITIVE_NORMAL);
+
+        // `from_parts`
+        assert_biteq!(f32::from_parts(true, f32::EXP_BIAS, 0), -1.0f32);
+        assert_biteq!(f32::from_parts(false, 10 + f32::EXP_BIAS, 0), hf32!("0x1p10"));
+        assert_biteq!(f32::from_parts(false, 0, 1), f32::from_bits(0x1));
+    }
+
+    #[test]
+    fn check_f64() {
+        // Constants
+        assert_eq!(f64::EXP_SAT, 0b11111111111);
+        assert_eq!(f64::EXP_BIAS, 1023);
+        assert_eq!(f64::EXP_MAX, 1023);
+        assert_eq!(f64::EXP_MIN, -1022);
+        assert_eq!(f64::EXP_MIN_SUBNORM, -1074);
+
+        // `exp_unbiased`
+        assert_eq!(f64::FRAC_PI_2.exp_unbiased(), 0);
+        assert_eq!((1.0f64 / 2.0).exp_unbiased(), -1);
+        assert_eq!(f64::MAX.exp_unbiased(), 1023);
+        assert_eq!(f64::MIN.exp_unbiased(), 1023);
+        assert_eq!(f64::MIN_POSITIVE.exp_unbiased(), -1022);
+        // This is a convenience method and not ldexp, `exp_unbiased` does not return correct
+        // results for zero and subnormals.
+        assert_eq!(f64::ZERO.exp_unbiased(), -1023);
+        assert_eq!(f64::from_bits(0x1).exp_unbiased(), -1023);
+        assert_eq!(f64::MIN_POSITIVE, f64::MIN_POSITIVE_NORMAL);
+
+        // `from_parts`
+        assert_biteq!(f64::from_parts(true, f64::EXP_BIAS, 0), -1.0f64);
+        assert_biteq!(f64::from_parts(false, 10 + f64::EXP_BIAS, 0), hf64!("0x1p10"));
+        assert_biteq!(f64::from_parts(false, 0, 1), f64::from_bits(0x1));
+    }
+
+    #[test]
+    #[cfg(f128_enabled)]
+    fn check_f128() {
+        // Constants
+        assert_eq!(f128::EXP_SAT, 0b111111111111111);
+        assert_eq!(f128::EXP_BIAS, 16383);
+        assert_eq!(f128::EXP_MAX, 16383);
+        assert_eq!(f128::EXP_MIN, -16382);
+        assert_eq!(f128::EXP_MIN_SUBNORM, -16494);
+
+        // `exp_unbiased`
+        assert_eq!(f128::FRAC_PI_2.exp_unbiased(), 0);
+        assert_eq!((1.0f128 / 2.0).exp_unbiased(), -1);
+        assert_eq!(f128::MAX.exp_unbiased(), 16383);
+        assert_eq!(f128::MIN.exp_unbiased(), 16383);
+        assert_eq!(f128::MIN_POSITIVE.exp_unbiased(), -16382);
+        // This is a convenience method and not ldexp, `exp_unbiased` does not return correct
+        // results for zero and subnormals.
+        assert_eq!(f128::ZERO.exp_unbiased(), -16383);
+        assert_eq!(f128::from_bits(0x1).exp_unbiased(), -16383);
+        assert_eq!(f128::MIN_POSITIVE, f128::MIN_POSITIVE_NORMAL);
+
+        // `from_parts`
+        assert_biteq!(f128::from_parts(true, f128::EXP_BIAS, 0), -1.0f128);
+        assert_biteq!(f128::from_parts(false, 0, 1), f128::from_bits(0x1));
+    }
+}
diff --git a/libm/src/math/support/hex_float.rs b/libm/src/math/support/hex_float.rs
new file mode 100644
index 000000000..819e2f56e
--- /dev/null
+++ b/libm/src/math/support/hex_float.rs
@@ -0,0 +1,1155 @@
+//! Utilities for working with hex float formats.
+
+use core::fmt;
+
+use super::{Float, Round, Status, f32_from_bits, f64_from_bits};
+
+/// Construct an `f16` from a C-style hex float; panics if `s` is malformed or not exact.
+#[cfg(f16_enabled)]
+pub const fn hf16(s: &str) -> f16 {
+    match parse_hex_exact(s, 16, 10) {
+        Err(HexFloatParseError(msg)) => panic!("{}", msg),
+        Ok(bits) => f16::from_bits(bits as u16),
+    }
+}
+
+/// Construct an `f32` from a C-style hex float; panics if `s` is malformed or not exact.
+#[allow(unused)]
+pub const fn hf32(s: &str) -> f32 {
+    match parse_hex_exact(s, 32, 23) {
+        Err(HexFloatParseError(msg)) => panic!("{}", msg),
+        Ok(bits) => f32_from_bits(bits as u32),
+    }
+}
+
+/// Construct an `f64` from a C-style hex float; panics if `s` is malformed or not exact.
+pub const fn hf64(s: &str) -> f64 {
+    match parse_hex_exact(s, 64, 52) {
+        Err(HexFloatParseError(msg)) => panic!("{}", msg),
+        Ok(bits) => f64_from_bits(bits as u64),
+    }
+}
+
+/// Construct an `f128` from a C-style hex float; panics if `s` is malformed or not exact.
+#[cfg(f128_enabled)]
+pub const fn hf128(s: &str) -> f128 {
+    match parse_hex_exact(s, 128, 112) {
+        Err(HexFloatParseError(msg)) => panic!("{}", msg),
+        Ok(bits) => f128::from_bits(bits),
+    }
+}
+#[derive(Copy, Clone, Debug)]
+pub struct HexFloatParseError(&'static str); // holds a static message describing the failure
+
+/// Parse a hex float into its bit pattern, erroring if the value is not exactly representable.
+pub const fn parse_hex_exact(
+    s: &str,
+    bits: u32,
+    sig_bits: u32,
+) -> Result<u128, HexFloatParseError> {
+    match parse_any(s, bits, sig_bits, Round::Nearest) {
+        Ok((exact, Status::OK)) => Ok(exact),
+        Ok((_, st)) if st.overflow() => Err(HexFloatParseError("the value is too huge")),
+        Ok((_, st)) if st.underflow() => Err(HexFloatParseError("the value is too tiny")),
+        Ok((_, st)) if st.inexact() => Err(HexFloatParseError("the value is too precise")),
+        Ok(_) => unreachable!(),
+        Err(parse_err) => Err(parse_err),
+    }
+}
+
+/// Parse a C-style hex float (`0x<hex>p<exp>`), `inf` or `nan`, optionally signed, to raw bits.
+///
+/// `bits` is the total width of the target format and `sig_bits` the width of its significand
+/// field. Malformed input and unsupported layouts yield an error instead of a value.
+pub const fn parse_any(
+    s: &str,
+    bits: u32,
+    sig_bits: u32,
+    round: Round,
+) -> Result<(u128, Status), HexFloatParseError> {
+    let mut b = s.as_bytes();
+    // `sig_bits == 0` is rejected too: `sig_bits - 1` below would underflow for it.
+    let non_sig = bits.saturating_sub(sig_bits);
+    if sig_bits == 0 || sig_bits > 119 || bits > 128 || non_sig < 3 || non_sig > 30 {
+        return Err(HexFloatParseError("unsupported target float configuration"));
+    }
+
+    let neg = matches!(b, [b'-', ..]);
+    if let &[b'-' | b'+', ref rest @ ..] = b {
+        b = rest;
+    }
+
+    let sign_bit = 1 << (bits - 1);
+    let quiet_bit = 1 << (sig_bits - 1);
+    let nan = sign_bit - quiet_bit;
+    let inf = nan - quiet_bit;
+
+    let (x, status) = match *b {
+        [b'i' | b'I', b'n' | b'N', b'f' | b'F'] => (inf, Status::OK),
+        [b'n' | b'N', b'a' | b'A', b'n' | b'N'] => (nan, Status::OK),
+        [b'0', b'x' | b'X', ref rest @ ..] => {
+            let round = match (neg, round) {
+                // parse("-x", Round::Positive) == -parse("x", Round::Negative)
+                (true, Round::Positive) => Round::Negative,
+                (true, Round::Negative) => Round::Positive,
+                // rounding toward nearest or zero are symmetric
+                (true, Round::Nearest | Round::Zero) | (false, _) => round,
+            };
+            match parse_finite(rest, bits, sig_bits, round) {
+                Err(e) => return Err(e),
+                Ok(res) => res,
+            }
+        }
+        _ => return Err(HexFloatParseError("no hex indicator")),
+    };
+
+    Ok((if neg { x ^ sign_bit } else { x }, status))
+}
+
+/// Round the unsigned finite hex float in `b` (the text after `0x`) to the given float layout.
+/// Returns the bit pattern with the sign bit clear and the `Status` raised while rounding;
+/// `bits` / `sig_bits` are not validated here (`parse_any` checks them before calling).
+const fn parse_finite(
+    b: &[u8],
+    bits: u32,
+    sig_bits: u32,
+    rounding_mode: Round,
+) -> Result<(u128, Status), HexFloatParseError> {
+    let exp_bits: u32 = bits - sig_bits - 1;
+    let max_msb: i32 = (1 << (exp_bits - 1)) - 1;
+    // The exponent of one ULP in the subnormals
+    let min_lsb: i32 = 1 - max_msb - sig_bits as i32;
+
+    // `parse_hex` saturates huge exponents at the `i32` limits. Clamp them so the arithmetic
+    // below cannot overflow: past +-2^30 the value overflows or fully underflows regardless.
+    const EXP_LIMIT: i32 = 1 << 30;
+    let (mut sig, mut exp) = match parse_hex(b) {
+        Err(e) => return Err(e),
+        Ok(Parsed { sig: 0, .. }) => return Ok((0, Status::OK)),
+        Ok(Parsed { sig, exp }) if exp > EXP_LIMIT => (sig, EXP_LIMIT),
+        Ok(Parsed { sig, exp }) if exp < -EXP_LIMIT => (sig, -EXP_LIMIT),
+        Ok(Parsed { sig, exp }) => (sig, exp),
+    };
+
+    let mut round_bits = u128_ilog2(sig) as i32 - sig_bits as i32;
+    // Round at least up to min_lsb
+    if exp < min_lsb - round_bits {
+        round_bits = min_lsb - exp;
+    }
+    exp += round_bits;
+    let mut status = Status::OK;
+    if round_bits > 0 {
+        // first, prepare for rounding exactly two bits
+        if round_bits == 1 {
+            sig <<= 1;
+        } else if round_bits > 2 {
+            sig = shr_odd_rounding(sig, (round_bits - 2) as u32);
+        }
+        if sig & 0b11 != 0 {
+            status = Status::INEXACT;
+        }
+        sig = shr2_round(sig, rounding_mode);
+    } else if round_bits < 0 {
+        sig <<= -round_bits;
+    }
+
+    // The parsed value is X = sig * 2^exp
+    // Expressed as a multiple U of the smallest subnormal value:
+    // X = U * 2^min_lsb, so U = sig * 2^(exp-min_lsb)
+    let uexp = (exp - min_lsb) as u128;
+    // This shift can drop high bits for wide formats, hence the `uexp >> exp_bits` check below.
+    let exp_field = uexp << sig_bits;
+
+    // Rounding up may leave `sig == 2 << sig_bits`; the mantissa is then all zeroes and the
+    // carry lands in the exponent when the two are added, so the value is still correct.
+    debug_assert!(sig <= 2 << sig_bits);
+    let inf = ((1 << exp_bits) - 1) << sig_bits;
+    let bits = match sig.checked_add(exp_field) {
+        Some(bits) if uexp >> exp_bits == 0 && bits < inf => {
+            // inexact subnormal or zero?
+            if status.inexact() && bits < (1 << sig_bits) {
+                status = status.with(Status::UNDERFLOW);
+            }
+            bits
+        }
+        _ => {
+            // overflow to infinity
+            status = status.with(Status::OVERFLOW).with(Status::INEXACT);
+            match rounding_mode {
+                Round::Positive | Round::Nearest => inf,
+                Round::Negative | Round::Zero => inf - 1,
+            }
+        }
+    };
+    Ok((bits, status))
+}
+
+/// Compute `x >> k`, forcing the low bit of the result to one whenever nonzero bits were
+/// shifted out. For `k = 4`: `0 -> 0`, `1..=31 -> 1`, `32 -> 2`, `33..=63 -> 3`, ...
+///
+/// This lets a value be shortened ahead of the final two-bit rounding step without changing
+/// what that step produces, whichever rounding mode it uses.
+const fn shr_odd_rounding(x: u128, k: u32) -> u128 {
+    // Shifting by the full width or more discards every bit; only the sticky bit remains.
+    if k >= u128::BITS {
+        return (x != 0) as u128;
+    }
+    let sticky = (x.trailing_zeros() < k) as u128;
+    (x >> k) | sticky
+}
+
+/// Divide the magnitude `x` by 4, rounding the two discarded bits according to `round`.
+const fn shr2_round(x: u128, round: Round) -> u128 {
+    let low3 = (x as u32) & 0b111;
+    let round_up = match round {
+        // Look-up-table on the last three bits for when ties-to-even rounds up
+        Round::Nearest => (0b11001000_u8 >> low3) & 1 != 0,
+        // Any nonzero discarded bit pushes the result up
+        Round::Positive => low3 & 0b11 != 0,
+        // Plain truncation
+        Round::Negative | Round::Zero => false,
+    };
+    (x >> 2) + round_up as u128
+}
+
+/// An unsigned finite value as produced by `parse_hex`, before rounding to a float format.
+struct Parsed {
+    /// Integer significand; the represented absolute value is `sig * 2^exp`.
+    sig: u128,
+    exp: i32,
+}
+
+/// Parse the part of a hex float after `0x`: hex digits with an optional `.`, then a mandatory
+/// `p`/`P` and a signed decimal exponent. Nonzero digits that no longer fit in `sig` set its
+/// lowest bit as a sticky bit, and the combined exponent is saturated to the `i32` range.
+const fn parse_hex(mut b: &[u8]) -> Result<Parsed, HexFloatParseError> {
+    let mut sig: u128 = 0;
+    let mut exp: i32 = 0;
+
+    let mut seen_point = false;
+    let mut some_digits = false;
+    let mut inexact = false;
+
+    while let &[c, ref rest @ ..] = b {
+        b = rest;
+        match c {
+            b'.' => {
+                if seen_point {
+                    return Err(HexFloatParseError("unexpected '.' parsing fractional digits"));
+                }
+                seen_point = true;
+                continue;
+            }
+            b'p' | b'P' => break,
+            c => {
+                let digit = match hex_digit(c) {
+                    Some(d) => d,
+                    None => return Err(HexFloatParseError("expected hexadecimal digit")),
+                };
+                some_digits = true;
+
+                if (sig >> 124) == 0 {
+                    sig <<= 4;
+                    sig |= digit as u128;
+                } else {
+                    // FIXME: it is technically possible for exp to overflow if parsing a string with >500M digits
+                    exp += 4;
+                    inexact |= digit != 0;
+                }
+                // Up until the fractional point, the value grows
+                // with more digits, but after it the exponent is
+                // compensated to match.
+                if seen_point {
+                    exp -= 4;
+                }
+            }
+        }
+    }
+    // If we've set inexact, the exact value has more than 125
+    // significant bits, and lies somewhere between sig and sig + 1.
+    // Because we'll round off at least two of the trailing bits,
+    // setting the last bit gives correct rounding for inexact values.
+    sig |= inexact as u128;
+
+    if !some_digits {
+        return Err(HexFloatParseError("at least one digit is required"));
+    };
+
+    some_digits = false;
+    let negate_exp = matches!(b, [b'-', ..]);
+    if let &[b'-' | b'+', ref rest @ ..] = b {
+        b = rest;
+    }
+
+    let mut pexp: u32 = 0;
+    while let &[c, ref rest @ ..] = b {
+        b = rest;
+        let digit = match dec_digit(c) {
+            Some(d) => d,
+            None => return Err(HexFloatParseError("expected decimal digit")),
+        };
+        some_digits = true;
+        // Saturate on both steps: a plain `+=` could overflow `u32` for long exponents.
+        pexp = pexp.saturating_mul(10).saturating_add(digit as u32);
+    }
+
+    if !some_digits {
+        return Err(HexFloatParseError("at least one exponent digit is required"));
+    };
+
+    {
+        let e;
+        if negate_exp {
+            e = (exp as i64) - (pexp as i64);
+        } else {
+            e = (exp as i64) + (pexp as i64);
+        };
+
+        exp = if e < i32::MIN as i64 {
+            i32::MIN
+        } else if e > i32::MAX as i64 {
+            i32::MAX
+        } else {
+            e as i32
+        };
+    }
+    /* FIXME(msrv): once MSRV >= 1.66, replace the above workaround block with:
+    if negate_exp {
+        exp = exp.saturating_sub_unsigned(pexp);
+    } else {
+        exp = exp.saturating_add_unsigned(pexp);
+    };
+    */
+
+    Ok(Parsed { sig, exp })
+}
+
+/// Numeric value of the ASCII decimal digit `c`, or `None` if `c` is not one.
+const fn dec_digit(c: u8) -> Option<u8> {
+    // Bytes below `b'0'` wrap around to large values, so a single comparison suffices.
+    let value = c.wrapping_sub(b'0');
+    if value < 10 { Some(value) } else { None }
+}
+
+/// Value of the ASCII hex digit `c` (upper or lower case), or `None` for any other byte.
+const fn hex_digit(c: u8) -> Option<u8> {
+    Some(match c {
+        b'0'..=b'9' => c - b'0',
+        b'a'..=b'f' | b'A'..=b'F' => (c | 0x20) - b'a' + 10,
+        _ => return None,
+    })
+}
+
+/* FIXME(msrv): vendor some things that are not const stable at our MSRV */
+
+/// Const-compatible `u128::ilog2`: the index of the highest set bit. Panics when `v` is zero.
+const fn u128_ilog2(v: u128) -> u32 {
+    assert!(v != 0);
+    (u128::BITS - v.leading_zeros()) - 1
+}
+
+/// Wrapper that formats the wrapped value in hexadecimal; floats use C `%a`-style notation.
+pub struct Hexf<F>(pub F);
+
+// Adapted from https://github.com/ericseppanen/hexfloat2/blob/a5c27932f0ff/src/format.rs
+/// Write `x` in C-style (`%a`) hex float notation; all significand digits are always printed.
+#[cfg(not(feature = "compiler-builtins"))]
+fn fmt_any_hex<F: Float>(x: &F, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+    if x.is_sign_negative() {
+        f.write_str("-")?;
+    }
+
+    if x.is_nan() {
+        return f.write_str("NaN");
+    }
+    if x.is_infinite() {
+        return f.write_str("inf");
+    }
+    if *x == F::ZERO {
+        return f.write_str("0x0p+0");
+    }
+
+    // The significand's MSB is shifted up to the nearest nibble boundary.
+    let nibble_pad = (4 - (F::SIG_BITS % 4)) % 4;
+    let frac = (x.to_bits() & F::SIG_MASK) << nibble_pad;
+    // Number of hex characters: the significand width rounded up to whole nibbles.
+    let frac_width = (F::SIG_BITS as usize + 3) / 4;
+    let raw_exp = x.exp_unbiased();
+    // Subnormals have no implicit leading one and print with the exponent raised by one.
+    let (lead, shown_exp) = if raw_exp == -(F::EXP_BIAS as i32) {
+        ("0.", raw_exp + 1)
+    } else {
+        ("1.", raw_exp)
+    };
+
+    write!(f, "0x{lead}{frac:0frac_width$x}p{shown_exp:+}")
+}
+
+/// Stub for `compiler-builtins` builds, where hex formatting is not implemented.
+#[cfg(feature = "compiler-builtins")]
+fn fmt_any_hex<F: Float>(_x: &F, _f: &mut fmt::Formatter<'_>) -> fmt::Result {
+    unimplemented!()
+}
+
+impl<F: Float> fmt::LowerHex for Hexf<F> {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        cfg_if! {
+            if #[cfg(feature = "compiler-builtins")] {
+                let _ = f;
+                unimplemented!()
+            } else {
+                fmt_any_hex(&self.0, f)
+            }
+        }
+    }
+}
+
+impl<F: Float> fmt::LowerHex for Hexf<(F, F)> {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        cfg_if! {
+            if #[cfg(feature = "compiler-builtins")] {
+                let _ = f;
+                unimplemented!()
+            } else {
+                write!(f, "({:x}, {:x})", Hexf(self.0.0), Hexf(self.0.1))
+            }
+        }
+    }
+}
+
+impl<F: Float> fmt::LowerHex for Hexf<(F, i32)> {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        cfg_if! {
+            if #[cfg(feature = "compiler-builtins")] {
+                let _ = f;
+                unimplemented!()
+            } else {
+                write!(f, "({:x}, {:x})", Hexf(self.0.0), Hexf(self.0.1))
+            }
+        }
+    }
+}
+
+impl fmt::LowerHex for Hexf<i32> {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        cfg_if! {
+            if #[cfg(feature = "compiler-builtins")] {
+                let _ = f;
+                unimplemented!()
+            } else {
+                fmt::LowerHex::fmt(&self.0, f)
+            }
+        }
+    }
+}
+
+impl<T> fmt::Debug for Hexf<T>
+where
+    Hexf<T>: fmt::LowerHex,
+{
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        cfg_if! {
+            if #[cfg(feature = "compiler-builtins")] {
+                let _ = f;
+                unimplemented!()
+            } else {
+                fmt::LowerHex::fmt(self, f)
+            }
+        }
+    }
+}
+
+impl<T> fmt::Display for Hexf<T>
+where
+    Hexf<T>: fmt::LowerHex,
+{
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        cfg_if! {
+            if #[cfg(feature = "compiler-builtins")] {
+                let _ = f;
+                unimplemented!()
+            } else {
+                fmt::LowerHex::fmt(self, f)
+            }
+        }
+    }
+}
+
+#[cfg(test)]
+mod parse_tests {
+    extern crate std;
+    use std::{format, println};
+
+    use super::*;
+
+    #[cfg(f16_enabled)]
+    fn rounding_properties(s: &str) -> Result<(), HexFloatParseError> {
+        let (xd, s0) = parse_any(s, 16, 10, Round::Negative)?;
+        let (xu, s1) = parse_any(s, 16, 10, Round::Positive)?;
+        let (xz, s2) = parse_any(s, 16, 10, Round::Zero)?;
+        let (xn, s3) = parse_any(s, 16, 10, Round::Nearest)?;
+
+        // FIXME: A value between the least normal and largest subnormal
+        // could have underflow status depend on rounding mode.
+
+        if let Status::OK = s0 {
+            // an exact result is the same for all rounding modes
+            assert_eq!(s0, s1);
+            assert_eq!(s0, s2);
+            assert_eq!(s0, s3);
+
+            assert_eq!(xd, xu);
+            assert_eq!(xd, xz);
+            assert_eq!(xd, xn);
+        } else {
+            assert!([s0, s1, s2, s3].into_iter().all(Status::inexact));
+
+            let xd = f16::from_bits(xd as u16);
+            let xu = f16::from_bits(xu as u16);
+            let xz = f16::from_bits(xz as u16);
+            let xn = f16::from_bits(xn as u16);
+
+            assert_biteq!(xd.next_up(), xu, "s={s}, xd={xd:?}, xu={xu:?}");
+
+            let signs = [xd, xu, xz, xn].map(f16::is_sign_negative);
+
+            if signs == [true; 4] {
+                assert_biteq!(xz, xu);
+            } else {
+                assert_eq!(signs, [false; 4]);
+                assert_biteq!(xz, xd);
+            }
+
+            if xn.to_bits() != xd.to_bits() {
+                assert_biteq!(xn, xu);
+            }
+        }
+        Ok(())
+    }
+    #[test]
+    #[cfg(f16_enabled)]
+    fn test_rounding() {
+        let n = 1_i32 << 14;
+        for i in -n..n {
+            let u = i.rotate_right(11) as u32;
+            let s = format!("{}", Hexf(f32::from_bits(u)));
+            assert!(rounding_properties(&s).is_ok());
+        }
+    }
+
+    #[test]
+    fn test_parse_any() {
+        for k in -149..=127 {
+            let s = format!("0x1p{k}");
+            let x = hf32(&s);
+            let y = if k < 0 { 0.5f32.powi(-k) } else { 2.0f32.powi(k) };
+            assert_eq!(x, y);
+        }
+
+        let mut s = *b"0x.0000000p-121";
+        for e in 0..40 {
+            for k in 0..(1 << 15) {
+                let expected = f32::from_bits(k) * 2.0f32.powi(e);
+                let x = hf32(std::str::from_utf8(&s).unwrap());
+                assert_eq!(
+                    x.to_bits(),
+                    expected.to_bits(),
+                    "\
+                    e={e}\n\
+                    k={k}\n\
+                    x={x}\n\
+                    expected={expected}\n\
+                    s={}\n\
+                    f32::from_bits(k)={}\n\
+                    2.0f32.powi(e)={}\
+                    ",
+                    std::str::from_utf8(&s).unwrap(),
+                    f32::from_bits(k),
+                    2.0f32.powi(e),
+                );
+                for i in (3..10).rev() {
+                    if s[i] == b'f' {
+                        s[i] = b'0';
+                    } else if s[i] == b'9' {
+                        s[i] = b'a';
+                        break;
+                    } else {
+                        s[i] += 1;
+                        break;
+                    }
+                }
+            }
+            for i in (12..15).rev() {
+                if s[i] == b'0' {
+                    s[i] = b'9';
+                } else {
+                    s[i] -= 1;
+                    break;
+                }
+            }
+            for i in (3..10).rev() {
+                s[i] = b'0';
+            }
+        }
+    }
+
+    // FIXME: this test is causing failures that are likely UB on various platforms
+    #[cfg(all(target_arch = "x86_64", target_os = "linux"))]
+    #[test]
+    #[cfg(f128_enabled)]
+    fn rounding() {
+        let pi = std::f128::consts::PI;
+        let s = format!("{}", Hexf(pi));
+
+        for k in 0..=111 {
+            let (bits, status) = parse_any(&s, 128 - k, 112 - k, Round::Nearest).unwrap();
+            let scale = (1u128 << (112 - k - 1)) as f128;
+            let expected = (pi * scale).round_ties_even() / scale;
+            assert_eq!(bits << k, expected.to_bits(), "k = {k}, s = {s}");
+            assert_eq!(expected != pi, status.inexact());
+        }
+    }
+    #[test]
+    fn rounding_extreme_underflow() {
+        for k in 1..1000 {
+            let s = format!("0x1p{}", -149 - k);
+            let Ok((bits, status)) = parse_any(&s, 32, 23, Round::Nearest) else { unreachable!() };
+            assert_eq!(bits, 0, "{s} should round to zero, got bits={bits}");
+            assert!(status.underflow(), "should indicate underflow when parsing {s}");
+            assert!(status.inexact(), "should indicate inexact when parsing {s}");
+        }
+    }
+    #[test]
+    fn long_tail() {
+        for k in 1..1000 {
+            let s = format!("0x1.{}p0", "0".repeat(k));
+            let Ok(bits) = parse_hex_exact(&s, 32, 23) else { panic!("parsing {s} failed") };
+            assert_eq!(f32::from_bits(bits as u32), 1.0);
+
+            let s = format!("0x1.{}1p0", "0".repeat(k));
+            let Ok((bits, status)) = parse_any(&s, 32, 23, Round::Nearest) else { unreachable!() };
+            if status.inexact() {
+                assert!(1.0 == f32::from_bits(bits as u32));
+            } else {
+                assert!(1.0 < f32::from_bits(bits as u32));
+            }
+        }
+    }
+    // HACK(msrv): 1.63 rejects unknown width float literals at an AST level, so use a macro to
+    // hide them from the AST.
+    #[cfg(f16_enabled)]
+    macro_rules! f16_tests {
+        () => {
+            #[test]
+            fn test_f16() {
+                let checks = [
+                    ("0x.1234p+16", (0x1234 as f16).to_bits()),
+                    ("0x1.234p+12", (0x1234 as f16).to_bits()),
+                    ("0x12.34p+8", (0x1234 as f16).to_bits()),
+                    ("0x123.4p+4", (0x1234 as f16).to_bits()),
+                    ("0x1234p+0", (0x1234 as f16).to_bits()),
+                    ("0x1234.p+0", (0x1234 as f16).to_bits()),
+                    ("0x1234.0p+0", (0x1234 as f16).to_bits()),
+                    ("0x1.ffcp+15", f16::MAX.to_bits()),
+                    ("0x1.0p+1", 2.0f16.to_bits()),
+                    ("0x1.0p+0", 1.0f16.to_bits()),
+                    ("0x1.ffp+8", 0x5ffc),
+                    ("+0x1.ffp+8", 0x5ffc),
+                    ("0x1p+0", 0x3c00),
+                    ("0x1.998p-4", 0x2e66),
+                    ("0x1.9p+6", 0x5640),
+                    ("0x0.0p0", 0.0f16.to_bits()),
+                    ("-0x0.0p0", (-0.0f16).to_bits()),
+                    ("0x1.0p0", 1.0f16.to_bits()),
+                    ("0x1.998p-4", (0.1f16).to_bits()),
+                    ("-0x1.998p-4", (-0.1f16).to_bits()),
+                    ("0x0.123p-12", 0x0123),
+                    ("0x1p-24", 0x0001),
+                    ("nan", f16::NAN.to_bits()),
+                    ("-nan", (-f16::NAN).to_bits()),
+                    ("inf", f16::INFINITY.to_bits()),
+                    ("-inf", f16::NEG_INFINITY.to_bits()),
+                ];
+                for (s, exp) in checks {
+                    println!("parsing {s}");
+                    assert!(rounding_properties(s).is_ok());
+                    let act = hf16(s).to_bits();
+                    assert_eq!(
+                        act, exp,
+                        "parsing {s}: {act:#06x} != {exp:#06x}\nact: {act:#018b}\nexp: {exp:#018b}"
+                    );
+                }
+            }
+
+            #[test]
+            fn test_macros_f16() {
+                assert_eq!(hf16!("0x1.ffp+8").to_bits(), 0x5ffc_u16);
+            }
+        };
+    }
+
+    #[cfg(f16_enabled)]
+    f16_tests!();
+
+    #[test]
+    fn test_f32() {
+        let checks = [
+            ("0x.1234p+16", (0x1234 as f32).to_bits()),
+            ("0x1.234p+12", (0x1234 as f32).to_bits()),
+            ("0x12.34p+8", (0x1234 as f32).to_bits()),
+            ("0x123.4p+4", (0x1234 as f32).to_bits()),
+            ("0x1234p+0", (0x1234 as f32).to_bits()),
+            ("0x1234.p+0", (0x1234 as f32).to_bits()),
+            ("0x1234.0p+0", (0x1234 as f32).to_bits()),
+            ("0x1.fffffep+127", f32::MAX.to_bits()),
+            ("0x1.0p+1", 2.0f32.to_bits()),
+            ("0x1.0p+0", 1.0f32.to_bits()),
+            ("0x1.ffep+8", 0x43fff000),
+            ("+0x1.ffep+8", 0x43fff000),
+            ("0x1p+0", 0x3f800000),
+            ("0x1.99999ap-4", 0x3dcccccd),
+            ("0x1.9p+6", 0x42c80000),
+            ("0x1.2d5ed2p+20", 0x4996af69),
+            ("-0x1.348eb8p+10", 0xc49a475c),
+            ("-0x1.33dcfep-33", 0xaf19ee7f),
+            ("0x0.0p0", 0.0f32.to_bits()),
+            ("-0x0.0p0", (-0.0f32).to_bits()),
+            ("0x1.0p0", 1.0f32.to_bits()),
+            ("0x1.99999ap-4", (0.1f32).to_bits()),
+            ("-0x1.99999ap-4", (-0.1f32).to_bits()),
+            ("0x1.111114p-127", 0x00444445),
+            ("0x1.23456p-130", 0x00091a2b),
+            ("0x1p-149", 0x00000001),
+            ("nan", f32::NAN.to_bits()),
+            ("-nan", (-f32::NAN).to_bits()),
+            ("inf", f32::INFINITY.to_bits()),
+            ("-inf", f32::NEG_INFINITY.to_bits()),
+        ];
+        for (s, exp) in checks {
+            println!("parsing {s}");
+            let act = hf32(s).to_bits();
+            assert_eq!(
+                act, exp,
+                "parsing {s}: {act:#010x} != {exp:#010x}\nact: {act:#034b}\nexp: {exp:#034b}"
+            );
+        }
+    }
+
+    #[test]
+    fn test_f64() {
+        let checks = [
+            ("0x.1234p+16", (0x1234 as f64).to_bits()),
+            ("0x1.234p+12", (0x1234 as f64).to_bits()),
+            ("0x12.34p+8", (0x1234 as f64).to_bits()),
+            ("0x123.4p+4", (0x1234 as f64).to_bits()),
+            ("0x1234p+0", (0x1234 as f64).to_bits()),
+            ("0x1234.p+0", (0x1234 as f64).to_bits()),
+            ("0x1234.0p+0", (0x1234 as f64).to_bits()),
+            ("0x1.ffep+8", 0x407ffe0000000000),
+            ("0x1p+0", 0x3ff0000000000000),
+            ("0x1.999999999999ap-4", 0x3fb999999999999a),
+            ("0x1.9p+6", 0x4059000000000000),
+            ("0x1.2d5ed1fe1da7bp+20", 0x4132d5ed1fe1da7b),
+            ("-0x1.348eb851eb852p+10", 0xc09348eb851eb852),
+            ("-0x1.33dcfe54a3803p-33", 0xbde33dcfe54a3803),
+            ("0x1.0p0", 1.0f64.to_bits()),
+            ("0x0.0p0", 0.0f64.to_bits()),
+            ("-0x0.0p0", (-0.0f64).to_bits()),
+            ("0x1.999999999999ap-4", 0.1f64.to_bits()),
+            ("0x1.999999999998ap-4", (0.1f64 - f64::EPSILON).to_bits()),
+            ("-0x1.999999999999ap-4", (-0.1f64).to_bits()),
+            ("-0x1.999999999998ap-4", (-0.1f64 + f64::EPSILON).to_bits()),
+            ("0x0.8000000000001p-1022", 0x0008000000000001),
+            ("0x0.123456789abcdp-1022", 0x000123456789abcd),
+            ("0x0.0000000000002p-1022", 0x0000000000000002),
+            ("nan", f64::NAN.to_bits()),
+            ("-nan", (-f64::NAN).to_bits()),
+            ("inf", f64::INFINITY.to_bits()),
+            ("-inf", f64::NEG_INFINITY.to_bits()),
+        ];
+        for (s, exp) in checks {
+            println!("parsing {s}");
+            let act = hf64(s).to_bits();
+            assert_eq!(
+                act, exp,
+                "parsing {s}: {act:#018x} != {exp:#018x}\nact: {act:#066b}\nexp: {exp:#066b}"
+            );
+        }
+    }
+
+    // HACK(msrv): 1.63 rejects unknown width float literals at an AST level, so use a macro to
+    // hide them from the AST.
+    #[cfg(f128_enabled)]
+    macro_rules! f128_tests {
+        () => {
+            #[test]
+            fn test_f128() {
+                let checks = [
+                    ("0x.1234p+16", (0x1234 as f128).to_bits()),
+                    ("0x1.234p+12", (0x1234 as f128).to_bits()),
+                    ("0x12.34p+8", (0x1234 as f128).to_bits()),
+                    ("0x123.4p+4", (0x1234 as f128).to_bits()),
+                    ("0x1234p+0", (0x1234 as f128).to_bits()),
+                    ("0x1234.p+0", (0x1234 as f128).to_bits()),
+                    ("0x1234.0p+0", (0x1234 as f128).to_bits()),
+                    ("0x1.ffffffffffffffffffffffffffffp+16383", f128::MAX.to_bits()),
+                    ("0x1.0p+1", 2.0f128.to_bits()),
+                    ("0x1.0p+0", 1.0f128.to_bits()),
+                    ("0x1.ffep+8", 0x4007ffe0000000000000000000000000),
+                    ("+0x1.ffep+8", 0x4007ffe0000000000000000000000000),
+                    ("0x1p+0", 0x3fff0000000000000000000000000000),
+                    ("0x1.999999999999999999999999999ap-4", 0x3ffb999999999999999999999999999a),
+                    ("0x1.9p+6", 0x40059000000000000000000000000000),
+                    ("0x0.0p0", 0.0f128.to_bits()),
+                    ("-0x0.0p0", (-0.0f128).to_bits()),
+                    ("0x1.0p0", 1.0f128.to_bits()),
+                    ("0x1.999999999999999999999999999ap-4", (0.1f128).to_bits()),
+                    ("-0x1.999999999999999999999999999ap-4", (-0.1f128).to_bits()),
+                    ("0x0.abcdef0123456789abcdef012345p-16382", 0x0000abcdef0123456789abcdef012345),
+                    ("0x1p-16494", 0x00000000000000000000000000000001),
+                    ("nan", f128::NAN.to_bits()),
+                    ("-nan", (-f128::NAN).to_bits()),
+                    ("inf", f128::INFINITY.to_bits()),
+                    ("-inf", f128::NEG_INFINITY.to_bits()),
+                ];
+                for (s, exp) in checks {
+                    println!("parsing {s}");
+                    let act = hf128(s).to_bits();
+                    assert_eq!(
+                        act, exp,
+                        "parsing {s}: {act:#034x} != {exp:#034x}\nact: {act:#0130b}\nexp: {exp:#0130b}"
+                    );
+                }
+            }
+
+            #[test]
+            fn test_macros_f128() {
+                assert_eq!(hf128!("0x1.ffep+8").to_bits(), 0x4007ffe0000000000000000000000000_u128);
+            }
+        }
+    }
+
+    #[cfg(f128_enabled)]
+    f128_tests!();
+
+    #[test]
+    fn test_macros() {
+        #[cfg(f16_enabled)]
+        assert_eq!(hf16!("0x1.ffp+8").to_bits(), 0x5ffc_u16);
+        assert_eq!(hf32!("0x1.ffep+8").to_bits(), 0x43fff000_u32);
+        assert_eq!(hf64!("0x1.ffep+8").to_bits(), 0x407ffe0000000000_u64);
+        #[cfg(f128_enabled)]
+        assert_eq!(hf128!("0x1.ffep+8").to_bits(), 0x4007ffe0000000000000000000000000_u128);
+    }
+}
+
+#[cfg(test)]
+// FIXME(ppc): something with `should_panic` tests cause a SIGILL with ppc64le
+#[cfg(not(all(target_arch = "powerpc64", target_endian = "little")))]
+mod tests_panicking {
+    extern crate std;
+    use super::*;
+
+    // HACK(msrv): 1.63 rejects unknown width float literals at an AST level, so use a macro to
+    // hide them from the AST.
+    #[cfg(f16_enabled)]
+    macro_rules! f16_tests {
+        () => {
+            #[test]
+            fn test_f16_almost_extra_precision() {
+                // Exact maximum precision allowed
+                hf16("0x1.ffcp+0");
+            }
+
+            #[test]
+            #[should_panic(expected = "the value is too precise")]
+            fn test_f16_extra_precision() {
+                // One bit more than the above.
+                hf16("0x1.ffdp+0");
+            }
+
+            #[test]
+            #[should_panic(expected = "the value is too huge")]
+            fn test_f16_overflow() {
+                // The smallest power of two above `f16::MAX` (`0x1.ffcp+15`).
+                hf16("0x1p+16");
+            }
+
+            #[test]
+            fn test_f16_tiniest() {
+                let x = hf16("0x1.p-24");
+                let y = hf16("0x0.001p-12");
+                let z = hf16("0x0.8p-23");
+                assert_eq!(x, y);
+                assert_eq!(x, z);
+            }
+
+            #[test]
+            #[should_panic(expected = "the value is too tiny")]
+            fn test_f16_too_tiny() {
+                hf16("0x1.p-25");
+            }
+
+            #[test]
+            #[should_panic(expected = "the value is too tiny")]
+            fn test_f16_also_too_tiny() {
+                hf16("0x0.8p-24");
+            }
+
+            #[test]
+            #[should_panic(expected = "the value is too tiny")]
+            fn test_f16_again_too_tiny() {
+                hf16("0x0.001p-13");
+            }
+        };
+    }
+
+    #[cfg(f16_enabled)]
+    f16_tests!();
+
+    #[test]
+    fn test_f32_almost_extra_precision() {
+        // Exact maximum precision allowed
+        hf32("0x1.abcdeep+0");
+    }
+
+    #[test]
+    #[should_panic(expected = "the value is too huge")]
+    fn test_f32_extra_precision2() {
+        // One bit beyond `f32::MAX`: rounding to nearest carries past it, so this overflows.
+        hf32("0x1.ffffffp+127");
+    }
+
+    #[test]
+    #[should_panic(expected = "the value is too huge")]
+    fn test_f32_overflow() {
+        // The smallest power of two above `f32::MAX` (`0x1.fffffep+127`).
+        hf32("0x1p+128");
+    }
+
+    #[test]
+    #[should_panic(expected = "the value is too precise")]
+    fn test_f32_extra_precision() {
+        // One bit more than `test_f32_almost_extra_precision` uses.
+        hf32("0x1.abcdefp+0");
+    }
+
+    #[test]
+    fn test_f32_tiniest() {
+        let x = hf32("0x1.p-149");
+        let y = hf32("0x0.0000000000000001p-85");
+        let z = hf32("0x0.8p-148");
+        assert_eq!(x, y);
+        assert_eq!(x, z);
+    }
+
+    #[test]
+    #[should_panic(expected = "the value is too tiny")]
+    fn test_f32_too_tiny() {
+        hf32("0x1.p-150");
+    }
+
+    #[test]
+    #[should_panic(expected = "the value is too tiny")]
+    fn test_f32_also_too_tiny() {
+        hf32("0x0.8p-149");
+    }
+
+    #[test]
+    #[should_panic(expected = "the value is too tiny")]
+    fn test_f32_again_too_tiny() {
+        hf32("0x0.0000000000000001p-86");
+    }
+
+    #[test]
+    fn test_f64_almost_extra_precision() {
+        // Exact maximum precision allowed
+        hf64("0x1.abcdabcdabcdfp+0");
+    }
+
+    #[test]
+    #[should_panic(expected = "the value is too precise")]
+    fn test_f64_extra_precision() {
+        // One bit more than the above.
+        hf64("0x1.abcdabcdabcdf8p+0");
+    }
+
+    // HACK(msrv): 1.63 rejects unknown width float literals at an AST level, so use a macro to
+    // hide them from the AST.
+    #[cfg(f128_enabled)]
+    macro_rules! f128_tests {
+        () => {
+            #[test]
+            fn test_f128_almost_extra_precision() {
+                // Exact maximum precision allowed (this literal is `f128::MAX`).
+                hf128("0x1.ffffffffffffffffffffffffffffp+16383");
+            }
+
+            #[test]
+            #[should_panic(expected = "the value is too precise")]
+            fn test_f128_extra_precision() {
+                // Below the maximum finite value, but with one hex digit more than fits exactly.
+                hf128("0x1.fffffffffffffffffffffffffffe8p+16383");
+            }
+            #[test]
+            #[should_panic(expected = "the value is too huge")]
+            fn test_f128_extra_precision_overflow() {
+                // One bit more than the maximum finite value. Should overflow.
+                hf128("0x1.ffffffffffffffffffffffffffff8p+16383");
+            }
+
+            #[test]
+            #[should_panic(expected = "the value is too huge")]
+            fn test_f128_overflow() {
+                // The first power of two past the maximum finite exponent (16383).
+                hf128("0x1p+16384");
+            }
+
+            #[test]
+            fn test_f128_tiniest() {
+                let x = hf128("0x1.p-16494");
+                let y = hf128("0x0.0000000000000001p-16430");
+                let z = hf128("0x0.8p-16493");
+                assert_eq!(x, y);
+                assert_eq!(x, z);
+            }
+
+            #[test]
+            #[should_panic(expected = "the value is too tiny")]
+            fn test_f128_too_tiny() {
+                hf128("0x1.p-16495");
+            }
+
+            #[test]
+            #[should_panic(expected = "the value is too tiny")]
+            fn test_f128_again_too_tiny() {
+                hf128("0x0.0000000000000001p-16431");
+            }
+
+            #[test]
+            #[should_panic(expected = "the value is too tiny")]
+            fn test_f128_also_too_tiny() {
+                hf128("0x0.8p-16494");
+            }
+        };
+    }
+
+    #[cfg(f128_enabled)]
+    f128_tests!();
+}
+
+#[cfg(test)]
+mod print_tests {
+    extern crate std;
+    use std::string::ToString;
+
+    use super::*;
+
+    #[test]
+    #[cfg(f16_enabled)]
+    fn test_f16() {
+        use std::format;
+        // Exhaustively check that every `f16` bit pattern survives a format/parse round trip.
+        for x in 0..=u16::MAX {
+            let f = f16::from_bits(x);
+            let s = format!("{}", Hexf(f));
+            let from_s = hf16(&s);
+
+            if f.is_nan() && from_s.is_nan() {
+                continue;
+            }
+
+            assert_eq!(
+                f.to_bits(),
+                from_s.to_bits(),
+                "{f:?} formatted as {s} but parsed as {from_s:?}"
+            );
+        }
+    }
+
+    #[test]
+    #[cfg(f16_enabled)]
+    fn test_f16_to_f32() {
+        use std::format;
+        // Exhaustively check that these conversions agree for every `f16` bit pattern:
+        //  - `f16 -> f32`
+        //  - `f16 -> str -> f32`
+        //  - `f16 -> f32 -> str -> f32`
+        //  - `f16 -> f32 -> str -> f16 -> f32`
+        for x in 0..=u16::MAX {
+            let f16 = f16::from_bits(x);
+            let s16 = format!("{}", Hexf(f16));
+            let f32 = f16 as f32;
+            let s32 = format!("{}", Hexf(f32));
+
+            let a = hf32(&s16);
+            let b = hf32(&s32);
+            let c = hf16(&s32);
+
+            if f32.is_nan() && a.is_nan() && b.is_nan() && c.is_nan() {
+                continue;
+            }
+
+            assert_eq!(
+                f32.to_bits(),
+                a.to_bits(),
+                "{f16:?} : f16 formatted as {s16} which parsed as {a:?} : f32"
+            );
+            assert_eq!(
+                f32.to_bits(),
+                b.to_bits(),
+                "{f32:?} : f32 formatted as {s32} which parsed as {b:?} : f32"
+            );
+            assert_eq!(
+                f32.to_bits(),
+                (c as f32).to_bits(),
+                "{f32:?} : f32 formatted as {s32} which parsed as {c:?} : f16"
+            );
+        }
+    }
+    #[test]
+    fn spot_checks() {
+        assert_eq!(Hexf(f32::MAX).to_string(), "0x1.fffffep+127");
+        assert_eq!(Hexf(f64::MAX).to_string(), "0x1.fffffffffffffp+1023");
+
+        assert_eq!(Hexf(f32::MIN).to_string(), "-0x1.fffffep+127");
+        assert_eq!(Hexf(f64::MIN).to_string(), "-0x1.fffffffffffffp+1023");
+
+        assert_eq!(Hexf(f32::ZERO).to_string(), "0x0p+0");
+        assert_eq!(Hexf(f64::ZERO).to_string(), "0x0p+0");
+
+        assert_eq!(Hexf(f32::NEG_ZERO).to_string(), "-0x0p+0");
+        assert_eq!(Hexf(f64::NEG_ZERO).to_string(), "-0x0p+0");
+
+        assert_eq!(Hexf(f32::NAN).to_string(), "NaN");
+        assert_eq!(Hexf(f64::NAN).to_string(), "NaN");
+
+        assert_eq!(Hexf(f32::INFINITY).to_string(), "inf");
+        assert_eq!(Hexf(f64::INFINITY).to_string(), "inf");
+
+        assert_eq!(Hexf(f32::NEG_INFINITY).to_string(), "-inf");
+        assert_eq!(Hexf(f64::NEG_INFINITY).to_string(), "-inf");
+
+        #[cfg(f16_enabled)]
+        {
+            assert_eq!(Hexf(f16::MAX).to_string(), "0x1.ffcp+15");
+            assert_eq!(Hexf(f16::MIN).to_string(), "-0x1.ffcp+15");
+            assert_eq!(Hexf(f16::ZERO).to_string(), "0x0p+0");
+            assert_eq!(Hexf(f16::NEG_ZERO).to_string(), "-0x0p+0");
+            assert_eq!(Hexf(f16::NAN).to_string(), "NaN");
+            assert_eq!(Hexf(f16::INFINITY).to_string(), "inf");
+            assert_eq!(Hexf(f16::NEG_INFINITY).to_string(), "-inf");
+        }
+
+        #[cfg(f128_enabled)]
+        {
+            assert_eq!(Hexf(f128::MAX).to_string(), "0x1.ffffffffffffffffffffffffffffp+16383");
+            assert_eq!(Hexf(f128::MIN).to_string(), "-0x1.ffffffffffffffffffffffffffffp+16383");
+            assert_eq!(Hexf(f128::ZERO).to_string(), "0x0p+0");
+            assert_eq!(Hexf(f128::NEG_ZERO).to_string(), "-0x0p+0");
+            assert_eq!(Hexf(f128::NAN).to_string(), "NaN");
+            assert_eq!(Hexf(f128::INFINITY).to_string(), "inf");
+            assert_eq!(Hexf(f128::NEG_INFINITY).to_string(), "-inf");
+        }
+    }
+}
diff --git a/libm/src/math/support/int_traits.rs b/libm/src/math/support/int_traits.rs
new file mode 100644
index 000000000..491adb1f2
--- /dev/null
+++ b/libm/src/math/support/int_traits.rs
@@ -0,0 +1,451 @@
+use core::{cmp, fmt, ops};
+
+/// Minimal integer implementations needed on all integer types, including wide integers.
+pub trait MinInt:
+    Copy
+    + fmt::Debug
+    + ops::BitOr<Output = Self>
+    + ops::Not<Output = Self>
+    + ops::Shl<u32, Output = Self>
+{
+    /// Type with the same width but the opposite signedness
+    type OtherSign: MinInt;
+    /// Unsigned version of `Self`
+    type Unsigned: MinInt;
+
+    /// Whether `Self` is a signed integer
+    const SIGNED: bool;
+
+    /// The bit width of the integer type
+    const BITS: u32;
+
+    const ZERO: Self;
+    const ONE: Self;
+    const MIN: Self;
+    const MAX: Self;
+}
+
+/// Access the associated `OtherSign` type from an int (helper to avoid ambiguous associated
+/// types).
+pub type OtherSign<I> = <I as MinInt>::OtherSign;
+
+/// Trait for some basic operations on integers
+#[allow(dead_code)]
+pub trait Int:
+    MinInt
+    + fmt::Display
+    + fmt::Binary
+    + fmt::LowerHex
+    + PartialEq
+    + PartialOrd
+    + ops::AddAssign
+    + ops::SubAssign
+    + ops::BitAndAssign
+    + ops::BitOrAssign
+    + ops::BitXorAssign
+    + ops::ShlAssign<i32>
+    + ops::ShlAssign<u32>
+    + ops::ShrAssign<u32>
+    + ops::ShrAssign<i32>
+    + ops::Add<Output = Self>
+    + ops::Sub<Output = Self>
+    + ops::Mul<Output = Self>
+    + ops::Div<Output = Self>
+    + ops::Shl<i32, Output = Self>
+    + ops::Shl<u32, Output = Self>
+    + ops::Shr<i32, Output = Self>
+    + ops::Shr<u32, Output = Self>
+    + ops::BitXor<Output = Self>
+    + ops::BitAnd<Output = Self>
+    + cmp::Ord
+    + From<bool>
+    + CastFrom<i32>
+    + CastFrom<u16>
+    + CastFrom<u32>
+    + CastFrom<u8>
+    + CastFrom<usize>
+    + CastInto<i32>
+    + CastInto<u16>
+    + CastInto<u32>
+    + CastInto<u8>
+    + CastInto<usize>
+{
+    fn signed(self) -> OtherSign<Self::Unsigned>;
+    fn unsigned(self) -> Self::Unsigned;
+    fn from_unsigned(unsigned: Self::Unsigned) -> Self;
+    fn abs(self) -> Self;
+
+    fn from_bool(b: bool) -> Self;
+
+    /// Zero-filling right shift; avoids excessive conversions between signed and unsigned
+    fn logical_shr(self, other: u32) -> Self;
+
+    /// Absolute difference between two integers.
+    fn abs_diff(self, other: Self) -> Self::Unsigned;
+
+    // Mirrors of the inherent methods on primitive integers, exposed through the trait
+    fn is_zero(self) -> bool;
+    fn checked_add(self, other: Self) -> Option<Self>;
+    fn checked_sub(self, other: Self) -> Option<Self>;
+    fn wrapping_neg(self) -> Self;
+    fn wrapping_add(self, other: Self) -> Self;
+    fn wrapping_mul(self, other: Self) -> Self;
+    fn wrapping_sub(self, other: Self) -> Self;
+    fn wrapping_shl(self, other: u32) -> Self;
+    fn wrapping_shr(self, other: u32) -> Self;
+    fn rotate_left(self, other: u32) -> Self;
+    fn overflowing_add(self, other: Self) -> (Self, bool);
+    fn overflowing_sub(self, other: Self) -> (Self, bool);
+    fn leading_zeros(self) -> u32;
+    fn ilog2(self) -> u32;
+}
+
+macro_rules! int_impl_common {
+    ($ty:ty) => {
+        fn from_bool(b: bool) -> Self {
+            b as $ty
+        }
+
+        fn logical_shr(self, other: u32) -> Self {
+            Self::from_unsigned(self.unsigned().wrapping_shr(other))
+        }
+
+        fn is_zero(self) -> bool {
+            self == Self::ZERO
+        }
+
+        fn checked_add(self, other: Self) -> Option<Self> {
+            self.checked_add(other)
+        }
+
+        fn checked_sub(self, other: Self) -> Option<Self> {
+            self.checked_sub(other)
+        }
+
+        fn wrapping_neg(self) -> Self {
+            <Self>::wrapping_neg(self)
+        }
+
+        fn wrapping_add(self, other: Self) -> Self {
+            <Self>::wrapping_add(self, other)
+        }
+
+        fn wrapping_mul(self, other: Self) -> Self {
+            <Self>::wrapping_mul(self, other)
+        }
+
+        fn wrapping_sub(self, other: Self) -> Self {
+            <Self>::wrapping_sub(self, other)
+        }
+
+        fn wrapping_shl(self, other: u32) -> Self {
+            <Self>::wrapping_shl(self, other)
+        }
+
+        fn wrapping_shr(self, other: u32) -> Self {
+            <Self>::wrapping_shr(self, other)
+        }
+
+        fn rotate_left(self, other: u32) -> Self {
+            <Self>::rotate_left(self, other)
+        }
+
+        fn overflowing_add(self, other: Self) -> (Self, bool) {
+            <Self>::overflowing_add(self, other)
+        }
+
+        fn overflowing_sub(self, other: Self) -> (Self, bool) {
+            <Self>::overflowing_sub(self, other)
+        }
+
+        fn leading_zeros(self) -> u32 {
+            <Self>::leading_zeros(self)
+        }
+
+        fn ilog2(self) -> u32 {
+            // On our older MSRV, this resolves to the trait method. Which won't actually work,
+            // but this is only called behind other gates.
+            #[allow(clippy::incompatible_msrv)]
+            <Self>::ilog2(self)
+        }
+    };
+}
+
+macro_rules! int_impl {
+    ($ity:ty, $uty:ty) => {
+        impl MinInt for $uty {
+            type OtherSign = $ity;
+            type Unsigned = $uty;
+
+            const BITS: u32 = <Self as MinInt>::ZERO.count_zeros();
+            const SIGNED: bool = Self::MIN != Self::ZERO;
+
+            const ZERO: Self = 0;
+            const ONE: Self = 1;
+            const MIN: Self = <Self>::MIN;
+            const MAX: Self = <Self>::MAX;
+        }
+
+        impl Int for $uty {
+            fn signed(self) -> $ity {
+                self as $ity
+            }
+
+            fn unsigned(self) -> Self {
+                self
+            }
+
+            fn abs(self) -> Self {
+                unimplemented!()
+            }
+
+            // It makes writing macros easier if this is implemented for both signed and unsigned
+            #[allow(clippy::wrong_self_convention)]
+            fn from_unsigned(me: $uty) -> Self {
+                me
+            }
+
+            fn abs_diff(self, other: Self) -> Self {
+                self.abs_diff(other)
+            }
+
+            int_impl_common!($uty);
+        }
+
+        impl MinInt for $ity {
+            type OtherSign = $uty;
+            type Unsigned = $uty;
+
+            const BITS: u32 = <Self as MinInt>::ZERO.count_zeros();
+            const SIGNED: bool = Self::MIN != Self::ZERO;
+
+            const ZERO: Self = 0;
+            const ONE: Self = 1;
+            const MIN: Self = <Self>::MIN;
+            const MAX: Self = <Self>::MAX;
+        }
+
+        impl Int for $ity {
+            fn signed(self) -> Self {
+                self
+            }
+
+            fn unsigned(self) -> $uty {
+                self as $uty
+            }
+
+            fn abs(self) -> Self {
+                self.abs()
+            }
+
+            fn from_unsigned(me: $uty) -> Self {
+                me as $ity
+            }
+
+            fn abs_diff(self, other: Self) -> $uty {
+                self.abs_diff(other)
+            }
+
+            int_impl_common!($ity);
+        }
+    };
+}
+
+int_impl!(isize, usize);
+int_impl!(i8, u8);
+int_impl!(i16, u16);
+int_impl!(i32, u32);
+int_impl!(i64, u64);
+int_impl!(i128, u128);
+
+/// Trait for integers twice the bit width of another integer. This is implemented for all
+/// fixed-width primitives except `u8` and `i8`, because there is not a smaller primitive.
+pub trait DInt: MinInt {
+    /// Integer that is half the bit width of the integer this trait is implemented for
+    type H: HInt<D = Self>;
+
+    /// Returns the low half of `self`
+    fn lo(self) -> Self::H;
+    /// Returns the high half of `self`
+    fn hi(self) -> Self::H;
+    /// Returns the low and high halves of `self` as a tuple
+    fn lo_hi(self) -> (Self::H, Self::H) {
+        (self.lo(), self.hi())
+    }
+    /// Constructs an integer using lower and higher half parts
+    #[allow(unused)]
+    fn from_lo_hi(lo: Self::H, hi: Self::H) -> Self {
+        lo.zero_widen() | hi.widen_hi()
+    }
+}
+
+/// Trait for integers half the bit width of another integer. This is implemented for all
+/// primitives except for `u128`, because there is not a larger primitive.
+pub trait HInt: Int {
+    /// Integer that is double the bit width of the integer this trait is implemented for
+    type D: DInt<H = Self> + MinInt;
+
+    // NB: some of the below methods could have default implementations (e.g. `widen_hi`), but for
+    // unknown reasons this can cause infinite recursion when optimizations are disabled. See
+    // <https://github.com/rust-lang/compiler-builtins/pull/707> for context.
+
+    /// Widens (using default extension) the integer to have double bit width
+    fn widen(self) -> Self::D;
+    /// Widens (zero extension only) the integer to have double bit width. This is needed to get
+    /// around problems with associated type bounds (such as `Int<OtherSign: DInt>`) being unstable
+    fn zero_widen(self) -> Self::D;
+    /// Widens the integer to have double bit width and shifts the integer into the higher bits
+    #[allow(unused)]
+    fn widen_hi(self) -> Self::D;
+    /// Widening multiplication with zero widening. This cannot overflow.
+    fn zero_widen_mul(self, rhs: Self) -> Self::D;
+    /// Widening multiplication. This cannot overflow.
+    fn widen_mul(self, rhs: Self) -> Self::D;
+}
+
+macro_rules! impl_d_int {
+    ($($half:ident $double:ident),*) => {
+        $(
+            impl DInt for $double {
+                type H = $half;
+                // `hi` shifts the upper bits down before truncating; `lo` just truncates.
+                fn hi(self) -> $half {
+                    (self >> <$half as MinInt>::BITS) as $half
+                }
+                fn lo(self) -> $half {
+                    self as $half
+                }
+            }
+        )*
+    };
+}
+
+macro_rules! impl_h_int {
+    ($($half:ident $uhalf:ident $double:ident),*) => {
+        $(
+            impl HInt for $half {
+                type D = $double;
+                // `$uhalf` is the unsigned twin of `$half`; casting through it zero-extends.
+                fn zero_widen(self) -> $double {
+                    (self as $uhalf) as $double
+                }
+                fn widen(self) -> $double {
+                    self as $double
+                }
+                fn widen_hi(self) -> $double {
+                    (self as $double) << <$half as MinInt>::BITS
+                }
+                fn zero_widen_mul(self, rhs: Self) -> $double {
+                    <$double>::wrapping_mul(self.zero_widen(), rhs.zero_widen())
+                }
+                fn widen_mul(self, rhs: Self) -> $double {
+                    <$double>::wrapping_mul(self.widen(), rhs.widen())
+                }
+            }
+        )*
+    };
+}
+
+impl_d_int!(u8 u16, u16 u32, u32 u64, u64 u128, i8 i16, i16 i32, i32 i64, i64 i128);
+impl_h_int!(
+    u8 u8 u16,
+    u16 u16 u32,
+    u32 u32 u64,
+    u64 u64 u128,
+    i8 u8 i16,
+    i16 u16 i32,
+    i32 u32 i64,
+    i64 u64 i128
+);
+
+/// Trait to express (possibly lossy) casting of integers
+pub trait CastInto<T: Copy>: Copy {
+    /// By default, casts should be exact.
+    fn cast(self) -> T;
+
+    /// Call for casts that are expected to truncate.
+    fn cast_lossy(self) -> T;
+}
+
+pub trait CastFrom<T: Copy>: Copy {
+    /// By default, casts should be exact.
+    fn cast_from(value: T) -> Self;
+
+    /// Call for casts that are expected to truncate.
+    fn cast_from_lossy(value: T) -> Self;
+}
+
+impl<T: Copy, U: CastInto<T> + Copy> CastFrom<U> for T {
+    fn cast_from(value: U) -> Self {
+        value.cast()
+    }
+
+    fn cast_from_lossy(value: U) -> Self {
+        value.cast_lossy()
+    }
+}
+
+macro_rules! cast_into {
+    ($from:ty) => {
+        cast_into!($from; usize, isize, u8, i8, u16, i16, u32, i32, u64, i64, u128, i128);
+    };
+    ($from:ty; $($to:ty),*) => {$(
+        impl CastInto<$to> for $from {
+            fn cast(self) -> $to {
+                // `as` never fails, so the "exact cast" contract can only be policed by a
+                // range check that runs in debug builds.
+                #[cfg(not(feature = "compiler-builtins"))]
+                debug_assert!(<$to>::try_from(self).is_ok(), "failed cast from {self}");
+                self as $to
+            }
+
+            fn cast_lossy(self) -> $to {
+                self as $to
+            }
+        }
+    )*};
+}
+
+macro_rules! cast_into_float {
+    ($ty:ty) => {
+        #[cfg(f16_enabled)]
+        cast_into_float!($ty; f16);
+
+        cast_into_float!($ty; f32, f64);
+
+        #[cfg(f128_enabled)]
+        cast_into_float!($ty; f128);
+    };
+    ($ty:ty; $($into:ty),*) => {$(
+        impl CastInto<$into> for $ty {
+            fn cast(self) -> $into {
+                #[cfg(not(feature = "compiler-builtins"))]
+                debug_assert_eq!(self as $into as $ty, self, "inexact float cast");
+                self as $into
+            }
+
+            fn cast_lossy(self) -> $into {
+                self as $into
+            }
+        }
+    )*};
+}
+
+cast_into!(usize);
+cast_into!(isize);
+cast_into!(u8);
+cast_into!(i8);
+cast_into!(u16);
+cast_into!(i16);
+cast_into!(u32);
+cast_into!(i32);
+cast_into!(u64);
+cast_into!(i64);
+cast_into!(u128);
+cast_into!(i128);
+
+cast_into_float!(i8);
+cast_into_float!(i16);
+cast_into_float!(i32);
+cast_into_float!(i64);
+cast_into_float!(i128);
diff --git a/libm/src/math/support/macros.rs b/libm/src/math/support/macros.rs
new file mode 100644
index 000000000..0b72db0e4
--- /dev/null
+++ b/libm/src/math/support/macros.rs
@@ -0,0 +1,157 @@
+/// `libm` cannot have dependencies, so this is vendored directly from the `cfg-if` crate
+/// (with some comments stripped for compactness).
+macro_rules! cfg_if {
+    // match if/else chains with a final `else`
+    ($(
+        if #[cfg($meta:meta)] { $($tokens:tt)* }
+    ) else * else {
+        $($tokens2:tt)*
+    }) => {
+        cfg_if! { @__items () ; $( ( ($meta) ($($tokens)*) ), )* ( () ($($tokens2)*) ), }
+    };
+
+    // match if/else chains lacking a final `else`
+    (
+        if #[cfg($i_met:meta)] { $($i_tokens:tt)* }
+        $( else if #[cfg($e_met:meta)] { $($e_tokens:tt)* } )*
+    ) => {
+        cfg_if! {
+            @__items
+            () ;
+            ( ($i_met) ($($i_tokens)*) ),
+            $( ( ($e_met) ($($e_tokens)*) ), )*
+            ( () () ),
+        }
+    };
+
+    // Internal and recursive macro to emit all the items
+    //
+    // Collects all the negated cfgs in a list at the beginning and after the
+    // semicolon is all the remaining items
+    (@__items ($($not:meta,)*) ; ) => {};
+    (@__items ($($not:meta,)*) ; ( ($($m:meta),*) ($($tokens:tt)*) ), $($rest:tt)*) => {
+        #[cfg(all($($m,)* not(any($($not),*))))] cfg_if! { @__identity $($tokens)* }
+        cfg_if! { @__items ($($not,)* $($m,)*) ; $($rest)* }
+    };
+
+    // Internal pass-through macro that makes the expansion above work out right for
+    // different match types, because of how macro matching and expansion work.
+    (@__identity $($tokens:tt)*) => { $($tokens)* };
+}
+
+/// Choose between using an arch-specific implementation and the function body. Returns directly
+/// if the arch implementation is used, otherwise continues with the rest of the function.
+///
+/// Specify a `use_arch` meta field if an architecture-specific implementation is provided.
+/// These live in the `math::arch::some_target_arch` module.
+///
+/// Specify a `use_arch_required` meta field if something architecture-specific must be used
+/// regardless of feature configuration (`force-soft-floats`).
+///
+/// The passed meta options do not need to account for the `arch` target feature.
+macro_rules! select_implementation {
+    (
+        name: $fn_name:ident,
+        // Configuration meta for when to use arch-specific implementation that requires hard
+        // float ops
+        $( use_arch: $use_arch:meta, )?
+        // Configuration meta for when to use the arch module regardless of whether softfloats
+        // have been requested.
+        $( use_arch_required: $use_arch_required:meta, )?
+        args: $($arg:ident),+ ,
+    ) => {
+        // FIXME: these use paths that are pretty fragile (`super`). We should figure out
+        // something better w.r.t. how this is vendored into compiler-builtins.
+
+        // However, we do need a few things from `arch` that are used even with soft floats.
+        select_implementation! {
+            @cfg $($use_arch_required)?;
+            if true {
+                return  super::arch::$fn_name( $($arg),+ );
+            }
+        }
+
+        // By default, never use arch-specific implementations if we have force-soft-floats
+        #[cfg(arch_enabled)]
+        select_implementation! {
+            @cfg $($use_arch)?;
+            // Wrap in `if true` to avoid unused warnings
+            if true {
+                return  super::arch::$fn_name( $($arg),+ );
+            }
+        }
+    };
+
+    // Coalesce helper to construct an expression only if a config is provided
+    (@cfg ; $ex:expr) => { };
+    (@cfg $provided:meta; $ex:expr) => { #[cfg($provided)] $ex };
+}
+
+/// Construct a 16-bit float from hex float representation (C-style), guaranteed to
+/// evaluate at compile time.
+#[cfg(f16_enabled)]
+#[cfg_attr(feature = "unstable-public-internals", macro_export)]
+#[allow(unused_macros)]
+macro_rules! hf16 {
+    ($s:literal) => {{
+        const X: f16 = $crate::support::hf16($s);
+        X
+    }};
+}
+
+/// Construct a 32-bit float from hex float representation (C-style), guaranteed to
+/// evaluate at compile time.
+#[allow(unused_macros)]
+#[cfg_attr(feature = "unstable-public-internals", macro_export)]
+macro_rules! hf32 {
+    ($s:literal) => {{
+        const X: f32 = $crate::support::hf32($s);
+        X
+    }};
+}
+
+/// Construct a 64-bit float from hex float representation (C-style), guaranteed to
+/// evaluate at compile time.
+#[allow(unused_macros)]
+#[cfg_attr(feature = "unstable-public-internals", macro_export)]
+macro_rules! hf64 {
+    ($s:literal) => {{
+        const X: f64 = $crate::support::hf64($s);
+        X
+    }};
+}
+
+/// Construct a 128-bit float from hex float representation (C-style), guaranteed to
+/// evaluate at compile time.
+#[cfg(f128_enabled)]
+#[allow(unused_macros)]
+#[cfg_attr(feature = "unstable-public-internals", macro_export)]
+macro_rules! hf128 {
+    ($s:literal) => {{
+        const X: f128 = $crate::support::hf128($s);
+        X
+    }};
+}
+
+/// Assert `F::biteq` with better messages.
+#[cfg(test)]
+macro_rules! assert_biteq {
+    ($left:expr, $right:expr, $($tt:tt)*) => {{
+        use $crate::support::Int;
+        let l = $left;
+        let r = $right;
+        let bits = Int::leading_zeros(l.to_bits() - l.to_bits()); // hack to get the width from the value
+        assert!(
+            l.biteq(r),
+            "{}\nl: {l:?} ({lb:#0width$x})\nr: {r:?} ({rb:#0width$x})",
+            format_args!($($tt)*),
+            lb = l.to_bits(),
+            rb = r.to_bits(),
+            width = ((bits / 4) + 2) as usize,
+
+        );
+    }};
+    ($left:expr, $right:expr $(,)?) => {
+        assert_biteq!($left, $right, "")
+    };
+}
diff --git a/libm/src/math/support/mod.rs b/libm/src/math/support/mod.rs
new file mode 100644
index 000000000..ee3f2bbdf
--- /dev/null
+++ b/libm/src/math/support/mod.rs
@@ -0,0 +1,29 @@
+#[macro_use]
+pub mod macros;
+mod big;
+mod env;
+mod float_traits;
+pub mod hex_float;
+mod int_traits;
+
+#[allow(unused_imports)]
+pub use big::{i256, u256};
+pub use env::{FpResult, Round, Status};
+#[allow(unused_imports)]
+pub use float_traits::{DFloat, Float, HFloat, IntTy};
+pub(crate) use float_traits::{f32_from_bits, f64_from_bits};
+#[cfg(f16_enabled)]
+#[allow(unused_imports)]
+pub use hex_float::hf16;
+#[cfg(f128_enabled)]
+#[allow(unused_imports)]
+pub use hex_float::hf128;
+#[allow(unused_imports)]
+pub use hex_float::{Hexf, hf32, hf64};
+pub use int_traits::{CastFrom, CastInto, DInt, HInt, Int, MinInt};
+
+/// Hint to the compiler that the current path is cold.
+pub fn cold_path() {
+    #[cfg(intrinsics_enabled)]
+    core::intrinsics::cold_path();
+}
diff --git a/src/math/tan.rs b/libm/src/math/tan.rs
similarity index 100%
rename from src/math/tan.rs
rename to libm/src/math/tan.rs
diff --git a/src/math/tanf.rs b/libm/src/math/tanf.rs
similarity index 100%
rename from src/math/tanf.rs
rename to libm/src/math/tanf.rs
diff --git a/src/math/tanh.rs b/libm/src/math/tanh.rs
similarity index 100%
rename from src/math/tanh.rs
rename to libm/src/math/tanh.rs
diff --git a/src/math/tanhf.rs b/libm/src/math/tanhf.rs
similarity index 100%
rename from src/math/tanhf.rs
rename to libm/src/math/tanhf.rs
diff --git a/src/math/tgamma.rs b/libm/src/math/tgamma.rs
similarity index 97%
rename from src/math/tgamma.rs
rename to libm/src/math/tgamma.rs
index 3f38c0b1d..305986064 100644
--- a/src/math/tgamma.rs
+++ b/libm/src/math/tgamma.rs
@@ -45,7 +45,8 @@ fn sinpi(mut x: f64) -> f64 {
         1 => k_cos(x, 0.0),
         2 => k_sin(-x, 0.0, 0),
         3 => -k_cos(x, 0.0),
-        0 | _ => k_sin(x, 0.0, 0),
+        // 0
+        _ => k_sin(x, 0.0, 0),
     }
 }
 
@@ -129,6 +130,7 @@ fn s(x: f64) -> f64 {
     return num / den;
 }
 
+/// The [Gamma function](https://en.wikipedia.org/wiki/Gamma_function) (f64).
 #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
 pub fn tgamma(mut x: f64) -> f64 {
     let u: u64 = x.to_bits();
@@ -143,7 +145,7 @@ pub fn tgamma(mut x: f64) -> f64 {
     /* special cases */
     if ix >= 0x7ff00000 {
         /* tgamma(nan)=nan, tgamma(inf)=inf, tgamma(-inf)=nan with invalid */
-        return x + core::f64::INFINITY;
+        return x + f64::INFINITY;
     }
     if ix < ((0x3ff - 54) << 20) {
         /* |x| < 2^-54: tgamma(x) ~ 1/x, +-0 raises div-by-zero */
diff --git a/src/math/tgammaf.rs b/libm/src/math/tgammaf.rs
similarity index 64%
rename from src/math/tgammaf.rs
rename to libm/src/math/tgammaf.rs
index 23e3814f9..fe178f7a3 100644
--- a/src/math/tgammaf.rs
+++ b/libm/src/math/tgammaf.rs
@@ -1,5 +1,6 @@
 use super::tgamma;
 
+/// The [Gamma function](https://en.wikipedia.org/wiki/Gamma_function) (f32).
 #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
 pub fn tgammaf(x: f32) -> f32 {
     tgamma(x as f64) as f32
diff --git a/libm/src/math/trunc.rs b/libm/src/math/trunc.rs
new file mode 100644
index 000000000..fa50d55e1
--- /dev/null
+++ b/libm/src/math/trunc.rs
@@ -0,0 +1,53 @@
+/// Rounds the number toward 0 to the closest integral value (f16).
+///
+/// This effectively removes the fractional part of the number, leaving the integral part.
+#[cfg(f16_enabled)]
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn truncf16(x: f16) -> f16 {
+    super::generic::trunc(x)
+}
+
+/// Rounds the number toward 0 to the closest integral value (f32).
+///
+/// This effectively removes the fractional part of the number, leaving the integral part.
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn truncf(x: f32) -> f32 {
+    select_implementation! {
+        name: truncf,
+        use_arch: all(target_arch = "wasm32", intrinsics_enabled),
+        args: x,
+    }
+
+    super::generic::trunc(x)
+}
+
+/// Rounds the number toward 0 to the closest integral value (f64).
+///
+/// This effectively removes the fractional part of the number, leaving the integral part.
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn trunc(x: f64) -> f64 {
+    select_implementation! {
+        name: trunc,
+        use_arch: all(target_arch = "wasm32", intrinsics_enabled),
+        args: x,
+    }
+
+    super::generic::trunc(x)
+}
+
+/// Rounds the number toward 0 to the closest integral value (f128).
+///
+/// This effectively removes the fractional part of the number, leaving the integral part.
+#[cfg(f128_enabled)]
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn truncf128(x: f128) -> f128 {
+    super::generic::trunc(x)
+}
+
+#[cfg(test)]
+mod tests {
+    #[test]
+    fn sanity_check() {
+        assert_eq!((super::truncf(1.1), super::trunc(1.1)), (1.0, 1.0));
+    }
+}
diff --git a/libm/src/math/truncf.rs b/libm/src/math/truncf.rs
new file mode 100644
index 000000000..14533a267
--- /dev/null
+++ b/libm/src/math/truncf.rs
@@ -0,0 +1,23 @@
+/// Rounds the number toward 0 to the closest integral value (f32).
+///
+/// This effectively removes the fractional part of the number, leaving the integral part.
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn truncf(x: f32) -> f32 {
+    select_implementation! {
+        name: truncf,
+        use_arch: all(target_arch = "wasm32", intrinsics_enabled),
+        args: x,
+    }
+
+    super::generic::trunc(x)
+}
+
+// PowerPC tests are failing on LLVM 13: https://github.com/rust-lang/rust/issues/88520
+#[cfg(not(target_arch = "powerpc64"))]
+#[cfg(test)]
+mod tests {
+    #[test]
+    fn sanity_check() {
+        assert_eq!(super::truncf(1.1), 1.0);
+    }
+}
diff --git a/libm/src/math/truncf128.rs b/libm/src/math/truncf128.rs
new file mode 100644
index 000000000..9dccc0d0e
--- /dev/null
+++ b/libm/src/math/truncf128.rs
@@ -0,0 +1,7 @@
+/// Rounds `x` toward zero to the closest integral value (f128).
+///
+/// This discards the fractional part of `x`, leaving only the integral part.
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn truncf128(x: f128) -> f128 {
+    super::generic::trunc(x)
+}
diff --git a/libm/src/math/truncf16.rs b/libm/src/math/truncf16.rs
new file mode 100644
index 000000000..d7c3d225c
--- /dev/null
+++ b/libm/src/math/truncf16.rs
@@ -0,0 +1,7 @@
+/// Rounds `x` toward zero to the closest integral value (f16).
+///
+/// This discards the fractional part of `x`, leaving only the integral part.
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn truncf16(x: f16) -> f16 {
+    super::generic::trunc(x)
+}
diff --git a/src/lib.rs b/src/lib.rs
deleted file mode 100644
index 6d95fa173..000000000
--- a/src/lib.rs
+++ /dev/null
@@ -1,47 +0,0 @@
-//! libm in pure Rust
-#![no_std]
-#![cfg_attr(feature = "unstable", allow(internal_features))]
-#![cfg_attr(feature = "unstable", feature(core_intrinsics))]
-#![allow(clippy::assign_op_pattern)]
-#![allow(clippy::deprecated_cfg_attr)]
-#![allow(clippy::eq_op)]
-#![allow(clippy::float_cmp)]
-#![allow(clippy::int_plus_one)]
-#![allow(clippy::many_single_char_names)]
-#![allow(clippy::mixed_case_hex_literals)]
-#![allow(clippy::needless_return)]
-#![allow(clippy::unreadable_literal)]
-
-mod libm_helper;
-mod math;
-
-use core::{f32, f64};
-
-pub use libm_helper::*;
-
-pub use self::math::*;
-
-/// Approximate equality with 1 ULP of tolerance
-#[doc(hidden)]
-#[inline]
-pub fn _eqf(a: f32, b: f32) -> Result<(), u32> {
-    if a.is_nan() && b.is_nan() {
-        Ok(())
-    } else {
-        let err = (a.to_bits() as i32).wrapping_sub(b.to_bits() as i32).abs();
-
-        if err <= 1 { Ok(()) } else { Err(err as u32) }
-    }
-}
-
-#[doc(hidden)]
-#[inline]
-pub fn _eq(a: f64, b: f64) -> Result<(), u64> {
-    if a.is_nan() && b.is_nan() {
-        Ok(())
-    } else {
-        let err = (a.to_bits() as i64).wrapping_sub(b.to_bits() as i64).abs();
-
-        if err <= 1 { Ok(()) } else { Err(err as u64) }
-    }
-}
diff --git a/src/math/cbrt.rs b/src/math/cbrt.rs
deleted file mode 100644
index b4e77eaa2..000000000
--- a/src/math/cbrt.rs
+++ /dev/null
@@ -1,113 +0,0 @@
-/* origin: FreeBSD /usr/src/lib/msun/src/s_cbrt.c */
-/*
- * ====================================================
- * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
- *
- * Developed at SunPro, a Sun Microsystems, Inc. business.
- * Permission to use, copy, modify, and distribute this
- * software is freely granted, provided that this notice
- * is preserved.
- * ====================================================
- *
- * Optimized by Bruce D. Evans.
- */
-/* cbrt(x)
- * Return cube root of x
- */
-
-use core::f64;
-
-const B1: u32 = 715094163; /* B1 = (1023-1023/3-0.03306235651)*2**20 */
-const B2: u32 = 696219795; /* B2 = (1023-1023/3-54/3-0.03306235651)*2**20 */
-
-/* |1/cbrt(x) - p(x)| < 2**-23.5 (~[-7.93e-8, 7.929e-8]). */
-const P0: f64 = 1.87595182427177009643; /* 0x3ffe03e6, 0x0f61e692 */
-const P1: f64 = -1.88497979543377169875; /* 0xbffe28e0, 0x92f02420 */
-const P2: f64 = 1.621429720105354466140; /* 0x3ff9f160, 0x4a49d6c2 */
-const P3: f64 = -0.758397934778766047437; /* 0xbfe844cb, 0xbee751d9 */
-const P4: f64 = 0.145996192886612446982; /* 0x3fc2b000, 0xd4e4edd7 */
-
-// Cube root (f64)
-///
-/// Computes the cube root of the argument.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
-pub fn cbrt(x: f64) -> f64 {
-    let x1p54 = f64::from_bits(0x4350000000000000); // 0x1p54 === 2 ^ 54
-
-    let mut ui: u64 = x.to_bits();
-    let mut r: f64;
-    let s: f64;
-    let mut t: f64;
-    let w: f64;
-    let mut hx: u32 = (ui >> 32) as u32 & 0x7fffffff;
-
-    if hx >= 0x7ff00000 {
-        /* cbrt(NaN,INF) is itself */
-        return x + x;
-    }
-
-    /*
-     * Rough cbrt to 5 bits:
-     *    cbrt(2**e*(1+m) ~= 2**(e/3)*(1+(e%3+m)/3)
-     * where e is integral and >= 0, m is real and in [0, 1), and "/" and
-     * "%" are integer division and modulus with rounding towards minus
-     * infinity.  The RHS is always >= the LHS and has a maximum relative
-     * error of about 1 in 16.  Adding a bias of -0.03306235651 to the
-     * (e%3+m)/3 term reduces the error to about 1 in 32. With the IEEE
-     * floating point representation, for finite positive normal values,
-     * ordinary integer divison of the value in bits magically gives
-     * almost exactly the RHS of the above provided we first subtract the
-     * exponent bias (1023 for doubles) and later add it back.  We do the
-     * subtraction virtually to keep e >= 0 so that ordinary integer
-     * division rounds towards minus infinity; this is also efficient.
-     */
-    if hx < 0x00100000 {
-        /* zero or subnormal? */
-        ui = (x * x1p54).to_bits();
-        hx = (ui >> 32) as u32 & 0x7fffffff;
-        if hx == 0 {
-            return x; /* cbrt(0) is itself */
-        }
-        hx = hx / 3 + B2;
-    } else {
-        hx = hx / 3 + B1;
-    }
-    ui &= 1 << 63;
-    ui |= (hx as u64) << 32;
-    t = f64::from_bits(ui);
-
-    /*
-     * New cbrt to 23 bits:
-     *    cbrt(x) = t*cbrt(x/t**3) ~= t*P(t**3/x)
-     * where P(r) is a polynomial of degree 4 that approximates 1/cbrt(r)
-     * to within 2**-23.5 when |r - 1| < 1/10.  The rough approximation
-     * has produced t such than |t/cbrt(x) - 1| ~< 1/32, and cubing this
-     * gives us bounds for r = t**3/x.
-     *
-     * Try to optimize for parallel evaluation as in __tanf.c.
-     */
-    r = (t * t) * (t / x);
-    t = t * ((P0 + r * (P1 + r * P2)) + ((r * r) * r) * (P3 + r * P4));
-
-    /*
-     * Round t away from zero to 23 bits (sloppily except for ensuring that
-     * the result is larger in magnitude than cbrt(x) but not much more than
-     * 2 23-bit ulps larger).  With rounding towards zero, the error bound
-     * would be ~5/6 instead of ~4/6.  With a maximum error of 2 23-bit ulps
-     * in the rounded t, the infinite-precision error in the Newton
-     * approximation barely affects third digit in the final error
-     * 0.667; the error in the rounded t can be up to about 3 23-bit ulps
-     * before the final error is larger than 0.667 ulps.
-     */
-    ui = t.to_bits();
-    ui = (ui + 0x80000000) & 0xffffffffc0000000;
-    t = f64::from_bits(ui);
-
-    /* one step Newton iteration to 53 bits with error < 0.667 ulps */
-    s = t * t; /* t*t is exact */
-    r = x / s; /* error <= 0.5 ulps; |r| < |t| */
-    w = t + t; /* t+t is exact */
-    r = (r - t) / (w + r); /* r-t is exact; w+r ~= 3*t */
-    t = t + t * r; /* error <= 0.5 + 0.5/3 + epsilon */
-    t
-}
diff --git a/src/math/ceil.rs b/src/math/ceil.rs
deleted file mode 100644
index 1593fdaff..000000000
--- a/src/math/ceil.rs
+++ /dev/null
@@ -1,75 +0,0 @@
-#![allow(unreachable_code)]
-use core::f64;
-
-const TOINT: f64 = 1. / f64::EPSILON;
-
-/// Ceil (f64)
-///
-/// Finds the nearest integer greater than or equal to `x`.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
-pub fn ceil(x: f64) -> f64 {
-    // On wasm32 we know that LLVM's intrinsic will compile to an optimized
-    // `f64.ceil` native instruction, so we can leverage this for both code size
-    // and speed.
-    llvm_intrinsically_optimized! {
-        #[cfg(target_arch = "wasm32")] {
-            return unsafe { ::core::intrinsics::ceilf64(x) }
-        }
-    }
-    #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))]
-    {
-        //use an alternative implementation on x86, because the
-        //main implementation fails with the x87 FPU used by
-        //debian i386, probably due to excess precision issues.
-        //basic implementation taken from https://github.com/rust-lang/libm/issues/219
-        use super::fabs;
-        if fabs(x).to_bits() < 4503599627370496.0_f64.to_bits() {
-            let truncated = x as i64 as f64;
-            if truncated < x {
-                return truncated + 1.0;
-            } else {
-                return truncated;
-            }
-        } else {
-            return x;
-        }
-    }
-    let u: u64 = x.to_bits();
-    let e: i64 = (u >> 52 & 0x7ff) as i64;
-    let y: f64;
-
-    if e >= 0x3ff + 52 || x == 0. {
-        return x;
-    }
-    // y = int(x) - x, where int(x) is an integer neighbor of x
-    y = if (u >> 63) != 0 { x - TOINT + TOINT - x } else { x + TOINT - TOINT - x };
-    // special case because of non-nearest rounding modes
-    if e < 0x3ff {
-        force_eval!(y);
-        return if (u >> 63) != 0 { -0. } else { 1. };
-    }
-    if y < 0. { x + y + 1. } else { x + y }
-}
-
-#[cfg(test)]
-mod tests {
-    use core::f64::*;
-
-    use super::*;
-
-    #[test]
-    fn sanity_check() {
-        assert_eq!(ceil(1.1), 2.0);
-        assert_eq!(ceil(2.9), 3.0);
-    }
-
-    /// The spec: https://en.cppreference.com/w/cpp/numeric/math/ceil
-    #[test]
-    fn spec_tests() {
-        // Not Asserted: that the current rounding mode has no effect.
-        assert!(ceil(NAN).is_nan());
-        for f in [0.0, -0.0, INFINITY, NEG_INFINITY].iter().copied() {
-            assert_eq!(ceil(f), f);
-        }
-    }
-}
diff --git a/src/math/ceilf.rs b/src/math/ceilf.rs
deleted file mode 100644
index bf9ba1227..000000000
--- a/src/math/ceilf.rs
+++ /dev/null
@@ -1,66 +0,0 @@
-use core::f32;
-
-/// Ceil (f32)
-///
-/// Finds the nearest integer greater than or equal to `x`.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
-pub fn ceilf(x: f32) -> f32 {
-    // On wasm32 we know that LLVM's intrinsic will compile to an optimized
-    // `f32.ceil` native instruction, so we can leverage this for both code size
-    // and speed.
-    llvm_intrinsically_optimized! {
-        #[cfg(target_arch = "wasm32")] {
-            return unsafe { ::core::intrinsics::ceilf32(x) }
-        }
-    }
-    let mut ui = x.to_bits();
-    let e = (((ui >> 23) & 0xff).wrapping_sub(0x7f)) as i32;
-
-    if e >= 23 {
-        return x;
-    }
-    if e >= 0 {
-        let m = 0x007fffff >> e;
-        if (ui & m) == 0 {
-            return x;
-        }
-        force_eval!(x + f32::from_bits(0x7b800000));
-        if ui >> 31 == 0 {
-            ui += m;
-        }
-        ui &= !m;
-    } else {
-        force_eval!(x + f32::from_bits(0x7b800000));
-        if ui >> 31 != 0 {
-            return -0.0;
-        } else if ui << 1 != 0 {
-            return 1.0;
-        }
-    }
-    f32::from_bits(ui)
-}
-
-// PowerPC tests are failing on LLVM 13: https://github.com/rust-lang/rust/issues/88520
-#[cfg(not(target_arch = "powerpc64"))]
-#[cfg(test)]
-mod tests {
-    use core::f32::*;
-
-    use super::*;
-
-    #[test]
-    fn sanity_check() {
-        assert_eq!(ceilf(1.1), 2.0);
-        assert_eq!(ceilf(2.9), 3.0);
-    }
-
-    /// The spec: https://en.cppreference.com/w/cpp/numeric/math/ceil
-    #[test]
-    fn spec_tests() {
-        // Not Asserted: that the current rounding mode has no effect.
-        assert!(ceilf(NAN).is_nan());
-        for f in [0.0, -0.0, INFINITY, NEG_INFINITY].iter().copied() {
-            assert_eq!(ceilf(f), f);
-        }
-    }
-}
diff --git a/src/math/copysign.rs b/src/math/copysign.rs
deleted file mode 100644
index 1f4a35a33..000000000
--- a/src/math/copysign.rs
+++ /dev/null
@@ -1,12 +0,0 @@
-/// Sign of Y, magnitude of X (f64)
-///
-/// Constructs a number with the magnitude (absolute value) of its
-/// first argument, `x`, and the sign of its second argument, `y`.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
-pub fn copysign(x: f64, y: f64) -> f64 {
-    let mut ux = x.to_bits();
-    let uy = y.to_bits();
-    ux &= (!0) >> 1;
-    ux |= uy & (1 << 63);
-    f64::from_bits(ux)
-}
diff --git a/src/math/fabs.rs b/src/math/fabs.rs
deleted file mode 100644
index 3b0628aa6..000000000
--- a/src/math/fabs.rs
+++ /dev/null
@@ -1,42 +0,0 @@
-use core::u64;
-
-/// Absolute value (magnitude) (f64)
-/// Calculates the absolute value (magnitude) of the argument `x`,
-/// by direct manipulation of the bit representation of `x`.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
-pub fn fabs(x: f64) -> f64 {
-    // On wasm32 we know that LLVM's intrinsic will compile to an optimized
-    // `f64.abs` native instruction, so we can leverage this for both code size
-    // and speed.
-    llvm_intrinsically_optimized! {
-        #[cfg(target_arch = "wasm32")] {
-            return unsafe { ::core::intrinsics::fabsf64(x) }
-        }
-    }
-    f64::from_bits(x.to_bits() & (u64::MAX / 2))
-}
-
-#[cfg(test)]
-mod tests {
-    use core::f64::*;
-
-    use super::*;
-
-    #[test]
-    fn sanity_check() {
-        assert_eq!(fabs(-1.0), 1.0);
-        assert_eq!(fabs(2.8), 2.8);
-    }
-
-    /// The spec: https://en.cppreference.com/w/cpp/numeric/math/fabs
-    #[test]
-    fn spec_tests() {
-        assert!(fabs(NAN).is_nan());
-        for f in [0.0, -0.0].iter().copied() {
-            assert_eq!(fabs(f), 0.0);
-        }
-        for f in [INFINITY, NEG_INFINITY].iter().copied() {
-            assert_eq!(fabs(f), INFINITY);
-        }
-    }
-}
diff --git a/src/math/fdim.rs b/src/math/fdim.rs
deleted file mode 100644
index 014930097..000000000
--- a/src/math/fdim.rs
+++ /dev/null
@@ -1,22 +0,0 @@
-use core::f64;
-
-/// Positive difference (f64)
-///
-/// Determines the positive difference between arguments, returning:
-/// * x - y	if x > y, or
-/// * +0	if x <= y, or
-/// * NAN	if either argument is NAN.
-///
-/// A range error may occur.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
-pub fn fdim(x: f64, y: f64) -> f64 {
-    if x.is_nan() {
-        x
-    } else if y.is_nan() {
-        y
-    } else if x > y {
-        x - y
-    } else {
-        0.0
-    }
-}
diff --git a/src/math/fenv.rs b/src/math/fenv.rs
deleted file mode 100644
index c91272e82..000000000
--- a/src/math/fenv.rs
+++ /dev/null
@@ -1,27 +0,0 @@
-// src: musl/src/fenv/fenv.c
-/* Dummy functions for archs lacking fenv implementation */
-
-pub(crate) const FE_UNDERFLOW: i32 = 0;
-pub(crate) const FE_INEXACT: i32 = 0;
-
-pub(crate) const FE_TONEAREST: i32 = 0;
-
-#[inline]
-pub(crate) fn feclearexcept(_mask: i32) -> i32 {
-    0
-}
-
-#[inline]
-pub(crate) fn feraiseexcept(_mask: i32) -> i32 {
-    0
-}
-
-#[inline]
-pub(crate) fn fetestexcept(_mask: i32) -> i32 {
-    0
-}
-
-#[inline]
-pub(crate) fn fegetround() -> i32 {
-    FE_TONEAREST
-}
diff --git a/src/math/floor.rs b/src/math/floor.rs
deleted file mode 100644
index e8fb21e58..000000000
--- a/src/math/floor.rs
+++ /dev/null
@@ -1,74 +0,0 @@
-#![allow(unreachable_code)]
-use core::f64;
-
-const TOINT: f64 = 1. / f64::EPSILON;
-
-/// Floor (f64)
-///
-/// Finds the nearest integer less than or equal to `x`.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
-pub fn floor(x: f64) -> f64 {
-    // On wasm32 we know that LLVM's intrinsic will compile to an optimized
-    // `f64.floor` native instruction, so we can leverage this for both code size
-    // and speed.
-    llvm_intrinsically_optimized! {
-        #[cfg(target_arch = "wasm32")] {
-            return unsafe { ::core::intrinsics::floorf64(x) }
-        }
-    }
-    #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))]
-    {
-        //use an alternative implementation on x86, because the
-        //main implementation fails with the x87 FPU used by
-        //debian i386, probably due to excess precision issues.
-        //basic implementation taken from https://github.com/rust-lang/libm/issues/219
-        use super::fabs;
-        if fabs(x).to_bits() < 4503599627370496.0_f64.to_bits() {
-            let truncated = x as i64 as f64;
-            if truncated > x {
-                return truncated - 1.0;
-            } else {
-                return truncated;
-            }
-        } else {
-            return x;
-        }
-    }
-    let ui = x.to_bits();
-    let e = ((ui >> 52) & 0x7ff) as i32;
-
-    if (e >= 0x3ff + 52) || (x == 0.) {
-        return x;
-    }
-    /* y = int(x) - x, where int(x) is an integer neighbor of x */
-    let y = if (ui >> 63) != 0 { x - TOINT + TOINT - x } else { x + TOINT - TOINT - x };
-    /* special case because of non-nearest rounding modes */
-    if e < 0x3ff {
-        force_eval!(y);
-        return if (ui >> 63) != 0 { -1. } else { 0. };
-    }
-    if y > 0. { x + y - 1. } else { x + y }
-}
-
-#[cfg(test)]
-mod tests {
-    use core::f64::*;
-
-    use super::*;
-
-    #[test]
-    fn sanity_check() {
-        assert_eq!(floor(1.1), 1.0);
-        assert_eq!(floor(2.9), 2.0);
-    }
-
-    /// The spec: https://en.cppreference.com/w/cpp/numeric/math/floor
-    #[test]
-    fn spec_tests() {
-        // Not Asserted: that the current rounding mode has no effect.
-        assert!(floor(NAN).is_nan());
-        for f in [0.0, -0.0, INFINITY, NEG_INFINITY].iter().copied() {
-            assert_eq!(floor(f), f);
-        }
-    }
-}
diff --git a/src/math/floorf.rs b/src/math/floorf.rs
deleted file mode 100644
index f66cab74f..000000000
--- a/src/math/floorf.rs
+++ /dev/null
@@ -1,67 +0,0 @@
-use core::f32;
-
-/// Floor (f32)
-///
-/// Finds the nearest integer less than or equal to `x`.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
-pub fn floorf(x: f32) -> f32 {
-    // On wasm32 we know that LLVM's intrinsic will compile to an optimized
-    // `f32.floor` native instruction, so we can leverage this for both code size
-    // and speed.
-    llvm_intrinsically_optimized! {
-        #[cfg(target_arch = "wasm32")] {
-            return unsafe { ::core::intrinsics::floorf32(x) }
-        }
-    }
-    let mut ui = x.to_bits();
-    let e = (((ui >> 23) as i32) & 0xff) - 0x7f;
-
-    if e >= 23 {
-        return x;
-    }
-    if e >= 0 {
-        let m: u32 = 0x007fffff >> e;
-        if (ui & m) == 0 {
-            return x;
-        }
-        force_eval!(x + f32::from_bits(0x7b800000));
-        if ui >> 31 != 0 {
-            ui += m;
-        }
-        ui &= !m;
-    } else {
-        force_eval!(x + f32::from_bits(0x7b800000));
-        if ui >> 31 == 0 {
-            ui = 0;
-        } else if ui << 1 != 0 {
-            return -1.0;
-        }
-    }
-    f32::from_bits(ui)
-}
-
-// PowerPC tests are failing on LLVM 13: https://github.com/rust-lang/rust/issues/88520
-#[cfg(not(target_arch = "powerpc64"))]
-#[cfg(test)]
-mod tests {
-    use core::f32::*;
-
-    use super::*;
-
-    #[test]
-    fn sanity_check() {
-        assert_eq!(floorf(0.5), 0.0);
-        assert_eq!(floorf(1.1), 1.0);
-        assert_eq!(floorf(2.9), 2.0);
-    }
-
-    /// The spec: https://en.cppreference.com/w/cpp/numeric/math/floor
-    #[test]
-    fn spec_tests() {
-        // Not Asserted: that the current rounding mode has no effect.
-        assert!(floorf(NAN).is_nan());
-        for f in [0.0, -0.0, INFINITY, NEG_INFINITY].iter().copied() {
-            assert_eq!(floorf(f), f);
-        }
-    }
-}
diff --git a/src/math/fma.rs b/src/math/fma.rs
deleted file mode 100644
index bb2028fa7..000000000
--- a/src/math/fma.rs
+++ /dev/null
@@ -1,226 +0,0 @@
-use core::{f32, f64};
-
-use super::scalbn;
-
-const ZEROINFNAN: i32 = 0x7ff - 0x3ff - 52 - 1;
-
-struct Num {
-    m: u64,
-    e: i32,
-    sign: i32,
-}
-
-fn normalize(x: f64) -> Num {
-    let x1p63: f64 = f64::from_bits(0x43e0000000000000); // 0x1p63 === 2 ^ 63
-
-    let mut ix: u64 = x.to_bits();
-    let mut e: i32 = (ix >> 52) as i32;
-    let sign: i32 = e & 0x800;
-    e &= 0x7ff;
-    if e == 0 {
-        ix = (x * x1p63).to_bits();
-        e = (ix >> 52) as i32 & 0x7ff;
-        e = if e != 0 { e - 63 } else { 0x800 };
-    }
-    ix &= (1 << 52) - 1;
-    ix |= 1 << 52;
-    ix <<= 1;
-    e -= 0x3ff + 52 + 1;
-    Num { m: ix, e, sign }
-}
-
-#[inline]
-fn mul(x: u64, y: u64) -> (u64, u64) {
-    let t = (x as u128).wrapping_mul(y as u128);
-    ((t >> 64) as u64, t as u64)
-}
-
-/// Floating multiply add (f64)
-///
-/// Computes `(x*y)+z`, rounded as one ternary operation:
-/// Computes the value (as if) to infinite precision and rounds once to the result format,
-/// according to the rounding mode characterized by the value of FLT_ROUNDS.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
-pub fn fma(x: f64, y: f64, z: f64) -> f64 {
-    let x1p63: f64 = f64::from_bits(0x43e0000000000000); // 0x1p63 === 2 ^ 63
-    let x0_ffffff8p_63 = f64::from_bits(0x3bfffffff0000000); // 0x0.ffffff8p-63
-
-    /* normalize so top 10bits and last bit are 0 */
-    let nx = normalize(x);
-    let ny = normalize(y);
-    let nz = normalize(z);
-
-    if nx.e >= ZEROINFNAN || ny.e >= ZEROINFNAN {
-        return x * y + z;
-    }
-    if nz.e >= ZEROINFNAN {
-        if nz.e > ZEROINFNAN {
-            /* z==0 */
-            return x * y + z;
-        }
-        return z;
-    }
-
-    /* mul: r = x*y */
-    let zhi: u64;
-    let zlo: u64;
-    let (mut rhi, mut rlo) = mul(nx.m, ny.m);
-    /* either top 20 or 21 bits of rhi and last 2 bits of rlo are 0 */
-
-    /* align exponents */
-    let mut e: i32 = nx.e + ny.e;
-    let mut d: i32 = nz.e - e;
-    /* shift bits z<<=kz, r>>=kr, so kz+kr == d, set e = e+kr (== ez-kz) */
-    if d > 0 {
-        if d < 64 {
-            zlo = nz.m << d;
-            zhi = nz.m >> (64 - d);
-        } else {
-            zlo = 0;
-            zhi = nz.m;
-            e = nz.e - 64;
-            d -= 64;
-            if d == 0 {
-            } else if d < 64 {
-                rlo = rhi << (64 - d) | rlo >> d | ((rlo << (64 - d)) != 0) as u64;
-                rhi = rhi >> d;
-            } else {
-                rlo = 1;
-                rhi = 0;
-            }
-        }
-    } else {
-        zhi = 0;
-        d = -d;
-        if d == 0 {
-            zlo = nz.m;
-        } else if d < 64 {
-            zlo = nz.m >> d | ((nz.m << (64 - d)) != 0) as u64;
-        } else {
-            zlo = 1;
-        }
-    }
-
-    /* add */
-    let mut sign: i32 = nx.sign ^ ny.sign;
-    let samesign: bool = (sign ^ nz.sign) == 0;
-    let mut nonzero: i32 = 1;
-    if samesign {
-        /* r += z */
-        rlo = rlo.wrapping_add(zlo);
-        rhi += zhi + (rlo < zlo) as u64;
-    } else {
-        /* r -= z */
-        let (res, borrow) = rlo.overflowing_sub(zlo);
-        rlo = res;
-        rhi = rhi.wrapping_sub(zhi.wrapping_add(borrow as u64));
-        if (rhi >> 63) != 0 {
-            rlo = (rlo as i64).wrapping_neg() as u64;
-            rhi = (rhi as i64).wrapping_neg() as u64 - (rlo != 0) as u64;
-            sign = (sign == 0) as i32;
-        }
-        nonzero = (rhi != 0) as i32;
-    }
-
-    /* set rhi to top 63bit of the result (last bit is sticky) */
-    if nonzero != 0 {
-        e += 64;
-        d = rhi.leading_zeros() as i32 - 1;
-        /* note: d > 0 */
-        rhi = rhi << d | rlo >> (64 - d) | ((rlo << d) != 0) as u64;
-    } else if rlo != 0 {
-        d = rlo.leading_zeros() as i32 - 1;
-        if d < 0 {
-            rhi = rlo >> 1 | (rlo & 1);
-        } else {
-            rhi = rlo << d;
-        }
-    } else {
-        /* exact +-0 */
-        return x * y + z;
-    }
-    e -= d;
-
-    /* convert to double */
-    let mut i: i64 = rhi as i64; /* i is in [1<<62,(1<<63)-1] */
-    if sign != 0 {
-        i = -i;
-    }
-    let mut r: f64 = i as f64; /* |r| is in [0x1p62,0x1p63] */
-
-    if e < -1022 - 62 {
-        /* result is subnormal before rounding */
-        if e == -1022 - 63 {
-            let mut c: f64 = x1p63;
-            if sign != 0 {
-                c = -c;
-            }
-            if r == c {
-                /* min normal after rounding, underflow depends
-                on arch behaviour which can be imitated by
-                a double to float conversion */
-                let fltmin: f32 = (x0_ffffff8p_63 * f32::MIN_POSITIVE as f64 * r) as f32;
-                return f64::MIN_POSITIVE / f32::MIN_POSITIVE as f64 * fltmin as f64;
-            }
-            /* one bit is lost when scaled, add another top bit to
-            only round once at conversion if it is inexact */
-            if (rhi << 53) != 0 {
-                i = (rhi >> 1 | (rhi & 1) | 1 << 62) as i64;
-                if sign != 0 {
-                    i = -i;
-                }
-                r = i as f64;
-                r = 2. * r - c; /* remove top bit */
-
-                /* raise underflow portably, such that it
-                cannot be optimized away */
-                {
-                    let tiny: f64 = f64::MIN_POSITIVE / f32::MIN_POSITIVE as f64 * r;
-                    r += (tiny * tiny) * (r - r);
-                }
-            }
-        } else {
-            /* only round once when scaled */
-            d = 10;
-            i = ((rhi >> d | ((rhi << (64 - d)) != 0) as u64) << d) as i64;
-            if sign != 0 {
-                i = -i;
-            }
-            r = i as f64;
-        }
-    }
-    scalbn(r, e)
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-    #[test]
-    fn fma_segfault() {
-        // These two inputs cause fma to segfault on release due to overflow:
-        assert_eq!(
-            fma(
-                -0.0000000000000002220446049250313,
-                -0.0000000000000002220446049250313,
-                -0.0000000000000002220446049250313
-            ),
-            -0.00000000000000022204460492503126,
-        );
-
-        let result = fma(-0.992, -0.992, -0.992);
-        //force rounding to storage format on x87 to prevent superious errors.
-        #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))]
-        let result = force_eval!(result);
-        assert_eq!(result, -0.007936000000000007,);
-    }
-
-    #[test]
-    fn fma_sbb() {
-        assert_eq!(fma(-(1.0 - f64::EPSILON), f64::MIN, f64::MIN), -3991680619069439e277);
-    }
-
-    #[test]
-    fn fma_underflow() {
-        assert_eq!(fma(1.1102230246251565e-16, -9.812526705433188e-305, 1.0894e-320), 0.0,);
-    }
-}
diff --git a/src/math/fmaf.rs b/src/math/fmaf.rs
deleted file mode 100644
index 10bdaeab3..000000000
--- a/src/math/fmaf.rs
+++ /dev/null
@@ -1,113 +0,0 @@
-/* origin: FreeBSD /usr/src/lib/msun/src/s_fmaf.c */
-/*-
- * Copyright (c) 2005-2011 David Schultz <das@FreeBSD.ORG>
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-use core::f32;
-use core::ptr::read_volatile;
-
-use super::fenv::{
-    FE_INEXACT, FE_TONEAREST, FE_UNDERFLOW, feclearexcept, fegetround, feraiseexcept, fetestexcept,
-};
-
-/*
- * Fused multiply-add: Compute x * y + z with a single rounding error.
- *
- * A double has more than twice as much precision than a float, so
- * direct double-precision arithmetic suffices, except where double
- * rounding occurs.
- */
-
-/// Floating multiply add (f32)
-///
-/// Computes `(x*y)+z`, rounded as one ternary operation:
-/// Computes the value (as if) to infinite precision and rounds once to the result format,
-/// according to the rounding mode characterized by the value of FLT_ROUNDS.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
-pub fn fmaf(x: f32, y: f32, mut z: f32) -> f32 {
-    let xy: f64;
-    let mut result: f64;
-    let mut ui: u64;
-    let e: i32;
-
-    xy = x as f64 * y as f64;
-    result = xy + z as f64;
-    ui = result.to_bits();
-    e = (ui >> 52) as i32 & 0x7ff;
-    /* Common case: The double precision result is fine. */
-    if (
-        /* not a halfway case */
-        ui & 0x1fffffff) != 0x10000000 ||
-        /* NaN */
-        e == 0x7ff ||
-        /* exact */
-        (result - xy == z as f64 && result - z as f64 == xy) ||
-        /* not round-to-nearest */
-        fegetround() != FE_TONEAREST
-    {
-        /*
-            underflow may not be raised correctly, example:
-            fmaf(0x1p-120f, 0x1p-120f, 0x1p-149f)
-        */
-        if e < 0x3ff - 126 && e >= 0x3ff - 149 && fetestexcept(FE_INEXACT) != 0 {
-            feclearexcept(FE_INEXACT);
-            // prevent `xy + vz` from being CSE'd with `xy + z` above
-            let vz: f32 = unsafe { read_volatile(&z) };
-            result = xy + vz as f64;
-            if fetestexcept(FE_INEXACT) != 0 {
-                feraiseexcept(FE_UNDERFLOW);
-            } else {
-                feraiseexcept(FE_INEXACT);
-            }
-        }
-        z = result as f32;
-        return z;
-    }
-
-    /*
-     * If result is inexact, and exactly halfway between two float values,
-     * we need to adjust the low-order bit in the direction of the error.
-     */
-    let neg = ui >> 63 != 0;
-    let err = if neg == (z as f64 > xy) { xy - result + z as f64 } else { z as f64 - result + xy };
-    if neg == (err < 0.0) {
-        ui += 1;
-    } else {
-        ui -= 1;
-    }
-    f64::from_bits(ui) as f32
-}
-
-#[cfg(test)]
-mod tests {
-    #[test]
-    fn issue_263() {
-        let a = f32::from_bits(1266679807);
-        let b = f32::from_bits(1300234242);
-        let c = f32::from_bits(1115553792);
-        let expected = f32::from_bits(1501560833);
-        assert_eq!(super::fmaf(a, b, c), expected);
-    }
-}
diff --git a/src/math/fmax.rs b/src/math/fmax.rs
deleted file mode 100644
index 93c97bc61..000000000
--- a/src/math/fmax.rs
+++ /dev/null
@@ -1,12 +0,0 @@
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
-pub fn fmax(x: f64, y: f64) -> f64 {
-    // IEEE754 says: maxNum(x, y) is the canonicalized number y if x < y, x if y < x, the
-    // canonicalized number if one operand is a number and the other a quiet NaN. Otherwise it
-    // is either x or y, canonicalized (this means results might differ among implementations).
-    // When either x or y is a signalingNaN, then the result is according to 6.2.
-    //
-    // Since we do not support sNaN in Rust yet, we do not need to handle them.
-    // FIXME(nagisa): due to https://bugs.llvm.org/show_bug.cgi?id=33303 we canonicalize by
-    // multiplying by 1.0. Should switch to the `canonicalize` when it works.
-    (if x.is_nan() || x < y { y } else { x }) * 1.0
-}
diff --git a/src/math/fmaxf.rs b/src/math/fmaxf.rs
deleted file mode 100644
index 607746647..000000000
--- a/src/math/fmaxf.rs
+++ /dev/null
@@ -1,12 +0,0 @@
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
-pub fn fmaxf(x: f32, y: f32) -> f32 {
-    // IEEE754 says: maxNum(x, y) is the canonicalized number y if x < y, x if y < x, the
-    // canonicalized number if one operand is a number and the other a quiet NaN. Otherwise it
-    // is either x or y, canonicalized (this means results might differ among implementations).
-    // When either x or y is a signalingNaN, then the result is according to 6.2.
-    //
-    // Since we do not support sNaN in Rust yet, we do not need to handle them.
-    // FIXME(nagisa): due to https://bugs.llvm.org/show_bug.cgi?id=33303 we canonicalize by
-    // multiplying by 1.0. Should switch to the `canonicalize` when it works.
-    (if x.is_nan() || x < y { y } else { x }) * 1.0
-}
diff --git a/src/math/fmin.rs b/src/math/fmin.rs
deleted file mode 100644
index ab1509f34..000000000
--- a/src/math/fmin.rs
+++ /dev/null
@@ -1,12 +0,0 @@
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
-pub fn fmin(x: f64, y: f64) -> f64 {
-    // IEEE754 says: minNum(x, y) is the canonicalized number x if x < y, y if y < x, the
-    // canonicalized number if one operand is a number and the other a quiet NaN. Otherwise it
-    // is either x or y, canonicalized (this means results might differ among implementations).
-    // When either x or y is a signalingNaN, then the result is according to 6.2.
-    //
-    // Since we do not support sNaN in Rust yet, we do not need to handle them.
-    // FIXME(nagisa): due to https://bugs.llvm.org/show_bug.cgi?id=33303 we canonicalize by
-    // multiplying by 1.0. Should switch to the `canonicalize` when it works.
-    (if y.is_nan() || x < y { x } else { y }) * 1.0
-}
diff --git a/src/math/fminf.rs b/src/math/fminf.rs
deleted file mode 100644
index 0049e7117..000000000
--- a/src/math/fminf.rs
+++ /dev/null
@@ -1,12 +0,0 @@
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
-pub fn fminf(x: f32, y: f32) -> f32 {
-    // IEEE754 says: minNum(x, y) is the canonicalized number x if x < y, y if y < x, the
-    // canonicalized number if one operand is a number and the other a quiet NaN. Otherwise it
-    // is either x or y, canonicalized (this means results might differ among implementations).
-    // When either x or y is a signalingNaN, then the result is according to 6.2.
-    //
-    // Since we do not support sNaN in Rust yet, we do not need to handle them.
-    // FIXME(nagisa): due to https://bugs.llvm.org/show_bug.cgi?id=33303 we canonicalize by
-    // multiplying by 1.0. Should switch to the `canonicalize` when it works.
-    (if y.is_nan() || x < y { x } else { y }) * 1.0
-}
diff --git a/src/math/fmod.rs b/src/math/fmod.rs
deleted file mode 100644
index d892ffd8b..000000000
--- a/src/math/fmod.rs
+++ /dev/null
@@ -1,80 +0,0 @@
-use core::u64;
-
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
-pub fn fmod(x: f64, y: f64) -> f64 {
-    let mut uxi = x.to_bits();
-    let mut uyi = y.to_bits();
-    let mut ex = (uxi >> 52 & 0x7ff) as i64;
-    let mut ey = (uyi >> 52 & 0x7ff) as i64;
-    let sx = uxi >> 63;
-    let mut i;
-
-    if uyi << 1 == 0 || y.is_nan() || ex == 0x7ff {
-        return (x * y) / (x * y);
-    }
-    if uxi << 1 <= uyi << 1 {
-        if uxi << 1 == uyi << 1 {
-            return 0.0 * x;
-        }
-        return x;
-    }
-
-    /* normalize x and y */
-    if ex == 0 {
-        i = uxi << 12;
-        while i >> 63 == 0 {
-            ex -= 1;
-            i <<= 1;
-        }
-        uxi <<= -ex + 1;
-    } else {
-        uxi &= u64::MAX >> 12;
-        uxi |= 1 << 52;
-    }
-    if ey == 0 {
-        i = uyi << 12;
-        while i >> 63 == 0 {
-            ey -= 1;
-            i <<= 1;
-        }
-        uyi <<= -ey + 1;
-    } else {
-        uyi &= u64::MAX >> 12;
-        uyi |= 1 << 52;
-    }
-
-    /* x mod y */
-    while ex > ey {
-        i = uxi.wrapping_sub(uyi);
-        if i >> 63 == 0 {
-            if i == 0 {
-                return 0.0 * x;
-            }
-            uxi = i;
-        }
-        uxi <<= 1;
-        ex -= 1;
-    }
-    i = uxi.wrapping_sub(uyi);
-    if i >> 63 == 0 {
-        if i == 0 {
-            return 0.0 * x;
-        }
-        uxi = i;
-    }
-    while uxi >> 52 == 0 {
-        uxi <<= 1;
-        ex -= 1;
-    }
-
-    /* scale result */
-    if ex > 0 {
-        uxi -= 1 << 52;
-        uxi |= (ex as u64) << 52;
-    } else {
-        uxi >>= -ex + 1;
-    }
-    uxi |= (sx as u64) << 63;
-
-    f64::from_bits(uxi)
-}
diff --git a/src/math/fmodf.rs b/src/math/fmodf.rs
deleted file mode 100644
index 1d8001384..000000000
--- a/src/math/fmodf.rs
+++ /dev/null
@@ -1,88 +0,0 @@
-use core::{f32, u32};
-
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
-pub fn fmodf(x: f32, y: f32) -> f32 {
-    let mut uxi = x.to_bits();
-    let mut uyi = y.to_bits();
-    let mut ex = (uxi >> 23 & 0xff) as i32;
-    let mut ey = (uyi >> 23 & 0xff) as i32;
-    let sx = uxi & 0x80000000;
-    let mut i;
-
-    if uyi << 1 == 0 || y.is_nan() || ex == 0xff {
-        return (x * y) / (x * y);
-    }
-
-    if uxi << 1 <= uyi << 1 {
-        if uxi << 1 == uyi << 1 {
-            return 0.0 * x;
-        }
-
-        return x;
-    }
-
-    /* normalize x and y */
-    if ex == 0 {
-        i = uxi << 9;
-        while i >> 31 == 0 {
-            ex -= 1;
-            i <<= 1;
-        }
-
-        uxi <<= -ex + 1;
-    } else {
-        uxi &= u32::MAX >> 9;
-        uxi |= 1 << 23;
-    }
-
-    if ey == 0 {
-        i = uyi << 9;
-        while i >> 31 == 0 {
-            ey -= 1;
-            i <<= 1;
-        }
-
-        uyi <<= -ey + 1;
-    } else {
-        uyi &= u32::MAX >> 9;
-        uyi |= 1 << 23;
-    }
-
-    /* x mod y */
-    while ex > ey {
-        i = uxi.wrapping_sub(uyi);
-        if i >> 31 == 0 {
-            if i == 0 {
-                return 0.0 * x;
-            }
-            uxi = i;
-        }
-        uxi <<= 1;
-
-        ex -= 1;
-    }
-
-    i = uxi.wrapping_sub(uyi);
-    if i >> 31 == 0 {
-        if i == 0 {
-            return 0.0 * x;
-        }
-        uxi = i;
-    }
-
-    while uxi >> 23 == 0 {
-        uxi <<= 1;
-        ex -= 1;
-    }
-
-    /* scale result up */
-    if ex > 0 {
-        uxi -= 1 << 23;
-        uxi |= (ex as u32) << 23;
-    } else {
-        uxi >>= -ex + 1;
-    }
-    uxi |= sx;
-
-    f32::from_bits(uxi)
-}
diff --git a/src/math/jnf.rs b/src/math/jnf.rs
deleted file mode 100644
index e5afda448..000000000
--- a/src/math/jnf.rs
+++ /dev/null
@@ -1,253 +0,0 @@
-/* origin: FreeBSD /usr/src/lib/msun/src/e_jnf.c */
-/*
- * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com.
- */
-/*
- * ====================================================
- * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
- *
- * Developed at SunPro, a Sun Microsystems, Inc. business.
- * Permission to use, copy, modify, and distribute this
- * software is freely granted, provided that this notice
- * is preserved.
- * ====================================================
- */
-
-use super::{fabsf, j0f, j1f, logf, y0f, y1f};
-
-/// Integer order of the [Bessel function](https://en.wikipedia.org/wiki/Bessel_function) of the first kind (f32).
-pub fn jnf(n: i32, mut x: f32) -> f32 {
-    let mut ix: u32;
-    let mut nm1: i32;
-    let mut sign: bool;
-    let mut i: i32;
-    let mut a: f32;
-    let mut b: f32;
-    let mut temp: f32;
-
-    ix = x.to_bits();
-    sign = (ix >> 31) != 0;
-    ix &= 0x7fffffff;
-    if ix > 0x7f800000 {
-        /* nan */
-        return x;
-    }
-
-    /* J(-n,x) = J(n,-x), use |n|-1 to avoid overflow in -n */
-    if n == 0 {
-        return j0f(x);
-    }
-    if n < 0 {
-        nm1 = -(n + 1);
-        x = -x;
-        sign = !sign;
-    } else {
-        nm1 = n - 1;
-    }
-    if nm1 == 0 {
-        return j1f(x);
-    }
-
-    sign &= (n & 1) != 0; /* even n: 0, odd n: signbit(x) */
-    x = fabsf(x);
-    if ix == 0 || ix == 0x7f800000 {
-        /* if x is 0 or inf */
-        b = 0.0;
-    } else if (nm1 as f32) < x {
-        /* Safe to use J(n+1,x)=2n/x *J(n,x)-J(n-1,x) */
-        a = j0f(x);
-        b = j1f(x);
-        i = 0;
-        while i < nm1 {
-            i += 1;
-            temp = b;
-            b = b * (2.0 * (i as f32) / x) - a;
-            a = temp;
-        }
-    } else {
-        if ix < 0x35800000 {
-            /* x < 2**-20 */
-            /* x is tiny, return the first Taylor expansion of J(n,x)
-             * J(n,x) = 1/n!*(x/2)^n  - ...
-             */
-            if nm1 > 8 {
-                /* underflow */
-                nm1 = 8;
-            }
-            temp = 0.5 * x;
-            b = temp;
-            a = 1.0;
-            i = 2;
-            while i <= nm1 + 1 {
-                a *= i as f32; /* a = n! */
-                b *= temp; /* b = (x/2)^n */
-                i += 1;
-            }
-            b = b / a;
-        } else {
-            /* use backward recurrence */
-            /*                      x      x^2      x^2
-             *  J(n,x)/J(n-1,x) =  ----   ------   ------   .....
-             *                      2n  - 2(n+1) - 2(n+2)
-             *
-             *                      1      1        1
-             *  (for large x)   =  ----  ------   ------   .....
-             *                      2n   2(n+1)   2(n+2)
-             *                      -- - ------ - ------ -
-             *                       x     x         x
-             *
-             * Let w = 2n/x and h=2/x, then the above quotient
-             * is equal to the continued fraction:
-             *                  1
-             *      = -----------------------
-             *                     1
-             *         w - -----------------
-             *                        1
-             *              w+h - ---------
-             *                     w+2h - ...
-             *
-             * To determine how many terms needed, let
-             * Q(0) = w, Q(1) = w(w+h) - 1,
-             * Q(k) = (w+k*h)*Q(k-1) - Q(k-2),
-             * When Q(k) > 1e4      good for single
-             * When Q(k) > 1e9      good for double
-             * When Q(k) > 1e17     good for quadruple
-             */
-            /* determine k */
-            let mut t: f32;
-            let mut q0: f32;
-            let mut q1: f32;
-            let mut w: f32;
-            let h: f32;
-            let mut z: f32;
-            let mut tmp: f32;
-            let nf: f32;
-            let mut k: i32;
-
-            nf = (nm1 as f32) + 1.0;
-            w = 2.0 * (nf as f32) / x;
-            h = 2.0 / x;
-            z = w + h;
-            q0 = w;
-            q1 = w * z - 1.0;
-            k = 1;
-            while q1 < 1.0e4 {
-                k += 1;
-                z += h;
-                tmp = z * q1 - q0;
-                q0 = q1;
-                q1 = tmp;
-            }
-            t = 0.0;
-            i = k;
-            while i >= 0 {
-                t = 1.0 / (2.0 * ((i as f32) + nf) / x - t);
-                i -= 1;
-            }
-            a = t;
-            b = 1.0;
-            /*  estimate log((2/x)^n*n!) = n*log(2/x)+n*ln(n)
-             *  Hence, if n*(log(2n/x)) > ...
-             *  single 8.8722839355e+01
-             *  double 7.09782712893383973096e+02
-             *  long double 1.1356523406294143949491931077970765006170e+04
-             *  then recurrent value may overflow and the result is
-             *  likely underflow to zero
-             */
-            tmp = nf * logf(fabsf(w));
-            if tmp < 88.721679688 {
-                i = nm1;
-                while i > 0 {
-                    temp = b;
-                    b = 2.0 * (i as f32) * b / x - a;
-                    a = temp;
-                    i -= 1;
-                }
-            } else {
-                i = nm1;
-                while i > 0 {
-                    temp = b;
-                    b = 2.0 * (i as f32) * b / x - a;
-                    a = temp;
-                    /* scale b to avoid spurious overflow */
-                    let x1p60 = f32::from_bits(0x5d800000); // 0x1p60 == 2^60
-                    if b > x1p60 {
-                        a /= b;
-                        t /= b;
-                        b = 1.0;
-                    }
-                    i -= 1;
-                }
-            }
-            z = j0f(x);
-            w = j1f(x);
-            if fabsf(z) >= fabsf(w) {
-                b = t * z / b;
-            } else {
-                b = t * w / a;
-            }
-        }
-    }
-
-    if sign { -b } else { b }
-}
-
-/// Integer order of the [Bessel function](https://en.wikipedia.org/wiki/Bessel_function) of the second kind (f32).
-pub fn ynf(n: i32, x: f32) -> f32 {
-    let mut ix: u32;
-    let mut ib: u32;
-    let nm1: i32;
-    let mut sign: bool;
-    let mut i: i32;
-    let mut a: f32;
-    let mut b: f32;
-    let mut temp: f32;
-
-    ix = x.to_bits();
-    sign = (ix >> 31) != 0;
-    ix &= 0x7fffffff;
-    if ix > 0x7f800000 {
-        /* nan */
-        return x;
-    }
-    if sign && ix != 0 {
-        /* x < 0 */
-        return 0.0 / 0.0;
-    }
-    if ix == 0x7f800000 {
-        return 0.0;
-    }
-
-    if n == 0 {
-        return y0f(x);
-    }
-    if n < 0 {
-        nm1 = -(n + 1);
-        sign = (n & 1) != 0;
-    } else {
-        nm1 = n - 1;
-        sign = false;
-    }
-    if nm1 == 0 {
-        if sign {
-            return -y1f(x);
-        } else {
-            return y1f(x);
-        }
-    }
-
-    a = y0f(x);
-    b = y1f(x);
-    /* quit if b is -inf */
-    ib = b.to_bits();
-    i = 0;
-    while i < nm1 && ib != 0xff800000 {
-        i += 1;
-        temp = b;
-        b = (2.0 * (i as f32) / x) * b - a;
-        ib = b.to_bits();
-        a = temp;
-    }
-
-    if sign { -b } else { b }
-}
diff --git a/src/math/ldexp.rs b/src/math/ldexp.rs
deleted file mode 100644
index e46242e55..000000000
--- a/src/math/ldexp.rs
+++ /dev/null
@@ -1,4 +0,0 @@
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
-pub fn ldexp(x: f64, n: i32) -> f64 {
-    super::scalbn(x, n)
-}
diff --git a/src/math/rint.rs b/src/math/rint.rs
deleted file mode 100644
index 618b26e54..000000000
--- a/src/math/rint.rs
+++ /dev/null
@@ -1,50 +0,0 @@
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
-pub fn rint(x: f64) -> f64 {
-    let one_over_e = 1.0 / f64::EPSILON;
-    let as_u64: u64 = x.to_bits();
-    let exponent: u64 = as_u64 >> 52 & 0x7ff;
-    let is_positive = (as_u64 >> 63) == 0;
-    if exponent >= 0x3ff + 52 {
-        x
-    } else {
-        let ans = if is_positive {
-            #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))]
-            let x = force_eval!(x);
-            let xplusoneovere = x + one_over_e;
-            #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))]
-            let xplusoneovere = force_eval!(xplusoneovere);
-            xplusoneovere - one_over_e
-        } else {
-            #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))]
-            let x = force_eval!(x);
-            let xminusoneovere = x - one_over_e;
-            #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))]
-            let xminusoneovere = force_eval!(xminusoneovere);
-            xminusoneovere + one_over_e
-        };
-
-        if ans == 0.0 { if is_positive { 0.0 } else { -0.0 } } else { ans }
-    }
-}
-
-// PowerPC tests are failing on LLVM 13: https://github.com/rust-lang/rust/issues/88520
-#[cfg(not(target_arch = "powerpc64"))]
-#[cfg(test)]
-mod tests {
-    use super::rint;
-
-    #[test]
-    fn negative_zero() {
-        assert_eq!(rint(-0.0_f64).to_bits(), (-0.0_f64).to_bits());
-    }
-
-    #[test]
-    fn sanity_check() {
-        assert_eq!(rint(-1.0), -1.0);
-        assert_eq!(rint(2.8), 3.0);
-        assert_eq!(rint(-0.5), -0.0);
-        assert_eq!(rint(0.5), 0.0);
-        assert_eq!(rint(-1.5), -2.0);
-        assert_eq!(rint(1.5), 2.0);
-    }
-}
diff --git a/src/math/rintf.rs b/src/math/rintf.rs
deleted file mode 100644
index 0726d83ba..000000000
--- a/src/math/rintf.rs
+++ /dev/null
@@ -1,50 +0,0 @@
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
-pub fn rintf(x: f32) -> f32 {
-    let one_over_e = 1.0 / f32::EPSILON;
-    let as_u32: u32 = x.to_bits();
-    let exponent: u32 = as_u32 >> 23 & 0xff;
-    let is_positive = (as_u32 >> 31) == 0;
-    if exponent >= 0x7f + 23 {
-        x
-    } else {
-        let ans = if is_positive {
-            #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))]
-            let x = force_eval!(x);
-            let xplusoneovere = x + one_over_e;
-            #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))]
-            let xplusoneovere = force_eval!(xplusoneovere);
-            xplusoneovere - one_over_e
-        } else {
-            #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))]
-            let x = force_eval!(x);
-            let xminusoneovere = x - one_over_e;
-            #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))]
-            let xminusoneovere = force_eval!(xminusoneovere);
-            xminusoneovere + one_over_e
-        };
-
-        if ans == 0.0 { if is_positive { 0.0 } else { -0.0 } } else { ans }
-    }
-}
-
-// PowerPC tests are failing on LLVM 13: https://github.com/rust-lang/rust/issues/88520
-#[cfg(not(target_arch = "powerpc64"))]
-#[cfg(test)]
-mod tests {
-    use super::rintf;
-
-    #[test]
-    fn negative_zero() {
-        assert_eq!(rintf(-0.0_f32).to_bits(), (-0.0_f32).to_bits());
-    }
-
-    #[test]
-    fn sanity_check() {
-        assert_eq!(rintf(-1.0), -1.0);
-        assert_eq!(rintf(2.8), 3.0);
-        assert_eq!(rintf(-0.5), -0.0);
-        assert_eq!(rintf(0.5), 0.0);
-        assert_eq!(rintf(-1.5), -2.0);
-        assert_eq!(rintf(1.5), 2.0);
-    }
-}
diff --git a/src/math/round.rs b/src/math/round.rs
deleted file mode 100644
index b81ebaa1d..000000000
--- a/src/math/round.rs
+++ /dev/null
@@ -1,28 +0,0 @@
-use core::f64;
-
-use super::{copysign, trunc};
-
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
-pub fn round(x: f64) -> f64 {
-    trunc(x + copysign(0.5 - 0.25 * f64::EPSILON, x))
-}
-
-#[cfg(test)]
-mod tests {
-    use super::round;
-
-    #[test]
-    fn negative_zero() {
-        assert_eq!(round(-0.0_f64).to_bits(), (-0.0_f64).to_bits());
-    }
-
-    #[test]
-    fn sanity_check() {
-        assert_eq!(round(-1.0), -1.0);
-        assert_eq!(round(2.8), 3.0);
-        assert_eq!(round(-0.5), -1.0);
-        assert_eq!(round(0.5), 1.0);
-        assert_eq!(round(-1.5), -2.0);
-        assert_eq!(round(1.5), 2.0);
-    }
-}
diff --git a/src/math/roundf.rs b/src/math/roundf.rs
deleted file mode 100644
index fb974bbfe..000000000
--- a/src/math/roundf.rs
+++ /dev/null
@@ -1,30 +0,0 @@
-use core::f32;
-
-use super::{copysignf, truncf};
-
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
-pub fn roundf(x: f32) -> f32 {
-    truncf(x + copysignf(0.5 - 0.25 * f32::EPSILON, x))
-}
-
-// PowerPC tests are failing on LLVM 13: https://github.com/rust-lang/rust/issues/88520
-#[cfg(not(target_arch = "powerpc64"))]
-#[cfg(test)]
-mod tests {
-    use super::roundf;
-
-    #[test]
-    fn negative_zero() {
-        assert_eq!(roundf(-0.0_f32).to_bits(), (-0.0_f32).to_bits());
-    }
-
-    #[test]
-    fn sanity_check() {
-        assert_eq!(roundf(-1.0), -1.0);
-        assert_eq!(roundf(2.8), 3.0);
-        assert_eq!(roundf(-0.5), -1.0);
-        assert_eq!(roundf(0.5), 1.0);
-        assert_eq!(roundf(-1.5), -2.0);
-        assert_eq!(roundf(1.5), 2.0);
-    }
-}
diff --git a/src/math/scalbn.rs b/src/math/scalbn.rs
deleted file mode 100644
index 00c455a10..000000000
--- a/src/math/scalbn.rs
+++ /dev/null
@@ -1,33 +0,0 @@
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
-pub fn scalbn(x: f64, mut n: i32) -> f64 {
-    let x1p1023 = f64::from_bits(0x7fe0000000000000); // 0x1p1023 === 2 ^ 1023
-    let x1p53 = f64::from_bits(0x4340000000000000); // 0x1p53 === 2 ^ 53
-    let x1p_1022 = f64::from_bits(0x0010000000000000); // 0x1p-1022 === 2 ^ (-1022)
-
-    let mut y = x;
-
-    if n > 1023 {
-        y *= x1p1023;
-        n -= 1023;
-        if n > 1023 {
-            y *= x1p1023;
-            n -= 1023;
-            if n > 1023 {
-                n = 1023;
-            }
-        }
-    } else if n < -1022 {
-        /* make sure final n < -53 to avoid double
-        rounding in the subnormal range */
-        y *= x1p_1022 * x1p53;
-        n += 1022 - 53;
-        if n < -1022 {
-            y *= x1p_1022 * x1p53;
-            n += 1022 - 53;
-            if n < -1022 {
-                n = -1022;
-            }
-        }
-    }
-    y * f64::from_bits(((0x3ff + n) as u64) << 52)
-}
diff --git a/src/math/scalbnf.rs b/src/math/scalbnf.rs
deleted file mode 100644
index 73f4bb57a..000000000
--- a/src/math/scalbnf.rs
+++ /dev/null
@@ -1,29 +0,0 @@
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
-pub fn scalbnf(mut x: f32, mut n: i32) -> f32 {
-    let x1p127 = f32::from_bits(0x7f000000); // 0x1p127f === 2 ^ 127
-    let x1p_126 = f32::from_bits(0x800000); // 0x1p-126f === 2 ^ -126
-    let x1p24 = f32::from_bits(0x4b800000); // 0x1p24f === 2 ^ 24
-
-    if n > 127 {
-        x *= x1p127;
-        n -= 127;
-        if n > 127 {
-            x *= x1p127;
-            n -= 127;
-            if n > 127 {
-                n = 127;
-            }
-        }
-    } else if n < -126 {
-        x *= x1p_126 * x1p24;
-        n += 126 - 24;
-        if n < -126 {
-            x *= x1p_126 * x1p24;
-            n += 126 - 24;
-            if n < -126 {
-                n = -126;
-            }
-        }
-    }
-    x * f32::from_bits(((0x7f + n) as u32) << 23)
-}
diff --git a/src/math/sqrt.rs b/src/math/sqrt.rs
deleted file mode 100644
index e2907384d..000000000
--- a/src/math/sqrt.rs
+++ /dev/null
@@ -1,282 +0,0 @@
-/* origin: FreeBSD /usr/src/lib/msun/src/e_sqrt.c */
-/*
- * ====================================================
- * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
- *
- * Developed at SunSoft, a Sun Microsystems, Inc. business.
- * Permission to use, copy, modify, and distribute this
- * software is freely granted, provided that this notice
- * is preserved.
- * ====================================================
- */
-/* sqrt(x)
- * Return correctly rounded sqrt.
- *           ------------------------------------------
- *           |  Use the hardware sqrt if you have one |
- *           ------------------------------------------
- * Method:
- *   Bit by bit method using integer arithmetic. (Slow, but portable)
- *   1. Normalization
- *      Scale x to y in [1,4) with even powers of 2:
- *      find an integer k such that  1 <= (y=x*2^(2k)) < 4, then
- *              sqrt(x) = 2^k * sqrt(y)
- *   2. Bit by bit computation
- *      Let q  = sqrt(y) truncated to i bit after binary point (q = 1),
- *           i                                                   0
- *                                     i+1         2
- *          s  = 2*q , and      y  =  2   * ( y - q  ).         (1)
- *           i      i            i                 i
- *
- *      To compute q    from q , one checks whether
- *                  i+1       i
- *
- *                            -(i+1) 2
- *                      (q + 2      ) <= y.                     (2)
- *                        i
- *                                                            -(i+1)
- *      If (2) is false, then q   = q ; otherwise q   = q  + 2      .
- *                             i+1   i             i+1   i
- *
- *      With some algebraic manipulation, it is not difficult to see
- *      that (2) is equivalent to
- *                             -(i+1)
- *                      s  +  2       <= y                      (3)
- *                       i                i
- *
- *      The advantage of (3) is that s  and y  can be computed by
- *                                    i      i
- *      the following recurrence formula:
- *          if (3) is false
- *
- *          s     =  s  ,       y    = y   ;                    (4)
- *           i+1      i          i+1    i
- *
- *          otherwise,
- *                         -i                     -(i+1)
- *          s     =  s  + 2  ,  y    = y  -  s  - 2             (5)
- *           i+1      i          i+1    i     i
- *
- *      One may easily use induction to prove (4) and (5).
- *      Note. Since the left hand side of (3) contain only i+2 bits,
- *            it does not necessary to do a full (53-bit) comparison
- *            in (3).
- *   3. Final rounding
- *      After generating the 53 bits result, we compute one more bit.
- *      Together with the remainder, we can decide whether the
- *      result is exact, bigger than 1/2ulp, or less than 1/2ulp
- *      (it will never equal to 1/2ulp).
- *      The rounding mode can be detected by checking whether
- *      huge + tiny is equal to huge, and whether huge - tiny is
- *      equal to huge for some floating point number "huge" and "tiny".
- *
- * Special cases:
- *      sqrt(+-0) = +-0         ... exact
- *      sqrt(inf) = inf
- *      sqrt(-ve) = NaN         ... with invalid signal
- *      sqrt(NaN) = NaN         ... with invalid signal for signaling NaN
- */
-
-use core::f64;
-
-/// The square root of `x` (f64).
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
-pub fn sqrt(x: f64) -> f64 {
-    // On wasm32 we know that LLVM's intrinsic will compile to an optimized
-    // `f64.sqrt` native instruction, so we can leverage this for both code size
-    // and speed.
-    llvm_intrinsically_optimized! {
-        #[cfg(target_arch = "wasm32")] {
-            return if x < 0.0 {
-                f64::NAN
-            } else {
-                unsafe { ::core::intrinsics::sqrtf64(x) }
-            }
-        }
-    }
-    #[cfg(all(target_feature = "sse2", not(feature = "force-soft-floats")))]
-    {
-        // Note: This path is unlikely since LLVM will usually have already
-        // optimized sqrt calls into hardware instructions if sse2 is available,
-        // but if someone does end up here they'll appreciate the speed increase.
-        #[cfg(target_arch = "x86")]
-        use core::arch::x86::*;
-        #[cfg(target_arch = "x86_64")]
-        use core::arch::x86_64::*;
-        unsafe {
-            let m = _mm_set_sd(x);
-            let m_sqrt = _mm_sqrt_pd(m);
-            _mm_cvtsd_f64(m_sqrt)
-        }
-    }
-    #[cfg(any(not(target_feature = "sse2"), feature = "force-soft-floats"))]
-    {
-        use core::num::Wrapping;
-
-        const TINY: f64 = 1.0e-300;
-
-        let mut z: f64;
-        let sign: Wrapping<u32> = Wrapping(0x80000000);
-        let mut ix0: i32;
-        let mut s0: i32;
-        let mut q: i32;
-        let mut m: i32;
-        let mut t: i32;
-        let mut i: i32;
-        let mut r: Wrapping<u32>;
-        let mut t1: Wrapping<u32>;
-        let mut s1: Wrapping<u32>;
-        let mut ix1: Wrapping<u32>;
-        let mut q1: Wrapping<u32>;
-
-        ix0 = (x.to_bits() >> 32) as i32;
-        ix1 = Wrapping(x.to_bits() as u32);
-
-        /* take care of Inf and NaN */
-        if (ix0 & 0x7ff00000) == 0x7ff00000 {
-            return x * x + x; /* sqrt(NaN)=NaN, sqrt(+inf)=+inf, sqrt(-inf)=sNaN */
-        }
-        /* take care of zero */
-        if ix0 <= 0 {
-            if ((ix0 & !(sign.0 as i32)) | ix1.0 as i32) == 0 {
-                return x; /* sqrt(+-0) = +-0 */
-            }
-            if ix0 < 0 {
-                return (x - x) / (x - x); /* sqrt(-ve) = sNaN */
-            }
-        }
-        /* normalize x */
-        m = ix0 >> 20;
-        if m == 0 {
-            /* subnormal x */
-            while ix0 == 0 {
-                m -= 21;
-                ix0 |= (ix1 >> 11).0 as i32;
-                ix1 <<= 21;
-            }
-            i = 0;
-            while (ix0 & 0x00100000) == 0 {
-                i += 1;
-                ix0 <<= 1;
-            }
-            m -= i - 1;
-            ix0 |= (ix1 >> (32 - i) as usize).0 as i32;
-            ix1 = ix1 << i as usize;
-        }
-        m -= 1023; /* unbias exponent */
-        ix0 = (ix0 & 0x000fffff) | 0x00100000;
-        if (m & 1) == 1 {
-            /* odd m, double x to make it even */
-            ix0 += ix0 + ((ix1 & sign) >> 31).0 as i32;
-            ix1 += ix1;
-        }
-        m >>= 1; /* m = [m/2] */
-
-        /* generate sqrt(x) bit by bit */
-        ix0 += ix0 + ((ix1 & sign) >> 31).0 as i32;
-        ix1 += ix1;
-        q = 0; /* [q,q1] = sqrt(x) */
-        q1 = Wrapping(0);
-        s0 = 0;
-        s1 = Wrapping(0);
-        r = Wrapping(0x00200000); /* r = moving bit from right to left */
-
-        while r != Wrapping(0) {
-            t = s0 + r.0 as i32;
-            if t <= ix0 {
-                s0 = t + r.0 as i32;
-                ix0 -= t;
-                q += r.0 as i32;
-            }
-            ix0 += ix0 + ((ix1 & sign) >> 31).0 as i32;
-            ix1 += ix1;
-            r >>= 1;
-        }
-
-        r = sign;
-        while r != Wrapping(0) {
-            t1 = s1 + r;
-            t = s0;
-            if t < ix0 || (t == ix0 && t1 <= ix1) {
-                s1 = t1 + r;
-                if (t1 & sign) == sign && (s1 & sign) == Wrapping(0) {
-                    s0 += 1;
-                }
-                ix0 -= t;
-                if ix1 < t1 {
-                    ix0 -= 1;
-                }
-                ix1 -= t1;
-                q1 += r;
-            }
-            ix0 += ix0 + ((ix1 & sign) >> 31).0 as i32;
-            ix1 += ix1;
-            r >>= 1;
-        }
-
-        /* use floating add to find out rounding direction */
-        if (ix0 as u32 | ix1.0) != 0 {
-            z = 1.0 - TINY; /* raise inexact flag */
-            if z >= 1.0 {
-                z = 1.0 + TINY;
-                if q1.0 == 0xffffffff {
-                    q1 = Wrapping(0);
-                    q += 1;
-                } else if z > 1.0 {
-                    if q1.0 == 0xfffffffe {
-                        q += 1;
-                    }
-                    q1 += Wrapping(2);
-                } else {
-                    q1 += q1 & Wrapping(1);
-                }
-            }
-        }
-        ix0 = (q >> 1) + 0x3fe00000;
-        ix1 = q1 >> 1;
-        if (q & 1) == 1 {
-            ix1 |= sign;
-        }
-        ix0 += m << 20;
-        f64::from_bits((ix0 as u64) << 32 | ix1.0 as u64)
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use core::f64::*;
-
-    use super::*;
-
-    #[test]
-    fn sanity_check() {
-        assert_eq!(sqrt(100.0), 10.0);
-        assert_eq!(sqrt(4.0), 2.0);
-    }
-
-    /// The spec: https://en.cppreference.com/w/cpp/numeric/math/sqrt
-    #[test]
-    fn spec_tests() {
-        // Not Asserted: FE_INVALID exception is raised if argument is negative.
-        assert!(sqrt(-1.0).is_nan());
-        assert!(sqrt(NAN).is_nan());
-        for f in [0.0, -0.0, INFINITY].iter().copied() {
-            assert_eq!(sqrt(f), f);
-        }
-    }
-
-    #[test]
-    fn conformance_tests() {
-        let values = [3.14159265359, 10000.0, f64::from_bits(0x0000000f), INFINITY];
-        let results = [
-            4610661241675116657u64,
-            4636737291354636288u64,
-            2197470602079456986u64,
-            9218868437227405312u64,
-        ];
-
-        for i in 0..values.len() {
-            let bits = f64::to_bits(sqrt(values[i]));
-            assert_eq!(results[i], bits);
-        }
-    }
-}
diff --git a/src/math/sqrtf.rs b/src/math/sqrtf.rs
deleted file mode 100644
index a738fc0b6..000000000
--- a/src/math/sqrtf.rs
+++ /dev/null
@@ -1,167 +0,0 @@
-/* origin: FreeBSD /usr/src/lib/msun/src/e_sqrtf.c */
-/*
- * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com.
- */
-/*
- * ====================================================
- * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
- *
- * Developed at SunPro, a Sun Microsystems, Inc. business.
- * Permission to use, copy, modify, and distribute this
- * software is freely granted, provided that this notice
- * is preserved.
- * ====================================================
- */
-
-/// The square root of `x` (f32).
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
-pub fn sqrtf(x: f32) -> f32 {
-    // On wasm32 we know that LLVM's intrinsic will compile to an optimized
-    // `f32.sqrt` native instruction, so we can leverage this for both code size
-    // and speed.
-    llvm_intrinsically_optimized! {
-        #[cfg(target_arch = "wasm32")] {
-            return if x < 0.0 {
-                ::core::f32::NAN
-            } else {
-                unsafe { ::core::intrinsics::sqrtf32(x) }
-            }
-        }
-    }
-    #[cfg(all(target_feature = "sse", not(feature = "force-soft-floats")))]
-    {
-        // Note: This path is unlikely since LLVM will usually have already
-        // optimized sqrt calls into hardware instructions if sse is available,
-        // but if someone does end up here they'll appreciate the speed increase.
-        #[cfg(target_arch = "x86")]
-        use core::arch::x86::*;
-        #[cfg(target_arch = "x86_64")]
-        use core::arch::x86_64::*;
-        unsafe {
-            let m = _mm_set_ss(x);
-            let m_sqrt = _mm_sqrt_ss(m);
-            _mm_cvtss_f32(m_sqrt)
-        }
-    }
-    #[cfg(any(not(target_feature = "sse"), feature = "force-soft-floats"))]
-    {
-        const TINY: f32 = 1.0e-30;
-
-        let mut z: f32;
-        let sign: i32 = 0x80000000u32 as i32;
-        let mut ix: i32;
-        let mut s: i32;
-        let mut q: i32;
-        let mut m: i32;
-        let mut t: i32;
-        let mut i: i32;
-        let mut r: u32;
-
-        ix = x.to_bits() as i32;
-
-        /* take care of Inf and NaN */
-        if (ix as u32 & 0x7f800000) == 0x7f800000 {
-            return x * x + x; /* sqrt(NaN)=NaN, sqrt(+inf)=+inf, sqrt(-inf)=sNaN */
-        }
-
-        /* take care of zero */
-        if ix <= 0 {
-            if (ix & !sign) == 0 {
-                return x; /* sqrt(+-0) = +-0 */
-            }
-            if ix < 0 {
-                return (x - x) / (x - x); /* sqrt(-ve) = sNaN */
-            }
-        }
-
-        /* normalize x */
-        m = ix >> 23;
-        if m == 0 {
-            /* subnormal x */
-            i = 0;
-            while ix & 0x00800000 == 0 {
-                ix <<= 1;
-                i = i + 1;
-            }
-            m -= i - 1;
-        }
-        m -= 127; /* unbias exponent */
-        ix = (ix & 0x007fffff) | 0x00800000;
-        if m & 1 == 1 {
-            /* odd m, double x to make it even */
-            ix += ix;
-        }
-        m >>= 1; /* m = [m/2] */
-
-        /* generate sqrt(x) bit by bit */
-        ix += ix;
-        q = 0;
-        s = 0;
-        r = 0x01000000; /* r = moving bit from right to left */
-
-        while r != 0 {
-            t = s + r as i32;
-            if t <= ix {
-                s = t + r as i32;
-                ix -= t;
-                q += r as i32;
-            }
-            ix += ix;
-            r >>= 1;
-        }
-
-        /* use floating add to find out rounding direction */
-        if ix != 0 {
-            z = 1.0 - TINY; /* raise inexact flag */
-            if z >= 1.0 {
-                z = 1.0 + TINY;
-                if z > 1.0 {
-                    q += 2;
-                } else {
-                    q += q & 1;
-                }
-            }
-        }
-
-        ix = (q >> 1) + 0x3f000000;
-        ix += m << 23;
-        f32::from_bits(ix as u32)
-    }
-}
-
-// PowerPC tests are failing on LLVM 13: https://github.com/rust-lang/rust/issues/88520
-#[cfg(not(target_arch = "powerpc64"))]
-#[cfg(test)]
-mod tests {
-    use core::f32::*;
-
-    use super::*;
-
-    #[test]
-    fn sanity_check() {
-        assert_eq!(sqrtf(100.0), 10.0);
-        assert_eq!(sqrtf(4.0), 2.0);
-    }
-
-    /// The spec: https://en.cppreference.com/w/cpp/numeric/math/sqrt
-    #[test]
-    fn spec_tests() {
-        // Not Asserted: FE_INVALID exception is raised if argument is negative.
-        assert!(sqrtf(-1.0).is_nan());
-        assert!(sqrtf(NAN).is_nan());
-        for f in [0.0, -0.0, INFINITY].iter().copied() {
-            assert_eq!(sqrtf(f), f);
-        }
-    }
-
-    #[test]
-    fn conformance_tests() {
-        let values = [3.14159265359f32, 10000.0f32, f32::from_bits(0x0000000f), INFINITY];
-        let results = [1071833029u32, 1120403456u32, 456082799u32, 2139095040u32];
-
-        for i in 0..values.len() {
-            let bits = f32::to_bits(sqrtf(values[i]));
-            assert_eq!(results[i], bits);
-        }
-    }
-}
diff --git a/src/math/trunc.rs b/src/math/trunc.rs
deleted file mode 100644
index f7892a2c5..000000000
--- a/src/math/trunc.rs
+++ /dev/null
@@ -1,40 +0,0 @@
-use core::f64;
-
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
-pub fn trunc(x: f64) -> f64 {
-    // On wasm32 we know that LLVM's intrinsic will compile to an optimized
-    // `f64.trunc` native instruction, so we can leverage this for both code size
-    // and speed.
-    llvm_intrinsically_optimized! {
-        #[cfg(target_arch = "wasm32")] {
-            return unsafe { ::core::intrinsics::truncf64(x) }
-        }
-    }
-    let x1p120 = f64::from_bits(0x4770000000000000); // 0x1p120f === 2 ^ 120
-
-    let mut i: u64 = x.to_bits();
-    let mut e: i64 = (i >> 52 & 0x7ff) as i64 - 0x3ff + 12;
-    let m: u64;
-
-    if e >= 52 + 12 {
-        return x;
-    }
-    if e < 12 {
-        e = 1;
-    }
-    m = -1i64 as u64 >> e;
-    if (i & m) == 0 {
-        return x;
-    }
-    force_eval!(x + x1p120);
-    i &= !m;
-    f64::from_bits(i)
-}
-
-#[cfg(test)]
-mod tests {
-    #[test]
-    fn sanity_check() {
-        assert_eq!(super::trunc(1.1), 1.0);
-    }
-}
diff --git a/src/math/truncf.rs b/src/math/truncf.rs
deleted file mode 100644
index 20d5b73bd..000000000
--- a/src/math/truncf.rs
+++ /dev/null
@@ -1,42 +0,0 @@
-use core::f32;
-
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
-pub fn truncf(x: f32) -> f32 {
-    // On wasm32 we know that LLVM's intrinsic will compile to an optimized
-    // `f32.trunc` native instruction, so we can leverage this for both code size
-    // and speed.
-    llvm_intrinsically_optimized! {
-        #[cfg(target_arch = "wasm32")] {
-            return unsafe { ::core::intrinsics::truncf32(x) }
-        }
-    }
-    let x1p120 = f32::from_bits(0x7b800000); // 0x1p120f === 2 ^ 120
-
-    let mut i: u32 = x.to_bits();
-    let mut e: i32 = (i >> 23 & 0xff) as i32 - 0x7f + 9;
-    let m: u32;
-
-    if e >= 23 + 9 {
-        return x;
-    }
-    if e < 9 {
-        e = 1;
-    }
-    m = -1i32 as u32 >> e;
-    if (i & m) == 0 {
-        return x;
-    }
-    force_eval!(x + x1p120);
-    i &= !m;
-    f32::from_bits(i)
-}
-
-// PowerPC tests are failing on LLVM 13: https://github.com/rust-lang/rust/issues/88520
-#[cfg(not(target_arch = "powerpc64"))]
-#[cfg(test)]
-mod tests {
-    #[test]
-    fn sanity_check() {
-        assert_eq!(super::truncf(1.1), 1.0);
-    }
-}